varnish-cache/lib/libvarnish/vtcp.c
1
/*-
2
 * Copyright (c) 2006 Verdens Gang AS
3
 * Copyright (c) 2006-2015 Varnish Software AS
4
 * All rights reserved.
5
 *
6
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "config.h"
31
32
#include <sys/types.h>
33
#include <sys/socket.h>
34
#include <sys/ioctl.h>
35
#ifdef HAVE_SYS_FILIO_H
36
#  include <sys/filio.h>
37
#endif
38
39
#include <netinet/in.h>
40
#include <netinet/tcp.h>
41
42
#include <errno.h>
43
#include <math.h>
44
#include <netdb.h>
45
#include <poll.h>
46
#include <stdio.h>
47
#include <string.h>
48
#include <unistd.h>
49
50
#include "vdef.h"
51
#include "vas.h"
52
#include "vsa.h"
53
#include "vss.h"
54
#include "vtcp.h"
55
56
/*--------------------------------------------------------------------*/
57
/*
 * Render a socket address as numeric host and service strings.
 *
 * sa, l      socket address and its length, handed to getnameinfo(3)
 * abuf, alen buffer for the numeric host; abuf may be NULL to skip it
 * pbuf, plen buffer for the numeric port; pbuf may be NULL to skip it
 *
 * If the conversion fails, a diagnostic is printed on stdout and the
 * supplied buffers are set to "Conversion" and "Failed".  On success a
 * leading "::ffff:" is removed from the host string.
 */
static void
vtcp_sa_to_ascii(const void *sa, socklen_t l, char *abuf, unsigned alen,
    char *pbuf, unsigned plen)
{
        static const char v4mapped[] = "::ffff:";
        const size_t skip = sizeof v4mapped - 1;
        int rc;

        assert(abuf == NULL || alen > 0);
        assert(pbuf == NULL || plen > 0);

        rc = getnameinfo(sa, l, abuf, alen, pbuf, plen,
            NI_NUMERICHOST | NI_NUMERICSERV);
        if (rc != 0) {
                /*
                 * XXX printing to stdout is crude, but the caller's
                 * buffer may be too small for the gai_strerror() text.
                 */
                printf("getnameinfo = %d %s\n", rc, gai_strerror(rc));
                if (abuf != NULL)
                        (void)snprintf(abuf, alen, "Conversion");
                if (pbuf != NULL)
                        (void)snprintf(pbuf, plen, "Failed");
                return;
        }

        /* XXX dirty hack for v4-to-v6 mapped addresses */
        if (abuf == NULL || strncmp(abuf, v4mapped, skip) != 0)
                return;

        /* Source and destination overlap; the copy includes the NUL. */
        memmove(abuf, abuf + skip, strlen(abuf + skip) + 1);
}
86
87
/*--------------------------------------------------------------------*/
88
89
/*
 * Format the address carried by `addr` as numeric host and port strings.
 * The sockaddr is obtained with VSA_Get_Sockaddr() and the four buffer
 * arguments are passed unchanged to vtcp_sa_to_ascii().
 */
void
VTCP_name(const struct suckaddr *addr, char *abuf, unsigned alen,
    char *pbuf, unsigned plen)
{
        socklen_t salen;
        const struct sockaddr *saddr = VSA_Get_Sockaddr(addr, &salen);

        vtcp_sa_to_ascii(saddr, salen, abuf, alen, pbuf, plen);
}
99
100
/*--------------------------------------------------------------------*/
101
102
/*
 * Return the local address of socket `sock` wrapped in a struct suckaddr.
 *
 * getsockname(2) must succeed (enforced with AZ).  The result comes from
 * VSA_Malloc(); presumably the caller owns and frees it -- confirm
 * against vsa.h.
 */
struct suckaddr *
VTCP_my_suckaddr(int sock)
{
        struct sockaddr_storage ss;
        socklen_t sslen = sizeof ss;

        AZ(getsockname(sock, (void *)&ss, &sslen));
        return (VSA_Malloc(&ss, sslen));
}
112
113
/*--------------------------------------------------------------------*/
114
115
/*
 * Format the local address of socket `sock` as numeric host and port
 * strings.  getsockname(2) must succeed (enforced with AZ); the buffer
 * arguments are passed unchanged to vtcp_sa_to_ascii().
 */
void
VTCP_myname(int sock, char *abuf, unsigned alen, char *pbuf, unsigned plen)
{
        struct sockaddr_storage ss;
        socklen_t sslen = sizeof ss;

        AZ(getsockname(sock, (void *)&ss, &sslen));
        vtcp_sa_to_ascii(&ss, sslen, abuf, alen, pbuf, plen);
}
125
126
/*--------------------------------------------------------------------*/
127
128
/*
 * Format the peer address of socket `sock` as numeric host and port
 * strings.
 *
 * abuf/alen and pbuf/plen are handed to vtcp_sa_to_ascii() when
 * getpeername(2) succeeds; that helper accepts NULL for either buffer.
 * When getpeername(2) fails, every buffer that was supplied is set to
 * "<none>".
 */
void
VTCP_hisname(int sock, char *abuf, unsigned alen, char *pbuf, unsigned plen)
{
        struct sockaddr_storage addr_s;
        socklen_t l;

        l = sizeof addr_s;
        if (getpeername(sock, (void *)&addr_s, &l) == 0) {
                vtcp_sa_to_ascii(&addr_s, l, abuf, alen, pbuf, plen);
                return;
        }

        /*
         * The success path tolerates a NULL buffer, so the failure path
         * must not write through one either.
         */
        if (abuf != NULL)
                (void)snprintf(abuf, alen, "<none>");
        if (pbuf != NULL)
                (void)snprintf(pbuf, plen, "<none>");
}
142
143
/*--------------------------------------------------------------------*/
144
145
#ifdef HAVE_ACCEPT_FILTERS

/*
 * Install the "httpready" accept filter on `sock` via SO_ACCEPTFILTER.
 * Returns the setsockopt(2) result: 0 on success, -1 with errno set.
 */
int
VTCP_filter_http(int sock)
{
        struct accept_filter_arg afa;

        memset(&afa, 0, sizeof(afa));
        strcpy(afa.af_name, "httpready");
        return (setsockopt(sock, SOL_SOCKET, SO_ACCEPTFILTER,
            &afa, sizeof afa));
}

#elif defined(__linux)

/*
 * Linux variant: set TCP_DEFER_ACCEPT to 1 on `sock`.
 * Returns the setsockopt(2) result: 0 on success, -1 with errno set.
 *
 * IPPROTO_TCP is used as the option level (numerically identical to the
 * Linux-only SOL_TCP alias), matching the TCP_NODELAY call in this file.
 */
int
VTCP_filter_http(int sock)
{
        int defer = 1;

        return (setsockopt(sock, IPPROTO_TCP, TCP_DEFER_ACCEPT,
            &defer, sizeof defer));
}

#else

/*
 * Fallback for platforms with neither mechanism: always fails with
 * errno set to EOPNOTSUPP.
 */
int
VTCP_filter_http(int sock)
{
        (void)sock;
        errno = EOPNOTSUPP;
        return (-1);
}

#endif
184
185
/*--------------------------------------------------------------------*/
186
187
#ifdef HAVE_TCP_FASTOPEN

/*
 * Set the TCP_FASTOPEN option on `sock`, passing `depth` as its value.
 * Returns the setsockopt(2) result: 0 on success, -1 with errno set.
 *
 * The option level is IPPROTO_TCP rather than SOL_TCP: the latter is a
 * Linux-specific alias, while this branch is selected purely by
 * HAVE_TCP_FASTOPEN and may be built elsewhere.
 */
int
VTCP_fastopen(int sock, int depth)
{
        return (setsockopt(sock, IPPROTO_TCP, TCP_FASTOPEN,
            &depth, sizeof depth));
}

#else

/*
 * Fallback when TCP_FASTOPEN is unavailable: always fails with errno
 * set to EOPNOTSUPP.
 */
int
VTCP_fastopen(int sock, int depth)
{
        (void)sock;
        (void)depth;
        errno = EOPNOTSUPP;
        return (-1);
}

#endif
208
209
/*--------------------------------------------------------------------
210
 * Functions for controlling NONBLOCK mode.
211
 *
212
 * We use FIONBIO because it is cheaper than fcntl(2), which requires
213
 * us to do two syscalls, one to get and one to set, the latter of
214
 * which mucks about a bit before it ends up calling ioctl(FIONBIO),
215
 * at least on FreeBSD.
216
 */
217
218
/*
 * Switch `sock` to blocking mode by clearing FIONBIO.
 * The ioctl(2) result is checked with VTCP_Assert() and then returned.
 */
int
VTCP_blocking(int sock)
{
        int nonblock = 0;
        int rv = ioctl(sock, FIONBIO, &nonblock);

        VTCP_Assert(rv);
        return (rv);
}
228
229
/*
 * Switch `sock` to non-blocking mode by setting FIONBIO.
 * The ioctl(2) result is checked with VTCP_Assert() and then returned.
 */
int
VTCP_nonblocking(int sock)
{
        int nonblock = 1;
        int rv = ioctl(sock, FIONBIO, &nonblock);

        VTCP_Assert(rv);
        return (rv);
}
239
240
/*--------------------------------------------------------------------
241
 * On TCP a connect(2) can block for a looong time, and we don't want that.
242
 * Unfortunately, the SocketWizards back in those days were happy to wait
243
 * any amount of time for a connection, so the connect(2) syscall does not
244
 * take an argument for patience.
245
 *
246
 * There is a little used work-around, and we employ it at our peril.
247
 *
248
 */
249
250
/*
 * Finish a non-blocking connect on socket `s`.
 *
 * Reads SO_ERROR and stores it in errno.  If it is non-zero the socket
 * is closed and -1 is returned; otherwise the socket is switched back to
 * blocking mode and `s` is returned.
 */
int
VTCP_connected(int s)
{
        int soerr;
        socklen_t len = sizeof soerr;

        /* Ask the socket how the connect attempt turned out */
        AZ(getsockopt(s, SOL_SOCKET, SO_ERROR, &soerr, &len));

        /* Publish the outcome in errno; zero means we are connected */
        errno = soerr;
        if (soerr == 0) {
                (void)VTCP_blocking(s);
                return (s);
        }

        closefd(&s);
        return (-1);
}
270
271
int
272 5796
VTCP_connect(const struct suckaddr *name, int msec)
273
{
274
        int s, i;
275
        struct pollfd fds[1];
276
        const struct sockaddr *sa;
277
        socklen_t sl;
278
        int val;
279
280 5796
        if (name == NULL)
281 152
                return (-1);
282
        /* Attempt the connect */
283 5644
        AN(VSA_Sane(name));
284 5643
        sa = VSA_Get_Sockaddr(name, &sl);
285 5644
        AN(sa);
286 5644
        AN(sl);
287
288 5644
        s = socket(sa->sa_family, SOCK_STREAM, 0);
289 5644
        if (s < 0)
290 0
                return (s);
291
292
        /* Set the socket non-blocking */
293 5644
        if (msec != 0)
294 5584
                (void)VTCP_nonblocking(s);
295
296 5643
        val = 1;
297 5643
        AZ(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &val, sizeof val));
298
299 5644
        i = connect(s, sa, sl);
300 5644
        if (i == 0)
301 84
                return (s);
302 5560
        if (errno != EINPROGRESS) {
303 0
                closefd(&s);
304 0
                return (-1);
305
        }
306
307 5560
        if (msec < 0) {
308
                /*
309
                 * Caller is responsible for waiting and
310
                 * calling VTCP_connected
311
                 */
312 1242
                return (s);
313
        }
314
315 4318
        assert(msec > 0);
316
        /* Exercise our patience, polling for write */
317 4318
        fds[0].fd = s;
318 4318
        fds[0].events = POLLWRNORM;
319 4318
        fds[0].revents = 0;
320 4318
        i = poll(fds, 1, msec);
321
322 4318
        if (i == 0) {
323
                /* Timeout, close and give up */
324 0
                closefd(&s);
325 0
                errno = ETIMEDOUT;
326 0
                return (-1);
327
        }
328
329 4318
        return (VTCP_connected(s));
330
}
331
332
/*--------------------------------------------------------------------
333
 * When closing a TCP connection, a couple of errno's are legit, we
334
 * can't be held responsible for the other end wanting to talk to us.
335
 */
336
337
/*
 * Close the socket `*s` and set `*s` to -1.
 * Asserts that the close(2) result is acceptable to VTCP_Check().
 */
void
VTCP_close(int *s)
{
        int rv = close(*s);

        assert(VTCP_Check(rv));
        *s = -1;
}
347
348
/*
 * Set the receive timeout (SO_RCVTIMEO) of socket `s` to `seconds`.
 * Compiles to a no-op unless SO_RCVTIMEO_WORKS is defined.
 */
void
VTCP_set_read_timeout(int s, double seconds)
{
#ifdef SO_RCVTIMEO_WORKS
        struct timeval tv;

        /* Split into whole seconds and the microsecond remainder */
        tv.tv_sec = (int)floor(seconds);
        tv.tv_usec = (int)(1e6 * (seconds - tv.tv_sec));
        /*
         * Solaris bug (present at least in snv_151 and older): If this fails
         * with EINVAL, the socket is half-closed (SS_CANTSENDMORE) and the
         * timeout does not get set. Needs to be fixed in Solaris, there is
         * nothing we can do about this.
         */
        VTCP_Assert(setsockopt(s, SOL_SOCKET, SO_RCVTIMEO,
            &tv, sizeof tv));
#else
        (void)s;
        (void)seconds;
#endif
}
368
369
/*--------------------------------------------------------------------
370
 */
371
372
static int v_matchproto_(vss_resolved_f)
373 2356
vtcp_open_callback(void *priv, const struct suckaddr *sa)
374
{
375 2356
        double *p = priv;
376
377 2356
        return (VTCP_connect(sa, (int)floor(*p * 1e3)));
378
}
379
380
int
381 2356
VTCP_open(const char *addr, const char *def_port, double timeout,
382
    const char **errp)
383
{
384
        int error;
385
        const char *err;
386
387 2356
        if (errp != NULL)
388 2356
                *errp = NULL;
389 2356
        assert(timeout >= 0);
390 2356
        error = VSS_resolver(addr, def_port, vtcp_open_callback,
391
            &timeout, &err);
392 2356
        if (err != NULL) {
393 0
                if (errp != NULL)
394 0
                        *errp = err;
395 0
                return (-1);
396
        }
397 2356
        return (error);
398
}
399
400
/*--------------------------------------------------------------------
401
 * Given a struct suckaddr, open a socket of the appropriate type, and bind
402
 * it to the requested address.
403
 *
404
 * If the address is an IPv6 address, the IPV6_V6ONLY option is set to
405
 * avoid conflicts between INADDR_ANY and IN6ADDR_ANY.
406
 */
407
408
/*
 * Create a stream socket for the protocol of `sa`, set SO_REUSEADDR (and
 * IPV6_V6ONLY for AF_INET6 where available), and bind it to `sa`.
 *
 * errp  optional; cleared on entry and, on failure, set to a static
 *       string naming the step that failed
 *
 * Returns the socket on success.  On failure returns -1; for failures
 * after socket(2) the socket is closed and errno is preserved.
 */
int
VTCP_bind(const struct suckaddr *sa, const char **errp)
{
        const struct sockaddr *so;
        const char *what;
        socklen_t sl;
        int sd, on, proto, saved;

        if (errp != NULL)
                *errp = NULL;

        proto = VSA_Get_Proto(sa);
        sd = socket(proto, SOCK_STREAM, 0);
        if (sd < 0) {
                if (errp != NULL)
                        *errp = "socket(2)";
                return (-1);
        }

        on = 1;
        if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof on) != 0) {
                what = "setsockopt(SO_REUSEADDR, 1)";
                goto fail;
        }
#ifdef IPV6_V6ONLY
        /* forcibly use separate sockets for IPv4 and IPv6 */
        on = 1;
        if (proto == AF_INET6 &&
            setsockopt(sd, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof on) != 0) {
                what = "setsockopt(IPV6_V6ONLY, 1)";
                goto fail;
        }
#endif
        so = VSA_Get_Sockaddr(sa, &sl);
        if (bind(sd, so, sl) != 0) {
                what = "bind(2)";
                goto fail;
        }
        return (sd);

fail:
        /* Keep the errno of the failed call across the close */
        saved = errno;
        if (errp != NULL)
                *errp = what;
        closefd(&sd);
        errno = saved;
        return (-1);
}
459
460
/*--------------------------------------------------------------------
461
 * Given a struct suckaddr, open a socket of the appropriate type, bind it
462
 * to the requested address, and start listening.
463
 */
464
465
/*
 * Bind a socket to `sa` with VTCP_bind() and put it in listening state
 * with a backlog of `depth`.
 *
 * errp  optional; cleared on entry, set by VTCP_bind() on bind failures
 *       and to "listen(2)" if listen(2) fails
 *
 * Returns the listening socket, the negative VTCP_bind() result, or -1
 * if listen(2) fails (socket closed, errno preserved).
 */
int
VTCP_listen(const struct suckaddr *sa, int depth, const char **errp)
{
        int sd, saved;

        if (errp != NULL)
                *errp = NULL;

        sd = VTCP_bind(sa, errp);
        if (sd < 0 || listen(sd, depth) == 0)
                return (sd);

        /* listen(2) failed: release the socket but keep its errno */
        saved = errno;
        closefd(&sd);
        errno = saved;
        if (errp != NULL)
                *errp = "listen(2)";
        return (-1);
}
486
487
/*--------------------------------------------------------------------*/
488
489
/* Arguments carried through VSS_resolver() to vtcp_lo_cb() */
struct helper {
        int             depth;  /* backlog handed to VTCP_listen() */
        const char      **errp; /* where the error string is stored */
};
493
494
static int v_matchproto_(vss_resolved_f)
495 4158
vtcp_lo_cb(void *priv, const struct suckaddr *sa)
496
{
497
        int sock;
498 4158
        struct helper *hp = priv;
499
500 4158
        sock = VTCP_listen(sa, hp->depth, hp->errp);
501 4158
        if (sock > 0) {
502 4158
                *hp->errp = NULL;
503 4158
                return (sock);
504
        }
505 0
        AN(*hp->errp);
506 0
        return (0);
507
}
508
509
int
510 4158
VTCP_listen_on(const char *addr, const char *def_port, int depth,
511
    const char **errp)
512
{
513
        struct helper h;
514
        int sock;
515
516 4158
        h.depth = depth;
517 4158
        h.errp = errp;
518
519 4158
        sock = VSS_resolver(addr, def_port, vtcp_lo_cb, &h, errp);
520 4158
        if (*errp != NULL)
521 0
                return (-1);
522 4158
        return(sock);
523
}
524
525
/*--------------------------------------------------------------------
526
 * Set or reset SO_LINGER flag
527
 */
528
529
/*
 * Set SO_LINGER on `sock`: l_onoff is set to `linger`, every other field
 * of struct linger is left zero.  The setsockopt(2) result is checked
 * with VTCP_Assert() and then returned.
 */
int
VTCP_linger(int sock, int linger)
{
        struct linger opt;
        int rv;

        memset(&opt, 0, sizeof opt);
        opt.l_onoff = linger;

        rv = setsockopt(sock, SOL_SOCKET, SO_LINGER, &opt, sizeof opt);
        VTCP_Assert(rv);
        return (rv);
}
541
542
/*--------------------------------------------------------------------
543
 * Do a poll to check for remote HUP
544
 */
545
546
/*
 * Poll `sock` once, without waiting, and report whether POLLHUP is set.
 *
 * sock  must be > 0 (asserted)
 *
 * Returns 1 if poll(2) reported exactly one ready descriptor with
 * POLLHUP in revents, 0 otherwise.
 */
int
VTCP_check_hup(int sock)
{
        struct pollfd pfd;

        assert(sock > 0);

        pfd.fd = sock;
        pfd.events = POLLOUT;
        pfd.revents = 0;

        if (poll(&pfd, 1, 0) != 1)
                return (0);
        return ((pfd.revents & POLLHUP) ? 1 : 0);
}
560
561
/*--------------------------------------------------------------------
562
 * Check if a TCP syscall return value is fatal
563
 */
564
565
/*
 * Decide whether the result `a` of a TCP-related syscall is acceptable.
 *
 * Returns 1 if `a` is zero, or if errno holds one of the values regarded
 * as harmless on this platform; returns 0 otherwise.
 */
int
VTCP_Check(int a)
{
        if (a == 0)
                return (1);

        switch (errno) {
        case ECONNRESET:
        case ENOTCONN:
                return (1);
#if (defined (__SVR4) && defined (__sun)) || defined (__NetBSD__)
        /*
         * Solaris returns EINVAL if the other end unexpectedly reset the
         * connection.
         * This is a bug in Solaris and documented behaviour on NetBSD.
         */
        case EINVAL:
        case ETIMEDOUT:
        case EPIPE:
                return (1);
#elif defined (__APPLE__)
        /*
         * MacOS returns EINVAL if the other end unexpectedly reset
         * the connection.
         */
        case EINVAL:
                return (1);
#endif
        default:
                return (0);
        }
}
590
591
/*--------------------------------------------------------------------
592
 *
593
 */
594
595
/*
 * Read up to `len` bytes from `fd` into `ptr`, optionally waiting at
 * most `tmo` seconds for the descriptor to become readable.
 *
 * tmo > 0   poll(2) for POLLIN first, for at least one millisecond
 * tmo <= 0  call read(2) directly
 *
 * Returns the number of bytes read, -1 if read(2) failed, or -2 if the
 * poll timed out.
 */
int
VTCP_read(int fd, void *ptr, size_t len, double tmo)
{
        struct pollfd pfd;
        int msec, n;

        if (tmo > 0.0) {
                /* tmo is positive here, so truncating equals floor() */
                msec = (int)(tmo * 1e3);
                if (msec == 0)
                        msec = 1;

                pfd.fd = fd;
                pfd.events = POLLIN;
                pfd.revents = 0;
                if (poll(&pfd, 1, msec) == 0)
                        return (-2);
        }

        n = read(fd, ptr, len);
        if (n < 0)
                return (-1);
        return (n);
}