varnish-cache/lib/libvarnish/vtcp.c
1
/*-
2
 * Copyright (c) 2006 Verdens Gang AS
3
 * Copyright (c) 2006-2015 Varnish Software AS
4
 * All rights reserved.
5
 *
6
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7
 *
8
 * SPDX-License-Identifier: BSD-2-Clause
9
 *
10
 * Redistribution and use in source and binary forms, with or without
11
 * modification, are permitted provided that the following conditions
12
 * are met:
13
 * 1. Redistributions of source code must retain the above copyright
14
 *    notice, this list of conditions and the following disclaimer.
15
 * 2. Redistributions in binary form must reproduce the above copyright
16
 *    notice, this list of conditions and the following disclaimer in the
17
 *    documentation and/or other materials provided with the distribution.
18
 *
19
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
23
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29
 * SUCH DAMAGE.
30
 */
31
32
#include "config.h"
33
34
#include <sys/types.h>
35
#include <sys/socket.h>
36
#include <sys/time.h>           // for NetBSD
37
#include <sys/ioctl.h>
38
#ifdef HAVE_SYS_FILIO_H
39
#  include <sys/filio.h>
40
#endif
41
42
#include <netinet/in.h>
43
#include <netinet/tcp.h>
44
45
#include <limits.h>
#include <math.h>
#include <netdb.h>
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
52
53
#include "vdef.h"
54
#include "miniobj.h"
55
#include "vas.h"
56
#include "vsa.h"
57
#include "vss.h"
58
#include "vtcp.h"
59
#include "vtim.h"
60
61
/*--------------------------------------------------------------------*/
62
static void
63 973687
vtcp_sa_to_ascii(const void *sa, socklen_t l, char *abuf, unsigned alen,
64
    char *pbuf, unsigned plen)
65
{
66
        int i;
67
68 973687
        assert(abuf == NULL || alen > 0);
69 973708
        assert(pbuf == NULL || plen > 0);
70 973726
        i = getnameinfo(sa, l, abuf, alen, pbuf, plen,
71
           NI_NUMERICHOST | NI_NUMERICSERV);
72 973726
        if (i) {
73
                /*
74
                 * XXX this printf is shitty, but we may not have space
75
                 * for the gai_strerror in the bufffer :-(
76
                 */
77 0
                fprintf(stderr, "getnameinfo = %d %s\n", i, gai_strerror(i));
78 0
                if (i == EAI_SYSTEM)
79 0
                        fprintf(stderr, "errno = %d %s\n", errno, VAS_errtxt(errno));
80 0
                if (abuf != NULL)
81 0
                        (void)snprintf(abuf, alen, "Conversion");
82 0
                if (pbuf != NULL)
83 0
                        (void)snprintf(pbuf, plen, "Failed");
84 0
                return;
85
        }
86
        /* XXX dirty hack for v4-to-v6 mapped addresses */
87 973750
        if (abuf != NULL && strncmp(abuf, "::ffff:", 7) == 0) {
88 0
                for (i = 0; abuf[i + 7]; ++i)
89 0
                        abuf[i] = abuf[i + 7];
90 0
                abuf[i] = '\0';
91 0
        }
92 973738
}
93
94
/*--------------------------------------------------------------------*/
95
96
/*
 * Format the address held in a struct suckaddr as numeric host and port
 * strings.  The sockaddr is extracted with VSA_Get_Sockaddr() (which must
 * not return NULL) and handed to vtcp_sa_to_ascii().
 */
void
VTCP_name(const struct suckaddr *addr, char *abuf, unsigned alen,
    char *pbuf, unsigned plen)
{
        socklen_t salen;
        const struct sockaddr *sap;

        sap = VSA_Get_Sockaddr(addr, &salen);
        AN(sap);
        vtcp_sa_to_ascii(sap, salen, abuf, alen, pbuf, plen);
}
107
108
/*--------------------------------------------------------------------*/
109
110
struct suckaddr *
111 33160
VTCP_my_suckaddr(int sock)
112
{
113
        struct suckaddr *r;
114
115 33160
        r = malloc(vsa_suckaddr_len);
116 33160
        AN(VSA_getsockname(sock, r, vsa_suckaddr_len));
117 33160
        return (r);
118
}
119
120
/*--------------------------------------------------------------------*/
121
122
void
123 405438
VTCP_myname(int sock, char *abuf, unsigned alen, char *pbuf, unsigned plen)
124
{
125 405438
        char buf[vsa_suckaddr_len];
126
127 810876
        VTCP_name(VSA_getsockname(sock, buf, sizeof buf),
128 405438
                  abuf, alen, pbuf, plen);
129 405438
}
130
131
/*--------------------------------------------------------------------*/
132
133
void
134 281573
VTCP_hisname(int sock, char *abuf, unsigned alen, char *pbuf, unsigned plen)
135
{
136 281573
        char buf[vsa_suckaddr_len];
137
        struct suckaddr *sua;
138
139 281573
        sua = VSA_getpeername(sock, buf, sizeof buf);
140 281573
        if (sua != NULL)
141 281560
                VTCP_name(sua, abuf, alen, pbuf, plen);
142
        else {
143 0
                (void)snprintf(abuf, alen, "<none>");
144 0
                (void)snprintf(pbuf, plen, "<none>");
145
        }
146 281572
}
147
148
/*--------------------------------------------------------------------*/
149
150
#ifdef HAVE_ACCEPT_FILTERS
151
152
int
153 32120
VTCP_filter_http(int sock)
154
{
155
        int retval;
156
        struct accept_filter_arg afa;
157
158 32120
        memset(&afa, 0, sizeof afa);
159 32120
        bprintf(afa.af_name, "%s", "httpready");
160 64240
        retval = setsockopt(sock, SOL_SOCKET, SO_ACCEPTFILTER,
161 32120
            &afa, sizeof afa);
162 32120
        return (retval);
163
}
164
165
#elif defined(__linux)
166
167
int
168
VTCP_filter_http(int sock)
169
{
170
        int retval;
171
        int defer = 1;
172
173
        retval = setsockopt(sock, SOL_TCP, TCP_DEFER_ACCEPT,
174
            &defer, sizeof defer);
175
        return (retval);
176
}
177
178
#else
179
180
/*
 * Fallback for platforms with no accept filtering: always fails with
 * errno set to EOPNOTSUPP.
 */
int
VTCP_filter_http(int sock)
{
        (void)sock;
        errno = EOPNOTSUPP;
        return (-1);
}
187
188
#endif
189
190
/*--------------------------------------------------------------------*/
191
192
#ifdef HAVE_TCP_FASTOPEN
193
194
int
195
VTCP_fastopen(int sock, int depth)
196
{
197
        return (setsockopt(sock, SOL_TCP, TCP_FASTOPEN, &depth, sizeof depth));
198
}
199
200
#else
201
202
/*
 * Fallback for platforms without TCP_FASTOPEN: always fails with errno
 * set to EOPNOTSUPP.
 */
int
VTCP_fastopen(int sock, int depth)
{
        (void)depth;
        (void)sock;
        errno = EOPNOTSUPP;
        return (-1);
}
210
211
#endif
212
213
/*--------------------------------------------------------------------
214
 * Functions for controlling NONBLOCK mode.
215
 *
216
 * We use FIONBIO because it is cheaper than fcntl(2), which requires
217
 * us to do two syscalls, one to get and one to set, the latter of
218
 * which mucks about a bit before it ends up calling ioctl(FIONBIO),
219
 * at least on FreeBSD.
220
 * On Solaris ioctl(FIONBIO) can fail with connection related errnos,
221
 * but as long as that is how they fail, we're fine.
222
 */
223
224
/*
 * Put `sock` into blocking mode by clearing FIONBIO.
 * The ioctl result is checked with VTCP_Assert().
 */
void
VTCP_blocking(int sock)
{
        int nonblock = 0;
        int rv;

        rv = ioctl(sock, FIONBIO, &nonblock);
        VTCP_Assert(rv);
}
233
234
/*
 * Put `sock` into non-blocking mode by setting FIONBIO.
 * The ioctl result is checked with VTCP_Assert().
 */
void
VTCP_nonblocking(int sock)
{
        int nonblock = 1;
        int rv;

        rv = ioctl(sock, FIONBIO, &nonblock);
        VTCP_Assert(rv);
}
243
244
/*--------------------------------------------------------------------
245
 * On TCP a connect(2) can block for a looong time, and we don't want that.
246
 * Unfortunately, the SocketWizards back in those days were happy to wait
247
 * any amount of time for a connection, so the connect(2) syscall does not
248
 * take an argument for patience.
249
 *
250
 * There is a little used work-around, and we employ it at our peril.
251
 *
252
 */
253
254
/*
 * Finish a non-blocking connect on socket `s`.
 *
 * Reads the pending socket error (SO_ERROR) and stores it in errno.
 * If it is non-zero the connect failed: the socket is closed and -1 is
 * returned.  Otherwise the socket is switched to blocking mode and
 * returned.
 */
int
VTCP_connected(int s)
{
        int soerr;
        socklen_t optlen = sizeof soerr;

        /* Find out if we got a connection */
        AZ(getsockopt(s, SOL_SOCKET, SO_ERROR, &soerr, &optlen));

        errno = soerr;
        if (soerr == 0) {
                VTCP_blocking(s);
                return (s);
        }

        /* An error means no connection established */
        closefd(&s);
        return (-1);
}
274
275
int
276 161169
VTCP_connect(const struct suckaddr *name, int msec)
277
{
278
        int s, i;
279
        struct pollfd fds[1];
280
        const struct sockaddr *sa;
281
        socklen_t sl;
282
        int val;
283
284 161169
        if (name == NULL)
285 1031
                return (-1);
286
        /* Attempt the connect */
287 160137
        AN(VSA_Sane(name));
288 160133
        sa = VSA_Get_Sockaddr(name, &sl);
289 160133
        AN(sa);
290 160131
        AN(sl);
291
292 160133
        s = socket(sa->sa_family, SOCK_STREAM, 0);
293 160133
        if (s < 0)
294 0
                return (s);
295
296
        /* Set the socket non-blocking */
297 160132
        if (msec != 0)
298 156173
                VTCP_nonblocking(s);
299
300 160127
        val = 1;
301 160127
        AZ(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &val, sizeof val));
302
303 160127
        i = connect(s, sa, sl);
304 160127
        if (i == 0)
305 4025
                return (s);
306 156113
        if (errno != EINPROGRESS) {
307 0
                closefd(&s);
308 0
                return (-1);
309
        }
310
311 156113
        if (msec < 0) {
312
                /*
313
                 * Caller is responsible for waiting and
314
                 * calling VTCP_connected
315
                 */
316 32578
                return (s);
317
        }
318
319 123536
        assert(msec > 0);
320
        /* Exercise our patience, polling for write */
321 123536
        fds[0].fd = s;
322 123536
        fds[0].events = POLLWRNORM;
323 123536
        fds[0].revents = 0;
324 123536
        i = poll(fds, 1, msec);
325
326 123536
        if (i == 0) {
327
                /* Timeout, close and give up */
328 40
                closefd(&s);
329 40
                errno = ETIMEDOUT;
330 40
                return (-1);
331
        }
332
333 123459
        return (VTCP_connected(s));
334 161132
}
335
336
/*--------------------------------------------------------------------
337
 * When closing a TCP connection, a couple of errno's are legit, we
338
 * can't be held responsible for the other end wanting to talk to us.
339
 */
340
341
/*
 * Close the socket `*s` and set `*s` to -1.
 *
 * The close(2) result is checked with VTCP_Assert(), so the errno
 * values accepted by VTCP_Check() are tolerated.
 */
void
VTCP_close(int *s)
{
        int rv;

        rv = close(*s);
        VTCP_Assert(rv);
        *s = -1;
}
351
352
void
353 74477
VTCP_set_read_timeout(int s, vtim_dur seconds)
354
{
355
#ifdef SO_RCVTIMEO_WORKS
356 74477
        struct timeval timeout = VTIM_timeval(seconds);
357
        /*
358
         * Solaris bug (present at least in snv_151 and older): If this fails
359
         * with EINVAL, the socket is half-closed (SS_CANTSENDMORE) and the
360
         * timeout does not get set. Needs to be fixed in Solaris, there is
361
         * nothing we can do about this.
362
         */
363 74477
        VTCP_Assert(setsockopt(s, SOL_SOCKET, SO_RCVTIMEO,
364
            &timeout, sizeof timeout));
365
#else
366
        (void)s;
367
        (void)seconds;
368
#endif
369 74479
}
370
371
/*--------------------------------------------------------------------
372
 */
373
374
/* State passed from VTCP_open() to vtcp_open_callback() */
struct vto_priv {
        unsigned        magic;          /* object sanity marker */
#define VTO_PRIV_MAGIC  0xca70b0e7
        int             latest_errno;   /* errno of last failed connect, else 0 */
        int             fd;             /* connected socket, -1 until then */
        double          timeout;        /* connect timeout in seconds */
};
381
382
static int v_matchproto_(vss_resolved_f)
383 76398
vtcp_open_callback(void *priv, const struct suckaddr *sa)
384
{
385
        struct vto_priv *vto;
386
        int fd;
387
388 76398
        CAST_OBJ_NOTNULL(vto, priv, VTO_PRIV_MAGIC);
389
390 76397
        errno = 0;
391 76397
        fd = VTCP_connect(sa, (int)floor(vto->timeout * 1e3));
392 76397
        if (fd >= 0) {
393 76359
                vto->fd = fd;
394 76359
                vto->latest_errno = 0;
395 76359
                return (1);
396
        }
397 40
        vto->latest_errno = errno;
398 40
        return (0);
399 76399
}
400
401
int
402 76395
VTCP_open(const char *addr, const char *def_port, vtim_dur timeout,
403
    const char **errp)
404
{
405
        struct vto_priv vto[1];
406
407 76395
        AN(errp);
408 76397
        assert(timeout >= 0);
409 76396
        INIT_OBJ(vto, VTO_PRIV_MAGIC);
410 76396
        vto->fd = -1;
411 76396
        vto->timeout = timeout;
412
413 76396
        if (VSS_resolver(addr, def_port, vtcp_open_callback, vto, errp) < 0)
414 0
                return (-1);
415 76399
        if (vto->fd < 0)
416 40
                *errp = strerror(vto->latest_errno);
417 76399
        return (vto->fd);
418 76399
}
419
420
/*--------------------------------------------------------------------
421
 * Given a struct suckaddr, open a socket of the appropriate type, and bind
422
 * it to the requested address.
423
 *
424
 * If the address is an IPv6 address, the IPV6_V6ONLY option is set to
425
 * avoid conflicts between INADDR_ANY and IN6ADDR_ANY.
426
 */
427
428
int
429 270720
VTCP_bind(const struct suckaddr *sa, const char **errp)
430
{
431
        int sd, val, e;
432
        socklen_t sl;
433
        const struct sockaddr *so;
434
        int proto;
435
436 270720
        if (errp != NULL)
437 171240
                *errp = NULL;
438
439 270720
        proto = VSA_Get_Proto(sa);
440 270720
        sd = socket(proto, SOCK_STREAM, 0);
441 270720
        if (sd < 0) {
442 0
                if (errp != NULL)
443 0
                        *errp = "socket(2)";
444 0
                return (-1);
445
        }
446 270720
        val = 1;
447 270720
        if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof val) != 0) {
448 0
                if (errp != NULL)
449 0
                        *errp = "setsockopt(SO_REUSEADDR, 1)";
450 0
                e = errno;
451 0
                closefd(&sd);
452 0
                errno = e;
453 0
                return (-1);
454
        }
455
#ifdef IPV6_V6ONLY
456
        /* forcibly use separate sockets for IPv4 and IPv6 */
457 270720
        val = 1;
458 270720
        if (proto == AF_INET6 &&
459 69440
            setsockopt(sd, IPPROTO_IPV6, IPV6_V6ONLY, &val, sizeof val) != 0) {
460 0
                if (errp != NULL)
461 0
                        *errp = "setsockopt(IPV6_V6ONLY, 1)";
462 0
                e = errno;
463 0
                closefd(&sd);
464 0
                errno = e;
465 0
                return (-1);
466
        }
467
#endif
468 270720
        so = VSA_Get_Sockaddr(sa, &sl);
469 270720
        if (bind(sd, so, sl) != 0) {
470 40
                if (errp != NULL)
471 0
                        *errp = "bind(2)";
472 40
                e = errno;
473 40
                closefd(&sd);
474 40
                errno = e;
475 40
                return (-1);
476
        }
477 270680
        return (sd);
478 270720
}
479
480
/*--------------------------------------------------------------------
481
 * Given a struct suckaddr, open a socket of the appropriate type, bind it
482
 * to the requested address, and start listening.
483
 */
484
485
int
486 171240
VTCP_listen(const struct suckaddr *sa, int depth, const char **errp)
487
{
488
        int sd;
489
        int e;
490
491 171240
        if (errp != NULL)
492 171240
                *errp = NULL;
493 171240
        sd = VTCP_bind(sa, errp);
494 171240
        if (sd >= 0)  {
495 171240
                if (listen(sd, depth) != 0) {
496 0
                        e = errno;
497 0
                        closefd(&sd);
498 0
                        errno = e;
499 0
                        if (errp != NULL)
500 0
                                *errp = "listen(2)";
501 0
                        return (-1);
502
                }
503 171240
        }
504 171240
        return (sd);
505 171240
}
506
507
/*--------------------------------------------------------------------*/
508
509
/* Arguments passed from VTCP_listen_on() to vtcp_lo_cb() */
struct helper {
        int             depth;  /* listen(2) backlog */
        const char      **errp; /* where to report the failing step */
};
513
514
static int v_matchproto_(vss_resolved_f)
515 105720
vtcp_lo_cb(void *priv, const struct suckaddr *sa)
516
{
517
        int sock;
518 105720
        struct helper *hp = priv;
519
520 105720
        sock = VTCP_listen(sa, hp->depth, hp->errp);
521 105720
        if (sock >= 0) {
522 105720
                *hp->errp = NULL;
523 105720
                return (sock);
524
        }
525 0
        AN(*hp->errp);
526 0
        return (0);
527 105720
}
528
529
int
530 105720
VTCP_listen_on(const char *addr, const char *def_port, int depth,
531
    const char **errp)
532
{
533
        struct helper h;
534
        int sock;
535
536 105720
        AN(errp);
537 105720
        h.depth = depth;
538 105720
        h.errp = errp;
539
540 105720
        sock = VSS_resolver(addr, def_port, vtcp_lo_cb, &h, errp);
541 105720
        if (*errp != NULL)
542 0
                return (-1);
543 105720
        return (sock);
544 105720
}
545
546
/*--------------------------------------------------------------------
547
 * Set or reset SO_LINGER flag
548
 */
549
550
/*
 * Set or reset the SO_LINGER flag on `sock`.
 *
 * `linger` is stored in l_onoff; l_linger is left at zero.  The
 * setsockopt(2) result is checked with VTCP_Assert() and returned.
 */
int
VTCP_linger(int sock, int linger)
{
        struct linger opt;
        int rv;

        memset(&opt, 0, sizeof opt);
        opt.l_onoff = linger;
        rv = setsockopt(sock, SOL_SOCKET, SO_LINGER, &opt, sizeof opt);
        VTCP_Assert(rv);
        return (rv);
}
562
563
/*--------------------------------------------------------------------
564
 * Do a poll to check for remote HUP
565
 */
566
567
int
568 0
VTCP_check_hup(int sock)
569
{
570
        struct pollfd pfd;
571
572 0
        assert(sock > 0);
573 0
        pfd.fd = sock;
574 0
        pfd.events = POLLOUT;
575 0
        pfd.revents = 0;
576
577 0
        if (poll(&pfd, 1, 0) == 1 && pfd.revents & POLLHUP)
578 0
                return (1);
579 0
        return (0);
580 0
}
581
582
/*--------------------------------------------------------------------
583
 * Check if a TCP syscall return value is fatal
584
 */
585
586
/*
 * Check if a TCP syscall return value is fatal.
 *
 * a: the return value of the syscall; errno is consulted only when it
 *    is negative.
 *
 * Returns 1 if the call succeeded or failed with an errno we tolerate
 * on a TCP socket, 0 if the failure should be treated as fatal.
 */
int
VTCP_Check(ssize_t a)
{
        static const int tolerated[] = {
                /* The other end went away */
                ECONNRESET, ENOTCONN, EPIPE,
                /*
                 * EAGAIN (and EWOULDBLOCK in case they are not the
                 * same): when a SO_{SND|RCV}TIMEO expires, read() or
                 * write() on the socket returns -1 with errno set to
                 * EAGAIN.  This is described in socket(7) rather than
                 * in read(2) and write(2).
                 */
                EAGAIN, EWOULDBLOCK,
                /*
                 * tcp(7): the other end didn't acknowledge
                 * retransmitted data after some time.
                 */
                ETIMEDOUT,
                /* #3539 various errnos documented on linux as POSIX.1 */
                ENETDOWN, ENETUNREACH, ENETRESET,
                ECONNABORTED, EHOSTUNREACH, EHOSTDOWN,
#if (defined (__SVR4) && defined (__sun))
                ECONNREFUSED,   // in r02702.vtc
                EPROTO,
#endif
#if (defined (__SVR4) && defined (__sun)) ||            \
    defined (__NetBSD__) ||                             \
    defined (__APPLE__)
                /*
                 * Solaris and MacOS return EINVAL if the other end
                 * unexpectedly reset the connection.
                 *
                 * On NetBSD it is documented behaviour.
                 */
                EINVAL,
#endif
#if (defined(__SANITIZER) || __has_feature(address_sanitizer))
                EINTR,
#endif
        };
        unsigned u;
        int e;

        /* Zero or positive means the call worked */
        if (a >= 0)
                return (1);

        e = errno;
        for (u = 0; u < sizeof tolerated / sizeof tolerated[0]; u++) {
                if (e == tolerated[u])
                        return (1);
        }
        return (0);
}
638
639
/*--------------------------------------------------------------------
640
 *
641
 */
642
643
int
644 285166
VTCP_read(int fd, void *ptr, size_t len, vtim_dur tmo)
645
{
646
        struct pollfd pfd[1];
647
        int i, j;
648
649 285166
        if (tmo > 0.0) {
650 283724
                pfd[0].fd = fd;
651 283724
                pfd[0].events = POLLIN;
652 283724
                pfd[0].revents = 0;
653 283724
                j = (int)floor(tmo * 1e3);
654 283724
                if (j == 0)
655 0
                        j++;
656 283728
                j = poll(pfd, 1, j);
657 283728
                if (j == 0)
658 6899
                        return (-2);
659 276845
        }
660 278283
        i = read(fd, ptr, len);
661 278283
        VTCP_Assert(i);
662 278273
        return (i < 0 ? -1 : i);
663 285171
}