varnish-cache/lib/libvarnish/vtcp.c
0
/*-
1
 * Copyright (c) 2006 Verdens Gang AS
2
 * Copyright (c) 2006-2015 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 */
30
31
#include "config.h"
32
33
#include <sys/types.h>
34
#include <sys/socket.h>
35
#include <sys/time.h>           // for NetBSD
36
#include <sys/ioctl.h>
37
#ifdef HAVE_SYS_FILIO_H
38
#  include <sys/filio.h>
39
#endif
40
41
#include <netinet/in.h>
42
#include <netinet/tcp.h>
43
44
#include <math.h>
45
#include <netdb.h>
46
#include <poll.h>
47
#include <stdio.h>
48
#include <string.h>
49
#include <unistd.h>
50
#include <stdlib.h>
51
52
#include "vdef.h"
53
#include "miniobj.h"
54
#include "vas.h"
55
#include "vsa.h"
56
#include "vss.h"
57
#include "vtcp.h"
58
#include "vtim.h"
59
60
/*--------------------------------------------------------------------*/
61
static void
62 655269
vtcp_sa_to_ascii(const void *sa, socklen_t l, char *abuf, unsigned alen,
63
    char *pbuf, unsigned plen)
64
{
65
        int i;
66
67 655269
        assert(abuf == NULL || alen > 0);
68 655253
        assert(pbuf == NULL || plen > 0);
69 655253
        i = getnameinfo(sa, l, abuf, alen, pbuf, plen,
70
           NI_NUMERICHOST | NI_NUMERICSERV);
71 655253
        if (i) {
72
                /*
73
                 * XXX this printf is shitty, but we may not have space
74
                 * for the gai_strerror in the bufffer :-(
75
                 */
76 0
                fprintf(stderr, "getnameinfo = %d %s\n", i, gai_strerror(i));
77 0
                if (i == EAI_SYSTEM)
78 0
                        fprintf(stderr, "errno = %d %s\n", errno, VAS_errtxt(errno));
79 0
                if (abuf != NULL)
80 0
                        (void)snprintf(abuf, alen, "Conversion");
81 0
                if (pbuf != NULL)
82 0
                        (void)snprintf(pbuf, plen, "Failed");
83 0
                return;
84
        }
85
        /* XXX dirty hack for v4-to-v6 mapped addresses */
86 655253
        if (abuf != NULL && strncmp(abuf, "::ffff:", 7) == 0) {
87 0
                for (i = 0; abuf[i + 7]; ++i)
88 0
                        abuf[i] = abuf[i + 7];
89 0
                abuf[i] = '\0';
90 0
        }
91 655253
}
92
93
/*--------------------------------------------------------------------*/
94
95
/*
 * Format a struct suckaddr as numeric address and port strings.
 *
 * The sockaddr is extracted with VSA_Get_Sockaddr() (which must not
 * return NULL) and the buffer arguments are passed on unchanged to
 * vtcp_sa_to_ascii().
 */
void
VTCP_name(const struct suckaddr *addr, char *abuf, unsigned alen,
    char *pbuf, unsigned plen)
{
        socklen_t salen;
        const struct sockaddr *sockaddr_p = VSA_Get_Sockaddr(addr, &salen);

        AN(sockaddr_p);
        vtcp_sa_to_ascii(sockaddr_p, salen, abuf, alen, pbuf, plen);
}
106
107
/*--------------------------------------------------------------------*/
108
109
struct suckaddr *
110 23200
VTCP_my_suckaddr(int sock)
111
{
112
        struct suckaddr *r;
113
114 23200
        r = malloc(vsa_suckaddr_len);
115 23200
        AN(VSA_getsockname(sock, r, vsa_suckaddr_len));
116 23200
        return (r);
117
}
118
119
/*--------------------------------------------------------------------*/
120
121
void
122 276194
VTCP_myname(int sock, char *abuf, unsigned alen, char *pbuf, unsigned plen)
123
{
124 276194
        char buf[vsa_suckaddr_len];
125
126 552388
        VTCP_name(VSA_getsockname(sock, buf, sizeof buf),
127 276194
                  abuf, alen, pbuf, plen);
128 276194
}
129
130
/*--------------------------------------------------------------------*/
131
132
void
133 188335
VTCP_hisname(int sock, char *abuf, unsigned alen, char *pbuf, unsigned plen)
134
{
135 188335
        char buf[vsa_suckaddr_len];
136
        const struct suckaddr *sua;
137
138 188335
        sua = VSA_getpeername(sock, buf, sizeof buf);
139 188335
        if (sua != NULL)
140 188335
                VTCP_name(sua, abuf, alen, pbuf, plen);
141
        else {
142 0
                (void)snprintf(abuf, alen, "<none>");
143 0
                (void)snprintf(pbuf, plen, "<none>");
144
        }
145 188335
}
146
147
/*--------------------------------------------------------------------*/
148
149
#ifdef HAVE_ACCEPT_FILTERS
150
151
int
152 22075
VTCP_filter_http(int sock)
153
{
154
        int retval;
155
        struct accept_filter_arg afa;
156
157 22075
        memset(&afa, 0, sizeof afa);
158 22075
        bprintf(afa.af_name, "%s", "httpready");
159 22075
        retval = setsockopt(sock, SOL_SOCKET, SO_ACCEPTFILTER,
160
            &afa, sizeof afa);
161 22075
        return (retval);
162
}
163
164
#elif defined(__linux)
165
166
int
167
VTCP_filter_http(int sock)
168
{
169
        int retval;
170
        int defer = 1;
171
172
        retval = setsockopt(sock, SOL_TCP, TCP_DEFER_ACCEPT,
173
            &defer, sizeof defer);
174
        return (retval);
175
}
176
177
#else
178
179
int
180
VTCP_filter_http(int sock)
181
{
182
        errno = EOPNOTSUPP;
183
        (void)sock;
184
        return (-1);
185
}
186
187
#endif
188
189
/*--------------------------------------------------------------------*/
190
191
192
/*
 * Set the TCP_FASTOPEN option on `sock` to `depth`.
 *
 * Returns the setsockopt(2) result when HAVE_TCP_FASTOPEN is defined,
 * otherwise -1 with errno set to EOPNOTSUPP.
 */
int
VTCP_fastopen(int sock, int depth)
{
#ifdef HAVE_TCP_FASTOPEN
#  ifndef SOL_TCP
#    define SOL_TCP IPPROTO_TCP
#  endif
        int rv;

        rv = setsockopt(sock, SOL_TCP, TCP_FASTOPEN, &depth, sizeof depth);
        return (rv);
#else
        (void)sock;
        (void)depth;
        errno = EOPNOTSUPP;
        return (-1);
#endif
}
207
208
209
/*--------------------------------------------------------------------
210
 * Functions for controlling NONBLOCK mode.
211
 *
212
 * We use FIONBIO because it is cheaper than fcntl(2), which requires
213
 * us to do two syscalls, one to get and one to set, the latter of
214
 * which mucks about a bit before it ends up calling ioctl(FIONBIO),
215
 * at least on FreeBSD.
216
 * On Solaris ioctl(FIONBIO) can fail with connection related errnos,
217
 * but as long as that is how they fail, we're fine.
218
 */
219
220
/*
 * Clear FIONBIO on `sock` (flag value 0).  The ioctl(2) result is
 * checked with VTCP_Assert().
 */
void
VTCP_blocking(int sock)
{
        int nonblock = 0;
        int rv;

        rv = ioctl(sock, FIONBIO, &nonblock);
        VTCP_Assert(rv);
}
229
230
/*
 * Set FIONBIO on `sock` (flag value 1).  The ioctl(2) result is
 * checked with VTCP_Assert().
 */
void
VTCP_nonblocking(int sock)
{
        int nonblock = 1;
        int rv;

        rv = ioctl(sock, FIONBIO, &nonblock);
        VTCP_Assert(rv);
}
239
240
/*--------------------------------------------------------------------
241
 * On TCP a connect(2) can block for a looong time, and we don't want that.
242
 * Unfortunately, the SocketWizards back in those days were happy to wait
243
 * any amount of time for a connection, so the connect(2) syscall does not
244
 * take an argument for patience.
245
 *
246
 * There is a little used work-around, and we employ it at our peril.
247
 *
248
 */
249
250
/*
 * Finish a connect(2) that was started on socket `s`.
 *
 * The pending socket error is fetched with SO_ERROR.  If it is non-zero
 * the socket is closed and -1 is returned with errno set to that error.
 * Otherwise errno is cleared, the socket is switched to blocking mode
 * and `s` is returned.
 */
int
VTCP_connected(int s)
{
        int k;
        socklen_t l;

        /* Find out if we got a connection */
        l = sizeof k;
        AZ(getsockopt(s, SOL_SOCKET, SO_ERROR, &k, &l));

        /* An error means no connection established */
        if (k) {
                closefd(&s);
                /* Set errno last so closing cannot disturb it. */
                errno = k;
                return (-1);
        }

        errno = 0;
        VTCP_blocking(s);
        return (s);
}
270
271
int
272 108398
VTCP_connect(const struct suckaddr *name, int msec)
273
{
274
        int s, i;
275
        struct pollfd fds[1];
276
        const struct sockaddr *sa;
277
        socklen_t sl;
278
        int val;
279
280 108398
        if (name == NULL)
281 669
                return (-1);
282
        /* Attempt the connect */
283 107729
        AN(VSA_Sane(name));
284 107729
        sa = VSA_Get_Sockaddr(name, &sl);
285 107729
        AN(sa);
286 107729
        AN(sl);
287
288 107729
        s = socket(sa->sa_family, SOCK_STREAM, 0);
289 107729
        if (s < 0)
290 0
                return (s);
291
292
        /* Set the socket non-blocking */
293 107729
        if (msec != 0)
294 104500
                VTCP_nonblocking(s);
295
296 107729
        val = 1;
297 107729
        AZ(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &val, sizeof val));
298
299 107729
        i = connect(s, sa, sl);
300 107729
        if (i == 0)
301 3281
                return (s);
302 104448
        if (errno != EINPROGRESS) {
303 0
                closefd(&s);
304 0
                return (-1);
305
        }
306
307 104448
        if (msec < 0) {
308
                /*
309
                 * Caller is responsible for waiting and
310
                 * calling VTCP_connected
311
                 */
312 22618
                return (s);
313
        }
314
315 81830
        assert(msec > 0);
316
        /* Exercise our patience, polling for write */
317 81830
        fds[0].fd = s;
318 81830
        fds[0].events = POLLWRNORM;
319 81830
        fds[0].revents = 0;
320 81830
        i = poll(fds, 1, msec);
321
322 81830
        if (i == 0) {
323
                /* Timeout, close and give up */
324 25
                closefd(&s);
325 25
                errno = ETIMEDOUT;
326 25
                return (-1);
327
        }
328
329 81805
        return (VTCP_connected(s));
330 108398
}
331
332
/*--------------------------------------------------------------------
333
 * When closing a TCP connection, a couple of errno's are legit, we
334
 * can't be held responsible for the other end wanting to talk to us.
335
 */
336
337
/*
 * Close the descriptor `*s` and overwrite it with -1.  The close(2)
 * result is checked with VTCP_Assert() rather than required to be zero.
 */
void
VTCP_close(int *s)
{
        int rv = close(*s);

        VTCP_Assert(rv);
        *s = -1;
}
347
348
void
349 49323
VTCP_set_read_timeout(int s, vtim_dur seconds)
350
{
351 49323
        struct timeval timeout = VTIM_timeval(seconds);
352
353
        /*
354
         * Solaris bug (present at least in snv_151 and older): If this fails
355
         * with EINVAL, the socket is half-closed (SS_CANTSENDMORE) and the
356
         * timeout does not get set. Needs to be fixed in Solaris, there is
357
         * nothing we can do about this.
358
         */
359 49323
        VTCP_Assert(setsockopt(s, SOL_SOCKET, SO_RCVTIMEO,
360
            &timeout, sizeof timeout));
361 49323
}
362
363
/*--------------------------------------------------------------------
364
 */
365
366
/*
 * State shared between VTCP_open() and vtcp_open_callback().
 */
struct vto_priv {
        /* Object marker, checked with VTO_PRIV_MAGIC. */
        unsigned                magic;
#define VTO_PRIV_MAGIC          0xca70b0e7
        /* errno of the most recent failed attempt, 0 after a success. */
        int                     latest_errno;
        /* Connected socket; VTCP_open() starts it out as -1. */
        int                     fd;
        /* Timeout; the callback multiplies it by 1e3 to get milliseconds. */
        double                  timeout;
};
373
374
static int v_matchproto_(vss_resolved_f)
375 50650
vtcp_open_callback(void *priv, const struct suckaddr *sa)
376
{
377
        struct vto_priv *vto;
378
        int fd;
379
380 50650
        CAST_OBJ_NOTNULL(vto, priv, VTO_PRIV_MAGIC);
381
382 50650
        errno = 0;
383 50650
        fd = VTCP_connect(sa, (int)floor(vto->timeout * 1e3));
384 50650
        if (fd >= 0) {
385 50625
                vto->fd = fd;
386 50625
                vto->latest_errno = 0;
387 50625
                return (1);
388
        }
389 25
        vto->latest_errno = errno;
390 25
        return (0);
391 50650
}
392
393
int
394 50649
VTCP_open(const char *addr, const char *def_port, vtim_dur timeout,
395
    const char **errp)
396
{
397
        struct vto_priv vto[1];
398
399 50649
        AN(errp);
400 50649
        assert(timeout >= 0);
401 50649
        INIT_OBJ(vto, VTO_PRIV_MAGIC);
402 50649
        vto->fd = -1;
403 50649
        vto->timeout = timeout;
404
405 50649
        if (VSS_resolver(addr, def_port, vtcp_open_callback, vto, errp) < 0)
406 0
                return (-1);
407 50649
        if (vto->fd < 0)
408 25
                *errp = strerror(vto->latest_errno);
409 50649
        return (vto->fd);
410 50649
}
411
412
/*--------------------------------------------------------------------
413
 * Given a struct suckaddr, open a socket of the appropriate type, and bind
414
 * it to the requested address.
415
 *
416
 * If the address is an IPv6 address, the IPV6_V6ONLY option is set to
417
 * avoid conflicts between INADDR_ANY and IN6ADDR_ANY.
418
 */
419
420
/*
 * Create a stream socket for `sa`, set SO_REUSEADDR (and IPV6_V6ONLY for
 * AF_INET6 where available) and bind it.
 *
 * Returns the socket, or -1 on failure.  If `errp` is not NULL it is
 * reset to NULL on entry and, on failure, pointed at a constant string
 * naming the step that failed.  When a step after socket(2) fails, the
 * socket is closed and the errno of the failing call is restored.
 */
int
VTCP_bind(const struct suckaddr *sa, const char **errp)
{
        const struct sockaddr *so;
        const char *what;
        socklen_t sl;
        int sd, proto, one, saved;

        if (errp != NULL)
                *errp = NULL;

        proto = VSA_Get_Proto(sa);
        sd = socket(proto, SOCK_STREAM, 0);
        if (sd < 0) {
                /* Nothing to close yet. */
                if (errp != NULL)
                        *errp = "socket(2)";
                return (-1);
        }

        one = 1;
        if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof one) != 0) {
                what = "setsockopt(SO_REUSEADDR, 1)";
                goto fail;
        }

#ifdef IPV6_V6ONLY
        /* forcibly use separate sockets for IPv4 and IPv6 */
        one = 1;
        if (proto == AF_INET6 &&
            setsockopt(sd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof one) != 0) {
                what = "setsockopt(IPV6_V6ONLY, 1)";
                goto fail;
        }
#endif

        so = VSA_Get_Sockaddr(sa, &sl);
        if (bind(sd, so, sl) != 0) {
                what = "bind(2)";
                goto fail;
        }
        return (sd);

fail:
        /* Common failure tail: report, close, keep the original errno. */
        saved = errno;
        if (errp != NULL)
                *errp = what;
        closefd(&sd);
        errno = saved;
        return (-1);
}
471
472
/*--------------------------------------------------------------------
473
 * Given a struct suckaddr, open a socket of the appropriate type, bind it
474
 * to the requested address, and start listening.
475
 */
476
477
/*
 * Bind a socket to `sa` with VTCP_bind() and put it in the listening
 * state with a backlog of `depth`.
 *
 * Returns the socket, the negative VTCP_bind() result if binding failed,
 * or -1 if listen(2) failed.  In the listen(2) case the socket is
 * closed, errno is restored and, if `errp` is not NULL, *errp is set to
 * "listen(2)".
 */
int
VTCP_listen(const struct suckaddr *sa, int depth, const char **errp)
{
        int sd, saved;

        if (errp != NULL)
                *errp = NULL;

        sd = VTCP_bind(sa, errp);
        if (sd < 0)
                return (sd);

        if (listen(sd, depth) == 0)
                return (sd);

        saved = errno;
        closefd(&sd);
        errno = saved;
        if (errp != NULL)
                *errp = "listen(2)";
        return (-1);
}
498
499
/*--------------------------------------------------------------------*/
500
501
/*
 * Arguments carried from VTCP_listen_on() to vtcp_lo_cb().
 */
struct helper {
        /* Backlog value passed on to VTCP_listen(). */
        int             depth;
        /* Where the error description is reported. */
        const char      **errp;
};
505
506
static int v_matchproto_(vss_resolved_f)
507 74100
vtcp_lo_cb(void *priv, const struct suckaddr *sa)
508
{
509
        int sock;
510 74100
        struct helper *hp = priv;
511
512 74100
        sock = VTCP_listen(sa, hp->depth, hp->errp);
513 74100
        if (sock >= 0) {
514 74100
                *hp->errp = NULL;
515 74100
                return (sock);
516
        }
517 0
        AN(*hp->errp);
518 0
        return (0);
519 74100
}
520
521
int
522 74100
VTCP_listen_on(const char *addr, const char *def_port, int depth,
523
    const char **errp)
524
{
525
        struct helper h;
526
        int sock;
527
528 74100
        AN(errp);
529 74100
        h.depth = depth;
530 74100
        h.errp = errp;
531
532 74100
        sock = VSS_resolver(addr, def_port, vtcp_lo_cb, &h, errp);
533 74100
        if (*errp != NULL)
534 0
                return (-1);
535 74100
        return (sock);
536 74100
}
537
538
/*--------------------------------------------------------------------
539
 * Set or reset SO_LINGER flag
540
 */
541
542
/*
 * Set SO_LINGER on `sock` with l_onoff = `linger`; every other field of
 * the option structure is zero.  The setsockopt(2) result is checked
 * with VTCP_Assert() and then returned.
 */
int
VTCP_linger(int sock, int linger)
{
        struct linger opt;
        int rv;

        memset(&opt, 0, sizeof opt);
        opt.l_onoff = linger;

        rv = setsockopt(sock, SOL_SOCKET, SO_LINGER, &opt, sizeof opt);
        VTCP_Assert(rv);
        return (rv);
}
554
555
/*--------------------------------------------------------------------
556
 * Do a poll to check for remote HUP
557
 */
558
559
int
560 0
VTCP_check_hup(int sock)
561
{
562
        struct pollfd pfd;
563
564 0
        assert(sock > 0);
565 0
        pfd.fd = sock;
566 0
        pfd.events = POLLOUT;
567 0
        pfd.revents = 0;
568
569 0
        if (poll(&pfd, 1, 0) == 1 && pfd.revents & POLLHUP)
570 0
                return (1);
571 0
        return (0);
572 0
}
573
574
/*--------------------------------------------------------------------
575
 * Check if a TCP syscall return value is fatal
576
 */
577
578
/*
 * Decide whether the return value `a` of a TCP related syscall is
 * acceptable.
 *
 * Returns 1 if `a` is not negative, or if errno holds one of the values
 * regarded below as a legitimate consequence of what the network or the
 * peer did.  Returns 0 for any other errno.
 */
int
VTCP_Check(ssize_t a)
{
        int e;

        if (a >= 0)
                return (1);

        e = errno;

        if (e == ECONNRESET || e == ENOTCONN || e == EPIPE)
                return (1);

        /*
         * EAGAIN (and EWOULDBLOCK, in case the two differ): when a
         * SO_{SND|RCV}TIMEO expires, read() or write() on the socket
         * returns -1 with errno set to EAGAIN.  This is described in
         * socket(7) rather than in read(2)/write(2).
         */
        if (e == EAGAIN || e == EWOULDBLOCK)
                return (1);

        /*
         * tcp(7): the other end did not acknowledge retransmitted data
         * after some time.
         */
        if (e == ETIMEDOUT)
                return (1);

        /* #3539 various errnos documented on linux as POSIX.1 */
        if (e == ENETDOWN || e == ENETUNREACH || e == ENETRESET ||
            e == ECONNABORTED || /* ECONNRESET see above */
            e == EHOSTUNREACH || e == EHOSTDOWN)
                return (1);

#if (defined (__SVR4) && defined (__sun))
        if (e == ECONNREFUSED)          // in r02702.vtc
                return (1);
        if (e == EPROTO)
                return (1);
#endif
#if (defined (__SVR4) && defined (__sun)) ||            \
    defined (__NetBSD__) ||                             \
    defined (__APPLE__)
        /*
         * Solaris and MacOS return EINVAL if the other end unexpectedly
         * reset the connection.
         *
         * On NetBSD it is documented behaviour.
         */
        if (e == EINVAL)
                return (1);
#endif
#if defined(ENABLE_SANITIZER)
        if (e == EINTR)
                return (1);
#endif
        return (0);
}
630
631
/*--------------------------------------------------------------------
632
 *
633
 */
634
635
int
636 196740
VTCP_read(int fd, void *ptr, size_t len, vtim_dur tmo)
637
{
638
        struct pollfd pfd[1];
639
        int i, j;
640
641 196740
        if (tmo > 0.0) {
642 195386
                pfd[0].fd = fd;
643 195386
                pfd[0].events = POLLIN;
644 195386
                pfd[0].revents = 0;
645 195386
                j = poll(pfd, 1, VTIM_poll_tmo(tmo));
646 195386
                if (j == 0)
647 4521
                        return (-2);
648 190865
        }
649 192219
        i = read(fd, ptr, len);
650 192219
        VTCP_Assert(i);
651 192219
        return (i < 0 ? -1 : i);
652 196740
}