varnish-cache/bin/varnishd/acceptor/cache_acceptor_uds.c
0
/*-
1
 * Copyright (c) 2006 Verdens Gang AS
2
 * Copyright (c) 2006-2015 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 *
30
 */
31
32
#include "config.h"
33
34
#include <stdlib.h>
35
#include <netinet/in.h>
36
#include <netinet/tcp.h>
37
38
#include "cache/cache_varnishd.h"
39
40
#include "acceptor/cache_acceptor.h"
41
#include "acceptor/acceptor_priv.h"
42
#include "acceptor/acceptor_uds.h"
43
44
#include "cache/cache_transport.h"
45
#include "cache/cache_pool.h"
46
#include "common/heritage.h"
47
48
#include "vcli_serve.h"
49
#include "vsa.h"
50
#include "vtcp.h"
51
#include "vtim.h"
52
53
/*--------------------------------------------------------------------
54
 * We want to get out of any kind of trouble-hit TCP connections as fast
55
 * as absolutely possible, so we set them LINGER disabled, so that even if
56
 * there are outstanding write data on the socket, a close(2) will return
57
 * immediately.
58
 */
59
static const struct linger disable_so_linger = {
60
        .l_onoff        =       0,
61
};
62
63
/*
64
 * We turn on keepalives by default to assist in detecting clients that have
65
 * hung up on connections returning from waitinglists
66
 */
67
static const unsigned enable_so_keepalive = 1;
68
69
/*--------------------------------------------------------------------
70
 * UDS options we want to control
71
 */
72
73
static struct sock_opt sock_opts[] = {
74
        /* Note: Setting the mod counter to something not-zero is needed
75
         * to force the setsockopt() calls on startup */
76
77
        SOCK_OPT(SOL_SOCKET, SO_LINGER, struct linger)
78
        SOCK_OPT(SOL_SOCKET, SO_KEEPALIVE, int)
79
        SOCK_OPT(SOL_SOCKET, SO_SNDTIMEO, struct timeval)
80
        SOCK_OPT(SOL_SOCKET, SO_RCVTIMEO, struct timeval)
81
82
#undef SOCK_OPT
83
};
84
85
static const int n_sock_opts = sizeof sock_opts / sizeof sock_opts[0];
86
87
/*--------------------------------------------------------------------
88
 * Some kernels have bugs/limitations with respect to which options are
89
 * inherited from the accept/listen socket, so we have to keep track of
90
 * which, if any, sockopts we have to set on the accepted socket.
91
 */
92
93
static int
94 70123
vca_uds_sockopt_init(void)
95
{
96
        struct sock_opt *so;
97
        union sock_arg tmp;
98 70123
        int n, chg = 0;
99
        size_t sz;
100
101 70123
        memset(&tmp, 0, sizeof tmp);
102
103 350615
        for (n = 0; n < n_sock_opts; n++) {
104 280492
                so = &sock_opts[n];
105
106
#define SET_VAL(nm, so, fld, val)                                       \
107
        do {                                                            \
108
                if (!strcmp(#nm, so->strname)) {                        \
109
                        assert(so->sz == sizeof so->arg->fld);          \
110
                        so->arg->fld = (val);                           \
111
                }                                                       \
112
        } while (0)
113
114
#define NEW_VAL(nm, so, fld, val)                                       \
115
        do {                                                            \
116
                if (!strcmp(#nm, so->strname)) {                        \
117
                        sz = sizeof tmp.fld;                            \
118
                        assert(so->sz == sz);                           \
119
                        tmp.fld = (val);                                \
120
                        if (memcmp(&so->arg->fld, &(tmp.fld), sz)) {    \
121
                                memcpy(&so->arg->fld, &(tmp.fld), sz);  \
122
                                so->mod++;                              \
123
                                chg = 1;                                \
124
                        }                                               \
125
                }                                                       \
126
        } while (0)
127
128 280492
                SET_VAL(SO_LINGER, so, lg, disable_so_linger);
129 280492
                SET_VAL(SO_KEEPALIVE, so, i, enable_so_keepalive);
130 280492
                NEW_VAL(SO_SNDTIMEO, so, tv,
131
                    VTIM_timeval_sock(cache_param->idle_send_timeout));
132 280492
                NEW_VAL(SO_RCVTIMEO, so, tv,
133
                    VTIM_timeval_sock(cache_param->timeout_idle));
134 280492
        }
135
136 70123
        return (chg);
137
}
138
139
static void
140 943
vca_uds_sockopt_test(const struct listen_sock *ls, const struct sess *sp)
141
{
142
        struct conn_heritage *ch;
143
        struct sock_opt *so;
144
        union sock_arg tmp;
145
        socklen_t l;
146
        int i, n;
147
148 943
        CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
149 943
        CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
150
151 4714
        for (n = 0; n < n_sock_opts; n++) {
152 3771
                so = &sock_opts[n];
153 3771
                ch = &ls->conn_heritage[n];
154
155 3771
                if (ch->sess_set) {
156 32
                        VSL(SLT_Debug, sp->vxid,
157
                            "sockopt: Not testing nonhereditary %s for %s=%s",
158 16
                            so->strname, ls->name, ls->endpoint);
159 16
                        continue;
160
                }
161
162 3755
                memset(&tmp, 0, sizeof tmp);
163 3755
                l = so->sz;
164 3755
                i = getsockopt(sp->fd, so->level, so->optname, &tmp, &l);
165
166 3755
                if (i == 0 && memcmp(&tmp, so->arg, so->sz)) {
167 1854
                        VSL(SLT_Debug, sp->vxid,
168
                            "sockopt: Test confirmed %s non heredity for %s=%s",
169 927
                            so->strname, ls->name, ls->endpoint);
170 927
                        ch->sess_set = 1;
171 927
                }
172
173 3755
                if (i && errno != ENOPROTOOPT)
174 0
                        VTCP_Assert(i);
175 3755
        }
176 943
}
177
178
static void
179 10614
vca_uds_sockopt_set(const struct listen_sock *ls, const struct sess *sp)
180
{
181
        struct conn_heritage *ch;
182
        struct sock_opt *so;
183
        vxid_t vxid;
184
        int n, sock;
185
186 10614
        CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
187
188 10614
        if (sp != NULL) {
189 9574
                CHECK_OBJ(sp, SESS_MAGIC);
190 9574
                sock = sp->fd;
191 9574
                vxid = sp->vxid;
192 9574
        } else {
193 1040
                sock = ls->sock;
194 1040
                vxid = NO_VXID;
195
        }
196
197 53069
        for (n = 0; n < n_sock_opts; n++) {
198 42455
                so = &sock_opts[n];
199 42455
                ch = &ls->conn_heritage[n];
200
201 42455
                if (sp == NULL && ch->listen_mod == so->mod) {
202 120
                        VSL(SLT_Debug, vxid,
203
                            "sockopt: Not setting unmodified %s for %s=%s",
204 120
                            so->strname, ls->name, ls->endpoint);
205 120
                        continue;
206
                }
207
208 42335
                if  (sp != NULL && !ch->sess_set) {
209 57442
                        VSL(SLT_Debug, sp->vxid,
210
                            "sockopt: %s may be inherited for %s=%s",
211 28721
                            so->strname, ls->name, ls->endpoint);
212 28721
                        continue;
213
                }
214
215 13614
                VSL(SLT_Debug, vxid,
216
                    "sockopt: Setting %s for %s=%s",
217 13614
                    so->strname, ls->name, ls->endpoint);
218 13614
                VTCP_Assert(setsockopt(sock,
219
                    so->level, so->optname, so->arg, so->sz));
220
221 13614
                if (sp == NULL)
222 4040
                        ch->listen_mod = so->mod;
223 13614
        }
224 10614
}
225
226
/* Acceptor init hook: the UDS acceptor has nothing to set up here. */
static void
vca_uds_init(void)
{
        /* intentionally empty */
}
231
232
static int
233 1000
vca_uds_listen(struct cli *cli, struct listen_sock *ls)
234
{
235
236 1000
        CHECK_OBJ_NOTNULL(ls->transport, TRANSPORT_MAGIC);
237 1000
        assert (ls->sock > 0);  // We know where stdin is
238
239 1000
        if (listen(ls->sock, cache_param->listen_depth)) {
240 0
                VCLI_SetResult(cli, CLIS_CANT);
241 0
                VCLI_Out(cli, "Listen failed on socket '%s': %s",
242 0
                    ls->endpoint, VAS_errtxt(errno));
243 0
                return (-1);
244
        }
245
246 1000
        AZ(ls->conn_heritage);
247 1000
        ls->conn_heritage = calloc(n_sock_opts,
248
            sizeof *ls->conn_heritage);
249 1000
        AN(ls->conn_heritage);
250
251 1000
        ls->test_heritage = 1;
252 1000
        vca_uds_sockopt_set(ls, NULL);
253
254 1000
        if (cache_param->accept_filter && VTCP_filter_http(ls->sock))
255 1920
                VSL(SLT_Error, NO_VXID,
256
                    "Kernel filtering: sock=%d, errno=%d %s",
257 960
                    ls->sock, errno, VAS_errtxt(errno));
258
259 1000
        return (0);
260 1000
}
261
262
static void
263 36480
vca_uds_start(struct cli *cli)
264
{
265
        struct listen_sock *ls;
266
267 36480
        ASSERT_CLI();
268
269 36480
        (void)vca_uds_sockopt_init();
270
271 37480
        VTAILQ_FOREACH(ls, &UDS_acceptor.socks, vcalist) {
272 1000
                CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
273
274 1000
                if (vca_uds_listen(cli, ls))
275 0
                        return;
276 1000
        }
277 36480
}
278
279
static void
280 2000
vca_uds_event(struct cli *cli, struct listen_sock *ls, enum vca_event event)
281
{
282
283 2000
        (void) ls; // XXX const?
284 2000
        switch (event) {
285
        case VCA_EVENT_LADDR:
286 2000
                CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
287 2000
                VCLI_Out(cli, "%s %s -\n", ls->name, ls->endpoint);
288 2000
                break;
289
        default:
290 0
                WRONG("INVALID VCA_EVENT");
291 0
        }
292 2000
}
293
294
static void
295 9572
vca_mk_uds(struct wrk_accept *wa, struct sess *sp)
296
{
297 9572
        struct suckaddr *sa = NULL;
298
        ssize_t sz;
299
300 9572
        (void) wa;
301 9572
        AN(SES_Reserve_remote_addr(sp, &sa, &sz));
302 9572
        AN(sa);
303 9572
        assert(sz == vsa_suckaddr_len);
304 9572
        AZ(SES_Set_remote_addr(sp, bogo_ip));
305 9572
        sp->sattr[SA_CLIENT_ADDR] = sp->sattr[SA_REMOTE_ADDR];
306 9572
        sp->sattr[SA_LOCAL_ADDR] = sp->sattr[SA_REMOTE_ADDR];
307 9572
        sp->sattr[SA_SERVER_ADDR] = sp->sattr[SA_REMOTE_ADDR];
308 9572
        AN(SES_Set_String_Attr(sp, SA_CLIENT_IP, "0.0.0.0"));
309 9572
        AN(SES_Set_String_Attr(sp, SA_CLIENT_PORT, "0"));
310 9572
}
311
312
static void v_matchproto_(task_func_t)
313 9573
vca_uds_make_session(struct worker *wrk, void *arg)
314
{
315
        struct wrk_accept *wa;
316
        struct sess *sp;
317
        struct req *req;
318
319 9573
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
320 9573
        CAST_OBJ_NOTNULL(wa, arg, WRK_ACCEPT_MAGIC);
321
322 9573
        VTCP_blocking(wa->acceptsock);
323
324
        /* Turn accepted socket into a session */
325 9573
        AN(WS_Reservation(wrk->aws));
326 9573
        sp = SES_New(wrk->pool);
327 9573
        CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
328 9573
        wrk->stats->s_sess++;
329
330 9573
        sp->t_open = VTIM_real();
331 9573
        sp->t_idle = sp->t_open;
332 9573
        sp->vxid = VXID_Get(wrk, VSL_CLIENTMARKER);
333
334 9573
        sp->fd = wa->acceptsock;
335 9573
        wa->acceptsock = -1;
336 9573
        sp->listen_sock = wa->acceptlsock;
337
338 9573
        assert((size_t)wa->acceptaddrlen <= vsa_suckaddr_len);
339
340 9573
        vca_mk_uds(wa, sp);
341
342 9573
        AN(wa->acceptlsock->name);
343 19146
        VSL(SLT_Begin, sp->vxid, "sess 0 %s",
344 9573
            wa->acceptlsock->transport->name);
345 19146
        VSL(SLT_SessOpen, sp->vxid, "0.0.0.0 0 %s 0.0.0.0 0 %.6f %d",
346 9573
            wa->acceptlsock->name, sp->t_open, sp->fd);
347
348 9573
        vca_pace_good();
349 9573
        wrk->stats->sess_conn++;
350
351 9573
        if (wa->acceptlsock->test_heritage) {
352 943
                vca_uds_sockopt_test(wa->acceptlsock, sp);
353 943
                wa->acceptlsock->test_heritage = 0;
354 943
        }
355
356 9573
        vca_uds_sockopt_set(wa->acceptlsock, sp);
357
358 9573
        req = Req_New(sp);
359 9573
        CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
360 9573
        req->htc->rfd = &sp->fd;
361
362 9573
        SES_SetTransport(wrk, sp, req, wa->acceptlsock->transport);
363 9573
        WS_Release(wrk->aws, 0);
364 9573
}
365
366
/*--------------------------------------------------------------------
367
 * This function accepts on a single socket for a single thread pool.
368
 *
369
 * As long as we can stick the accepted connection to another thread
370
 * we do so, otherwise we put the socket back on the "BACK" pool
371
 * and handle the new connection ourselves.
372
 */
373
374
static void v_matchproto_(task_func_t)
375 0
vca_uds_accept_task(struct worker *wrk, void *arg)
376
{
377
        struct listen_sock *ls;
378
        struct wrk_accept wa;
379
        struct poolsock *ps;
380
        int i;
381
382 0
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
383 0
        CAST_OBJ_NOTNULL(ps, arg, POOLSOCK_MAGIC);
384 0
        ls = ps->lsock;
385 0
        CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
386
387 1113
        while (!pool_accepting)
388 1113
                VTIM_sleep(.1);
389
390
        /* Dont hold on to (possibly) discarded VCLs */
391 0
        if (wrk->wpriv->vcl != NULL)
392 0
                VCL_Rel(&wrk->wpriv->vcl);
393
394 9574
        while (!ps->pool->die) {
395 9574
                INIT_OBJ(&wa, WRK_ACCEPT_MAGIC);
396 9574
                wa.acceptlsock = ls;
397
398 9574
                vca_pace_check();
399
400 9574
                wa.acceptaddrlen = sizeof wa.acceptaddr;
401 9574
                do {
402 23146
                        i = accept(ls->sock, (void*)&wa.acceptaddr,
403 11573
                            &wa.acceptaddrlen);
404 11573
                } while (i < 0 && errno == EAGAIN && !ps->pool->die);
405
406 9574
                if (i < 0 && ps->pool->die)
407 0
                        break;
408
409 9574
                if (i < 0 && ls->sock == -2) {
410
                        /* Shut down in progress */
411 0
                        sleep(2);
412 0
                        continue;
413
                }
414
415 9574
                if (i < 0) {
416 0
                        switch (errno) {
417
                        case ECONNABORTED:
418 0
                                wrk->stats->sess_fail_econnaborted++;
419 0
                                break;
420
                        case EINTR:
421 0
                                wrk->stats->sess_fail_eintr++;
422 0
                                break;
423
                        case EMFILE:
424 0
                                wrk->stats->sess_fail_emfile++;
425 0
                                vca_pace_bad();
426 0
                                break;
427
                        case EBADF:
428 0
                                wrk->stats->sess_fail_ebadf++;
429 0
                                vca_pace_bad();
430 0
                                break;
431
                        case ENOBUFS:
432
                        case ENOMEM:
433 0
                                wrk->stats->sess_fail_enomem++;
434 0
                                vca_pace_bad();
435 0
                                break;
436
                        default:
437 0
                                wrk->stats->sess_fail_other++;
438 0
                                vca_pace_bad();
439 0
                                break;
440
                        }
441
442 0
                        i = errno;
443 0
                        wrk->stats->sess_fail++;
444
445 0
                        VSL(SLT_SessError, NO_VXID, "%s 0.0.0.0 0 %d %d \"%s\"",
446 0
                            wa.acceptlsock->name, ls->sock, i, VAS_errtxt(i));
447 0
                        (void)Pool_TrySumstat(wrk);
448 0
                        continue;
449
                }
450
451 9574
                wa.acceptsock = i;
452
453 9574
                if (!Pool_Task_Arg(wrk, TASK_QUEUE_REQ,
454
                    vca_uds_make_session, &wa, sizeof wa)) {
455
                        /*
456
                         * We couldn't get another thread, so we will handle
457
                         * the request in this worker thread, but first we
458
                         * must reschedule the listening task so it will be
459
                         * taken up by another thread again.
460
                         */
461 0
                        if (!ps->pool->die) {
462 0
                                AZ(Pool_Task(wrk->pool, ps->task,
463
                                    TASK_QUEUE_VCA));
464 0
                                return;
465
                        }
466 0
                }
467 9574
                if (!ps->pool->die && DO_DEBUG(DBG_SLOW_ACCEPTOR))
468 0
                        VTIM_sleep(2.0);
469
470
        }
471
472 0
        VSL(SLT_Debug, NO_VXID, "XXX Accept thread dies %p", ps);
473 0
        FREE_OBJ(ps);
474 0
}
475
476
static void
477 72552
vca_uds_accept(struct pool *pp)
478
{
479
        struct listen_sock *ls;
480
        struct poolsock *ps;
481
482 74552
        VTAILQ_FOREACH(ls, &UDS_acceptor.socks, vcalist) {
483 2000
                CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
484
485 2000
                ALLOC_OBJ(ps, POOLSOCK_MAGIC);
486 2000
                AN(ps);
487 2000
                ps->lsock = ls;
488 2000
                ps->task->func = vca_uds_accept_task;
489 2000
                ps->task->priv = ps;
490 2000
                ps->pool = pp;
491 2000
                VTAILQ_INSERT_TAIL(&pp->poolsocks, ps, list);
492 2000
                AZ(Pool_Task(pp, ps->task, TASK_QUEUE_VCA));
493 2000
        }
494 72552
}
495
496
static void
497 33643
vca_uds_update(pthread_mutex_t *shut_mtx)
498
{
499
        struct listen_sock *ls;
500
501 33643
        if (!vca_uds_sockopt_init())
502 33487
                return;
503
504 156
        PTOK(pthread_mutex_lock(shut_mtx));
505
506 196
        VTAILQ_FOREACH(ls, &UDS_acceptor.socks, vcalist) {
507 40
                CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
508
509 40
                if (ls->sock == -2)
510 0
                        continue;       // VCA_Shutdown
511 40
                assert (ls->sock > 0);
512 40
                vca_uds_sockopt_set(ls, NULL);
513
                /* If one of the options on a socket has
514
                 * changed, also force a retest of whether
515
                 * the values are inherited to the
516
                 * accepted sockets. This should then
517
                 * catch any false positives from previous
518
                 * tests that could happen if the set
519
                 * value of an option happened to just be
520
                 * the OS default for that value, and
521
                 * wasn't actually inherited from the
522
                 * listening socket. */
523 40
                ls->test_heritage = 1;
524 40
        }
525
526 156
        PTOK(pthread_mutex_unlock(shut_mtx));
527 33643
}
528
529
static void
530 36160
vca_uds_shutdown(void)
531
{
532
        struct listen_sock *ls;
533
        int i;
534
535 37160
        VTAILQ_FOREACH(ls, &UDS_acceptor.socks, vcalist) {
536 1000
                CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
537
538 1000
                i = ls->sock;
539 1000
                ls->sock = -2;
540 1000
                (void)close(i);
541 1000
        }
542 36160
}
543
544
struct acceptor UDS_acceptor = {
545
        .magic          = ACCEPTOR_MAGIC,
546
        .name           = "uds",
547
        .config         = vca_uds_config,
548
        .init           = vca_uds_init,
549
        .open           = vca_uds_open,
550
        .reopen         = vca_uds_reopen,
551
        .start          = vca_uds_start,
552
        .event          = vca_uds_event,
553
        .accept         = vca_uds_accept,
554
        .update         = vca_uds_update,
555
        .shutdown       = vca_uds_shutdown,
556
};