varnish-cache/bin/varnishd/mgt/mgt_child.c
1
/*-
2
 * Copyright (c) 2006 Verdens Gang AS
3
 * Copyright (c) 2006-2015 Varnish Software AS
4
 * All rights reserved.
5
 *
6
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 *
29
 * The mechanics of handling the child process
30
 */
31
32
#include "config.h"
33
34
#include <sys/types.h>
35
#include <sys/wait.h>
36
37
#include <errno.h>
38
#include <fcntl.h>
39
#include <poll.h>
40
#include <signal.h>
41
#include <stdarg.h>
42
#include <stdio.h>
43
#include <string.h>
44
#include <syslog.h>
45
#include <unistd.h>
46
47
#include "mgt/mgt.h"
48
#include "common/heritage.h"
49
50
#include "vbm.h"
51
#include "vcli_serve.h"
52
#include "vev.h"
53
#include "vfil.h"
54
#include "vlu.h"
55
#include "vtim.h"
56
#include "vsmw.h"
57
58
/* PID of the child process; -1 whenever no child is being tracked. */
static pid_t            child_pid = -1;

/* Bitmap of the file descriptors marked via MCH_Fd_Inherit(). */
static struct vbitmap   *fd_map;

/*
 * Management-side pipe ends, filled in by mgt_launch_child():
 * child_cli_in reads CLI replies from the child, child_cli_out writes
 * CLI requests to it and child_output reads the child's stdout/stderr.
 */
static int              child_cli_in = -1;
static int              child_cli_out = -1;
static int              child_output = -1;

/* Life-cycle state of the child; the values double as index into ch_state[] */
static enum {
        CH_STOPPED = 0,
        CH_STARTING,
        CH_RUNNING,
        CH_STOPPING,
        CH_DIED
}                       child_state = CH_STOPPED;

/* Human readable names for child_state, as reported on the CLI. */
static const char * const ch_state[] = {
        [CH_STOPPED] =  "stopped",
        [CH_STARTING] = "starting",
        [CH_RUNNING] =  "running",
        [CH_STOPPING] = "stopping",
        [CH_DIED] =     "died, (restarting)",
};

/* Event for the periodic ping and for the child stdout/stderr listener. */
static struct vev       *ev_poker;
static struct vev       *ev_listen;

/* Line splitter handling what arrives on child_output. */
static struct vlu       *child_std_vlu;

/* Most recently recorded panic message, NULL when there is none. */
static struct vsb       *child_panic;

static void mgt_reap_child(void);
89
90
/*=====================================================================
91
 * Panic string evacuation and handling
92
 */
93
94
static void
95 10
mgt_panic_record(pid_t r)
96
{
97
        char time_str[30];
98
99 10
        if (child_panic != NULL)
100 0
                VSB_destroy(&child_panic);
101 10
        child_panic = VSB_new_auto();
102 10
        AN(child_panic);
103 10
        VTIM_format(VTIM_real(), time_str);
104 10
        VSB_printf(child_panic, "Panic at: %s\n", time_str);
105 10
        VSB_quote(child_panic, heritage.panic_str,
106 10
            strnlen(heritage.panic_str, heritage.panic_str_len),
107
            VSB_QUOTE_NONL);
108 10
        AZ(VSB_finish(child_panic));
109 10
        MGT_Complain(C_ERR, "Child (%jd) %s",
110
            (intmax_t)r, VSB_data(child_panic));
111 10
}
112
113
static void
114 10
mgt_panic_clear(void)
115
{
116 10
        VSB_destroy(&child_panic);
117 10
}
118
119
static void v_matchproto_(cli_func_t)
120 4
mch_cli_panic_show(struct cli *cli, const char * const *av, void *priv)
121
{
122
        (void)av;
123
        (void)priv;
124
125 4
        if (!child_panic) {
126 0
                VCLI_SetResult(cli, CLIS_CANT);
127 0
                VCLI_Out(cli,
128
                    "Child has not panicked or panic has been cleared");
129 4
                return;
130
        }
131
132 4
        VCLI_Out(cli, "%s\n", VSB_data(child_panic));
133
}
134
135
static void v_matchproto_(cli_func_t)
136 1252
mch_cli_panic_clear(struct cli *cli, const char * const *av, void *priv)
137
{
138
        (void)priv;
139
140 1252
        if (av[2] != NULL && strcmp(av[2], "-z")) {
141 0
                VCLI_SetResult(cli, CLIS_PARAM);
142 0
                VCLI_Out(cli, "Unknown parameter \"%s\".", av[2]);
143 0
                return;
144 1252
        } else if (av[2] != NULL) {
145 4
                VSC_C_mgt->child_panic = 0;
146 4
                if (child_panic == NULL)
147 2
                        return;
148
        }
149 1250
        if (child_panic == NULL) {
150 1240
                VCLI_SetResult(cli, CLIS_CANT);
151 1240
                VCLI_Out(cli, "No panic to clear");
152 1240
                return;
153
        }
154 10
        mgt_panic_clear();
155
}
156
157
/*=====================================================================
158
 * Track the highest file descriptor the parent knows is being used.
159
 *
160
 * This allows the child process to clean/close only a small fraction
161
 * of the possible file descriptors after exec(2).
162
 *
163
 * This is likely to be a bit on the low side, as libc and other libraries
164
 * have a tendency to cache file descriptors (syslog, resolver, etc.)
165
 * so we add a margin of 100 fds.
166
 */
167
168
/* Highest file descriptor number reported through MCH_TrackHighFd() */
static int              mgt_max_fd;

/* Exclusive upper bound for the fd sweep in the child: high mark + margin */
#define CLOSE_FD_UP_TO  (mgt_max_fd + 100)

/*
 * Note that fd is in use, raising the high-water mark if necessary.
 */
void
MCH_TrackHighFd(int fd)
{

        /*
         * Zero is rejected too: we know where stdin goes in the master
         * process, so a zero here is the result of a bogus open.
         */
        assert(fd > 0);
        mgt_max_fd = (fd > mgt_max_fd) ? fd : mgt_max_fd;
}
183
184
/*--------------------------------------------------------------------
185
 * Keep track of which file descriptors the child should inherit and
186
 * which should be closed after fork()
187
 */
188
189
void
190 9976
MCH_Fd_Inherit(int fd, const char *what)
191
{
192
193 9976
        assert(fd >= 0);
194 9976
        if (fd_map == NULL)
195 1276
                fd_map = vbit_new(128);
196 9976
        AN(fd_map);
197 9976
        if (what != NULL)
198 6284
                vbit_set(fd_map, fd);
199
        else
200 3692
                vbit_clr(fd_map, fd);
201 9976
}
202
203
/*=====================================================================
204
 * Listen to stdout+stderr from the child
205
 */
206
207
static int v_matchproto_(vlu_f)
208 2760
child_line(void *priv, const char *p)
209
{
210
        (void)priv;
211
212 2760
        MGT_Complain(C_INFO, "Child (%jd) said %s", (intmax_t)child_pid, p);
213 2760
        return (0);
214
}
215
216
/*--------------------------------------------------------------------
217
 * NB: Notice cleanup call from mgt_reap_child()
218
 */
219
220
static int v_matchproto_(vev_cb_f)
221 3062
child_listener(const struct vev *e, int what)
222
{
223
224 3062
        if ((what & ~VEV__RD) || VLU_Fd(child_std_vlu, child_output)) {
225 22
                ev_listen = NULL;
226 22
                if (e != NULL)
227 10
                        mgt_reap_child();
228 22
                return (1);
229
        }
230 3040
        return (0);
231
}
232
233
/*=====================================================================
234
 * Periodically poke the child, to see that it still lives
235
 */
236
237
static int v_matchproto_(vev_cb_f)
238 424
child_poker(const struct vev *e, int what)
239
{
240 424
        char *r = NULL;
241
        unsigned status;
242
243
        (void)e;
244
        (void)what;
245 424
        if (child_state != CH_RUNNING)
246 0
                return (1);
247 424
        if (child_pid < 0)
248 0
                return (0);
249 424
        if (mgt_cli_askchild(&status, &r, "ping\n") || strncmp("PONG ", r, 5)) {
250 0
                MGT_Complain(C_ERR, "Unexpected reply from ping: %u %s",
251
                    status, r);
252 0
                if (status != CLIS_COMMS)
253 0
                        MCH_Cli_Fail();
254
        }
255 424
        free(r);
256 424
        return 0;
257
}
258
259
/*=====================================================================
260
 * Launch the child process
261
 */
262
263
static void
264 1230
mgt_launch_child(struct cli *cli)
265
{
266
        pid_t pid;
267
        unsigned u;
268
        char *p;
269
        struct vev *e;
270
        int i, cp[2];
271
272 1230
        if (child_state != CH_STOPPED && child_state != CH_DIED)
273 1230
                return;
274
275 1230
        child_state = CH_STARTING;
276
277
        /* Open pipe for mgt->child CLI */
278 1230
        AZ(pipe(cp));
279 1230
        heritage.cli_in = cp[0];
280 1230
        MCH_Fd_Inherit(heritage.cli_in, "cli_in");
281 1230
        child_cli_out = cp[1];
282
283
        /* Open pipe for child->mgt CLI */
284 1230
        AZ(pipe(cp));
285 1230
        heritage.cli_out = cp[1];
286 1230
        MCH_Fd_Inherit(heritage.cli_out, "cli_out");
287 1230
        child_cli_in = cp[0];
288
289
        /*
290
         * Open pipe for child stdout/err
291
         * NB: not inherited, because we dup2() it to stdout/stderr in child
292
         */
293 1230
        AZ(pipe(cp));
294 1230
        heritage.std_fd = cp[1];
295 1230
        child_output = cp[0];
296
297 1230
        mgt_SHM_ChildNew();
298
299 1230
        AN(heritage.param);
300 1230
        AN(heritage.panic_str);
301 1230
        if ((pid = fork()) < 0) {
302 0
                perror("Could not fork child");
303 0
                exit(1);                // XXX Harsh ?
304
        }
305 2458
        if (pid == 0) {
306
307
                /* Redirect stdin/out/err */
308 1228
                VFIL_null_fd(STDIN_FILENO);
309 1228
                assert(dup2(heritage.std_fd, STDOUT_FILENO) == STDOUT_FILENO);
310 1228
                assert(dup2(heritage.std_fd, STDERR_FILENO) == STDERR_FILENO);
311
312
                /*
313
                 * Close all FDs the child shouldn't know about
314
                 *
315
                 * We cannot just close these filedescriptors, some random
316
                 * library routine might miss it later on and wantonly close
317
                 * a FD we use at that point in time. (See bug #1841).
318
                 * We close the FD and replace it with /dev/null instead,
319
                 * That prevents security leakage, and gives the library
320
                 * code a valid FD to close when it discovers the changed
321
                 * circumstances.
322
                 */
323 1228
                closelog();
324
325 132756
                for (i = STDERR_FILENO + 1; i < CLOSE_FD_UP_TO; i++) {
326 131528
                        if (vbit_test(fd_map, i))
327 4996
                                continue;
328 126532
                        if (close(i) == 0)
329 16124
                                VFIL_null_fd(i);
330
                }
331
332 1228
                mgt_ProcTitle("Child");
333
334 1228
                heritage.cls = mgt_cls;
335 1228
                heritage.ident = VSB_data(vident) + 1;
336
337 1228
                VJ_subproc(JAIL_SUBPROC_WORKER);
338
339 1228
                heritage.proc_vsmw = VSMW_New(heritage.vsm_fd, 0640, "_.index");
340 1228
                AN(heritage.proc_vsmw);
341
342
                /*
343
                 * We pass these two params because child_main needs them
344
                 * Well before it has found its own param struct.
345
                 */
346 1228
                child_main(mgt_param.sigsegv_handler,
347 1228
                    mgt_param.wthread_stacksize);
348
349 1218
                exit(0);
350
        }
351 1230
        assert(pid > 1);
352 1230
        MGT_Complain(C_DEBUG, "Child (%jd) Started", (intmax_t)pid);
353 1230
        VSC_C_mgt->child_start++;
354
355
        /* Close stuff the child got */
356 1230
        closefd(&heritage.std_fd);
357
358 1230
        MCH_Fd_Inherit(heritage.cli_in, NULL);
359 1230
        closefd(&heritage.cli_in);
360
361 1230
        MCH_Fd_Inherit(heritage.cli_out, NULL);
362 1230
        closefd(&heritage.cli_out);
363
364 1230
        child_std_vlu = VLU_New(child_line, NULL, 0);
365 1230
        AN(child_std_vlu);
366
367 1230
        AZ(ev_listen);
368 1230
        e = VEV_Alloc();
369 1230
        XXXAN(e);
370 1230
        e->fd = child_output;
371 1230
        e->fd_flags = VEV__RD;
372 1230
        e->name = "Child listener";
373 1230
        e->callback = child_listener;
374 1230
        AZ(VEV_Start(mgt_evb, e));
375 1230
        ev_listen = e;
376 1230
        AZ(ev_poker);
377 1230
        if (mgt_param.ping_interval > 0) {
378 1230
                e = VEV_Alloc();
379 1230
                XXXAN(e);
380 1230
                e->timeout = mgt_param.ping_interval;
381 1230
                e->callback = child_poker;
382 1230
                e->name = "child poker";
383 1230
                AZ(VEV_Start(mgt_evb, e));
384 1230
                ev_poker = e;
385
        }
386
387 1230
        mgt_cli_start_child(child_cli_in, child_cli_out);
388 1230
        child_pid = pid;
389 1230
        if (mgt_push_vcls_and_start(cli, &u, &p)) {
390 2
                VCLI_SetResult(cli, u);
391 2
                MGT_Complain(C_ERR, "Child (%jd) Pushing vcls failed:\n%s",
392
                    (intmax_t)child_pid, p);
393 2
                free(p);
394 2
                child_state = CH_RUNNING;
395 2
                MCH_Stop_Child();
396
        } else
397 1228
                child_state = CH_RUNNING;
398
}
399
400
/*=====================================================================
401
 * Cleanup when child dies.
402
 */
403
404
static int
405 2
kill_child(void)
406
{
407
        int i, error;
408
409 2
        VJ_master(JAIL_MASTER_KILL);
410 2
        if (MGT_FEATURE(FEATURE_NO_COREDUMP))
411 2
                i = kill(child_pid, SIGKILL);
412
        else
413 0
                i = kill(child_pid, SIGQUIT);
414 2
        error = errno;
415 2
        VJ_master(JAIL_MASTER_LOW);
416 2
        errno = error;
417 2
        return (i);
418
}
419
420
static void
421 1230
mgt_reap_child(void)
422
{
423
        int i;
424 1230
        int status = 0xffff;
425
        struct vsb *vsb;
426 1230
        pid_t r = 0;
427
428 1230
        assert(child_pid != -1);
429
430
        /*
431
         * Close the CLI connections
432
         * This signals orderly shut down to child
433
         */
434 1230
        mgt_cli_stop_child();
435 1230
        if (child_cli_out >= 0)
436 1230
                closefd(&child_cli_out);
437 1230
        if (child_cli_in >= 0)
438 1230
                closefd(&child_cli_in);
439
440
        /* Stop the poker */
441 1230
        if (ev_poker != NULL) {
442 1230
                VEV_Stop(mgt_evb, ev_poker);
443 1230
                free(ev_poker);
444
        }
445 1230
        ev_poker = NULL;
446
447
        /* Stop the listener */
448 1230
        if (ev_listen != NULL) {
449 1220
                VEV_Stop(mgt_evb, ev_listen);
450 1220
                free(ev_listen);
451 1220
                ev_listen = NULL;
452
        }
453
454
        /* Compose obituary */
455 1230
        vsb = VSB_new_auto();
456 1230
        XXXAN(vsb);
457
458
        /* Wait for child to die */
459 2480
        for (i = 0; i < mgt_param.cli_timeout; i++) {
460 2480
                r = waitpid(child_pid, &status, WNOHANG);
461 2480
                if (r == child_pid)
462 1230
                        break;
463 1250
                (void)sleep(1);
464
        }
465 1230
        if (r == 0) {
466 0
                VSB_printf(vsb, "Child (%jd) not dying, killing", (intmax_t)r);
467
468
                /* Kick it Jim... */
469 0
                (void)kill_child();
470 0
                r = waitpid(child_pid, &status, 0);
471
        }
472 1230
        if (r != child_pid)
473 0
                fprintf(stderr, "WAIT 0x%jd\n", (intmax_t)r);
474 1230
        assert(r == child_pid);
475
476
        /*
477
         * XXX exit mgr if we fail even with retries?
478
         * number of retries? interval?
479
         */
480 1230
        for (i = 0; i < 3; i++) {
481 1230
                if (MAC_reopen_sockets() == 0)
482 1230
                        break;
483
                /* error already logged */
484 0
                (void)sleep(1);
485
        }
486
487 1230
        VSB_printf(vsb, "Child (%jd) %s", (intmax_t)r,
488 1230
            status ? "died" : "ended");
489 1230
        if (WIFEXITED(status) && WEXITSTATUS(status)) {
490 10
                VSB_printf(vsb, " status=%d", WEXITSTATUS(status));
491 10
                exit_status |= 0x20;
492 10
                if (WEXITSTATUS(status) == 1)
493 0
                        VSC_C_mgt->child_exit++;
494
                else
495 10
                        VSC_C_mgt->child_stop++;
496
        }
497 1230
        if (WIFSIGNALED(status)) {
498 2
                VSB_printf(vsb, " signal=%d", WTERMSIG(status));
499 2
                exit_status |= 0x40;
500 2
                VSC_C_mgt->child_died++;
501
        }
502
#ifdef WCOREDUMP
503 1230
        if (WCOREDUMP(status)) {
504 0
                VSB_printf(vsb, " (core dumped)");
505 0
                exit_status |= 0x80;
506 0
                VSC_C_mgt->child_dump++;
507
        }
508
#endif
509 1230
        AZ(VSB_finish(vsb));
510 1230
        MGT_Complain(status ? C_ERR : C_INFO, "%s", VSB_data(vsb));
511 1230
        VSB_destroy(&vsb);
512
513
        /* Dispose of shared memory but evacuate panic messages first */
514 1230
        if (heritage.panic_str[0] != '\0') {
515 10
                mgt_panic_record(r);
516 10
                VSC_C_mgt->child_panic++;
517
        }
518
519 1230
        mgt_SHM_ChildDestroy();
520
521 1230
        if (child_state == CH_RUNNING)
522 10
                child_state = CH_DIED;
523
524
        /* Pick up any stuff lingering on stdout/stderr */
525 1230
        (void)child_listener(NULL, VEV__RD);
526 1230
        closefd(&child_output);
527 1230
        VLU_Destroy(&child_std_vlu);
528
529 1230
        child_pid = -1;
530
531 1230
        MGT_Complain(C_DEBUG, "Child cleanup complete");
532
533 1230
        if (child_state == CH_DIED && mgt_param.auto_restart)
534 0
                mgt_launch_child(NULL);
535 1230
        else if (child_state == CH_DIED)
536 10
                child_state = CH_STOPPED;
537 1220
        else if (child_state == CH_STOPPING)
538 1220
                child_state = CH_STOPPED;
539 1230
}
540
541
/*=====================================================================
542
 * If CLI communications with the child process fails, there is nothing
543
 * for us to do but to drag it behind the barn and get it over with.
544
 *
545
 * The typical case is where the child process fails to return a reply
546
 * before the cli_timeout expires.  This invalidates the CLI pipes for
547
 * all future use, as we don't know if the child was just slow and the
548
 * result gets piped later on, or if the child is catatonic.
549
 */
550
551
void
552 2
MCH_Cli_Fail(void)
553
{
554
555 2
        if (child_state != CH_RUNNING)
556 0
                return;
557 2
        if (child_pid < 0)
558 0
                return;
559 2
        if (kill_child() == 0)
560 2
                MGT_Complain(C_ERR, "Child (%jd) not responding to CLI,"
561
                    " killed it.", (intmax_t)child_pid);
562
        else
563 0
                MGT_Complain(C_ERR, "Failed to kill child with PID %jd: %s",
564 0
                    (intmax_t)child_pid, strerror(errno));
565
}
566
567
/*=====================================================================
568
 * Controlled stop of child process
569
 *
570
 * Reaping the child asks for orderly shutdown
571
 */
572
573
void
574 2464
MCH_Stop_Child(void)
575
{
576
577 2464
        if (child_state != CH_RUNNING)
578 3708
                return;
579
580 1220
        child_state = CH_STOPPING;
581
582 1220
        MGT_Complain(C_DEBUG, "Stopping Child");
583
584 1220
        mgt_reap_child();
585
}
586
587
/*=====================================================================
588
 */
589
590
int
591 0
MCH_Start_Child(void)
592
{
593 0
        mgt_launch_child(NULL);
594 0
        if (child_state != CH_RUNNING)
595 0
                return (2);
596 0
        return(0);
597
}
598
599
/*====================================================================
600
 * Query if the child is running
601
 */
602
603
int
604 13934
MCH_Running(void)
605
{
606
607 13934
        return (child_pid > 0);
608
}
609
610
/*=====================================================================
611
 * CLI commands
612
 */
613
614
static void v_matchproto_(cli_func_t)
615 1238
mch_cli_server_start(struct cli *cli, const char * const *av, void *priv)
616
{
617
618
        (void)av;
619
        (void)priv;
620 1238
        if (child_state == CH_STOPPED) {
621 1230
                if (mgt_has_vcl()) {
622 1230
                        mgt_launch_child(cli);
623
                } else {
624 0
                        VCLI_SetResult(cli, CLIS_CANT);
625 0
                        VCLI_Out(cli, "No VCL available");
626
                }
627
        } else {
628 8
                VCLI_SetResult(cli, CLIS_CANT);
629 8
                VCLI_Out(cli, "Child in state %s", ch_state[child_state]);
630
        }
631 1238
}
632
633
static void v_matchproto_(cli_func_t)
634 1322
mch_cli_server_stop(struct cli *cli, const char * const *av, void *priv)
635
{
636
637
        (void)av;
638
        (void)priv;
639 1322
        if (child_state == CH_RUNNING) {
640 1214
                MCH_Stop_Child();
641
        } else {
642 108
                VCLI_SetResult(cli, CLIS_CANT);
643 108
                VCLI_Out(cli, "Child in state %s", ch_state[child_state]);
644
        }
645 1322
}
646
647
static void v_matchproto_(cli_func_t)
648 2538
mch_cli_server_status(struct cli *cli, const char * const *av, void *priv)
649
{
650
        (void)av;
651
        (void)priv;
652 2538
        VCLI_Out(cli, "Child in state %s", ch_state[child_state]);
653 2538
}
654
655
static struct cli_proto cli_mch[] = {
656
        { CLICMD_SERVER_STATUS,         "", mch_cli_server_status },
657
        { CLICMD_SERVER_START,          "", mch_cli_server_start },
658
        { CLICMD_SERVER_STOP,           "", mch_cli_server_stop },
659
        { CLICMD_PANIC_SHOW,            "", mch_cli_panic_show },
660
        { CLICMD_PANIC_CLEAR,           "", mch_cli_panic_clear },
661
        { NULL }
662
};
663
664
/*=====================================================================
665
 * This thread is the master thread in the management process.
666
 * The relatively simple task is to start and stop the child process
667
 * and to reincarnate it in case of trouble.
668
 */
669
670
void
671 1248
MCH_Init(void)
672
{
673
674 1248
        VCLS_AddFunc(mgt_cls, MCF_AUTH, cli_mch);
675 1248
}