| | varnish-cache/bin/varnishd/mgt/mgt_child.c |
0 |
|
/*- |
1 |
|
* Copyright (c) 2006 Verdens Gang AS |
2 |
|
* Copyright (c) 2006-2015 Varnish Software AS |
3 |
|
* All rights reserved. |
4 |
|
* |
5 |
|
* Author: Poul-Henning Kamp <phk@phk.freebsd.dk> |
6 |
|
* |
7 |
|
* SPDX-License-Identifier: BSD-2-Clause |
8 |
|
* |
9 |
|
* Redistribution and use in source and binary forms, with or without |
10 |
|
* modification, are permitted provided that the following conditions |
11 |
|
* are met: |
12 |
|
* 1. Redistributions of source code must retain the above copyright |
13 |
|
* notice, this list of conditions and the following disclaimer. |
14 |
|
* 2. Redistributions in binary form must reproduce the above copyright |
15 |
|
* notice, this list of conditions and the following disclaimer in the |
16 |
|
* documentation and/or other materials provided with the distribution. |
17 |
|
* |
18 |
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
19 |
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 |
|
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
22 |
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 |
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
24 |
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
25 |
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
26 |
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
27 |
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
28 |
|
* SUCH DAMAGE. |
29 |
|
* |
30 |
|
* The mechanics of handling the child process |
31 |
|
*/ |
32 |
|
|
33 |
|
#include "config.h" |
34 |
|
|
35 |
|
#include <sys/types.h> |
36 |
|
|
37 |
|
#include <poll.h> |
38 |
|
#include <stdarg.h> |
39 |
|
#include <stdio.h> |
40 |
|
#include <string.h> |
41 |
|
#include <syslog.h> |
42 |
|
#include <unistd.h> |
43 |
|
#include <sys/types.h> |
44 |
|
#include <sys/socket.h> |
45 |
|
#include <sys/time.h> |
46 |
|
#include <sys/resource.h> |
47 |
|
|
48 |
|
#include "mgt.h" |
49 |
|
#include "acceptor/cache_acceptor.h" |
50 |
|
#include "acceptor/mgt_acceptor.h" |
51 |
|
|
52 |
|
#include "vapi/vsig.h" |
53 |
|
|
54 |
|
#include "vbm.h" |
55 |
|
#include "vcli_serve.h" |
56 |
|
#include "vev.h" |
57 |
|
#include "vfil.h" |
58 |
|
#include "vlu.h" |
59 |
|
#include "vtim.h" |
60 |
|
|
61 |
|
#include "common/heritage.h" |
62 |
|
|
63 |
|
/* Pid of the current child process; -1 while there is none. */
static pid_t child_pid = -1;

/* Bitmap of the file descriptors registered through MCH_Fd_Inherit(). */
static struct vbitmap *fd_map;

/*
 * Management-side descriptors: our end of the CLI socketpair and the
 * read end of the pipe carrying the child's stdout/stderr.
 */
static int child_cli_fd = -1;
static int child_output = -1;

/* Life-cycle state of the child; the values index ch_state[] below. */
static enum {
	CH_STOPPED,
	CH_STARTING,
	CH_RUNNING,
	CH_STOPPING,
	CH_DIED
} child_state = CH_STOPPED;

/* Human readable names for the states, as reported over the CLI. */
static const char * const ch_state[] = {
	[CH_STOPPED]	= "stopped",
	[CH_STARTING]	= "starting",
	[CH_RUNNING]	= "running",
	[CH_STOPPING]	= "stopping",
	[CH_DIED]	= "died, (restarting)",
};

/* Events for the periodic ping and for the child's output pipe. */
static struct vev *ev_poker;
static struct vev *ev_listen;

/* Line splitter feeding the child's output to child_line(). */
static struct vlu *child_std_vlu;

/* Text of the most recently recorded panic, NULL when there is none. */
static struct vsb *child_panic = NULL;

static void mgt_reap_child(void);
static int kill_child(void);
94 |
|
|
95 |
|
/*===================================================================== |
96 |
|
* Panic string evacuation and handling |
97 |
|
*/ |
98 |
|
|
99 |
|
static void |
100 |
514 |
mgt_panic_record(pid_t r) |
101 |
|
{ |
102 |
|
char time_str[30]; |
103 |
|
|
104 |
514 |
if (child_panic != NULL) |
105 |
0 |
VSB_destroy(&child_panic); |
106 |
514 |
child_panic = VSB_new_auto(); |
107 |
514 |
AN(child_panic); |
108 |
514 |
VTIM_format(VTIM_real(), time_str); |
109 |
1028 |
VSB_printf(child_panic, "Child (%jd) Panic at: %s\n", |
110 |
514 |
(intmax_t)r, time_str); |
111 |
1028 |
VSB_quote(child_panic, heritage.panic_str, |
112 |
514 |
strnlen(heritage.panic_str, heritage.panic_str_len), |
113 |
|
VSB_QUOTE_NONL); |
114 |
514 |
MGT_ComplainVSB(C_ERR, child_panic); |
115 |
514 |
} |
116 |
|
|
117 |
|
static void |
118 |
439 |
mgt_panic_clear(void) |
119 |
|
{ |
120 |
439 |
VSB_destroy(&child_panic); |
121 |
439 |
} |
122 |
|
|
123 |
|
static void |
124 |
37880 |
cli_panic_show(struct cli *cli, const char * const *av, int json) |
125 |
|
{ |
126 |
37880 |
if (!child_panic) { |
127 |
37440 |
VCLI_SetResult(cli, CLIS_CANT); |
128 |
37440 |
VCLI_Out(cli, |
129 |
|
"Child has not panicked or panic has been cleared"); |
130 |
37440 |
return; |
131 |
|
} |
132 |
|
|
133 |
440 |
if (!json) { |
134 |
280 |
VCLI_Out(cli, "%s\n", VSB_data(child_panic)); |
135 |
280 |
return; |
136 |
|
} |
137 |
|
|
138 |
160 |
VCLI_JSON_begin(cli, 2, av); |
139 |
160 |
VCLI_Out(cli, ",\n"); |
140 |
160 |
VCLI_JSON_str(cli, VSB_data(child_panic)); |
141 |
160 |
VCLI_JSON_end(cli); |
142 |
37880 |
} |
143 |
|
|
144 |
|
static void v_matchproto_(cli_func_t) |
145 |
37720 |
mch_cli_panic_show(struct cli *cli, const char * const *av, void *priv) |
146 |
|
{ |
147 |
37720 |
(void)priv; |
148 |
37720 |
cli_panic_show(cli, av, 0); |
149 |
37720 |
} |
150 |
|
|
151 |
|
static void v_matchproto_(cli_func_t) |
152 |
160 |
mch_cli_panic_show_json(struct cli *cli, const char * const *av, void *priv) |
153 |
|
{ |
154 |
160 |
(void)priv; |
155 |
160 |
cli_panic_show(cli, av, 1); |
156 |
160 |
} |
157 |
|
|
158 |
|
static void v_matchproto_(cli_func_t) |
159 |
560 |
mch_cli_panic_clear(struct cli *cli, const char * const *av, void *priv) |
160 |
|
{ |
161 |
560 |
(void)priv; |
162 |
|
|
163 |
560 |
if (av[2] != NULL && strcmp(av[2], "-z")) { |
164 |
0 |
VCLI_SetResult(cli, CLIS_PARAM); |
165 |
0 |
VCLI_Out(cli, "Unknown parameter \"%s\".", av[2]); |
166 |
0 |
return; |
167 |
80 |
} else if (av[2] != NULL) { |
168 |
80 |
VSC_C_mgt->child_panic = 0; |
169 |
80 |
if (child_panic == NULL) |
170 |
40 |
return; |
171 |
40 |
} |
172 |
520 |
if (child_panic == NULL) { |
173 |
81 |
VCLI_SetResult(cli, CLIS_CANT); |
174 |
81 |
VCLI_Out(cli, "No panic to clear"); |
175 |
81 |
return; |
176 |
|
} |
177 |
439 |
mgt_panic_clear(); |
178 |
560 |
} |
179 |
|
|
180 |
|
/*===================================================================== |
181 |
|
* Track the highest file descriptor the parent knows is being used. |
182 |
|
* |
183 |
|
* This allows the child process to clean/close only a small fraction |
184 |
|
* of the possible file descriptors after exec(2). |
185 |
|
* |
186 |
|
* This is likely to be a bit on the low side, as libc and other libraries
187 |
|
* have a tendency to cache file descriptors (syslog, resolver, etc.)
188 |
|
* so we add a margin of 10 fds. |
189 |
|
* |
190 |
|
* For added safety, we check that we see no file descriptor open for |
191 |
|
* another margin above the limit for which we close by design |
192 |
|
*/ |
193 |
|
|
194 |
|
/* Highest file descriptor reported through MCH_TrackHighFd() so far. */
static int mgt_max_fd;

/* Width of each of the two safety margins above mgt_max_fd. */
#define MGT_FD_MARGIN	10

/* Last fd the child closes by design after fork(). */
#define CLOSE_FD_UP_TO	(mgt_max_fd + MGT_FD_MARGIN)
/* Last fd the child verifies is not open. */
#define CHECK_FD_UP_TO	(CLOSE_FD_UP_TO + MGT_FD_MARGIN)
198 |
|
|
199 |
|
void |
200 |
390480 |
MCH_TrackHighFd(int fd) |
201 |
|
{ |
202 |
|
/* |
203 |
|
* Assert > 0, to catch bogus opens, we know where stdin goes |
204 |
|
* in the master process. |
205 |
|
*/ |
206 |
390480 |
assert(fd > 0); |
207 |
390480 |
mgt_max_fd = vmax(mgt_max_fd, fd); |
208 |
390480 |
} |
209 |
|
|
210 |
|
/*-------------------------------------------------------------------- |
211 |
|
* Keep track of which filedescriptors the child should inherit and |
212 |
|
* which should be closed after fork() |
213 |
|
*/ |
214 |
|
|
215 |
|
void |
216 |
233040 |
MCH_Fd_Inherit(int fd, const char *what) |
217 |
|
{ |
218 |
|
|
219 |
233040 |
assert(fd >= 0); |
220 |
|
// XXX why? |
221 |
233040 |
if (fd > 0) |
222 |
233040 |
MCH_TrackHighFd(fd); |
223 |
427440 |
if (fd_map == NULL) |
224 |
38640 |
fd_map = vbit_new(128); |
225 |
233040 |
AN(fd_map); |
226 |
233040 |
if (what != NULL) |
227 |
156360 |
vbit_set(fd_map, fd); |
228 |
|
else |
229 |
76680 |
vbit_clr(fd_map, fd); |
230 |
233040 |
} |
231 |
|
|
232 |
|
/*===================================================================== |
233 |
|
* Listen to stdout+stderr from the child |
234 |
|
*/ |
235 |
|
|
236 |
|
static const char *whining_child = C_ERR; |
237 |
|
|
238 |
|
static int v_matchproto_(vlu_f) |
239 |
104809 |
child_line(void *priv, const char *p) |
240 |
|
{ |
241 |
104809 |
(void)priv; |
242 |
|
|
243 |
104809 |
MGT_Complain(whining_child, "Child (%jd) said %s", (intmax_t)child_pid, p); |
244 |
104809 |
return (0); |
245 |
|
} |
246 |
|
|
247 |
|
/*-------------------------------------------------------------------- |
248 |
|
* NB: Notice cleanup call from mgt_reap_child() |
249 |
|
*/ |
250 |
|
|
251 |
|
static int v_matchproto_(vev_cb_f) |
252 |
167252 |
child_listener(const struct vev *e, int what) |
253 |
|
{ |
254 |
|
|
255 |
167252 |
if ((what & ~VEV__RD) || VLU_Fd(child_std_vlu, child_output)) { |
256 |
360 |
ev_listen = NULL; |
257 |
360 |
if (e != NULL) |
258 |
360 |
mgt_reap_child(); |
259 |
85071 |
return (1); |
260 |
|
} |
261 |
82181 |
return (0); |
262 |
167252 |
} |
263 |
|
|
264 |
|
/*===================================================================== |
265 |
|
* Periodically poke the child, to see that it still lives |
266 |
|
*/ |
267 |
|
|
268 |
|
static int v_matchproto_(vev_cb_f) |
269 |
7474 |
child_poker(const struct vev *e, int what) |
270 |
|
{ |
271 |
7474 |
char *r = NULL; |
272 |
|
unsigned status; |
273 |
|
|
274 |
7474 |
(void)e; |
275 |
7474 |
(void)what; |
276 |
7474 |
if (child_state != CH_RUNNING) |
277 |
0 |
return (1); |
278 |
7474 |
if (child_pid < 0) |
279 |
0 |
return (0); |
280 |
7474 |
if (mgt_cli_askchild(&status, &r, "ping\n") || strncmp("PONG ", r, 5)) { |
281 |
0 |
MGT_Complain(C_ERR, "Unexpected reply from ping: %u %s", |
282 |
0 |
status, r); |
283 |
0 |
if (status != CLIS_COMMS) |
284 |
0 |
MCH_Cli_Fail(); |
285 |
0 |
} |
286 |
7474 |
free(r); |
287 |
7474 |
return (0); |
288 |
7474 |
} |
289 |
|
|
290 |
|
/*===================================================================== |
291 |
|
* Launch the child process |
292 |
|
*/ |
293 |
|
|
294 |
|
/*
 * Report a launch failure: always log it at C_ERR and, when a CLI
 * session is present (cli != NULL), also send the message to that
 * session and set its result to 'status'.
 *
 * The variadic part is a printf-style format plus arguments.  It is
 * expanded twice, so its arguments must be free of side effects.
 * 'cli' and 'status' are parenthesized so that arbitrary expressions
 * can be passed safely.
 */
#define mgt_launch_err(cli, status, ...) do {		\
		MGT_Complain(C_ERR, __VA_ARGS__);	\
		if ((cli) == NULL)			\
			break;				\
		VCLI_Out((cli), __VA_ARGS__);		\
		VCLI_SetResult((cli), (status));	\
	} while (0)
301 |
|
|
302 |
|
static void |
303 |
38080 |
mgt_launch_child(struct cli *cli) |
304 |
|
{ |
305 |
|
pid_t pid; |
306 |
|
unsigned u; |
307 |
|
char *p; |
308 |
|
struct vev *e; |
309 |
|
int i, cp[2]; |
310 |
|
struct rlimit rl[1]; |
311 |
|
vtim_dur dstart; |
312 |
|
int bstart; |
313 |
|
vtim_mono t0; |
314 |
|
|
315 |
38080 |
if (child_state != CH_STOPPED && child_state != CH_DIED) |
316 |
0 |
return; |
317 |
|
|
318 |
38080 |
child_state = CH_STARTING; |
319 |
|
|
320 |
|
/* Open pipe for mgt->child CLI */ |
321 |
38080 |
AZ(socketpair(AF_UNIX, SOCK_STREAM, 0, cp)); |
322 |
38080 |
heritage.cli_fd = cp[0]; |
323 |
38080 |
assert(cp[0] > STDERR_FILENO); // See #2782 |
324 |
38080 |
assert(cp[1] > STDERR_FILENO); |
325 |
38080 |
MCH_Fd_Inherit(heritage.cli_fd, "cli_fd"); |
326 |
38080 |
child_cli_fd = cp[1]; |
327 |
|
|
328 |
|
/* |
329 |
|
* Open pipe for child stdout/err |
330 |
|
* NB: not inherited, because we dup2() it to stdout/stderr in child |
331 |
|
*/ |
332 |
38080 |
AZ(pipe(cp)); |
333 |
38080 |
heritage.std_fd = cp[1]; |
334 |
38080 |
child_output = cp[0]; |
335 |
|
|
336 |
38080 |
mgt_SHM_ChildNew(); |
337 |
|
|
338 |
38080 |
AN(heritage.param); |
339 |
38080 |
AN(heritage.panic_str); |
340 |
38080 |
VJ_master(JAIL_MASTER_SYSTEM); |
341 |
38080 |
if ((pid = fork()) < 0) { |
342 |
0 |
VJ_master(JAIL_MASTER_LOW); |
343 |
0 |
perror("Could not fork child"); |
344 |
0 |
exit(1); // XXX Harsh ? |
345 |
|
} |
346 |
75393 |
if (pid == 0) { |
347 |
|
|
348 |
37313 |
if (MGT_FEATURE(FEATURE_NO_COREDUMP)) { |
349 |
598 |
memset(rl, 0, sizeof *rl); |
350 |
598 |
rl->rlim_cur = 0; |
351 |
598 |
AZ(setrlimit(RLIMIT_CORE, rl)); |
352 |
598 |
} |
353 |
|
|
354 |
|
/* Redirect stdin/out/err */ |
355 |
37313 |
VFIL_null_fd(STDIN_FILENO); |
356 |
37313 |
assert(dup2(heritage.std_fd, STDOUT_FILENO) == STDOUT_FILENO); |
357 |
37313 |
assert(dup2(heritage.std_fd, STDERR_FILENO) == STDERR_FILENO); |
358 |
|
|
359 |
37313 |
setbuf(stdout, NULL); |
360 |
37313 |
setbuf(stderr, NULL); |
361 |
37313 |
printf("Child starts\n"); |
362 |
|
|
363 |
|
/* |
364 |
|
* Close all FDs the child shouldn't know about |
365 |
|
* |
366 |
|
* We cannot just close these filedescriptors, some random |
367 |
|
* library routine might miss it later on and wantonly close |
368 |
|
* a FD we use at that point in time. (See bug #1841). |
369 |
|
* We close the FD and replace it with /dev/null instead, |
370 |
|
* That prevents security leakage, and gives the library |
371 |
|
* code a valid FD to close when it discovers the changed |
372 |
|
* circumstances. |
373 |
|
*/ |
374 |
37313 |
closelog(); |
375 |
|
|
376 |
858634 |
for (i = STDERR_FILENO + 1; i <= CLOSE_FD_UP_TO; i++) { |
377 |
821321 |
if (vbit_test(fd_map, i)) |
378 |
112414 |
continue; |
379 |
708907 |
if (close(i) == 0) |
380 |
335542 |
VFIL_null_fd(i); |
381 |
708907 |
} |
382 |
410443 |
for (i = CLOSE_FD_UP_TO + 1; i <= CHECK_FD_UP_TO; i++) { |
383 |
373130 |
assert(close(i) == -1); |
384 |
373130 |
assert(errno == EBADF); |
385 |
373130 |
} |
386 |
|
|
387 |
37313 |
mgt_ProcTitle("Child"); |
388 |
|
|
389 |
37313 |
heritage.cls = mgt_cls; |
390 |
37313 |
heritage.ident = VSB_data(vident) + 1; |
391 |
|
|
392 |
37313 |
vext_load(); |
393 |
|
|
394 |
37313 |
STV_Init(); |
395 |
|
|
396 |
37313 |
VJ_subproc(JAIL_SUBPROC_WORKER); |
397 |
|
|
398 |
|
/* |
399 |
|
* We pass these two params because child_main needs them |
400 |
|
* well before it has found its own param struct. |
401 |
|
*/ |
402 |
74626 |
child_main(mgt_param.sigsegv_handler, |
403 |
37313 |
mgt_param.wthread_stacksize); |
404 |
|
|
405 |
|
/* |
406 |
|
* It would be natural to clean VSMW up here, but it is apt |
407 |
|
* to fail in some scenarios because of the fall-back |
408 |
|
* "rm -rf" in mgt_SHM_ChildDestroy() which is there to |
409 |
|
* catch the cases were we don't get here. |
410 |
|
*/ |
411 |
|
// VSMW_Destroy(&heritage.proc_vsmw); |
412 |
|
|
413 |
37313 |
exit(0); |
414 |
|
} |
415 |
38080 |
VJ_master(JAIL_MASTER_LOW); |
416 |
38080 |
assert(pid > 1); |
417 |
38080 |
MGT_Complain(C_DEBUG, "Child (%jd) Started", (intmax_t)pid); |
418 |
38080 |
VSC_C_mgt->child_start++; |
419 |
|
|
420 |
|
/* Close stuff the child got */ |
421 |
38080 |
closefd(&heritage.std_fd); |
422 |
|
|
423 |
38080 |
MCH_Fd_Inherit(heritage.cli_fd, NULL); |
424 |
38080 |
closefd(&heritage.cli_fd); |
425 |
|
|
426 |
38080 |
child_std_vlu = VLU_New(child_line, NULL, 0); |
427 |
38080 |
AN(child_std_vlu); |
428 |
|
|
429 |
|
/* Wait for cache/cache_cli.c::CLI_Run() to check in */ |
430 |
38080 |
bstart = mgt_param.startup_timeout >= mgt_param.cli_timeout; |
431 |
38080 |
dstart = bstart ? mgt_param.startup_timeout : mgt_param.cli_timeout; |
432 |
38080 |
t0 = VTIM_mono(); |
433 |
38080 |
u = mgt_cli_start_child(child_cli_fd, dstart); |
434 |
38080 |
if (u != CLIS_OK) { |
435 |
320 |
assert(u == CLIS_COMMS); |
436 |
320 |
if (VTIM_mono() - t0 < dstart) |
437 |
160 |
mgt_launch_err(cli, u, "Child failed on launch "); |
438 |
|
else |
439 |
160 |
mgt_launch_err(cli, u, "Child failed on launch " |
440 |
|
"within %s_timeout=%.2fs%s", |
441 |
|
bstart ? "startup" : "cli", dstart, |
442 |
|
bstart ? "" : " (tip: set startup_timeout)"); |
443 |
320 |
child_pid = pid; |
444 |
320 |
(void)kill_child(); |
445 |
320 |
mgt_reap_child(); |
446 |
320 |
child_state = CH_STOPPED; |
447 |
320 |
return; |
448 |
|
} else { |
449 |
37760 |
assert(u == CLIS_OK); |
450 |
37760 |
fprintf(stderr, "Child launched OK\n"); |
451 |
|
} |
452 |
37760 |
whining_child = C_INFO; |
453 |
|
|
454 |
37760 |
AZ(ev_listen); |
455 |
37760 |
e = VEV_Alloc(); |
456 |
37760 |
XXXAN(e); |
457 |
37760 |
e->fd = child_output; |
458 |
37760 |
e->fd_flags = VEV__RD; |
459 |
37760 |
e->name = "Child listener"; |
460 |
37760 |
e->callback = child_listener; |
461 |
37760 |
AZ(VEV_Start(mgt_evb, e)); |
462 |
37760 |
ev_listen = e; |
463 |
37760 |
AZ(ev_poker); |
464 |
37760 |
if (mgt_param.ping_interval > 0) { |
465 |
37760 |
e = VEV_Alloc(); |
466 |
37760 |
XXXAN(e); |
467 |
37760 |
e->timeout = mgt_param.ping_interval; |
468 |
37760 |
e->callback = child_poker; |
469 |
37760 |
e->name = "child poker"; |
470 |
37760 |
AZ(VEV_Start(mgt_evb, e)); |
471 |
37760 |
ev_poker = e; |
472 |
37760 |
} |
473 |
|
|
474 |
37760 |
child_pid = pid; |
475 |
|
|
476 |
37760 |
if (mgt_push_vcls(cli, &u, &p)) { |
477 |
40 |
mgt_launch_err(cli, u, "Child (%jd) Pushing vcls failed:\n%s", |
478 |
|
(intmax_t)child_pid, p); |
479 |
40 |
free(p); |
480 |
40 |
MCH_Stop_Child(); |
481 |
40 |
return; |
482 |
|
} |
483 |
|
|
484 |
37720 |
if (mgt_cli_askchild(&u, &p, "start\n")) { |
485 |
0 |
mgt_launch_err(cli, u, "Child (%jd) Acceptor start failed:\n%s", |
486 |
|
(intmax_t)child_pid, p); |
487 |
0 |
free(p); |
488 |
0 |
MCH_Stop_Child(); |
489 |
0 |
return; |
490 |
|
} |
491 |
|
|
492 |
37720 |
free(p); |
493 |
37720 |
child_state = CH_RUNNING; |
494 |
38080 |
} |
495 |
|
|
496 |
|
/*===================================================================== |
497 |
|
* Cleanup when child dies. |
498 |
|
*/ |
499 |
|
|
500 |
|
static int |
501 |
467 |
kill_child(void) |
502 |
|
{ |
503 |
|
int i, error; |
504 |
|
|
505 |
467 |
VJ_master(JAIL_MASTER_KILL); |
506 |
467 |
i = kill(child_pid, SIGQUIT); |
507 |
467 |
error = errno; |
508 |
467 |
VJ_master(JAIL_MASTER_LOW); |
509 |
467 |
errno = error; |
510 |
467 |
return (i); |
511 |
|
} |
512 |
|
|
513 |
|
static void |
514 |
38080 |
mgt_reap_child(void) |
515 |
|
{ |
516 |
|
int i; |
517 |
38080 |
int status = 0xffff; |
518 |
|
struct vsb *vsb; |
519 |
38080 |
pid_t r = 0; |
520 |
|
|
521 |
38080 |
assert(child_pid != -1); |
522 |
|
|
523 |
|
/* |
524 |
|
* Close the CLI connections |
525 |
|
* This signals orderly shut down to child |
526 |
|
*/ |
527 |
38080 |
mgt_cli_stop_child(); |
528 |
38080 |
if (child_cli_fd >= 0) |
529 |
38080 |
closefd(&child_cli_fd); |
530 |
|
|
531 |
|
/* Stop the poker */ |
532 |
38400 |
if (ev_poker != NULL) { |
533 |
37760 |
VEV_Stop(mgt_evb, ev_poker); |
534 |
37760 |
free(ev_poker); |
535 |
37760 |
ev_poker = NULL; |
536 |
37760 |
} |
537 |
|
|
538 |
|
/* Stop the listener */ |
539 |
38120 |
if (ev_listen != NULL) { |
540 |
37400 |
VEV_Stop(mgt_evb, ev_listen); |
541 |
37400 |
free(ev_listen); |
542 |
37400 |
ev_listen = NULL; |
543 |
37400 |
} |
544 |
|
|
545 |
|
/* Compose obituary */ |
546 |
38080 |
vsb = VSB_new_auto(); |
547 |
38080 |
XXXAN(vsb); |
548 |
|
|
549 |
38080 |
(void)VFIL_nonblocking(child_output); |
550 |
|
/* Wait for child to die */ |
551 |
85129 |
for (i = 0; i < mgt_param.cli_timeout * 10; i++) { |
552 |
85062 |
(void)child_listener(NULL, VEV__RD); |
553 |
85062 |
r = waitpid(child_pid, &status, WNOHANG); |
554 |
85062 |
if (r == child_pid) |
555 |
38013 |
break; |
556 |
47049 |
VTIM_sleep(0.1); |
557 |
47049 |
} |
558 |
75959 |
if (r == 0) { |
559 |
134 |
VSB_printf(vsb, "Child (%jd) not dying (waitpid = %jd)," |
560 |
67 |
" killing\n", (intmax_t)child_pid, (intmax_t)r); |
561 |
|
|
562 |
|
/* Kick it Jim... */ |
563 |
67 |
(void)kill_child(); |
564 |
67 |
r = waitpid(child_pid, &status, 0); |
565 |
67 |
} |
566 |
134 |
if (r != child_pid) |
567 |
0 |
fprintf(stderr, "WAIT 0x%jd\n", (intmax_t)r); |
568 |
38080 |
assert(r == child_pid); |
569 |
|
|
570 |
76160 |
VSB_printf(vsb, "Child (%jd) %s", (intmax_t)r, |
571 |
38080 |
status ? "died" : "ended"); |
572 |
38080 |
if (WIFEXITED(status) && WEXITSTATUS(status)) { |
573 |
160 |
VSB_printf(vsb, " status=%d", WEXITSTATUS(status)); |
574 |
160 |
exit_status |= 0x20; |
575 |
160 |
if (WEXITSTATUS(status) == 1) |
576 |
0 |
VSC_C_mgt->child_exit++; |
577 |
|
else |
578 |
160 |
VSC_C_mgt->child_stop++; |
579 |
160 |
} |
580 |
38080 |
if (WIFSIGNALED(status)) { |
581 |
600 |
VSB_printf(vsb, " signal=%d", WTERMSIG(status)); |
582 |
600 |
exit_status |= 0x40; |
583 |
600 |
VSC_C_mgt->child_died++; |
584 |
600 |
} |
585 |
|
#ifdef WCOREDUMP |
586 |
1120 |
if (WCOREDUMP(status)) { |
587 |
80 |
VSB_cat(vsb, " (core dumped)"); |
588 |
80 |
if (!MGT_FEATURE(FEATURE_NO_COREDUMP)) |
589 |
80 |
exit_status |= 0x80; |
590 |
80 |
VSC_C_mgt->child_dump++; |
591 |
80 |
} |
592 |
|
#endif |
593 |
514 |
MGT_ComplainVSB(status ? C_ERR : C_INFO, vsb); |
594 |
514 |
VSB_destroy(&vsb); |
595 |
|
|
596 |
|
/* Dispose of shared memory but evacuate panic messages first */ |
597 |
514 |
if (heritage.panic_str[0] != '\0') { |
598 |
514 |
mgt_panic_record(r); |
599 |
514 |
VSC_C_mgt->child_panic++; |
600 |
514 |
} |
601 |
|
|
602 |
668 |
mgt_SHM_ChildDestroy(); |
603 |
|
|
604 |
668 |
if (child_state == CH_RUNNING) |
605 |
360 |
child_state = CH_DIED; |
606 |
|
|
607 |
|
/* Pick up any stuff lingering on stdout/stderr */ |
608 |
38080 |
(void)child_listener(NULL, VEV__RD); |
609 |
38080 |
closefd(&child_output); |
610 |
38080 |
VLU_Destroy(&child_std_vlu); |
611 |
|
|
612 |
38080 |
child_pid = -1; |
613 |
|
|
614 |
38080 |
MGT_Complain(C_DEBUG, "Child cleanup complete"); |
615 |
|
|
616 |
|
/* XXX number of retries? interval? */ |
617 |
38080 |
for (i = 0; i < 3; i++) { |
618 |
38080 |
if (VCA_reopen_sockets() == 0) |
619 |
38080 |
break; |
620 |
|
/* error already logged */ |
621 |
0 |
(void)sleep(1); |
622 |
0 |
} |
623 |
38080 |
if (i == 3) { |
624 |
|
/* We failed to reopen our listening sockets. No choice |
625 |
|
* but to exit. */ |
626 |
0 |
MGT_Complain(C_ERR, |
627 |
|
"Could not reopen listening sockets. Exiting."); |
628 |
0 |
exit(1); |
629 |
|
} |
630 |
|
|
631 |
38080 |
if (child_state == CH_DIED && mgt_param.auto_restart) |
632 |
0 |
mgt_launch_child(NULL); |
633 |
38080 |
else if (child_state == CH_DIED) |
634 |
360 |
child_state = CH_STOPPED; |
635 |
37720 |
else if (child_state == CH_STOPPING) |
636 |
37400 |
child_state = CH_STOPPED; |
637 |
38080 |
} |
638 |
|
|
639 |
|
/*===================================================================== |
640 |
|
* If CLI communications with the child process fails, there is nothing |
641 |
|
* for us to do but to drag it behind the barn and get it over with. |
642 |
|
* |
643 |
|
* The typical case is where the child process fails to return a reply |
644 |
|
* before the cli_timeout expires. This invalidates the CLI pipes for |
645 |
|
* all future use, as we don't know if the child was just slow and the |
646 |
|
* result gets piped later on, or if the child is catatonic. |
647 |
|
*/ |
648 |
|
|
649 |
|
void |
650 |
80 |
MCH_Cli_Fail(void) |
651 |
|
{ |
652 |
|
|
653 |
80 |
if (child_state != CH_RUNNING && child_state != CH_STARTING) |
654 |
0 |
return; |
655 |
80 |
if (child_pid < 0) |
656 |
0 |
return; |
657 |
80 |
if (kill_child() == 0) |
658 |
80 |
MGT_Complain(C_ERR, "Child (%jd) not responding to CLI," |
659 |
80 |
" killed it.", (intmax_t)child_pid); |
660 |
|
else |
661 |
0 |
MGT_Complain(C_ERR, "Failed to kill child with PID %jd: %s", |
662 |
0 |
(intmax_t)child_pid, VAS_errtxt(errno)); |
663 |
80 |
} |
664 |
|
|
665 |
|
/*===================================================================== |
666 |
|
* Controlled stop of child process |
667 |
|
* |
668 |
|
* Reaping the child asks for orderly shutdown |
669 |
|
*/ |
670 |
|
|
671 |
|
void |
672 |
75120 |
MCH_Stop_Child(void) |
673 |
|
{ |
674 |
|
|
675 |
75120 |
if (child_state != CH_RUNNING && child_state != CH_STARTING) |
676 |
37720 |
return; |
677 |
|
|
678 |
37400 |
child_state = CH_STOPPING; |
679 |
|
|
680 |
37400 |
MGT_Complain(C_DEBUG, "Stopping Child"); |
681 |
|
|
682 |
37400 |
mgt_reap_child(); |
683 |
75120 |
} |
684 |
|
|
685 |
|
/*===================================================================== |
686 |
|
*/ |
687 |
|
|
688 |
|
int |
689 |
240 |
MCH_Start_Child(void) |
690 |
|
{ |
691 |
240 |
mgt_launch_child(NULL); |
692 |
240 |
if (child_state != CH_RUNNING) |
693 |
200 |
return (2); |
694 |
40 |
return (0); |
695 |
240 |
} |
696 |
|
|
697 |
|
/*==================================================================== |
698 |
|
* Query if the child is running |
699 |
|
*/ |
700 |
|
|
701 |
|
int |
702 |
595680 |
MCH_Running(void) |
703 |
|
{ |
704 |
|
|
705 |
595680 |
return (child_pid > 0); |
706 |
|
} |
707 |
|
|
708 |
|
/*===================================================================== |
709 |
|
* CLI commands |
710 |
|
*/ |
711 |
|
|
712 |
|
static void v_matchproto_(cli_func_t) |
713 |
80 |
mch_pid(struct cli *cli, const char * const *av, void *priv) |
714 |
|
{ |
715 |
|
|
716 |
80 |
(void)av; |
717 |
80 |
(void)priv; |
718 |
80 |
VCLI_Out(cli, "Master: %10jd\n", (intmax_t)getpid()); |
719 |
80 |
if (!MCH_Running()) |
720 |
40 |
return; |
721 |
40 |
VCLI_Out(cli, "Worker: %10jd\n", (intmax_t)child_pid); |
722 |
80 |
} |
723 |
|
|
724 |
|
static void v_matchproto_(cli_func_t) |
725 |
80 |
mch_pid_json(struct cli *cli, const char * const *av, void *priv) |
726 |
|
{ |
727 |
|
|
728 |
80 |
(void)priv; |
729 |
80 |
VCLI_JSON_begin(cli, 2, av); |
730 |
80 |
VCLI_Out(cli, ",\n {\"master\": %jd", (intmax_t)getpid()); |
731 |
80 |
if (MCH_Running()) |
732 |
40 |
VCLI_Out(cli, ", \"worker\": %jd", (intmax_t)child_pid); |
733 |
80 |
VCLI_Out(cli, "}"); |
734 |
80 |
VCLI_JSON_end(cli); |
735 |
80 |
} |
736 |
|
|
737 |
|
static void v_matchproto_(cli_func_t) |
738 |
38040 |
mch_cli_server_start(struct cli *cli, const char * const *av, void *priv) |
739 |
|
{ |
740 |
|
const char *err; |
741 |
|
|
742 |
38040 |
(void)av; |
743 |
38040 |
(void)priv; |
744 |
38040 |
if (child_state == CH_STOPPED) { |
745 |
37880 |
err = mgt_has_vcl(); |
746 |
37880 |
if (err == NULL) { |
747 |
37840 |
mgt_launch_child(cli); |
748 |
37840 |
} else { |
749 |
40 |
VCLI_SetResult(cli, CLIS_CANT); |
750 |
40 |
VCLI_Out(cli, "%s", err); |
751 |
|
} |
752 |
37880 |
} else { |
753 |
160 |
VCLI_SetResult(cli, CLIS_CANT); |
754 |
160 |
VCLI_Out(cli, "Child in state %s", ch_state[child_state]); |
755 |
|
} |
756 |
38040 |
} |
757 |
|
|
758 |
|
static void v_matchproto_(cli_func_t) |
759 |
40000 |
mch_cli_server_stop(struct cli *cli, const char * const *av, void *priv) |
760 |
|
{ |
761 |
|
|
762 |
40000 |
(void)av; |
763 |
40000 |
(void)priv; |
764 |
40000 |
if (child_state == CH_RUNNING) { |
765 |
37240 |
MCH_Stop_Child(); |
766 |
37240 |
} else { |
767 |
2760 |
VCLI_SetResult(cli, CLIS_CANT); |
768 |
2760 |
VCLI_Out(cli, "Child in state %s", ch_state[child_state]); |
769 |
|
} |
770 |
40000 |
} |
771 |
|
|
772 |
|
static void v_matchproto_(cli_func_t) |
773 |
77400 |
mch_cli_server_status(struct cli *cli, const char * const *av, void *priv) |
774 |
|
{ |
775 |
77400 |
(void)av; |
776 |
77400 |
(void)priv; |
777 |
77400 |
VCLI_Out(cli, "Child in state %s", ch_state[child_state]); |
778 |
77400 |
} |
779 |
|
|
780 |
|
static void v_matchproto_(cli_func_t) |
781 |
320 |
mch_cli_server_status_json(struct cli *cli, const char * const *av, void *priv) |
782 |
|
{ |
783 |
320 |
(void)priv; |
784 |
320 |
VCLI_JSON_begin(cli, 2, av); |
785 |
320 |
VCLI_Out(cli, ", "); |
786 |
320 |
VCLI_JSON_str(cli, ch_state[child_state]); |
787 |
320 |
VCLI_JSON_end(cli); |
788 |
320 |
} |
789 |
|
|
790 |
|
static struct cli_proto cli_mch[] = { |
791 |
|
{ CLICMD_SERVER_STATUS, "", mch_cli_server_status, |
792 |
|
mch_cli_server_status_json }, |
793 |
|
{ CLICMD_SERVER_START, "", mch_cli_server_start }, |
794 |
|
{ CLICMD_SERVER_STOP, "", mch_cli_server_stop }, |
795 |
|
{ CLICMD_PANIC_SHOW, "", mch_cli_panic_show, |
796 |
|
mch_cli_panic_show_json }, |
797 |
|
{ CLICMD_PANIC_CLEAR, "", mch_cli_panic_clear }, |
798 |
|
{ CLICMD_PID, "", mch_pid, mch_pid_json }, |
799 |
|
{ NULL } |
800 |
|
}; |
801 |
|
|
802 |
|
/*===================================================================== |
803 |
|
* This thread is the master thread in the management process. |
804 |
|
* The relatively simple task is to start and stop the child process |
805 |
|
* and to reincarnate it in case of trouble. |
806 |
|
*/ |
807 |
|
|
808 |
|
void |
809 |
38040 |
MCH_Init(void) |
810 |
|
{ |
811 |
|
|
812 |
38040 |
VCLS_AddFunc(mgt_cls, MCF_AUTH, cli_mch); |
813 |
38040 |
} |