| | varnish-cache/bin/varnishd/mgt/mgt_child.c |
0 |
|
/*- |
1 |
|
* Copyright (c) 2006 Verdens Gang AS |
2 |
|
* Copyright (c) 2006-2015 Varnish Software AS |
3 |
|
* All rights reserved. |
4 |
|
* |
5 |
|
* Author: Poul-Henning Kamp <phk@phk.freebsd.dk> |
6 |
|
* |
7 |
|
* SPDX-License-Identifier: BSD-2-Clause |
8 |
|
* |
9 |
|
* Redistribution and use in source and binary forms, with or without |
10 |
|
* modification, are permitted provided that the following conditions |
11 |
|
* are met: |
12 |
|
* 1. Redistributions of source code must retain the above copyright |
13 |
|
* notice, this list of conditions and the following disclaimer. |
14 |
|
* 2. Redistributions in binary form must reproduce the above copyright |
15 |
|
* notice, this list of conditions and the following disclaimer in the |
16 |
|
* documentation and/or other materials provided with the distribution. |
17 |
|
* |
18 |
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
19 |
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 |
|
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
22 |
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 |
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
24 |
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
25 |
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
26 |
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
27 |
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
28 |
|
* SUCH DAMAGE. |
29 |
|
* |
30 |
|
* The mechanics of handling the child process |
31 |
|
*/ |
32 |
|
|
33 |
|
#include "config.h" |
34 |
|
|
35 |
|
#include <sys/types.h> |
36 |
|
|
37 |
|
#include <poll.h> |
38 |
|
#include <stdarg.h> |
39 |
|
#include <stdio.h> |
40 |
|
#include <string.h> |
41 |
|
#include <syslog.h> |
42 |
|
#include <unistd.h> |
43 |
|
#include <sys/types.h> |
44 |
|
#include <sys/socket.h> |
45 |
|
#include <sys/time.h> |
46 |
|
#include <sys/resource.h> |
47 |
|
|
48 |
|
#include "mgt.h" |
49 |
|
|
50 |
|
#include "vapi/vsig.h" |
51 |
|
|
52 |
|
#include "vbm.h" |
53 |
|
#include "vcli_serve.h" |
54 |
|
#include "vev.h" |
55 |
|
#include "vfil.h" |
56 |
|
#include "vlu.h" |
57 |
|
#include "vtim.h" |
58 |
|
|
59 |
|
#include "common/heritage.h" |
60 |
|
|
61 |
|
/* PID of the child process; -1 whenever no child is being tracked */
static pid_t		child_pid = -1;

/* Bitmap of file descriptors, maintained by MCH_Fd_Inherit() */
static struct vbitmap	*fd_map;

/*
 * Manager side ends of the CLI socketpair and of the pipe carrying
 * the child's stdout/stderr; both are -1 until a child is launched.
 */
static int		child_cli_fd = -1;
static int		child_output = -1;
67 |
|
|
68 |
|
/*
 * Where the child process is in its life cycle, as tracked by the
 * manager.  The numeric values index ch_state[] below.
 */
static enum {
	CH_STOPPED = 0,
	CH_STARTING,
	CH_RUNNING,
	CH_STOPPING,
	CH_DIED
} child_state = CH_STOPPED;

/* Text for each child_state value, as emitted in CLI replies */
static const char * const ch_state[] = {
	[CH_STOPPED]	= "stopped",
	[CH_STARTING]	= "starting",
	[CH_RUNNING]	= "running",
	[CH_STOPPING]	= "stopping",
	[CH_DIED]	= "died, (restarting)",
};
83 |
|
|
84 |
|
/* Event handles: periodic "ping" timer and child stdout/stderr reader */
static struct vev	*ev_poker;
static struct vev	*ev_listen;

/* Line-splitter fed from child_output, delivers lines to child_line() */
static struct vlu	*child_std_vlu;

/* Most recently recorded panic message, NULL if none (or cleared) */
static struct vsb	*child_panic;

/* Defined further down, needed by the callbacks and the launch code */
static int kill_child(void);
static void mgt_reap_child(void);
92 |
|
|
93 |
|
/*===================================================================== |
94 |
|
* Panic string evacuation and handling |
95 |
|
*/ |
96 |
|
|
97 |
|
static void |
98 |
298 |
mgt_panic_record(pid_t r) |
99 |
|
{ |
100 |
|
char time_str[30]; |
101 |
|
|
102 |
298 |
if (child_panic != NULL) |
103 |
0 |
VSB_destroy(&child_panic); |
104 |
298 |
child_panic = VSB_new_auto(); |
105 |
298 |
AN(child_panic); |
106 |
298 |
VTIM_format(VTIM_real(), time_str); |
107 |
298 |
VSB_printf(child_panic, "Panic at: %s\n", time_str); |
108 |
596 |
VSB_quote(child_panic, heritage.panic_str, |
109 |
298 |
strnlen(heritage.panic_str, heritage.panic_str_len), |
110 |
|
VSB_QUOTE_NONL); |
111 |
298 |
AZ(VSB_finish(child_panic)); |
112 |
298 |
MGT_Complain(C_ERR, "Child (%jd) %s", |
113 |
298 |
(intmax_t)r, VSB_data(child_panic)); |
114 |
298 |
} |
115 |
|
|
116 |
|
static void |
117 |
250 |
mgt_panic_clear(void) |
118 |
|
{ |
119 |
250 |
VSB_destroy(&child_panic); |
120 |
250 |
} |
121 |
|
|
122 |
|
static void |
123 |
22800 |
cli_panic_show(struct cli *cli, const char * const *av, int json) |
124 |
|
{ |
125 |
22800 |
if (!child_panic) { |
126 |
22550 |
VCLI_SetResult(cli, CLIS_CANT); |
127 |
22550 |
VCLI_Out(cli, |
128 |
|
"Child has not panicked or panic has been cleared"); |
129 |
22550 |
return; |
130 |
|
} |
131 |
|
|
132 |
250 |
if (!json) { |
133 |
150 |
VCLI_Out(cli, "%s\n", VSB_data(child_panic)); |
134 |
150 |
return; |
135 |
|
} |
136 |
|
|
137 |
100 |
VCLI_JSON_begin(cli, 2, av); |
138 |
100 |
VCLI_Out(cli, ",\n"); |
139 |
100 |
VCLI_JSON_str(cli, VSB_data(child_panic)); |
140 |
100 |
VCLI_JSON_end(cli); |
141 |
22800 |
} |
142 |
|
|
143 |
|
static void v_matchproto_(cli_func_t) |
144 |
22700 |
mch_cli_panic_show(struct cli *cli, const char * const *av, void *priv) |
145 |
|
{ |
146 |
22700 |
(void)priv; |
147 |
22700 |
cli_panic_show(cli, av, 0); |
148 |
22700 |
} |
149 |
|
|
150 |
|
static void v_matchproto_(cli_func_t) |
151 |
100 |
mch_cli_panic_show_json(struct cli *cli, const char * const *av, void *priv) |
152 |
|
{ |
153 |
100 |
(void)priv; |
154 |
100 |
cli_panic_show(cli, av, 1); |
155 |
100 |
} |
156 |
|
|
157 |
|
static void v_matchproto_(cli_func_t) |
158 |
325 |
mch_cli_panic_clear(struct cli *cli, const char * const *av, void *priv) |
159 |
|
{ |
160 |
325 |
(void)priv; |
161 |
|
|
162 |
325 |
if (av[2] != NULL && strcmp(av[2], "-z")) { |
163 |
0 |
VCLI_SetResult(cli, CLIS_PARAM); |
164 |
0 |
VCLI_Out(cli, "Unknown parameter \"%s\".", av[2]); |
165 |
0 |
return; |
166 |
50 |
} else if (av[2] != NULL) { |
167 |
50 |
VSC_C_mgt->child_panic = 0; |
168 |
50 |
if (child_panic == NULL) |
169 |
25 |
return; |
170 |
25 |
} |
171 |
300 |
if (child_panic == NULL) { |
172 |
50 |
VCLI_SetResult(cli, CLIS_CANT); |
173 |
50 |
VCLI_Out(cli, "No panic to clear"); |
174 |
50 |
return; |
175 |
|
} |
176 |
250 |
mgt_panic_clear(); |
177 |
325 |
} |
178 |
|
|
179 |
|
/*===================================================================== |
180 |
|
* Track the highest file descriptor the parent knows is being used. |
181 |
|
* |
182 |
|
* This allows the child process to clean/close only a small fraction |
183 |
|
* of the possible file descriptors after exec(2). |
184 |
|
* |
185 |
|
* This is likely to be a bit on the low side, as libc and other libraries |
186 |
|
* have a tendency to cache file descriptors (syslog, resolver, etc.) |
187 |
|
* so we add a margin of 10 fds. |
188 |
|
* |
189 |
|
* For added safety, we check that we see no file descriptor open for |
190 |
|
* another margin above the limit for which we close by design |
191 |
|
*/ |
192 |
|
|
193 |
|
/* Highest file descriptor reported through MCH_TrackHighFd() so far */
static int mgt_max_fd;

/* The child closes fds up to this one ... */
#define CLOSE_FD_UP_TO	(mgt_max_fd + 10)
/* ... and asserts that the ones above it, up to this one, are unused */
#define CHECK_FD_UP_TO	(CLOSE_FD_UP_TO + 10)
197 |
|
|
198 |
|
void |
199 |
234400 |
MCH_TrackHighFd(int fd) |
200 |
|
{ |
201 |
|
/* |
202 |
|
* Assert > 0, to catch bogus opens, we know where stdin goes |
203 |
|
* in the master process. |
204 |
|
*/ |
205 |
234400 |
assert(fd > 0); |
206 |
234400 |
mgt_max_fd = vmax(mgt_max_fd, fd); |
207 |
234400 |
} |
208 |
|
|
209 |
|
/*-------------------------------------------------------------------- |
210 |
|
* Keep track of which filedescriptors the child should inherit and |
211 |
|
* which should be closed after fork() |
212 |
|
*/ |
213 |
|
|
214 |
|
void |
215 |
139575 |
MCH_Fd_Inherit(int fd, const char *what) |
216 |
|
{ |
217 |
|
|
218 |
139575 |
assert(fd >= 0); |
219 |
|
// XXX why? |
220 |
139575 |
if (fd > 0) |
221 |
139575 |
MCH_TrackHighFd(fd); |
222 |
255900 |
if (fd_map == NULL) |
223 |
23250 |
fd_map = vbit_new(128); |
224 |
139575 |
AN(fd_map); |
225 |
139575 |
if (what != NULL) |
226 |
93700 |
vbit_set(fd_map, fd); |
227 |
|
else |
228 |
45875 |
vbit_clr(fd_map, fd); |
229 |
139575 |
} |
230 |
|
|
231 |
|
/*===================================================================== |
232 |
|
* Listen to stdout+stderr from the child |
233 |
|
*/ |
234 |
|
|
235 |
|
static const char *whining_child = C_ERR; |
236 |
|
|
237 |
|
static int v_matchproto_(vlu_f) |
238 |
63478 |
child_line(void *priv, const char *p) |
239 |
|
{ |
240 |
63478 |
(void)priv; |
241 |
|
|
242 |
63478 |
MGT_Complain(whining_child, "Child (%jd) said %s", (intmax_t)child_pid, p); |
243 |
63478 |
return (0); |
244 |
|
} |
245 |
|
|
246 |
|
/*-------------------------------------------------------------------- |
247 |
|
* NB: Notice cleanup call from mgt_reap_child() |
248 |
|
*/ |
249 |
|
|
250 |
|
static int v_matchproto_(vev_cb_f) |
251 |
100733 |
child_listener(const struct vev *e, int what) |
252 |
|
{ |
253 |
|
|
254 |
100733 |
if ((what & ~VEV__RD) || VLU_Fd(child_std_vlu, child_output)) { |
255 |
200 |
ev_listen = NULL; |
256 |
200 |
if (e != NULL) |
257 |
200 |
mgt_reap_child(); |
258 |
51127 |
return (1); |
259 |
|
} |
260 |
49606 |
return (0); |
261 |
100733 |
} |
262 |
|
|
263 |
|
/*===================================================================== |
264 |
|
* Periodically poke the child, to see that it still lives |
265 |
|
*/ |
266 |
|
|
267 |
|
static int v_matchproto_(vev_cb_f) |
268 |
4376 |
child_poker(const struct vev *e, int what) |
269 |
|
{ |
270 |
4376 |
char *r = NULL; |
271 |
|
unsigned status; |
272 |
|
|
273 |
4376 |
(void)e; |
274 |
4376 |
(void)what; |
275 |
4376 |
if (child_state != CH_RUNNING) |
276 |
0 |
return (1); |
277 |
4376 |
if (child_pid < 0) |
278 |
0 |
return (0); |
279 |
4376 |
if (mgt_cli_askchild(&status, &r, "ping\n") || strncmp("PONG ", r, 5)) { |
280 |
0 |
MGT_Complain(C_ERR, "Unexpected reply from ping: %u %s", |
281 |
0 |
status, r); |
282 |
0 |
if (status != CLIS_COMMS) |
283 |
0 |
MCH_Cli_Fail(); |
284 |
0 |
} |
285 |
4376 |
free(r); |
286 |
4376 |
return (0); |
287 |
4376 |
} |
288 |
|
|
289 |
|
/*===================================================================== |
290 |
|
* Launch the child process |
291 |
|
*/ |
292 |
|
|
293 |
|
/*
 * Report a launch failure: always log it with MGT_Complain(C_ERR, ...)
 * and, when a CLI session is given (cli != NULL), also send the message
 * and the status to that session.
 *
 * The variadic part is a printf-style format plus arguments; it is
 * expanded twice, so it must not have side effects.  cli and status
 * are parenthesized so that expression arguments bind as intended.
 */
#define mgt_launch_err(cli, status, ...) do {		\
		MGT_Complain(C_ERR, __VA_ARGS__);	\
		if ((cli) == NULL)			\
			break;				\
		VCLI_Out((cli), __VA_ARGS__);		\
		VCLI_SetResult((cli), (status));	\
	} while (0)
300 |
|
|
301 |
|
static void |
302 |
22775 |
mgt_launch_child(struct cli *cli) |
303 |
|
{ |
304 |
|
pid_t pid; |
305 |
|
unsigned u; |
306 |
|
char *p; |
307 |
|
struct vev *e; |
308 |
|
int i, cp[2]; |
309 |
|
struct rlimit rl[1]; |
310 |
|
vtim_dur dstart; |
311 |
|
int bstart; |
312 |
|
vtim_mono t0; |
313 |
|
|
314 |
22775 |
if (child_state != CH_STOPPED && child_state != CH_DIED) |
315 |
0 |
return; |
316 |
|
|
317 |
22775 |
child_state = CH_STARTING; |
318 |
|
|
319 |
|
/* Open pipe for mgt->child CLI */ |
320 |
22775 |
AZ(socketpair(AF_UNIX, SOCK_STREAM, 0, cp)); |
321 |
22775 |
heritage.cli_fd = cp[0]; |
322 |
22775 |
assert(cp[0] > STDERR_FILENO); // See #2782 |
323 |
22775 |
assert(cp[1] > STDERR_FILENO); |
324 |
22775 |
MCH_Fd_Inherit(heritage.cli_fd, "cli_fd"); |
325 |
22775 |
child_cli_fd = cp[1]; |
326 |
|
|
327 |
|
/* |
328 |
|
* Open pipe for child stdout/err |
329 |
|
* NB: not inherited, because we dup2() it to stdout/stderr in child |
330 |
|
*/ |
331 |
22775 |
AZ(pipe(cp)); |
332 |
22775 |
heritage.std_fd = cp[1]; |
333 |
22775 |
child_output = cp[0]; |
334 |
|
|
335 |
22775 |
mgt_SHM_ChildNew(); |
336 |
|
|
337 |
22775 |
AN(heritage.param); |
338 |
22775 |
AN(heritage.panic_str); |
339 |
22775 |
VJ_master(JAIL_MASTER_SYSTEM); |
340 |
22775 |
if ((pid = fork()) < 0) { |
341 |
0 |
VJ_master(JAIL_MASTER_LOW); |
342 |
0 |
perror("Could not fork child"); |
343 |
0 |
exit(1); // XXX Harsh ? |
344 |
|
} |
345 |
45072 |
if (pid == 0) { |
346 |
|
|
347 |
22297 |
if (MGT_FEATURE(FEATURE_NO_COREDUMP)) { |
348 |
349 |
memset(rl, 0, sizeof *rl); |
349 |
349 |
rl->rlim_cur = 0; |
350 |
349 |
AZ(setrlimit(RLIMIT_CORE, rl)); |
351 |
349 |
} |
352 |
|
|
353 |
|
/* Redirect stdin/out/err */ |
354 |
22297 |
VFIL_null_fd(STDIN_FILENO); |
355 |
22297 |
assert(dup2(heritage.std_fd, STDOUT_FILENO) == STDOUT_FILENO); |
356 |
22297 |
assert(dup2(heritage.std_fd, STDERR_FILENO) == STDERR_FILENO); |
357 |
|
|
358 |
22297 |
setbuf(stdout, NULL); |
359 |
22297 |
setbuf(stderr, NULL); |
360 |
22297 |
printf("Child starts\n"); |
361 |
|
|
362 |
|
/* |
363 |
|
* Close all FDs the child shouldn't know about |
364 |
|
* |
365 |
|
* We cannot just close these filedescriptors, some random |
366 |
|
* library routine might miss it later on and wantonly close |
367 |
|
* a FD we use at that point in time. (See bug #1841). |
368 |
|
* We close the FD and replace it with /dev/null instead, |
369 |
|
* That prevents security leakage, and gives the library |
370 |
|
* code a valid FD to close when it discovers the changed |
371 |
|
* circumstances. |
372 |
|
*/ |
373 |
22297 |
closelog(); |
374 |
|
|
375 |
513104 |
for (i = STDERR_FILENO + 1; i <= CLOSE_FD_UP_TO; i++) { |
376 |
490807 |
if (vbit_test(fd_map, i)) |
377 |
67189 |
continue; |
378 |
423618 |
if (close(i) == 0) |
379 |
200500 |
VFIL_null_fd(i); |
380 |
423618 |
} |
381 |
245267 |
for (i = CLOSE_FD_UP_TO + 1; i <= CHECK_FD_UP_TO; i++) { |
382 |
222970 |
assert(close(i) == -1); |
383 |
222970 |
assert(errno == EBADF); |
384 |
222970 |
} |
385 |
|
|
386 |
22297 |
mgt_ProcTitle("Child"); |
387 |
|
|
388 |
22297 |
heritage.cls = mgt_cls; |
389 |
22297 |
heritage.ident = VSB_data(vident) + 1; |
390 |
|
|
391 |
22297 |
vext_load(); |
392 |
|
|
393 |
22297 |
STV_Init(); |
394 |
|
|
395 |
22297 |
VJ_subproc(JAIL_SUBPROC_WORKER); |
396 |
|
|
397 |
|
/* |
398 |
|
* We pass these two params because child_main needs them |
399 |
|
* well before it has found its own param struct. |
400 |
|
*/ |
401 |
44594 |
child_main(mgt_param.sigsegv_handler, |
402 |
22297 |
mgt_param.wthread_stacksize); |
403 |
|
|
404 |
|
/* |
405 |
|
* It would be natural to clean VSMW up here, but it is apt |
406 |
|
* to fail in some scenarios because of the fall-back |
407 |
|
* "rm -rf" in mgt_SHM_ChildDestroy() which is there to |
408 |
|
* catch the cases were we don't get here. |
409 |
|
*/ |
410 |
|
// VSMW_Destroy(&heritage.proc_vsmw); |
411 |
|
|
412 |
22297 |
exit(0); |
413 |
|
} |
414 |
22775 |
VJ_master(JAIL_MASTER_LOW); |
415 |
22775 |
assert(pid > 1); |
416 |
22775 |
MGT_Complain(C_DEBUG, "Child (%jd) Started", (intmax_t)pid); |
417 |
22775 |
VSC_C_mgt->child_start++; |
418 |
|
|
419 |
|
/* Close stuff the child got */ |
420 |
22775 |
closefd(&heritage.std_fd); |
421 |
|
|
422 |
22775 |
MCH_Fd_Inherit(heritage.cli_fd, NULL); |
423 |
22775 |
closefd(&heritage.cli_fd); |
424 |
|
|
425 |
22775 |
child_std_vlu = VLU_New(child_line, NULL, 0); |
426 |
22775 |
AN(child_std_vlu); |
427 |
|
|
428 |
|
/* Wait for cache/cache_cli.c::CLI_Run() to check in */ |
429 |
22775 |
bstart = mgt_param.startup_timeout >= mgt_param.cli_timeout; |
430 |
22775 |
dstart = bstart ? mgt_param.startup_timeout : mgt_param.cli_timeout; |
431 |
22775 |
t0 = VTIM_mono(); |
432 |
22775 |
u = mgt_cli_start_child(child_cli_fd, dstart); |
433 |
22775 |
if (u != CLIS_OK) { |
434 |
200 |
assert(u == CLIS_COMMS); |
435 |
200 |
if (VTIM_mono() - t0 < dstart) |
436 |
100 |
mgt_launch_err(cli, u, "Child failed on launch "); |
437 |
|
else |
438 |
100 |
mgt_launch_err(cli, u, "Child failed on launch " |
439 |
|
"within %s_timeout=%.2fs%s", |
440 |
|
bstart ? "startup" : "cli", dstart, |
441 |
|
bstart ? "" : " (tip: set startup_timeout)"); |
442 |
200 |
child_pid = pid; |
443 |
200 |
(void)kill_child(); |
444 |
200 |
mgt_reap_child(); |
445 |
200 |
child_state = CH_STOPPED; |
446 |
200 |
return; |
447 |
|
} else { |
448 |
22575 |
assert(u == CLIS_OK); |
449 |
22575 |
fprintf(stderr, "Child launched OK\n"); |
450 |
|
} |
451 |
22575 |
whining_child = C_INFO; |
452 |
|
|
453 |
22575 |
AZ(ev_listen); |
454 |
22575 |
e = VEV_Alloc(); |
455 |
22575 |
XXXAN(e); |
456 |
22575 |
e->fd = child_output; |
457 |
22575 |
e->fd_flags = VEV__RD; |
458 |
22575 |
e->name = "Child listener"; |
459 |
22575 |
e->callback = child_listener; |
460 |
22575 |
AZ(VEV_Start(mgt_evb, e)); |
461 |
22575 |
ev_listen = e; |
462 |
22575 |
AZ(ev_poker); |
463 |
22575 |
if (mgt_param.ping_interval > 0) { |
464 |
22575 |
e = VEV_Alloc(); |
465 |
22575 |
XXXAN(e); |
466 |
22575 |
e->timeout = mgt_param.ping_interval; |
467 |
22575 |
e->callback = child_poker; |
468 |
22575 |
e->name = "child poker"; |
469 |
22575 |
AZ(VEV_Start(mgt_evb, e)); |
470 |
22575 |
ev_poker = e; |
471 |
22575 |
} |
472 |
|
|
473 |
22575 |
child_pid = pid; |
474 |
|
|
475 |
22575 |
if (mgt_push_vcls(cli, &u, &p)) { |
476 |
25 |
mgt_launch_err(cli, u, "Child (%jd) Pushing vcls failed:\n%s", |
477 |
|
(intmax_t)child_pid, p); |
478 |
25 |
free(p); |
479 |
25 |
MCH_Stop_Child(); |
480 |
25 |
return; |
481 |
|
} |
482 |
|
|
483 |
22550 |
if (mgt_cli_askchild(&u, &p, "start\n")) { |
484 |
0 |
mgt_launch_err(cli, u, "Child (%jd) Acceptor start failed:\n%s", |
485 |
|
(intmax_t)child_pid, p); |
486 |
0 |
free(p); |
487 |
0 |
MCH_Stop_Child(); |
488 |
0 |
return; |
489 |
|
} |
490 |
|
|
491 |
22550 |
free(p); |
492 |
22550 |
child_state = CH_RUNNING; |
493 |
22775 |
} |
494 |
|
|
495 |
|
/*===================================================================== |
496 |
|
* Cleanup when child dies. |
497 |
|
*/ |
498 |
|
|
499 |
|
static int |
500 |
292 |
kill_child(void) |
501 |
|
{ |
502 |
|
int i, error; |
503 |
|
|
504 |
292 |
VJ_master(JAIL_MASTER_KILL); |
505 |
292 |
i = kill(child_pid, SIGQUIT); |
506 |
292 |
error = errno; |
507 |
292 |
VJ_master(JAIL_MASTER_LOW); |
508 |
292 |
errno = error; |
509 |
292 |
return (i); |
510 |
|
} |
511 |
|
|
512 |
|
static void |
513 |
22775 |
mgt_reap_child(void) |
514 |
|
{ |
515 |
|
int i; |
516 |
22775 |
int status = 0xffff; |
517 |
|
struct vsb *vsb; |
518 |
22775 |
pid_t r = 0; |
519 |
|
|
520 |
22775 |
assert(child_pid != -1); |
521 |
|
|
522 |
|
/* |
523 |
|
* Close the CLI connections |
524 |
|
* This signals orderly shut down to child |
525 |
|
*/ |
526 |
22775 |
mgt_cli_stop_child(); |
527 |
22775 |
if (child_cli_fd >= 0) |
528 |
22775 |
closefd(&child_cli_fd); |
529 |
|
|
530 |
|
/* Stop the poker */ |
531 |
22975 |
if (ev_poker != NULL) { |
532 |
22575 |
VEV_Stop(mgt_evb, ev_poker); |
533 |
22575 |
free(ev_poker); |
534 |
22575 |
ev_poker = NULL; |
535 |
22575 |
} |
536 |
|
|
537 |
|
/* Stop the listener */ |
538 |
22775 |
if (ev_listen != NULL) { |
539 |
22375 |
VEV_Stop(mgt_evb, ev_listen); |
540 |
22375 |
free(ev_listen); |
541 |
22375 |
ev_listen = NULL; |
542 |
22375 |
} |
543 |
|
|
544 |
|
/* Compose obituary */ |
545 |
22775 |
vsb = VSB_new_auto(); |
546 |
22775 |
XXXAN(vsb); |
547 |
|
|
548 |
22775 |
(void)VFIL_nonblocking(child_output); |
549 |
|
/* Wait for child to die */ |
550 |
51216 |
for (i = 0; i < mgt_param.cli_timeout * 10; i++) { |
551 |
51174 |
(void)child_listener(NULL, VEV__RD); |
552 |
51174 |
r = waitpid(child_pid, &status, WNOHANG); |
553 |
51174 |
if (r == child_pid) |
554 |
22733 |
break; |
555 |
28441 |
(void)usleep(100000); |
556 |
28441 |
} |
557 |
45424 |
if (r == 0) { |
558 |
84 |
VSB_printf(vsb, "Child (%jd) not dying (waitpid = %jd)," |
559 |
42 |
" killing\n", (intmax_t)child_pid, (intmax_t)r); |
560 |
|
|
561 |
|
/* Kick it Jim... */ |
562 |
42 |
(void)kill_child(); |
563 |
42 |
r = waitpid(child_pid, &status, 0); |
564 |
42 |
} |
565 |
84 |
if (r != child_pid) |
566 |
0 |
fprintf(stderr, "WAIT 0x%jd\n", (intmax_t)r); |
567 |
22775 |
assert(r == child_pid); |
568 |
|
|
569 |
45550 |
VSB_printf(vsb, "Child (%jd) %s", (intmax_t)r, |
570 |
22775 |
status ? "died" : "ended"); |
571 |
22775 |
if (WIFEXITED(status) && WEXITSTATUS(status)) { |
572 |
100 |
VSB_printf(vsb, " status=%d", WEXITSTATUS(status)); |
573 |
100 |
exit_status |= 0x20; |
574 |
100 |
if (WEXITSTATUS(status) == 1) |
575 |
0 |
VSC_C_mgt->child_exit++; |
576 |
|
else |
577 |
100 |
VSC_C_mgt->child_stop++; |
578 |
100 |
} |
579 |
22775 |
if (WIFSIGNALED(status)) { |
580 |
350 |
VSB_printf(vsb, " signal=%d", WTERMSIG(status)); |
581 |
350 |
exit_status |= 0x40; |
582 |
350 |
VSC_C_mgt->child_died++; |
583 |
350 |
} |
584 |
|
#ifdef WCOREDUMP |
585 |
650 |
if (WCOREDUMP(status)) { |
586 |
50 |
VSB_cat(vsb, " (core dumped)"); |
587 |
50 |
if (!MGT_FEATURE(FEATURE_NO_COREDUMP)) |
588 |
50 |
exit_status |= 0x80; |
589 |
50 |
VSC_C_mgt->child_dump++; |
590 |
50 |
} |
591 |
|
#endif |
592 |
22775 |
AZ(VSB_finish(vsb)); |
593 |
22775 |
MGT_Complain(status ? C_ERR : C_INFO, "%s", VSB_data(vsb)); |
594 |
22775 |
VSB_destroy(&vsb); |
595 |
|
|
596 |
|
/* Dispose of shared memory but evacuate panic messages first */ |
597 |
22775 |
if (heritage.panic_str[0] != '\0') { |
598 |
298 |
mgt_panic_record(r); |
599 |
298 |
VSC_C_mgt->child_panic++; |
600 |
298 |
} |
601 |
|
|
602 |
396 |
mgt_SHM_ChildDestroy(); |
603 |
|
|
604 |
396 |
if (child_state == CH_RUNNING) |
605 |
200 |
child_state = CH_DIED; |
606 |
|
|
607 |
|
/* Pick up any stuff lingering on stdout/stderr */ |
608 |
22775 |
(void)child_listener(NULL, VEV__RD); |
609 |
22775 |
closefd(&child_output); |
610 |
22775 |
VLU_Destroy(&child_std_vlu); |
611 |
|
|
612 |
22775 |
child_pid = -1; |
613 |
|
|
614 |
22775 |
MGT_Complain(C_DEBUG, "Child cleanup complete"); |
615 |
|
|
616 |
|
/* XXX number of retries? interval? */ |
617 |
22775 |
for (i = 0; i < 3; i++) { |
618 |
22775 |
if (MAC_reopen_sockets() == 0) |
619 |
22775 |
break; |
620 |
|
/* error already logged */ |
621 |
0 |
(void)sleep(1); |
622 |
0 |
} |
623 |
22775 |
if (i == 3) { |
624 |
|
/* We failed to reopen our listening sockets. No choice |
625 |
|
* but to exit. */ |
626 |
0 |
MGT_Complain(C_ERR, |
627 |
|
"Could not reopen listening sockets. Exiting."); |
628 |
0 |
exit(1); |
629 |
|
} |
630 |
|
|
631 |
22775 |
if (child_state == CH_DIED && mgt_param.auto_restart) |
632 |
0 |
mgt_launch_child(NULL); |
633 |
22775 |
else if (child_state == CH_DIED) |
634 |
200 |
child_state = CH_STOPPED; |
635 |
22575 |
else if (child_state == CH_STOPPING) |
636 |
22375 |
child_state = CH_STOPPED; |
637 |
22775 |
} |
638 |
|
|
639 |
|
/*===================================================================== |
640 |
|
* If CLI communication with the child process fails, there is nothing |
641 |
|
* for us to do but to drag it behind the barn and get it over with. |
642 |
|
* |
643 |
|
* The typical case is where the child process fails to return a reply |
644 |
|
* before the cli_timeout expires. This invalidates the CLI pipes for |
645 |
|
* all future use, as we don't know if the child was just slow and the |
646 |
|
* result gets piped later on, or if the child is catatonic. |
647 |
|
*/ |
648 |
|
|
649 |
|
void |
650 |
50 |
MCH_Cli_Fail(void) |
651 |
|
{ |
652 |
|
|
653 |
50 |
if (child_state != CH_RUNNING && child_state != CH_STARTING) |
654 |
0 |
return; |
655 |
50 |
if (child_pid < 0) |
656 |
0 |
return; |
657 |
50 |
if (kill_child() == 0) |
658 |
50 |
MGT_Complain(C_ERR, "Child (%jd) not responding to CLI," |
659 |
50 |
" killed it.", (intmax_t)child_pid); |
660 |
|
else |
661 |
0 |
MGT_Complain(C_ERR, "Failed to kill child with PID %jd: %s", |
662 |
0 |
(intmax_t)child_pid, VAS_errtxt(errno)); |
663 |
50 |
} |
664 |
|
|
665 |
|
/*===================================================================== |
666 |
|
* Controlled stop of child process |
667 |
|
* |
668 |
|
* Reaping the child asks for orderly shutdown |
669 |
|
*/ |
670 |
|
|
671 |
|
void |
672 |
45075 |
MCH_Stop_Child(void) |
673 |
|
{ |
674 |
|
|
675 |
45075 |
if (child_state != CH_RUNNING && child_state != CH_STARTING) |
676 |
22700 |
return; |
677 |
|
|
678 |
22375 |
child_state = CH_STOPPING; |
679 |
|
|
680 |
22375 |
MGT_Complain(C_DEBUG, "Stopping Child"); |
681 |
|
|
682 |
22375 |
mgt_reap_child(); |
683 |
45075 |
} |
684 |
|
|
685 |
|
/*===================================================================== |
686 |
|
*/ |
687 |
|
|
688 |
|
int |
689 |
150 |
MCH_Start_Child(void) |
690 |
|
{ |
691 |
150 |
mgt_launch_child(NULL); |
692 |
150 |
if (child_state != CH_RUNNING) |
693 |
125 |
return (2); |
694 |
25 |
return (0); |
695 |
150 |
} |
696 |
|
|
697 |
|
/*==================================================================== |
698 |
|
* Query if the child is running |
699 |
|
*/ |
700 |
|
|
701 |
|
int |
702 |
357600 |
MCH_Running(void) |
703 |
|
{ |
704 |
|
|
705 |
357600 |
return (child_pid > 0); |
706 |
|
} |
707 |
|
|
708 |
|
/*===================================================================== |
709 |
|
* CLI commands |
710 |
|
*/ |
711 |
|
|
712 |
|
static void v_matchproto_(cli_func_t) |
713 |
50 |
mch_pid(struct cli *cli, const char * const *av, void *priv) |
714 |
|
{ |
715 |
|
|
716 |
50 |
(void)av; |
717 |
50 |
(void)priv; |
718 |
50 |
VCLI_Out(cli, "Master: %10jd\n", (intmax_t)getpid()); |
719 |
50 |
if (!MCH_Running()) |
720 |
25 |
return; |
721 |
25 |
VCLI_Out(cli, "Worker: %10jd\n", (intmax_t)child_pid); |
722 |
50 |
} |
723 |
|
|
724 |
|
static void v_matchproto_(cli_func_t) |
725 |
50 |
mch_pid_json(struct cli *cli, const char * const *av, void *priv) |
726 |
|
{ |
727 |
|
|
728 |
50 |
(void)priv; |
729 |
50 |
VCLI_JSON_begin(cli, 2, av); |
730 |
50 |
VCLI_Out(cli, ",\n {\"master\": %jd", (intmax_t)getpid()); |
731 |
50 |
if (MCH_Running()) |
732 |
25 |
VCLI_Out(cli, ", \"worker\": %jd", (intmax_t)child_pid); |
733 |
50 |
VCLI_Out(cli, "}"); |
734 |
50 |
VCLI_JSON_end(cli); |
735 |
50 |
} |
736 |
|
|
737 |
|
static void v_matchproto_(cli_func_t) |
738 |
22750 |
mch_cli_server_start(struct cli *cli, const char * const *av, void *priv) |
739 |
|
{ |
740 |
|
const char *err; |
741 |
|
|
742 |
22750 |
(void)av; |
743 |
22750 |
(void)priv; |
744 |
22750 |
if (child_state == CH_STOPPED) { |
745 |
22650 |
err = mgt_has_vcl(); |
746 |
22650 |
if (err == NULL) { |
747 |
22625 |
mgt_launch_child(cli); |
748 |
22625 |
} else { |
749 |
25 |
VCLI_SetResult(cli, CLIS_CANT); |
750 |
25 |
VCLI_Out(cli, "%s", err); |
751 |
|
} |
752 |
22650 |
} else { |
753 |
100 |
VCLI_SetResult(cli, CLIS_CANT); |
754 |
100 |
VCLI_Out(cli, "Child in state %s", ch_state[child_state]); |
755 |
|
} |
756 |
22750 |
} |
757 |
|
|
758 |
|
static void v_matchproto_(cli_func_t) |
759 |
23950 |
mch_cli_server_stop(struct cli *cli, const char * const *av, void *priv) |
760 |
|
{ |
761 |
|
|
762 |
23950 |
(void)av; |
763 |
23950 |
(void)priv; |
764 |
23950 |
if (child_state == CH_RUNNING) { |
765 |
22275 |
MCH_Stop_Child(); |
766 |
22275 |
} else { |
767 |
1675 |
VCLI_SetResult(cli, CLIS_CANT); |
768 |
1675 |
VCLI_Out(cli, "Child in state %s", ch_state[child_state]); |
769 |
|
} |
770 |
23950 |
} |
771 |
|
|
772 |
|
static void v_matchproto_(cli_func_t) |
773 |
46150 |
mch_cli_server_status(struct cli *cli, const char * const *av, void *priv) |
774 |
|
{ |
775 |
46150 |
(void)av; |
776 |
46150 |
(void)priv; |
777 |
46150 |
VCLI_Out(cli, "Child in state %s", ch_state[child_state]); |
778 |
46150 |
} |
779 |
|
|
780 |
|
static void v_matchproto_(cli_func_t) |
781 |
200 |
mch_cli_server_status_json(struct cli *cli, const char * const *av, void *priv) |
782 |
|
{ |
783 |
200 |
(void)priv; |
784 |
200 |
VCLI_JSON_begin(cli, 2, av); |
785 |
200 |
VCLI_Out(cli, ", "); |
786 |
200 |
VCLI_JSON_str(cli, ch_state[child_state]); |
787 |
200 |
VCLI_JSON_end(cli); |
788 |
200 |
} |
789 |
|
|
790 |
|
static struct cli_proto cli_mch[] = { |
791 |
|
{ CLICMD_SERVER_STATUS, "", mch_cli_server_status, |
792 |
|
mch_cli_server_status_json }, |
793 |
|
{ CLICMD_SERVER_START, "", mch_cli_server_start }, |
794 |
|
{ CLICMD_SERVER_STOP, "", mch_cli_server_stop }, |
795 |
|
{ CLICMD_PANIC_SHOW, "", mch_cli_panic_show, |
796 |
|
mch_cli_panic_show_json }, |
797 |
|
{ CLICMD_PANIC_CLEAR, "", mch_cli_panic_clear }, |
798 |
|
{ CLICMD_PID, "", mch_pid, mch_pid_json }, |
799 |
|
{ NULL } |
800 |
|
}; |
801 |
|
|
802 |
|
/*===================================================================== |
803 |
|
* This thread is the master thread in the management process. |
804 |
|
* The relatively simple task is to start and stop the child process |
805 |
|
* and to reincarnate it in case of trouble. |
806 |
|
*/ |
807 |
|
|
808 |
|
void |
809 |
22900 |
MCH_Init(void) |
810 |
|
{ |
811 |
|
|
812 |
22900 |
VCLS_AddFunc(mgt_cls, MCF_AUTH, cli_mch); |
813 |
22900 |
} |