[master] 300ede262 varnishncsa: Add -j to format for JSON
Nils Goroll
nils.goroll at uplex.de
Fri Apr 30 19:14:05 UTC 2021
commit 300ede2629a962df55fceba268933eaaf7846463
Author: madhavi.dintakurthy <madhavi.dintakurthy at target.com>
Date: Thu Apr 15 10:46:27 2021 -0500
varnishncsa: Add -j to format for JSON
The new -j flag for varnishncsa allows writing JSON logs in combination
with -f or -F to specify a custom format. Without -j, the format
specifiers could be replaced with strings that would make the JSON
invalid.
One example is headers: without -j, some headers may be replaced with
C-style \xXX escape sequences, which are not valid JSON. Since request
headers are controlled by users on the internet, it would be easy for an
attacker to make a log entry impossible to parse, which might cause it
to be missed by administrators. Another example is numbers. Without -j,
format specifiers are replaced with - if the value is empty. A bare - is
not valid in JSON, so numbers are replaced with null with the -j flag.
In general, -j makes the replacements JSON-compatible for all inputs.
Co-authored-by: Ben Zvan <ben.zvan at target.com>
Co-authored-by: Jasmine Wang <jasmine.wang at target.com>
Co-authored-by: Jordan Christiansen <jordan.christiansen at target.com>
Co-authored-by: Madhavi Dintakurthy <madhavi.dintakurthy at target.com>
diff --git a/bin/varnishncsa/varnishncsa.c b/bin/varnishncsa/varnishncsa.c
index a9b5fd185..d394220be 100644
--- a/bin/varnishncsa/varnishncsa.c
+++ b/bin/varnishncsa/varnishncsa.c
@@ -146,6 +146,9 @@ static struct ctx {
struct vsb *vsb;
unsigned gen;
VTAILQ_HEAD(,format) format;
+ int quote_how;
+ char *missing_string;
+ char *missing_int;
/* State */
struct watch_head watch_vcl_log;
@@ -199,9 +202,9 @@ vsb_fcat(struct vsb *vsb, const struct fragment *f, const char *dflt)
{
if (f->gen == CTX.gen) {
assert(f->b <= f->e);
- VSB_quote(vsb, f->b, f->e - f->b, VSB_QUOTE_ESCHEX);
+ VSB_quote(vsb, f->b, f->e - f->b, CTX.quote_how);
} else if (dflt)
- VSB_quote(vsb, dflt, -1, VSB_QUOTE_ESCHEX);
+ VSB_quote(vsb, dflt, -1, CTX.quote_how);
else
return (-1);
return (0);
@@ -247,7 +250,7 @@ format_fragment(const struct format *format)
if (format->frag->gen != CTX.gen) {
if (format->string == NULL)
return (-1);
- VSB_quote(CTX.vsb, format->string, -1, VSB_QUOTE_ESCHEX);
+ VSB_quote(CTX.vsb, format->string, -1, CTX.quote_how);
return (0);
}
AZ(vsb_fcat(CTX.vsb, format->frag, NULL));
@@ -363,13 +366,13 @@ format_auth(const struct format *format)
CTX.frag[F_auth].e)) {
if (format->string == NULL)
return (-1);
- VSB_quote(CTX.vsb, format->string, -1, VSB_QUOTE_ESCHEX);
+ VSB_quote(CTX.vsb, format->string, -1, CTX.quote_how);
return (0);
}
q = strchr(buf, ':');
if (q != NULL)
*q = '\0';
- VSB_quote(CTX.vsb, buf, -1, VSB_QUOTE_ESCHEX);
+ VSB_quote(CTX.vsb, buf, -1, CTX.quote_how);
return (1);
}
@@ -557,7 +560,7 @@ addf_hdr(struct watch_head *head, const char *key)
AN(f);
f->func = format_fragment;
f->frag = &w->frag;
- f->string = strdup("-");
+ f->string = strdup(CTX.missing_string);
AN(f->string);
VTAILQ_INSERT_TAIL(&CTX.format, f, list);
}
@@ -580,7 +583,7 @@ addf_vsl(enum VSL_tag_e tag, long i, const char *prefix)
assert(w->prefixlen > 0);
}
VTAILQ_INSERT_TAIL(&CTX.watch_vsl, w, list);
- addf_fragment(&w->frag, "-");
+ addf_fragment(&w->frag, CTX.missing_string);
}
static void
@@ -716,28 +719,28 @@ parse_format(const char *format)
p++;
switch (*p) {
case 'b': /* Body bytes sent */
- addf_fragment(&CTX.frag[F_b], "-");
+ addf_fragment(&CTX.frag[F_b], CTX.missing_int);
break;
case 'D': /* Float request time */
addf_time('T', "us");
break;
case 'h': /* Client host name / IP Address */
- addf_fragment(&CTX.frag[F_h], "-");
+ addf_fragment(&CTX.frag[F_h], CTX.missing_string);
break;
case 'H': /* Protocol */
addf_fragment(&CTX.frag[F_H], "HTTP/1.0");
break;
case 'I': /* Bytes received */
- addf_fragment(&CTX.frag[F_I], "-");
+ addf_fragment(&CTX.frag[F_I], CTX.missing_int);
break;
case 'l': /* Client user ID (identd) always '-' */
AZ(VSB_putc(vsb, '-'));
break;
case 'm': /* Method */
- addf_fragment(&CTX.frag[F_m], "-");
+ addf_fragment(&CTX.frag[F_m], CTX.missing_string);
break;
case 'O': /* Bytes sent */
- addf_fragment(&CTX.frag[F_O], "-");
+ addf_fragment(&CTX.frag[F_O], CTX.missing_int);
break;
case 'q': /* Query string */
addf_fragment(&CTX.frag[F_q], "");
@@ -746,7 +749,7 @@ parse_format(const char *format)
addf_requestline();
break;
case 's': /* Status code */
- addf_fragment(&CTX.frag[F_s], "-");
+ addf_fragment(&CTX.frag[F_s], CTX.missing_int);
break;
case 't': /* strftime */
addf_time(*p, TIME_FMT);
@@ -758,7 +761,7 @@ parse_format(const char *format)
addf_auth();
break;
case 'U': /* URL */
- addf_fragment(&CTX.frag[F_U], "-");
+ addf_fragment(&CTX.frag[F_U], CTX.missing_string);
break;
case '{':
p++;
@@ -1153,6 +1156,9 @@ main(int argc, char * const *argv)
CTX.vsb = VSB_new_auto();
AN(CTX.vsb);
VB64_init();
+ CTX.quote_how = VSB_QUOTE_ESCHEX;
+ REPLACE(CTX.missing_string, "-");
+ REPLACE(CTX.missing_int, "-");
tzset(); // We use localtime_r(3)
@@ -1184,6 +1190,11 @@ main(int argc, char * const *argv)
/* Usage help */
VUT_Usage(vut, &vopt_spec, 0);
break;
+ case 'j':
+ REPLACE(CTX.missing_string, "");
+ REPLACE(CTX.missing_int, "0");
+ CTX.quote_how = VSB_QUOTE_JSON;
+ break;
case 'w':
/* Write to file */
REPLACE(CTX.w_arg, optarg);
diff --git a/bin/varnishncsa/varnishncsa_options.h b/bin/varnishncsa/varnishncsa_options.h
index 8c86761ed..fa2dd57d1 100644
--- a/bin/varnishncsa/varnishncsa_options.h
+++ b/bin/varnishncsa/varnishncsa_options.h
@@ -78,6 +78,15 @@
VOPT("E", "[-E]", "Show ESI requests", \
"Show ESI requests, implies client mode." \
)
+#define NCSA_OPT_j \
+ VOPT("j", "[-j]", "Make output JSON-compatible", \
+ "Make format-specifier replacements JSON-compatible. When" \
+ " escaping characters, use JSON-style \\\\uXXXX escape" \
+ " sequences instead of C-style \\\\xXX sequences. Empty" \
+ " strings will be replaced with \"\" instead of \"-\", and" \
+ " empty integers will be replaced with null. Use -F or -f" \
+ " in combination with -j to write JSON logs." \
+ )
NCSA_OPT_a
NCSA_OPT_b
@@ -90,6 +99,7 @@ NCSA_OPT_F
NCSA_OPT_f
NCSA_OPT_g
VUT_OPT_h
+NCSA_OPT_j
VSL_OPT_L
VUT_OPT_n
VUT_GLOBAL_OPT_P
diff --git a/bin/varnishtest/tests/u00016.vtc b/bin/varnishtest/tests/u00016.vtc
new file mode 100644
index 000000000..de46499b4
--- /dev/null
+++ b/bin/varnishtest/tests/u00016.vtc
@@ -0,0 +1,56 @@
+varnishtest "varnishncsa -j json escaping"
+
+feature cmd "python3 -c 'import json'"
+
+server s1 -repeat 3 {
+ rxreq
+ txresp
+} -start
+
+varnish v1 -vcl+backend {} -start
+
+client c1 {
+ txreq -hdr "x-request-id: YES" -hdr "test:ascii"
+ rxresp
+ txreq -hdr "x-request-id: ∃" -hdr "test:unicode"
+ rxresp
+ txreq -hdr "test:null"
+ rxresp
+} -run
+
+delay 1
+
+# ASCII string with -j is valid JSON
+shell -match "^YES$" {
+ varnishncsa -n ${v1_name} -d -q 'ReqHeader:test eq "ascii"' -j \
+ -F '{"xrid": "%{X-REQUEST-ID}i"}' |
+ python3 -c 'import sys, json; print(json.load(sys.stdin)["xrid"])'
+}
+
+# ∃ without -j is not valid JSON
+shell -err -expect "Invalid \\escape" {
+ varnishncsa -n ${v1_name} -d -q 'ReqHeader:test eq "unicode"' \
+ -F '{"xrid": "%{X-REQUEST-ID}i"}' |
+ python3 -c 'import sys, json; print(json.load(sys.stdin)["xrid"])'
+}
+
+# ∃ with -j is valid JSON
+shell -match "^∃$" {
+ varnishncsa -n ${v1_name} -d -q 'ReqHeader:test eq "unicode"' -j \
+ -F '{"xrid": "%{X-REQUEST-ID}i"}' |
+ python3 -c 'import sys, json; print(json.load(sys.stdin)["xrid"])'
+}
+
+# Empty strings are not replaced with "-"
+shell -match "" {
+ varnishncsa -n ${v1_name} -d -q 'ReqHeader:test eq "null"' -j \
+ -F '{"xrid": "%{X-REQUEST-ID}i"}' |
+ python3 -c 'import sys, json; print(json.load(sys.stdin)["xrid"])'
+}
+
+# Empty VCL_Log entries are not replaced with "-"
+shell -match "" {
+ varnishncsa -n ${v1_name} -d -q 'ReqHeader:test eq "null"' -j \
+ -F '{"xrid": "%{VCL_Log:varnishncsa}x"}' |
+ python3 -c 'import sys, json; print(json.load(sys.stdin)["xrid"])'
+}
diff --git a/doc/sphinx/reference/varnishncsa.rst b/doc/sphinx/reference/varnishncsa.rst
index ceb115b5e..51b9316ab 100644
--- a/doc/sphinx/reference/varnishncsa.rst
+++ b/doc/sphinx/reference/varnishncsa.rst
@@ -246,6 +246,11 @@ Log the entire Timestamp record associated with the processing length::
varnishncsa -F "%{VSL:Timestamp:Process}x"
+Log in JSON, using the -j flag to ensure that the output is valid JSON
+for all inputs::
+
+ varnishncsa -j -F '{"size": %b, "time": "%t", "ua": "%{User-Agent}i"}'
+
SEE ALSO
========
More information about the varnish-commit
mailing list