[master] 300ede262 varnishncsa: Add -j to format for JSON

Nils Goroll nils.goroll at uplex.de
Fri Apr 30 19:14:05 UTC 2021


commit 300ede2629a962df55fceba268933eaaf7846463
Author: madhavi.dintakurthy <madhavi.dintakurthy at target.com>
Date:   Thu Apr 15 10:46:27 2021 -0500

    varnishncsa: Add -j to format for JSON
    
    The new -j flag for varnishncsa allows writing JSON logs in combination
    with -f or -F to specify a custom format. Without -j, the format
    specifiers could be replaced with strings that would make the JSON
    invalid.
    
    One example is headers: without -j, some headers may be replaced with
    C-style \xXX escape sequences, which are not valid JSON. Since request
    headers are controlled by users on the internet, it would be easy for an
    attacker to make a log entry impossible to parse, which might cause it
    to be missed by administrators. Another example is numbers. Without -j,
    format specifiers are replaced with - if the value is empty. A bare - is
    not valid in JSON, so numbers are replaced with null with the -j flag.
    
    In general, -j makes the replacements JSON-compatible for all inputs.
    
    Co-authored-by: Ben Zvan <ben.zvan at target.com>
    Co-authored-by: Jasmine Wang <jasmine.wang at target.com>
    Co-authored-by: Jordan Christiansen <jordan.christiansen at target.com>
    Co-authored-by: Madhavi Dintakurthy <madhavi.dintakurthy at target.com>

diff --git a/bin/varnishncsa/varnishncsa.c b/bin/varnishncsa/varnishncsa.c
index a9b5fd185..d394220be 100644
--- a/bin/varnishncsa/varnishncsa.c
+++ b/bin/varnishncsa/varnishncsa.c
@@ -146,6 +146,9 @@ static struct ctx {
 	struct vsb		*vsb;
 	unsigned		gen;
 	VTAILQ_HEAD(,format)	format;
+	int			quote_how;
+	char			*missing_string;
+	char			*missing_int;
 
 	/* State */
 	struct watch_head	watch_vcl_log;
@@ -199,9 +202,9 @@ vsb_fcat(struct vsb *vsb, const struct fragment *f, const char *dflt)
 {
 	if (f->gen == CTX.gen) {
 		assert(f->b <= f->e);
-		VSB_quote(vsb, f->b, f->e - f->b, VSB_QUOTE_ESCHEX);
+		VSB_quote(vsb, f->b, f->e - f->b, CTX.quote_how);
 	} else if (dflt)
-		VSB_quote(vsb, dflt, -1, VSB_QUOTE_ESCHEX);
+		VSB_quote(vsb, dflt, -1, CTX.quote_how);
 	else
 		return (-1);
 	return (0);
@@ -247,7 +250,7 @@ format_fragment(const struct format *format)
 	if (format->frag->gen != CTX.gen) {
 		if (format->string == NULL)
 			return (-1);
-		VSB_quote(CTX.vsb, format->string, -1, VSB_QUOTE_ESCHEX);
+		VSB_quote(CTX.vsb, format->string, -1, CTX.quote_how);
 		return (0);
 	}
 	AZ(vsb_fcat(CTX.vsb, format->frag, NULL));
@@ -363,13 +366,13 @@ format_auth(const struct format *format)
 	    CTX.frag[F_auth].e)) {
 		if (format->string == NULL)
 			return (-1);
-		VSB_quote(CTX.vsb, format->string, -1, VSB_QUOTE_ESCHEX);
+		VSB_quote(CTX.vsb, format->string, -1, CTX.quote_how);
 		return (0);
 	}
 	q = strchr(buf, ':');
 	if (q != NULL)
 		*q = '\0';
-	VSB_quote(CTX.vsb, buf, -1, VSB_QUOTE_ESCHEX);
+	VSB_quote(CTX.vsb, buf, -1, CTX.quote_how);
 	return (1);
 }
 
@@ -557,7 +560,7 @@ addf_hdr(struct watch_head *head, const char *key)
 	AN(f);
 	f->func = format_fragment;
 	f->frag = &w->frag;
-	f->string = strdup("-");
+	f->string = strdup(CTX.missing_string);
 	AN(f->string);
 	VTAILQ_INSERT_TAIL(&CTX.format, f, list);
 }
@@ -580,7 +583,7 @@ addf_vsl(enum VSL_tag_e tag, long i, const char *prefix)
 		assert(w->prefixlen > 0);
 	}
 	VTAILQ_INSERT_TAIL(&CTX.watch_vsl, w, list);
-	addf_fragment(&w->frag, "-");
+	addf_fragment(&w->frag, CTX.missing_string);
 }
 
 static void
@@ -716,28 +719,28 @@ parse_format(const char *format)
 		p++;
 		switch (*p) {
 		case 'b':	/* Body bytes sent */
-			addf_fragment(&CTX.frag[F_b], "-");
+			addf_fragment(&CTX.frag[F_b], CTX.missing_int);
 			break;
 		case 'D':	/* Float request time */
 			addf_time('T', "us");
 			break;
 		case 'h':	/* Client host name / IP Address */
-			addf_fragment(&CTX.frag[F_h], "-");
+			addf_fragment(&CTX.frag[F_h], CTX.missing_string);
 			break;
 		case 'H':	/* Protocol */
 			addf_fragment(&CTX.frag[F_H], "HTTP/1.0");
 			break;
 		case 'I':	/* Bytes received */
-			addf_fragment(&CTX.frag[F_I], "-");
+			addf_fragment(&CTX.frag[F_I], CTX.missing_int);
 			break;
 		case 'l':	/* Client user ID (identd) always '-' */
 			AZ(VSB_putc(vsb, '-'));
 			break;
 		case 'm':	/* Method */
-			addf_fragment(&CTX.frag[F_m], "-");
+			addf_fragment(&CTX.frag[F_m], CTX.missing_string);
 			break;
 		case 'O':	/* Bytes sent */
-			addf_fragment(&CTX.frag[F_O], "-");
+			addf_fragment(&CTX.frag[F_O], CTX.missing_int);
 			break;
 		case 'q':	/* Query string */
 			addf_fragment(&CTX.frag[F_q], "");
@@ -746,7 +749,7 @@ parse_format(const char *format)
 			addf_requestline();
 			break;
 		case 's':	/* Status code */
-			addf_fragment(&CTX.frag[F_s], "-");
+			addf_fragment(&CTX.frag[F_s], CTX.missing_int);
 			break;
 		case 't':	/* strftime */
 			addf_time(*p, TIME_FMT);
@@ -758,7 +761,7 @@ parse_format(const char *format)
 			addf_auth();
 			break;
 		case 'U':	/* URL */
-			addf_fragment(&CTX.frag[F_U], "-");
+			addf_fragment(&CTX.frag[F_U], CTX.missing_string);
 			break;
 		case '{':
 			p++;
@@ -1153,6 +1156,9 @@ main(int argc, char * const *argv)
 	CTX.vsb = VSB_new_auto();
 	AN(CTX.vsb);
 	VB64_init();
+	CTX.quote_how = VSB_QUOTE_ESCHEX;
+	REPLACE(CTX.missing_string, "-");
+	REPLACE(CTX.missing_int, "-");
 
 	tzset();		// We use localtime_r(3)
 
@@ -1184,6 +1190,11 @@ main(int argc, char * const *argv)
 			/* Usage help */
 			VUT_Usage(vut, &vopt_spec, 0);
 			break;
+		case 'j':
+			REPLACE(CTX.missing_string, "");
+			REPLACE(CTX.missing_int, "0");
+			CTX.quote_how = VSB_QUOTE_JSON;
+			break;
 		case 'w':
 			/* Write to file */
 			REPLACE(CTX.w_arg, optarg);
diff --git a/bin/varnishncsa/varnishncsa_options.h b/bin/varnishncsa/varnishncsa_options.h
index 8c86761ed..fa2dd57d1 100644
--- a/bin/varnishncsa/varnishncsa_options.h
+++ b/bin/varnishncsa/varnishncsa_options.h
@@ -78,6 +78,15 @@
 	VOPT("E", "[-E]", "Show ESI requests",				\
 	    "Show ESI requests, implies client mode."			\
 	)
+#define NCSA_OPT_j							\
+	VOPT("j", "[-j]", "Make output JSON-compatible",		\
+	    "Make format-specifier replacements JSON-compatible. When"	\
+	    " escaping characters, use JSON-style \\\\uXXXX escape"	\
+	    " sequences instead of C-style \\\\xXX sequences. Empty"	\
+	    " strings will be replaced with \"\" instead of \"-\", and"	\
+	    " empty integers will be replaced with null. Use -F or -f"	\
+	    " in combination with -j to write JSON logs."		\
+	)
 
 NCSA_OPT_a
 NCSA_OPT_b
@@ -90,6 +99,7 @@ NCSA_OPT_F
 NCSA_OPT_f
 NCSA_OPT_g
 VUT_OPT_h
+NCSA_OPT_j
 VSL_OPT_L
 VUT_OPT_n
 VUT_GLOBAL_OPT_P
diff --git a/bin/varnishtest/tests/u00016.vtc b/bin/varnishtest/tests/u00016.vtc
new file mode 100644
index 000000000..de46499b4
--- /dev/null
+++ b/bin/varnishtest/tests/u00016.vtc
@@ -0,0 +1,56 @@
+varnishtest "varnishncsa -j json escaping"
+
+feature cmd "python3 -c 'import json'"
+
+server s1 -repeat 3 {
+	rxreq
+	txresp
+} -start
+
+varnish v1 -vcl+backend {} -start
+
+client c1 {
+	txreq -hdr "x-request-id: YES" -hdr "test:ascii"
+	rxresp
+	txreq -hdr "x-request-id: ∃" -hdr "test:unicode"
+	rxresp
+	txreq -hdr "test:null"
+	rxresp
+} -run
+
+delay 1
+
+# ASCII string with -j is valid JSON
+shell -match "^YES$" {
+	varnishncsa -n ${v1_name} -d -q 'ReqHeader:test eq "ascii"' -j \
+		-F '{"xrid": "%{X-REQUEST-ID}i"}' |
+	python3 -c 'import sys, json; print(json.load(sys.stdin)["xrid"])'
+}
+
+# ∃ without -j is not valid JSON
+shell -err -expect "Invalid \\escape" {
+	varnishncsa -n ${v1_name} -d -q 'ReqHeader:test eq "unicode"' \
+		-F '{"xrid": "%{X-REQUEST-ID}i"}' |
+	python3 -c 'import sys, json; print(json.load(sys.stdin)["xrid"])'
+}
+
+# ∃ with -j is valid JSON
+shell -match "^∃$" {
+	varnishncsa -n ${v1_name} -d -q 'ReqHeader:test eq "unicode"' -j \
+		-F '{"xrid": "%{X-REQUEST-ID}i"}' |
+	python3 -c 'import sys, json; print(json.load(sys.stdin)["xrid"])'
+}
+
+# Empty strings are not replaced with "-"
+shell -match "" {
+	varnishncsa -n ${v1_name} -d -q 'ReqHeader:test eq "null"' -j \
+		-F '{"xrid": "%{X-REQUEST-ID}i"}' |
+	python3 -c 'import sys, json; print(json.load(sys.stdin)["xrid"])'
+}
+
+# Empty VCL_Log entries are not replaced with "-"
+shell -match "" {
+	varnishncsa -n ${v1_name} -d -q 'ReqHeader:test eq "null"' -j \
+		-F '{"xrid": "%{VCL_Log:varnishncsa}x"}' |
+	python3 -c 'import sys, json; print(json.load(sys.stdin)["xrid"])'
+}
diff --git a/doc/sphinx/reference/varnishncsa.rst b/doc/sphinx/reference/varnishncsa.rst
index ceb115b5e..51b9316ab 100644
--- a/doc/sphinx/reference/varnishncsa.rst
+++ b/doc/sphinx/reference/varnishncsa.rst
@@ -246,6 +246,11 @@ Log the entire Timestamp record associated with the processing length::
 
   varnishncsa -F "%{VSL:Timestamp:Process}x"
 
+Log in JSON, using the -j flag to ensure that the output is valid JSON
+for all inputs::
+
+  varnishncsa -j -F '{"size": %b, "time": "%t", "ua": "%{User-Agent}i"}'
+
 SEE ALSO
 ========
 


More information about the varnish-commit mailing list