[master] 5c03d65 Add a feature 'esi_remove_bom' which will make ESI ignore and remove UTF-8 BOM's at the start of an ESI-object.

Poul-Henning Kamp phk at varnish-cache.org
Thu Oct 17 12:07:19 CEST 2013


commit 5c03d65b8f63adccabce33d827203cf9d259bb6d
Author: Poul-Henning Kamp <phk at FreeBSD.org>
Date:   Thu Oct 17 09:36:55 2013 +0000

    Add a feature 'esi_remove_bom' which will make ESI ignore and remove
    UTF-8 BOMs at the start of an ESI-object.
    
    Notice that the removal only happens if the file is actually ESI
    processed on delivery, so to get BOM removal for non-XML files,
    you may have to disable the XML test and insert a dummy ESI directive
    such as <esi:remove>BOMs Be Gone!</esi:remove> or similar.
    
    Fixes   #1355

diff --git a/bin/varnishd/cache/cache_esi_parse.c b/bin/varnishd/cache/cache_esi_parse.c
index 880d3d2..48482c9 100644
--- a/bin/varnishd/cache/cache_esi_parse.c
+++ b/bin/varnishd/cache/cache_esi_parse.c
@@ -121,6 +121,7 @@ struct vep_state {
 /*---------------------------------------------------------------------*/
 
 static const char * const VEP_START =		"[Start]";
+static const char * const VEP_BOM =		"[BOM]";
 static const char * const VEP_TESTXML =		"[TestXml]";
 static const char * const VEP_NOTXML =		"[NotXml]";
 
@@ -175,6 +176,13 @@ static struct vep_match vep_match_attr_include[] = {
 	{ NULL,		&VEP_SKIPATTR }
 };
 
+/*---------------------------------------------------------------------*/
+
+static struct vep_match vep_match_bom[] = {
+	{ "\xeb\xbb\xbf",	&VEP_START },
+	{ NULL,			&VEP_BOM }
+};
+
 /*--------------------------------------------------------------------
  * Report a parsing error
  */
@@ -594,6 +602,13 @@ VEP_Parse(const struct busyobj *bo, const char *p, size_t l)
 		 */
 
 		if (vep->state == VEP_START) {
+			if (FEATURE(FEATURE_ESI_REMOVE_BOM) && *p == '\xeb') {
+				vep->match = vep_match_bom;
+				vep->state = VEP_MATCH;
+			} else
+				vep->state = VEP_BOM;
+		} else if (vep->state == VEP_BOM) {
+			vep_mark_skip(vep, p);
 			if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK))
 				vep->state = VEP_NEXTTAG;
 			else
@@ -609,6 +624,12 @@ VEP_Parse(const struct busyobj *bo, const char *p, size_t l)
 			if (p < e && *p == '<') {
 				p++;
 				vep->state = VEP_STARTTAG;
+			} else if (p < e && *p == '\xeb') {
+				VSLb(vep->bo->vsl, SLT_ESI_xmlerror,
+				    "No ESI processing, first char not '<'"
+				    " (BOM? see feature esi_remove_bom)"
+				);
+				vep->state = VEP_NOTXML;
 			} else if (p < e) {
 				VSLb(vep->bo->vsl, SLT_ESI_xmlerror,
 				    "No ESI processing, first char not '<'");
diff --git a/bin/varnishtest/tests/r01355.vtc b/bin/varnishtest/tests/r01355.vtc
new file mode 100644
index 0000000..0aa132b
--- /dev/null
+++ b/bin/varnishtest/tests/r01355.vtc
@@ -0,0 +1,52 @@
+varnishtest "Test ESI ignoring BOMs"
+
+server s1 {
+	rxreq
+	expect req.url == /1
+	txresp -body "\xeb\xbb\xbf    <html>   <esi:remove>blabla</esi:remove>"
+	rxreq
+	expect req.url == /2
+	txresp -body "\xeb\xbb\xbf    <html>   <esi:remove>blabla</esi:remove>"
+	rxreq
+	expect req.url == /3
+	txresp -body "\xeb\xbb\xbf\xeb\xbb\xbf    <html>   <esi:remove>blabla</esi:remove>"
+	rxreq
+	expect req.url == /4
+	txresp -body "\xeb\xbc    <html>   <esi:remove>blabla</esi:remove>"
+} -start
+
+varnish v1 -vcl+backend {
+	sub vcl_backend_response {
+		set beresp.do_esi = true;
+	}
+} -start
+
+client c1 {
+	# No ESI processing
+	txreq -url /1
+	rxresp
+	expect resp.bodylen == 47
+} -run
+
+varnish v1 -cliok "param.set feature +esi_remove_bom"
+
+client c1 {
+	# BOM removed, ESI processing
+	txreq -url /2
+	rxresp
+	expect resp.bodylen == 13
+} -run
+
+client c1 {
+	# BOMs removed, ESI processing
+	txreq -url /3
+	rxresp
+	expect resp.bodylen == 13
+} -run
+
+client c1 {
+	# Not a BOM, no ESI processing
+	txreq -url /4
+	rxresp
+	expect resp.bodylen == 46
+} -run
diff --git a/include/tbl/feature_bits.h b/include/tbl/feature_bits.h
index 3e4ecba..2f30e58 100644
--- a/include/tbl/feature_bits.h
+++ b/include/tbl/feature_bits.h
@@ -53,3 +53,8 @@ FEATURE_BIT(ESI_IGNORE_OTHER_ELEMENTS,	esi_ignore_other_elements, "",
     "Ignore non-esi XML-elements",
     "Allows syntax errors in the XML"
 )
+FEATURE_BIT(ESI_REMOVE_BOM,		esi_remove_bom, "",
+    "Remove UTF-8 BOM",
+    "Remove UTF-8 BOM from front of object."
+    "Ignore and remove the UTF-8 BOM (0xeb 0xbb 0xbf) from front of object."
+)



More information about the varnish-commit mailing list