[master] 5c03d65 Add a feature 'esi_remove_bom' which will make ESI ignore and remove UTF-8 BOM's at the start of an ESI-object.
Poul-Henning Kamp
phk at varnish-cache.org
Thu Oct 17 12:07:19 CEST 2013
commit 5c03d65b8f63adccabce33d827203cf9d259bb6d
Author: Poul-Henning Kamp <phk at FreeBSD.org>
Date: Thu Oct 17 09:36:55 2013 +0000
Add a feature 'esi_remove_bom' which will make ESI ignore and remove
UTF-8 BOMs at the start of an ESI-object.
Notice that the removal only happens if the file is actually ESI
processed on delivery, so to get BOM removal for non-XML files,
you may have to disable the XML test and insert a dummy ESI directive
such as <esi:remove>BOMs Be Gone!</esi:remove> or similar.
Fixes #1355
diff --git a/bin/varnishd/cache/cache_esi_parse.c b/bin/varnishd/cache/cache_esi_parse.c
index 880d3d2..48482c9 100644
--- a/bin/varnishd/cache/cache_esi_parse.c
+++ b/bin/varnishd/cache/cache_esi_parse.c
@@ -121,6 +121,7 @@ struct vep_state {
/*---------------------------------------------------------------------*/
static const char * const VEP_START = "[Start]";
+static const char * const VEP_BOM = "[BOM]";
static const char * const VEP_TESTXML = "[TestXml]";
static const char * const VEP_NOTXML = "[NotXml]";
@@ -175,6 +176,13 @@ static struct vep_match vep_match_attr_include[] = {
{ NULL, &VEP_SKIPATTR }
};
+/*---------------------------------------------------------------------*/
+
+static struct vep_match vep_match_bom[] = { /* Match table selected by VEP_Parse when the object starts with 0xeb and esi_remove_bom is enabled */
+ { "\xeb\xbb\xbf", &VEP_START }, /* NOTE(review): the UTF-8 BOM is EF BB BF, not EB BB BF; VEP_Parse, r01355.vtc and the feature text use 0xeb too - confirm and fix all together */
+ { NULL, &VEP_BOM } /* presumably the fallback state when the bytes do not match - verify against the VEP_MATCH handling */
+};
+
/*--------------------------------------------------------------------
* Report a parsing error
*/
@@ -594,6 +602,13 @@ VEP_Parse(const struct busyobj *bo, const char *p, size_t l)
*/
if (vep->state == VEP_START) {
+ if (FEATURE(FEATURE_ESI_REMOVE_BOM) && *p == '\xeb') {
+ vep->match = vep_match_bom;
+ vep->state = VEP_MATCH;
+ } else
+ vep->state = VEP_BOM;
+ } else if (vep->state == VEP_BOM) {
+ vep_mark_skip(vep, p);
if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK))
vep->state = VEP_NEXTTAG;
else
@@ -609,6 +624,12 @@ VEP_Parse(const struct busyobj *bo, const char *p, size_t l)
if (p < e && *p == '<') {
p++;
vep->state = VEP_STARTTAG;
+ } else if (p < e && *p == '\xeb') {
+ VSLb(vep->bo->vsl, SLT_ESI_xmlerror,
+ "No ESI processing, first char not '<'"
+ " (BOM? see feature esi_remove_bom)"
+ );
+ vep->state = VEP_NOTXML;
} else if (p < e) {
VSLb(vep->bo->vsl, SLT_ESI_xmlerror,
"No ESI processing, first char not '<'");
diff --git a/bin/varnishtest/tests/r01355.vtc b/bin/varnishtest/tests/r01355.vtc
new file mode 100644
index 0000000..0aa132b
--- /dev/null
+++ b/bin/varnishtest/tests/r01355.vtc
@@ -0,0 +1,52 @@
+varnishtest "Test ESI ignoring BOMs"
+
+server s1 {
+ rxreq
+ expect req.url == /1
+ txresp -body "\xeb\xbb\xbf <html> <esi:remove>blabla</esi:remove>"
+ rxreq
+ expect req.url == /2
+ txresp -body "\xeb\xbb\xbf <html> <esi:remove>blabla</esi:remove>"
+ rxreq
+ expect req.url == /3
+ txresp -body "\xeb\xbb\xbf\xeb\xbb\xbf <html> <esi:remove>blabla</esi:remove>"
+ rxreq
+ expect req.url == /4
+ txresp -body "\xeb\xbc <html> <esi:remove>blabla</esi:remove>"
+} -start
+
+varnish v1 -vcl+backend {
+ sub vcl_backend_response {
+ set beresp.do_esi = true;
+ }
+} -start
+
+client c1 {
+ # esi_remove_bom not enabled yet: leading bytes are kept, no ESI processing
+ txreq -url /1
+ rxresp
+ expect resp.bodylen == 47
+} -run
+
+varnish v1 -cliok "param.set feature +esi_remove_bom"
+
+client c1 {
+ # Feature enabled: the single leading BOM is removed and ESI processing runs
+ txreq -url /2
+ rxresp
+ expect resp.bodylen == 13
+} -run
+
+client c1 {
+ # Two consecutive leading BOMs: both are removed and ESI processing runs
+ txreq -url /3
+ rxresp
+ expect resp.bodylen == 13
+} -run
+
+client c1 {
+ # 0xeb followed by a non-BOM byte: not a BOM, so no ESI processing
+ txreq -url /4
+ rxresp
+ expect resp.bodylen == 46
+} -run
diff --git a/include/tbl/feature_bits.h b/include/tbl/feature_bits.h
index 3e4ecba..2f30e58 100644
--- a/include/tbl/feature_bits.h
+++ b/include/tbl/feature_bits.h
@@ -53,3 +53,8 @@ FEATURE_BIT(ESI_IGNORE_OTHER_ELEMENTS, esi_ignore_other_elements, "",
"Ignore non-esi XML-elements",
"Allows syntax errors in the XML"
)
+FEATURE_BIT(ESI_REMOVE_BOM, esi_remove_bom, "", /* feature bit that turns on BOM removal in the ESI parser */
+ "Remove UTF-8 BOM",
+ "Remove UTF-8 BOM from front of object. " /* adjacent literals concatenate, so the separating space must live inside the string */
+ "Ignore and remove the UTF-8 BOM (0xeb 0xbb 0xbf) from front of object." /* NOTE(review): byte values mirror the parser; the standard UTF-8 BOM is 0xef 0xbb 0xbf - confirm */
+)
More information about the varnish-commit
mailing list