[master] c360e97 Simplify the ESI parser a tiny bit, and prevent certain bogus XML constructs ("</!--" and "</!CDATA[[" from confusing it.

Poul-Henning Kamp phk at FreeBSD.org
Thu Feb 25 09:45:06 CET 2016


commit c360e97a0280dd566cd63d3dc62e982877c6e779
Author: Poul-Henning Kamp <phk at FreeBSD.org>
Date:   Thu Feb 25 08:44:13 2016 +0000

    Simplify the ESI parser a tiny bit, and prevent certain bogus
    XML constructs ("</!--" and "</!CDATA[[" from confusing it.

diff --git a/bin/varnishd/cache/cache_esi_parse.c b/bin/varnishd/cache/cache_esi_parse.c
index 1e86b25..e4de056 100644
--- a/bin/varnishd/cache/cache_esi_parse.c
+++ b/bin/varnishd/cache/cache_esi_parse.c
@@ -128,9 +128,11 @@ static const char * const VEP_NEXTTAG =		"[NxtTag]";
 static const char * const VEP_NOTMYTAG =	"[NotMyTag]";
 
 static const char * const VEP_STARTTAG =	"[StartTag]";
+static const char * const VEP_COMMENTESI =	"[CommentESI]";
 static const char * const VEP_COMMENT =		"[Comment]";
 static const char * const VEP_CDATA =		"[CDATA]";
 static const char * const VEP_ESITAG =		"[ESITag]";
+static const char * const VEP_ESIENDTAG =	"[/ESITag]";
 
 static const char * const VEP_ESIREMOVE =	"[ESI:Remove]";
 static const char * const VEP_ESIINCLUDE =	"[ESI:Include]";
@@ -153,7 +155,10 @@ static const char * const VEP_MATCH =		"[Match]";
 /*---------------------------------------------------------------------*/
 
 static struct vep_match vep_match_starttag[] = {
+	{ "!--esi",	&VEP_COMMENTESI },
+	{ "!---->",	&VEP_NEXTTAG },
 	{ "!--",	&VEP_COMMENT },
+	{ "/esi:",	&VEP_ESIENDTAG },
 	{ "esi:",	&VEP_ESITAG },
 	{ "![CDATA[",	&VEP_CDATA },
 	{ NULL,		&VEP_NOTMYTAG }
@@ -413,19 +418,14 @@ vep_do_remove(struct vep_state *vep, enum dowhat what)
 	Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n",
 	    what, vep->endtag, vep->emptytag, vep->remove);
 	assert(what == DO_TAG);
-	if (vep->emptytag) {
-		vep_error(vep,
-		    "ESI 1.0 <esi:remove/> not legal");
-	} else {
-		if (vep->remove && !vep->endtag)
-			vep_error(vep,
-			    "ESI 1.0 <esi:remove> already open");
-		else if (!vep->remove && vep->endtag)
-			vep_error(vep,
-			    "ESI 1.0 <esi:remove> not open");
-		else
-			vep->remove = !vep->endtag;
-	}
+	if (vep->emptytag)
+		vep_error(vep, "ESI 1.0 <esi:remove/> not legal");
+	else if (vep->remove && !vep->endtag)
+		vep_error(vep, "ESI 1.0 <esi:remove> already open");
+	else if (!vep->remove && vep->endtag)
+		vep_error(vep, "ESI 1.0 <esi:remove> not open");
+	else
+		vep->remove = !vep->endtag;
 }
 
 /*---------------------------------------------------------------------
@@ -457,11 +457,9 @@ vep_do_include(struct vep_state *vep, enum dowhat what)
 	}
 	assert(what == DO_TAG);
 	if (!vep->emptytag)
-		vep_warn(vep,
-		    "ESI 1.0 <esi:include> lacks final '/'");
+		vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'");
 	if (vep->include_src == NULL) {
-		vep_error(vep,
-		    "ESI 1.0 <esi:include> lacks src attr");
+		vep_error(vep, "ESI 1.0 <esi:include> lacks src attr");
 		return;
 	}
 
@@ -672,7 +670,6 @@ VEP_Parse(struct vep_state *vep, const char *p, size_t l)
 			 * out for end of EsiCmt if armed.
 			 */
 			vep->emptytag = 0;
-			vep->endtag = 0;
 			vep->attr = NULL;
 			vep->dostuff = NULL;
 			while (p < e && *p != '<') {
@@ -685,8 +682,7 @@ VEP_Parse(struct vep_state *vep, const char *p, size_t l)
 					vep->esicmt_p = vep->esicmt;
 					continue;
 				}
-				if (!vep->remove &&
-				    vep->esicmt_p == vep->esicmt)
+				if (!vep->remove && vep->esicmt_p == vep->esicmt)
 					vep_mark_verbatim(vep, p);
 				p++;
 				if (*++vep->esicmt_p == '\0') {
@@ -715,44 +711,23 @@ VEP_Parse(struct vep_state *vep, const char *p, size_t l)
 		 */
 
 		} else if (vep->state == VEP_STARTTAG) {
-			/*
-			 * Start of tag, set up match table
-			 */
-			if (p < e) {
-				if (*p == '/') {
-					vep->endtag = 1;
-					p++;
-				}
-				vep->match = vep_match_starttag;
-				vep->state = VEP_MATCH;
-			}
+			/* Start of tag, set up match table */
+			vep->endtag = 0;
+			vep->match = vep_match_starttag;
+			vep->state = VEP_MATCH;
 		} else if (vep->state == VEP_COMMENT) {
-			/*
-			 * We are in a comment, find out if it is an
-			 * ESI comment or a regular comment
-			 */
-			if (vep->esicmt == NULL)
-				vep->esicmt_p = vep->esicmt = "esi";
-			while (p < e) {
-				if (*p != *vep->esicmt_p) {
-					vep->esicmt_p = vep->esicmt = NULL;
-					vep->until_p = vep->until = "-->";
-					vep->until_s = VEP_NEXTTAG;
-					vep->state = VEP_UNTIL;
-					break;
-				}
-				p++;
-				if (*++vep->esicmt_p != '\0')
-					continue;
-				if (vep->remove)
-					vep_error(vep,
-					    "ESI 1.0 Nested <!--esi"
-					    " element in <esi:remove>");
-				vep->esicmt_p = vep->esicmt = "-->";
-				vep->state = VEP_NEXTTAG;
-				vep_mark_skip(vep, p);
-				break;
-			}
+			vep->esicmt_p = vep->esicmt = NULL;
+			vep->until_p = vep->until = "-->";
+			vep->until_s = VEP_NEXTTAG;
+			vep->state = VEP_UNTIL;
+		} else if (vep->state == VEP_COMMENTESI) {
+			if (vep->remove)
+				vep_error(vep,
+				    "ESI 1.0 Nested <!--esi"
+				    " element in <esi:remove>");
+			vep->esicmt_p = vep->esicmt = "-->";
+			vep->state = VEP_NEXTTAG;
+			vep_mark_skip(vep, p);
 		} else if (vep->state == VEP_CDATA) {
 			/*
 			 * Easy: just look for the end of CDATA
@@ -760,6 +735,9 @@ VEP_Parse(struct vep_state *vep, const char *p, size_t l)
 			vep->until_p = vep->until = "]]>";
 			vep->until_s = VEP_NEXTTAG;
 			vep->state = VEP_UNTIL;
+		} else if (vep->state == VEP_ESIENDTAG) {
+			vep->endtag = 1;
+			vep->state = VEP_ESITAG;
 		} else if (vep->state == VEP_ESITAG) {
 			vep->in_esi_tag = 1;
 			vep->esi_found = 1;
@@ -987,7 +965,7 @@ VEP_Parse(struct vep_state *vep, const char *p, size_t l)
 				vep_mark_verbatim(vep, p);
 		} else {
 			Debug("*** Unknown state %s\n", vep->state);
-			INCOMPL();
+			WRONG("WRONG ESI PARSER STATE");
 		}
 	}
 	/*
diff --git a/bin/varnishtest/tests/e00008.vtc b/bin/varnishtest/tests/e00008.vtc
index 0345844..51c8c88 100644
--- a/bin/varnishtest/tests/e00008.vtc
+++ b/bin/varnishtest/tests/e00008.vtc
@@ -36,8 +36,11 @@ server s1 {
 		<esi:include  src=/>			30
 		<esi:include  src= />			31
 		</esi:include>				32
-		<esi:include  src="foofof />		33
-		<esi:include  foo=bar src=/body2 />	34
+		<!---->					33
+		</!-- bogocommentt -->			34
+		</![CDATA[ bogo-cdata ]]>		35
+		<esi:include  src="foofof />		36
+		<esi:include  foo=bar src=/body2 />	37
 	}
 	rxreq
 	expect req.url == "/body"
@@ -69,7 +72,7 @@ client c1 {
 	txreq
 	rxresp
 	expect resp.status == 200
-	expect resp.bodylen == 385
+	expect resp.bodylen == 464
 }
 
 client c1 -run



More information about the varnish-commit mailing list