r5725 - trunk/varnish-cache/bin/varnishd

phk at varnish-cache.org phk at varnish-cache.org
Thu Jan 13 10:04:55 CET 2011


Author: phk
Date: 2011-01-13 10:04:55 +0100 (Thu, 13 Jan 2011)
New Revision: 5725

Modified:
   trunk/varnish-cache/bin/varnishd/cache_esi_parse.c
Log:
Get comments and cdata right



Modified: trunk/varnish-cache/bin/varnishd/cache_esi_parse.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_esi_parse.c	2011-01-13 08:53:44 UTC (rev 5724)
+++ trunk/varnish-cache/bin/varnishd/cache_esi_parse.c	2011-01-13 09:04:55 UTC (rev 5725)
@@ -32,14 +32,15 @@
 #include "svnid.h"
 SVNID("$Id")
 
+#include <stdio.h>
+#include <stdlib.h>
+
 #include "cache.h"
 #include "cache_esi.h"
 #include "vend.h"
 #include "vct.h"
 #include "stevedore.h"
 
-#include <stdio.h>
-
 #ifndef OLD_ESI
 
 struct vep_match {
@@ -68,12 +69,14 @@
 	const char		*until_p;
 	const char		*until_s;
 
+	const char		*esicmt;
+	const char		*esicmt_p;
+
 	struct vep_match	*match;
 	int			match_l;
 
 	char			tag[10];
 	int			tag_i;
-	
 };
 
 /*---------------------------------------------------------------------*/
@@ -85,8 +88,7 @@
 static const char *VEP_MATCHBUF = 	"[MatchBuf]";
 static const char *VEP_NEXTTAG = 	"[NxtTag]";
 static const char *VEP_NOTMYTAG =	"[NotMyTag]";
-static const char *VEP_ESICMT =		"[EsiComment]";
-static const char *VEP_CMT =		"[Comment]";
+static const char *VEP_COMMENT =	"[Comment]";
 static const char *VEP_CDATA =		"[CDATA]";
 static const char *VEP_ESITAG =		"[ESITag]";
 static const char *VEP_ESIETAG =	"[ESIEndTag]";
@@ -100,18 +102,19 @@
 
 /*---------------------------------------------------------------------*/
 
-static struct vep_match vep_match_tbl[] = {
-	{ "<!--esi",	&VEP_ESICMT },
-	{ "<!--",	&VEP_CMT },
+static struct vep_match vep_match_starttag[] = {
+	{ "<!--",	&VEP_COMMENT },
 	{ "</esi:",	&VEP_ESIETAG },
 	{ "<esi:",	&VEP_ESITAG },
 	{ "<![CDATA[",	&VEP_CDATA },
 	{ NULL,		&VEP_NOTMYTAG }
 };
 
-static const int vep_match_tbl_len =
-    sizeof vep_match_tbl / sizeof vep_match_tbl[0];
+static const int vep_match_starttag_len =
+    sizeof vep_match_starttag / sizeof vep_match_starttag[0];
 
+/*---------------------------------------------------------------------*/
+
 static struct vep_match vep_match_esi[] = {
 	{ "include",	&VEP_ESIINCLUDE },
 	{ "remove",	&VEP_ESIREMOVE },
@@ -122,6 +125,8 @@
 static const int vep_match_esi_len =
     sizeof vep_match_esi / sizeof vep_match_esi[0];
 
+/*---------------------------------------------------------------------*/
+
 static struct vep_match vep_match_esie[] = {
 	{ "remove",	&VEP_ESI_REMOVE },
 	{ NULL,		&VEP_XXX }
@@ -208,6 +213,25 @@
 } 
 
 static void
+vep_emit_literal(struct vep_state *vep, const char *p, const char *e)
+{
+	ssize_t l;
+
+	if (e == NULL)
+		e = strchr(p, '\0');
+	if (vep->o_verbatim > 0) 
+		vep_emit_verbatim(vep);
+	if (vep->o_skip > 0) 
+		vep_emit_skip(vep);
+	l = e - p;
+	printf("---->L(%d) [%.*s]\n", (int)l, (int)l, p);
+	vep_emit_len(vep, l, VEC_L1, VEC_L2, VEC_L4);
+	vsb_printf(vep->vsb, "%lx\r\n%c", l, 0);
+	vsb_bcat(vep->vsb, p, l);
+}
+
+
+static void
 vep_mark_verbatim(struct vep_state *vep, const char *p)
 {
 	ssize_t l;
@@ -241,24 +265,12 @@
 	vep->ver_p = p;
 } 
 
-static void
-vep_emit_literal(struct vep_state *vep, const char *p, const char *e)
-{
-	ssize_t l;
-
-	if (vep->o_verbatim > 0) 
-		vep_emit_verbatim(vep);
-	if (vep->o_skip > 0) 
-		vep_emit_skip(vep);
-	l = e - p;
-	printf("---->L(%d) [%.*s]\n", (int)l, (int)l, p);
-	vep_emit_len(vep, l, VEC_L1, VEC_L2, VEC_L4);
-	vsb_printf(vep->vsb, "%lx\r\n%c", l, 0);
-	vsb_bcat(vep->vsb, p, l);
-}
-
 /*---------------------------------------------------------------------
- * Parse object for ESI instructions
+ * Lex/Parse object for ESI instructions
+ *
+ * This function is called with the input object piecemal so do not
+ * assume that we have more than one char available at at time, but
+ * optimize for getting huge chunks. 
  */
 
 static void
@@ -272,49 +284,98 @@
 
 	e = b + l;
 
-	vep->ver_p = b;
-	printf("EP Call %d [%.*s]\n", (int)l, (int)l, b);
+	if (0)
+		vep_emit_literal(vep, "A", "B");
+
 	p = b;
 	while (p < e) {
 		AN(vep->state);
-		printf("EP %s [%.*s]\n",
+		printf("EP %s %d %d (%.*s) [%.*s]\n",
 		    vep->state,
+		    vep->skip,
+		    vep->remove,
+		    vep->tag_i, vep->tag,
 		    (int)(e - p), p);
+		fflush(stdout);
+		usleep(10);
+
+		/******************************************************
+		 *
+		 */
+
 		if (vep->state == VEP_START) {
 			/*
-			 * Look for the first non-white char, and
-			 * abandon if it is not '<' under the assumption
-			 * that it is not an ESI file
+			 * If the first non-whitespace char is different
+			 * from '<' we assume this is not XML.
 			 */
-			while (p < e && vct_islws(*p))
+			while (p < e && vct_islws(*p)) {
 				p++;
+				vep_mark_verbatim(vep, p);
+			}
 			if (p < e) {
-				if (*p == '<')
+				if (*p == '<') {
 					vep->state = VEP_STARTTAG;
-				else
+				} else
 					vep->state = VEP_NOTXML;
 			}
 		} else if (vep->state == VEP_NOTXML) {
+			/*
+			 * This is not recognized as XML, just skip thru
+			 * vfp_esi_end() will handle the rest
+			 */
 			p = e;
+
+		/******************************************************
+		 *
+		 */
+
+		} else if (vep->state == VEP_NOTMYTAG) {
+			vep->tag_i = 0;
+			while (p < e) {
+				if (!vep->remove)
+					vep_mark_verbatim(vep, p + 1);
+				if (*p++ == '>') {
+					vep->state = VEP_NEXTTAG;
+					break;
+				}
+			}
 		} else if (vep->state == VEP_NEXTTAG) {
 			/*
-			 * Hunt for start of next tag
+			 * Hunt for start of next tag and keep an eye
+			 * out for end of EsiCmt if armed.
 			 */
 			while (p < e && *p != '<') {
-#if 0
-				if (vep->incmt != NULL &&
-				    *p == *vep->incmt_p) {
-					if (*++vep->incmt_p == '\0') {
-						vep->incmt = NULL;
-						vep->incmt = NULL;
+				if (vep->esicmt_p != NULL &&
+				    *p == *vep->esicmt_p++) {
+					p++;
+					if (*vep->esicmt_p == '\0') {
+						vep->esicmt = NULL;
+						vep->esicmt_p = NULL;
+						/*
+						 * The end of the esicmt
+						 * should not be emitted.
+						 * But the stuff before should
+						 */
+						if (!vep->remove)
+							vep_mark_verbatim(vep,
+							    p - 3);
+						vep_mark_skip(vep, p);
 					}
-				} else
-					vep->incmt_p = vep->incmt;
-#endif
-				p++;
+				} else {
+					p++;
+					vep->esicmt_p = vep->esicmt;
+					if (vep->esicmt_p == NULL &&
+					    !vep->remove)
+						vep_mark_verbatim(vep, p);
+				}
 			}
 			if (p < e)
 				vep->state = VEP_STARTTAG;
+
+		/******************************************************
+		 *
+		 */
+
 		} else if (vep->state == VEP_STARTTAG) {
 			/*
 			 * Start of tag, set up match table
@@ -322,10 +383,41 @@
 			assert(*p == '<');
 			if (!vep->remove)
 				vep_mark_verbatim(vep, p);
-			vep->match = vep_match_tbl;
-			vep->match_l = vep_match_tbl_len;
+			vep->match = vep_match_starttag;
+			vep->match_l = vep_match_starttag_len;
 			vep->state = VEP_MATCH;
-			vep->skip = 1;
+		} else if (vep->state == VEP_COMMENT) {
+			/*
+			 * We are in a comment, find out if it is an
+			 * ESI comment or a regular comment
+			 */
+			if (vep->esicmt == NULL)
+				vep->esicmt_p = vep->esicmt = "esi";
+			while (p < e) {
+				if (*p == *vep->esicmt_p) {
+					p++;
+					if (*++vep->esicmt_p == '\0') {
+						vep->esicmt_p =
+						    vep->esicmt = "-->";
+						vep->state = VEP_NEXTTAG;
+						vep_mark_skip(vep, p);
+						break;
+					}
+				} else {
+					vep->esicmt_p = vep->esicmt = NULL; 
+					vep->until_p = vep->until = "-->";
+					vep->until_s = VEP_NEXTTAG;
+					vep->state = VEP_UNTIL;
+					break;
+				}
+			}
+		} else if (vep->state == VEP_CDATA) {
+			/*
+			 * Easy: just look for the end of CDATA
+			 */
+			vep->until_p = vep->until = "]]>";
+			vep->until_s = VEP_NEXTTAG;
+			vep->state = VEP_UNTIL;
 		} else if (vep->state == VEP_ENDTAG) {
 			while (p < e && *p != '>') 
 				p++;
@@ -335,10 +427,38 @@
 			}
 			vep_mark_skip(vep, p);
 			vep->skip = 0;
-		} else if (vep->state == VEP_CDATA) {
-			vep->until_p = vep->until = "]]>";
-			vep->until_s = VEP_NEXTTAG;
-			vep->state = VEP_UNTIL;
+
+		/******************************************************
+		 *
+		 */
+
+		} else if (vep->state == VEP_ESITAG) {
+			if (vep->remove) {
+				VSC_main->esi_errors++;
+				vep->state = VEP_NOTMYTAG;
+				break;
+			}
+			vep->skip = 1;
+			vep_mark_skip(vep, p);
+			vep->match = vep_match_esi;
+			vep->match_l = vep_match_esi_len;
+			vep->state = VEP_MATCH;
+		} else if (vep->state == VEP_ESIETAG) {
+			vep->match = vep_match_esie;
+			vep->match_l = vep_match_esie_len;
+			vep->state = VEP_MATCH;
+		} else if (vep->state == VEP_ESIREMOVE) {
+			vep_mark_skip(vep, p);
+			vep->remove = 1;
+			vep->state = VEP_NEXTTAG;
+		} else if (vep->state == VEP_ESI_REMOVE) {
+			vep->remove = 0;
+			vep->state = VEP_ENDTAG;
+
+		/******************************************************
+		 *
+		 */
+
 		} else if (vep->state == VEP_MATCH) {
 			/*
 			 * Match against a table
@@ -376,24 +496,9 @@
 				b = e;
 				break;
 			}
-			if (vm->match && vep->tag_i > strlen(vm->match)) {
-				/*
-				 * not generally safe but
-				 * works for the required
-				 * case of <--esi and <--
-				 */
-				p -= vep->tag_i -
-				    strlen(vm->match);
-				vep->tag_i--;
-			}
-			if (vm->match == NULL) {
-				vep_emit_literal(vep,
-				    vep->tag, vep->tag + vep->tag_i);
-			}
 			b = p;
 			vep->state = *vm->state;
 			vep->match = NULL;
-			vep->tag_i = 0;
 		} else if (vep->state == VEP_UNTIL) {
 			/*
 			 * Skip until we see magic string
@@ -406,40 +511,11 @@
 					break;
 				}
 			}
-		} else if (vep->state == VEP_NOTMYTAG) {
-			vep->skip = 0;
-			while (p < e) {
-				if (*p++ == '>') {
-					vep->state = VEP_NEXTTAG;
-					break;
-				}
-			}
-		} else if (vep->state == VEP_ESITAG) {
-			vep->skip = 1;
-			vep_mark_skip(vep, p);
-			vep->match = vep_match_esi;
-			vep->match_l = vep_match_esi_len;
-			vep->state = VEP_MATCH;
-		} else if (vep->state == VEP_ESIETAG) {
-			vep->match = vep_match_esie;
-			vep->match_l = vep_match_esie_len;
-			vep->state = VEP_MATCH;
-		} else if (vep->state == VEP_ESIREMOVE) {
-			vep_mark_skip(vep, p);
-			vep->remove = 1;
-			vep->state = VEP_NEXTTAG;
-		} else if (vep->state == VEP_ESI_REMOVE) {
-			vep->remove = 0;
-			vep->state = VEP_ENDTAG;
 		} else {
 			printf("*** Unknown state %s\n", vep->state);
-			break;
+			INCOMPL();
 		}
 	}
-	if (vep->remove || vep->skip)
-		vep_mark_skip(vep, p);
-	else
-		vep_mark_verbatim(vep, p);
 }
 
 /*---------------------------------------------------------------------
@@ -473,6 +549,7 @@
 		w = HTC_Read(htc, st->ptr + st->len, l);
 		if (w <= 0)
 			return (w);
+		vep->ver_p = (const char *)st->ptr + st->len;
 #if 1
 		{
 		for (l = 0; l < w; l++) 




More information about the varnish-commit mailing list