| | varnish-cache/bin/varnishd/cache/cache_esi_parse.c |
0 |
|
/*- |
1 |
|
* Copyright (c) 2011 Varnish Software AS |
2 |
|
* All rights reserved. |
3 |
|
* |
4 |
|
* Author: Poul-Henning Kamp <phk@phk.freebsd.dk> |
5 |
|
* |
6 |
|
* SPDX-License-Identifier: BSD-2-Clause |
7 |
|
* |
8 |
|
* Redistribution and use in source and binary forms, with or without |
9 |
|
* modification, are permitted provided that the following conditions |
10 |
|
* are met: |
11 |
|
* 1. Redistributions of source code must retain the above copyright |
12 |
|
* notice, this list of conditions and the following disclaimer. |
13 |
|
* 2. Redistributions in binary form must reproduce the above copyright |
14 |
|
* notice, this list of conditions and the following disclaimer in the |
15 |
|
* documentation and/or other materials provided with the distribution. |
16 |
|
* |
17 |
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
18 |
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
19 |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
20 |
|
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
21 |
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
22 |
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
23 |
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
24 |
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
25 |
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
26 |
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
27 |
|
* SUCH DAMAGE. |
28 |
|
* |
29 |
|
* VEP Varnish Esi Parsing |
30 |
|
*/ |
31 |
|
|
32 |
|
#include "config.h" |
33 |
|
|
34 |
|
#include "cache_varnishd.h" |
35 |
|
#include "cache_filter.h" |
36 |
|
|
37 |
|
#include "cache_vgz.h" |
38 |
|
#include "cache_esi.h" |
39 |
|
#include "vct.h" |
40 |
|
#include "vend.h" |
41 |
|
#include "vgz.h" |
42 |
|
|
43 |
|
//#define Debug(fmt, ...) printf(fmt, __VA_ARGS__) |
44 |
|
#define Debug(fmt, ...) /**/ |
45 |
|
|
46 |
|
struct vep_state; |
47 |
|
|
48 |
|
enum dowhat {DO_ATTR, DO_TAG}; |
49 |
|
typedef void dostuff_f(struct vep_state *, enum dowhat); |
50 |
|
|
51 |
|
struct vep_match { |
52 |
|
const char *match; |
53 |
|
const char * const *state; |
54 |
|
}; |
55 |
|
|
56 |
|
enum vep_mark { VERBATIM = 0, SKIP }; |
57 |
|
|
58 |
|
struct vep_state { |
59 |
|
unsigned magic; |
60 |
|
#define VEP_MAGIC 0x55cb9b82 |
61 |
|
struct vsb *vsb; |
62 |
|
|
63 |
|
const char *url; |
64 |
|
struct vfp_ctx *vc; |
65 |
|
int dogzip; |
66 |
|
vep_callback_t *cb; |
67 |
|
void *cb_priv; |
68 |
|
|
69 |
|
/* Internal Counter for default call-back function */ |
70 |
|
ssize_t cb_x; |
71 |
|
|
72 |
|
/* parser state */ |
73 |
|
const char *state; |
74 |
|
unsigned startup; |
75 |
|
unsigned esi_found; |
76 |
|
|
77 |
|
unsigned endtag; |
78 |
|
unsigned emptytag; |
79 |
|
unsigned canattr; |
80 |
|
|
81 |
|
unsigned remove; |
82 |
|
|
83 |
|
ssize_t o_wait; |
84 |
|
ssize_t o_pending; |
85 |
|
ssize_t o_total; |
86 |
|
uint32_t crc; |
87 |
|
ssize_t o_crc; |
88 |
|
uint32_t crcp; |
89 |
|
ssize_t o_last; |
90 |
|
|
91 |
|
const char *hack_p; |
92 |
|
const char *ver_p; |
93 |
|
|
94 |
|
const char *until; |
95 |
|
const char *until_p; |
96 |
|
const char *until_s; |
97 |
|
|
98 |
|
int in_esi_tag; |
99 |
|
|
100 |
|
const char *esicmt; |
101 |
|
const char *esicmt_p; |
102 |
|
|
103 |
|
struct vep_match *attr; |
104 |
|
struct vsb *attr_vsb; |
105 |
|
int attr_delim; |
106 |
|
|
107 |
|
struct vep_match *match; |
108 |
|
struct vep_match *match_hit; |
109 |
|
|
110 |
|
char tag[8]; |
111 |
|
int tag_i; |
112 |
|
|
113 |
|
dostuff_f *dostuff; |
114 |
|
|
115 |
|
struct vsb *include_src; |
116 |
|
unsigned include_continue; |
117 |
|
|
118 |
|
unsigned nm_skip; |
119 |
|
unsigned nm_verbatim; |
120 |
|
unsigned nm_pending; |
121 |
|
enum vep_mark last_mark; |
122 |
|
}; |
123 |
|
|
124 |
|
/*---------------------------------------------------------------------*/ |
125 |
|
|
126 |
|
static const char * const VEP_START = "[Start]"; |
127 |
|
static const char * const VEP_BOM = "[BOM]"; |
128 |
|
static const char * const VEP_TESTXML = "[TestXml]"; |
129 |
|
static const char * const VEP_NOTXML = "[NotXml]"; |
130 |
|
|
131 |
|
static const char * const VEP_NEXTTAG = "[NxtTag]"; |
132 |
|
static const char * const VEP_NOTMYTAG = "[NotMyTag]"; |
133 |
|
|
134 |
|
static const char * const VEP_STARTTAG = "[StartTag]"; |
135 |
|
static const char * const VEP_COMMENTESI = "[CommentESI]"; |
136 |
|
static const char * const VEP_COMMENT = "[Comment]"; |
137 |
|
static const char * const VEP_CDATA = "[CDATA]"; |
138 |
|
static const char * const VEP_ESITAG = "[ESITag]"; |
139 |
|
static const char * const VEP_ESIENDTAG = "[/ESITag]"; |
140 |
|
|
141 |
|
static const char * const VEP_ESIREMOVE = "[ESI:Remove]"; |
142 |
|
static const char * const VEP_ESIINCLUDE = "[ESI:Include]"; |
143 |
|
static const char * const VEP_ESICOMMENT = "[ESI:Comment]"; |
144 |
|
static const char * const VEP_ESIBOGON = "[ESI:Bogon]"; |
145 |
|
|
146 |
|
static const char * const VEP_INTAG = "[InTag]"; |
147 |
|
static const char * const VEP_TAGERROR = "[TagError]"; |
148 |
|
|
149 |
|
static const char * const VEP_ATTR = "[Attribute]"; |
150 |
|
static const char * const VEP_SKIPATTR = "[SkipAttribute]"; |
151 |
|
static const char * const VEP_ATTRDELIM = "[AttrDelim]"; |
152 |
|
static const char * const VEP_ATTRGETVAL = "[AttrGetValue]"; |
153 |
|
static const char * const VEP_ATTRVAL = "[AttrValue]"; |
154 |
|
|
155 |
|
static const char * const VEP_UNTIL = "[Until]"; |
156 |
|
static const char * const VEP_MATCHBUF = "[MatchBuf]"; |
157 |
|
static const char * const VEP_MATCH = "[Match]"; |
158 |
|
|
159 |
|
/*---------------------------------------------------------------------*/ |
160 |
|
|
161 |
|
static struct vep_match vep_match_starttag[] = { |
162 |
|
{ "!--esi", &VEP_COMMENTESI }, |
163 |
|
{ "!---->", &VEP_NEXTTAG }, |
164 |
|
{ "!--", &VEP_COMMENT }, |
165 |
|
{ "/esi:", &VEP_ESIENDTAG }, |
166 |
|
{ "esi:", &VEP_ESITAG }, |
167 |
|
{ "![CDATA[", &VEP_CDATA }, |
168 |
|
{ NULL, &VEP_NOTMYTAG } |
169 |
|
}; |
170 |
|
|
171 |
|
/*---------------------------------------------------------------------*/ |
172 |
|
|
173 |
|
static struct vep_match vep_match_esi[] = { |
174 |
|
{ "include", &VEP_ESIINCLUDE }, |
175 |
|
{ "remove", &VEP_ESIREMOVE }, |
176 |
|
{ "comment", &VEP_ESICOMMENT }, |
177 |
|
{ NULL, &VEP_ESIBOGON } |
178 |
|
}; |
179 |
|
|
180 |
|
/*---------------------------------------------------------------------*/ |
181 |
|
|
182 |
|
static struct vep_match vep_match_attr_include[] = { |
183 |
|
{ "src=", &VEP_ATTRGETVAL }, |
184 |
|
{ "onerror=", &VEP_ATTRGETVAL }, |
185 |
|
{ NULL, &VEP_SKIPATTR } |
186 |
|
}; |
187 |
|
|
188 |
|
/*---------------------------------------------------------------------*/ |
189 |
|
|
190 |
|
static struct vep_match vep_match_bom[] = { |
191 |
|
{ "\xeb\xbb\xbf", &VEP_START }, |
192 |
|
{ NULL, &VEP_BOM } |
193 |
|
}; |
194 |
|
|
195 |
|
/*-------------------------------------------------------------------- |
196 |
|
* Report a parsing error |
197 |
|
*/ |
198 |
|
|
199 |
|
static void |
200 |
1424 |
vep_error(const struct vep_state *vep, const char *p) |
201 |
|
{ |
202 |
1424 |
VSC_C_main->esi_errors++; |
203 |
2848 |
VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR after %zd %s", |
204 |
1424 |
vep->o_last, p); |
205 |
1424 |
} |
206 |
|
|
207 |
|
/*-------------------------------------------------------------------- |
208 |
|
* Report a parsing warning |
209 |
|
*/ |
210 |
|
|
211 |
|
static void |
212 |
200 |
vep_warn(const struct vep_state *vep, const char *p) |
213 |
|
{ |
214 |
200 |
VSC_C_main->esi_warnings++; |
215 |
400 |
VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN after %zd %s", |
216 |
200 |
vep->o_last, p); |
217 |
200 |
} |
218 |
|
|
219 |
|
/*--------------------------------------------------------------------- |
220 |
|
* return match or NULL if more input needed. |
221 |
|
*/ |
222 |
|
|
223 |
|
static struct vep_match * |
224 |
492801 |
vep_match(const struct vep_state *vep, const char *b, const char *e) |
225 |
|
{ |
226 |
|
struct vep_match *vm; |
227 |
|
const char *q, *r; |
228 |
|
|
229 |
492801 |
AN(vep->match); |
230 |
3204600 |
for (vm = vep->match; vm->match != NULL; vm++) { |
231 |
2767039 |
assert(strlen(vm->match) <= sizeof (vep->tag)); |
232 |
2767039 |
r = b; |
233 |
3032309 |
for (q = vm->match; *q != '\0' && r < e; q++, r++) |
234 |
2977069 |
if (*q != *r) |
235 |
2711799 |
break; |
236 |
2767039 |
if (*q == '\0') |
237 |
45560 |
break; |
238 |
2721479 |
if (r == e) |
239 |
9680 |
return (NULL); |
240 |
2711799 |
} |
241 |
483121 |
return (vm); |
242 |
492801 |
} |
243 |
|
|
244 |
|
/*--------------------------------------------------------------------- |
245 |
|
* |
246 |
|
*/ |
247 |
|
|
248 |
|
static void |
249 |
39160 |
vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64) |
250 |
|
{ |
251 |
|
uint8_t buf[9]; |
252 |
|
|
253 |
39160 |
assert(l > 0); |
254 |
39160 |
if (l < 256) { |
255 |
38480 |
buf[0] = (uint8_t)m8; |
256 |
38480 |
buf[1] = (uint8_t)l; |
257 |
38480 |
assert((ssize_t)buf[1] == l); |
258 |
38480 |
VSB_bcat(vep->vsb, buf, 2); |
259 |
39160 |
} else if (l < 65536) { |
260 |
440 |
buf[0] = (uint8_t)m16; |
261 |
440 |
vbe16enc(buf + 1, (uint16_t)l); |
262 |
440 |
assert((ssize_t)vbe16dec(buf + 1) == l); |
263 |
440 |
VSB_bcat(vep->vsb, buf, 3); |
264 |
440 |
} else { |
265 |
240 |
buf[0] = (uint8_t)m64; |
266 |
240 |
vbe64enc(buf + 1, l); |
267 |
240 |
assert((ssize_t)vbe64dec(buf + 1) == l); |
268 |
240 |
VSB_bcat(vep->vsb, buf, 9); |
269 |
|
} |
270 |
39160 |
} |
271 |
|
|
272 |
|
static void |
273 |
17800 |
vep_emit_skip(const struct vep_state *vep, ssize_t l) |
274 |
|
{ |
275 |
|
|
276 |
17800 |
vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8); |
277 |
17800 |
} |
278 |
|
|
279 |
|
static void |
280 |
17320 |
vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc) |
281 |
|
{ |
282 |
|
uint8_t buf[4]; |
283 |
|
|
284 |
17320 |
vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8); |
285 |
17320 |
if (vep->dogzip) { |
286 |
4040 |
vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8); |
287 |
4040 |
vbe32enc(buf, vep->crc); |
288 |
4040 |
VSB_bcat(vep->vsb, buf, sizeof buf); |
289 |
4040 |
} |
290 |
17320 |
} |
291 |
|
|
292 |
|
static void |
293 |
40720 |
vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark) |
294 |
|
{ |
295 |
|
|
296 |
40720 |
assert(l >= 0); |
297 |
40720 |
if (l == 0) |
298 |
5600 |
return; |
299 |
35120 |
assert(mark == SKIP || mark == VERBATIM); |
300 |
35120 |
if (mark == SKIP) |
301 |
17800 |
vep_emit_skip(vep, l); |
302 |
|
else |
303 |
17320 |
vep_emit_verbatim(vep, l, vep->o_crc); |
304 |
|
|
305 |
35120 |
vep->crc = crc32(0L, Z_NULL, 0); |
306 |
35120 |
vep->o_crc = 0; |
307 |
35120 |
vep->o_total += l; |
308 |
40720 |
} |
309 |
|
|
310 |
|
/*--------------------------------------------------------------------- |
311 |
|
* |
312 |
|
*/ |
313 |
|
|
314 |
|
static void |
315 |
2648427 |
vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark) |
316 |
|
{ |
317 |
|
ssize_t l, lcb; |
318 |
|
|
319 |
2648427 |
assert(mark == SKIP || mark == VERBATIM); |
320 |
|
|
321 |
|
/* The NO-OP case, no data, no pending data & no change of mode */ |
322 |
2648427 |
if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0) |
323 |
1770 |
return; |
324 |
|
|
325 |
|
/* |
326 |
|
* If we changed mode, emit whatever the opposite mode |
327 |
|
* assembled before the pending bytes. |
328 |
|
*/ |
329 |
|
|
330 |
2646657 |
if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) { |
331 |
64400 |
lcb = vep->cb(vep->vc, vep->cb_priv, 0, |
332 |
32200 |
mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN); |
333 |
32200 |
vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); |
334 |
32200 |
vep->o_last = lcb; |
335 |
32200 |
vep->o_wait = 0; |
336 |
32200 |
} |
337 |
|
|
338 |
|
/* Transfer pending bytes CRC into active mode CRC */ |
339 |
2646657 |
if (vep->o_pending) { |
340 |
2687 |
(void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending, |
341 |
|
VGZ_NORMAL); |
342 |
2687 |
if (vep->o_crc == 0) { |
343 |
2087 |
vep->crc = vep->crcp; |
344 |
2087 |
vep->o_crc = vep->o_pending; |
345 |
2087 |
} else { |
346 |
1200 |
vep->crc = crc32_combine(vep->crc, |
347 |
600 |
vep->crcp, vep->o_pending); |
348 |
600 |
vep->o_crc += vep->o_pending; |
349 |
|
} |
350 |
2687 |
vep->crcp = crc32(0L, Z_NULL, 0); |
351 |
2687 |
vep->o_wait += vep->o_pending; |
352 |
2687 |
vep->o_pending = 0; |
353 |
2687 |
} |
354 |
|
|
355 |
|
/* Process this bit of input */ |
356 |
2646657 |
AN(vep->ver_p); |
357 |
2646657 |
l = p - vep->ver_p; |
358 |
2646657 |
assert(l >= 0); |
359 |
2646657 |
vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l); |
360 |
2646657 |
vep->o_crc += l; |
361 |
2646657 |
vep->ver_p = p; |
362 |
|
|
363 |
2646657 |
vep->o_wait += l; |
364 |
2646657 |
vep->last_mark = mark; |
365 |
2646657 |
(void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL); |
366 |
2648427 |
} |
367 |
|
|
368 |
|
static void |
369 |
1528145 |
vep_mark_verbatim(struct vep_state *vep, const char *p) |
370 |
|
{ |
371 |
|
|
372 |
1528145 |
vep_mark_common(vep, p, VERBATIM); |
373 |
1528145 |
vep->nm_verbatim++; |
374 |
1528145 |
} |
375 |
|
|
376 |
|
static void |
377 |
1111762 |
vep_mark_skip(struct vep_state *vep, const char *p) |
378 |
|
{ |
379 |
|
|
380 |
1111762 |
vep_mark_common(vep, p, SKIP); |
381 |
1111762 |
vep->nm_skip++; |
382 |
1111762 |
} |
383 |
|
|
384 |
|
static void |
385 |
5000 |
vep_mark_pending(struct vep_state *vep, const char *p) |
386 |
|
{ |
387 |
|
ssize_t l; |
388 |
|
|
389 |
5000 |
AN(vep->ver_p); |
390 |
5000 |
l = p - vep->ver_p; |
391 |
5000 |
assert(l > 0); |
392 |
5000 |
vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l); |
393 |
5000 |
vep->ver_p = p; |
394 |
|
|
395 |
5000 |
vep->o_pending += l; |
396 |
5000 |
vep->nm_pending++; |
397 |
5000 |
} |
398 |
|
|
399 |
|
/*--------------------------------------------------------------------- |
400 |
|
*/ |
401 |
|
|
402 |
|
static void v_matchproto_() |
403 |
240 |
vep_do_comment(struct vep_state *vep, enum dowhat what) |
404 |
|
{ |
405 |
|
Debug("DO_COMMENT(%d)\n", what); |
406 |
240 |
assert(what == DO_TAG); |
407 |
240 |
if (!vep->emptytag) |
408 |
80 |
vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'"); |
409 |
240 |
} |
410 |
|
|
411 |
|
/*--------------------------------------------------------------------- |
412 |
|
*/ |
413 |
|
|
414 |
|
static void v_matchproto_() |
415 |
5680 |
vep_do_remove(struct vep_state *vep, enum dowhat what) |
416 |
|
{ |
417 |
|
Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n", |
418 |
|
what, vep->endtag, vep->emptytag, vep->remove); |
419 |
5680 |
assert(what == DO_TAG); |
420 |
5680 |
if (vep->emptytag) |
421 |
40 |
vep_error(vep, "ESI 1.0 <esi:remove/> not legal"); |
422 |
5640 |
else if (vep->remove && !vep->endtag) |
423 |
40 |
vep_error(vep, "ESI 1.0 <esi:remove> already open"); |
424 |
5600 |
else if (!vep->remove && vep->endtag) |
425 |
40 |
vep_error(vep, "ESI 1.0 <esi:remove> not open"); |
426 |
|
else |
427 |
5560 |
vep->remove = !vep->endtag; |
428 |
5680 |
} |
429 |
|
|
430 |
|
/*--------------------------------------------------------------------- |
431 |
|
*/ |
432 |
|
|
433 |
|
static void |
434 |
10280 |
include_attr_src(struct vep_state *vep) |
435 |
|
{ |
436 |
|
const char *p; |
437 |
|
|
438 |
10280 |
if (vep->include_src != NULL) { |
439 |
40 |
vep_error(vep, |
440 |
|
"ESI 1.0 <esi:include> " |
441 |
|
"has multiple src= attributes"); |
442 |
40 |
vep->state = VEP_TAGERROR; |
443 |
40 |
VSB_destroy(&vep->attr_vsb); |
444 |
40 |
VSB_destroy(&vep->include_src); |
445 |
40 |
return; |
446 |
|
} |
447 |
369680 |
for (p = VSB_data(vep->attr_vsb); *p != '\0'; p++) |
448 |
359480 |
if (vct_islws(*p)) |
449 |
40 |
break; |
450 |
10240 |
if (*p != '\0') { |
451 |
40 |
vep_error(vep, |
452 |
|
"ESI 1.0 <esi:include> " |
453 |
|
"has whitespace in src= attribute"); |
454 |
40 |
vep->state = VEP_TAGERROR; |
455 |
40 |
VSB_destroy(&vep->attr_vsb); |
456 |
40 |
if (vep->include_src != NULL) |
457 |
0 |
VSB_destroy(&vep->include_src); |
458 |
40 |
return; |
459 |
|
} |
460 |
10200 |
vep->include_src = vep->attr_vsb; |
461 |
10200 |
vep->attr_vsb = NULL; |
462 |
10280 |
} |
463 |
|
|
464 |
|
static void |
465 |
280 |
include_attr_onerror(struct vep_state *vep) |
466 |
|
{ |
467 |
|
|
468 |
280 |
vep->include_continue = !strcmp("continue", VSB_data(vep->attr_vsb)); |
469 |
280 |
VSB_destroy(&vep->attr_vsb); |
470 |
280 |
} |
471 |
|
|
472 |
|
static void v_matchproto_() |
473 |
20800 |
vep_do_include(struct vep_state *vep, enum dowhat what) |
474 |
|
{ |
475 |
|
const char *p, *q, *h; |
476 |
|
ssize_t l; |
477 |
|
char incl; |
478 |
|
|
479 |
|
Debug("DO_INCLUDE(%d)\n", what); |
480 |
20800 |
if (what == DO_ATTR) { |
481 |
|
Debug("ATTR (%s) (%s)\n", vep->match_hit->match, |
482 |
|
VSB_data(vep->attr_vsb)); |
483 |
10560 |
if (!strcmp("src=", vep->match_hit->match)) { |
484 |
10280 |
include_attr_src(vep); |
485 |
10280 |
return; |
486 |
|
} |
487 |
280 |
if (!strcmp("onerror=", vep->match_hit->match)) { |
488 |
280 |
include_attr_onerror(vep); |
489 |
280 |
return; |
490 |
|
} |
491 |
0 |
WRONG("Unhandled <esi:include> attribute"); |
492 |
0 |
} |
493 |
10240 |
assert(what == DO_TAG); |
494 |
10240 |
if (!vep->emptytag) |
495 |
80 |
vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'"); |
496 |
10240 |
if (vep->include_src == NULL) { |
497 |
80 |
vep_error(vep, "ESI 1.0 <esi:include> lacks src attr"); |
498 |
80 |
return; |
499 |
|
} |
500 |
|
|
501 |
|
/* |
502 |
|
* Strictly speaking, we ought to spit out any piled up skip before |
503 |
|
* emitting the VEC for the include, but objectively that makes no |
504 |
|
* difference and robs us of a chance to collapse another skip into |
505 |
|
* this one so we don't do that. |
506 |
|
* However, we cannot tolerate any verbatim stuff piling up. |
507 |
|
* The mark_skip() before calling dostuff should have taken |
508 |
|
* care of that. Make sure. |
509 |
|
*/ |
510 |
10160 |
assert(vep->o_wait == 0 || vep->last_mark == SKIP); |
511 |
|
/* XXX: what if it contains NUL bytes ?? */ |
512 |
10160 |
p = VSB_data(vep->include_src); |
513 |
10160 |
l = VSB_len(vep->include_src); |
514 |
10160 |
h = 0; |
515 |
|
|
516 |
10160 |
incl = vep->include_continue ? VEC_IC : VEC_IA; |
517 |
|
|
518 |
10160 |
if (l > 7 && !memcmp(p, "http://", 7)) { |
519 |
80 |
h = p + 7; |
520 |
80 |
p = strchr(h, '/'); |
521 |
80 |
if (p == NULL) { |
522 |
40 |
vep_error(vep, |
523 |
|
"ESI 1.0 <esi:include> invalid src= URL"); |
524 |
40 |
vep->state = VEP_TAGERROR; |
525 |
40 |
AZ(vep->attr_vsb); |
526 |
40 |
VSB_destroy(&vep->include_src); |
527 |
40 |
return; |
528 |
|
} |
529 |
|
Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p); |
530 |
40 |
VSB_printf(vep->vsb, "%c", incl); |
531 |
40 |
VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0); |
532 |
10120 |
} else if (l > 8 && !memcmp(p, "https://", 8)) { |
533 |
120 |
if (!FEATURE(FEATURE_ESI_IGNORE_HTTPS)) { |
534 |
40 |
vep_warn(vep, |
535 |
|
"ESI 1.0 <esi:include> with https:// ignored"); |
536 |
40 |
vep->state = VEP_TAGERROR; |
537 |
40 |
AZ(vep->attr_vsb); |
538 |
40 |
VSB_destroy(&vep->include_src); |
539 |
40 |
return; |
540 |
|
} |
541 |
80 |
vep_warn(vep, |
542 |
|
"ESI 1.0 <esi:include> https:// treated as http://"); |
543 |
80 |
h = p + 8; |
544 |
80 |
p = strchr(h, '/'); |
545 |
80 |
if (p == NULL) { |
546 |
40 |
vep_error(vep, |
547 |
|
"ESI 1.0 <esi:include> invalid src= URL"); |
548 |
40 |
vep->state = VEP_TAGERROR; |
549 |
40 |
AZ(vep->attr_vsb); |
550 |
40 |
VSB_destroy(&vep->include_src); |
551 |
40 |
return; |
552 |
|
} |
553 |
40 |
VSB_printf(vep->vsb, "%c", incl); |
554 |
40 |
VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0); |
555 |
10000 |
} else if (*p == '/') { |
556 |
8760 |
VSB_printf(vep->vsb, "%c", incl); |
557 |
8760 |
VSB_printf(vep->vsb, "%c", 0); |
558 |
8760 |
} else { |
559 |
1200 |
VSB_printf(vep->vsb, "%c", incl); |
560 |
1200 |
VSB_printf(vep->vsb, "%c", 0); |
561 |
|
/* Look for the last / before a '?' */ |
562 |
1200 |
h = NULL; |
563 |
3600 |
for (q = vep->url; *q && *q != '?'; q++) |
564 |
3720 |
if (*q == '/') |
565 |
1320 |
h = q; |
566 |
1200 |
if (h == NULL) |
567 |
40 |
h = q + 1; |
568 |
|
|
569 |
|
Debug("INCL:: [%.*s]/[%s]\n", |
570 |
|
(int)(h - vep->url), vep->url, p); |
571 |
1200 |
VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url); |
572 |
|
} |
573 |
10040 |
l -= (p - VSB_data(vep->include_src)); |
574 |
364960 |
for (q = p; *q != '\0'; ) { |
575 |
354920 |
if (*q == '&') { |
576 |
|
#define R(w,f,r) \ |
577 |
|
if (q + w <= p + l && !memcmp(q, f, w)) { \ |
578 |
|
VSB_printf(vep->vsb, "%c", r); \ |
579 |
|
q += w; \ |
580 |
|
continue; \ |
581 |
|
} |
582 |
400 |
R(6, "&apos;", '\''); |
583 |
320 |
R(6, "&quot;", '"'); |
584 |
240 |
R(4, "&lt;", '<'); |
585 |
160 |
R(4, "&gt;", '>'); |
586 |
80 |
R(5, "&amp;", '&'); |
587 |
0 |
} |
588 |
354520 |
VSB_printf(vep->vsb, "%c", *q++); |
589 |
|
} |
590 |
|
#undef R |
591 |
10040 |
VSB_printf(vep->vsb, "%c", 0); |
592 |
10040 |
VSB_destroy(&vep->include_src); |
593 |
10040 |
vep->include_continue = 0; |
594 |
20800 |
} |
595 |
|
|
596 |
|
/*--------------------------------------------------------------------- |
597 |
|
* Lex/Parse object for ESI instructions |
598 |
|
* |
599 |
|
* This function is called with the input object piecemeal so do not |
600 |
|
* assume that we have more than one char available at a time, but |
601 |
|
* optimize for getting huge chunks. |
602 |
|
* |
603 |
|
* NB: At the bottom of this source-file, there is a dot-diagram matching |
604 |
|
* NB: the state-machine. Please maintain it along with the code. |
605 |
|
*/ |
606 |
|
|
607 |
|
void |
608 |
2150922 |
VEP_Parse(struct vep_state *vep, const char *p, size_t l) |
609 |
|
{ |
610 |
|
const char *e; |
611 |
|
struct vep_match *vm; |
612 |
|
int i; |
613 |
|
|
614 |
2150922 |
CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); |
615 |
2150922 |
assert(l > 0); |
616 |
|
|
617 |
2150922 |
if (vep->startup) { |
618 |
|
/* |
619 |
|
* We must force the GZIP header out as a SKIP string, |
620 |
|
* otherwise an object starting with <esi:include would |
621 |
|
* have its GZIP header appear after the included object |
622 |
|
* (e000026.vtc) |
623 |
|
*/ |
624 |
8520 |
vep->ver_p = ""; |
625 |
8520 |
vep->last_mark = SKIP; |
626 |
8520 |
vep_mark_common(vep, vep->ver_p, VERBATIM); |
627 |
8520 |
vep->startup = 0; |
628 |
8520 |
AZ(vep->hack_p); |
629 |
8520 |
vep->hack_p = p; |
630 |
8520 |
} |
631 |
|
|
632 |
2150922 |
vep->ver_p = p; |
633 |
|
|
634 |
2150922 |
e = p + l; |
635 |
|
|
636 |
6250438 |
while (p < e) { |
637 |
4099516 |
AN(vep->state); |
638 |
|
Debug("EP %s %d (%.*s) [%.*s]\n", |
639 |
|
vep->state, |
640 |
|
vep->remove, |
641 |
|
vep->tag_i, vep->tag, |
642 |
|
(e - p) > 10 ? 10 : (int)(e-p), p); |
643 |
4099516 |
assert(p >= vep->ver_p); |
644 |
|
|
645 |
|
/****************************************************** |
646 |
|
* SECTION A |
647 |
|
*/ |
648 |
|
|
649 |
4099516 |
if (vep->state == VEP_START) { |
650 |
8640 |
if (FEATURE(FEATURE_ESI_REMOVE_BOM) && |
651 |
240 |
*p == (char)0xeb) { |
652 |
160 |
vep->match = vep_match_bom; |
653 |
160 |
vep->state = VEP_MATCH; |
654 |
160 |
} else |
655 |
8480 |
vep->state = VEP_BOM; |
656 |
4099516 |
} else if (vep->state == VEP_BOM) { |
657 |
8520 |
vep_mark_skip(vep, p); |
658 |
8520 |
if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK)) |
659 |
2160 |
vep->state = VEP_NEXTTAG; |
660 |
|
else |
661 |
6360 |
vep->state = VEP_TESTXML; |
662 |
4090876 |
} else if (vep->state == VEP_TESTXML) { |
663 |
|
/* |
664 |
|
* If the first non-whitespace char is different |
665 |
|
* from '<' we assume this is not XML. |
666 |
|
*/ |
667 |
14205 |
while (p < e && vct_islws(*p)) |
668 |
7520 |
p++; |
669 |
6685 |
vep_mark_verbatim(vep, p); |
670 |
6685 |
if (p < e && *p == '<') { |
671 |
5800 |
p++; |
672 |
5800 |
vep->state = VEP_STARTTAG; |
673 |
6685 |
} else if (p < e && *p == (char)0xeb) { |
674 |
80 |
VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, |
675 |
|
"No ESI processing, " |
676 |
|
"first char not '<' but BOM." |
677 |
|
" (See feature esi_remove_bom)" |
678 |
|
); |
679 |
80 |
vep->state = VEP_NOTXML; |
680 |
885 |
} else if (p < e) { |
681 |
440 |
VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, |
682 |
|
"No ESI processing, " |
683 |
|
"first char not '<'." |
684 |
|
" (See feature esi_disable_xml_check)" |
685 |
|
); |
686 |
440 |
vep->state = VEP_NOTXML; |
687 |
440 |
} |
688 |
4082356 |
} else if (vep->state == VEP_NOTXML) { |
689 |
|
/* |
690 |
|
* This is not recognized as XML, just skip thru |
691 |
|
* vfp_esi_end() will handle the rest |
692 |
|
*/ |
693 |
520 |
p = e; |
694 |
520 |
vep_mark_verbatim(vep, p); |
695 |
|
|
696 |
|
/****************************************************** |
697 |
|
* SECTION B |
698 |
|
*/ |
699 |
|
|
700 |
4075671 |
} else if (vep->state == VEP_NOTMYTAG) { |
701 |
438664 |
if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) { |
702 |
80 |
p++; |
703 |
80 |
vep->state = VEP_NEXTTAG; |
704 |
80 |
} else { |
705 |
438584 |
vep->tag_i = 0; |
706 |
1856593 |
while (p < e) { |
707 |
1855066 |
if (*p++ == '>') { |
708 |
437057 |
vep->state = VEP_NEXTTAG; |
709 |
437057 |
break; |
710 |
|
} |
711 |
|
} |
712 |
|
} |
713 |
438664 |
if (p == e && !vep->remove) |
714 |
4001 |
vep_mark_verbatim(vep, p); |
715 |
4075151 |
} else if (vep->state == VEP_NEXTTAG) { |
716 |
|
/* |
717 |
|
* Hunt for start of next tag and keep an eye |
718 |
|
* out for end of EsiCmt if armed. |
719 |
|
*/ |
720 |
2574045 |
vep->emptytag = 0; |
721 |
2574045 |
vep->attr = NULL; |
722 |
2574045 |
vep->dostuff = NULL; |
723 |
89079729 |
while (p < e && *p != '<') { |
724 |
86505684 |
if (vep->esicmt_p == NULL) { |
725 |
86501044 |
p++; |
726 |
86501044 |
continue; |
727 |
|
} |
728 |
4640 |
if (*p != *vep->esicmt_p) { |
729 |
2640 |
p++; |
730 |
2640 |
vep->esicmt_p = vep->esicmt; |
731 |
2640 |
continue; |
732 |
|
} |
733 |
2000 |
if (!vep->remove && vep->esicmt_p == vep->esicmt) |
734 |
480 |
vep_mark_verbatim(vep, p); |
735 |
2000 |
p++; |
736 |
2000 |
if (*++vep->esicmt_p == '\0') { |
737 |
640 |
vep->esi_found = 1; |
738 |
640 |
vep->esicmt = NULL; |
739 |
640 |
vep->esicmt_p = NULL; |
740 |
|
/* |
741 |
|
* The end of the esicmt |
742 |
|
* should not be emitted. |
743 |
|
* But the stuff before should |
744 |
|
*/ |
745 |
640 |
vep_mark_skip(vep, p); |
746 |
640 |
} |
747 |
|
} |
748 |
2574045 |
if (p < e) { |
749 |
449361 |
if (!vep->remove) |
750 |
446041 |
vep_mark_verbatim(vep, p); |
751 |
449361 |
assert(*p == '<'); |
752 |
449361 |
p++; |
753 |
449361 |
vep->state = VEP_STARTTAG; |
754 |
2574045 |
} else if (vep->esicmt_p == vep->esicmt && !vep->remove) |
755 |
1069676 |
vep_mark_verbatim(vep, p); |
756 |
|
|
757 |
|
/****************************************************** |
758 |
|
* SECTION C |
759 |
|
*/ |
760 |
|
|
761 |
3636487 |
} else if (vep->state == VEP_STARTTAG) { |
762 |
|
/* Start of tag, set up match table */ |
763 |
455161 |
vep->endtag = 0; |
764 |
455161 |
vep->match = vep_match_starttag; |
765 |
455161 |
vep->state = VEP_MATCH; |
766 |
1062442 |
} else if (vep->state == VEP_COMMENT) { |
767 |
200 |
vep->esicmt_p = vep->esicmt = NULL; |
768 |
200 |
vep->until_p = vep->until = "-->"; |
769 |
200 |
vep->until_s = VEP_NEXTTAG; |
770 |
200 |
vep->state = VEP_UNTIL; |
771 |
607281 |
} else if (vep->state == VEP_COMMENTESI) { |
772 |
680 |
if (vep->remove) |
773 |
200 |
vep_error(vep, |
774 |
|
"ESI 1.0 Nested <!--esi" |
775 |
|
" element in <esi:remove>"); |
776 |
680 |
vep->esicmt_p = vep->esicmt = "-->"; |
777 |
680 |
vep->state = VEP_NEXTTAG; |
778 |
680 |
vep_mark_skip(vep, p); |
779 |
607081 |
} else if (vep->state == VEP_CDATA) { |
780 |
|
/* |
781 |
|
* Easy: just look for the end of CDATA |
782 |
|
*/ |
783 |
160 |
vep->until_p = vep->until = "]]>"; |
784 |
160 |
vep->until_s = VEP_NEXTTAG; |
785 |
160 |
vep->state = VEP_UNTIL; |
786 |
606401 |
} else if (vep->state == VEP_ESIENDTAG) { |
787 |
2920 |
vep->endtag = 1; |
788 |
2920 |
vep->state = VEP_ESITAG; |
789 |
606241 |
} else if (vep->state == VEP_ESITAG) { |
790 |
16880 |
vep->in_esi_tag = 1; |
791 |
16880 |
vep->esi_found = 1; |
792 |
16880 |
vep_mark_skip(vep, p); |
793 |
16880 |
vep->match = vep_match_esi; |
794 |
16880 |
vep->state = VEP_MATCH; |
795 |
603321 |
} else if (vep->state == VEP_ESIINCLUDE) { |
796 |
10720 |
if (vep->remove) { |
797 |
160 |
vep_error(vep, |
798 |
|
"ESI 1.0 <esi:include> element" |
799 |
|
" nested in <esi:remove>"); |
800 |
160 |
vep->state = VEP_TAGERROR; |
801 |
10720 |
} else if (vep->endtag) { |
802 |
40 |
vep_error(vep, |
803 |
|
"ESI 1.0 </esi:include> illegal end-tag"); |
804 |
40 |
vep->state = VEP_TAGERROR; |
805 |
40 |
} else { |
806 |
10520 |
vep->dostuff = vep_do_include; |
807 |
10520 |
vep->state = VEP_INTAG; |
808 |
10520 |
vep->attr = vep_match_attr_include; |
809 |
|
} |
810 |
586441 |
} else if (vep->state == VEP_ESIREMOVE) { |
811 |
5680 |
vep->dostuff = vep_do_remove; |
812 |
5680 |
vep->state = VEP_INTAG; |
813 |
575721 |
} else if (vep->state == VEP_ESICOMMENT) { |
814 |
400 |
if (vep->remove) { |
815 |
40 |
vep_error(vep, |
816 |
|
"ESI 1.0 <esi:comment> element" |
817 |
|
" nested in <esi:remove>"); |
818 |
40 |
vep->state = VEP_TAGERROR; |
819 |
400 |
} else if (vep->endtag) { |
820 |
80 |
vep_error(vep, |
821 |
|
"ESI 1.0 </esi:comment> illegal end-tag"); |
822 |
80 |
vep->state = VEP_TAGERROR; |
823 |
80 |
} else { |
824 |
280 |
vep->dostuff = vep_do_comment; |
825 |
280 |
vep->state = VEP_INTAG; |
826 |
|
} |
827 |
570041 |
} else if (vep->state == VEP_ESIBOGON) { |
828 |
80 |
vep_error(vep, |
829 |
|
"ESI 1.0 <esi:bogus> element"); |
830 |
80 |
vep->state = VEP_TAGERROR; |
831 |
|
|
832 |
|
/****************************************************** |
833 |
|
* SECTION D |
834 |
|
*/ |
835 |
|
|
836 |
569641 |
} else if (vep->state == VEP_INTAG) { |
837 |
28291 |
vep->tag_i = 0; |
838 |
43691 |
while (p < e && vct_islws(*p) && !vep->emptytag) { |
839 |
15400 |
p++; |
840 |
15400 |
vep->canattr = 1; |
841 |
|
} |
842 |
28291 |
if (p < e && *p == '/' && !vep->emptytag) { |
843 |
10400 |
p++; |
844 |
10400 |
vep->emptytag = 1; |
845 |
10400 |
vep->canattr = 0; |
846 |
10400 |
} |
847 |
28291 |
if (p < e && *p == '>') { |
848 |
16160 |
p++; |
849 |
16160 |
AN(vep->dostuff); |
850 |
16160 |
vep_mark_skip(vep, p); |
851 |
16160 |
vep->dostuff(vep, DO_TAG); |
852 |
16160 |
vep->in_esi_tag = 0; |
853 |
16160 |
vep->state = VEP_NEXTTAG; |
854 |
28291 |
} else if (p < e && vep->emptytag) { |
855 |
40 |
vep_error(vep, |
856 |
|
"XML 1.0 '>' does not follow '/' in tag"); |
857 |
40 |
vep->state = VEP_TAGERROR; |
858 |
12131 |
} else if (p < e && vep->canattr && |
859 |
11080 |
vct_isxmlnamestart(*p)) { |
860 |
11040 |
vep->state = VEP_ATTR; |
861 |
12091 |
} else if (p < e) { |
862 |
40 |
vep_error(vep, |
863 |
|
"XML 1.0 Illegal attribute start char"); |
864 |
40 |
vep->state = VEP_TAGERROR; |
865 |
40 |
} |
866 |
569561 |
} else if (vep->state == VEP_TAGERROR) { |
867 |
7346 |
while (p < e && *p != '>') |
868 |
5232 |
p++; |
869 |
2114 |
if (p < e) { |
870 |
720 |
p++; |
871 |
720 |
vep_mark_skip(vep, p); |
872 |
720 |
vep->in_esi_tag = 0; |
873 |
720 |
vep->state = VEP_NEXTTAG; |
874 |
720 |
if (vep->attr_vsb) |
875 |
40 |
VSB_destroy(&vep->attr_vsb); |
876 |
720 |
} |
877 |
|
|
878 |
|
/****************************************************** |
879 |
|
* SECTION E |
880 |
|
*/ |
881 |
|
|
882 |
541270 |
} else if (vep->state == VEP_ATTR) { |
883 |
11040 |
AZ(vep->attr_delim); |
884 |
11040 |
if (vep->attr == NULL) { |
885 |
120 |
p++; |
886 |
120 |
AZ(vep->attr_vsb); |
887 |
120 |
vep->state = VEP_SKIPATTR; |
888 |
120 |
} else { |
889 |
10920 |
vep->match = vep->attr; |
890 |
10920 |
vep->state = VEP_MATCH; |
891 |
|
} |
892 |
539156 |
} else if (vep->state == VEP_SKIPATTR) { |
893 |
1308 |
while (p < e && vct_isxmlname(*p)) |
894 |
723 |
p++; |
895 |
585 |
if (p < e && *p == '=') { |
896 |
160 |
p++; |
897 |
160 |
vep->state = VEP_ATTRDELIM; |
898 |
585 |
} else if (p < e && *p == '>') { |
899 |
40 |
vep->state = VEP_INTAG; |
900 |
425 |
} else if (p < e && *p == '/') { |
901 |
40 |
vep->state = VEP_INTAG; |
902 |
385 |
} else if (p < e && vct_issp(*p)) { |
903 |
80 |
vep->state = VEP_INTAG; |
904 |
345 |
} else if (p < e) { |
905 |
40 |
vep_error(vep, |
906 |
|
"XML 1.0 Illegal attr char"); |
907 |
40 |
vep->state = VEP_TAGERROR; |
908 |
40 |
} |
909 |
528116 |
} else if (vep->state == VEP_ATTRGETVAL) { |
910 |
10680 |
AZ(vep->attr_vsb); |
911 |
10680 |
vep->attr_vsb = VSB_new_auto(); |
912 |
10680 |
vep->state = VEP_ATTRDELIM; |
913 |
527531 |
} else if (vep->state == VEP_ATTRDELIM) { |
914 |
10840 |
AZ(vep->attr_delim); |
915 |
10840 |
if (*p == '"' || *p == '\'') { |
916 |
10680 |
vep->attr_delim = *p++; |
917 |
10680 |
vep->state = VEP_ATTRVAL; |
918 |
10840 |
} else if (!vct_issp(*p)) { |
919 |
120 |
vep->attr_delim = ' '; |
920 |
120 |
vep->state = VEP_ATTRVAL; |
921 |
120 |
} else { |
922 |
40 |
vep_error(vep, |
923 |
|
"XML 1.0 Illegal attribute delimiter"); |
924 |
40 |
vep->state = VEP_TAGERROR; |
925 |
|
} |
926 |
|
|
927 |
516851 |
} else if (vep->state == VEP_ATTRVAL) { |
928 |
737082 |
while (p < e && *p != '>' && *p != vep->attr_delim && |
929 |
362400 |
(vep->attr_delim != ' ' || !vct_issp(*p))) { |
930 |
362400 |
if (vep->attr_vsb != NULL) |
931 |
361920 |
VSB_putc(vep->attr_vsb, *p); |
932 |
362400 |
p++; |
933 |
|
} |
934 |
12282 |
if (p < e && *p == '>') { |
935 |
80 |
vep_error(vep, |
936 |
|
"XML 1.0 Missing end attribute delimiter"); |
937 |
80 |
vep->state = VEP_TAGERROR; |
938 |
80 |
vep->attr_delim = 0; |
939 |
80 |
if (vep->attr_vsb != NULL) { |
940 |
80 |
AZ(VSB_finish(vep->attr_vsb)); |
941 |
80 |
VSB_destroy(&vep->attr_vsb); |
942 |
80 |
} |
943 |
12282 |
} else if (p < e) { |
944 |
10720 |
vep->attr_delim = 0; |
945 |
10720 |
p++; |
946 |
10720 |
vep->state = VEP_INTAG; |
947 |
10720 |
if (vep->attr_vsb != NULL) { |
948 |
10560 |
AZ(VSB_finish(vep->attr_vsb)); |
949 |
10560 |
AN(vep->dostuff); |
950 |
10560 |
vep->dostuff(vep, DO_ATTR); |
951 |
10560 |
vep->attr_vsb = NULL; |
952 |
10560 |
} |
953 |
10720 |
} |
954 |
|
|
955 |
|
/****************************************************** |
956 |
|
* Utility Section |
957 |
|
*/ |
958 |
|
|
959 |
506011 |
} else if (vep->state == VEP_MATCH) { |
960 |
|
/* |
961 |
|
* Match against a table |
962 |
|
*/ |
963 |
483121 |
vm = vep_match(vep, p, e); |
964 |
483121 |
vep->match_hit = vm; |
965 |
483121 |
if (vm != NULL) { |
966 |
477539 |
if (vm->match != NULL) |
967 |
40272 |
p += strlen(vm->match); |
968 |
477539 |
vep->state = *vm->state; |
969 |
477539 |
vep->match = NULL; |
970 |
477539 |
vep->tag_i = 0; |
971 |
477539 |
} else { |
972 |
5582 |
assert(p + sizeof(vep->tag) >= e); |
973 |
5582 |
memcpy(vep->tag, p, e - p); |
974 |
5582 |
vep->tag_i = e - p; |
975 |
5582 |
vep->state = VEP_MATCHBUF; |
976 |
5582 |
p = e; |
977 |
|
} |
978 |
493729 |
} else if (vep->state == VEP_MATCHBUF) { |
979 |
|
/* |
980 |
|
* Match against a table while split over input |
981 |
|
* sections. |
982 |
|
*/ |
983 |
9680 |
AN(vep->match); |
984 |
9680 |
i = sizeof(vep->tag) - vep->tag_i; |
985 |
9680 |
if (i > e - p) |
986 |
8349 |
i = e - p; |
987 |
9680 |
memcpy(vep->tag + vep->tag_i, p, i); |
988 |
19360 |
vm = vep_match(vep, vep->tag, |
989 |
9680 |
vep->tag + vep->tag_i + i); |
990 |
|
Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n", |
991 |
|
vep->tag_i + i, vep->tag, |
992 |
|
vep->tag_i, |
993 |
|
i, |
994 |
|
vm, |
995 |
|
vm ? vm->match : "(nil)"); |
996 |
|
|
997 |
9680 |
if (vm == NULL) { |
998 |
4098 |
vep->tag_i += i; |
999 |
4098 |
p += i; |
1000 |
4098 |
assert(p == e); |
1001 |
4098 |
} else { |
1002 |
5582 |
vep->match_hit = vm; |
1003 |
5582 |
vep->state = *vm->state; |
1004 |
5582 |
if (vm->match != NULL) { |
1005 |
5288 |
i = strlen(vm->match); |
1006 |
5288 |
if (i > vep->tag_i) |
1007 |
5231 |
p += i - vep->tag_i; |
1008 |
5288 |
} |
1009 |
5582 |
vep->match = NULL; |
1010 |
5582 |
vep->tag_i = 0; |
1011 |
|
} |
1012 |
10608 |
} else if (vep->state == VEP_UNTIL) { |
1013 |
|
/* |
1014 |
|
* Skip until we see magic string |
1015 |
|
*/ |
1016 |
7408 |
while (p < e) { |
1017 |
6760 |
if (*p++ != *vep->until_p++) { |
1018 |
5840 |
vep->until_p = vep->until; |
1019 |
6760 |
} else if (*vep->until_p == '\0') { |
1020 |
280 |
vep->state = vep->until_s; |
1021 |
280 |
break; |
1022 |
|
} |
1023 |
|
} |
1024 |
928 |
if (p == e && !vep->remove) |
1025 |
742 |
vep_mark_verbatim(vep, p); |
1026 |
928 |
} else { |
1027 |
|
Debug("*** Unknown state %s\n", vep->state); |
1028 |
0 |
WRONG("WRONG ESI PARSER STATE"); |
1029 |
|
} |
1030 |
|
} |
1031 |
|
/* |
1032 |
|
* We must always mark up the storage we got, try to do so |
1033 |
|
* in the most efficient way, in particular with respect to |
1034 |
|
* minimizing and limiting use of pending. |
1035 |
|
*/ |
1036 |
2150922 |
if (p == vep->ver_p) |
1037 |
|
; |
1038 |
1073162 |
else if (vep->in_esi_tag) |
1039 |
11898 |
vep_mark_skip(vep, p); |
1040 |
1061264 |
else if (vep->remove) |
1041 |
1056264 |
vep_mark_skip(vep, p); |
1042 |
|
else |
1043 |
5000 |
vep_mark_pending(vep, p); |
1044 |
2150922 |
} |
1045 |
|
|
1046 |
|
/*--------------------------------------------------------------------- |
1047 |
|
*/ |
1048 |
|
|
1049 |
|
static ssize_t v_matchproto_(vep_callback_t) |
1050 |
2652396 |
vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg) |
1051 |
|
{ |
1052 |
|
ssize_t *s; |
1053 |
|
|
1054 |
2652396 |
CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); |
1055 |
2652396 |
AN(priv); |
1056 |
2652396 |
s = priv; |
1057 |
2652396 |
*s += l; |
1058 |
2652396 |
(void)flg; |
1059 |
2652396 |
return (*s); |
1060 |
|
} |
1061 |
|
|
1062 |
|
/*--------------------------------------------------------------------- |
1063 |
|
*/ |
1064 |
|
|
1065 |
|
struct vep_state * |
1066 |
10760 |
VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb, |
1067 |
|
void *cb_priv) |
1068 |
|
{ |
1069 |
|
struct vep_state *vep; |
1070 |
|
|
1071 |
10760 |
CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); |
1072 |
10760 |
CHECK_OBJ_NOTNULL(req, HTTP_MAGIC); |
1073 |
10760 |
vep = WS_Alloc(vc->resp->ws, sizeof *vep); |
1074 |
10760 |
if (vep == NULL) { |
1075 |
2080 |
VSLb(vc->wrk->vsl, SLT_VCL_Error, |
1076 |
|
"VEP_Init() workspace overflow"); |
1077 |
2080 |
return (NULL); |
1078 |
|
} |
1079 |
|
|
1080 |
8680 |
INIT_OBJ(vep, VEP_MAGIC); |
1081 |
8680 |
vep->url = req->hd[HTTP_HDR_URL].b; |
1082 |
8680 |
vep->vc = vc; |
1083 |
8680 |
vep->vsb = VSB_new_auto(); |
1084 |
8680 |
AN(vep->vsb); |
1085 |
|
|
1086 |
8680 |
if (cb != NULL) { |
1087 |
3040 |
vep->dogzip = 1; |
1088 |
|
/* XXX */ |
1089 |
3040 |
VSB_printf(vep->vsb, "%c", VEC_GZ); |
1090 |
3040 |
vep->cb = cb; |
1091 |
3040 |
vep->cb_priv = cb_priv; |
1092 |
3040 |
} else { |
1093 |
5640 |
vep->cb = vep_default_cb; |
1094 |
5640 |
vep->cb_priv = &vep->cb_x; |
1095 |
|
} |
1096 |
|
|
1097 |
8680 |
vep->state = VEP_START; |
1098 |
8680 |
vep->crc = crc32(0L, Z_NULL, 0); |
1099 |
8680 |
vep->crcp = crc32(0L, Z_NULL, 0); |
1100 |
|
|
1101 |
8680 |
vep->startup = 1; |
1102 |
8680 |
return (vep); |
1103 |
10760 |
} |
1104 |
|
|
1105 |
|
/*--------------------------------------------------------------------- |
1106 |
|
*/ |
1107 |
|
|
1108 |
|
struct vsb * |
1109 |
8680 |
VEP_Finish(struct vep_state *vep) |
1110 |
|
{ |
1111 |
|
ssize_t l, lcb; |
1112 |
|
|
1113 |
8680 |
CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); |
1114 |
|
|
1115 |
8680 |
if (vep->include_src) |
1116 |
0 |
VSB_destroy(&vep->include_src); |
1117 |
8680 |
if (vep->attr_vsb) |
1118 |
0 |
VSB_destroy(&vep->attr_vsb); |
1119 |
|
|
1120 |
16640 |
if (vep->state != VEP_START && |
1121 |
8520 |
vep->state != VEP_BOM && |
1122 |
8520 |
vep->state != VEP_TESTXML && |
1123 |
8480 |
vep->state != VEP_NOTXML && |
1124 |
7960 |
vep->state != VEP_NEXTTAG) { |
1125 |
144 |
vep_error(vep, "VEP ended inside a tag"); |
1126 |
144 |
} |
1127 |
|
|
1128 |
8680 |
if (vep->o_pending) |
1129 |
0 |
vep_mark_common(vep, vep->ver_p, vep->last_mark); |
1130 |
8680 |
if (vep->o_wait > 0) { |
1131 |
8520 |
lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN); |
1132 |
8520 |
vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); |
1133 |
8520 |
} |
1134 |
|
// NB: We don't account for PAD+SUM+LEN in gzipped objects |
1135 |
8680 |
(void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH); |
1136 |
|
|
1137 |
8680 |
AZ(VSB_finish(vep->vsb)); |
1138 |
8680 |
l = VSB_len(vep->vsb); |
1139 |
8680 |
if (vep->esi_found && l > 0) |
1140 |
4680 |
return (vep->vsb); |
1141 |
4000 |
VSB_destroy(&vep->vsb); |
1142 |
4000 |
return (NULL); |
1143 |
8680 |
} |
1144 |
|
|
1145 |
|
#if 0 |
1146 |
|
|
1147 |
|
digraph xml { |
1148 |
|
rankdir="LR" |
1149 |
|
size="7,10" |
1150 |
|
################################################################# |
1151 |
|
# SECTION A |
1152 |
|
# |
1153 |
|
|
1154 |
|
START [shape=ellipse] |
1155 |
|
TESTXML [shape=ellipse] |
1156 |
|
NOTXML [shape=ellipse] |
1157 |
|
NEXTTAGa [shape=hexagon, label="NEXTTAG"] |
1158 |
|
STARTTAGa [shape=hexagon, label="STARTTAG"] |
1159 |
|
START -> TESTXML |
1160 |
|
START -> NEXTTAGa [style=dotted, label="syntax:1"] |
1161 |
|
TESTXML -> TESTXML [label="lws"] |
1162 |
|
TESTXML -> NOTXML |
1163 |
|
TESTXML -> STARTTAGa [label="'<'"] |
1164 |
|
|
1165 |
|
################################################################# |
1166 |
|
# SECTION B |
1167 |
|
|
1168 |
|
NOTMYTAG [shape=ellipse] |
1169 |
|
NEXTTAG [shape=ellipse] |
1170 |
|
NOTMYTAG -> NEXTTAG [style=dotted, label="syntax:2"] |
1171 |
|
STARTTAGb [shape=hexagon, label="STARTTAG"] |
1172 |
|
NOTMYTAG -> NEXTTAG [label="'>'"] |
1173 |
|
NOTMYTAG -> NOTMYTAG [label="*"] |
1174 |
|
NEXTTAG -> NEXTTAG [label="'-->'"] |
1175 |
|
NEXTTAG -> NEXTTAG [label="*"] |
1176 |
|
NEXTTAG -> STARTTAGb [label="'<'"] |
1177 |
|
|
1178 |
|
################################################################# |
1179 |
|
# SECTION C |
1180 |
|
|
1181 |
|
STARTTAG [shape=ellipse] |
1182 |
|
COMMENT [shape=ellipse] |
1183 |
|
CDATA [shape=ellipse] |
1184 |
|
ESITAG [shape=ellipse] |
1185 |
|
ESIETAG [shape=ellipse] |
1186 |
|
ESIINCLUDE [shape=ellipse] |
1187 |
|
ESIREMOVE [shape=ellipse] |
1188 |
|
ESICOMMENT [shape=ellipse] |
1189 |
|
ESIBOGON [shape=ellipse] |
1190 |
|
INTAGc [shape=hexagon, label="INTAG"] |
1191 |
|
NOTMYTAGc [shape=hexagon, label="NOTMYTAG"] |
1192 |
|
NEXTTAGc [shape=hexagon, label="NEXTTAG"] |
1193 |
|
TAGERRORc [shape=hexagon, label="TAGERROR"] |
1194 |
|
C1 [shape=circle,label=""] |
1195 |
|
STARTTAG -> COMMENT [label="'<!--'"] |
1196 |
|
STARTTAG -> ESITAG [label="'<esi'"] |
1197 |
|
STARTTAG -> CDATA [label="'<![CDATA['"] |
1198 |
|
STARTTAG -> NOTMYTAGc [label="'*'"] |
1199 |
|
COMMENT -> NEXTTAGc [label="'esi'"] |
1200 |
|
COMMENT -> C1 [label="*"] |
1201 |
|
C1 -> C1 [label="*"] |
1202 |
|
C1 -> NEXTTAGc [label="-->"] |
1203 |
|
CDATA -> CDATA [label="*"] |
1204 |
|
CDATA -> NEXTTAGc [label="]]>"] |
1205 |
|
ESITAG -> ESIINCLUDE [label="'include'"] |
1206 |
|
ESITAG -> ESIREMOVE [label="'remove'"] |
1207 |
|
ESITAG -> ESICOMMENT [label="'comment'"] |
1208 |
|
ESITAG -> ESIBOGON [label="*"] |
1209 |
|
ESICOMMENT -> INTAGc |
1210 |
|
ESICOMMENT -> TAGERRORc |
1211 |
|
ESICOMMENT -> TAGERRORc [style=dotted, label="nested\nin\nremove"] |
1212 |
|
ESIREMOVE -> INTAGc |
1213 |
|
ESIREMOVE -> TAGERRORc |
1214 |
|
ESIINCLUDE -> INTAGc |
1215 |
|
ESIINCLUDE -> TAGERRORc |
1216 |
|
ESIINCLUDE -> TAGERRORc [style=dotted, label="nested\nin\nremove"] |
1217 |
|
ESIBOGON -> TAGERRORc |
1218 |
|
|
1219 |
|
################################################################# |
1220 |
|
# SECTION D |
1221 |
|
|
1222 |
|
INTAG [shape=ellipse] |
1223 |
|
TAGERROR [shape=ellipse] |
1224 |
|
NEXTTAGd [shape=hexagon, label="NEXTTAG"] |
1225 |
|
ATTRd [shape=hexagon, label="ATTR"] |
1226 |
|
D1 [shape=circle, label=""] |
1227 |
|
D2 [shape=circle, label=""] |
1228 |
|
INTAG -> D1 [label="lws"] |
1229 |
|
D1 -> D2 [label="/"] |
1230 |
|
INTAG -> D2 [label="/"] |
1231 |
|
INTAG -> NEXTTAGd [label=">"] |
1232 |
|
D1 -> NEXTTAGd [label=">"] |
1233 |
|
D2 -> NEXTTAGd [label=">"] |
1234 |
|
D1 -> ATTRd [label="XMLstartchar"] |
1235 |
|
D1 -> TAGERROR [label="*"] |
1236 |
|
D2 -> TAGERROR [label="*"] |
1237 |
|
TAGERROR -> TAGERROR [label="*"] |
1238 |
|
TAGERROR -> NEXTTAGd [label="'>'"] |
1239 |
|
|
1240 |
|
################################################################# |
1241 |
|
# SECTION E |
1242 |
|
|
1243 |
|
ATTR [shape=ellipse] |
1244 |
|
SKIPATTR [shape=ellipse] |
1245 |
|
ATTRGETVAL [shape=ellipse] |
1246 |
|
ATTRDELIM [shape=ellipse] |
1247 |
|
ATTRVAL [shape=ellipse] |
1248 |
|
TAGERRORe [shape=hexagon, label="TAGERROR"] |
1249 |
|
INTAGe [shape=hexagon, label="INTAG"] |
1250 |
|
ATTR -> SKIPATTR [label="*"] |
1251 |
|
ATTR -> ATTRGETVAL [label="wanted attr"] |
1252 |
|
SKIPATTR -> SKIPATTR [label="XMLname"] |
1253 |
|
SKIPATTR -> ATTRDELIM [label="'='"] |
1254 |
|
SKIPATTR -> INTAGe [label="'>', '/', sp"] |
SKIPATTR -> TAGERRORe [label="*"] |
1255 |
|
ATTRGETVAL -> ATTRDELIM |
1256 |
|
ATTRDELIM -> ATTRVAL [label="\""] |
1257 |
|
ATTRDELIM -> ATTRVAL [label="\'"] |
1258 |
|
ATTRDELIM -> ATTRVAL [label="*"] |
1259 |
|
ATTRDELIM -> TAGERRORe [label="lws"] |
1260 |
|
ATTRVAL -> TAGERRORe [label="'>'"] |
1261 |
|
ATTRVAL -> INTAGe [label="delim"] |
1262 |
|
ATTRVAL -> ATTRVAL [label="*"] |
1263 |
|
|
1264 |
|
} |
1265 |
|
|
1266 |
|
#endif |