varnish-cache/bin/varnishd/cache/cache_esi_parse.c
0
/*-
1
 * Copyright (c) 2011 Varnish Software AS
2
 * All rights reserved.
3
 *
4
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
5
 *
6
 * SPDX-License-Identifier: BSD-2-Clause
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 *
29
 * VEP Varnish Esi Parsing
30
 */
31
32
#include "config.h"
33
34
#include "cache_varnishd.h"
35
#include "cache_filter.h"
36
37
#include "cache_vgz.h"
38
#include "cache_esi.h"
39
#include "vct.h"
40
#include "vend.h"
41
#include "vgz.h"
42
43
//#define Debug(fmt, ...) printf(fmt, __VA_ARGS__)
44
#define Debug(fmt, ...) /**/
45
46
struct vep_state;

/*
 * Tells an element handler which event it is being called for.
 * vep_do_include() consumes a collected attribute value on DO_ATTR and
 * finishes the element on DO_TAG; the other handlers accept DO_TAG only.
 */
enum dowhat {
        DO_ATTR,
        DO_TAG
};

/* Signature of the per-element handlers (vep_do_*) */
typedef void dostuff_f(struct vep_state *, enum dowhat);
50
51
/*
 * One row of a match table: a literal to recognise in the input and the
 * address of the state-name pointer that goes with it.  Every table ends
 * with a row whose match is NULL; vep_match() returns that row when no
 * literal matched.
 */
struct vep_match {
        const char              *match;
        const char * const      *state;
};
55
56
/* The two ways a stretch of input can be marked by vep_mark_common() */
enum vep_mark {
        VERBATIM = 0,
        SKIP
};
57
58
struct vep_state {
59
        unsigned                magic;
60
#define VEP_MAGIC               0x55cb9b82
61
        struct vsb              *vsb;
62
63
        const char              *url;
64
        struct vfp_ctx          *vc;
65
        int                     dogzip;
66
        vep_callback_t          *cb;
67
        void                    *cb_priv;
68
69
        /* Internal Counter for default call-back function */
70
        ssize_t                 cb_x;
71
72
        /* parser state */
73
        const char              *state;
74
        unsigned                startup;
75
        unsigned                esi_found;
76
77
        unsigned                endtag;
78
        unsigned                emptytag;
79
        unsigned                canattr;
80
81
        unsigned                remove;
82
83
        ssize_t                 o_wait;
84
        ssize_t                 o_pending;
85
        ssize_t                 o_total;
86
        uint32_t                crc;
87
        ssize_t                 o_crc;
88
        uint32_t                crcp;
89
        ssize_t                 o_last;
90
91
        const char              *hack_p;
92
        const char              *ver_p;
93
94
        const char              *until;
95
        const char              *until_p;
96
        const char              *until_s;
97
98
        int                     in_esi_tag;
99
100
        const char              *esicmt;
101
        const char              *esicmt_p;
102
103
        struct vep_match        *attr;
104
        struct vsb              *attr_vsb;
105
        int                     attr_delim;
106
107
        struct vep_match        *match;
108
        struct vep_match        *match_hit;
109
110
        char                    tag[8];
111
        int                     tag_i;
112
113
        dostuff_f               *dostuff;
114
115
        struct vsb              *include_src;
116
        unsigned                include_continue;
117
118
        unsigned                nm_skip;
119
        unsigned                nm_verbatim;
120
        unsigned                nm_pending;
121
        enum vep_mark           last_mark;
122
};
123
124
/*---------------------------------------------------------------------*/
125
126
/*
 * Parser state names.
 *
 * The parser compares vep->state against these pointers, so each state
 * must stay a separate object; the text itself only shows up in debug
 * output.  The match tables store the address of these variables.
 */

/* Start of object */
static const char * const VEP_START = "[Start]";
static const char * const VEP_BOM = "[BOM]";
static const char * const VEP_TESTXML = "[TestXml]";
static const char * const VEP_NOTXML = "[NotXml]";

/* Between tags */
static const char * const VEP_NEXTTAG = "[NxtTag]";
static const char * const VEP_NOTMYTAG = "[NotMyTag]";

/* Tag classification */
static const char * const VEP_STARTTAG = "[StartTag]";
static const char * const VEP_COMMENTESI = "[CommentESI]";
static const char * const VEP_COMMENT = "[Comment]";
static const char * const VEP_CDATA = "[CDATA]";
static const char * const VEP_ESITAG = "[ESITag]";
static const char * const VEP_ESIENDTAG = "[/ESITag]";

/* ESI elements */
static const char * const VEP_ESIREMOVE = "[ESI:Remove]";
static const char * const VEP_ESIINCLUDE = "[ESI:Include]";
static const char * const VEP_ESICOMMENT = "[ESI:Comment]";
static const char * const VEP_ESIBOGON = "[ESI:Bogon]";

/* Inside a tag */
static const char * const VEP_INTAG = "[InTag]";
static const char * const VEP_TAGERROR = "[TagError]";

/* Attributes */
static const char * const VEP_ATTR = "[Attribute]";
static const char * const VEP_SKIPATTR = "[SkipAttribute]";
static const char * const VEP_ATTRDELIM = "[AttrDelim]";
static const char * const VEP_ATTRGETVAL = "[AttrGetValue]";
static const char * const VEP_ATTRVAL = "[AttrValue]";

/* Utility states */
static const char * const VEP_UNTIL = "[Until]";
static const char * const VEP_MATCHBUF = "[MatchBuf]";
static const char * const VEP_MATCH = "[Match]";
158
159
/*---------------------------------------------------------------------*/
160
161
static struct vep_match vep_match_starttag[] = {
162
        { "!--esi",     &VEP_COMMENTESI },
163
        { "!---->",     &VEP_NEXTTAG },
164
        { "!--",        &VEP_COMMENT },
165
        { "/esi:",      &VEP_ESIENDTAG },
166
        { "esi:",       &VEP_ESITAG },
167
        { "![CDATA[",   &VEP_CDATA },
168
        { NULL,         &VEP_NOTMYTAG }
169
};
170
171
/*---------------------------------------------------------------------*/
172
173
static struct vep_match vep_match_esi[] = {
174
        { "include",    &VEP_ESIINCLUDE },
175
        { "remove",     &VEP_ESIREMOVE },
176
        { "comment",    &VEP_ESICOMMENT },
177
        { NULL,         &VEP_ESIBOGON }
178
};
179
180
/*---------------------------------------------------------------------*/
181
182
static struct vep_match vep_match_attr_include[] = {
183
        { "src=",       &VEP_ATTRGETVAL },
184
        { "onerror=",   &VEP_ATTRGETVAL },
185
        { NULL,         &VEP_SKIPATTR }
186
};
187
188
/*---------------------------------------------------------------------*/
189
190
static struct vep_match vep_match_bom[] = {
191
        { "\xeb\xbb\xbf",       &VEP_START },
192
        { NULL,                 &VEP_BOM }
193
};
194
195
/*--------------------------------------------------------------------
196
 * Report a parsing error
197
 */
198
199
static void
200 1417
vep_error(const struct vep_state *vep, const char *p)
201
{
202 1417
        VSC_C_main->esi_errors++;
203 2834
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR after %zd %s",
204 1417
             vep->o_last, p);
205 1417
}
206
207
/*--------------------------------------------------------------------
208
 * Report a parsing warning
209
 */
210
211
static void
212 200
vep_warn(const struct vep_state *vep, const char *p)
213
{
214 200
        VSC_C_main->esi_warnings++;
215 400
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN after %zd %s",
216 200
             vep->o_last, p);
217 200
}
218
219
/*---------------------------------------------------------------------
220
 * return match or NULL if more input needed.
221
 */
222
223
static struct vep_match *
224 492842
vep_match(const struct vep_state *vep, const char *b, const char *e)
225
{
226
        struct vep_match *vm;
227
        const char *q, *r;
228
229 492842
        AN(vep->match);
230 3205031
        for (vm = vep->match; vm->match != NULL; vm++) {
231 2767421
                assert(strlen(vm->match) <= sizeof (vep->tag));
232 2767421
                r = b;
233 3032592
                for (q = vm->match; *q != '\0' && r < e; q++, r++)
234 2977360
                        if (*q != *r)
235 2712189
                                break;
236 2767421
                if (*q == '\0')
237 45560
                        break;
238 2721861
                if (r == e)
239 9672
                        return (NULL);
240 2712189
        }
241 483170
        return (vm);
242 492842
}
243
244
/*---------------------------------------------------------------------
245
 *
246
 */
247
248
static void
249 39160
vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64)
250
{
251
        uint8_t buf[9];
252
253 39160
        assert(l > 0);
254 39160
        if (l < 256) {
255 38480
                buf[0] = (uint8_t)m8;
256 38480
                buf[1] = (uint8_t)l;
257 38480
                assert((ssize_t)buf[1] == l);
258 38480
                VSB_bcat(vep->vsb, buf, 2);
259 39160
        } else if (l < 65536) {
260 440
                buf[0] = (uint8_t)m16;
261 440
                vbe16enc(buf + 1, (uint16_t)l);
262 440
                assert((ssize_t)vbe16dec(buf + 1) == l);
263 440
                VSB_bcat(vep->vsb, buf, 3);
264 440
        } else {
265 240
                buf[0] = (uint8_t)m64;
266 240
                vbe64enc(buf + 1, l);
267 240
                assert((ssize_t)vbe64dec(buf + 1) == l);
268 240
                VSB_bcat(vep->vsb, buf, 9);
269
        }
270 39160
}
271
272
static void
273 17800
vep_emit_skip(const struct vep_state *vep, ssize_t l)
274
{
275
276 17800
        vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8);
277 17800
}
278
279
static void
280 17320
vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc)
281
{
282
        uint8_t buf[4];
283
284 17320
        vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8);
285 17320
        if (vep->dogzip) {
286 4040
                vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8);
287 4040
                vbe32enc(buf, vep->crc);
288 4040
                VSB_bcat(vep->vsb, buf, sizeof buf);
289 4040
        }
290 17320
}
291
292
static void
293 40720
vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark)
294
{
295
296 40720
        assert(l >= 0);
297 40720
        if (l == 0)
298 5600
                return;
299 35120
        assert(mark == SKIP || mark == VERBATIM);
300 35120
        if (mark == SKIP)
301 17800
                vep_emit_skip(vep, l);
302
        else
303 17320
                vep_emit_verbatim(vep, l, vep->o_crc);
304
305 35120
        vep->crc = crc32(0L, Z_NULL, 0);
306 35120
        vep->o_crc = 0;
307 35120
        vep->o_total += l;
308 40720
}
309
310
/*---------------------------------------------------------------------
311
 *
312
 */
313
314
static void
315 2650807
vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark)
316
{
317
        ssize_t l, lcb;
318
319 2650807
        assert(mark == SKIP || mark == VERBATIM);
320
321
        /* The NO-OP case, no data, no pending data & no change of mode */
322 2650807
        if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0)
323 1751
                return;
324
325
        /*
326
         * If we changed mode, emit whatever the opposite mode
327
         * assembled before the pending bytes.
328
         */
329
330 2649056
        if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) {
331 64400
                lcb = vep->cb(vep->vc, vep->cb_priv, 0,
332 32200
                    mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN);
333 32200
                vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
334 32200
                vep->o_last = lcb;
335 32200
                vep->o_wait = 0;
336 32200
        }
337
338
        /* Transfer pending bytes CRC into active mode CRC */
339 2649056
        if (vep->o_pending) {
340 2689
                (void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending,
341
                     VGZ_NORMAL);
342 2689
                if (vep->o_crc == 0) {
343 2096
                        vep->crc = vep->crcp;
344 2096
                        vep->o_crc = vep->o_pending;
345 2096
                } else {
346 1186
                        vep->crc = crc32_combine(vep->crc,
347 593
                            vep->crcp, vep->o_pending);
348 593
                        vep->o_crc += vep->o_pending;
349
                }
350 2689
                vep->crcp = crc32(0L, Z_NULL, 0);
351 2689
                vep->o_wait += vep->o_pending;
352 2689
                vep->o_pending = 0;
353 2689
        }
354
355
        /* * Process this bit of input */
356 2649056
        AN(vep->ver_p);
357 2649056
        l = p - vep->ver_p;
358 2649056
        assert(l >= 0);
359 2649056
        vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l);
360 2649056
        vep->o_crc += l;
361 2649056
        vep->ver_p = p;
362
363 2649056
        vep->o_wait += l;
364 2649056
        vep->last_mark = mark;
365 2649056
        (void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL);
366 2650807
}
367
368
static void
369 1529919
vep_mark_verbatim(struct vep_state *vep, const char *p)
370
{
371
372 1529919
        vep_mark_common(vep, p, VERBATIM);
373 1529919
        vep->nm_verbatim++;
374 1529919
}
375
376
static void
377 1112368
vep_mark_skip(struct vep_state *vep, const char *p)
378
{
379
380 1112368
        vep_mark_common(vep, p, SKIP);
381 1112368
        vep->nm_skip++;
382 1112368
}
383
384
static void
385 4990
vep_mark_pending(struct vep_state *vep, const char *p)
386
{
387
        ssize_t l;
388
389 4990
        AN(vep->ver_p);
390 4990
        l = p - vep->ver_p;
391 4990
        assert(l > 0);
392 4990
        vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l);
393 4990
        vep->ver_p = p;
394
395 4990
        vep->o_pending += l;
396 4990
        vep->nm_pending++;
397 4990
}
398
399
/*---------------------------------------------------------------------
400
 */
401
402
static void v_matchproto_()
403 240
vep_do_comment(struct vep_state *vep, enum dowhat what)
404
{
405
        Debug("DO_COMMENT(%d)\n", what);
406 240
        assert(what == DO_TAG);
407 240
        if (!vep->emptytag)
408 80
                vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'");
409 240
}
410
411
/*---------------------------------------------------------------------
412
 */
413
414
static void v_matchproto_()
415 5680
vep_do_remove(struct vep_state *vep, enum dowhat what)
416
{
417
        Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n",
418
            what, vep->endtag, vep->emptytag, vep->remove);
419 5680
        assert(what == DO_TAG);
420 5680
        if (vep->emptytag)
421 40
                vep_error(vep, "ESI 1.0 <esi:remove/> not legal");
422 5640
        else if (vep->remove && !vep->endtag)
423 40
                vep_error(vep, "ESI 1.0 <esi:remove> already open");
424 5600
        else if (!vep->remove && vep->endtag)
425 40
                vep_error(vep, "ESI 1.0 <esi:remove> not open");
426
        else
427 5560
                vep->remove = !vep->endtag;
428 5680
}
429
430
/*---------------------------------------------------------------------
431
 */
432
433
static void
434 10280
include_attr_src(struct vep_state *vep)
435
{
436
        const char *p;
437
438 10280
        if (vep->include_src != NULL) {
439 40
                vep_error(vep,
440
                    "ESI 1.0 <esi:include> "
441
                    "has multiple src= attributes");
442 40
                vep->state = VEP_TAGERROR;
443 40
                VSB_destroy(&vep->attr_vsb);
444 40
                VSB_destroy(&vep->include_src);
445 40
                return;
446
        }
447 369680
        for (p = VSB_data(vep->attr_vsb); *p != '\0'; p++)
448 359480
                if (vct_islws(*p))
449 40
                        break;
450 10240
        if (*p != '\0') {
451 40
                vep_error(vep,
452
                    "ESI 1.0 <esi:include> "
453
                    "has whitespace in src= attribute");
454 40
                vep->state = VEP_TAGERROR;
455 40
                VSB_destroy(&vep->attr_vsb);
456 40
                if (vep->include_src != NULL)
457 0
                        VSB_destroy(&vep->include_src);
458 40
                return;
459
        }
460 10200
        vep->include_src = vep->attr_vsb;
461 10200
        vep->attr_vsb = NULL;
462 10280
}
463
464
static void
465 280
include_attr_onerror(struct vep_state *vep)
466
{
467
468 280
        vep->include_continue = !strcmp("continue", VSB_data(vep->attr_vsb));
469 280
        VSB_destroy(&vep->attr_vsb);
470 280
}
471
472
static void v_matchproto_()
473 20800
vep_do_include(struct vep_state *vep, enum dowhat what)
474
{
475
        const char *p, *q, *h;
476
        ssize_t l;
477
        char incl;
478
479
        Debug("DO_INCLUDE(%d)\n", what);
480 20800
        if (what == DO_ATTR) {
481
                Debug("ATTR (%s) (%s)\n", vep->match_hit->match,
482
                        VSB_data(vep->attr_vsb));
483 10560
                if (!strcmp("src=", vep->match_hit->match)) {
484 10280
                        include_attr_src(vep);
485 10280
                        return;
486
                }
487 280
                if (!strcmp("onerror=", vep->match_hit->match)) {
488 280
                        include_attr_onerror(vep);
489 280
                        return;
490
                }
491 0
                WRONG("Unhandled <esi:include> attribute");
492 0
        }
493 10240
        assert(what == DO_TAG);
494 10240
        if (!vep->emptytag)
495 80
                vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'");
496 10240
        if (vep->include_src == NULL) {
497 80
                vep_error(vep, "ESI 1.0 <esi:include> lacks src attr");
498 80
                return;
499
        }
500
501
        /*
502
         * Strictly speaking, we ought to spit out any piled up skip before
503
         * emitting the VEC for the include, but objectively that makes no
504
         * difference and robs us of a chance to collapse another skip into
505
         * this on so we don't do that.
506
         * However, we cannot tolerate any verbatim stuff piling up.
507
         * The mark_skip() before calling dostuff should have taken
508
         * care of that.  Make sure.
509
         */
510 10160
        assert(vep->o_wait == 0 || vep->last_mark == SKIP);
511
        /* XXX: what if it contains NUL bytes ?? */
512 10160
        p = VSB_data(vep->include_src);
513 10160
        l = VSB_len(vep->include_src);
514 10160
        h = 0;
515
516 10160
        incl = vep->include_continue ? VEC_IC : VEC_IA;
517
518 10160
        if (l > 7 && !memcmp(p, "http://", 7)) {
519 80
                h = p + 7;
520 80
                p = strchr(h, '/');
521 80
                if (p == NULL) {
522 40
                        vep_error(vep,
523
                            "ESI 1.0 <esi:include> invalid src= URL");
524 40
                        vep->state = VEP_TAGERROR;
525 40
                        AZ(vep->attr_vsb);
526 40
                        VSB_destroy(&vep->include_src);
527 40
                        return;
528
                }
529
                Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p);
530 40
                VSB_printf(vep->vsb, "%c", incl);
531 40
                VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
532 10120
        } else if (l > 8 && !memcmp(p, "https://", 8)) {
533 120
                if (!FEATURE(FEATURE_ESI_IGNORE_HTTPS)) {
534 40
                        vep_warn(vep,
535
                            "ESI 1.0 <esi:include> with https:// ignored");
536 40
                        vep->state = VEP_TAGERROR;
537 40
                        AZ(vep->attr_vsb);
538 40
                        VSB_destroy(&vep->include_src);
539 40
                        return;
540
                }
541 80
                vep_warn(vep,
542
                    "ESI 1.0 <esi:include> https:// treated as http://");
543 80
                h = p + 8;
544 80
                p = strchr(h, '/');
545 80
                if (p == NULL) {
546 40
                        vep_error(vep,
547
                            "ESI 1.0 <esi:include> invalid src= URL");
548 40
                        vep->state = VEP_TAGERROR;
549 40
                        AZ(vep->attr_vsb);
550 40
                        VSB_destroy(&vep->include_src);
551 40
                        return;
552
                }
553 40
                VSB_printf(vep->vsb, "%c", incl);
554 40
                VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
555 10000
        } else if (*p == '/') {
556 8760
                VSB_printf(vep->vsb, "%c", incl);
557 8760
                VSB_printf(vep->vsb, "%c", 0);
558 8760
        } else {
559 1200
                VSB_printf(vep->vsb, "%c", incl);
560 1200
                VSB_printf(vep->vsb, "%c", 0);
561
                /* Look for the last / before a '?' */
562 1200
                h = NULL;
563 3600
                for (q = vep->url; *q && *q != '?'; q++)
564 3720
                        if (*q == '/')
565 1320
                                h = q;
566 1200
                if (h == NULL)
567 40
                        h = q + 1;
568
569
                Debug("INCL:: [%.*s]/[%s]\n",
570
                    (int)(h - vep->url), vep->url, p);
571 1200
                VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url);
572
        }
573 10040
        l -= (p - VSB_data(vep->include_src));
574 364960
        for (q = p; *q != '\0'; ) {
575 354920
                if (*q == '&') {
576
#define R(w,f,r)                                                        \
577
                        if (q + w <= p + l && !memcmp(q, f, w)) { \
578
                                VSB_printf(vep->vsb, "%c", r);  \
579
                                q += w;                         \
580
                                continue;                       \
581
                        }
582 400
                        R(6, "&apos;", '\'');
583 320
                        R(6, "&quot;", '"');
584 240
                        R(4, "&lt;", '<');
585 160
                        R(4, "&gt;", '>');
586 80
                        R(5, "&amp;", '&');
587 0
                }
588 354520
                VSB_printf(vep->vsb, "%c", *q++);
589
        }
590
#undef R
591 10040
        VSB_printf(vep->vsb, "%c", 0);
592 10040
        VSB_destroy(&vep->include_src);
593 10040
        vep->include_continue = 0;
594 20800
}
595
596
/*---------------------------------------------------------------------
597
 * Lex/Parse object for ESI instructions
598
 *
599
 * This function is called with the input object piecemeal so do not
600
 * assume that we have more than one char available at a time, but
601
 * optimize for getting huge chunks.
602
 *
603
 * NB: At the bottom of this source-file, there is a dot-diagram matching
604
 * NB: the state-machine.  Please maintain it along with the code.
605
 */
606
607
void
608 2153252
VEP_Parse(struct vep_state *vep, const char *p, size_t l)
609
{
610
        const char *e;
611
        struct vep_match *vm;
612
        int i;
613
614 2153252
        CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
615 2153252
        assert(l > 0);
616
617 2153252
        if (vep->startup) {
618
                /*
619
                 * We must force the GZIP header out as a SKIP string,
620
                 * otherwise an object starting with <esi:include would
621
                 * have its GZIP header appear after the included object
622
                 * (e000026.vtc)
623
                 */
624 8520
                vep->ver_p = "";
625 8520
                vep->last_mark = SKIP;
626 8520
                vep_mark_common(vep, vep->ver_p, VERBATIM);
627 8520
                vep->startup = 0;
628 8520
                AZ(vep->hack_p);
629 8520
                vep->hack_p = p;
630 8520
        }
631
632 2153252
        vep->ver_p = p;
633
634 2153252
        e = p + l;
635
636 6255353
        while (p < e) {
637 4102101
                AN(vep->state);
638
                Debug("EP %s %d (%.*s) [%.*s]\n",
639
                    vep->state,
640
                    vep->remove,
641
                    vep->tag_i, vep->tag,
642
                    (e - p) > 10 ? 10 : (int)(e-p), p);
643 4102101
                assert(p >= vep->ver_p);
644
645
                /******************************************************
646
                 * SECTION A
647
                 */
648
649 4102101
                if (vep->state == VEP_START) {
650 8640
                        if (FEATURE(FEATURE_ESI_REMOVE_BOM) &&
651 240
                            *p == (char)0xeb) {
652 160
                                vep->match = vep_match_bom;
653 160
                                vep->state = VEP_MATCH;
654 160
                        } else
655 8480
                                vep->state = VEP_BOM;
656 4102101
                } else if (vep->state == VEP_BOM) {
657 8520
                        vep_mark_skip(vep, p);
658 8520
                        if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK))
659 2160
                                vep->state = VEP_NEXTTAG;
660
                        else
661 6360
                                vep->state = VEP_TESTXML;
662 4093461
                } else if (vep->state == VEP_TESTXML) {
663
                        /*
664
                         * If the first non-whitespace char is different
665
                         * from '<' we assume this is not XML.
666
                         */
667 14189
                        while (p < e && vct_islws(*p))
668 7520
                                p++;
669 6669
                        vep_mark_verbatim(vep, p);
670 6669
                        if (p < e && *p == '<') {
671 5800
                                p++;
672 5800
                                vep->state = VEP_STARTTAG;
673 6669
                        } else if (p < e && *p == (char)0xeb) {
674 80
                                VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
675
                                    "No ESI processing, "
676
                                    "first char not '<' but BOM."
677
                                    " (See feature esi_remove_bom)"
678
                                );
679 80
                                vep->state = VEP_NOTXML;
680 869
                        } else if (p < e) {
681 440
                                VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
682
                                    "No ESI processing, "
683
                                    "first char not '<'."
684
                                    " (See feature esi_disable_xml_check)"
685
                                );
686 440
                                vep->state = VEP_NOTXML;
687 440
                        }
688 4084941
                } else if (vep->state == VEP_NOTXML) {
689
                        /*
690
                         * This is not recognized as XML, just skip thru
691
                         * vfp_esi_end() will handle the rest
692
                         */
693 520
                        p = e;
694 520
                        vep_mark_verbatim(vep, p);
695
696
                /******************************************************
697
                 * SECTION B
698
                 */
699
700 4078272
                } else if (vep->state == VEP_NOTMYTAG) {
701 438738
                        if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) {
702 80
                                p++;
703 80
                                vep->state = VEP_NEXTTAG;
704 80
                        } else {
705 438658
                                vep->tag_i = 0;
706 1860140
                                while (p < e) {
707 1858595
                                        if (*p++ == '>') {
708 437113
                                                vep->state = VEP_NEXTTAG;
709 437113
                                                break;
710
                                        }
711
                                }
712
                        }
713 438738
                        if (p == e && !vep->remove)
714 3998
                                vep_mark_verbatim(vep, p);
715 4077752
                } else if (vep->state == VEP_NEXTTAG) {
716
                        /*
717
                         * Hunt for start of next tag and keep an eye
718
                         * out for end of EsiCmt if armed.
719
                         */
720 2576519
                        vep->emptytag = 0;
721 2576519
                        vep->attr = NULL;
722 2576519
                        vep->dostuff = NULL;
723 89078621
                        while (p < e && *p != '<') {
724 86502102
                                if (vep->esicmt_p == NULL) {
725 86497462
                                        p++;
726 86497462
                                        continue;
727
                                }
728 4640
                                if (*p != *vep->esicmt_p) {
729 2640
                                        p++;
730 2640
                                        vep->esicmt_p = vep->esicmt;
731 2640
                                        continue;
732
                                }
733 2000
                                if (!vep->remove && vep->esicmt_p == vep->esicmt)
734 480
                                        vep_mark_verbatim(vep, p);
735 2000
                                p++;
736 2000
                                if (*++vep->esicmt_p == '\0') {
737 640
                                        vep->esi_found = 1;
738 640
                                        vep->esicmt = NULL;
739 640
                                        vep->esicmt_p = NULL;
740
                                        /*
741
                                         * The end of the esicmt
742
                                         * should not be emitted.
743
                                         * But the stuff before should
744
                                         */
745 640
                                        vep_mark_skip(vep, p);
746 640
                                }
747
                        }
748 2576519
                        if (p < e) {
749 449410
                                if (!vep->remove)
750 446090
                                        vep_mark_verbatim(vep, p);
751 449410
                                assert(*p == '<');
752 449410
                                p++;
753 449410
                                vep->state = VEP_STARTTAG;
754 2576519
                        } else if (vep->esicmt_p == vep->esicmt && !vep->remove)
755 1071412
                                vep_mark_verbatim(vep, p);
756
757
                /******************************************************
758
                 * SECTION C
759
                 */
760
761 3639014
                } else if (vep->state == VEP_STARTTAG) {
762
                        /* Start of tag, set up match table */
763 455210
                        vep->endtag = 0;
764 455210
                        vep->match = vep_match_starttag;
765 455210
                        vep->state = VEP_MATCH;
766 1062495
                } else if (vep->state == VEP_COMMENT) {
767 200
                        vep->esicmt_p = vep->esicmt = NULL;
768 200
                        vep->until_p = vep->until = "-->";
769 200
                        vep->until_s = VEP_NEXTTAG;
770 200
                        vep->state = VEP_UNTIL;
771 607285
                } else if (vep->state == VEP_COMMENTESI) {
772 680
                        if (vep->remove)
773 200
                                vep_error(vep,
774
                                    "ESI 1.0 Nested <!--esi"
775
                                    " element in <esi:remove>");
776 680
                        vep->esicmt_p = vep->esicmt = "-->";
777 680
                        vep->state = VEP_NEXTTAG;
778 680
                        vep_mark_skip(vep, p);
779 607085
                } else if (vep->state == VEP_CDATA) {
780
                        /*
781
                         * Easy: just look for the end of CDATA
782
                         */
783 160
                        vep->until_p = vep->until = "]]>";
784 160
                        vep->until_s = VEP_NEXTTAG;
785 160
                        vep->state = VEP_UNTIL;
786 606405
                } else if (vep->state == VEP_ESIENDTAG) {
787 2920
                        vep->endtag = 1;
788 2920
                        vep->state = VEP_ESITAG;
789 606245
                } else if (vep->state == VEP_ESITAG) {
790 16880
                        vep->in_esi_tag = 1;
791 16880
                        vep->esi_found = 1;
792 16880
                        vep_mark_skip(vep, p);
793 16880
                        vep->match = vep_match_esi;
794 16880
                        vep->state = VEP_MATCH;
795 603325
                } else if (vep->state == VEP_ESIINCLUDE) {
796 10720
                        if (vep->remove) {
797 160
                                vep_error(vep,
798
                                    "ESI 1.0 <esi:include> element"
799
                                    " nested in <esi:remove>");
800 160
                                vep->state = VEP_TAGERROR;
801 10720
                        } else if (vep->endtag) {
802 40
                                vep_error(vep,
803
                                    "ESI 1.0 </esi:include> illegal end-tag");
804 40
                                vep->state = VEP_TAGERROR;
805 40
                        } else {
806 10520
                                vep->dostuff = vep_do_include;
807 10520
                                vep->state = VEP_INTAG;
808 10520
                                vep->attr = vep_match_attr_include;
809
                        }
810 586445
                } else if (vep->state == VEP_ESIREMOVE) {
811 5680
                        vep->dostuff = vep_do_remove;
812 5680
                        vep->state = VEP_INTAG;
813 575725
                } else if (vep->state == VEP_ESICOMMENT) {
814 400
                        if (vep->remove) {
815 40
                                vep_error(vep,
816
                                    "ESI 1.0 <esi:comment> element"
817
                                    " nested in <esi:remove>");
818 40
                                vep->state = VEP_TAGERROR;
819 400
                        } else if (vep->endtag) {
820 80
                                vep_error(vep,
821
                                    "ESI 1.0 </esi:comment> illegal end-tag");
822 80
                                vep->state = VEP_TAGERROR;
823 80
                        } else {
824 280
                                vep->dostuff = vep_do_comment;
825 280
                                vep->state = VEP_INTAG;
826
                        }
827 570045
                } else if (vep->state == VEP_ESIBOGON) {
828 80
                        vep_error(vep,
829
                            "ESI 1.0 <esi:bogus> element");
830 80
                        vep->state = VEP_TAGERROR;
831
832
                /******************************************************
833
                 * SECTION D
834
                 */
835
836 569645
                } else if (vep->state == VEP_INTAG) {
837 28224
                        vep->tag_i = 0;
838 43624
                        while (p < e && vct_islws(*p) && !vep->emptytag) {
839 15400
                                p++;
840 15400
                                vep->canattr = 1;
841
                        }
842 28224
                        if (p < e && *p == '/' && !vep->emptytag) {
843 10400
                                p++;
844 10400
                                vep->emptytag = 1;
845 10400
                                vep->canattr = 0;
846 10400
                        }
847 28224
                        if (p < e && *p == '>') {
848 16160
                                p++;
849 16160
                                AN(vep->dostuff);
850 16160
                                vep_mark_skip(vep, p);
851 16160
                                vep->dostuff(vep, DO_TAG);
852 16160
                                vep->in_esi_tag = 0;
853 16160
                                vep->state = VEP_NEXTTAG;
854 28224
                        } else if (p < e && vep->emptytag) {
855 40
                                vep_error(vep,
856
                                    "XML 1.0 '>' does not follow '/' in tag");
857 40
                                vep->state = VEP_TAGERROR;
858 12064
                        } else if (p < e && vep->canattr &&
859 11080
                            vct_isxmlnamestart(*p)) {
860 11040
                                vep->state = VEP_ATTR;
861 12024
                        } else if (p < e) {
862 40
                                vep_error(vep,
863
                                    "XML 1.0 Illegal attribute start char");
864 40
                                vep->state = VEP_TAGERROR;
865 40
                        }
866 569565
                } else if (vep->state == VEP_TAGERROR) {
867 7359
                        while (p < e && *p != '>')
868 5234
                                p++;
869 2125
                        if (p < e) {
870 720
                                p++;
871 720
                                vep_mark_skip(vep, p);
872 720
                                vep->in_esi_tag = 0;
873 720
                                vep->state = VEP_NEXTTAG;
874 720
                                if (vep->attr_vsb)
875 40
                                        VSB_destroy(&vep->attr_vsb);
876 720
                        }
877
878
                /******************************************************
879
                 * SECTION E
880
                 */
881
882 541341
                } else if (vep->state == VEP_ATTR) {
883 11040
                        AZ(vep->attr_delim);
884 11040
                        if (vep->attr == NULL) {
885 120
                                p++;
886 120
                                AZ(vep->attr_vsb);
887 120
                                vep->state = VEP_SKIPATTR;
888 120
                        } else {
889 10920
                                vep->match = vep->attr;
890 10920
                                vep->state = VEP_MATCH;
891
                        }
892 539216
                } else if (vep->state == VEP_SKIPATTR) {
893 1289
                        while (p < e && vct_isxmlname(*p))
894 718
                                p++;
895 571
                        if (p < e && *p == '=') {
896 160
                                p++;
897 160
                                vep->state = VEP_ATTRDELIM;
898 571
                        } else if (p < e && *p == '>') {
899 40
                                vep->state = VEP_INTAG;
900 411
                        } else if (p < e && *p == '/') {
901 40
                                vep->state = VEP_INTAG;
902 371
                        } else if (p < e && vct_issp(*p)) {
903 80
                                vep->state = VEP_INTAG;
904 331
                        } else if (p < e) {
905 40
                                vep_error(vep,
906
                                    "XML 1.0 Illegal attr char");
907 40
                                vep->state = VEP_TAGERROR;
908 40
                        }
909 528176
                } else if (vep->state == VEP_ATTRGETVAL) {
910 10680
                        AZ(vep->attr_vsb);
911 10680
                        vep->attr_vsb = VSB_new_auto();
912 10680
                        vep->state = VEP_ATTRDELIM;
913 527605
                } else if (vep->state == VEP_ATTRDELIM) {
914 10840
                        AZ(vep->attr_delim);
915 10840
                        if (*p == '"' || *p == '\'') {
916 10680
                                vep->attr_delim = *p++;
917 10680
                                vep->state = VEP_ATTRVAL;
918 10840
                        } else if (!vct_issp(*p)) {
919 120
                                vep->attr_delim = ' ';
920 120
                                vep->state = VEP_ATTRVAL;
921 120
                        } else {
922 40
                                vep_error(vep,
923
                                    "XML 1.0 Illegal attribute delimiter");
924 40
                                vep->state = VEP_TAGERROR;
925
                        }
926
927 516925
                } else if (vep->state == VEP_ATTRVAL) {
928 737111
                        while (p < e && *p != '>' && *p != vep->attr_delim &&
929 362400
                           (vep->attr_delim != ' ' || !vct_issp(*p))) {
930 362400
                                if (vep->attr_vsb != NULL)
931 361920
                                        VSB_putc(vep->attr_vsb, *p);
932 362400
                                p++;
933
                        }
934 12311
                        if (p < e && *p == '>') {
935 80
                                vep_error(vep,
936
                                    "XML 1.0 Missing end attribute delimiter");
937 80
                                vep->state = VEP_TAGERROR;
938 80
                                vep->attr_delim = 0;
939 80
                                if (vep->attr_vsb != NULL) {
940 80
                                        AZ(VSB_finish(vep->attr_vsb));
941 80
                                        VSB_destroy(&vep->attr_vsb);
942 80
                                }
943 12311
                        } else if (p < e) {
944 10720
                                vep->attr_delim = 0;
945 10720
                                p++;
946 10720
                                vep->state = VEP_INTAG;
947 10720
                                if (vep->attr_vsb != NULL) {
948 10560
                                        AZ(VSB_finish(vep->attr_vsb));
949 10560
                                        AN(vep->dostuff);
950 10560
                                        vep->dostuff(vep, DO_ATTR);
951 10560
                                        vep->attr_vsb = NULL;
952 10560
                                }
953 10720
                        }
954
955
                /******************************************************
956
                 * Utility Section
957
                 */
958
959 506085
                } else if (vep->state == VEP_MATCH) {
960
                        /*
961
                         * Match against a table
962
                         */
963 483170
                        vm = vep_match(vep, p, e);
964 483170
                        vep->match_hit = vm;
965 483170
                        if (vm != NULL) {
966 477564
                                if (vm->match != NULL)
967 40246
                                        p += strlen(vm->match);
968 477564
                                vep->state = *vm->state;
969 477564
                                vep->match = NULL;
970 477564
                                vep->tag_i = 0;
971 477564
                        } else {
972 5606
                                assert(p + sizeof(vep->tag) >= e);
973 5606
                                memcpy(vep->tag, p, e - p);
974 5606
                                vep->tag_i = e - p;
975 5606
                                vep->state = VEP_MATCHBUF;
976 5606
                                p = e;
977
                        }
978 493774
                } else if (vep->state == VEP_MATCHBUF) {
979
                        /*
980
                         * Match against a table while split over input
981
                         * sections.
982
                         */
983 9672
                        AN(vep->match);
984 9672
                        i = sizeof(vep->tag) - vep->tag_i;
985 9672
                        if (i > e - p)
986 8365
                                i = e - p;
987 9672
                        memcpy(vep->tag + vep->tag_i, p, i);
988 19344
                        vm = vep_match(vep, vep->tag,
989 9672
                            vep->tag + vep->tag_i + i);
990
                        Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n",
991
                            vep->tag_i + i, vep->tag,
992
                            vep->tag_i,
993
                            i,
994
                            vm,
995
                            vm ? vm->match : "(nil)");
996
997 9672
                        if (vm == NULL) {
998 4066
                                vep->tag_i += i;
999 4066
                                p += i;
1000 4066
                                assert(p == e);
1001 4066
                        } else {
1002 5606
                                vep->match_hit = vm;
1003 5606
                                vep->state = *vm->state;
1004 5606
                                if (vm->match != NULL) {
1005 5314
                                        i = strlen(vm->match);
1006 5314
                                        if (i > vep->tag_i)
1007 5253
                                                p += i - vep->tag_i;
1008 5314
                                }
1009 5606
                                vep->match = NULL;
1010 5606
                                vep->tag_i = 0;
1011
                        }
1012 10604
                } else if (vep->state == VEP_UNTIL) {
1013
                        /*
1014
                         * Skip until we see magic string
1015
                         */
1016 7412
                        while (p < e) {
1017 6760
                                if (*p++ != *vep->until_p++) {
1018 5840
                                        vep->until_p = vep->until;
1019 6760
                                } else if (*vep->until_p == '\0') {
1020 280
                                        vep->state = vep->until_s;
1021 280
                                        break;
1022
                                }
1023
                        }
1024 932
                        if (p == e && !vep->remove)
1025 750
                                vep_mark_verbatim(vep, p);
1026 932
                } else {
1027
                        Debug("*** Unknown state %s\n", vep->state);
1028 0
                        WRONG("WRONG ESI PARSER STATE");
1029
                }
1030
        }
1031
        /*
1032
         * We must always mark up the storage we got, try to do so
1033
         * in the most efficient way, in particular with respect to
1034
         * minimizing and limiting use of pending.
1035
         */
1036 2153252
        if (p == vep->ver_p)
1037
                ;
1038 1073758
        else if (vep->in_esi_tag)
1039 11839
                vep_mark_skip(vep, p);
1040 1061919
        else if (vep->remove)
1041 1056929
                vep_mark_skip(vep, p);
1042
        else
1043 4990
                vep_mark_pending(vep, p);
1044 2153252
}
1045
1046
/*---------------------------------------------------------------------
1047
 */
1048
1049
static ssize_t v_matchproto_(vep_callback_t)
1050 2654760
vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg)
1051
{
1052
        ssize_t *s;
1053
1054 2654760
        CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1055 2654760
        AN(priv);
1056 2654760
        s = priv;
1057 2654760
        *s += l;
1058 2654760
        (void)flg;
1059 2654760
        return (*s);
1060
}
1061
1062
/*---------------------------------------------------------------------
1063
 */
1064
1065
struct vep_state *
1066 10760
VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb,
1067
    void *cb_priv)
1068
{
1069
        struct vep_state *vep;
1070
1071 10760
        CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1072 10760
        CHECK_OBJ_NOTNULL(req, HTTP_MAGIC);
1073 10760
        vep = WS_Alloc(vc->resp->ws, sizeof *vep);
1074 10760
        if (vep == NULL) {
1075 2080
                VSLb(vc->wrk->vsl, SLT_VCL_Error,
1076
                     "VEP_Init() workspace overflow");
1077 2080
                return (NULL);
1078
        }
1079
1080 8680
        INIT_OBJ(vep, VEP_MAGIC);
1081 8680
        vep->url = req->hd[HTTP_HDR_URL].b;
1082 8680
        vep->vc = vc;
1083 8680
        vep->vsb = VSB_new_auto();
1084 8680
        AN(vep->vsb);
1085
1086 8680
        if (cb != NULL) {
1087 3040
                vep->dogzip = 1;
1088
                /* XXX */
1089 3040
                VSB_printf(vep->vsb, "%c", VEC_GZ);
1090 3040
                vep->cb = cb;
1091 3040
                vep->cb_priv = cb_priv;
1092 3040
        } else {
1093 5640
                vep->cb = vep_default_cb;
1094 5640
                vep->cb_priv = &vep->cb_x;
1095
        }
1096
1097 8680
        vep->state = VEP_START;
1098 8680
        vep->crc = crc32(0L, Z_NULL, 0);
1099 8680
        vep->crcp = crc32(0L, Z_NULL, 0);
1100
1101 8680
        vep->startup = 1;
1102 8680
        return (vep);
1103 10760
}
1104
1105
/*---------------------------------------------------------------------
1106
 */
1107
1108
struct vsb *
1109 8680
VEP_Finish(struct vep_state *vep)
1110
{
1111
        ssize_t l, lcb;
1112
1113 8680
        CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
1114
1115 8680
        if (vep->include_src)
1116 0
                VSB_destroy(&vep->include_src);
1117 8680
        if (vep->attr_vsb)
1118 0
                VSB_destroy(&vep->attr_vsb);
1119
1120 16640
        if (vep->state != VEP_START &&
1121 8520
            vep->state != VEP_BOM &&
1122 8520
            vep->state != VEP_TESTXML &&
1123 8480
            vep->state != VEP_NOTXML &&
1124 7960
            vep->state != VEP_NEXTTAG) {
1125 137
                vep_error(vep, "VEP ended inside a tag");
1126 137
        }
1127
1128 8680
        if (vep->o_pending)
1129 0
                vep_mark_common(vep, vep->ver_p, vep->last_mark);
1130 8680
        if (vep->o_wait > 0) {
1131 8520
                lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN);
1132 8520
                vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
1133 8520
        }
1134
        // NB: We don't account for PAD+SUM+LEN in gzipped objects
1135 8680
        (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH);
1136
1137 8680
        AZ(VSB_finish(vep->vsb));
1138 8680
        l = VSB_len(vep->vsb);
1139 8680
        if (vep->esi_found && l > 0)
1140 4680
                return (vep->vsb);
1141 4000
        VSB_destroy(&vep->vsb);
1142 4000
        return (NULL);
1143 8680
}
1144
1145
#if 0
1146
1147
digraph xml {
1148
        rankdir="LR"
1149
        size="7,10"
1150
#################################################################
1151
# SECTION A
1152
#
1153
1154
START           [shape=ellipse]
1155
TESTXML         [shape=ellipse]
1156
NOTXML          [shape=ellipse]
1157
NEXTTAGa        [shape=hexagon, label="NEXTTAG"]
1158
STARTTAGa       [shape=hexagon, label="STARTTAG"]
1159
START           -> TESTXML
1160
START           -> NEXTTAGa     [style=dotted, label="syntax:1"]
1161
TESTXML         -> TESTXML      [label="lws"]
1162
TESTXML         -> NOTXML
1163
TESTXML         -> STARTTAGa    [label="'<'"]
1164
1165
#################################################################
1166
# SECTION B
1167
1168
NOTMYTAG        [shape=ellipse]
1169
NEXTTAG         [shape=ellipse]
1170
NOTMYTAG        -> NEXTTAG      [style=dotted, label="syntax:2"]
1171
STARTTAGb       [shape=hexagon, label="STARTTAG"]
1172
NOTMYTAG        -> NEXTTAG      [label="'>'"]
1173
NOTMYTAG        -> NOTMYTAG     [label="*"]
1174
NEXTTAG         -> NEXTTAG      [label="'-->'"]
1175
NEXTTAG         -> NEXTTAG      [label="*"]
1176
NEXTTAG         -> STARTTAGb    [label="'<'"]
1177
1178
#################################################################
1179
# SECTION C
1180
1181
STARTTAG        [shape=ellipse]
1182
COMMENT         [shape=ellipse]
1183
CDATA           [shape=ellipse]
1184
ESITAG          [shape=ellipse]
1185
ESIETAG         [shape=ellipse]
1186
ESIINCLUDE      [shape=ellipse]
1187
ESIREMOVE       [shape=ellipse]
1188
ESICOMMENT      [shape=ellipse]
1189
ESIBOGON        [shape=ellipse]
1190
INTAGc          [shape=hexagon, label="INTAG"]
1191
NOTMYTAGc       [shape=hexagon, label="NOTMYTAG"]
1192
NEXTTAGc        [shape=hexagon, label="NEXTTAG"]
1193
TAGERRORc       [shape=hexagon, label="TAGERROR"]
1194
C1              [shape=circle,label=""]
1195
STARTTAG        -> COMMENT      [label="'<!--'"]
1196
STARTTAG        -> ESITAG       [label="'<esi'"]
1197
STARTTAG        -> CDATA        [label="'<![CDATA['"]
1198
STARTTAG        -> NOTMYTAGc    [label="'*'"]
1199
COMMENT         -> NEXTTAGc     [label="'esi'"]
1200
COMMENT         -> C1           [label="*"]
1201
C1              -> C1           [label="*"]
1202
C1              -> NEXTTAGc     [label="-->"]
1203
CDATA           -> CDATA        [label="*"]
1204
CDATA           -> NEXTTAGc     [label="]]>"]
1205
ESITAG          -> ESIINCLUDE   [label="'include'"]
1206
ESITAG          -> ESIREMOVE    [label="'remove'"]
1207
ESITAG          -> ESICOMMENT   [label="'comment'"]
1208
ESITAG          -> ESIBOGON     [label="*"]
1209
ESICOMMENT      -> INTAGc
1210
ESICOMMENT      -> TAGERRORc
1211
ESICOMMENT      -> TAGERRORc    [style=dotted, label="nested\nin\nremove"]
1212
ESIREMOVE       -> INTAGc
1213
ESIREMOVE       -> TAGERRORc
1214
ESIINCLUDE      -> INTAGc
1215
ESIINCLUDE      -> TAGERRORc
1216
ESIINCLUDE      -> TAGERRORc    [style=dotted, label="nested\nin\nremove"]
1217
ESIBOGON        -> TAGERRORc
1218
1219
#################################################################
1220
# SECTION D
1221
1222
INTAG           [shape=ellipse]
1223
TAGERROR        [shape=ellipse]
1224
NEXTTAGd        [shape=hexagon, label="NEXTTAG"]
1225
ATTRd           [shape=hexagon, label="ATTR"]
1226
D1              [shape=circle, label=""]
1227
D2              [shape=circle, label=""]
1228
INTAG           -> D1           [label="lws"]
1229
D1              -> D2           [label="/"]
1230
INTAG           -> D2           [label="/"]
1231
INTAG           -> NEXTTAGd     [label=">"]
1232
D1              -> NEXTTAGd     [label=">"]
1233
D2              -> NEXTTAGd     [label=">"]
1234
D1              -> ATTRd        [label="XMLstartchar"]
1235
D1              -> TAGERROR     [label="*"]
1236
D2              -> TAGERROR     [label="*"]
1237
TAGERROR        -> TAGERROR     [label="*"]
1238
TAGERROR        -> NEXTTAGd     [label="'>'"]
1239
1240
#################################################################
1241
# SECTION E
1242
1243
ATTR            [shape=ellipse]
1244
SKIPATTR        [shape=ellipse]
1245
ATTRGETVAL      [shape=ellipse]
1246
ATTRDELIM       [shape=ellipse]
1247
ATTRVAL         [shape=ellipse]
1248
TAGERRORe       [shape=hexagon, label="TAGERROR"]
1249
INTAGe          [shape=hexagon, label="INTAG"]
1250
ATTR            -> SKIPATTR     [label="*"]
1251
ATTR            -> ATTRGETVAL   [label="wanted attr"]
1252
SKIPATTR        -> SKIPATTR     [label="XMLname"]
1253
SKIPATTR        -> ATTRDELIM    [label="'='"]
1254
SKIPATTR        -> TAGERRORe    [label="*"]
1255
ATTRGETVAL      -> ATTRDELIM
1256
ATTRDELIM       -> ATTRVAL      [label="\""]
1257
ATTRDELIM       -> ATTRVAL      [label="\'"]
1258
ATTRDELIM       -> ATTRVAL      [label="*"]
1259
ATTRDELIM       -> TAGERRORe    [label="lws"]
1260
ATTRVAL         -> TAGERRORe    [label="'>'"]
1261
ATTRVAL         -> INTAGe       [label="delim"]
1262
ATTRVAL         -> ATTRVAL      [label="*"]
1263
1264
}
1265
1266
#endif