varnish-cache/bin/varnishd/cache/cache_esi_parse.c
1
/*-
2
 * Copyright (c) 2011 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * Redistribution and use in source and binary forms, with or without
8
 * modification, are permitted provided that the following conditions
9
 * are met:
10
 * 1. Redistributions of source code must retain the above copyright
11
 *    notice, this list of conditions and the following disclaimer.
12
 * 2. Redistributions in binary form must reproduce the above copyright
13
 *    notice, this list of conditions and the following disclaimer in the
14
 *    documentation and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
20
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
 * SUCH DAMAGE.
27
 *
28
 * VEP Varnish Esi Parsing
29
 */
30
31
#include "config.h"
32
33
#include "cache_varnishd.h"
34
#include "cache_filter.h"
35
36
#include "cache_vgz.h"
37
#include "cache_esi.h"
38
#include "vct.h"
39
#include "vend.h"
40
#include "vgz.h"
41
42
/*
 * Debug() is a printf-style trace hook.  It normally expands to nothing;
 * swap the two definitions below to get traces printed with printf().
 */
//#define Debug(fmt, ...) printf(fmt, __VA_ARGS__)
#define Debug(fmt, ...) /**/

struct vep_state;

/*
 * Why a tag handler is being called: DO_ATTR when an attribute value has
 * been collected (see vep_do_include()), DO_TAG for the tag as a whole.
 */
enum dowhat {DO_ATTR, DO_TAG};

/* Signature shared by the per-tag handlers (the vep_do_*() functions). */
typedef void dostuff_f(struct vep_state *, enum dowhat);
49
50
/*
 * One row of a match table: a literal to look for and the address of the
 * VEP_* state constant associated with it.  Every table ends with a row
 * whose match is NULL; vep_match() returns that row when nothing matched.
 */
struct vep_match {
        const char      *match;
        const char      * const *state;
};

/* The two ways a span of input can be marked (see vep_mark_common()). */
enum vep_mark { VERBATIM = 0, SKIP };
56
57
struct vep_state {
58
        unsigned                magic;
59
#define VEP_MAGIC               0x55cb9b82
60
        struct vsb              *vsb;
61
62
        const char              *url;
63
        struct vfp_ctx          *vc;
64
        int                     dogzip;
65
        vep_callback_t          *cb;
66
        void                    *cb_priv;
67
68
        /* Internal Counter for default call-back function */
69
        ssize_t                 cb_x;
70
71
        /* parser state */
72
        const char              *state;
73
        unsigned                startup;
74
        unsigned                esi_found;
75
76
        unsigned                endtag;
77
        unsigned                emptytag;
78
        unsigned                canattr;
79
80
        unsigned                remove;
81
82
        ssize_t                 o_wait;
83
        ssize_t                 o_pending;
84
        ssize_t                 o_total;
85
        uint32_t                crc;
86
        ssize_t                 o_crc;
87
        uint32_t                crcp;
88
        ssize_t                 o_last;
89
90
        const char              *hack_p;
91
        const char              *ver_p;
92
93
        const char              *until;
94
        const char              *until_p;
95
        const char              *until_s;
96
97
        int                     in_esi_tag;
98
99
        const char              *esicmt;
100
        const char              *esicmt_p;
101
102
        struct vep_match        *attr;
103
        struct vsb              *attr_vsb;
104
        int                     attr_delim;
105
106
        struct vep_match        *match;
107
        struct vep_match        *match_hit;
108
109
        char                    tag[8];
110
        int                     tag_i;
111
112
        dostuff_f               *dostuff;
113
114
        struct vsb              *include_src;
115
116
        unsigned                nm_skip;
117
        unsigned                nm_verbatim;
118
        unsigned                nm_pending;
119
        enum vep_mark           last_mark;
120
};
121
122
/*---------------------------------------------------------------------*/
123
124
/*
 * Parser states.  A state is identified by the address of its name string
 * (VEP_Parse() compares vep->state against these pointers); the text is
 * what shows up in Debug() traces.
 */

/* Start of document */
static const char * const VEP_START =           "[Start]";
static const char * const VEP_BOM =             "[BOM]";
static const char * const VEP_TESTXML =         "[TestXml]";
static const char * const VEP_NOTXML =          "[NotXml]";

/* Hunting for tags */
static const char * const VEP_NEXTTAG =         "[NxtTag]";
static const char * const VEP_NOTMYTAG =        "[NotMyTag]";

/* Classifying a tag */
static const char * const VEP_STARTTAG =        "[StartTag]";
static const char * const VEP_COMMENTESI =      "[CommentESI]";
static const char * const VEP_COMMENT =         "[Comment]";
static const char * const VEP_CDATA =           "[CDATA]";
static const char * const VEP_ESITAG =          "[ESITag]";
static const char * const VEP_ESIENDTAG =       "[/ESITag]";

/* The individual <esi:...> elements */
static const char * const VEP_ESIREMOVE =       "[ESI:Remove]";
static const char * const VEP_ESIINCLUDE =      "[ESI:Include]";
static const char * const VEP_ESICOMMENT =      "[ESI:Comment]";
static const char * const VEP_ESIBOGON =        "[ESI:Bogon]";

/* Inside a tag */
static const char * const VEP_INTAG =           "[InTag]";
static const char * const VEP_TAGERROR =        "[TagError]";

/* Attributes */
static const char * const VEP_ATTR =            "[Attribute]";
static const char * const VEP_SKIPATTR =        "[SkipAttribute]";
static const char * const VEP_ATTRDELIM =       "[AttrDelim]";
static const char * const VEP_ATTRGETVAL =      "[AttrGetValue]";
static const char * const VEP_ATTRVAL =         "[AttrValue]";

/* Generic scanning helpers */
static const char * const VEP_UNTIL =           "[Until]";
static const char * const VEP_MATCHBUF =        "[MatchBuf]";
static const char * const VEP_MATCH =           "[Match]";
156
157
/*---------------------------------------------------------------------*/
158
159
static struct vep_match vep_match_starttag[] = {
160
        { "!--esi",     &VEP_COMMENTESI },
161
        { "!---->",     &VEP_NEXTTAG },
162
        { "!--",        &VEP_COMMENT },
163
        { "/esi:",      &VEP_ESIENDTAG },
164
        { "esi:",       &VEP_ESITAG },
165
        { "![CDATA[",   &VEP_CDATA },
166
        { NULL,         &VEP_NOTMYTAG }
167
};
168
169
/*---------------------------------------------------------------------*/
170
171
static struct vep_match vep_match_esi[] = {
172
        { "include",    &VEP_ESIINCLUDE },
173
        { "remove",     &VEP_ESIREMOVE },
174
        { "comment",    &VEP_ESICOMMENT },
175
        { NULL,         &VEP_ESIBOGON }
176
};
177
178
/*---------------------------------------------------------------------*/
179
180
static struct vep_match vep_match_attr_include[] = {
181
        { "src=",       &VEP_ATTRGETVAL },
182
        { NULL,         &VEP_SKIPATTR }
183
};
184
185
/*---------------------------------------------------------------------*/
186
187
static struct vep_match vep_match_bom[] = {
188
        { "\xeb\xbb\xbf",       &VEP_START },
189
        { NULL,                 &VEP_BOM }
190
};
191
192
/*--------------------------------------------------------------------
193
 * Report a parsing error
194
 */
195
196
static void
197 72
vep_error(const struct vep_state *vep, const char *p)
198
{
199 72
        VSC_C_main->esi_errors++;
200 72
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR after %zd %s",
201
             vep->o_last, p);
202 72
}
203
204
/*--------------------------------------------------------------------
205
 * Report a parsing warning
206
 */
207
208
static void
209 10
vep_warn(const struct vep_state *vep, const char *p)
210
{
211 10
        VSC_C_main->esi_warnings++;
212 10
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN after %zd %s",
213
             vep->o_last, p);
214 10
}
215
216
/*---------------------------------------------------------------------
217
 * return match or NULL if more input needed.
218
 */
219
220
static struct vep_match *
221 24320
vep_match(const struct vep_state *vep, const char *b, const char *e)
222
{
223
        struct vep_match *vm;
224
        const char *q, *r;
225
226 158911
        for (vm = vep->match; vm->match != NULL; vm++) {
227 137157
                assert(strlen(vm->match) <= sizeof (vep->tag));
228 137157
                r = b;
229 149340
                for (q = vm->match; *q != '\0' && r < e; q++, r++)
230 146774
                        if (*q != *r)
231 134591
                                break;
232 137157
                if (*q == '\0')
233 2068
                        break;
234 135089
                if (r == e)
235 498
                        return (NULL);
236
        }
237 23822
        return (vm);
238
}
239
240
/*---------------------------------------------------------------------
241
 *
242
 */
243
244
static void
245 1570
vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64)
246
{
247
        uint8_t buf[9];
248
249 1570
        assert(l > 0);
250 1570
        if (l < 256) {
251 1544
                buf[0] = (uint8_t)m8;
252 1544
                buf[1] = (uint8_t)l;
253 1544
                assert((ssize_t)buf[1] == l);
254 1544
                VSB_bcat(vep->vsb, buf, 2);
255 26
        } else if (l < 65536) {
256 14
                buf[0] = (uint8_t)m16;
257 14
                vbe16enc(buf + 1, (uint16_t)l);
258 14
                assert((ssize_t)vbe16dec(buf + 1) == l);
259 14
                VSB_bcat(vep->vsb, buf, 3);
260
        } else {
261 12
                buf[0] = (uint8_t)m64;
262 12
                vbe64enc(buf + 1, l);
263 12
                assert((ssize_t)vbe64dec(buf + 1) == l);
264 12
                VSB_bcat(vep->vsb, buf, 9);
265
        }
266 1570
}
267
268
static void
269 746
vep_emit_skip(const struct vep_state *vep, ssize_t l)
270
{
271
272 746
        vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8);
273 746
}
274
275
static void
276 702
vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc)
277
{
278
        uint8_t buf[4];
279
280 702
        vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8);
281 702
        if (vep->dogzip) {
282 122
                vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8);
283 122
                vbe32enc(buf, vep->crc);
284 122
                VSB_bcat(vep->vsb, buf, sizeof buf);
285
        }
286 702
}
287
288
static void
289 1686
vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark)
290
{
291
292 1686
        assert(l >= 0);
293 1686
        if (l == 0)
294 238
                return;
295 1448
        assert(mark == SKIP || mark == VERBATIM);
296 1448
        if (mark == SKIP)
297 746
                vep_emit_skip(vep, l);
298
        else
299 702
                vep_emit_verbatim(vep, l, vep->o_crc);
300
301 1448
        vep->crc = crc32(0L, Z_NULL, 0);
302 1448
        vep->o_crc = 0;
303 1448
        vep->o_total += l;
304
}
305
306
/*---------------------------------------------------------------------
307
 *
308
 */
309
310
static void
311 131739
vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark)
312
{
313
        ssize_t l, lcb;
314
315 131739
        assert(mark == SKIP || mark == VERBATIM);
316
317
        /* The NO-OP case, no data, no pending data & no change of mode */
318 131739
        if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0)
319 69
                return;
320
321
        /*
322
         * If we changed mode, emit whatever the opposite mode
323
         * assembled before the pending bytes.
324
         */
325
326 131670
        if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) {
327 1374
                lcb = vep->cb(vep->vc, vep->cb_priv, 0,
328
                    mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN);
329 1374
                vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
330 1374
                vep->o_last = lcb;
331 1374
                vep->o_wait = 0;
332
        }
333
334
        /* Transfer pending bytes CRC into active mode CRC */
335 131670
        if (vep->o_pending) {
336 133
                (void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending,
337
                     VGZ_NORMAL);
338 133
                if (vep->o_crc == 0) {
339 105
                        vep->crc = vep->crcp;
340 105
                        vep->o_crc = vep->o_pending;
341
                } else {
342 56
                        vep->crc = crc32_combine(vep->crc,
343 28
                            vep->crcp, vep->o_pending);
344 28
                        vep->o_crc += vep->o_pending;
345
                }
346 133
                vep->crcp = crc32(0L, Z_NULL, 0);
347 133
                vep->o_wait += vep->o_pending;
348 133
                vep->o_pending = 0;
349
        }
350
351
        /* * Process this bit of input */
352 131670
        AN(vep->ver_p);
353 131670
        l = p - vep->ver_p;
354 131670
        assert(l >= 0);
355 131670
        vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l);
356 131670
        vep->o_crc += l;
357 131670
        vep->ver_p = p;
358
359 131670
        vep->o_wait += l;
360 131670
        vep->last_mark = mark;
361 131670
        (void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL);
362
}
363
364
static void
365 76039
vep_mark_verbatim(struct vep_state *vep, const char *p)
366
{
367
368 76039
        vep_mark_common(vep, p, VERBATIM);
369 76039
        vep->nm_verbatim++;
370 76039
}
371
372
static void
373 55388
vep_mark_skip(struct vep_state *vep, const char *p)
374
{
375
376 55388
        vep_mark_common(vep, p, SKIP);
377 55388
        vep->nm_skip++;
378 55388
}
379
380
static void
381 250
vep_mark_pending(struct vep_state *vep, const char *p)
382
{
383
        ssize_t l;
384
385 250
        AN(vep->ver_p);
386 250
        l = p - vep->ver_p;
387 250
        assert(l > 0);
388 250
        vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l);
389 250
        vep->ver_p = p;
390
391 250
        vep->o_pending += l;
392 250
        vep->nm_pending++;
393 250
}
394
395
/*---------------------------------------------------------------------
396
 */
397
398
static void v_matchproto_()
399 12
vep_do_comment(struct vep_state *vep, enum dowhat what)
400
{
401
        Debug("DO_COMMENT(%d)\n", what);
402 12
        assert(what == DO_TAG);
403 12
        if (!vep->emptytag)
404 4
                vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'");
405 12
}
406
407
/*---------------------------------------------------------------------
408
 */
409
410
static void v_matchproto_()
411 284
vep_do_remove(struct vep_state *vep, enum dowhat what)
412
{
413
        Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n",
414
            what, vep->endtag, vep->emptytag, vep->remove);
415 284
        assert(what == DO_TAG);
416 284
        if (vep->emptytag)
417 2
                vep_error(vep, "ESI 1.0 <esi:remove/> not legal");
418 282
        else if (vep->remove && !vep->endtag)
419 2
                vep_error(vep, "ESI 1.0 <esi:remove> already open");
420 280
        else if (!vep->remove && vep->endtag)
421 2
                vep_error(vep, "ESI 1.0 <esi:remove> not open");
422
        else
423 278
                vep->remove = !vep->endtag;
424 284
}
425
426
/*---------------------------------------------------------------------
427
 */
428
429
static void v_matchproto_()
430 898
vep_do_include(struct vep_state *vep, enum dowhat what)
431
{
432
        const char *p, *q, *h;
433
        ssize_t l;
434
435
        Debug("DO_INCLUDE(%d)\n", what);
436 898
        if (what == DO_ATTR) {
437
                Debug("ATTR (%s) (%s)\n", vep->match_hit->match,
438
                        VSB_data(vep->attr_vsb));
439 450
                if (vep->include_src != NULL) {
440 2
                        vep_error(vep,
441
                            "ESI 1.0 <esi:include> "
442
                            "has multiple src= attributes");
443 2
                        vep->state = VEP_TAGERROR;
444 2
                        VSB_destroy(&vep->attr_vsb);
445 2
                        VSB_destroy(&vep->include_src);
446 2
                        return;
447
                }
448 18044
                for (p = VSB_data(vep->attr_vsb); *p != '\0'; p++)
449 17598
                        if (vct_islws(*p))
450 2
                                break;
451 448
                if (*p != '\0') {
452 2
                        vep_error(vep,
453
                            "ESI 1.0 <esi:include> "
454
                            "has whitespace in src= attribute");
455 2
                        vep->state = VEP_TAGERROR;
456 2
                        VSB_destroy(&vep->attr_vsb);
457 2
                        if (vep->include_src != NULL)
458 0
                                VSB_destroy(&vep->include_src);
459 2
                        return;
460
                }
461 446
                vep->include_src = vep->attr_vsb;
462 446
                vep->attr_vsb = NULL;
463 446
                return;
464
        }
465 448
        assert(what == DO_TAG);
466 448
        if (!vep->emptytag)
467 4
                vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'");
468 448
        if (vep->include_src == NULL) {
469 4
                vep_error(vep, "ESI 1.0 <esi:include> lacks src attr");
470 4
                return;
471
        }
472
473
        /*
474
         * Strictly speaking, we ought to spit out any piled up skip before
475
         * emitting the VEC for the include, but objectively that makes no
476
         * difference and robs us of a chance to collapse another skip into
477
         * this on so we don't do that.
478
         * However, we cannot tolerate any verbatim stuff piling up.
479
         * The mark_skip() before calling dostuff should have taken
480
         * care of that.  Make sure.
481
         */
482 444
        assert(vep->o_wait == 0 || vep->last_mark == SKIP);
483
        /* XXX: what if it contains NUL bytes ?? */
484 444
        p = VSB_data(vep->include_src);
485 444
        l = VSB_len(vep->include_src);
486 444
        h = 0;
487
488 444
        if (l > 7 && !memcmp(p, "http://", 7)) {
489 4
                h = p + 7;
490 4
                p = strchr(h, '/');
491 4
                if (p == NULL) {
492 2
                        vep_error(vep,
493
                            "ESI 1.0 <esi:include> invalid src= URL");
494 2
                        vep->state = VEP_TAGERROR;
495 2
                        AZ(vep->attr_vsb);
496 2
                        VSB_destroy(&vep->include_src);
497 2
                        return;
498
                }
499
                Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p);
500 2
                VSB_printf(vep->vsb, "%c", VEC_INCL);
501 2
                VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
502 440
        } else if (l > 8 && !memcmp(p, "https://", 8)) {
503 6
                if (!FEATURE(FEATURE_ESI_IGNORE_HTTPS)) {
504 2
                        vep_warn(vep,
505
                            "ESI 1.0 <esi:include> with https:// ignored");
506 2
                        vep->state = VEP_TAGERROR;
507 2
                        AZ(vep->attr_vsb);
508 2
                        VSB_destroy(&vep->include_src);
509 2
                        return;
510
                }
511 4
                vep_warn(vep,
512
                    "ESI 1.0 <esi:include> https:// treated as http://");
513 4
                h = p + 8;
514 4
                p = strchr(h, '/');
515 4
                if (p == NULL) {
516 2
                        vep_error(vep,
517
                            "ESI 1.0 <esi:include> invalid src= URL");
518 2
                        vep->state = VEP_TAGERROR;
519 2
                        AZ(vep->attr_vsb);
520 2
                        VSB_destroy(&vep->include_src);
521 2
                        return;
522
                }
523 2
                VSB_printf(vep->vsb, "%c", VEC_INCL);
524 2
                VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
525 434
        } else if (*p == '/') {
526 378
                VSB_printf(vep->vsb, "%c", VEC_INCL);
527 378
                VSB_printf(vep->vsb, "%c", 0);
528
        } else {
529 56
                VSB_printf(vep->vsb, "%c", VEC_INCL);
530 56
                VSB_printf(vep->vsb, "%c", 0);
531
                /* Look for the last / before a '?' */
532 56
                h = NULL;
533 144
                for (q = vep->url; *q && *q != '?'; q++)
534 88
                        if (*q == '/')
535 58
                                h = q;
536 56
                if (h == NULL)
537 2
                        h = q + 1;
538
539
                Debug("INCL:: [%.*s]/[%s]\n",
540
                    (int)(h - vep->url), vep->url, p);
541 56
                VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url);
542
        }
543 438
        l -= (p - VSB_data(vep->include_src));
544 18246
        for (q = p; *q != '\0'; ) {
545 17370
                if (*q == '&') {
546
#define R(w,f,r)                                                        \
547
                        if (q + w <= p + l && !memcmp(q, f, w)) { \
548
                                VSB_printf(vep->vsb, "%c", r);  \
549
                                q += w;                         \
550
                                continue;                       \
551
                        }
552 20
                        R(6, "&apos;", '\'');
553 16
                        R(6, "&quot;", '"');
554 12
                        R(4, "&lt;", '<');
555 8
                        R(4, "&gt;", '>');
556 4
                        R(5, "&amp;", '&');
557
                }
558 17350
                VSB_printf(vep->vsb, "%c", *q++);
559
        }
560
#undef R
561 438
        VSB_printf(vep->vsb, "%c", 0);
562 438
        VSB_destroy(&vep->include_src);
563
}
564
565
/*---------------------------------------------------------------------
566
 * Lex/Parse object for ESI instructions
567
 *
568
 * This function is called with the input object piecemeal so do not
569
 * assume that we have more than one char available at a time, but
570
 * optimize for getting huge chunks.
571
 *
572
 * NB: At the bottom of this source-file, there is a dot-diagram matching
573
 * NB: the state-machine.  Please maintain it along with the code.
574
 */
575
576
void
577 107427
VEP_Parse(struct vep_state *vep, const char *p, size_t l)
578
{
579
        const char *e;
580
        struct vep_match *vm;
581
        int i;
582
583 107427
        CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
584 107427
        assert(l > 0);
585
586 107427
        if (vep->startup) {
587
                /*
588
                 * We must force the GZIP header out as a SKIP string,
589
                 * otherwise an object starting with <esi:include would
590
                 * have its GZIP header appear after the included object
591
                 * (e000026.vtc)
592
                 */
593 312
                vep->ver_p = "";
594 312
                vep->last_mark = SKIP;
595 312
                vep_mark_common(vep, vep->ver_p, VERBATIM);
596 312
                vep->startup = 0;
597 312
                AZ(vep->hack_p);
598 312
                vep->hack_p = p;
599
        }
600
601 107427
        vep->ver_p = p;
602
603 107427
        e = p + l;
604
605 418112
        while (p < e) {
606 203258
                AN(vep->state);
607
                Debug("EP %s %d (%.*s) [%.*s]\n",
608
                    vep->state,
609
                    vep->remove,
610
                    vep->tag_i, vep->tag,
611
                    (e - p) > 10 ? 10 : (int)(e-p), p);
612 203258
                assert(p >= vep->ver_p);
613
614
                /******************************************************
615
                 * SECTION A
616
                 */
617
618 203258
                if (vep->state == VEP_START) {
619 330
                        if (FEATURE(FEATURE_ESI_REMOVE_BOM) &&
620 12
                            *p == (char)0xeb) {
621 8
                                vep->match = vep_match_bom;
622 8
                                vep->state = VEP_MATCH;
623
                        } else
624 310
                                vep->state = VEP_BOM;
625 202940
                } else if (vep->state == VEP_BOM) {
626 312
                        vep_mark_skip(vep, p);
627 312
                        if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK))
628 88
                                vep->state = VEP_NEXTTAG;
629
                        else
630 224
                                vep->state = VEP_TESTXML;
631 202628
                } else if (vep->state == VEP_TESTXML) {
632
                        /*
633
                         * If the first non-whitespace char is different
634
                         * from '<' we assume this is not XML.
635
                         */
636 780
                        while (p < e && vct_islws(*p))
637 304
                                p++;
638 238
                        vep_mark_verbatim(vep, p);
639 238
                        if (p < e && *p == '<') {
640 208
                                p++;
641 208
                                vep->state = VEP_STARTTAG;
642 30
                        } else if (p < e && *p == (char)0xeb) {
643 4
                                VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
644
                                    "No ESI processing, "
645
                                    "first char not '<' but BOM."
646
                                    " (See feature esi_remove_bom)"
647
                                );
648 4
                                vep->state = VEP_NOTXML;
649 26
                        } else if (p < e) {
650 10
                                VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
651
                                    "No ESI processing, "
652
                                    "first char not '<'."
653
                                    " (See feature esi_disable_xml_check)"
654
                                );
655 10
                                vep->state = VEP_NOTXML;
656
                        }
657 202390
                } else if (vep->state == VEP_NOTXML) {
658
                        /*
659
                         * This is not recognized as XML, just skip thru
660
                         * vfp_esi_end() will handle the rest
661
                         */
662 14
                        p = e;
663 14
                        vep_mark_verbatim(vep, p);
664
665
                /******************************************************
666
                 * SECTION B
667
                 */
668
669 202376
                } else if (vep->state == VEP_NOTMYTAG) {
670 21818
                        if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) {
671 4
                                p++;
672 4
                                vep->state = VEP_NEXTTAG;
673
                        } else {
674 21814
                                vep->tag_i = 0;
675 114786
                                while (p < e) {
676 92888
                                        if (*p++ == '>') {
677 21730
                                                vep->state = VEP_NEXTTAG;
678 21730
                                                break;
679
                                        }
680
                                }
681
                        }
682 21818
                        if (p == e && !vep->remove)
683 151
                                vep_mark_verbatim(vep, p);
684 180558
                } else if (vep->state == VEP_NEXTTAG) {
685
                        /*
686
                         * Hunt for start of next tag and keep an eye
687
                         * out for end of EsiCmt if armed.
688
                         */
689 128539
                        vep->emptytag = 0;
690 128539
                        vep->attr = NULL;
691 128539
                        vep->dostuff = NULL;
692 4572704
                        while (p < e && *p != '<') {
693 4315626
                                if (vep->esicmt_p == NULL) {
694 4315414
                                        p++;
695 4315414
                                        continue;
696
                                }
697 212
                                if (*p != *vep->esicmt_p) {
698 124
                                        p++;
699 124
                                        vep->esicmt_p = vep->esicmt;
700 124
                                        continue;
701
                                }
702 88
                                if (!vep->remove && vep->esicmt_p == vep->esicmt)
703 20
                                        vep_mark_verbatim(vep, p);
704 88
                                p++;
705 88
                                if (*++vep->esicmt_p == '\0') {
706 28
                                        vep->esi_found = 1;
707 28
                                        vep->esicmt = NULL;
708 28
                                        vep->esicmt_p = NULL;
709
                                        /*
710
                                         * The end of the esicmt
711
                                         * should not be emitted.
712
                                         * But the stuff before should
713
                                         */
714 28
                                        vep_mark_skip(vep, p);
715
                                }
716
                        }
717 128539
                        if (p < e) {
718 22360
                                if (!vep->remove)
719 22194
                                        vep_mark_verbatim(vep, p);
720 22360
                                assert(*p == '<');
721 22360
                                p++;
722 22360
                                vep->state = VEP_STARTTAG;
723 106179
                        } else if (vep->esicmt_p == vep->esicmt && !vep->remove)
724 53385
                                vep_mark_verbatim(vep, p);
725
726
                /******************************************************
727
                 * SECTION C
728
                 */
729
730 52019
                } else if (vep->state == VEP_STARTTAG) {
731
                        /* Start of tag, set up match table */
732 22568
                        vep->endtag = 0;
733 22568
                        vep->match = vep_match_starttag;
734 22568
                        vep->state = VEP_MATCH;
735 29451
                } else if (vep->state == VEP_COMMENT) {
736 10
                        vep->esicmt_p = vep->esicmt = NULL;
737 10
                        vep->until_p = vep->until = "-->";
738 10
                        vep->until_s = VEP_NEXTTAG;
739 10
                        vep->state = VEP_UNTIL;
740 29441
                } else if (vep->state == VEP_COMMENTESI) {
741 30
                        if (vep->remove)
742 10
                                vep_error(vep,
743
                                    "ESI 1.0 Nested <!--esi"
744
                                    " element in <esi:remove>");
745 30
                        vep->esicmt_p = vep->esicmt = "-->";
746 30
                        vep->state = VEP_NEXTTAG;
747 30
                        vep_mark_skip(vep, p);
748 29411
                } else if (vep->state == VEP_CDATA) {
749
                        /*
750
                         * Easy: just look for the end of CDATA
751
                         */
752 8
                        vep->until_p = vep->until = "]]>";
753 8
                        vep->until_s = VEP_NEXTTAG;
754 8
                        vep->state = VEP_UNTIL;
755 29403
                } else if (vep->state == VEP_ESIENDTAG) {
756 146
                        vep->endtag = 1;
757 146
                        vep->state = VEP_ESITAG;
758 29257
                } else if (vep->state == VEP_ESITAG) {
759 780
                        vep->in_esi_tag = 1;
760 780
                        vep->esi_found = 1;
761 780
                        vep_mark_skip(vep, p);
762 780
                        vep->match = vep_match_esi;
763 780
                        vep->state = VEP_MATCH;
764 28477
                } else if (vep->state == VEP_ESIINCLUDE) {
765 472
                        if (vep->remove) {
766 8
                                vep_error(vep,
767
                                    "ESI 1.0 <esi:include> element"
768
                                    " nested in <esi:remove>");
769 8
                                vep->state = VEP_TAGERROR;
770 464
                        } else if (vep->endtag) {
771 2
                                vep_error(vep,
772
                                    "ESI 1.0 </esi:include> illegal end-tag");
773 2
                                vep->state = VEP_TAGERROR;
774
                        } else {
775 462
                                vep->dostuff = vep_do_include;
776 462
                                vep->state = VEP_INTAG;
777 462
                                vep->attr = vep_match_attr_include;
778
                        }
779 28005
                } else if (vep->state == VEP_ESIREMOVE) {
780 284
                        vep->dostuff = vep_do_remove;
781 284
                        vep->state = VEP_INTAG;
782 27721
                } else if (vep->state == VEP_ESICOMMENT) {
783 20
                        if (vep->remove) {
784 2
                                vep_error(vep,
785
                                    "ESI 1.0 <esi:comment> element"
786
                                    " nested in <esi:remove>");
787 2
                                vep->state = VEP_TAGERROR;
788 18
                        } else if (vep->endtag) {
789 4
                                vep_error(vep,
790
                                    "ESI 1.0 </esi:comment> illegal end-tag");
791 4
                                vep->state = VEP_TAGERROR;
792
                        } else {
793 14
                                vep->dostuff = vep_do_comment;
794 14
                                vep->state = VEP_INTAG;
795
                        }
796 27701
                } else if (vep->state == VEP_ESIBOGON) {
797 4
                        vep_error(vep,
798
                            "ESI 1.0 <esi:bogus> element");
799 4
                        vep->state = VEP_TAGERROR;
800
801
                /******************************************************
802
                 * SECTION D
803
                 */
804
805 27697
                } else if (vep->state == VEP_INTAG) {
806 1273
                        vep->tag_i = 0;
807 3236
                        while (p < e && vct_islws(*p) && !vep->emptytag) {
808 690
                                p++;
809 690
                                vep->canattr = 1;
810
                        }
811 1273
                        if (p < e && *p == '/' && !vep->emptytag) {
812 456
                                p++;
813 456
                                vep->emptytag = 1;
814 456
                                vep->canattr = 0;
815
                        }
816 1273
                        if (p < e && *p == '>') {
817 744
                                p++;
818 744
                                AN(vep->dostuff);
819 744
                                vep_mark_skip(vep, p);
820 744
                                vep->dostuff(vep, DO_TAG);
821 744
                                vep->in_esi_tag = 0;
822 744
                                vep->state = VEP_NEXTTAG;
823 529
                        } else if (p < e && vep->emptytag) {
824 2
                                vep_error(vep,
825
                                    "XML 1.0 '>' does not follow '/' in tag");
826 2
                                vep->state = VEP_TAGERROR;
827 1001
                        } else if (p < e && vep->canattr &&
828 474
                            vct_isxmlnamestart(*p)) {
829 472
                                vep->state = VEP_ATTR;
830 55
                        } else if (p < e) {
831 2
                                vep_error(vep,
832
                                    "XML 1.0 Illegal attribute start char");
833 2
                                vep->state = VEP_TAGERROR;
834
                        }
835 26424
                } else if (vep->state == VEP_TAGERROR) {
836 475
                        while (p < e && *p != '>')
837 261
                                p++;
838 107
                        if (p < e) {
839 36
                                p++;
840 36
                                vep_mark_skip(vep, p);
841 36
                                vep->in_esi_tag = 0;
842 36
                                vep->state = VEP_NEXTTAG;
843 36
                                if (vep->attr_vsb)
844 2
                                        VSB_destroy(&vep->attr_vsb);
845
                        }
846
847
                /******************************************************
848
                 * SECTION E
849
                 */
850
851 26317
                } else if (vep->state == VEP_ATTR) {
852 472
                        AZ(vep->attr_delim);
853 472
                        if (vep->attr == NULL) {
854 6
                                p++;
855 6
                                AZ(vep->attr_vsb);
856 6
                                vep->state = VEP_SKIPATTR;
857
                        } else {
858 466
                                vep->match = vep->attr;
859 466
                                vep->state = VEP_MATCH;
860
                        }
861 25845
                } else if (vep->state == VEP_SKIPATTR) {
862 87
                        while (p < e && vct_isxmlname(*p))
863 33
                                p++;
864 27
                        if (p < e && *p == '=') {
865 6
                                p++;
866 6
                                vep->state = VEP_ATTRDELIM;
867 21
                        } else if (p < e && *p == '>') {
868 2
                                vep->state = VEP_INTAG;
869 19
                        } else if (p < e && *p == '/') {
870 2
                                vep->state = VEP_INTAG;
871 17
                        } else if (p < e && vct_issp(*p)) {
872 4
                                vep->state = VEP_INTAG;
873 13
                        } else if (p < e) {
874 2
                                vep_error(vep,
875
                                    "XML 1.0 Illegal attr char");
876 2
                                vep->state = VEP_TAGERROR;
877
                        }
878 25818
                } else if (vep->state == VEP_ATTRGETVAL) {
879 456
                        AZ(vep->attr_vsb);
880 456
                        vep->attr_vsb = VSB_new_auto();
881 456
                        vep->state = VEP_ATTRDELIM;
882 25362
                } else if (vep->state == VEP_ATTRDELIM) {
883 462
                        AZ(vep->attr_delim);
884 462
                        if (*p == '"' || *p == '\'') {
885 454
                                vep->attr_delim = *p++;
886 454
                                vep->state = VEP_ATTRVAL;
887 8
                        } else if (!vct_issp(*p)) {
888 6
                                vep->attr_delim = ' ';
889 6
                                vep->state = VEP_ATTRVAL;
890
                        } else {
891 2
                                vep_error(vep,
892
                                    "XML 1.0 Illegal attribute delimiter");
893 2
                                vep->state = VEP_TAGERROR;
894
                        }
895
896 24900
                } else if (vep->state == VEP_ATTRVAL) {
897 36344
                        while (p < e && *p != '>' && *p != vep->attr_delim &&
898 17658
                           (vep->attr_delim != ' ' || !vct_issp(*p))) {
899 17638
                                if (vep->attr_vsb != NULL)
900 17620
                                        VSB_putc(vep->attr_vsb, *p);
901 17638
                                p++;
902
                        }
903 534
                        if (p < e && *p == '>') {
904 4
                                vep_error(vep,
905
                                    "XML 1.0 Missing end attribute delimiter");
906 4
                                vep->state = VEP_TAGERROR;
907 4
                                vep->attr_delim = 0;
908 8
                                if (vep->attr_vsb != NULL) {
909 4
                                        AZ(VSB_finish(vep->attr_vsb));
910 4
                                        VSB_destroy(&vep->attr_vsb);
911
                                }
912 530
                        } else if (p < e) {
913 456
                                vep->attr_delim = 0;
914 456
                                p++;
915 456
                                vep->state = VEP_INTAG;
916 456
                                if (vep->attr_vsb != NULL) {
917 450
                                        AZ(VSB_finish(vep->attr_vsb));
918 450
                                        AN(vep->dostuff);
919 450
                                        vep->dostuff(vep, DO_ATTR);
920 450
                                        vep->attr_vsb = NULL;
921
                                }
922
                        }
923
924
                /******************************************************
925
                 * Utility Section
926
                 */
927
928 24366
                } else if (vep->state == VEP_MATCH) {
929
                        /*
930
                         * Match against a table
931
                         */
932 23822
                        vm = vep_match(vep, p, e);
933 23822
                        vep->match_hit = vm;
934 23822
                        if (vm != NULL) {
935 23537
                                if (vm->match != NULL)
936 1799
                                        p += strlen(vm->match);
937 23537
                                vep->state = *vm->state;
938 23537
                                vep->match = NULL;
939 23537
                                vep->tag_i = 0;
940
                        } else {
941 285
                                assert(e - p <= sizeof(vep->tag));
942 285
                                memcpy(vep->tag, p, e - p);
943 285
                                vep->tag_i = e - p;
944 285
                                vep->state = VEP_MATCHBUF;
945 285
                                p = e;
946
                        }
947 544
                } else if (vep->state == VEP_MATCHBUF) {
948
                        /*
949
                         * Match against a table while split over input
950
                         * sections.
951
                         */
952 498
                        AN(vep->match);
953 498
                        i = sizeof(vep->tag) - vep->tag_i;
954 498
                        if (i > e - p)
955 421
                                i = e - p;
956 498
                        memcpy(vep->tag + vep->tag_i, p, i);
957 498
                        vm = vep_match(vep, vep->tag,
958 498
                            vep->tag + vep->tag_i + i);
959
                        Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n",
960
                            vep->tag_i + i, vep->tag,
961
                            vep->tag_i,
962
                            i,
963
                            vm,
964
                            vm ? vm->match : "(nil)");
965
966 498
                        if (vm == NULL) {
967 213
                                vep->tag_i += i;
968 213
                                p += i;
969 213
                                assert(p == e);
970
                        } else {
971 285
                                vep->match_hit = vm;
972 285
                                vep->state = *vm->state;
973 285
                                if (vm->match != NULL) {
974 269
                                        i = strlen(vm->match);
975 269
                                        if (i > vep->tag_i)
976 266
                                                p += i - vep->tag_i;
977
                                }
978 285
                                vep->match = NULL;
979 285
                                vep->tag_i = 0;
980
                        }
981 46
                } else if (vep->state == VEP_UNTIL) {
982
                        /*
983
                         * Skip until we see magic string
984
                         */
985 416
                        while (p < e) {
986 338
                                if (*p++ != *vep->until_p++) {
987 292
                                        vep->until_p = vep->until;
988 46
                                } else if (*vep->until_p == '\0') {
989 14
                                        vep->state = vep->until_s;
990 14
                                        break;
991
                                }
992
                        }
993 46
                        if (p == e && !vep->remove)
994 37
                                vep_mark_verbatim(vep, p);
995
                } else {
996
                        Debug("*** Unknown state %s\n", vep->state);
997 0
                        WRONG("WRONG ESI PARSER STATE");
998
                }
999
        }
1000
        /*
1001
         * We must always mark up the storage we got, try to do so
1002
         * in the most efficient way, in particular with respect to
1003
         * minimizing and limiting use of pending.
1004
         */
1005 107427
        if (p == vep->ver_p)
1006
                ;
1007 53708
        else if (vep->in_esi_tag)
1008 604
                vep_mark_skip(vep, p);
1009 53104
        else if (vep->remove)
1010 52854
                vep_mark_skip(vep, p);
1011
        else
1012 250
                vep_mark_pending(vep, p);
1013 107427
}
1014
1015
/*---------------------------------------------------------------------
1016
 */
1017
1018
static ssize_t v_matchproto_(vep_callback_t)
1019 132118
vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg)
1020
{
1021
        ssize_t *s;
1022
1023 132118
        CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1024 132118
        AN(priv);
1025 132118
        s = priv;
1026 132118
        *s += l;
1027
        (void)flg;
1028 132118
        return (*s);
1029
}
1030
1031
/*---------------------------------------------------------------------
1032
 */
1033
1034
struct vep_state *
1035 318
VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb,
1036
    void *cb_priv)
1037
{
1038
        struct vep_state *vep;
1039
1040 318
        CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1041 318
        CHECK_OBJ_NOTNULL(req, HTTP_MAGIC);
1042 318
        vep = WS_Alloc(vc->resp->ws, sizeof *vep);
1043 318
        AN(vep);
1044
1045 318
        INIT_OBJ(vep, VEP_MAGIC);
1046 318
        vep->url = req->hd[HTTP_HDR_URL].b;
1047 318
        vep->vc = vc;
1048 318
        vep->vsb = VSB_new_auto();
1049 318
        AN(vep->vsb);
1050
1051 318
        if (cb != NULL) {
1052 78
                vep->dogzip = 1;
1053
                /* XXX */
1054 78
                VSB_printf(vep->vsb, "%c", VEC_GZ);
1055 78
                vep->cb = cb;
1056 78
                vep->cb_priv = cb_priv;
1057
        } else {
1058 240
                vep->cb = vep_default_cb;
1059 240
                vep->cb_priv = &vep->cb_x;
1060
        }
1061
1062 318
        vep->state = VEP_START;
1063 318
        vep->crc = crc32(0L, Z_NULL, 0);
1064 318
        vep->crcp = crc32(0L, Z_NULL, 0);
1065
1066 318
        vep->startup = 1;
1067 318
        return (vep);
1068
}
1069
1070
/*---------------------------------------------------------------------
1071
 */
1072
1073
struct vsb *
1074 318
VEP_Finish(struct vep_state *vep)
1075
{
1076
        ssize_t l, lcb;
1077
1078 318
        CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
1079
1080 318
        if (vep->include_src)
1081 0
                VSB_destroy(&vep->include_src);
1082 318
        if (vep->attr_vsb)
1083 0
                VSB_destroy(&vep->attr_vsb);
1084
1085 630
        if (vep->state != VEP_START &&
1086 624
            vep->state != VEP_BOM &&
1087 622
            vep->state != VEP_TESTXML &&
1088 606
            vep->state != VEP_NOTXML &&
1089 296
            vep->state != VEP_NEXTTAG) {
1090 8
                vep_error(vep, "VEP ended inside a tag");
1091
        }
1092
1093 318
        if (vep->o_pending)
1094 0
                vep_mark_common(vep, vep->ver_p, vep->last_mark);
1095 318
        if (vep->o_wait > 0) {
1096 312
                lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN);
1097 312
                vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
1098
        }
1099
        // NB: We don't account for PAD+SUM+LEN in gzip'ed objects
1100 318
        (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH);
1101
1102 318
        AZ(VSB_finish(vep->vsb));
1103 318
        l = VSB_len(vep->vsb);
1104 318
        if (vep->esi_found && l > 0)
1105 184
                return (vep->vsb);
1106 134
        VSB_destroy(&vep->vsb);
1107 134
        return (NULL);
1108
}
1109
1110
#if 0
1111
1112
digraph xml {
1113
        rankdir="LR"
1114
        size="7,10"
1115
#################################################################
1116
# SECTION A
1117
#
1118
1119
START           [shape=ellipse]
1120
TESTXML         [shape=ellipse]
1121
NOTXML          [shape=ellipse]
1122
NEXTTAGa        [shape=hexagon, label="NEXTTAG"]
1123
STARTTAGa       [shape=hexagon, label="STARTTAG"]
1124
START           -> TESTXML
1125
START           -> NEXTTAGa     [style=dotted, label="syntax:1"]
1126
TESTXML         -> TESTXML      [label="lws"]
1127
TESTXML         -> NOTXML
1128
TESTXML         -> STARTTAGa    [label="'<'"]
1129
1130
#################################################################
1131
# SECTION B
1132
1133
NOTMYTAG        [shape=ellipse]
1134
NEXTTAG         [shape=ellipse]
1135
NOTMYTAG        -> NEXTTAG      [style=dotted, label="syntax:2"]
1136
STARTTAGb       [shape=hexagon, label="STARTTAG"]
1137
NOTMYTAG        -> NEXTTAG      [label="'>'"]
1138
NOTMYTAG        -> NOTMYTAG     [label="*"]
1139
NEXTTAG         -> NEXTTAG      [label="'-->'"]
1140
NEXTTAG         -> NEXTTAG      [label="*"]
1141
NEXTTAG         -> STARTTAGb    [label="'<'"]
1142
1143
#################################################################
1144
# SECTION C
1145
1146
STARTTAG        [shape=ellipse]
1147
COMMENT         [shape=ellipse]
1148
CDATA           [shape=ellipse]
1149
ESITAG          [shape=ellipse]
1150
ESIETAG         [shape=ellipse]
1151
ESIINCLUDE      [shape=ellipse]
1152
ESIREMOVE       [shape=ellipse]
1153
ESICOMMENT      [shape=ellipse]
1154
ESIBOGON        [shape=ellipse]
1155
INTAGc          [shape=hexagon, label="INTAG"]
1156
NOTMYTAGc       [shape=hexagon, label="NOTMYTAG"]
1157
NEXTTAGc        [shape=hexagon, label="NEXTTAG"]
1158
TAGERRORc       [shape=hexagon, label="TAGERROR"]
1159
C1              [shape=circle,label=""]
1160
STARTTAG        -> COMMENT      [label="'<!--'"]
1161
STARTTAG        -> ESITAG       [label="'<esi'"]
1162
STARTTAG        -> CDATA        [label="'<![CDATA['"]
1163
STARTTAG        -> NOTMYTAGc    [label="'*'"]
1164
COMMENT         -> NEXTTAGc     [label="'esi'"]
1165
COMMENT         -> C1           [label="*"]
1166
C1              -> C1           [label="*"]
1167
C1              -> NEXTTAGc     [label="-->"]
1168
CDATA           -> CDATA        [label="*"]
1169
CDATA           -> NEXTTAGc     [label="]]>"]
1170
ESITAG          -> ESIINCLUDE   [label="'include'"]
1171
ESITAG          -> ESIREMOVE    [label="'remove'"]
1172
ESITAG          -> ESICOMMENT   [label="'comment'"]
1173
ESITAG          -> ESIBOGON     [label="*"]
1174
ESICOMMENT      -> INTAGc
1175
ESICOMMENT      -> TAGERRORc
1176
ESICOMMENT      -> TAGERRORc    [style=dotted, label="nested\nin\nremove"]
1177
ESIREMOVE       -> INTAGc
1178
ESIREMOVE       -> TAGERRORc
1179
ESIINCLUDE      -> INTAGc
1180
ESIINCLUDE      -> TAGERRORc
1181
ESIINCLUDE      -> TAGERRORc    [style=dotted, label="nested\nin\nremove"]
1182
ESIBOGON        -> TAGERRORc
1183
1184
#################################################################
1185
# SECTION D
1186
1187
INTAG           [shape=ellipse]
1188
TAGERROR        [shape=ellipse]
1189
NEXTTAGd        [shape=hexagon, label="NEXTTAG"]
1190
ATTRd           [shape=hexagon, label="ATTR"]
1191
D1              [shape=circle, label=""]
1192
D2              [shape=circle, label=""]
1193
INTAG           -> D1           [label="lws"]
1194
D1              -> D2           [label="/"]
1195
INTAG           -> D2           [label="/"]
1196
INTAG           -> NEXTTAGd     [label=">"]
1197
D1              -> NEXTTAGd     [label=">"]
1198
D2              -> NEXTTAGd     [label=">"]
1199
D1              -> ATTRd        [label="XMLstartchar"]
1200
D1              -> TAGERROR     [label="*"]
1201
D2              -> TAGERROR     [label="*"]
1202
TAGERROR        -> TAGERROR     [label="*"]
1203
TAGERROR        -> NEXTTAGd     [label="'>'"]
1204
1205
#################################################################
1206
# SECTION E
1207
1208
ATTR            [shape=ellipse]
1209
SKIPATTR        [shape=ellipse]
1210
ATTRGETVAL      [shape=ellipse]
1211
ATTRDELIM       [shape=ellipse]
1212
ATTRVAL         [shape=ellipse]
1213
TAGERRORe       [shape=hexagon, label="TAGERROR"]
1214
INTAGe          [shape=hexagon, label="INTAG"]
1215
ATTR            -> SKIPATTR     [label="*"]
1216
ATTR            -> ATTRGETVAL   [label="wanted attr"]
1217
SKIPATTR        -> SKIPATTR     [label="XMLname"]
1218
SKIPATTR        -> ATTRDELIM    [label="'='"]
1219
SKIPATTR        -> TAGERRORe    [label="*"]
1220
ATTRGETVAL      -> ATTRDELIM
1221
ATTRDELIM       -> ATTRVAL      [label="\""]
1222
ATTRDELIM       -> ATTRVAL      [label="\'"]
1223
ATTRDELIM       -> ATTRVAL      [label="*"]
1224
ATTRDELIM       -> TAGERRORe    [label="lws"]
1225
ATTRVAL         -> TAGERRORe    [label="'>'"]
1226
ATTRVAL         -> INTAGe       [label="delim"]
1227
ATTRVAL         -> ATTRVAL      [label="*"]
1228
1229
}
1230
1231
#endif