varnish-cache/bin/varnishd/cache/cache_esi_parse.c
1
/*-
2
 * Copyright (c) 2011 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * Redistribution and use in source and binary forms, with or without
8
 * modification, are permitted provided that the following conditions
9
 * are met:
10
 * 1. Redistributions of source code must retain the above copyright
11
 *    notice, this list of conditions and the following disclaimer.
12
 * 2. Redistributions in binary form must reproduce the above copyright
13
 *    notice, this list of conditions and the following disclaimer in the
14
 *    documentation and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
20
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
 * SUCH DAMAGE.
27
 *
28
 * VEP Varnish Esi Parsing
29
 */
30
31
#include "config.h"
32
33
#include "cache_varnishd.h"
34
#include "cache_filter.h"
35
36
#include "cache_vgz.h"
37
#include "cache_esi.h"
38
#include "vct.h"
39
#include "vend.h"
40
#include "vgz.h"
41
42
//#define Debug(fmt, ...) printf(fmt, __VA_ARGS__)
43
#define Debug(fmt, ...) /**/
44
45
struct vep_state;
46
47
/*
 * What a tag handler is being asked to do: DO_ATTR is passed when an
 * attribute value has been collected, DO_TAG when the handler should
 * act on the tag as a whole.
 */
enum dowhat {
        DO_ATTR,
        DO_TAG
};

/* Common signature of the per-tag handlers (vep_do_*) in this file. */
typedef void dostuff_f(struct vep_state *, enum dowhat);
49
50
/*
 * One row of a match table: a literal string to compare the input
 * against and a pointer to the VEP_* state name associated with it.
 * Every table ends with a row whose match is NULL; vep_match() returns
 * that row when none of the literals matched.
 */
struct vep_match {
        const char              *match;
        const char * const      *state;
};
54
55
/* Classification given to a run of input bytes by vep_mark_common(). */
enum vep_mark {
        VERBATIM = 0,
        SKIP
};
56
57
struct vep_state {
58
        unsigned                magic;
59
#define VEP_MAGIC               0x55cb9b82
60
        struct vsb              *vsb;
61
62
        const char              *url;
63
        struct vfp_ctx          *vc;
64
        int                     dogzip;
65
        vep_callback_t          *cb;
66
        void                    *cb_priv;
67
68
        /* Internal Counter for default call-back function */
69
        ssize_t                 cb_x;
70
71
        /* parser state */
72
        const char              *state;
73
        unsigned                startup;
74
        unsigned                esi_found;
75
76
        unsigned                endtag;
77
        unsigned                emptytag;
78
        unsigned                canattr;
79
80
        unsigned                remove;
81
82
        ssize_t                 o_wait;
83
        ssize_t                 o_pending;
84
        ssize_t                 o_total;
85
        uint32_t                crc;
86
        ssize_t                 o_crc;
87
        uint32_t                crcp;
88
        ssize_t                 o_last;
89
90
        const char              *hack_p;
91
        const char              *ver_p;
92
93
        const char              *until;
94
        const char              *until_p;
95
        const char              *until_s;
96
97
        int                     in_esi_tag;
98
99
        const char              *esicmt;
100
        const char              *esicmt_p;
101
102
        struct vep_match        *attr;
103
        struct vsb              *attr_vsb;
104
        int                     attr_delim;
105
106
        struct vep_match        *match;
107
        struct vep_match        *match_hit;
108
109
        char                    tag[8];
110
        int                     tag_i;
111
112
        dostuff_f               *dostuff;
113
114
        struct vsb              *include_src;
115
116
        unsigned                nm_skip;
117
        unsigned                nm_verbatim;
118
        unsigned                nm_pending;
119
        enum vep_mark           last_mark;
120
};
121
122
/*---------------------------------------------------------------------*/
123
124
/*
 * Parser states.  The parser compares vep->state against these by
 * pointer; the text is only what the Debug() output prints.
 */

/* Start of object (SECTION A of VEP_Parse) */
static const char * const VEP_START = "[Start]";
static const char * const VEP_BOM = "[BOM]";
static const char * const VEP_TESTXML = "[TestXml]";
static const char * const VEP_NOTXML = "[NotXml]";

/* Hunting for tags (SECTION B of VEP_Parse) */
static const char * const VEP_NEXTTAG = "[NxtTag]";
static const char * const VEP_NOTMYTAG = "[NotMyTag]";

/* Tag classification (SECTION C of VEP_Parse) */
static const char * const VEP_STARTTAG = "[StartTag]";
static const char * const VEP_COMMENTESI = "[CommentESI]";
static const char * const VEP_COMMENT = "[Comment]";
static const char * const VEP_CDATA = "[CDATA]";
static const char * const VEP_ESITAG = "[ESITag]";
static const char * const VEP_ESIENDTAG = "[/ESITag]";

static const char * const VEP_ESIREMOVE = "[ESI:Remove]";
static const char * const VEP_ESIINCLUDE = "[ESI:Include]";
static const char * const VEP_ESICOMMENT = "[ESI:Comment]";
static const char * const VEP_ESIBOGON = "[ESI:Bogon]";

/* Inside a tag */
static const char * const VEP_INTAG = "[InTag]";
static const char * const VEP_TAGERROR = "[TagError]";

/* Attributes */
static const char * const VEP_ATTR = "[Attribute]";
static const char * const VEP_SKIPATTR = "[SkipAttribute]";
static const char * const VEP_ATTRDELIM = "[AttrDelim]";
static const char * const VEP_ATTRGETVAL = "[AttrGetValue]";
static const char * const VEP_ATTRVAL = "[AttrValue]";

/* Utility states */
static const char * const VEP_UNTIL = "[Until]";
static const char * const VEP_MATCHBUF = "[MatchBuf]";
static const char * const VEP_MATCH = "[Match]";
156
157
/*---------------------------------------------------------------------*/
158
159
static struct vep_match vep_match_starttag[] = {
160
        { "!--esi",     &VEP_COMMENTESI },
161
        { "!---->",     &VEP_NEXTTAG },
162
        { "!--",        &VEP_COMMENT },
163
        { "/esi:",      &VEP_ESIENDTAG },
164
        { "esi:",       &VEP_ESITAG },
165
        { "![CDATA[",   &VEP_CDATA },
166
        { NULL,         &VEP_NOTMYTAG }
167
};
168
169
/*---------------------------------------------------------------------*/
170
171
static struct vep_match vep_match_esi[] = {
172
        { "include",    &VEP_ESIINCLUDE },
173
        { "remove",     &VEP_ESIREMOVE },
174
        { "comment",    &VEP_ESICOMMENT },
175
        { NULL,         &VEP_ESIBOGON }
176
};
177
178
/*---------------------------------------------------------------------*/
179
180
static struct vep_match vep_match_attr_include[] = {
181
        { "src=",       &VEP_ATTRGETVAL },
182
        { NULL,         &VEP_SKIPATTR }
183
};
184
185
/*---------------------------------------------------------------------*/
186
187
static struct vep_match vep_match_bom[] = {
188
        { "\xeb\xbb\xbf",       &VEP_START },
189
        { NULL,                 &VEP_BOM }
190
};
191
192
/*--------------------------------------------------------------------
193
 * Report a parsing error
194
 */
195
196
static void
197 70
vep_error(const struct vep_state *vep, const char *p)
198
{
199 70
        VSC_C_main->esi_errors++;
200 70
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR after %zd %s",
201
             vep->o_last, p);
202 70
}
203
204
/*--------------------------------------------------------------------
205
 * Report a parsing warning
206
 */
207
208
static void
209 8
vep_warn(const struct vep_state *vep, const char *p)
210
{
211 8
        VSC_C_main->esi_warnings++;
212 8
        VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN after %zd %s",
213
             vep->o_last, p);
214 8
}
215
216
/*---------------------------------------------------------------------
217
 * return match or NULL if more input needed.
218
 */
219
220
static struct vep_match *
221 24271
vep_match(const struct vep_state *vep, const char *b, const char *e)
222
{
223
        struct vep_match *vm;
224
        const char *q, *r;
225
226 158747
        for (vm = vep->match; vm->match != NULL; vm++) {
227 136995
                assert(strlen(vm->match) <= sizeof (vep->tag));
228 136995
                r = b;
229 149044
                for (q = vm->match; *q != '\0' && r < e; q++, r++)
230 146525
                        if (*q != *r)
231 134476
                                break;
232 136995
                if (*q == '\0')
233 2042
                        break;
234 134953
                if (r == e)
235 477
                        return (NULL);
236
        }
237 23794
        return (vm);
238
}
239
240
/*---------------------------------------------------------------------
241
 *
242
 */
243
244
static void
245 1542
vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64)
246
{
247
        uint8_t buf[9];
248
249 1542
        assert(l > 0);
250 1542
        if (l < 256) {
251 1516
                buf[0] = (uint8_t)m8;
252 1516
                buf[1] = (uint8_t)l;
253 1516
                assert((ssize_t)buf[1] == l);
254 1516
                VSB_bcat(vep->vsb, buf, 2);
255 26
        } else if (l < 65536) {
256 14
                buf[0] = (uint8_t)m16;
257 14
                vbe16enc(buf + 1, (uint16_t)l);
258 14
                assert((ssize_t)vbe16dec(buf + 1) == l);
259 14
                VSB_bcat(vep->vsb, buf, 3);
260
        } else {
261 12
                buf[0] = (uint8_t)m64;
262 12
                vbe64enc(buf + 1, l);
263 12
                assert((ssize_t)vbe64dec(buf + 1) == l);
264 12
                VSB_bcat(vep->vsb, buf, 9);
265
        }
266 1542
}
267
268
static void
269 732
vep_emit_skip(const struct vep_state *vep, ssize_t l)
270
{
271
272 732
        vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8);
273 732
}
274
275
static void
276 692
vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc)
277
{
278
        uint8_t buf[4];
279
280 692
        vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8);
281 692
        if (vep->dogzip) {
282 118
                vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8);
283 118
                vbe32enc(buf, vep->crc);
284 118
                VSB_bcat(vep->vsb, buf, sizeof buf);
285
        }
286 692
}
287
288
static void
289 1660
vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark)
290
{
291
292 1660
        assert(l >= 0);
293 1660
        if (l == 0)
294 1896
                return;
295 1424
        assert(mark == SKIP || mark == VERBATIM);
296 1424
        if (mark == SKIP)
297 732
                vep_emit_skip(vep, l);
298
        else
299 692
                vep_emit_verbatim(vep, l, vep->o_crc);
300
301 1424
        vep->crc = crc32(0L, Z_NULL, 0);
302 1424
        vep->o_crc = 0;
303 1424
        vep->o_total += l;
304
}
305
306
/*---------------------------------------------------------------------
307
 *
308
 */
309
310
static void
311 131695
vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark)
312
{
313
        ssize_t l, lcb;
314
315 131695
        assert(mark == SKIP || mark == VERBATIM);
316
317
        /* The NO-OP case, no data, no pending data & no change of mode */
318 131695
        if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0)
319 131775
                return;
320
321
        /*
322
         * If we changed mode, emit whatever the opposite mode
323
         * assembled before the pending bytes.
324
         */
325
326 131615
        if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) {
327 1356
                lcb = vep->cb(vep->vc, vep->cb_priv, 0,
328
                    mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN);
329 1356
                vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
330 1356
                vep->o_last = lcb;
331 1356
                vep->o_wait = 0;
332
        }
333
334
        /* Transfer pending bytes CRC into active mode CRC */
335 131615
        if (vep->o_pending) {
336 131
                (void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending,
337
                     VGZ_NORMAL);
338 131
                if (vep->o_crc == 0) {
339 103
                        vep->crc = vep->crcp;
340 103
                        vep->o_crc = vep->o_pending;
341
                } else {
342 56
                        vep->crc = crc32_combine(vep->crc,
343 28
                            vep->crcp, vep->o_pending);
344 28
                        vep->o_crc += vep->o_pending;
345
                }
346 131
                vep->crcp = crc32(0L, Z_NULL, 0);
347 131
                vep->o_wait += vep->o_pending;
348 131
                vep->o_pending = 0;
349
        }
350
351
        /* * Process this bit of input */
352 131615
        AN(vep->ver_p);
353 131615
        l = p - vep->ver_p;
354 131615
        assert(l >= 0);
355 131615
        vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l);
356 131615
        vep->o_crc += l;
357 131615
        vep->ver_p = p;
358
359 131615
        vep->o_wait += l;
360 131615
        vep->last_mark = mark;
361 131615
        (void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL);
362
}
363
364
static void
365 76158
vep_mark_verbatim(struct vep_state *vep, const char *p)
366
{
367
368 76158
        vep_mark_common(vep, p, VERBATIM);
369 76158
        vep->nm_verbatim++;
370 76158
}
371
372
static void
373 55233
vep_mark_skip(struct vep_state *vep, const char *p)
374
{
375
376 55233
        vep_mark_common(vep, p, SKIP);
377 55233
        vep->nm_skip++;
378 55233
}
379
380
static void
381 237
vep_mark_pending(struct vep_state *vep, const char *p)
382
{
383
        ssize_t l;
384
385 237
        AN(vep->ver_p);
386 237
        l = p - vep->ver_p;
387 237
        assert(l > 0);
388 237
        vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l);
389 237
        vep->ver_p = p;
390
391 237
        vep->o_pending += l;
392 237
        vep->nm_pending++;
393 237
}
394
395
/*---------------------------------------------------------------------
396
 */
397
398
static void v_matchproto_()
399 12
vep_do_comment(struct vep_state *vep, enum dowhat what)
400
{
401
        Debug("DO_COMMENT(%d)\n", what);
402 12
        assert(what == DO_TAG);
403 12
        if (!vep->emptytag)
404 4
                vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'");
405 12
}
406
407
/*---------------------------------------------------------------------
408
 */
409
410
static void v_matchproto_()
411 280
vep_do_remove(struct vep_state *vep, enum dowhat what)
412
{
413
        Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n",
414
            what, vep->endtag, vep->emptytag, vep->remove);
415 280
        assert(what == DO_TAG);
416 280
        if (vep->emptytag)
417 2
                vep_error(vep, "ESI 1.0 <esi:remove/> not legal");
418 278
        else if (vep->remove && !vep->endtag)
419 2
                vep_error(vep, "ESI 1.0 <esi:remove> already open");
420 276
        else if (!vep->remove && vep->endtag)
421 2
                vep_error(vep, "ESI 1.0 <esi:remove> not open");
422
        else
423 274
                vep->remove = !vep->endtag;
424 280
}
425
426
/*---------------------------------------------------------------------
427
 */
428
429
static void v_matchproto_()
430 888
vep_do_include(struct vep_state *vep, enum dowhat what)
431
{
432
        const char *p, *q, *h;
433
        ssize_t l;
434
435
        Debug("DO_INCLUDE(%d)\n", what);
436 888
        if (what == DO_ATTR) {
437
                Debug("ATTR (%s) (%s)\n", vep->match_hit->match,
438
                        VSB_data(vep->attr_vsb));
439 444
                if (vep->include_src != NULL) {
440 2
                        vep_error(vep,
441
                            "ESI 1.0 <esi:include> "
442
                            "has multiple src= attributes");
443 2
                        vep->state = VEP_TAGERROR;
444 2
                        VSB_destroy(&vep->attr_vsb);
445 2
                        VSB_destroy(&vep->include_src);
446 2
                        return;
447
                }
448 442
                vep->include_src = vep->attr_vsb;
449 442
                vep->attr_vsb = NULL;
450 442
                return;
451
        }
452 444
        assert(what == DO_TAG);
453 444
        if (!vep->emptytag)
454 2
                vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'");
455 444
        if (vep->include_src == NULL) {
456 4
                vep_error(vep, "ESI 1.0 <esi:include> lacks src attr");
457 4
                return;
458
        }
459
460
        /*
461
         * Strictly speaking, we ought to spit out any piled up skip before
462
         * emitting the VEC for the include, but objectively that makes no
463
         * difference and robs us of a chance to collapse another skip into
464
         * this on so we don't do that.
465
         * However, we cannot tolerate any verbatim stuff piling up.
466
         * The mark_skip() before calling dostuff should have taken
467
         * care of that.  Make sure.
468
         */
469 440
        assert(vep->o_wait == 0 || vep->last_mark == SKIP);
470
        /* XXX: what if it contains NUL bytes ?? */
471 440
        p = VSB_data(vep->include_src);
472 440
        l = VSB_len(vep->include_src);
473 440
        h = 0;
474
475 440
        if (l > 7 && !memcmp(p, "http://", 7)) {
476 4
                h = p + 7;
477 4
                p = strchr(h, '/');
478 4
                if (p == NULL) {
479 2
                        vep_error(vep,
480
                            "ESI 1.0 <esi:include> invalid src= URL");
481 2
                        vep->state = VEP_TAGERROR;
482 2
                        AZ(vep->attr_vsb);
483 2
                        VSB_destroy(&vep->include_src);
484 2
                        return;
485
                }
486
                Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p);
487 2
                VSB_printf(vep->vsb, "%c", VEC_INCL);
488 2
                VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
489 436
        } else if (l > 8 && !memcmp(p, "https://", 8)) {
490 6
                if (!FEATURE(FEATURE_ESI_IGNORE_HTTPS)) {
491 2
                        vep_warn(vep,
492
                            "ESI 1.0 <esi:include> with https:// ignored");
493 2
                        vep->state = VEP_TAGERROR;
494 2
                        AZ(vep->attr_vsb);
495 2
                        VSB_destroy(&vep->include_src);
496 2
                        return;
497
                }
498 4
                vep_warn(vep,
499
                    "ESI 1.0 <esi:include> https:// treated as http://");
500 4
                h = p + 8;
501 4
                p = strchr(h, '/');
502 4
                if (p == NULL) {
503 2
                        vep_error(vep,
504
                            "ESI 1.0 <esi:include> invalid src= URL");
505 2
                        vep->state = VEP_TAGERROR;
506 2
                        AZ(vep->attr_vsb);
507 2
                        VSB_destroy(&vep->include_src);
508 2
                        return;
509
                }
510 2
                VSB_printf(vep->vsb, "%c", VEC_INCL);
511 2
                VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0);
512 430
        } else if (*p == '/') {
513 374
                VSB_printf(vep->vsb, "%c", VEC_INCL);
514 374
                VSB_printf(vep->vsb, "%c", 0);
515
        } else {
516 56
                VSB_printf(vep->vsb, "%c", VEC_INCL);
517 56
                VSB_printf(vep->vsb, "%c", 0);
518
                /* Look for the last / before a '?' */
519 56
                h = NULL;
520 144
                for (q = vep->url; *q && *q != '?'; q++)
521 88
                        if (*q == '/')
522 58
                                h = q;
523 56
                if (h == NULL)
524 2
                        h = q + 1;
525
526
                Debug("INCL:: [%.*s]/[%s]\n",
527
                    (int)(h - vep->url), vep->url, p);
528 56
                VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url);
529
        }
530 434
        l -= (p - VSB_data(vep->include_src));
531 18220
        for (q = p; *q != '\0'; ) {
532 17352
                if (*q == '&') {
533
#define R(w,f,r)                                                        \
534
                        if (q + w <= p + l && !memcmp(q, f, w)) { \
535
                                VSB_printf(vep->vsb, "%c", r);  \
536
                                q += w;                         \
537
                                continue;                       \
538
                        }
539 20
                        R(6, "&apos;", '\'');
540 16
                        R(6, "&quot;", '"');
541 12
                        R(4, "&lt;", '<');
542 8
                        R(4, "&gt;", '>');
543 4
                        R(5, "&amp;", '&');
544
                }
545 17332
                VSB_printf(vep->vsb, "%c", *q++);
546
        }
547
#undef R
548 434
        VSB_printf(vep->vsb, "%c", 0);
549 434
        VSB_destroy(&vep->include_src);
550
}
551
552
/*---------------------------------------------------------------------
553
 * Lex/Parse object for ESI instructions
554
 *
555
 * This function is called with the input object piecemeal so do not
556
 * assume that we have more than one char available at a time, but
557
 * optimize for getting huge chunks.
558
 *
559
 * NB: At the bottom of this source-file, there is a dot-diagram matching
560
 * NB: the state-machine.  Please maintain it along with the code.
561
 */
562
563
void
564 107408
VEP_Parse(struct vep_state *vep, const char *p, size_t l)
565
{
566
        const char *e;
567
        struct vep_match *vm;
568
        int i;
569
570 107408
        CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
571 107408
        assert(l > 0);
572
573 107408
        if (vep->startup) {
574
                /*
575
                 * We must force the GZIP header out as a SKIP string,
576
                 * otherwise an object starting with <esi:include would
577
                 * have its GZIP header appear after the included object
578
                 * (e000026.vtc)
579
                 */
580 304
                vep->ver_p = "";
581 304
                vep->last_mark = SKIP;
582 304
                vep_mark_common(vep, vep->ver_p, VERBATIM);
583 304
                vep->startup = 0;
584 304
                AZ(vep->hack_p);
585 304
                vep->hack_p = p;
586
        }
587
588 107408
        vep->ver_p = p;
589
590 107408
        e = p + l;
591
592 417910
        while (p < e) {
593 203094
                AN(vep->state);
594
                Debug("EP %s %d (%.*s) [%.*s]\n",
595
                    vep->state,
596
                    vep->remove,
597
                    vep->tag_i, vep->tag,
598
                    (e - p) > 10 ? 10 : (int)(e-p), p);
599 203094
                assert(p >= vep->ver_p);
600
601
                /******************************************************
602
                 * SECTION A
603
                 */
604
605 203094
                if (vep->state == VEP_START) {
606 310
                        if (FEATURE(FEATURE_ESI_REMOVE_BOM) && *p == '\xeb') {
607 8
                                vep->match = vep_match_bom;
608 8
                                vep->state = VEP_MATCH;
609
                        } else
610 302
                                vep->state = VEP_BOM;
611 202784
                } else if (vep->state == VEP_BOM) {
612 304
                        vep_mark_skip(vep, p);
613 304
                        if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK))
614 88
                                vep->state = VEP_NEXTTAG;
615
                        else
616 216
                                vep->state = VEP_TESTXML;
617 202480
                } else if (vep->state == VEP_TESTXML) {
618
                        /*
619
                         * If the first non-whitespace char is different
620
                         * from '<' we assume this is not XML.
621
                         */
622 766
                        while (p < e && vct_islws(*p))
623 298
                                p++;
624 234
                        vep_mark_verbatim(vep, p);
625 234
                        if (p < e && *p == '<') {
626 202
                                p++;
627 202
                                vep->state = VEP_STARTTAG;
628 32
                        } else if (p < e && *p == '\xeb') {
629 4
                                VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
630
                                    "No ESI processing, "
631
                                    "first char not '<' but BOM."
632
                                    " (See feature esi_remove_bom)"
633
                                );
634 4
                                vep->state = VEP_NOTXML;
635 28
                        } else if (p < e) {
636 8
                                VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror,
637
                                    "No ESI processing, "
638
                                    "first char not '<'."
639
                                    " (See feature esi_disable_xml_check)"
640
                                );
641 8
                                vep->state = VEP_NOTXML;
642
                        }
643 202246
                } else if (vep->state == VEP_NOTXML) {
644
                        /*
645
                         * This is not recognized as XML, just skip thru
646
                         * vfp_esi_end() will handle the rest
647
                         */
648 12
                        p = e;
649 12
                        vep_mark_verbatim(vep, p);
650
651
                /******************************************************
652
                 * SECTION B
653
                 */
654
655 202234
                } else if (vep->state == VEP_NOTMYTAG) {
656 21807
                        if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) {
657 4
                                p++;
658 4
                                vep->state = VEP_NEXTTAG;
659
                        } else {
660 21803
                                vep->tag_i = 0;
661 114315
                                while (p < e) {
662 92437
                                        if (*p++ == '>') {
663 21728
                                                vep->state = VEP_NEXTTAG;
664 21728
                                                break;
665
                                        }
666
                                }
667
                        }
668 21807
                        if (p == e && !vep->remove)
669 149
                                vep_mark_verbatim(vep, p);
670 180427
                } else if (vep->state == VEP_NEXTTAG) {
671
                        /*
672
                         * Hunt for start of next tag and keep an eye
673
                         * out for end of EsiCmt if armed.
674
                         */
675 128548
                        vep->emptytag = 0;
676 128548
                        vep->attr = NULL;
677 128548
                        vep->dostuff = NULL;
678 4573060
                        while (p < e && *p != '<') {
679 4315964
                                if (vep->esicmt_p == NULL) {
680 4315752
                                        p++;
681 4315752
                                        continue;
682
                                }
683 212
                                if (*p != *vep->esicmt_p) {
684 124
                                        p++;
685 124
                                        vep->esicmt_p = vep->esicmt;
686 124
                                        continue;
687
                                }
688 88
                                if (!vep->remove && vep->esicmt_p == vep->esicmt)
689 20
                                        vep_mark_verbatim(vep, p);
690 88
                                p++;
691 88
                                if (*++vep->esicmt_p == '\0') {
692 28
                                        vep->esi_found = 1;
693 28
                                        vep->esicmt = NULL;
694 28
                                        vep->esicmt_p = NULL;
695
                                        /*
696
                                         * The end of the esicmt
697
                                         * should not be emitted.
698
                                         * But the stuff before should
699
                                         */
700 28
                                        vep_mark_skip(vep, p);
701
                                }
702
                        }
703 128548
                        if (p < e) {
704 22354
                                if (!vep->remove)
705 22190
                                        vep_mark_verbatim(vep, p);
706 22354
                                assert(*p == '<');
707 22354
                                p++;
708 22354
                                vep->state = VEP_STARTTAG;
709 106194
                        } else if (vep->esicmt_p == vep->esicmt && !vep->remove)
710 53515
                                vep_mark_verbatim(vep, p);
711
712
                /******************************************************
713
                 * SECTION C
714
                 */
715
716 51879
                } else if (vep->state == VEP_STARTTAG) {
717
                        /* Start of tag, set up match table */
718 22556
                        vep->endtag = 0;
719 22556
                        vep->match = vep_match_starttag;
720 22556
                        vep->state = VEP_MATCH;
721 29323
                } else if (vep->state == VEP_COMMENT) {
722 10
                        vep->esicmt_p = vep->esicmt = NULL;
723 10
                        vep->until_p = vep->until = "-->";
724 10
                        vep->until_s = VEP_NEXTTAG;
725 10
                        vep->state = VEP_UNTIL;
726 29313
                } else if (vep->state == VEP_COMMENTESI) {
727 30
                        if (vep->remove)
728 10
                                vep_error(vep,
729
                                    "ESI 1.0 Nested <!--esi"
730
                                    " element in <esi:remove>");
731 30
                        vep->esicmt_p = vep->esicmt = "-->";
732 30
                        vep->state = VEP_NEXTTAG;
733 30
                        vep_mark_skip(vep, p);
734 29283
                } else if (vep->state == VEP_CDATA) {
735
                        /*
736
                         * Easy: just look for the end of CDATA
737
                         */
738 8
                        vep->until_p = vep->until = "]]>";
739 8
                        vep->until_s = VEP_NEXTTAG;
740 8
                        vep->state = VEP_UNTIL;
741 29275
                } else if (vep->state == VEP_ESIENDTAG) {
742 144
                        vep->endtag = 1;
743 144
                        vep->state = VEP_ESITAG;
744 29131
                } else if (vep->state == VEP_ESITAG) {
745 770
                        vep->in_esi_tag = 1;
746 770
                        vep->esi_found = 1;
747 770
                        vep_mark_skip(vep, p);
748 770
                        vep->match = vep_match_esi;
749 770
                        vep->state = VEP_MATCH;
750 28361
                } else if (vep->state == VEP_ESIINCLUDE) {
751 466
                        if (vep->remove) {
752 8
                                vep_error(vep,
753
                                    "ESI 1.0 <esi:include> element"
754
                                    " nested in <esi:remove>");
755 8
                                vep->state = VEP_TAGERROR;
756 458
                        } else if (vep->endtag) {
757 2
                                vep_error(vep,
758
                                    "ESI 1.0 </esi:include> illegal end-tag");
759 2
                                vep->state = VEP_TAGERROR;
760
                        } else {
761 456
                                vep->dostuff = vep_do_include;
762 456
                                vep->state = VEP_INTAG;
763 456
                                vep->attr = vep_match_attr_include;
764
                        }
765 27895
                } else if (vep->state == VEP_ESIREMOVE) {
766 280
                        vep->dostuff = vep_do_remove;
767 280
                        vep->state = VEP_INTAG;
768 27615
                } else if (vep->state == VEP_ESICOMMENT) {
769 20
                        if (vep->remove) {
770 2
                                vep_error(vep,
771
                                    "ESI 1.0 <esi:comment> element"
772
                                    " nested in <esi:remove>");
773 2
                                vep->state = VEP_TAGERROR;
774 18
                        } else if (vep->endtag) {
775 4
                                vep_error(vep,
776
                                    "ESI 1.0 </esi:comment> illegal end-tag");
777 4
                                vep->state = VEP_TAGERROR;
778
                        } else {
779 14
                                vep->dostuff = vep_do_comment;
780 14
                                vep->state = VEP_INTAG;
781
                        }
782 27595
                } else if (vep->state == VEP_ESIBOGON) {
783 4
                        vep_error(vep,
784
                            "ESI 1.0 <esi:bogus> element");
785 4
                        vep->state = VEP_TAGERROR;
786
787
                /******************************************************
788
                 * SECTION D
789
                 */
790
791 27591
                } else if (vep->state == VEP_INTAG) {
792 1252
                        vep->tag_i = 0;
793 3186
                        while (p < e && vct_islws(*p) && !vep->emptytag) {
794 682
                                p++;
795 682
                                vep->canattr = 1;
796
                        }
797 1252
                        if (p < e && *p == '/' && !vep->emptytag) {
798 454
                                p++;
799 454
                                vep->emptytag = 1;
800 454
                                vep->canattr = 0;
801
                        }
802 1252
                        if (p < e && *p == '>') {
803 736
                                p++;
804 736
                                AN(vep->dostuff);
805 736
                                vep_mark_skip(vep, p);
806 736
                                vep->dostuff(vep, DO_TAG);
807 736
                                vep->in_esi_tag = 0;
808 736
                                vep->state = VEP_NEXTTAG;
809 516
                        } else if (p < e && vep->emptytag) {
810 2
                                vep_error(vep,
811
                                    "XML 1.0 '>' does not follow '/' in tag");
812 2
                                vep->state = VEP_TAGERROR;
813 982
                        } else if (p < e && vep->canattr &&
814 468
                            vct_isxmlnamestart(*p)) {
815 466
                                vep->state = VEP_ATTR;
816 48
                        } else if (p < e) {
817 2
                                vep_error(vep,
818
                                    "XML 1.0 Illegal attribute start char");
819 2
                                vep->state = VEP_TAGERROR;
820
                        }
821 26339
                } else if (vep->state == VEP_TAGERROR) {
822 463
                        while (p < e && *p != '>')
823 261
                                p++;
824 101
                        if (p < e) {
825 34
                                p++;
826 34
                                vep_mark_skip(vep, p);
827 34
                                vep->in_esi_tag = 0;
828 34
                                vep->state = VEP_NEXTTAG;
829 34
                                if (vep->attr_vsb)
830 2
                                        VSB_destroy(&vep->attr_vsb);
831
                        }
832
833
                /******************************************************
834
                 * SECTION E
835
                 */
836
837 26238
                } else if (vep->state == VEP_ATTR) {
838 466
                        AZ(vep->attr_delim);
839 466
                        if (vep->attr == NULL) {
840 6
                                p++;
841 6
                                AZ(vep->attr_vsb);
842 6
                                vep->state = VEP_SKIPATTR;
843
                        } else {
844 460
                                vep->match = vep->attr;
845 460
                                vep->state = VEP_MATCH;
846
                        }
847 25772
                } else if (vep->state == VEP_SKIPATTR) {
848 91
                        while (p < e && vct_isxmlname(*p))
849 31
                                p++;
850 30
                        if (p < e && *p == '=') {
851 6
                                p++;
852 6
                                vep->state = VEP_ATTRDELIM;
853 24
                        } else if (p < e && *p == '>') {
854 2
                                vep->state = VEP_INTAG;
855 22
                        } else if (p < e && *p == '/') {
856 2
                                vep->state = VEP_INTAG;
857 20
                        } else if (p < e && vct_issp(*p)) {
858 4
                                vep->state = VEP_INTAG;
859 16
                        } else if (p < e) {
860 2
                                vep_error(vep,
861
                                    "XML 1.0 Illegal attr char");
862 2
                                vep->state = VEP_TAGERROR;
863
                        }
864 25742
                } else if (vep->state == VEP_ATTRGETVAL) {
865 450
                        AZ(vep->attr_vsb);
866 450
                        vep->attr_vsb = VSB_new_auto();
867 450
                        vep->state = VEP_ATTRDELIM;
868 25292
                } else if (vep->state == VEP_ATTRDELIM) {
869 456
                        AZ(vep->attr_delim);
870 456
                        if (*p == '"' || *p == '\'') {
871 448
                                vep->attr_delim = *p++;
872 448
                                vep->state = VEP_ATTRVAL;
873 8
                        } else if (!vct_issp(*p)) {
874 6
                                vep->attr_delim = ' ';
875 6
                                vep->state = VEP_ATTRVAL;
876
                        } else {
877 2
                                vep_error(vep,
878
                                    "XML 1.0 Illegal attribute delimiter");
879 2
                                vep->state = VEP_TAGERROR;
880
                        }
881
882 24836
                } else if (vep->state == VEP_ATTRVAL) {
883 36250
                        while (p < e && *p != '>' && *p != vep->attr_delim &&
884 17626
                           (vep->attr_delim != ' ' || !vct_issp(*p))) {
885 17606
                                if (vep->attr_vsb != NULL)
886 17588
                                        VSB_putc(vep->attr_vsb, *p);
887 17606
                                p++;
888
                        }
889 519
                        if (p < e && *p == '>') {
890 4
                                vep_error(vep,
891
                                    "XML 1.0 Missing end attribute delimiter");
892 4
                                vep->state = VEP_TAGERROR;
893 4
                                vep->attr_delim = 0;
894 8
                                if (vep->attr_vsb != NULL) {
895 4
                                        AZ(VSB_finish(vep->attr_vsb));
896 4
                                        VSB_destroy(&vep->attr_vsb);
897
                                }
898 515
                        } else if (p < e) {
899 450
                                vep->attr_delim = 0;
900 450
                                p++;
901 450
                                vep->state = VEP_INTAG;
902 450
                                if (vep->attr_vsb != NULL) {
903 444
                                        AZ(VSB_finish(vep->attr_vsb));
904 444
                                        AN(vep->dostuff);
905 444
                                        vep->dostuff(vep, DO_ATTR);
906 444
                                        vep->attr_vsb = NULL;
907
                                }
908
                        }
909
910
                /******************************************************
911
                 * Utility Section
912
                 */
913
914 24317
                } else if (vep->state == VEP_MATCH) {
915
                        /*
916
                         * Match against a table
917
                         */
918 23794
                        vm = vep_match(vep, p, e);
919 23794
                        vep->match_hit = vm;
920 23794
                        if (vm != NULL) {
921 23525
                                if (vm->match != NULL)
922 1785
                                        p += strlen(vm->match);
923 23525
                                vep->state = *vm->state;
924 23525
                                vep->match = NULL;
925 23525
                                vep->tag_i = 0;
926
                        } else {
927 269
                                assert(e - p <= sizeof(vep->tag));
928 269
                                memcpy(vep->tag, p, e - p);
929 269
                                vep->tag_i = e - p;
930 269
                                vep->state = VEP_MATCHBUF;
931 269
                                p = e;
932
                        }
933 523
                } else if (vep->state == VEP_MATCHBUF) {
934
                        /*
935
                         * Match against a table while split over input
936
                         * sections.
937
                         */
938 477
                        AN(vep->match);
939 477
                        i = sizeof(vep->tag) - vep->tag_i;
940 477
                        if (i > e - p)
941 411
                                i = e - p;
942 477
                        memcpy(vep->tag + vep->tag_i, p, i);
943 477
                        vm = vep_match(vep, vep->tag,
944 477
                            vep->tag + vep->tag_i + i);
945
                        Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n",
946
                            vep->tag_i + i, vep->tag,
947
                            vep->tag_i,
948
                            i,
949
                            vm,
950
                            vm ? vm->match : "(nil)");
951
952 477
                        if (vm == NULL) {
953 208
                                vep->tag_i += i;
954 208
                                p += i;
955 208
                                assert(p == e);
956
                        } else {
957 269
                                vep->match_hit = vm;
958 269
                                vep->state = *vm->state;
959 269
                                if (vm->match != NULL) {
960 257
                                        i = strlen(vm->match);
961 257
                                        if (i > vep->tag_i)
962 254
                                                p += i - vep->tag_i;
963
                                }
964 269
                                vep->match = NULL;
965 269
                                vep->tag_i = 0;
966
                        }
967 46
                } else if (vep->state == VEP_UNTIL) {
968
                        /*
969
                         * Skip until we see magic string
970
                         */
971 416
                        while (p < e) {
972 338
                                if (*p++ != *vep->until_p++) {
973 292
                                        vep->until_p = vep->until;
974 46
                                } else if (*vep->until_p == '\0') {
975 14
                                        vep->state = vep->until_s;
976 14
                                        break;
977
                                }
978
                        }
979 46
                        if (p == e && !vep->remove)
980 38
                                vep_mark_verbatim(vep, p);
981
                } else {
982
                        Debug("*** Unknown state %s\n", vep->state);
983 0
                        WRONG("WRONG ESI PARSER STATE");
984
                }
985
        }
986
        /*
987
         * We must always mark up the storage we got, try to do so
988
         * in the most efficient way, in particular with respect to
989
         * minimizing and limiting use of pending.
990
         */
991 107408
        if (p == vep->ver_p)
992
                ;
993 53568
        else if (vep->in_esi_tag)
994 585
                vep_mark_skip(vep, p);
995 52983
        else if (vep->remove)
996 52746
                vep_mark_skip(vep, p);
997
        else
998 237
                vep_mark_pending(vep, p);
999 107408
}
1000
1001
/*---------------------------------------------------------------------
1002
 */
1003
1004
static ssize_t v_matchproto_(vep_callback_t)
1005 132065
vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg)
1006
{
1007
        ssize_t *s;
1008
1009 132065
        CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1010 132065
        AN(priv);
1011 132065
        s = priv;
1012 132065
        *s += l;
1013
        (void)flg;
1014 132065
        return (*s);
1015
}
1016
1017
/*---------------------------------------------------------------------
1018
 */
1019
1020
struct vep_state *
1021 310
VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb,
1022
    void *cb_priv)
1023
{
1024
        struct vep_state *vep;
1025
1026 310
        CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
1027 310
        CHECK_OBJ_NOTNULL(req, HTTP_MAGIC);
1028 310
        vep = WS_Alloc(vc->resp->ws, sizeof *vep);
1029 310
        AN(vep);
1030
1031 310
        INIT_OBJ(vep, VEP_MAGIC);
1032 310
        vep->url = req->hd[HTTP_HDR_URL].b;
1033 310
        vep->vc = vc;
1034 310
        vep->vsb = VSB_new_auto();
1035 310
        AN(vep->vsb);
1036
1037 310
        if (cb != NULL) {
1038 72
                vep->dogzip = 1;
1039
                /* XXX */
1040 72
                VSB_printf(vep->vsb, "%c", VEC_GZ);
1041 72
                vep->cb = cb;
1042 72
                vep->cb_priv = cb_priv;
1043
        } else {
1044 238
                vep->cb = vep_default_cb;
1045 238
                vep->cb_priv = &vep->cb_x;
1046
        }
1047
1048 310
        vep->state = VEP_START;
1049 310
        vep->crc = crc32(0L, Z_NULL, 0);
1050 310
        vep->crcp = crc32(0L, Z_NULL, 0);
1051
1052 310
        vep->startup = 1;
1053 310
        return (vep);
1054
}
1055
1056
/*---------------------------------------------------------------------
1057
 */
1058
1059
struct vsb *
1060 310
VEP_Finish(struct vep_state *vep)
1061
{
1062
        ssize_t l, lcb;
1063
1064 310
        CHECK_OBJ_NOTNULL(vep, VEP_MAGIC);
1065
1066 310
        if (vep->include_src)
1067 0
                VSB_destroy(&vep->include_src);
1068 310
        if (vep->attr_vsb)
1069 0
                VSB_destroy(&vep->attr_vsb);
1070
1071 614
        if (vep->state != VEP_START &&
1072 608
            vep->state != VEP_BOM &&
1073 606
            vep->state != VEP_TESTXML &&
1074 592
            vep->state != VEP_NOTXML &&
1075 290
            vep->state != VEP_NEXTTAG) {
1076 8
                vep_error(vep, "VEP ended inside a tag");
1077
        }
1078
1079 310
        if (vep->o_pending)
1080 0
                vep_mark_common(vep, vep->ver_p, vep->last_mark);
1081 310
        if (vep->o_wait > 0) {
1082 304
                lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN);
1083 304
                vep_emit_common(vep, lcb - vep->o_last, vep->last_mark);
1084
        }
1085
        // NB: We don't account for PAD+SUM+LEN in gzip'ed objects
1086 310
        (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH);
1087
1088 310
        AZ(VSB_finish(vep->vsb));
1089 310
        l = VSB_len(vep->vsb);
1090 310
        if (vep->esi_found && l > 0)
1091 178
                return (vep->vsb);
1092 132
        VSB_destroy(&vep->vsb);
1093 132
        return (NULL);
1094
}
1095
1096
#if 0
1097
1098
digraph xml {
1099
        rankdir="LR"
1100
        size="7,10"
1101
#################################################################
1102
# SECTION A
1103
#
1104
1105
START           [shape=ellipse]
1106
TESTXML         [shape=ellipse]
1107
NOTXML          [shape=ellipse]
1108
NEXTTAGa        [shape=hexagon, label="NEXTTAG"]
1109
STARTTAGa       [shape=hexagon, label="STARTTAG"]
1110
START           -> TESTXML
1111
START           -> NEXTTAGa     [style=dotted, label="syntax:1"]
1112
TESTXML         -> TESTXML      [label="lws"]
1113
TESTXML         -> NOTXML
1114
TESTXML         -> STARTTAGa    [label="'<'"]
1115
1116
#################################################################
1117
# SECTION B
1118
1119
NOTMYTAG        [shape=ellipse]
1120
NEXTTAG         [shape=ellipse]
1121
NOTMYTAG        -> NEXTTAG      [style=dotted, label="syntax:2"]
1122
STARTTAGb       [shape=hexagon, label="STARTTAG"]
1123
NOTMYTAG        -> NEXTTAG      [label="'>'"]
1124
NOTMYTAG        -> NOTMYTAG     [label="*"]
1125
NEXTTAG         -> NEXTTAG      [label="'-->'"]
1126
NEXTTAG         -> NEXTTAG      [label="*"]
1127
NEXTTAG         -> STARTTAGb    [label="'<'"]
1128
1129
#################################################################
1130
# SECTION C
1131
1132
STARTTAG        [shape=ellipse]
1133
COMMENT         [shape=ellipse]
1134
CDATA           [shape=ellipse]
1135
ESITAG          [shape=ellipse]
1136
ESIETAG         [shape=ellipse]
1137
ESIINCLUDE      [shape=ellipse]
1138
ESIREMOVE       [shape=ellipse]
1139
ESICOMMENT      [shape=ellipse]
1140
ESIBOGON        [shape=ellipse]
1141
INTAGc          [shape=hexagon, label="INTAG"]
1142
NOTMYTAGc       [shape=hexagon, label="NOTMYTAG"]
1143
NEXTTAGc        [shape=hexagon, label="NEXTTAG"]
1144
TAGERRORc       [shape=hexagon, label="TAGERROR"]
1145
C1              [shape=circle,label=""]
1146
STARTTAG        -> COMMENT      [label="'<!--'"]
1147
STARTTAG        -> ESITAG       [label="'<esi'"]
1148
STARTTAG        -> CDATA        [label="'<![CDATA['"]
1149
STARTTAG        -> NOTMYTAGc    [label="'*'"]
1150
COMMENT         -> NEXTTAGc     [label="'esi'"]
1151
COMMENT         -> C1           [label="*"]
1152
C1              -> C1           [label="*"]
1153
C1              -> NEXTTAGc     [label="-->"]
1154
CDATA           -> CDATA        [label="*"]
1155
CDATA           -> NEXTTAGc     [label="]]>"]
1156
ESITAG          -> ESIINCLUDE   [label="'include'"]
1157
ESITAG          -> ESIREMOVE    [label="'remove'"]
1158
ESITAG          -> ESICOMMENT   [label="'comment'"]
1159
ESITAG          -> ESIBOGON     [label="*"]
1160
ESICOMMENT      -> INTAGc
1161
ESICOMMENT      -> TAGERRORc
1162
ESICOMMENT      -> TAGERRORc    [style=dotted, label="nested\nin\nremove"]
1163
ESIREMOVE       -> INTAGc
1164
ESIREMOVE       -> TAGERRORc
1165
ESIINCLUDE      -> INTAGc
1166
ESIINCLUDE      -> TAGERRORc
1167
ESIINCLUDE      -> TAGERRORc    [style=dotted, label="nested\nin\nremove"]
1168
ESIBOGON        -> TAGERRORc
1169
1170
#################################################################
1171
# SECTION D
1172
1173
INTAG           [shape=ellipse]
1174
TAGERROR        [shape=ellipse]
1175
NEXTTAGd        [shape=hexagon, label="NEXTTAG"]
1176
ATTRd           [shape=hexagon, label="ATTR"]
1177
D1              [shape=circle, label=""]
1178
D2              [shape=circle, label=""]
1179
INTAG           -> D1           [label="lws"]
1180
D1              -> D2           [label="/"]
1181
INTAG           -> D2           [label="/"]
1182
INTAG           -> NEXTTAGd     [label=">"]
1183
D1              -> NEXTTAGd     [label=">"]
1184
D2              -> NEXTTAGd     [label=">"]
1185
D1              -> ATTRd        [label="XMLstartchar"]
1186
D1              -> TAGERROR     [label="*"]
1187
D2              -> TAGERROR     [label="*"]
1188
TAGERROR        -> TAGERROR     [label="*"]
1189
TAGERROR        -> NEXTTAGd     [label="'>'"]
1190
1191
#################################################################
1192
# SECTION E
1193
1194
ATTR            [shape=ellipse]
1195
SKIPATTR        [shape=ellipse]
1196
ATTRGETVAL      [shape=ellipse]
1197
ATTRDELIM       [shape=ellipse]
1198
ATTRVAL         [shape=ellipse]
1199
TAGERRORe       [shape=hexagon, label="TAGERROR"]
1200
INTAGe          [shape=hexagon, label="INTAG"]
1201
ATTR            -> SKIPATTR     [label="*"]
1202
ATTR            -> ATTRGETVAL   [label="wanted attr"]
1203
SKIPATTR        -> SKIPATTR     [label="XMLname"]
1204
SKIPATTR        -> ATTRDELIM    [label="'='"]
1205
SKIPATTR        -> TAGERRORe    [label="*"]
1206
ATTRGETVAL      -> ATTRDELIM
1207
ATTRDELIM       -> ATTRVAL      [label="\""]
1208
ATTRDELIM       -> ATTRVAL      [label="\'"]
1209
ATTRDELIM       -> ATTRVAL      [label="*"]
1210
ATTRDELIM       -> TAGERRORe    [label="lws"]
1211
ATTRVAL         -> TAGERRORe    [label="'>'"]
1212
ATTRVAL         -> INTAGe       [label="delim"]
1213
ATTRVAL         -> ATTRVAL      [label="*"]
1214
1215
}
1216
1217
#endif