| | varnish-cache/bin/varnishd/cache/cache_esi_parse.c |
| 0 |
|
/*- |
| 1 |
|
* Copyright (c) 2011 Varnish Software AS |
| 2 |
|
* All rights reserved. |
| 3 |
|
* |
| 4 |
|
* Author: Poul-Henning Kamp <phk@phk.freebsd.dk> |
| 5 |
|
* |
| 6 |
|
* SPDX-License-Identifier: BSD-2-Clause |
| 7 |
|
* |
| 8 |
|
* Redistribution and use in source and binary forms, with or without |
| 9 |
|
* modification, are permitted provided that the following conditions |
| 10 |
|
* are met: |
| 11 |
|
* 1. Redistributions of source code must retain the above copyright |
| 12 |
|
* notice, this list of conditions and the following disclaimer. |
| 13 |
|
* 2. Redistributions in binary form must reproduce the above copyright |
| 14 |
|
* notice, this list of conditions and the following disclaimer in the |
| 15 |
|
* documentation and/or other materials provided with the distribution. |
| 16 |
|
* |
| 17 |
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
| 18 |
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 20 |
|
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
| 21 |
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 22 |
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 23 |
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 24 |
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 25 |
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 26 |
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 27 |
|
* SUCH DAMAGE. |
| 28 |
|
* |
| 29 |
|
* VEP Varnish Esi Parsing |
| 30 |
|
*/ |
| 31 |
|
|
| 32 |
|
#include "config.h" |
| 33 |
|
|
| 34 |
|
#include "cache_varnishd.h" |
| 35 |
|
#include "cache_filter.h" |
| 36 |
|
|
| 37 |
|
#include "cache_vgz.h" |
| 38 |
|
#include "cache_esi.h" |
| 39 |
|
#include "vct.h" |
| 40 |
|
#include "vend.h" |
| 41 |
|
#include "vgz.h" |
| 42 |
|
|
| 43 |
|
/*
 * Parser trace hook.  It expands to nothing in normal builds; to trace
 * the state machine on stdout, enable the printf variant instead.
 */
//#define Debug(fmt, ...) printf(fmt, __VA_ARGS__)
#define Debug(fmt, ...) /**/
| 45 |
|
|
| 46 |
|
struct vep_state;

/* Tells a tag handler which event it is being called for. */
enum dowhat {
	DO_ATTR,	/* an attribute is ready (match_hit / attr_vsb) */
	DO_TAG		/* the closing '>' of the tag has been consumed */
};

/* Signature shared by the per-element handlers (vep_do_*). */
typedef void dostuff_f(struct vep_state *, enum dowhat);
| 50 |
|
|
| 51 |
|
/*
 * One row of a match table: a literal to compare the input against and
 * the address of the state label to switch to when it matches.
 * A row whose match is NULL terminates the table and carries the
 * fallback state.
 */
struct vep_match {
	const char		*match;
	const char * const	*state;
};
| 55 |
|
|
| 56 |
|
/* The two kinds of output span the parser accumulates. */
enum vep_mark {
	VERBATIM = 0,
	SKIP
};
| 57 |
|
|
| 58 |
|
struct vep_state { |
| 59 |
|
unsigned magic; |
| 60 |
|
#define VEP_MAGIC 0x55cb9b82 |
| 61 |
|
struct vsb *vsb; |
| 62 |
|
|
| 63 |
|
const char *url; |
| 64 |
|
struct vfp_ctx *vc; |
| 65 |
|
int dogzip; |
| 66 |
|
vep_callback_t *cb; |
| 67 |
|
void *cb_priv; |
| 68 |
|
|
| 69 |
|
/* Internal Counter for default call-back function */ |
| 70 |
|
ssize_t cb_x; |
| 71 |
|
|
| 72 |
|
/* parser state */ |
| 73 |
|
const char *state; |
| 74 |
|
unsigned startup; |
| 75 |
|
unsigned esi_found; |
| 76 |
|
|
| 77 |
|
unsigned endtag; |
| 78 |
|
unsigned emptytag; |
| 79 |
|
unsigned canattr; |
| 80 |
|
|
| 81 |
|
unsigned remove; |
| 82 |
|
|
| 83 |
|
ssize_t o_wait; |
| 84 |
|
ssize_t o_pending; |
| 85 |
|
ssize_t o_total; |
| 86 |
|
uint32_t crc; |
| 87 |
|
ssize_t o_crc; |
| 88 |
|
uint32_t crcp; |
| 89 |
|
ssize_t o_last; |
| 90 |
|
|
| 91 |
|
const char *hack_p; |
| 92 |
|
const char *ver_p; |
| 93 |
|
|
| 94 |
|
const char *until; |
| 95 |
|
const char *until_p; |
| 96 |
|
const char *until_s; |
| 97 |
|
|
| 98 |
|
int in_esi_tag; |
| 99 |
|
|
| 100 |
|
const char *esicmt; |
| 101 |
|
const char *esicmt_p; |
| 102 |
|
|
| 103 |
|
struct vep_match *attr; |
| 104 |
|
struct vsb *attr_vsb; |
| 105 |
|
int attr_delim; |
| 106 |
|
|
| 107 |
|
struct vep_match *match; |
| 108 |
|
struct vep_match *match_hit; |
| 109 |
|
|
| 110 |
|
char tag[8]; |
| 111 |
|
int tag_i; |
| 112 |
|
|
| 113 |
|
dostuff_f *dostuff; |
| 114 |
|
|
| 115 |
|
struct vsb *include_src; |
| 116 |
|
unsigned include_continue; |
| 117 |
|
|
| 118 |
|
unsigned nm_skip; |
| 119 |
|
unsigned nm_verbatim; |
| 120 |
|
unsigned nm_pending; |
| 121 |
|
enum vep_mark last_mark; |
| 122 |
|
}; |
| 123 |
|
|
| 124 |
|
/*---------------------------------------------------------------------*/ |
| 125 |
|
|
| 126 |
|
/*
 * Parser state labels.  States are compared by pointer identity, so
 * each label must remain its own object; the text is only for tracing.
 */

/* Document start-up */
static const char * const VEP_START =		"[Start]";
static const char * const VEP_BOM =		"[BOM]";
static const char * const VEP_TESTXML =		"[TestXml]";
static const char * const VEP_NOTXML =		"[NotXml]";

/* Scanning between tags */
static const char * const VEP_NEXTTAG =		"[NxtTag]";
static const char * const VEP_NOTMYTAG =	"[NotMyTag]";

/* Classifying what follows a '<' */
static const char * const VEP_STARTTAG =	"[StartTag]";
static const char * const VEP_COMMENTESI =	"[CommentESI]";
static const char * const VEP_COMMENT =		"[Comment]";
static const char * const VEP_CDATA =		"[CDATA]";
static const char * const VEP_ESITAG =		"[ESITag]";
static const char * const VEP_ESIENDTAG =	"[/ESITag]";

/* Individual ESI elements */
static const char * const VEP_ESIREMOVE =	"[ESI:Remove]";
static const char * const VEP_ESIINCLUDE =	"[ESI:Include]";
static const char * const VEP_ESICOMMENT =	"[ESI:Comment]";
static const char * const VEP_ESIBOGON =	"[ESI:Bogon]";

/* Inside an ESI tag */
static const char * const VEP_INTAG =		"[InTag]";
static const char * const VEP_TAGERROR =	"[TagError]";

/* Attribute handling */
static const char * const VEP_ATTR =		"[Attribute]";
static const char * const VEP_SKIPATTR =	"[SkipAttribute]";
static const char * const VEP_ATTRDELIM =	"[AttrDelim]";
static const char * const VEP_ATTRGETVAL =	"[AttrGetValue]";
static const char * const VEP_ATTRVAL =		"[AttrValue]";

/* Generic helper states */
static const char * const VEP_UNTIL =		"[Until]";
static const char * const VEP_MATCHBUF =	"[MatchBuf]";
static const char * const VEP_MATCH =		"[Match]";
| 158 |
|
|
| 159 |
|
/*---------------------------------------------------------------------*/ |
| 160 |
|
|
| 161 |
|
static struct vep_match vep_match_starttag[] = { |
| 162 |
|
{ "!--esi", &VEP_COMMENTESI }, |
| 163 |
|
{ "!---->", &VEP_NEXTTAG }, |
| 164 |
|
{ "!--", &VEP_COMMENT }, |
| 165 |
|
{ "/esi:", &VEP_ESIENDTAG }, |
| 166 |
|
{ "esi:", &VEP_ESITAG }, |
| 167 |
|
{ "![CDATA[", &VEP_CDATA }, |
| 168 |
|
{ NULL, &VEP_NOTMYTAG } |
| 169 |
|
}; |
| 170 |
|
|
| 171 |
|
/*---------------------------------------------------------------------*/ |
| 172 |
|
|
| 173 |
|
static struct vep_match vep_match_esi[] = { |
| 174 |
|
{ "include", &VEP_ESIINCLUDE }, |
| 175 |
|
{ "remove", &VEP_ESIREMOVE }, |
| 176 |
|
{ "comment", &VEP_ESICOMMENT }, |
| 177 |
|
{ NULL, &VEP_ESIBOGON } |
| 178 |
|
}; |
| 179 |
|
|
| 180 |
|
/*---------------------------------------------------------------------*/ |
| 181 |
|
|
| 182 |
|
static struct vep_match vep_match_attr_include[] = { |
| 183 |
|
{ "src=", &VEP_ATTRGETVAL }, |
| 184 |
|
{ "onerror=", &VEP_ATTRGETVAL }, |
| 185 |
|
{ NULL, &VEP_SKIPATTR } |
| 186 |
|
}; |
| 187 |
|
|
| 188 |
|
/*---------------------------------------------------------------------*/ |
| 189 |
|
|
| 190 |
|
static struct vep_match vep_match_bom[] = { |
| 191 |
|
{ "\xeb\xbb\xbf", &VEP_START }, |
| 192 |
|
{ NULL, &VEP_BOM } |
| 193 |
|
}; |
| 194 |
|
|
| 195 |
|
/*-------------------------------------------------------------------- |
| 196 |
|
* Report a parsing error |
| 197 |
|
*/ |
| 198 |
|
|
| 199 |
|
static void |
| 200 |
1425 |
vep_error(const struct vep_state *vep, const char *p) |
| 201 |
|
{ |
| 202 |
1425 |
VSC_C_main->esi_errors++; |
| 203 |
2850 |
VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR: after %zd %s", |
| 204 |
1425 |
vep->o_last, p); |
| 205 |
1425 |
} |
| 206 |
|
|
| 207 |
|
/*-------------------------------------------------------------------- |
| 208 |
|
* Report a parsing warning |
| 209 |
|
*/ |
| 210 |
|
|
| 211 |
|
static void |
| 212 |
200 |
vep_warn(const struct vep_state *vep, const char *p) |
| 213 |
|
{ |
| 214 |
200 |
VSC_C_main->esi_warnings++; |
| 215 |
400 |
VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN: after %zd %s", |
| 216 |
200 |
vep->o_last, p); |
| 217 |
200 |
} |
| 218 |
|
|
| 219 |
|
/*--------------------------------------------------------------------- |
| 220 |
|
* return match or NULL if more input needed. |
| 221 |
|
*/ |
| 222 |
|
|
| 223 |
|
static struct vep_match * |
| 224 |
492766 |
vep_match(const struct vep_state *vep, const char *b, const char *e) |
| 225 |
|
{ |
| 226 |
|
struct vep_match *vm; |
| 227 |
|
const char *q, *r; |
| 228 |
|
|
| 229 |
492766 |
AN(vep->match); |
| 230 |
3204734 |
for (vm = vep->match; vm->match != NULL; vm++) { |
| 231 |
2767160 |
assert(strlen(vm->match) <= sizeof (vep->tag)); |
| 232 |
2767160 |
r = b; |
| 233 |
3032293 |
for (q = vm->match; *q != '\0' && r < e; q++, r++) |
| 234 |
2977101 |
if (*q != *r) |
| 235 |
2711968 |
break; |
| 236 |
2767160 |
if (*q == '\0') |
| 237 |
45560 |
break; |
| 238 |
2721600 |
if (r == e) |
| 239 |
9632 |
return (NULL); |
| 240 |
2711968 |
} |
| 241 |
483134 |
return (vm); |
| 242 |
492766 |
} |
| 243 |
|
|
| 244 |
|
/*--------------------------------------------------------------------- |
| 245 |
|
* |
| 246 |
|
*/ |
| 247 |
|
|
| 248 |
|
static void |
| 249 |
39160 |
vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64) |
| 250 |
|
{ |
| 251 |
|
uint8_t buf[9]; |
| 252 |
|
|
| 253 |
39160 |
assert(l > 0); |
| 254 |
39160 |
if (l < 256) { |
| 255 |
38480 |
buf[0] = (uint8_t)m8; |
| 256 |
38480 |
buf[1] = (uint8_t)l; |
| 257 |
38480 |
assert((ssize_t)buf[1] == l); |
| 258 |
38480 |
VSB_bcat(vep->vsb, buf, 2); |
| 259 |
39160 |
} else if (l < 65536) { |
| 260 |
440 |
buf[0] = (uint8_t)m16; |
| 261 |
440 |
vbe16enc(buf + 1, (uint16_t)l); |
| 262 |
440 |
assert((ssize_t)vbe16dec(buf + 1) == l); |
| 263 |
440 |
VSB_bcat(vep->vsb, buf, 3); |
| 264 |
440 |
} else { |
| 265 |
240 |
buf[0] = (uint8_t)m64; |
| 266 |
240 |
vbe64enc(buf + 1, l); |
| 267 |
240 |
assert((ssize_t)vbe64dec(buf + 1) == l); |
| 268 |
240 |
VSB_bcat(vep->vsb, buf, 9); |
| 269 |
|
} |
| 270 |
39160 |
} |
| 271 |
|
|
| 272 |
|
static void |
| 273 |
17800 |
vep_emit_skip(const struct vep_state *vep, ssize_t l) |
| 274 |
|
{ |
| 275 |
|
|
| 276 |
17800 |
vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8); |
| 277 |
17800 |
} |
| 278 |
|
|
| 279 |
|
static void |
| 280 |
17320 |
vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc) |
| 281 |
|
{ |
| 282 |
|
uint8_t buf[4]; |
| 283 |
|
|
| 284 |
17320 |
vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8); |
| 285 |
17320 |
if (vep->dogzip) { |
| 286 |
4040 |
vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8); |
| 287 |
4040 |
vbe32enc(buf, vep->crc); |
| 288 |
4040 |
VSB_bcat(vep->vsb, buf, sizeof buf); |
| 289 |
4040 |
} |
| 290 |
17320 |
} |
| 291 |
|
|
| 292 |
|
static void |
| 293 |
40720 |
vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark) |
| 294 |
|
{ |
| 295 |
|
|
| 296 |
40720 |
assert(l >= 0); |
| 297 |
40720 |
if (l == 0) |
| 298 |
5600 |
return; |
| 299 |
35120 |
assert(mark == SKIP || mark == VERBATIM); |
| 300 |
35120 |
if (mark == SKIP) |
| 301 |
17800 |
vep_emit_skip(vep, l); |
| 302 |
|
else |
| 303 |
17320 |
vep_emit_verbatim(vep, l, vep->o_crc); |
| 304 |
|
|
| 305 |
35120 |
vep->crc = crc32(0L, Z_NULL, 0); |
| 306 |
35120 |
vep->o_crc = 0; |
| 307 |
35120 |
vep->o_total += l; |
| 308 |
40720 |
} |
| 309 |
|
|
| 310 |
|
/*--------------------------------------------------------------------- |
| 311 |
|
* |
| 312 |
|
*/ |
| 313 |
|
|
| 314 |
|
static void |
| 315 |
2652314 |
vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark) |
| 316 |
|
{ |
| 317 |
|
ssize_t l, lcb; |
| 318 |
|
|
| 319 |
2652314 |
assert(mark == SKIP || mark == VERBATIM); |
| 320 |
|
|
| 321 |
|
/* The NO-OP case, no data, no pending data & no change of mode */ |
| 322 |
2652314 |
if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0) |
| 323 |
1720 |
return; |
| 324 |
|
|
| 325 |
|
/* |
| 326 |
|
* If we changed mode, emit whatever the opposite mode |
| 327 |
|
* assembled before the pending bytes. |
| 328 |
|
*/ |
| 329 |
|
|
| 330 |
2650594 |
if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) { |
| 331 |
64400 |
lcb = vep->cb(vep->vc, vep->cb_priv, 0, |
| 332 |
32200 |
mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN); |
| 333 |
32200 |
vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); |
| 334 |
32200 |
vep->o_last = lcb; |
| 335 |
32200 |
vep->o_wait = 0; |
| 336 |
32200 |
} |
| 337 |
|
|
| 338 |
|
/* Transfer pending bytes CRC into active mode CRC */ |
| 339 |
2650594 |
if (vep->o_pending) { |
| 340 |
2699 |
(void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending, |
| 341 |
|
VGZ_NORMAL); |
| 342 |
2699 |
if (vep->o_crc == 0) { |
| 343 |
2089 |
vep->crc = vep->crcp; |
| 344 |
2089 |
vep->o_crc = vep->o_pending; |
| 345 |
2089 |
} else { |
| 346 |
1220 |
vep->crc = crc32_combine(vep->crc, |
| 347 |
610 |
vep->crcp, vep->o_pending); |
| 348 |
610 |
vep->o_crc += vep->o_pending; |
| 349 |
|
} |
| 350 |
2699 |
vep->crcp = crc32(0L, Z_NULL, 0); |
| 351 |
2699 |
vep->o_wait += vep->o_pending; |
| 352 |
2699 |
vep->o_pending = 0; |
| 353 |
2699 |
} |
| 354 |
|
|
| 355 |
|
/* * Process this bit of input */ |
| 356 |
2650594 |
AN(vep->ver_p); |
| 357 |
2650594 |
l = p - vep->ver_p; |
| 358 |
2650594 |
assert(l >= 0); |
| 359 |
2650594 |
vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l); |
| 360 |
2650594 |
vep->o_crc += l; |
| 361 |
2650594 |
vep->ver_p = p; |
| 362 |
|
|
| 363 |
2650594 |
vep->o_wait += l; |
| 364 |
2650594 |
vep->last_mark = mark; |
| 365 |
2650594 |
(void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL); |
| 366 |
2652314 |
} |
| 367 |
|
|
| 368 |
|
static void |
| 369 |
1529797 |
vep_mark_verbatim(struct vep_state *vep, const char *p) |
| 370 |
|
{ |
| 371 |
|
|
| 372 |
1529797 |
vep_mark_common(vep, p, VERBATIM); |
| 373 |
1529797 |
vep->nm_verbatim++; |
| 374 |
1529797 |
} |
| 375 |
|
|
| 376 |
|
static void |
| 377 |
1113997 |
vep_mark_skip(struct vep_state *vep, const char *p) |
| 378 |
|
{ |
| 379 |
|
|
| 380 |
1113997 |
vep_mark_common(vep, p, SKIP); |
| 381 |
1113997 |
vep->nm_skip++; |
| 382 |
1113997 |
} |
| 383 |
|
|
| 384 |
|
static void |
| 385 |
5035 |
vep_mark_pending(struct vep_state *vep, const char *p) |
| 386 |
|
{ |
| 387 |
|
ssize_t l; |
| 388 |
|
|
| 389 |
5035 |
AN(vep->ver_p); |
| 390 |
5035 |
l = p - vep->ver_p; |
| 391 |
5035 |
assert(l > 0); |
| 392 |
5035 |
vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l); |
| 393 |
5035 |
vep->ver_p = p; |
| 394 |
|
|
| 395 |
5035 |
vep->o_pending += l; |
| 396 |
5035 |
vep->nm_pending++; |
| 397 |
5035 |
} |
| 398 |
|
|
| 399 |
|
/*--------------------------------------------------------------------- |
| 400 |
|
*/ |
| 401 |
|
|
| 402 |
|
static void v_matchproto_() |
| 403 |
240 |
vep_do_comment(struct vep_state *vep, enum dowhat what) |
| 404 |
|
{ |
| 405 |
|
Debug("DO_COMMENT(%d)\n", what); |
| 406 |
240 |
assert(what == DO_TAG); |
| 407 |
240 |
if (!vep->emptytag) |
| 408 |
80 |
vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'"); |
| 409 |
240 |
} |
| 410 |
|
|
| 411 |
|
/*--------------------------------------------------------------------- |
| 412 |
|
*/ |
| 413 |
|
|
| 414 |
|
static void v_matchproto_() |
| 415 |
5680 |
vep_do_remove(struct vep_state *vep, enum dowhat what) |
| 416 |
|
{ |
| 417 |
|
Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n", |
| 418 |
|
what, vep->endtag, vep->emptytag, vep->remove); |
| 419 |
5680 |
assert(what == DO_TAG); |
| 420 |
5680 |
if (vep->emptytag) |
| 421 |
40 |
vep_error(vep, "ESI 1.0 <esi:remove/> not legal"); |
| 422 |
5640 |
else if (vep->remove && !vep->endtag) |
| 423 |
40 |
vep_error(vep, "ESI 1.0 <esi:remove> already open"); |
| 424 |
5600 |
else if (!vep->remove && vep->endtag) |
| 425 |
40 |
vep_error(vep, "ESI 1.0 <esi:remove> not open"); |
| 426 |
|
else |
| 427 |
5560 |
vep->remove = !vep->endtag; |
| 428 |
5680 |
} |
| 429 |
|
|
| 430 |
|
/*--------------------------------------------------------------------- |
| 431 |
|
*/ |
| 432 |
|
|
| 433 |
|
static void |
| 434 |
10280 |
include_attr_src(struct vep_state *vep) |
| 435 |
|
{ |
| 436 |
|
const char *p; |
| 437 |
|
|
| 438 |
10280 |
if (vep->include_src != NULL) { |
| 439 |
40 |
vep_error(vep, |
| 440 |
|
"ESI 1.0 <esi:include> " |
| 441 |
|
"has multiple src= attributes"); |
| 442 |
40 |
vep->state = VEP_TAGERROR; |
| 443 |
40 |
VSB_destroy(&vep->attr_vsb); |
| 444 |
40 |
VSB_destroy(&vep->include_src); |
| 445 |
40 |
return; |
| 446 |
|
} |
| 447 |
369680 |
for (p = VSB_data(vep->attr_vsb); *p != '\0'; p++) |
| 448 |
359480 |
if (vct_islws(*p)) |
| 449 |
40 |
break; |
| 450 |
10240 |
if (*p != '\0') { |
| 451 |
40 |
vep_error(vep, |
| 452 |
|
"ESI 1.0 <esi:include> " |
| 453 |
|
"has whitespace in src= attribute"); |
| 454 |
40 |
vep->state = VEP_TAGERROR; |
| 455 |
40 |
VSB_destroy(&vep->attr_vsb); |
| 456 |
40 |
if (vep->include_src != NULL) |
| 457 |
0 |
VSB_destroy(&vep->include_src); |
| 458 |
40 |
return; |
| 459 |
|
} |
| 460 |
10200 |
vep->include_src = vep->attr_vsb; |
| 461 |
10200 |
vep->attr_vsb = NULL; |
| 462 |
10280 |
} |
| 463 |
|
|
| 464 |
|
static void |
| 465 |
280 |
include_attr_onerror(struct vep_state *vep) |
| 466 |
|
{ |
| 467 |
|
|
| 468 |
280 |
vep->include_continue = !strcmp("continue", VSB_data(vep->attr_vsb)); |
| 469 |
280 |
VSB_destroy(&vep->attr_vsb); |
| 470 |
280 |
} |
| 471 |
|
|
| 472 |
|
static void v_matchproto_() |
| 473 |
20800 |
vep_do_include(struct vep_state *vep, enum dowhat what) |
| 474 |
|
{ |
| 475 |
|
const char *p, *q, *h; |
| 476 |
|
ssize_t l; |
| 477 |
|
char incl; |
| 478 |
|
|
| 479 |
|
Debug("DO_INCLUDE(%d)\n", what); |
| 480 |
20800 |
if (what == DO_ATTR) { |
| 481 |
|
Debug("ATTR (%s) (%s)\n", vep->match_hit->match, |
| 482 |
|
VSB_data(vep->attr_vsb)); |
| 483 |
10560 |
if (!strcmp("src=", vep->match_hit->match)) { |
| 484 |
10280 |
include_attr_src(vep); |
| 485 |
10280 |
return; |
| 486 |
|
} |
| 487 |
280 |
if (!strcmp("onerror=", vep->match_hit->match)) { |
| 488 |
280 |
include_attr_onerror(vep); |
| 489 |
280 |
return; |
| 490 |
|
} |
| 491 |
0 |
WRONG("Unhandled <esi:include> attribute"); |
| 492 |
0 |
} |
| 493 |
10240 |
assert(what == DO_TAG); |
| 494 |
10240 |
if (!vep->emptytag) |
| 495 |
80 |
vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'"); |
| 496 |
10240 |
if (vep->include_src == NULL) { |
| 497 |
80 |
vep_error(vep, "ESI 1.0 <esi:include> lacks src attr"); |
| 498 |
80 |
return; |
| 499 |
|
} |
| 500 |
|
|
| 501 |
|
/* |
| 502 |
|
* Strictly speaking, we ought to spit out any piled up skip before |
| 503 |
|
* emitting the VEC for the include, but objectively that makes no |
| 504 |
|
* difference and robs us of a chance to collapse another skip into |
| 505 |
|
* this on so we don't do that. |
| 506 |
|
* However, we cannot tolerate any verbatim stuff piling up. |
| 507 |
|
* The mark_skip() before calling dostuff should have taken |
| 508 |
|
* care of that. Make sure. |
| 509 |
|
*/ |
| 510 |
10160 |
assert(vep->o_wait == 0 || vep->last_mark == SKIP); |
| 511 |
|
/* XXX: what if it contains NUL bytes ?? */ |
| 512 |
10160 |
p = VSB_data(vep->include_src); |
| 513 |
10160 |
l = VSB_len(vep->include_src); |
| 514 |
10160 |
h = 0; |
| 515 |
|
|
| 516 |
10160 |
incl = vep->include_continue ? VEC_IC : VEC_IA; |
| 517 |
|
|
| 518 |
10160 |
if (l > 7 && !memcmp(p, "http://", 7)) { |
| 519 |
80 |
h = p + 7; |
| 520 |
80 |
p = strchr(h, '/'); |
| 521 |
80 |
if (p == NULL) { |
| 522 |
40 |
vep_error(vep, |
| 523 |
|
"ESI 1.0 <esi:include> invalid src= URL"); |
| 524 |
40 |
vep->state = VEP_TAGERROR; |
| 525 |
40 |
AZ(vep->attr_vsb); |
| 526 |
40 |
VSB_destroy(&vep->include_src); |
| 527 |
40 |
return; |
| 528 |
|
} |
| 529 |
|
Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p); |
| 530 |
40 |
VSB_printf(vep->vsb, "%c", incl); |
| 531 |
40 |
VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0); |
| 532 |
10120 |
} else if (l > 8 && !memcmp(p, "https://", 8)) { |
| 533 |
120 |
if (!FEATURE(FEATURE_ESI_IGNORE_HTTPS)) { |
| 534 |
40 |
vep_warn(vep, |
| 535 |
|
"ESI 1.0 <esi:include> with https:// ignored"); |
| 536 |
40 |
vep->state = VEP_TAGERROR; |
| 537 |
40 |
AZ(vep->attr_vsb); |
| 538 |
40 |
VSB_destroy(&vep->include_src); |
| 539 |
40 |
return; |
| 540 |
|
} |
| 541 |
80 |
vep_warn(vep, |
| 542 |
|
"ESI 1.0 <esi:include> https:// treated as http://"); |
| 543 |
80 |
h = p + 8; |
| 544 |
80 |
p = strchr(h, '/'); |
| 545 |
80 |
if (p == NULL) { |
| 546 |
40 |
vep_error(vep, |
| 547 |
|
"ESI 1.0 <esi:include> invalid src= URL"); |
| 548 |
40 |
vep->state = VEP_TAGERROR; |
| 549 |
40 |
AZ(vep->attr_vsb); |
| 550 |
40 |
VSB_destroy(&vep->include_src); |
| 551 |
40 |
return; |
| 552 |
|
} |
| 553 |
40 |
VSB_printf(vep->vsb, "%c", incl); |
| 554 |
40 |
VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0); |
| 555 |
10000 |
} else if (*p == '/') { |
| 556 |
8760 |
VSB_printf(vep->vsb, "%c", incl); |
| 557 |
8760 |
VSB_printf(vep->vsb, "%c", 0); |
| 558 |
8760 |
} else { |
| 559 |
1200 |
VSB_printf(vep->vsb, "%c", incl); |
| 560 |
1200 |
VSB_printf(vep->vsb, "%c", 0); |
| 561 |
|
/* Look for the last / before a '?' */ |
| 562 |
1200 |
h = NULL; |
| 563 |
3600 |
for (q = vep->url; *q && *q != '?'; q++) |
| 564 |
3720 |
if (*q == '/') |
| 565 |
1320 |
h = q; |
| 566 |
1200 |
if (h == NULL) |
| 567 |
40 |
h = q + 1; |
| 568 |
|
|
| 569 |
|
Debug("INCL:: [%.*s]/[%s]\n", |
| 570 |
|
(int)(h - vep->url), vep->url, p); |
| 571 |
1200 |
VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url); |
| 572 |
|
} |
| 573 |
10040 |
l -= (p - VSB_data(vep->include_src)); |
| 574 |
364960 |
for (q = p; *q != '\0'; ) { |
| 575 |
354920 |
if (*q == '&') { |
| 576 |
|
#define R(w,f,r) \ |
| 577 |
|
if (q + w <= p + l && !memcmp(q, f, w)) { \ |
| 578 |
|
VSB_printf(vep->vsb, "%c", r); \ |
| 579 |
|
q += w; \ |
| 580 |
|
continue; \ |
| 581 |
|
} |
| 582 |
400 |
R(6, "'", '\''); |
| 583 |
320 |
R(6, """, '"'); |
| 584 |
240 |
R(4, "<", '<'); |
| 585 |
160 |
R(4, ">", '>'); |
| 586 |
80 |
R(5, "&", '&'); |
| 587 |
0 |
} |
| 588 |
354520 |
VSB_printf(vep->vsb, "%c", *q++); |
| 589 |
|
} |
| 590 |
|
#undef R |
| 591 |
10040 |
VSB_printf(vep->vsb, "%c", 0); |
| 592 |
10040 |
VSB_destroy(&vep->include_src); |
| 593 |
10040 |
vep->include_continue = 0; |
| 594 |
20800 |
} |
| 595 |
|
|
| 596 |
|
/*--------------------------------------------------------------------- |
| 597 |
|
* Lex/Parse object for ESI instructions |
| 598 |
|
* |
| 599 |
|
* This function is called with the input object piecemeal so do not |
| 600 |
|
* assume that we have more than one char available at a time, but |
| 601 |
|
* optimize for getting huge chunks. |
| 602 |
|
* |
| 603 |
|
* NB: At the bottom of this source-file, there is a dot-diagram matching |
| 604 |
|
* NB: the state-machine. Please maintain it along with the code. |
| 605 |
|
*/ |
| 606 |
|
|
| 607 |
|
void |
| 608 |
2154839 |
VEP_Parse(struct vep_state *vep, const char *p, size_t l) |
| 609 |
|
{ |
| 610 |
|
const char *e; |
| 611 |
|
struct vep_match *vm; |
| 612 |
|
int i; |
| 613 |
|
|
| 614 |
2154839 |
CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); |
| 615 |
2154839 |
assert(l > 0); |
| 616 |
|
|
| 617 |
2154839 |
if (vep->startup) { |
| 618 |
|
/* |
| 619 |
|
* We must force the GZIP header out as a SKIP string, |
| 620 |
|
* otherwise an object starting with <esi:include would |
| 621 |
|
* have its GZIP header appear after the included object |
| 622 |
|
* (e000026.vtc) |
| 623 |
|
*/ |
| 624 |
8520 |
vep->ver_p = ""; |
| 625 |
8520 |
vep->last_mark = SKIP; |
| 626 |
8520 |
vep_mark_common(vep, vep->ver_p, VERBATIM); |
| 627 |
8520 |
vep->startup = 0; |
| 628 |
8520 |
AZ(vep->hack_p); |
| 629 |
8520 |
vep->hack_p = p; |
| 630 |
8520 |
} |
| 631 |
|
|
| 632 |
2154839 |
vep->ver_p = p; |
| 633 |
|
|
| 634 |
2154839 |
e = p + l; |
| 635 |
|
|
| 636 |
6258271 |
while (p < e) { |
| 637 |
4103432 |
AN(vep->state); |
| 638 |
|
Debug("EP %s %d (%.*s) [%.*s]\n", |
| 639 |
|
vep->state, |
| 640 |
|
vep->remove, |
| 641 |
|
vep->tag_i, vep->tag, |
| 642 |
|
(e - p) > 10 ? 10 : (int)(e-p), p); |
| 643 |
4103432 |
assert(p >= vep->ver_p); |
| 644 |
|
|
| 645 |
|
/****************************************************** |
| 646 |
|
* SECTION A |
| 647 |
|
*/ |
| 648 |
|
|
| 649 |
4103432 |
if (vep->state == VEP_START) { |
| 650 |
8640 |
if (FEATURE(FEATURE_ESI_REMOVE_BOM) && |
| 651 |
240 |
*p == (char)0xeb) { |
| 652 |
160 |
vep->match = vep_match_bom; |
| 653 |
160 |
vep->state = VEP_MATCH; |
| 654 |
160 |
} else |
| 655 |
8480 |
vep->state = VEP_BOM; |
| 656 |
4103432 |
} else if (vep->state == VEP_BOM) { |
| 657 |
8520 |
vep_mark_skip(vep, p); |
| 658 |
8520 |
if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK)) |
| 659 |
2160 |
vep->state = VEP_NEXTTAG; |
| 660 |
|
else |
| 661 |
6360 |
vep->state = VEP_TESTXML; |
| 662 |
4094792 |
} else if (vep->state == VEP_TESTXML) { |
| 663 |
|
/* |
| 664 |
|
* If the first non-whitespace char is different |
| 665 |
|
* from '<' we assume this is not XML. |
| 666 |
|
*/ |
| 667 |
14180 |
while (p < e && vct_islws(*p)) |
| 668 |
7520 |
p++; |
| 669 |
6660 |
vep_mark_verbatim(vep, p); |
| 670 |
6660 |
if (p < e && *p == '<') { |
| 671 |
5800 |
p++; |
| 672 |
5800 |
vep->state = VEP_STARTTAG; |
| 673 |
6660 |
} else if (p < e && *p == (char)0xeb) { |
| 674 |
80 |
VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, |
| 675 |
|
"WARN: No ESI processing, " |
| 676 |
|
"first char not '<' but BOM." |
| 677 |
|
" (See feature esi_remove_bom)" |
| 678 |
|
); |
| 679 |
80 |
vep->state = VEP_NOTXML; |
| 680 |
860 |
} else if (p < e) { |
| 681 |
440 |
VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, |
| 682 |
|
"WARN: No ESI processing, " |
| 683 |
|
"first char not '<'." |
| 684 |
|
" (See feature esi_disable_xml_check)" |
| 685 |
|
); |
| 686 |
440 |
vep->state = VEP_NOTXML; |
| 687 |
440 |
} |
| 688 |
4086272 |
} else if (vep->state == VEP_NOTXML) { |
| 689 |
|
/* |
| 690 |
|
* This is not recognized as XML, just skip thru |
| 691 |
|
* vfp_esi_end() will handle the rest |
| 692 |
|
*/ |
| 693 |
520 |
p = e; |
| 694 |
520 |
vep_mark_verbatim(vep, p); |
| 695 |
|
|
| 696 |
|
/****************************************************** |
| 697 |
|
* SECTION B |
| 698 |
|
*/ |
| 699 |
|
|
| 700 |
4079612 |
} else if (vep->state == VEP_NOTMYTAG) { |
| 701 |
438728 |
if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) { |
| 702 |
80 |
p++; |
| 703 |
80 |
vep->state = VEP_NEXTTAG; |
| 704 |
80 |
} else { |
| 705 |
438648 |
vep->tag_i = 0; |
| 706 |
1856952 |
while (p < e) { |
| 707 |
1855373 |
if (*p++ == '>') { |
| 708 |
437069 |
vep->state = VEP_NEXTTAG; |
| 709 |
437069 |
break; |
| 710 |
|
} |
| 711 |
|
} |
| 712 |
|
} |
| 713 |
438728 |
if (p == e && !vep->remove) |
| 714 |
4058 |
vep_mark_verbatim(vep, p); |
| 715 |
4079092 |
} else if (vep->state == VEP_NEXTTAG) { |
| 716 |
|
/* |
| 717 |
|
* Hunt for start of next tag and keep an eye |
| 718 |
|
* out for end of EsiCmt if armed. |
| 719 |
|
*/ |
| 720 |
2577977 |
vep->emptytag = 0; |
| 721 |
2577977 |
vep->attr = NULL; |
| 722 |
2577977 |
vep->dostuff = NULL; |
| 723 |
89083359 |
while (p < e && *p != '<') { |
| 724 |
86505382 |
if (vep->esicmt_p == NULL) { |
| 725 |
86500742 |
p++; |
| 726 |
86500742 |
continue; |
| 727 |
|
} |
| 728 |
4640 |
if (*p != *vep->esicmt_p) { |
| 729 |
2640 |
p++; |
| 730 |
2640 |
vep->esicmt_p = vep->esicmt; |
| 731 |
2640 |
continue; |
| 732 |
|
} |
| 733 |
2000 |
if (!vep->remove && vep->esicmt_p == vep->esicmt) |
| 734 |
480 |
vep_mark_verbatim(vep, p); |
| 735 |
2000 |
p++; |
| 736 |
2000 |
if (*++vep->esicmt_p == '\0') { |
| 737 |
640 |
vep->esi_found = 1; |
| 738 |
640 |
vep->esicmt = NULL; |
| 739 |
640 |
vep->esicmt_p = NULL; |
| 740 |
|
/* |
| 741 |
|
* The end of the esicmt |
| 742 |
|
* should not be emitted. |
| 743 |
|
* But the stuff before should |
| 744 |
|
*/ |
| 745 |
640 |
vep_mark_skip(vep, p); |
| 746 |
640 |
} |
| 747 |
|
} |
| 748 |
2577977 |
if (p < e) { |
| 749 |
449374 |
if (!vep->remove) |
| 750 |
446054 |
vep_mark_verbatim(vep, p); |
| 751 |
449374 |
assert(*p == '<'); |
| 752 |
449374 |
p++; |
| 753 |
449374 |
vep->state = VEP_STARTTAG; |
| 754 |
2577977 |
} else if (vep->esicmt_p == vep->esicmt && !vep->remove) |
| 755 |
1071292 |
vep_mark_verbatim(vep, p); |
| 756 |
|
|
| 757 |
|
/****************************************************** |
| 758 |
|
* SECTION C |
| 759 |
|
*/ |
| 760 |
|
|
| 761 |
3640364 |
} else if (vep->state == VEP_STARTTAG) { |
| 762 |
|
/* Start of tag, set up match table */ |
| 763 |
455174 |
vep->endtag = 0; |
| 764 |
455174 |
vep->match = vep_match_starttag; |
| 765 |
455174 |
vep->state = VEP_MATCH; |
| 766 |
1062387 |
} else if (vep->state == VEP_COMMENT) { |
| 767 |
200 |
vep->esicmt_p = vep->esicmt = NULL; |
| 768 |
200 |
vep->until_p = vep->until = "-->"; |
| 769 |
200 |
vep->until_s = VEP_NEXTTAG; |
| 770 |
200 |
vep->state = VEP_UNTIL; |
| 771 |
607213 |
} else if (vep->state == VEP_COMMENTESI) { |
| 772 |
680 |
if (vep->remove) |
| 773 |
200 |
vep_error(vep, |
| 774 |
|
"ESI 1.0 Nested <!--esi" |
| 775 |
|
" element in <esi:remove>"); |
| 776 |
680 |
vep->esicmt_p = vep->esicmt = "-->"; |
| 777 |
680 |
vep->state = VEP_NEXTTAG; |
| 778 |
680 |
vep_mark_skip(vep, p); |
| 779 |
607013 |
} else if (vep->state == VEP_CDATA) { |
| 780 |
|
/* |
| 781 |
|
* Easy: just look for the end of CDATA |
| 782 |
|
*/ |
| 783 |
160 |
vep->until_p = vep->until = "]]>"; |
| 784 |
160 |
vep->until_s = VEP_NEXTTAG; |
| 785 |
160 |
vep->state = VEP_UNTIL; |
| 786 |
606333 |
} else if (vep->state == VEP_ESIENDTAG) { |
| 787 |
2920 |
vep->endtag = 1; |
| 788 |
2920 |
vep->state = VEP_ESITAG; |
| 789 |
606173 |
} else if (vep->state == VEP_ESITAG) { |
| 790 |
16880 |
vep->in_esi_tag = 1; |
| 791 |
16880 |
vep->esi_found = 1; |
| 792 |
16880 |
vep_mark_skip(vep, p); |
| 793 |
16880 |
vep->match = vep_match_esi; |
| 794 |
16880 |
vep->state = VEP_MATCH; |
| 795 |
603253 |
} else if (vep->state == VEP_ESIINCLUDE) { |
| 796 |
10720 |
if (vep->remove) { |
| 797 |
160 |
vep_error(vep, |
| 798 |
|
"ESI 1.0 <esi:include> element" |
| 799 |
|
" nested in <esi:remove>"); |
| 800 |
160 |
vep->state = VEP_TAGERROR; |
| 801 |
10720 |
} else if (vep->endtag) { |
| 802 |
40 |
vep_error(vep, |
| 803 |
|
"ESI 1.0 </esi:include> illegal end-tag"); |
| 804 |
40 |
vep->state = VEP_TAGERROR; |
| 805 |
40 |
} else { |
| 806 |
10520 |
vep->dostuff = vep_do_include; |
| 807 |
10520 |
vep->state = VEP_INTAG; |
| 808 |
10520 |
vep->attr = vep_match_attr_include; |
| 809 |
|
} |
| 810 |
586373 |
} else if (vep->state == VEP_ESIREMOVE) { |
| 811 |
5680 |
vep->dostuff = vep_do_remove; |
| 812 |
5680 |
vep->state = VEP_INTAG; |
| 813 |
575653 |
} else if (vep->state == VEP_ESICOMMENT) { |
| 814 |
400 |
if (vep->remove) { |
| 815 |
40 |
vep_error(vep, |
| 816 |
|
"ESI 1.0 <esi:comment> element" |
| 817 |
|
" nested in <esi:remove>"); |
| 818 |
40 |
vep->state = VEP_TAGERROR; |
| 819 |
400 |
} else if (vep->endtag) { |
| 820 |
80 |
vep_error(vep, |
| 821 |
|
"ESI 1.0 </esi:comment> illegal end-tag"); |
| 822 |
80 |
vep->state = VEP_TAGERROR; |
| 823 |
80 |
} else { |
| 824 |
280 |
vep->dostuff = vep_do_comment; |
| 825 |
280 |
vep->state = VEP_INTAG; |
| 826 |
|
} |
| 827 |
569973 |
} else if (vep->state == VEP_ESIBOGON) { |
| 828 |
80 |
vep_error(vep, |
| 829 |
|
"ESI 1.0 <esi:bogus> element"); |
| 830 |
80 |
vep->state = VEP_TAGERROR; |
| 831 |
|
|
| 832 |
|
/****************************************************** |
| 833 |
|
* SECTION D |
| 834 |
|
*/ |
| 835 |
|
|
| 836 |
569573 |
} else if (vep->state == VEP_INTAG) { |
| 837 |
28267 |
vep->tag_i = 0; |
| 838 |
43667 |
while (p < e && vct_islws(*p) && !vep->emptytag) { |
| 839 |
15400 |
p++; |
| 840 |
15400 |
vep->canattr = 1; |
| 841 |
|
} |
| 842 |
28267 |
if (p < e && *p == '/' && !vep->emptytag) { |
| 843 |
10400 |
p++; |
| 844 |
10400 |
vep->emptytag = 1; |
| 845 |
10400 |
vep->canattr = 0; |
| 846 |
10400 |
} |
| 847 |
28267 |
if (p < e && *p == '>') { |
| 848 |
16160 |
p++; |
| 849 |
16160 |
AN(vep->dostuff); |
| 850 |
16160 |
vep_mark_skip(vep, p); |
| 851 |
16160 |
vep->dostuff(vep, DO_TAG); |
| 852 |
16160 |
vep->in_esi_tag = 0; |
| 853 |
16160 |
vep->state = VEP_NEXTTAG; |
| 854 |
28267 |
} else if (p < e && vep->emptytag) { |
| 855 |
40 |
vep_error(vep, |
| 856 |
|
"XML 1.0 '>' does not follow '/' in tag"); |
| 857 |
40 |
vep->state = VEP_TAGERROR; |
| 858 |
12107 |
} else if (p < e && vep->canattr && |
| 859 |
11080 |
vct_isxmlnamestart(*p)) { |
| 860 |
11040 |
vep->state = VEP_ATTR; |
| 861 |
12067 |
} else if (p < e) { |
| 862 |
40 |
vep_error(vep, |
| 863 |
|
"XML 1.0 Illegal attribute start char"); |
| 864 |
40 |
vep->state = VEP_TAGERROR; |
| 865 |
40 |
} |
| 866 |
569493 |
} else if (vep->state == VEP_TAGERROR) { |
| 867 |
7322 |
while (p < e && *p != '>') |
| 868 |
5230 |
p++; |
| 869 |
2092 |
if (p < e) { |
| 870 |
720 |
p++; |
| 871 |
720 |
vep_mark_skip(vep, p); |
| 872 |
720 |
vep->in_esi_tag = 0; |
| 873 |
720 |
vep->state = VEP_NEXTTAG; |
| 874 |
720 |
if (vep->attr_vsb) |
| 875 |
40 |
VSB_destroy(&vep->attr_vsb); |
| 876 |
720 |
} |
| 877 |
|
|
| 878 |
|
/****************************************************** |
| 879 |
|
* SECTION E |
| 880 |
|
*/ |
| 881 |
|
|
| 882 |
541226 |
} else if (vep->state == VEP_ATTR) { |
| 883 |
11040 |
AZ(vep->attr_delim); |
| 884 |
11040 |
if (vep->attr == NULL) { |
| 885 |
120 |
p++; |
| 886 |
120 |
AZ(vep->attr_vsb); |
| 887 |
120 |
vep->state = VEP_SKIPATTR; |
| 888 |
120 |
} else { |
| 889 |
10920 |
vep->match = vep->attr; |
| 890 |
10920 |
vep->state = VEP_MATCH; |
| 891 |
|
} |
| 892 |
539134 |
} else if (vep->state == VEP_SKIPATTR) { |
| 893 |
1310 |
while (p < e && vct_isxmlname(*p)) |
| 894 |
728 |
p++; |
| 895 |
582 |
if (p < e && *p == '=') { |
| 896 |
160 |
p++; |
| 897 |
160 |
vep->state = VEP_ATTRDELIM; |
| 898 |
582 |
} else if (p < e && *p == '>') { |
| 899 |
40 |
vep->state = VEP_INTAG; |
| 900 |
422 |
} else if (p < e && *p == '/') { |
| 901 |
40 |
vep->state = VEP_INTAG; |
| 902 |
382 |
} else if (p < e && vct_issp(*p)) { |
| 903 |
80 |
vep->state = VEP_INTAG; |
| 904 |
342 |
} else if (p < e) { |
| 905 |
40 |
vep_error(vep, |
| 906 |
|
"XML 1.0 Illegal attr char"); |
| 907 |
40 |
vep->state = VEP_TAGERROR; |
| 908 |
40 |
} |
| 909 |
528094 |
} else if (vep->state == VEP_ATTRGETVAL) { |
| 910 |
10680 |
AZ(vep->attr_vsb); |
| 911 |
10680 |
vep->attr_vsb = VSB_new_auto(); |
| 912 |
10680 |
vep->state = VEP_ATTRDELIM; |
| 913 |
527512 |
} else if (vep->state == VEP_ATTRDELIM) { |
| 914 |
10840 |
AZ(vep->attr_delim); |
| 915 |
10840 |
if (*p == '"' || *p == '\'') { |
| 916 |
10680 |
vep->attr_delim = *p++; |
| 917 |
10680 |
vep->state = VEP_ATTRVAL; |
| 918 |
10840 |
} else if (!vct_issp(*p)) { |
| 919 |
120 |
vep->attr_delim = ' '; |
| 920 |
120 |
vep->state = VEP_ATTRVAL; |
| 921 |
120 |
} else { |
| 922 |
40 |
vep_error(vep, |
| 923 |
|
"XML 1.0 Illegal attribute delimiter"); |
| 924 |
40 |
vep->state = VEP_TAGERROR; |
| 925 |
|
} |
| 926 |
|
|
| 927 |
516832 |
} else if (vep->state == VEP_ATTRVAL) { |
| 928 |
737111 |
while (p < e && *p != '>' && *p != vep->attr_delim && |
| 929 |
362400 |
(vep->attr_delim != ' ' || !vct_issp(*p))) { |
| 930 |
362400 |
if (vep->attr_vsb != NULL) |
| 931 |
361920 |
VSB_putc(vep->attr_vsb, *p); |
| 932 |
362400 |
p++; |
| 933 |
|
} |
| 934 |
12311 |
if (p < e && *p == '>') { |
| 935 |
80 |
vep_error(vep, |
| 936 |
|
"XML 1.0 Missing end attribute delimiter"); |
| 937 |
80 |
vep->state = VEP_TAGERROR; |
| 938 |
80 |
vep->attr_delim = 0; |
| 939 |
80 |
if (vep->attr_vsb != NULL) { |
| 940 |
80 |
AZ(VSB_finish(vep->attr_vsb)); |
| 941 |
80 |
VSB_destroy(&vep->attr_vsb); |
| 942 |
80 |
} |
| 943 |
12311 |
} else if (p < e) { |
| 944 |
10720 |
vep->attr_delim = 0; |
| 945 |
10720 |
p++; |
| 946 |
10720 |
vep->state = VEP_INTAG; |
| 947 |
10720 |
if (vep->attr_vsb != NULL) { |
| 948 |
10560 |
AZ(VSB_finish(vep->attr_vsb)); |
| 949 |
10560 |
AN(vep->dostuff); |
| 950 |
10560 |
vep->dostuff(vep, DO_ATTR); |
| 951 |
10560 |
vep->attr_vsb = NULL; |
| 952 |
10560 |
} |
| 953 |
10720 |
} |
| 954 |
|
|
| 955 |
|
/****************************************************** |
| 956 |
|
* Utility Section |
| 957 |
|
*/ |
| 958 |
|
|
| 959 |
505992 |
} else if (vep->state == VEP_MATCH) { |
| 960 |
|
/* |
| 961 |
|
* Match against a table |
| 962 |
|
*/ |
| 963 |
483134 |
vm = vep_match(vep, p, e); |
| 964 |
483134 |
vep->match_hit = vm; |
| 965 |
483134 |
if (vm != NULL) { |
| 966 |
477573 |
if (vm->match != NULL) |
| 967 |
40277 |
p += strlen(vm->match); |
| 968 |
477573 |
vep->state = *vm->state; |
| 969 |
477573 |
vep->match = NULL; |
| 970 |
477573 |
vep->tag_i = 0; |
| 971 |
477573 |
} else { |
| 972 |
5561 |
assert(p + sizeof(vep->tag) >= e); |
| 973 |
5561 |
memcpy(vep->tag, p, e - p); |
| 974 |
5561 |
vep->tag_i = e - p; |
| 975 |
5561 |
vep->state = VEP_MATCHBUF; |
| 976 |
5561 |
p = e; |
| 977 |
|
} |
| 978 |
493681 |
} else if (vep->state == VEP_MATCHBUF) { |
| 979 |
|
/* |
| 980 |
|
* Match against a table while split over input |
| 981 |
|
* sections. |
| 982 |
|
*/ |
| 983 |
9632 |
AN(vep->match); |
| 984 |
9632 |
i = sizeof(vep->tag) - vep->tag_i; |
| 985 |
9632 |
if (i > e - p) |
| 986 |
8372 |
i = e - p; |
| 987 |
9632 |
memcpy(vep->tag + vep->tag_i, p, i); |
| 988 |
19264 |
vm = vep_match(vep, vep->tag, |
| 989 |
9632 |
vep->tag + vep->tag_i + i); |
| 990 |
|
Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n", |
| 991 |
|
vep->tag_i + i, vep->tag, |
| 992 |
|
vep->tag_i, |
| 993 |
|
i, |
| 994 |
|
vm, |
| 995 |
|
vm ? vm->match : "(nil)"); |
| 996 |
|
|
| 997 |
9632 |
if (vm == NULL) { |
| 998 |
4071 |
vep->tag_i += i; |
| 999 |
4071 |
p += i; |
| 1000 |
4071 |
assert(p == e); |
| 1001 |
4071 |
} else { |
| 1002 |
5561 |
vep->match_hit = vm; |
| 1003 |
5561 |
vep->state = *vm->state; |
| 1004 |
5561 |
if (vm->match != NULL) { |
| 1005 |
5283 |
i = strlen(vm->match); |
| 1006 |
5283 |
if (i > vep->tag_i) |
| 1007 |
5227 |
p += i - vep->tag_i; |
| 1008 |
5283 |
} |
| 1009 |
5561 |
vep->match = NULL; |
| 1010 |
5561 |
vep->tag_i = 0; |
| 1011 |
|
} |
| 1012 |
10547 |
} else if (vep->state == VEP_UNTIL) { |
| 1013 |
|
/* |
| 1014 |
|
* Skip until we see magic string |
| 1015 |
|
*/ |
| 1016 |
7395 |
while (p < e) { |
| 1017 |
6760 |
if (*p++ != *vep->until_p++) { |
| 1018 |
5840 |
vep->until_p = vep->until; |
| 1019 |
6760 |
} else if (*vep->until_p == '\0') { |
| 1020 |
280 |
vep->state = vep->until_s; |
| 1021 |
280 |
break; |
| 1022 |
|
} |
| 1023 |
|
} |
| 1024 |
915 |
if (p == e && !vep->remove) |
| 1025 |
733 |
vep_mark_verbatim(vep, p); |
| 1026 |
915 |
} else { |
| 1027 |
|
Debug("*** Unknown state %s\n", vep->state); |
| 1028 |
0 |
WRONG("WRONG ESI PARSER STATE"); |
| 1029 |
|
} |
| 1030 |
|
} |
| 1031 |
|
/* |
| 1032 |
|
* We must always mark up the storage we got, try to do so |
| 1033 |
|
* in the most efficient way, in particular with respect to |
| 1034 |
|
* minimizing and limiting use of pending. |
| 1035 |
|
*/ |
| 1036 |
2154839 |
if (p == vep->ver_p) |
| 1037 |
|
; |
| 1038 |
1075432 |
else if (vep->in_esi_tag) |
| 1039 |
11867 |
vep_mark_skip(vep, p); |
| 1040 |
1063565 |
else if (vep->remove) |
| 1041 |
1058530 |
vep_mark_skip(vep, p); |
| 1042 |
|
else |
| 1043 |
5035 |
vep_mark_pending(vep, p); |
| 1044 |
2154839 |
} |
| 1045 |
|
|
| 1046 |
|
/*--------------------------------------------------------------------- |
| 1047 |
|
*/ |
| 1048 |
|
|
| 1049 |
|
static ssize_t v_matchproto_(vep_callback_t) |
| 1050 |
2656403 |
vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg) |
| 1051 |
|
{ |
| 1052 |
|
ssize_t *s; |
| 1053 |
|
|
| 1054 |
2656403 |
CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); |
| 1055 |
2656403 |
AN(priv); |
| 1056 |
2656403 |
s = priv; |
| 1057 |
2656403 |
*s += l; |
| 1058 |
2656403 |
(void)flg; |
| 1059 |
2656403 |
return (*s); |
| 1060 |
|
} |
| 1061 |
|
|
| 1062 |
|
/*--------------------------------------------------------------------- |
| 1063 |
|
*/ |
| 1064 |
|
|
| 1065 |
|
struct vep_state * |
| 1066 |
10760 |
VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb, |
| 1067 |
|
void *cb_priv) |
| 1068 |
|
{ |
| 1069 |
|
struct vep_state *vep; |
| 1070 |
|
|
| 1071 |
10760 |
CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); |
| 1072 |
10760 |
CHECK_OBJ_NOTNULL(req, HTTP_MAGIC); |
| 1073 |
10760 |
vep = WS_Alloc(vc->resp->ws, sizeof *vep); |
| 1074 |
10760 |
if (vep == NULL) { |
| 1075 |
2080 |
VSLb(vc->wrk->vsl, SLT_VCL_Error, |
| 1076 |
|
"VEP_Init() workspace overflow"); |
| 1077 |
2080 |
return (NULL); |
| 1078 |
|
} |
| 1079 |
|
|
| 1080 |
8680 |
INIT_OBJ(vep, VEP_MAGIC); |
| 1081 |
8680 |
vep->url = req->hd[HTTP_HDR_URL].b; |
| 1082 |
8680 |
vep->vc = vc; |
| 1083 |
8680 |
vep->vsb = VSB_new_auto(); |
| 1084 |
8680 |
AN(vep->vsb); |
| 1085 |
|
|
| 1086 |
8680 |
if (cb != NULL) { |
| 1087 |
3040 |
vep->dogzip = 1; |
| 1088 |
|
/* XXX */ |
| 1089 |
3040 |
VSB_printf(vep->vsb, "%c", VEC_GZ); |
| 1090 |
3040 |
vep->cb = cb; |
| 1091 |
3040 |
vep->cb_priv = cb_priv; |
| 1092 |
3040 |
} else { |
| 1093 |
5640 |
vep->cb = vep_default_cb; |
| 1094 |
5640 |
vep->cb_priv = &vep->cb_x; |
| 1095 |
|
} |
| 1096 |
|
|
| 1097 |
8680 |
vep->state = VEP_START; |
| 1098 |
8680 |
vep->crc = crc32(0L, Z_NULL, 0); |
| 1099 |
8680 |
vep->crcp = crc32(0L, Z_NULL, 0); |
| 1100 |
|
|
| 1101 |
8680 |
vep->startup = 1; |
| 1102 |
8680 |
return (vep); |
| 1103 |
10760 |
} |
| 1104 |
|
|
| 1105 |
|
/*--------------------------------------------------------------------- |
| 1106 |
|
*/ |
| 1107 |
|
|
| 1108 |
|
struct vsb * |
| 1109 |
8680 |
VEP_Finish(struct vep_state *vep) |
| 1110 |
|
{ |
| 1111 |
|
ssize_t l, lcb; |
| 1112 |
|
|
| 1113 |
8680 |
CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); |
| 1114 |
|
|
| 1115 |
8680 |
if (vep->include_src) |
| 1116 |
0 |
VSB_destroy(&vep->include_src); |
| 1117 |
8680 |
if (vep->attr_vsb) |
| 1118 |
0 |
VSB_destroy(&vep->attr_vsb); |
| 1119 |
|
|
| 1120 |
16640 |
if (vep->state != VEP_START && |
| 1121 |
8520 |
vep->state != VEP_BOM && |
| 1122 |
8520 |
vep->state != VEP_TESTXML && |
| 1123 |
8480 |
vep->state != VEP_NOTXML && |
| 1124 |
7960 |
vep->state != VEP_NEXTTAG) { |
| 1125 |
145 |
vep_error(vep, "VEP ended inside a tag"); |
| 1126 |
145 |
} |
| 1127 |
|
|
| 1128 |
8680 |
if (vep->o_pending) |
| 1129 |
0 |
vep_mark_common(vep, vep->ver_p, vep->last_mark); |
| 1130 |
8680 |
if (vep->o_wait > 0) { |
| 1131 |
8520 |
lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN); |
| 1132 |
8520 |
vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); |
| 1133 |
8520 |
} |
| 1134 |
|
// NB: We don't account for PAD+SUM+LEN in gzipped objects |
| 1135 |
8680 |
(void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH); |
| 1136 |
|
|
| 1137 |
8680 |
AZ(VSB_finish(vep->vsb)); |
| 1138 |
8680 |
l = VSB_len(vep->vsb); |
| 1139 |
8680 |
if (vep->esi_found && l > 0) |
| 1140 |
4680 |
return (vep->vsb); |
| 1141 |
4000 |
VSB_destroy(&vep->vsb); |
| 1142 |
4000 |
return (NULL); |
| 1143 |
8680 |
} |
| 1144 |
|
|
| 1145 |
|
#if 0 |
| 1146 |
|
|
| 1147 |
|
digraph xml { |
| 1148 |
|
rankdir="LR" |
| 1149 |
|
size="7,10" |
| 1150 |
|
################################################################# |
| 1151 |
|
# SECTION A |
| 1152 |
|
# |
| 1153 |
|
|
| 1154 |
|
START [shape=ellipse] |
| 1155 |
|
TESTXML [shape=ellipse] |
| 1156 |
|
NOTXML [shape=ellipse] |
| 1157 |
|
NEXTTAGa [shape=hexagon, label="NEXTTAG"] |
| 1158 |
|
STARTTAGa [shape=hexagon, label="STARTTAG"] |
| 1159 |
|
START -> TESTXML |
| 1160 |
|
START -> NEXTTAGa [style=dotted, label="syntax:1"] |
| 1161 |
|
TESTXML -> TESTXML [label="lws"] |
| 1162 |
|
TESTXML -> NOTXML |
| 1163 |
|
TESTXML -> STARTTAGa [label="'<'"] |
| 1164 |
|
|
| 1165 |
|
################################################################# |
| 1166 |
|
# SECTION B |
| 1167 |
|
|
| 1168 |
|
NOTMYTAG [shape=ellipse] |
| 1169 |
|
NEXTTAG [shape=ellipse] |
| 1170 |
|
NOTMYTAG -> NEXTTAG [style=dotted, label="syntax:2"] |
| 1171 |
|
STARTTAGb [shape=hexagon, label="STARTTAG"] |
| 1172 |
|
NOTMYTAG -> NEXTTAG [label="'>'"] |
| 1173 |
|
NOTMYTAG -> NOTMYTAG [label="*"] |
| 1174 |
|
NEXTTAG -> NEXTTAG [label="'-->'"] |
| 1175 |
|
NEXTTAG -> NEXTTAG [label="*"] |
| 1176 |
|
NEXTTAG -> STARTTAGb [label="'<'"] |
| 1177 |
|
|
| 1178 |
|
################################################################# |
| 1179 |
|
# SECTION C |
| 1180 |
|
|
| 1181 |
|
STARTTAG [shape=ellipse] |
| 1182 |
|
COMMENT [shape=ellipse] |
| 1183 |
|
CDATA [shape=ellipse] |
| 1184 |
|
ESITAG [shape=ellipse] |
| 1185 |
|
ESIETAG [shape=ellipse] |
| 1186 |
|
ESIINCLUDE [shape=ellipse] |
| 1187 |
|
ESIREMOVE [shape=ellipse] |
| 1188 |
|
ESICOMMENT [shape=ellipse] |
| 1189 |
|
ESIBOGON [shape=ellipse] |
| 1190 |
|
INTAGc [shape=hexagon, label="INTAG"] |
| 1191 |
|
NOTMYTAGc [shape=hexagon, label="NOTMYTAG"] |
| 1192 |
|
NEXTTAGc [shape=hexagon, label="NEXTTAG"] |
| 1193 |
|
TAGERRORc [shape=hexagon, label="TAGERROR"] |
| 1194 |
|
C1 [shape=circle,label=""] |
| 1195 |
|
STARTTAG -> COMMENT [label="'<!--'"] |
| 1196 |
|
STARTTAG -> ESITAG [label="'<esi'"] |
| 1197 |
|
STARTTAG -> CDATA [label="'<![CDATA['"] |
| 1198 |
|
STARTTAG -> NOTMYTAGc [label="'*'"] |
| 1199 |
|
COMMENT -> NEXTTAGc [label="'esi'"] |
| 1200 |
|
COMMENT -> C1 [label="*"] |
| 1201 |
|
C1 -> C1 [label="*"] |
| 1202 |
|
C1 -> NEXTTAGc [label="-->"] |
| 1203 |
|
CDATA -> CDATA [label="*"] |
| 1204 |
|
CDATA -> NEXTTAGc [label="]]>"] |
| 1205 |
|
ESITAG -> ESIINCLUDE [label="'include'"] |
| 1206 |
|
ESITAG -> ESIREMOVE [label="'remove'"] |
| 1207 |
|
ESITAG -> ESICOMMENT [label="'comment'"] |
| 1208 |
|
ESITAG -> ESIBOGON [label="*"] |
| 1209 |
|
ESICOMMENT -> INTAGc |
| 1210 |
|
ESICOMMENT -> TAGERRORc |
| 1211 |
|
ESICOMMENT -> TAGERRORc [style=dotted, label="nested\nin\nremove"] |
| 1212 |
|
ESIREMOVE -> INTAGc |
| 1213 |
|
ESIREMOVE -> TAGERRORc |
| 1214 |
|
ESIINCLUDE -> INTAGc |
| 1215 |
|
ESIINCLUDE -> TAGERRORc |
| 1216 |
|
ESIINCLUDE -> TAGERRORc [style=dotted, label="nested\nin\nremove"] |
| 1217 |
|
ESIBOGON -> TAGERRORc |
| 1218 |
|
|
| 1219 |
|
################################################################# |
| 1220 |
|
# SECTION D |
| 1221 |
|
|
| 1222 |
|
INTAG [shape=ellipse] |
| 1223 |
|
TAGERROR [shape=ellipse] |
| 1224 |
|
NEXTTAGd [shape=hexagon, label="NEXTTAG"] |
| 1225 |
|
ATTRd [shape=hexagon, label="ATTR"] |
| 1226 |
|
D1 [shape=circle, label=""] |
| 1227 |
|
D2 [shape=circle, label=""] |
| 1228 |
|
INTAG -> D1 [label="lws"] |
| 1229 |
|
D1 -> D2 [label="/"] |
| 1230 |
|
INTAG -> D2 [label="/"] |
| 1231 |
|
INTAG -> NEXTTAGd [label=">"] |
| 1232 |
|
D1 -> NEXTTAGd [label=">"] |
| 1233 |
|
D2 -> NEXTTAGd [label=">"] |
| 1234 |
|
D1 -> ATTRd [label="XMLstartchar"] |
| 1235 |
|
D1 -> TAGERROR [label="*"] |
| 1236 |
|
D2 -> TAGERROR [label="*"] |
| 1237 |
|
TAGERROR -> TAGERROR [label="*"] |
| 1238 |
|
TAGERROR -> NEXTTAGd [label="'>'"] |
| 1239 |
|
|
| 1240 |
|
################################################################# |
| 1241 |
|
# SECTION E |
| 1242 |
|
|
| 1243 |
|
ATTR [shape=ellipse] |
| 1244 |
|
SKIPATTR [shape=ellipse] |
| 1245 |
|
ATTRGETVAL [shape=ellipse] |
| 1246 |
|
ATTRDELIM [shape=ellipse] |
| 1247 |
|
ATTRVAL [shape=ellipse] |
| 1248 |
|
TAGERRORe [shape=hexagon, label="TAGERROR"] |
| 1249 |
|
INTAGe [shape=hexagon, label="INTAG"] |
| 1250 |
|
ATTR -> SKIPATTR [label="*"] |
| 1251 |
|
ATTR -> ATTRGETVAL [label="wanted attr"] |
| 1252 |
|
SKIPATTR -> SKIPATTR [label="XMLname"] |
| 1253 |
|
SKIPATTR -> ATTRDELIM [label="'='"] |
| 1254 |
|
SKIPATTR -> TAGERRORe [label="*"] |
| 1255 |
|
ATTRGETVAL -> ATTRDELIM |
| 1256 |
|
ATTRDELIM -> ATTRVAL [label="\""] |
| 1257 |
|
ATTRDELIM -> ATTRVAL [label="\'"] |
| 1258 |
|
ATTRDELIM -> ATTRVAL [label="*"] |
| 1259 |
|
ATTRDELIM -> TAGERRORe [label="lws"] |
| 1260 |
|
ATTRVAL -> TAGERRORe [label="'>'"] |
| 1261 |
|
ATTRVAL -> INTAGe [label="delim"] |
| 1262 |
|
ATTRVAL -> ATTRVAL [label="*"] |
| 1263 |
|
|
| 1264 |
|
} |
| 1265 |
|
|
| 1266 |
|
#endif |