[master] 5a99d056f vcc: Fold string parsing into a single function
Dridi Boukelmoune
dridi.boukelmoune at gmail.com
Tue Nov 24 14:26:11 UTC 2020
commit 5a99d056f4bd02896972d90a5d491094bebbdfa7
Author: Dridi Boukelmoune <dridi.boukelmoune at gmail.com>
Date: Tue Nov 24 10:32:32 2020 +0100
vcc: Fold string parsing into a single function
Now that VCL sources are guaranteed to always be null-terminated, we
can use standard string.h functions to detect long strings. We can also
generalize vcc_decstr() to work with all kinds of strings.
Instead of duplicating the code in 3 locations with slight variations,
encode the specifics of each type of string into a struct and have
one location to parse them.
Bonus deduplication: inline C parsing is identical to string parsing, so
it is handled by the same function.
This is loosely inspired by vcl_fixed_token() and we could further
reduce the size of the loop in vcc_Lexer() by extracting more logic.
diff --git a/bin/varnishtest/tests/v00019.vtc b/bin/varnishtest/tests/v00019.vtc
index d53a5aeb2..904586790 100644
--- a/bin/varnishtest/tests/v00019.vtc
+++ b/bin/varnishtest/tests/v00019.vtc
@@ -23,7 +23,7 @@ varnish v1 -errvcl {Unterminated long-string, starting at} {
""" ""
}
-varnish v1 -errvcl {Unterminated string at} {
+varnish v1 -errvcl {Unterminated string, starting at} {
backend b { .host = "127.0.0.1"; }
"
}
diff --git a/lib/libvcc/vcc_token.c b/lib/libvcc/vcc_token.c
index d8d80491d..34ee49ad4 100644
--- a/lib/libvcc/vcc_token.c
+++ b/lib/libvcc/vcc_token.c
@@ -344,20 +344,19 @@ vcc_ExpectVid(struct vcc *tl, const char *what)
* Decode a string
*/
-static int
-vcc_decstr(struct vcc *tl)
+static void
+vcc_decstr(struct vcc *tl, unsigned sep)
{
char *q;
unsigned int l;
assert(tl->t->tok == CSTR);
- l = (tl->t->e - tl->t->b) - 2;
+ l = (tl->t->e - tl->t->b) - (sep * 2);
tl->t->dec = TlAlloc(tl, l + 1);
- assert(tl->t->dec != NULL);
+ AN(tl->t->dec);
q = tl->t->dec;
- memcpy(q, tl->t->b + 1, l);
+ memcpy(q, tl->t->b + sep, l);
q[l] = '\0';
- return (0);
}
/*--------------------------------------------------------------------
@@ -383,6 +382,64 @@ vcc_addtoken(struct vcc *tl, unsigned tok,
tl->t = t;
}
+/*--------------------------------------------------------------------
+ * Find a delimited token
+ */
+
+static const struct delim_def {
+ const char *name;
+ const char *b;
+ const char *e;
+ unsigned len; /* NB: must be the same for both delimiters */
+ unsigned crlf;
+ unsigned tok;
+} delim_defs[] = {
+#define DELIM_DEF(nm, l, r, c, t) \
+ { nm, l, r, sizeof (l) - 1, c, t }
+ DELIM_DEF("long-string", "\"\"\"", "\"\"\"", 1, CSTR), /* """...""" */
+ DELIM_DEF("long-string", "{\"", "\"}", 1, CSTR), /* {"..."} */
+ DELIM_DEF("string", "\"", "\"", 0, CSTR), /* "..." */
+ DELIM_DEF("inline C source", "C{", "}C", 1, CSRC), /* C{...}C */
+#undef DELIM_DEF
+ { NULL }
+};
+
+static unsigned
+vcc_delim_token(struct vcc *tl, const struct source *sp, const char *p,
+ const char **qp)
+{
+ const struct delim_def *dd;
+ const char *q, *r;
+
+ for (dd = delim_defs; dd->name != NULL; dd++)
+ if (!strncmp(p, dd->b, dd->len))
+ break;
+
+ if (dd->name == NULL)
+ return (0);
+
+ q = strstr(p + dd->len, dd->e);
+ if (q != NULL && !dd->crlf) {
+ r = strpbrk(p + dd->len, "\r\n");
+ if (r != NULL && r < q)
+ q = NULL;
+ }
+
+ if (q == NULL) {
+ vcc_addtoken(tl, EOI, sp, p, p + dd->len);
+ VSB_printf(tl->sb, "Unterminated %s, starting at\n", dd->name);
+ vcc_ErrWhere(tl, tl->t);
+ return (0);
+ }
+
+ assert(q < sp->e);
+ vcc_addtoken(tl, dd->tok, sp, p, q + dd->len);
+ if (dd->tok == CSTR)
+ vcc_decstr(tl, dd->len);
+ *qp = q + dd->len;
+ return (1);
+}
+
/*--------------------------------------------------------------------
* Lexical analysis and token generation
*/
@@ -443,75 +500,6 @@ vcc_Lexer(struct vcc *tl, const struct source *sp, int eoi)
continue;
}
- /* Recognize inline C-code */
- if (*p == 'C' && p[1] == '{') {
- for (q = p + 2; q < sp->e; q++) {
- if (*q == '}' && q[1] == 'C') {
- vcc_addtoken(tl, CSRC, sp, p, q + 2);
- break;
- }
- }
- if (q < sp->e) {
- p = q + 2;
- continue;
- }
- vcc_addtoken(tl, EOI, sp, p, p + 2);
- VSB_cat(tl->sb,
- "Unterminated inline C source, starting at\n");
- vcc_ErrWhere(tl, tl->t);
- return;
- }
-
- /* Recognize long-strings {" "} */
- if (*p == '{' && p[1] == '"') {
- for (q = p + 2; q < sp->e; q++) {
- if (*q == '"' && q[1] == '}') {
- vcc_addtoken(tl, CSTR, sp, p, q + 2);
- break;
- }
- }
- if (q < sp->e) {
- p = q + 2;
- u = tl->t->e - tl->t->b;
- u -= 4; /* {" ... "} */
- tl->t->dec = TlAlloc(tl, u + 1 );
- AN(tl->t->dec);
- memcpy(tl->t->dec, tl->t->b + 2, u);
- tl->t->dec[u] = '\0';
- continue;
- }
- vcc_addtoken(tl, EOI, sp, p, p + 2);
- VSB_cat(tl->sb,
- "Unterminated long-string, starting at\n");
- vcc_ErrWhere(tl, tl->t);
- return;
- }
-
- /* Recognize long-strings """ """ */
- if (*p == '"' && p[1] == '"' && p[2] == '"') {
- for (q = p + 3; q < sp->e; q++) {
- if (*q == '"' && q[1] == '"' && q[2] == '"') {
- vcc_addtoken(tl, CSTR, sp, p, q + 3);
- break;
- }
- }
- if (q < sp->e) {
- p = q + 3;
- u = tl->t->e - tl->t->b;
- u -= 6; /* """ ... """ */
- tl->t->dec = TlAlloc(tl, u + 1 );
- AN(tl->t->dec);
- memcpy(tl->t->dec, tl->t->b + 3, u);
- tl->t->dec[u] = '\0';
- continue;
- }
- vcc_addtoken(tl, EOI, sp, p, p + 3);
- VSB_cat(tl->sb,
- "Unterminated long-string, starting at\n");
- vcc_ErrWhere(tl, tl->t);
- return;
- }
-
/* Recognize BLOB (= SF-binary) */
if (*p == ':') {
vsb = VSB_new_auto();
@@ -574,32 +562,17 @@ vcc_Lexer(struct vcc *tl, const struct source *sp, int eoi)
continue;
}
- /* Match for the fixed tokens (see generate.py) */
- u = vcl_fixed_token(p, &q);
- if (u != 0) {
- vcc_addtoken(tl, u, sp, p, q);
+ /* Match delimited tokens */
+ if (vcc_delim_token(tl, sp, p, &q) != 0) {
p = q;
continue;
}
+ ERRCHK(tl);
- /* Match strings */
- if (*p == '"') {
- for (q = p + 1; q < sp->e; q++) {
- if (*q == '"') {
- q++;
- break;
- }
- if (*q == '\r' || *q == '\n') {
- vcc_addtoken(tl, EOI, sp, p, q);
- VSB_cat(tl->sb,
- "Unterminated string at\n");
- vcc_ErrWhere(tl, tl->t);
- return;
- }
- }
- vcc_addtoken(tl, CSTR, sp, p, q);
- if (vcc_decstr(tl))
- return;
+ /* Match for the fixed tokens (see generate.py) */
+ u = vcl_fixed_token(p, &q);
+ if (u != 0) {
+ vcc_addtoken(tl, u, sp, p, q);
p = q;
continue;
}
More information about the varnish-commit
mailing list