varnish-cache/lib/libvcc/vcc_token.c
1
/*-
2
 * Copyright (c) 2006 Verdens Gang AS
3
 * Copyright (c) 2006-2011 Varnish Software AS
4
 * All rights reserved.
5
 *
6
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "config.h"
31
32
#include <stdlib.h>
33
#include <string.h>
34
35
#include "vcc_compile.h"
36
37
#include "vct.h"
38
39
/*--------------------------------------------------------------------*/
40
41
void
42 55
vcc_ErrToken(const struct vcc *tl, const struct token *t)
43
{
44
45 55
        if (t->tok == EOI)
46 0
                VSB_printf(tl->sb, "end of input");
47 55
        else if (t->tok == CSRC)
48 0
                VSB_printf(tl->sb, "C{ ... }C");
49
        else
50 55
                VSB_printf(tl->sb, "'%.*s'", PF(t));
51 55
}
52
53
void
54 0
vcc__ErrInternal(struct vcc *tl, const char *func, unsigned line)
55
{
56
57 0
        VSB_printf(tl->sb, "VCL compiler internal error at %s():%u\n",
58
            func, line);
59 0
        tl->err = 1;
60 0
}
61
62
/*--------------------------------------------------------------------
63
 * Find start of source-line of token
64
 */
65
66
static void
67 380
vcc_iline(const struct token *t, const char **ll, int tail)
68
{
69
        const char *p, *b, *x;
70
71 380
        b = t->src->b;
72 380
        if (ll != NULL)
73 380
                *ll = b;
74 380
        x = tail ? t->e - 1 : t->b;
75 257524
        for (p = b; p < x; p++) {
76 257144
                if (*p == '\n') {
77 6189
                        if (ll != NULL)
78 6189
                                *ll = p + 1;
79
                }
80
        }
81 380
}
82
83
/*--------------------------------------------------------------------
84
 * Find and print src+line+pos of this token
85
 */
86
87
static void
88 14035
vcc_icoord(struct vsb *vsb, const struct token *t, int tail)
89
{
90
        unsigned lin, pos;
91
        const char *p, *b, *x;
92
93 14035
        lin = 1;
94 14035
        pos = 0;
95 14035
        b = t->src->b;
96 14035
        x = tail ? t->e - 1 : t->b;
97 46033980
        for (p = b; p < x; p++) {
98 46019945
                if (*p == '\n') {
99 1506113
                        lin++;
100 1506113
                        pos = 0;
101 44513832
                } else if (*p == '\t') {
102 15052
                        pos &= ~7;
103 15052
                        pos += 8;
104
                } else
105 44498780
                        pos++;
106
        }
107 14035
        VSB_printf(vsb, "(");
108 14035
        if (tail < 2)
109 13993
                VSB_printf(vsb, "'%s' Line %u ", t->src->name, lin);
110 14035
        VSB_printf(vsb, "Pos %u)", pos + 1);
111 14035
}
112
113
/*--------------------------------------------------------------------*/
114
115
void
116 13655
vcc_Coord(const struct vcc *tl, struct vsb *vsb, const struct token *t)
117
{
118
119 13655
        if (t == NULL)
120 13655
                t = tl->t;
121 13655
        vcc_icoord(vsb, t, 0);
122 13655
}
123
124
/*--------------------------------------------------------------------
125
 * Output one line of source code, starting at 'l' and ending at the
126
 * first NL or 'le'.
127
 */
128
129
static void
130 338
vcc_quoteline(const struct vcc *tl, const char *l, const char *le)
131
{
132
        const char *p;
133
        unsigned x, y;
134
135 338
        x = y = 0;
136 12054
        for (p = l; p < le && *p != '\n'; p++) {
137 11716
                if (*p == '\t') {
138 448
                        y &= ~7;
139 448
                        y += 8;
140 4480
                        while (x < y) {
141 3584
                                VSB_putc(tl->sb, ' ');
142 3584
                                x++;
143
                        }
144
                } else {
145 11268
                        x++;
146 11268
                        y++;
147 11268
                        VSB_putc(tl->sb, *p);
148
                }
149
        }
150 338
        VSB_putc(tl->sb, '\n');
151 338
}
152
153
/*--------------------------------------------------------------------
154
 * Output a marker line for a sourceline starting at 'l' and ending at
155
 * the first NL or 'le'.  Characters between 'b' and 'e' are marked.
156
 */
157
158
static void
159 338
vcc_markline(const struct vcc *tl, const char *l, const char *le,
160
    const char *b, const char *e)
161
{
162
        const char *p;
163
        unsigned x, y;
164
        char c;
165
166 338
        x = y = 0;
167 12054
        for (p = l; p < le && *p != '\n'; p++) {
168 11716
                if (p >= b && p < e)
169 2453
                        c = '#';
170
                else
171 9263
                        c = '-';
172
173 11716
                if (*p == '\t') {
174 448
                        y &= ~7;
175 448
                        y += 8;
176
                } else
177 11268
                        y++;
178 38284
                while (x < y) {
179 14852
                        VSB_putc(tl->sb, c);
180 14852
                        x++;
181
                }
182
        }
183 338
        VSB_putc(tl->sb, '\n');
184 338
}
185
186
/*--------------------------------------------------------------------*/
187
/* XXX: should take first+last token */
188
189
void
190 43
vcc_ErrWhere2(struct vcc *tl, const struct token *t, const struct token *t2)
191
{
192
        const char  *l1, *l2, *l3;
193
194 43
        if (t == NULL) {
195 1
                vcc_ErrWhere(tl, t2);
196 1
                return;
197
        }
198 42
        vcc_iline(t, &l1, 0);
199 42
        t2 = VTAILQ_PREV(t2, tokenhead, list);
200 42
        vcc_iline(t2, &l2, 1);
201
202
203 42
        if (l1 == l2) {
204 42
                vcc_icoord(tl->sb, t, 0);
205 42
                VSB_cat(tl->sb, " -- ");
206 42
                vcc_icoord(tl->sb, t2, 2);
207 42
                VSB_putc(tl->sb, '\n');
208
                /* Two tokens on same line */
209 42
                vcc_quoteline(tl, l1, t->src->e);
210 42
                vcc_markline(tl, l1, t->src->e, t->b, t2->e);
211
        } else {
212
                /* Two tokens different lines */
213 0
                l3 = strchr(l1, '\n');
214 0
                AN(l3);
215
                /* XXX: t had better be before t2 */
216 0
                vcc_icoord(tl->sb, t, 0);
217 0
                if (l3 + 1 == l2) {
218 0
                        VSB_cat(tl->sb, " -- ");
219 0
                        vcc_icoord(tl->sb, t2, 1);
220
                }
221 0
                VSB_putc(tl->sb, '\n');
222 0
                vcc_quoteline(tl, l1, t->src->e);
223 0
                vcc_markline(tl, l1, t->src->e, t->b, t2->e);
224 0
                if (l3 + 1 != l2) {
225 0
                        VSB_cat(tl->sb, "[...]\n");
226 0
                        vcc_icoord(tl->sb, t2, 1);
227 0
                        VSB_putc(tl->sb, '\n');
228
                }
229 0
                vcc_quoteline(tl, l2, t->src->e);
230 0
                vcc_markline(tl, l2, t->src->e, t->b, t2->e);
231
        }
232 42
        VSB_putc(tl->sb, '\n');
233 42
        tl->err = 1;
234
}
235
236
void
237 296
vcc_ErrWhere(struct vcc *tl, const struct token *t)
238
{
239
        const char  *l1;
240
241 296
        vcc_iline(t, &l1, 0);
242 296
        vcc_icoord(tl->sb, t, 0);
243 296
        VSB_putc(tl->sb, '\n');
244 296
        vcc_quoteline(tl, l1, t->src->e);
245 296
        vcc_markline(tl, l1, t->src->e, t->b, t->e);
246 296
        VSB_putc(tl->sb, '\n');
247 296
        tl->err = 1;
248 296
}
249
250
/*--------------------------------------------------------------------*/
251
252
void
253 449940
vcc_NextToken(struct vcc *tl)
254
{
255
256 449940
        tl->t = VTAILQ_NEXT(tl->t, list);
257 449940
        if (tl->t == NULL) {
258 0
                VSB_printf(tl->sb,
259
                    "Ran out of input, something is missing or"
260
                    " maybe unbalanced (...) or {...}\n");
261 0
                tl->err = 1;
262 0
                return;
263
        }
264
}
265
266
void
267 229138
vcc__Expect(struct vcc *tl, unsigned tok, unsigned line)
268
{
269 229138
        if (tl->t->tok == tok)
270 229128
                return;
271 10
        VSB_printf(tl->sb, "Expected %s got ", vcl_tnames[tok]);
272 10
        vcc_ErrToken(tl, tl->t);
273 10
        VSB_printf(tl->sb, "\n(program line %u), at\n", line);
274 10
        vcc_ErrWhere(tl, tl->t);
275
}
276
277
/*--------------------------------------------------------------------
278
 * Compare ID token to string, return true of match
279
 */
280
281
int
282 669624
vcc_IdIs(const struct token *t, const char *p)
283
{
284
        const char *q;
285
286 669624
        assert(t->tok == ID);
287 906528
        for (q = t->b; q < t->e && *p != '\0'; p++, q++)
288 853779
                if (*q != *p)
289 616875
                        return (0);
290 52749
        if (q != t->e || *p != '\0')
291 268
                return (0);
292 52481
        return (1);
293
}
294
295
/*--------------------------------------------------------------------
296
 * Check that we have a Varnish identifier
297
 */
298
299
void
300 15058
vcc_ExpectVid(struct vcc *tl, const char *what)
301
{
302
        const char *bad;
303
304 15058
        ExpectErr(tl, ID);
305 15057
        ERRCHK(tl);
306
307 15057
        bad = VCT_invalid_name(tl->t->b, tl->t->e);
308 15057
        if (bad != NULL) {
309 3
                VSB_printf(tl->sb, "Name of %s, ", what);
310 3
                vcc_ErrToken(tl, tl->t);
311 3
                VSB_printf(tl->sb,
312 3
                    ", contains illegal character '%c'\n", *bad);
313 3
                vcc_ErrWhere(tl, tl->t);
314 3
                return;
315
        }
316
}
317
318
/*--------------------------------------------------------------------
319
 * Decode a string
320
 */
321
322
static int
323 29877
vcc_decstr(struct vcc *tl)
324
{
325
        char *q;
326
        unsigned int l;
327
328 29877
        assert(tl->t->tok == CSTR);
329 29877
        l = (tl->t->e - tl->t->b) - 2;
330 29877
        tl->t->dec = TlAlloc(tl, l + 1);
331 29877
        assert(tl->t->dec != NULL);
332 29877
        q = tl->t->dec;
333 29877
        memcpy(q, tl->t->b + 1, l);
334 29877
        q[l] = '\0';
335 29877
        return (0);
336
}
337
338
/*--------------------------------------------------------------------
339
 * Add a token to the token list.
340
 */
341
342
void
343 523449
vcc_AddToken(struct vcc *tl, unsigned tok, const char *b, const char *e)
344
{
345
        struct token *t;
346
347 523449
        t = TlAlloc(tl, sizeof *t);
348 523449
        assert(t != NULL);
349 523449
        t->tok = tok;
350 523449
        t->b = b;
351 523449
        t->e = e;
352 523449
        t->src = tl->src;
353 523449
        if (tl->t != NULL)
354 522386
                VTAILQ_INSERT_AFTER(&tl->tokens, tl->t, t, list);
355
        else
356 1063
                VTAILQ_INSERT_TAIL(&tl->tokens, t, list);
357 523449
        tl->t = t;
358 523449
}
359
360
/*--------------------------------------------------------------------
361
 * Lexical analysis and token generation
362
 */
363
364
void
365 2142
vcc_Lexer(struct vcc *tl, struct source *sp)
366
{
367
        const char *p, *q;
368
        unsigned u;
369
370 2142
        tl->src = sp;
371 1466308
        for (p = sp->b; p < sp->e; ) {
372
373
                /* Skip any whitespace */
374 1462030
                if (vct_isspace(*p)) {
375 915221
                        p++;
376 915221
                        continue;
377
                }
378
379
                /* Skip '#.*\n' comments */
380 546809
                if (*p == '#') {
381 607496
                        while (p < sp->e && *p != '\n')
382 581928
                                p++;
383 12784
                        continue;
384
                }
385
386
                /* Skip C-style comments */
387 534025
                if (*p == '/' && p[1] == '*') {
388 1868842
                        for (q = p + 2; q < sp->e; q++) {
389 1868841
                                if (*q == '/' && q[1] == '*') {
390 1
                                        VSB_printf(tl->sb,
391
                                            "/* ... */ comment contains /*\n");
392 1
                                        vcc_AddToken(tl, EOI, p, p + 2);
393 1
                                        vcc_ErrWhere(tl, tl->t);
394 1
                                        vcc_AddToken(tl, EOI, q, q + 2);
395 1
                                        vcc_ErrWhere(tl, tl->t);
396 1
                                        return;
397
                                }
398 1868840
                                if (*q == '*' && q[1] == '/') {
399 7401
                                        p = q + 2;
400 7401
                                        break;
401
                                }
402
                        }
403 7402
                        if (q < sp->e)
404 7401
                                continue;
405 1
                        vcc_AddToken(tl, EOI, p, p + 2);
406 1
                        VSB_printf(tl->sb,
407
                            "Unterminated /* ... */ comment, starting at\n");
408 1
                        vcc_ErrWhere(tl, tl->t);
409 1
                        return;
410
                }
411
412
                /* Skip C++-style comments */
413 526622
                if (*p == '/' && p[1] == '/') {
414 174522
                        while (p < sp->e && *p != '\n')
415 166056
                                p++;
416 4233
                        continue;
417
                }
418
419
                /* Recognize inline C-code */
420 522389
                if (*p == 'C' && p[1] == '{') {
421 853
                        for (q = p + 2; q < sp->e; q++) {
422 852
                                if (*q == '}' && q[1] == 'C') {
423 15
                                        vcc_AddToken(tl, CSRC, p, q + 2);
424 15
                                        break;
425
                                }
426
                        }
427 16
                        if (q < sp->e) {
428 15
                                p = q + 2;
429 15
                                continue;
430
                        }
431 1
                        vcc_AddToken(tl, EOI, p, p + 2);
432 1
                        VSB_printf(tl->sb,
433
                            "Unterminated inline C source, starting at\n");
434 1
                        vcc_ErrWhere(tl, tl->t);
435 1
                        return;
436
                }
437
438
                /* Recognize long-strings */
439 522373
                if (*p == '{' && p[1] == '"') {
440 464378
                        for (q = p + 2; q < sp->e; q++) {
441 464377
                                if (*q == '"' && q[1] == '}') {
442 10607
                                        vcc_AddToken(tl, CSTR, p, q + 2);
443 10607
                                        break;
444
                                }
445
                        }
446 10608
                        if (q < sp->e) {
447 10607
                                p = q + 2;
448 10607
                                u = tl->t->e - tl->t->b;
449 10607
                                u -= 4;         /* {" ... "} */
450 10607
                                tl->t->dec = TlAlloc(tl, u + 1 );
451 10607
                                AN(tl->t->dec);
452 10607
                                memcpy(tl->t->dec, tl->t->b + 2, u);
453 10607
                                tl->t->dec[u] = '\0';
454 10607
                                continue;
455
                        }
456 1
                        vcc_AddToken(tl, EOI, p, p + 2);
457 1
                        VSB_printf(tl->sb,
458
                            "Unterminated long-string, starting at\n");
459 1
                        vcc_ErrWhere(tl, tl->t);
460 1
                        return;
461
                }
462
463
                /* Match for the fixed tokens (see generate.py) */
464 511765
                u = vcl_fixed_token(p, &q);
465 511765
                if (u != 0) {
466 283626
                        vcc_AddToken(tl, u, p, q);
467 283626
                        p = q;
468 283626
                        continue;
469
                }
470
471
                /* Match strings */
472 228139
                if (*p == '"') {
473 244290
                        for (q = p + 1; q < sp->e; q++) {
474 244290
                                if (*q == '"') {
475 29877
                                        q++;
476 29877
                                        break;
477
                                }
478 214413
                                if (*q == '\r' || *q == '\n') {
479 1
                                        vcc_AddToken(tl, EOI, p, q);
480 1
                                        VSB_printf(tl->sb,
481
                                            "Unterminated string at\n");
482 1
                                        vcc_ErrWhere(tl, tl->t);
483 1
                                        return;
484
                                }
485
                        }
486 29877
                        vcc_AddToken(tl, CSTR, p, q);
487 29877
                        if (vcc_decstr(tl))
488 0
                                return;
489 29877
                        p = q;
490 29877
                        continue;
491
                }
492
493
                /* Match Identifiers */
494 198261
                if (vct_isident1(*p)) {
495 1620364
                        for (q = p; q < sp->e; q++)
496 1620364
                                if (!vct_isvar(*q))
497 184476
                                        break;
498 184476
                        vcc_AddToken(tl, ID, p, q);
499 184476
                        p = q;
500 184476
                        continue;
501
                }
502
503
                /* Match numbers { [0-9]+ } */
504 13785
                if (vct_isdigit(*p)) {
505 36928
                        for (q = p; q < sp->e; q++)
506 36928
                                if (!vct_isdigit(*q))
507 13784
                                        break;
508 13784
                        vcc_AddToken(tl, CNUM, p, q);
509 13784
                        p = q;
510 13784
                        continue;
511
                }
512 1
                vcc_AddToken(tl, EOI, p, p + 1);
513 1
                VSB_printf(tl->sb, "Syntax error at\n");
514 1
                vcc_ErrWhere(tl, tl->t);
515 1
                return;
516
        }
517
}