varnish-cache/lib/libvcc/vcc_token.c
1
/*-
2
 * Copyright (c) 2006 Verdens Gang AS
3
 * Copyright (c) 2006-2011 Varnish Software AS
4
 * All rights reserved.
5
 *
6
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "config.h"
31
32
#include <stdlib.h>
33
#include <string.h>
34
35
#include "vcc_compile.h"
36
37
#include "vct.h"
38
39
/*--------------------------------------------------------------------*/
40
41
void
42 110
vcc_ErrToken(const struct vcc *tl, const struct token *t)
43
{
44
45 110
        if (t->tok == EOI)
46 0
                VSB_printf(tl->sb, "end of input");
47 110
        else if (t->tok == CSRC)
48 0
                VSB_printf(tl->sb, "C{ ... }C");
49
        else
50 110
                VSB_printf(tl->sb, "'%.*s'", PF(t));
51 110
}
52
53
void
54 0
vcc__ErrInternal(struct vcc *tl, const char *func, unsigned line)
55
{
56
57 0
        VSB_printf(tl->sb, "VCL compiler internal error at %s():%u\n",
58
            func, line);
59 0
        tl->err = 1;
60 0
}
61
62
/*--------------------------------------------------------------------
63
 * Find start of source-line of token
64
 */
65
66
static void
67 752
vcc_iline(const struct token *t, const char **ll, int tail)
68
{
69
        const char *p, *b, *x;
70
71 752
        b = t->src->b;
72 752
        if (ll != NULL)
73 752
                *ll = b;
74 752
        x = tail ? t->e - 1 : t->b;
75 515000
        for (p = b; p < x; p++) {
76 514248
                if (*p == '\n') {
77 12374
                        if (ll != NULL)
78 12374
                                *ll = p + 1;
79
                }
80
        }
81 752
}
82
83
/*--------------------------------------------------------------------
84
 * Find and print src+line+pos of this token
85
 */
86
87
static void
88 28374
vcc_icoord(struct vsb *vsb, const struct token *t, int tail)
89
{
90
        unsigned lin, pos;
91
        const char *p, *b, *x;
92
93 28374
        lin = 1;
94 28374
        pos = 0;
95 28374
        b = t->src->b;
96 28374
        x = tail ? t->e - 1 : t->b;
97 93091654
        for (p = b; p < x; p++) {
98 93063280
                if (*p == '\n') {
99 3045852
                        lin++;
100 3045852
                        pos = 0;
101 90017428
                } else if (*p == '\t') {
102 30420
                        pos &= ~7;
103 30420
                        pos += 8;
104
                } else
105 89987008
                        pos++;
106
        }
107 28374
        VSB_printf(vsb, "(");
108 28374
        if (tail < 2)
109 28296
                VSB_printf(vsb, "'%s' Line %u ", t->src->name, lin);
110 28374
        VSB_printf(vsb, "Pos %u)", pos + 1);
111 28374
}
112
113
/*--------------------------------------------------------------------*/
114
115
void
116 27622
vcc_Coord(const struct vcc *tl, struct vsb *vsb, const struct token *t)
117
{
118
119 27622
        if (t == NULL)
120 27622
                t = tl->t;
121 27622
        vcc_icoord(vsb, t, 0);
122 27622
}
123
124
/*--------------------------------------------------------------------
125
 * Output one line of source code, starting at 'l' and ending at the
126
 * first NL or 'le'.
127
 */
128
129
static void
130 674
vcc_quoteline(const struct vcc *tl, const char *l, const char *le)
131
{
132
        const char *p;
133
        unsigned x, y;
134
135 674
        x = y = 0;
136 24092
        for (p = l; p < le && *p != '\n'; p++) {
137 23418
                if (*p == '\t') {
138 896
                        y &= ~7;
139 896
                        y += 8;
140 8960
                        while (x < y) {
141 7168
                                VSB_putc(tl->sb, ' ');
142 7168
                                x++;
143
                        }
144
                } else {
145 22522
                        x++;
146 22522
                        y++;
147 22522
                        VSB_putc(tl->sb, *p);
148
                }
149
        }
150 674
        VSB_putc(tl->sb, '\n');
151 674
}
152
153
/*--------------------------------------------------------------------
154
 * Output a marker line for a sourceline starting at 'l' and ending at
155
 * the first NL or 'le'.  Characters between 'b' and 'e' are marked.
156
 */
157
158
static void
159 674
vcc_markline(const struct vcc *tl, const char *l, const char *le,
160
    const char *b, const char *e)
161
{
162
        const char *p;
163
        unsigned x, y;
164
        char c;
165
166 674
        x = y = 0;
167 24092
        for (p = l; p < le && *p != '\n'; p++) {
168 23418
                if (p >= b && p < e)
169 4876
                        c = '#';
170
                else
171 18542
                        c = '-';
172
173 23418
                if (*p == '\t') {
174 896
                        y &= ~7;
175 896
                        y += 8;
176
                } else
177 22522
                        y++;
178 76526
                while (x < y) {
179 29690
                        VSB_putc(tl->sb, c);
180 29690
                        x++;
181
                }
182
        }
183 674
        VSB_putc(tl->sb, '\n');
184 674
}
185
186
/*--------------------------------------------------------------------*/
187
/* XXX: should take first+last token */
188
189
void
190 80
vcc_ErrWhere2(struct vcc *tl, const struct token *t, const struct token *t2)
191
{
192
        const char  *l1, *l2, *l3;
193
194 80
        if (t == NULL) {
195 2
                vcc_ErrWhere(tl, t2);
196 2
                return;
197
        }
198 78
        vcc_iline(t, &l1, 0);
199 78
        t2 = VTAILQ_PREV(t2, tokenhead, list);
200 78
        vcc_iline(t2, &l2, 1);
201
202
203 78
        if (l1 == l2) {
204 78
                vcc_icoord(tl->sb, t, 0);
205 78
                VSB_cat(tl->sb, " -- ");
206 78
                vcc_icoord(tl->sb, t2, 2);
207 78
                VSB_putc(tl->sb, '\n');
208
                /* Two tokens on same line */
209 78
                vcc_quoteline(tl, l1, t->src->e);
210 78
                vcc_markline(tl, l1, t->src->e, t->b, t2->e);
211
        } else {
212
                /* Two tokens different lines */
213 0
                l3 = strchr(l1, '\n');
214 0
                AN(l3);
215
                /* XXX: t had better be before t2 */
216 0
                vcc_icoord(tl->sb, t, 0);
217 0
                if (l3 + 1 == l2) {
218 0
                        VSB_cat(tl->sb, " -- ");
219 0
                        vcc_icoord(tl->sb, t2, 1);
220
                }
221 0
                VSB_putc(tl->sb, '\n');
222 0
                vcc_quoteline(tl, l1, t->src->e);
223 0
                vcc_markline(tl, l1, t->src->e, t->b, t2->e);
224 0
                if (l3 + 1 != l2) {
225 0
                        VSB_cat(tl->sb, "[...]\n");
226 0
                        vcc_icoord(tl->sb, t2, 1);
227 0
                        VSB_putc(tl->sb, '\n');
228
                }
229 0
                vcc_quoteline(tl, l2, t->src->e);
230 0
                vcc_markline(tl, l2, t->src->e, t->b, t2->e);
231
        }
232 78
        VSB_putc(tl->sb, '\n');
233 78
        tl->err = 1;
234
}
235
236
void
237 596
vcc_ErrWhere(struct vcc *tl, const struct token *t)
238
{
239
        const char  *l1;
240
241 596
        vcc_iline(t, &l1, 0);
242 596
        vcc_icoord(tl->sb, t, 0);
243 596
        VSB_putc(tl->sb, '\n');
244 596
        vcc_quoteline(tl, l1, t->src->e);
245 596
        vcc_markline(tl, l1, t->src->e, t->b, t->e);
246 596
        VSB_putc(tl->sb, '\n');
247 596
        tl->err = 1;
248 596
}
249
250
/*--------------------------------------------------------------------*/
251
252
void
253 902258
vcc_NextToken(struct vcc *tl)
254
{
255
256 902258
        tl->t = VTAILQ_NEXT(tl->t, list);
257 902258
        if (tl->t == NULL) {
258 0
                VSB_printf(tl->sb,
259
                    "Ran out of input, something is missing or"
260
                    " maybe unbalanced (...) or {...}\n");
261 0
                tl->err = 1;
262 0
                return;
263
        }
264
}
265
266
void
267 439274
vcc__Expect(struct vcc *tl, unsigned tok, unsigned line)
268
{
269 439274
        if (tl->t->tok == tok)
270 439254
                return;
271 20
        VSB_printf(tl->sb, "Expected %s got ", vcl_tnames[tok]);
272 20
        vcc_ErrToken(tl, tl->t);
273 20
        VSB_printf(tl->sb, "\n(program line %u), at\n", line);
274 20
        vcc_ErrWhere(tl, tl->t);
275
}
276
277
/*--------------------------------------------------------------------
278
 * Compare ID token to string, return true of match
279
 */
280
281
int
282 1353848
vcc_IdIs(const struct token *t, const char *p)
283
{
284
        const char *q;
285
286 1353848
        assert(t->tok == ID);
287 1832700
        for (q = t->b; q < t->e && *p != '\0'; p++, q++)
288 1726166
                if (*q != *p)
289 1247314
                        return (0);
290 106534
        if (q != t->e || *p != '\0')
291 444
                return (0);
292 106090
        return (1);
293
}
294
295
/*--------------------------------------------------------------------
296
 * Check that we have a Varnish identifier
297
 */
298
299
void
300 30452
vcc_ExpectVid(struct vcc *tl, const char *what)
301
{
302
        const char *bad;
303
304 30452
        ExpectErr(tl, ID);
305 30450
        ERRCHK(tl);
306
307 30450
        bad = VCT_invalid_name(tl->t->b, tl->t->e);
308 30450
        if (bad != NULL) {
309 6
                VSB_printf(tl->sb, "Name of %s, ", what);
310 6
                vcc_ErrToken(tl, tl->t);
311 6
                VSB_printf(tl->sb,
312 6
                    ", contains illegal character '%c'\n", *bad);
313 6
                vcc_ErrWhere(tl, tl->t);
314 6
                return;
315
        }
316
}
317
318
/*--------------------------------------------------------------------
319
 * Decode a string
320
 */
321
322
static int
323 60320
vcc_decstr(struct vcc *tl)
324
{
325
        char *q;
326
        unsigned int l;
327
328 60320
        assert(tl->t->tok == CSTR);
329 60320
        l = (tl->t->e - tl->t->b) - 2;
330 60320
        tl->t->dec = TlAlloc(tl, l + 1);
331 60320
        assert(tl->t->dec != NULL);
332 60320
        q = tl->t->dec;
333 60320
        memcpy(q, tl->t->b + 1, l);
334 60320
        q[l] = '\0';
335 60320
        return (0);
336
}
337
338
/*--------------------------------------------------------------------
339
 * Add a token to the token list.
340
 */
341
342
void
343 1047728
vcc_AddToken(struct vcc *tl, unsigned tok, const char *b, const char *e)
344
{
345
        struct token *t;
346
347 1047728
        t = TlAlloc(tl, sizeof *t);
348 1047728
        assert(t != NULL);
349 1047728
        t->tok = tok;
350 1047728
        t->b = b;
351 1047728
        t->e = e;
352 1047728
        t->src = tl->src;
353 1047728
        if (tl->t != NULL)
354 1045584
                VTAILQ_INSERT_AFTER(&tl->tokens, tl->t, t, list);
355
        else
356 2144
                VTAILQ_INSERT_TAIL(&tl->tokens, t, list);
357 1047728
        tl->t = t;
358 1047728
}
359
360
/*--------------------------------------------------------------------
361
 * Lexical analysis and token generation
362
 */
363
364
void
365 4320
vcc_Lexer(struct vcc *tl, struct source *sp)
366
{
367
        const char *p, *q;
368
        unsigned u;
369
370 4320
        tl->src = sp;
371 2950744
        for (p = sp->b; p < sp->e; ) {
372
373
                /* Skip any whitespace */
374 2942116
                if (vct_isspace(*p)) {
375 1847252
                        p++;
376 1847252
                        continue;
377
                }
378
379
                /* Skip '#.*\n' comments */
380 1094864
                if (*p == '#') {
381 1225950
                        while (p < sp->e && *p != '\n')
382 1174346
                                p++;
383 25802
                        continue;
384
                }
385
386
                /* Skip C-style comments */
387 1069062
                if (*p == '/' && p[1] == '*') {
388 3769508
                        for (q = p + 2; q < sp->e; q++) {
389 3769506
                                if (*q == '/' && q[1] == '*') {
390 2
                                        VSB_printf(tl->sb,
391
                                            "/* ... */ comment contains /*\n");
392 2
                                        vcc_AddToken(tl, EOI, p, p + 2);
393 2
                                        vcc_ErrWhere(tl, tl->t);
394 2
                                        vcc_AddToken(tl, EOI, q, q + 2);
395 2
                                        vcc_ErrWhere(tl, tl->t);
396 2
                                        return;
397
                                }
398 3769504
                                if (*q == '*' && q[1] == '/') {
399 14928
                                        p = q + 2;
400 14928
                                        break;
401
                                }
402
                        }
403 14930
                        if (q < sp->e)
404 14928
                                continue;
405 2
                        vcc_AddToken(tl, EOI, p, p + 2);
406 2
                        VSB_printf(tl->sb,
407
                            "Unterminated /* ... */ comment, starting at\n");
408 2
                        vcc_ErrWhere(tl, tl->t);
409 2
                        return;
410
                }
411
412
                /* Skip C++-style comments */
413 1054130
                if (*p == '/' && p[1] == '/') {
414 352046
                        while (p < sp->e && *p != '\n')
415 334966
                                p++;
416 8540
                        continue;
417
                }
418
419
                /* Recognize inline C-code */
420 1045590
                if (*p == 'C' && p[1] == '{') {
421 1706
                        for (q = p + 2; q < sp->e; q++) {
422 1704
                                if (*q == '}' && q[1] == 'C') {
423 30
                                        vcc_AddToken(tl, CSRC, p, q + 2);
424 30
                                        break;
425
                                }
426
                        }
427 32
                        if (q < sp->e) {
428 30
                                p = q + 2;
429 30
                                continue;
430
                        }
431 2
                        vcc_AddToken(tl, EOI, p, p + 2);
432 2
                        VSB_printf(tl->sb,
433
                            "Unterminated inline C source, starting at\n");
434 2
                        vcc_ErrWhere(tl, tl->t);
435 2
                        return;
436
                }
437
438
                /* Recognize long-strings */
439 1045558
                if (*p == '{' && p[1] == '"') {
440 936460
                        for (q = p + 2; q < sp->e; q++) {
441 936458
                                if (*q == '"' && q[1] == '}') {
442 21394
                                        vcc_AddToken(tl, CSTR, p, q + 2);
443 21394
                                        break;
444
                                }
445
                        }
446 21396
                        if (q < sp->e) {
447 21394
                                p = q + 2;
448 21394
                                u = tl->t->e - tl->t->b;
449 21394
                                u -= 4;         /* {" ... "} */
450 21394
                                tl->t->dec = TlAlloc(tl, u + 1 );
451 21394
                                AN(tl->t->dec);
452 21394
                                memcpy(tl->t->dec, tl->t->b + 2, u);
453 21394
                                tl->t->dec[u] = '\0';
454 21394
                                continue;
455
                        }
456 2
                        vcc_AddToken(tl, EOI, p, p + 2);
457 2
                        VSB_printf(tl->sb,
458
                            "Unterminated long-string, starting at\n");
459 2
                        vcc_ErrWhere(tl, tl->t);
460 2
                        return;
461
                }
462
463
                /* Match for the fixed tokens (see generate.py) */
464 1024162
                u = vcl_fixed_token(p, &q);
465 1024162
                if (u != 0) {
466 568040
                        vcc_AddToken(tl, u, p, q);
467 568040
                        p = q;
468 568040
                        continue;
469
                }
470
471
                /* Match strings */
472 456122
                if (*p == '"') {
473 492544
                        for (q = p + 1; q < sp->e; q++) {
474 492544
                                if (*q == '"') {
475 60320
                                        q++;
476 60320
                                        break;
477
                                }
478 432224
                                if (*q == '\r' || *q == '\n') {
479 2
                                        vcc_AddToken(tl, EOI, p, q);
480 2
                                        VSB_printf(tl->sb,
481
                                            "Unterminated string at\n");
482 2
                                        vcc_ErrWhere(tl, tl->t);
483 2
                                        return;
484
                                }
485
                        }
486 60320
                        vcc_AddToken(tl, CSTR, p, q);
487 60320
                        if (vcc_decstr(tl))
488 0
                                return;
489 60320
                        p = q;
490 60320
                        continue;
491
                }
492
493
                /* Match Identifiers */
494 395800
                if (vct_isident1(*p)) {
495 3272712
                        for (q = p; q < sp->e; q++)
496 3272712
                                if (!vct_isvar(*q))
497 372460
                                        break;
498 372460
                        vcc_AddToken(tl, ID, p, q);
499 372460
                        p = q;
500 372460
                        continue;
501
                }
502
503
                /* Match numbers { [0-9]+ } */
504 23340
                if (vct_isdigit(*p)) {
505 65596
                        for (q = p; q < sp->e; q++)
506 65596
                                if (!vct_isdigit(*q))
507 23338
                                        break;
508 23338
                        if (*q != '.') {
509 18854
                                vcc_AddToken(tl, CNUM, p, q);
510 18854
                                p = q;
511 18854
                                continue;
512
                        }
513 9002
                        for (++q; q < sp->e; q++)
514 9002
                                if (!vct_isdigit(*q))
515 4484
                                        break;
516 4484
                        vcc_AddToken(tl, FNUM, p, q);
517 4484
                        p = q;
518 4484
                        continue;
519
                }
520 2
                vcc_AddToken(tl, EOI, p, p + 1);
521 2
                VSB_printf(tl->sb, "Syntax error at\n");
522 2
                vcc_ErrWhere(tl, tl->t);
523 2
                return;
524
        }
525
}