varnish-cache/lib/libvcc/vcc_token.c
1
/*-
2
 * Copyright (c) 2006 Verdens Gang AS
3
 * Copyright (c) 2006-2011 Varnish Software AS
4
 * All rights reserved.
5
 *
6
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "config.h"
31
32
#include <stdlib.h>
33
#include <string.h>
34
35
#include "vcc_compile.h"
36
37
#include "vct.h"
38
39
/*--------------------------------------------------------------------*/
40
41
void
42 41
vcc_ErrToken(const struct vcc *tl, const struct token *t)
43
{
44
45 41
        if (t->tok == EOI)
46 0
                VSB_printf(tl->sb, "end of input");
47 41
        else if (t->tok == CSRC)
48 0
                VSB_printf(tl->sb, "C{ ... }C");
49
        else
50 41
                VSB_printf(tl->sb, "'%.*s'", PF(t));
51 41
}
52
53
void
54 0
vcc__ErrInternal(struct vcc *tl, const char *func, unsigned line)
55
{
56
57 0
        VSB_printf(tl->sb, "VCL compiler internal error at %s():%u\n",
58
            func, line);
59 0
        tl->err = 1;
60 0
}
61
62
/*--------------------------------------------------------------------
63
 * Find start of source-line of token
64
 */
65
66
static void
67 199
vcc_iline(const struct token *t, const char **ll, int tail)
68
{
69
        const char *p, *b, *x;
70
71 199
        b = t->src->b;
72 199
        if (ll != NULL)
73 199
                *ll = b;
74 199
        x = tail ? t->e - 1 : t->b;
75 14302
        for (p = b; p < x; p++) {
76 14103
                if (*p == '\n') {
77 782
                        if (ll != NULL)
78 782
                                *ll = p + 1;
79
                }
80
        }
81 199
}
82
83
/*--------------------------------------------------------------------
84
 * Find and print src+line+pos of this token
85
 */
86
87
static void
88 11926
vcc_icoord(struct vsb *vsb, const struct token *t, int tail)
89
{
90
        unsigned lin, pos;
91
        const char *p, *b, *x;
92
93 11926
        lin = 1;
94 11926
        pos = 0;
95 11926
        b = t->src->b;
96 11926
        x = tail ? t->e - 1 : t->b;
97 37393970
        for (p = b; p < x; p++) {
98 37382044
                if (*p == '\n') {
99 1230584
                        lin++;
100 1230584
                        pos = 0;
101 36151460
                } else if (*p == '\t') {
102 27786
                        pos &= ~7;
103 27786
                        pos += 8;
104
                } else
105 36123674
                        pos++;
106
        }
107 11926
        VSB_printf(vsb, "('%s' Line %u Pos %u)", t->src->name, lin, pos + 1);
108 11926
}
109
110
/*--------------------------------------------------------------------*/
111
112
void
113 11727
vcc_Coord(const struct vcc *tl, struct vsb *vsb, const struct token *t)
114
{
115
116 11727
        if (t == NULL)
117 11727
                t = tl->t;
118 11727
        vcc_icoord(vsb, t, 0);
119 11727
}
120
121
/*--------------------------------------------------------------------
122
 * Output one line of source code, starting at 'l' and ending at the
123
 * first NL or 'le'.
124
 */
125
126
static void
127 176
vcc_quoteline(const struct vcc *tl, const char *l, const char *le)
128
{
129
        const char *p;
130
        unsigned x, y;
131
132 176
        x = y = 0;
133 5138
        for (p = l; p < le && *p != '\n'; p++) {
134 4962
                if (*p == '\t') {
135 263
                        y &= ~7;
136 263
                        y += 8;
137 2630
                        while (x < y) {
138 2104
                                VSB_putc(tl->sb, ' ');
139 2104
                                x++;
140
                        }
141
                } else {
142 4699
                        x++;
143 4699
                        y++;
144 4699
                        VSB_putc(tl->sb, *p);
145
                }
146
        }
147 176
        VSB_putc(tl->sb, '\n');
148 176
}
149
150
/*--------------------------------------------------------------------
151
 * Output a marker line for a sourceline starting at 'l' and ending at
152
 * the first NL or 'le'.  Characters between 'b' and 'e' are marked.
153
 */
154
155
static void
156 176
vcc_markline(const struct vcc *tl, const char *l, const char *le,
157
    const char *b, const char *e)
158
{
159
        const char *p;
160
        unsigned x, y;
161
        char c;
162
163 176
        x = y = 0;
164 5138
        for (p = l; p < le && *p != '\n'; p++) {
165 4962
                if (p >= b && p < e)
166 1227
                        c = '#';
167
                else
168 3735
                        c = '-';
169
170 4962
                if (*p == '\t') {
171 263
                        y &= ~7;
172 263
                        y += 8;
173
                } else
174 4699
                        y++;
175 16727
                while (x < y) {
176 6803
                        VSB_putc(tl->sb, c);
177 6803
                        x++;
178
                }
179
        }
180 176
        VSB_putc(tl->sb, '\n');
181 176
}
182
183
/*--------------------------------------------------------------------*/
184
/* XXX: should take first+last token */
185
186
void
187 24
vcc_ErrWhere2(struct vcc *tl, const struct token *t, const struct token *t2)
188
{
189
        const char  *l1, *l2, *l3;
190
191 24
        if (t == NULL) {
192 1
                vcc_ErrWhere(tl, t2);
193 25
                return;
194
        }
195 23
        vcc_iline(t, &l1, 0);
196 23
        t2 = VTAILQ_PREV(t2, tokenhead, list);
197 23
        vcc_iline(t2, &l2, 1);
198
199
200 23
        if (l1 == l2) {
201 23
                vcc_icoord(tl->sb, t, 0);
202 23
                VSB_cat(tl->sb, " -- ");
203 23
                vcc_icoord(tl->sb, t2, 1);
204 23
                VSB_putc(tl->sb, '\n');
205
                /* Two tokens on same line */
206 23
                vcc_quoteline(tl, l1, t->src->e);
207 23
                vcc_markline(tl, l1, t->src->e, t->b, t2->e);
208
        } else {
209
                /* Two tokens different lines */
210 0
                l3 = strchr(l1, '\n');
211 0
                AN(l3);
212
                /* XXX: t had better be before t2 */
213 0
                vcc_icoord(tl->sb, t, 0);
214 0
                if (l3 + 1 == l2) {
215 0
                        VSB_cat(tl->sb, " -- ");
216 0
                        vcc_icoord(tl->sb, t2, 1);
217
                }
218 0
                VSB_putc(tl->sb, '\n');
219 0
                vcc_quoteline(tl, l1, t->src->e);
220 0
                vcc_markline(tl, l1, t->src->e, t->b, t2->e);
221 0
                if (l3 + 1 != l2) {
222 0
                        VSB_cat(tl->sb, "[...]\n");
223 0
                        vcc_icoord(tl->sb, t2, 1);
224 0
                        VSB_putc(tl->sb, '\n');
225
                }
226 0
                vcc_quoteline(tl, l2, t->src->e);
227 0
                vcc_markline(tl, l2, t->src->e, t->b, t2->e);
228
        }
229 23
        VSB_putc(tl->sb, '\n');
230 23
        tl->err = 1;
231
}
232
233
void
234 153
vcc_ErrWhere(struct vcc *tl, const struct token *t)
235
{
236
        const char  *l1;
237
238 153
        vcc_iline(t, &l1, 0);
239 153
        vcc_icoord(tl->sb, t, 0);
240 153
        VSB_putc(tl->sb, '\n');
241 153
        vcc_quoteline(tl, l1, t->src->e);
242 153
        vcc_markline(tl, l1, t->src->e, t->b, t->e);
243 153
        VSB_putc(tl->sb, '\n');
244 153
        tl->err = 1;
245 153
}
246
247
/*--------------------------------------------------------------------*/
248
249
void
250 368736
vcc_NextToken(struct vcc *tl)
251
{
252
253 368736
        tl->t = VTAILQ_NEXT(tl->t, list);
254 368736
        if (tl->t == NULL) {
255 0
                VSB_printf(tl->sb,
256
                    "Ran out of input, something is missing or"
257
                    " maybe unbalanced (...) or {...}\n");
258 0
                tl->err = 1;
259 0
                return;
260
        }
261
}
262
263
void
264 193153
vcc__Expect(struct vcc *tl, unsigned tok, unsigned line)
265
{
266 193153
        if (tl->t->tok == tok)
267 386296
                return;
268 10
        VSB_printf(tl->sb, "Expected %s got ", vcl_tnames[tok]);
269 10
        vcc_ErrToken(tl, tl->t);
270 10
        VSB_printf(tl->sb, "\n(program line %u), at\n", line);
271 10
        vcc_ErrWhere(tl, tl->t);
272
}
273
274
/*--------------------------------------------------------------------
275
 * Compare ID token to string, return true of match
276
 */
277
278
int
279 669179
vcc_IdIs(const struct token *t, const char *p)
280
{
281
        const char *q;
282
283 669179
        assert(t->tok == ID);
284 1054767
        for (q = t->b; q < t->e && *p != '\0'; p++, q++)
285 974679
                if (*q != *p)
286 589091
                        return (0);
287 80088
        if (q != t->e || *p != '\0')
288 254
                return (0);
289 79834
        return (1);
290
}
291
292
/*--------------------------------------------------------------------
293
 * Check that we have a Varnish identifier
294
 */
295
296
void
297 13023
vcc_ExpectVid(struct vcc *tl, const char *what)
298
{
299
        const char *bad;
300
301 13023
        ExpectErr(tl, ID);
302 13022
        ERRCHK(tl);
303
304 13022
        bad = VCT_invalid_name(tl->t->b, tl->t->e);
305 13022
        if (bad != NULL) {
306 3
                VSB_printf(tl->sb, "Name of %s, ", what);
307 3
                vcc_ErrToken(tl, tl->t);
308 3
                VSB_printf(tl->sb,
309 3
                    ", contains illegal character '%c'\n", *bad);
310 3
                vcc_ErrWhere(tl, tl->t);
311 3
                return;
312
        }
313
}
314
315
/*--------------------------------------------------------------------
316
 * Decode a string
317
 */
318
319
static int
320 24573
vcc_decstr(struct vcc *tl)
321
{
322
        char *q;
323
        unsigned int l;
324
325 24573
        assert(tl->t->tok == CSTR);
326 24573
        l = (tl->t->e - tl->t->b) - 2;
327 24573
        tl->t->dec = TlAlloc(tl, l + 1);
328 24573
        assert(tl->t->dec != NULL);
329 24573
        q = tl->t->dec;
330 24573
        memcpy(q, tl->t->b + 1, l);
331 24573
        q[l] = '\0';
332 24573
        return (0);
333
}
334
335
/*--------------------------------------------------------------------
336
 * Add a token to the token list.
337
 */
338
339
void
340 424450
vcc_AddToken(struct vcc *tl, unsigned tok, const char *b, const char *e)
341
{
342
        struct token *t;
343
344 424450
        t = TlAlloc(tl, sizeof *t);
345 424450
        assert(t != NULL);
346 424450
        t->tok = tok;
347 424450
        t->b = b;
348 424450
        t->e = e;
349 424450
        t->src = tl->src;
350 424450
        if (tl->t != NULL)
351 423545
                VTAILQ_INSERT_AFTER(&tl->tokens, tl->t, t, list);
352
        else
353 905
                VTAILQ_INSERT_TAIL(&tl->tokens, t, list);
354 424450
        tl->t = t;
355 424450
}
356
357
/*--------------------------------------------------------------------
358
 * Lexical analysis and token generation
359
 */
360
361
void
362 1826
vcc_Lexer(struct vcc *tl, struct source *sp)
363
{
364
        const char *p, *q;
365
        unsigned u;
366
367 1826
        tl->src = sp;
368 1167087
        for (p = sp->b; p < sp->e; ) {
369
370
                /* Skip any whitespace */
371 1163441
                if (vct_isspace(*p)) {
372 720038
                        p++;
373 720038
                        continue;
374
                }
375
376
                /* Skip '#.*\n' comments */
377 443403
                if (*p == '#') {
378 516076
                        while (p < sp->e && *p != '\n')
379 494362
                                p++;
380 10857
                        continue;
381
                }
382
383
                /* Skip C-style comments */
384 432546
                if (*p == '/' && p[1] == '*') {
385 1559831
                        for (q = p + 2; q < sp->e; q++) {
386 1559830
                                if (*q == '/' && q[1] == '*') {
387 1
                                        VSB_printf(tl->sb,
388
                                            "/* ... */ comment contains /*\n");
389 1
                                        vcc_AddToken(tl, EOI, p, p + 2);
390 1
                                        vcc_ErrWhere(tl, tl->t);
391 1
                                        vcc_AddToken(tl, EOI, q, q + 2);
392 1
                                        vcc_ErrWhere(tl, tl->t);
393 1
                                        return;
394
                                }
395 1559829
                                if (*q == '*' && q[1] == '/') {
396 5396
                                        p = q + 2;
397 5396
                                        break;
398
                                }
399
                        }
400 5397
                        if (q < sp->e)
401 5396
                                continue;
402 1
                        vcc_AddToken(tl, EOI, p, p + 2);
403 1
                        VSB_printf(tl->sb,
404
                            "Unterminated /* ... */ comment, starting at\n");
405 1
                        vcc_ErrWhere(tl, tl->t);
406 1
                        return;
407
                }
408
409
                /* Skip C++-style comments */
410 427148
                if (*p == '/' && p[1] == '/') {
411 148436
                        while (p < sp->e && *p != '\n')
412 141236
                                p++;
413 3600
                        continue;
414
                }
415
416
                /* Recognize inline C-code */
417 423548
                if (*p == 'C' && p[1] == '{') {
418 779
                        for (q = p + 2; q < sp->e; q++) {
419 778
                                if (*q == '}' && q[1] == 'C') {
420 13
                                        vcc_AddToken(tl, CSRC, p, q + 2);
421 13
                                        break;
422
                                }
423
                        }
424 14
                        if (q < sp->e) {
425 13
                                p = q + 2;
426 13
                                continue;
427
                        }
428 1
                        vcc_AddToken(tl, EOI, p, p + 2);
429 1
                        VSB_printf(tl->sb,
430
                            "Unterminated inline C source, starting at\n");
431 1
                        vcc_ErrWhere(tl, tl->t);
432 1
                        return;
433
                }
434
435
                /* Recognize long-strings */
436 423534
                if (*p == '{' && p[1] == '"') {
437 396754
                        for (q = p + 2; q < sp->e; q++) {
438 396753
                                if (*q == '"' && q[1] == '}') {
439 9027
                                        vcc_AddToken(tl, CSTR, p, q + 2);
440 9027
                                        break;
441
                                }
442
                        }
443 9028
                        if (q < sp->e) {
444 9027
                                p = q + 2;
445 9027
                                u = tl->t->e - tl->t->b;
446 9027
                                u -= 4;         /* {" ... "} */
447 9027
                                tl->t->dec = TlAlloc(tl, u + 1 );
448 9027
                                AN(tl->t->dec);
449 9027
                                memcpy(tl->t->dec, tl->t->b + 2, u);
450 9027
                                tl->t->dec[u] = '\0';
451 9027
                                continue;
452
                        }
453 1
                        vcc_AddToken(tl, EOI, p, p + 2);
454 1
                        VSB_printf(tl->sb,
455
                            "Unterminated long-string, starting at\n");
456 1
                        vcc_ErrWhere(tl, tl->t);
457 1
                        return;
458
                }
459
460
                /* Match for the fixed tokens (see generate.py) */
461 414506
                u = vcl_fixed_token(p, &q);
462 414506
                if (u != 0) {
463 228546
                        vcc_AddToken(tl, u, p, q);
464 228546
                        p = q;
465 228546
                        continue;
466
                }
467
468
                /* Match strings */
469 185960
                if (*p == '"') {
470 193335
                        for (q = p + 1; q < sp->e; q++) {
471 193335
                                if (*q == '"') {
472 24573
                                        q++;
473 24573
                                        break;
474
                                }
475 168762
                                if (*q == '\r' || *q == '\n') {
476 1
                                        vcc_AddToken(tl, EOI, p, q);
477 1
                                        VSB_printf(tl->sb,
478
                                            "Unterminated string at\n");
479 1
                                        vcc_ErrWhere(tl, tl->t);
480 1
                                        return;
481
                                }
482
                        }
483 24573
                        vcc_AddToken(tl, CSTR, p, q);
484 24573
                        if (vcc_decstr(tl))
485 0
                                return;
486 24573
                        p = q;
487 24573
                        continue;
488
                }
489
490
                /* Match Identifiers */
491 161386
                if (vct_isident1(*p)) {
492 1330340
                        for (q = p; q < sp->e; q++)
493 1330340
                                if (!vct_isvar(*q))
494 151480
                                        break;
495 151480
                        vcc_AddToken(tl, ID, p, q);
496 151480
                        p = q;
497 151480
                        continue;
498
                }
499
500
                /* Match numbers { [0-9]+ } */
501 9906
                if (vct_isdigit(*p)) {
502 25927
                        for (q = p; q < sp->e; q++)
503 25927
                                if (!vct_isdigit(*q))
504 9905
                                        break;
505 9905
                        vcc_AddToken(tl, CNUM, p, q);
506 9905
                        p = q;
507 9905
                        continue;
508
                }
509 1
                vcc_AddToken(tl, EOI, p, p + 1);
510 1
                VSB_printf(tl->sb, "Syntax error at\n");
511 1
                vcc_ErrWhere(tl, tl->t);
512 1
                return;
513
        }
514
}