varnish-cache/lib/libvarnishapi/vxp_lexer.c
0
/*-
1
 * Copyright (c) 2006 Verdens Gang AS
2
 * Copyright (c) 2006-2015 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Martin Blix Grydeland <martin@varnish-software.com>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 *
30
 */
31
32
#include "config.h"
33
34
#include <ctype.h>
35
#include <stdlib.h>
36
#include <string.h>
37
#include <stdint.h>
38
#include <unistd.h> /* for MUSL */
39
40
#include "vdef.h"
41
#include "vas.h"
42
#include "vqueue.h"
43
44
#include "vre.h"
45
#include "vsb.h"
46
47
#include "vxp.h"
48
49
static void
50 30240
vxp_append_token(struct vxp *vxp, unsigned tok, const char *b, const char *e)
51
{
52
        struct token *t;
53
54 30240
        t = vxp_Alloc(vxp, sizeof *t);
55 30240
        AN(t);
56 30240
        t->tok = tok;
57 30240
        t->b = b;
58 30240
        t->e = e;
59 30240
        VTAILQ_INSERT_TAIL(&vxp->tokens, t, list);
60 30240
        vxp->t = t;
61 30240
}
62
63
/* Unquote and unescape string */
64
static void
65 1224
vxp_decstr(struct vxp *vxp)
66
{
67
        const char *b, *e, *p;
68
        char *s;
69 1224
        int esc = 0;
70
71 1224
        assert(vxp->t->tok == VAL);
72
73 1224
        b = vxp->t->b;
74 1224
        e = vxp->t->e;
75 1224
        assert(e - b >= 2);
76 1224
        b++;
77 1224
        e--;
78
79 1224
        s = vxp->t->dec = vxp_Alloc(vxp, (e - b) + 1);
80 1224
        AN(vxp->t->dec);
81 8784
        for (p = b; p < e; p++) {
82 7560
                if (!esc && *p == '\\') {
83 36
                        esc = 1;
84 36
                        continue;
85
                }
86 7524
                esc = 0;
87 7524
                *s++ = *p;
88 7524
        }
89 1224
        *s = '\0';
90 1224
}
91
92
/*
93
 * Build a token list
94
 */
95
96
void
97 6336
vxp_Lexer(struct vxp *vxp)
98
{
99
        const char *p, *q;
100
        unsigned u;
101
        char quote;
102
103 43956
        for (p = vxp->b; p < vxp->e; ) {
104
105
                /* Skip any space or tab */
106 37764
                if (isblank(*p)) {
107 13176
                        p++;
108 13176
                        continue;
109
                }
110
111 24588
                if (*p == '\\' && p[1] == '\n') {
112 72
                        p += 2;
113 72
                        continue;
114
                }
115
116
                /* Skip comments */
117 24516
                if (*p == '#') {
118 6552
                        while (p < vxp->e && *p != '\n')
119 6084
                                p++;
120 468
                        continue;
121
                }
122
123
                /* Match for the fixed tokens */
124 24048
                u = vxp_fixed_token(p, &q);
125 24048
                if (u != 0) {
126 9900
                        AN(q);
127 9900
                        vxp_append_token(vxp, u, p, q);
128 9900
                        p = q;
129 9900
                        continue;
130
                }
131
132
                /* Match quoted strings */
133 14148
                if (*p == '"' || *p == '\'') {
134 1332
                        quote = *p;
135 9972
                        for (q = p + 1; q < vxp->e; q++) {
136 9936
                                if (*q == '\\') {
137 108
                                        q++;
138 108
                                        if (q == vxp->e || *q == '\n')
139 36
                                                break;
140 9900
                                } else if (*q == '\n') {
141 36
                                        break;
142 9792
                                } else if (*q == quote) {
143 1224
                                        q++;
144 1224
                                        quote = '\0';
145 1224
                                        break;
146
                                }
147 8640
                        }
148 1332
                        vxp_append_token(vxp, VAL, p, q);
149 1332
                        if (quote != '\0') {
150 108
                                VSB_cat(vxp->sb, "Unterminated string ");
151 108
                                vxp_ErrWhere(vxp, vxp->t, q - p);
152 108
                                return;
153
                        }
154 1224
                        vxp_decstr(vxp);
155 1224
                        p = q;
156 1224
                        continue;
157
                }
158
159
                /* Match bareword */
160 12816
                if (isword(*p)) {
161 81792
                        for (q = p; q < vxp->e; q++)
162 77508
                                if (!isword(*q))
163 7416
                                        break;
164 11700
                        vxp_append_token(vxp, VAL, p, q);
165 11700
                        vxp->t->dec = vxp_Alloc(vxp, (q - p) + 1);
166 11700
                        AN(vxp->t->dec);
167 11700
                        memcpy(vxp->t->dec, p, q - p);
168 11700
                        vxp->t->dec[q - p] = '\0';
169 11700
                        p = q;
170 11700
                        continue;
171
                }
172
173
                /* On to the next query */
174 1116
                if (*p == '\n') {
175 1080
                        vxp_append_token(vxp, EOI, p, p + 1);
176 1080
                        p++;
177 1080
                        continue;
178
                }
179
180
                /* Error */
181 36
                vxp_append_token(vxp, EOI, p, p + 1);
182 36
                VSB_cat(vxp->sb, "Syntax error ");
183 36
                vxp_ErrWhere(vxp, vxp->t, q - p);
184 36
                return;
185
        }
186
187
        /* Finished */
188 6192
        vxp_append_token(vxp, EOI, vxp->e, vxp->e);
189 6336
}
190
191
#ifdef VXP_DEBUG
192
#include <stdio.h>
193
void
194 108
vxp_PrintTokens(const struct vxp *vxp)
195
{
196
        struct token *t;
197
198 108
        fprintf(stderr, "Token list:\n");
199 108
        fprintf(stderr, "  %-5s %-20s %s\n", "TOK", "SUBSTR", "DECODED");
200 1404
        VTAILQ_FOREACH(t, &vxp->tokens, list) {
201 1296
                fprintf(stderr, "  ");
202 1296
                fprintf(stderr, "%-5s", vxp_tnames[t->tok]);
203 1296
                fprintf(stderr, " %-20.*s", (unsigned)(t->e - t->b), t->b);
204 1296
                if (t->dec)
205 684
                        fprintf(stderr, " '%s'", t->dec);
206 1296
                fprintf(stderr, "\n");
207 1296
        }
208 108
        fprintf(stderr, "\n");
209 108
}
210
#endif /* VXP_DEBUG */