varnish-cache/lib/libvarnishapi/vxp_lexer.c
0
/*-
1
 * Copyright (c) 2006 Verdens Gang AS
2
 * Copyright (c) 2006-2015 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Martin Blix Grydeland <martin@varnish-software.com>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 *
30
 */
31
32
#include "config.h"
33
34
#include <ctype.h>
35
#include <stdlib.h>
36
#include <string.h>
37
#include <stdint.h>
38
#include <unistd.h> /* for MUSL */
39
40
#include "vdef.h"
41
#include "vas.h"
42
#include "vqueue.h"
43
44
#include "vre.h"
45
#include "vsb.h"
46
47
#include "vxp.h"
48
49
static void
50 20700
vxp_append_token(struct vxp *vxp, unsigned tok, const char *b, const char *e)
51
{
52
        struct token *t;
53
54 20700
        t = vxp_Alloc(vxp, sizeof *t);
55 20700
        AN(t);
56 20700
        t->tok = tok;
57 20700
        t->b = b;
58 20700
        t->e = e;
59 20700
        VTAILQ_INSERT_TAIL(&vxp->tokens, t, list);
60 20700
        vxp->t = t;
61 20700
}
62
63
/* Unquote and unescape string */
64
static void
65 850
vxp_decstr(struct vxp *vxp)
66
{
67
        const char *b, *e, *p;
68
        char *s;
69 850
        int esc = 0;
70
71 850
        assert(vxp->t->tok == VAL);
72
73 850
        b = vxp->t->b;
74 850
        e = vxp->t->e;
75 850
        assert(e - b >= 2);
76 850
        b++;
77 850
        e--;
78
79 850
        s = vxp->t->dec = vxp_Alloc(vxp, (e - b) + 1);
80 850
        AN(vxp->t->dec);
81 6100
        for (p = b; p < e; p++) {
82 5250
                if (!esc && *p == '\\') {
83 25
                        esc = 1;
84 25
                        continue;
85
                }
86 5225
                esc = 0;
87 5225
                *s++ = *p;
88 5225
        }
89 850
        *s = '\0';
90 850
}
91
92
/*
93
 * Build a token list
94
 */
95
96
void
97 4325
vxp_Lexer(struct vxp *vxp)
98
{
99
        const char *p, *q;
100
        unsigned u;
101
        char quote;
102
103 30175
        for (p = vxp->b; p < vxp->e; ) {
104
105
                /* Skip any space or tab */
106 25950
                if (isblank(*p)) {
107 9100
                        p++;
108 9100
                        continue;
109
                }
110
111 16850
                if (*p == '\\' && p[1] == '\n') {
112 50
                        p += 2;
113 50
                        continue;
114
                }
115
116
                /* Skip comments */
117 16800
                if (*p == '#') {
118 4550
                        while (p < vxp->e && *p != '\n')
119 4225
                                p++;
120 325
                        continue;
121
                }
122
123
                /* Match for the fixed tokens */
124 16475
                u = vxp_fixed_token(p, &q);
125 16475
                if (u != 0) {
126 6800
                        AN(q);
127 6800
                        vxp_append_token(vxp, u, p, q);
128 6800
                        p = q;
129 6800
                        continue;
130
                }
131
132
                /* Match quoted strings */
133 9675
                if (*p == '"' || *p == '\'') {
134 925
                        quote = *p;
135 6925
                        for (q = p + 1; q < vxp->e; q++) {
136 6900
                                if (*q == '\\') {
137 75
                                        q++;
138 75
                                        if (q == vxp->e || *q == '\n')
139 25
                                                break;
140 6875
                                } else if (*q == '\n') {
141 25
                                        break;
142 6800
                                } else if (*q == quote) {
143 850
                                        q++;
144 850
                                        quote = '\0';
145 850
                                        break;
146
                                }
147 6000
                        }
148 925
                        vxp_append_token(vxp, VAL, p, q);
149 925
                        if (quote != '\0') {
150 75
                                VSB_cat(vxp->sb, "Unterminated string ");
151 75
                                vxp_ErrWhere(vxp, vxp->t, q - p);
152 75
                                return;
153
                        }
154 850
                        vxp_decstr(vxp);
155 850
                        p = q;
156 850
                        continue;
157
                }
158
159
                /* Match bareword */
160 8750
                if (isword(*p)) {
161 55475
                        for (q = p; q < vxp->e; q++)
162 52575
                                if (!isword(*q))
163 5075
                                        break;
164 7975
                        vxp_append_token(vxp, VAL, p, q);
165 7975
                        vxp->t->dec = vxp_Alloc(vxp, (q - p) + 1);
166 7975
                        AN(vxp->t->dec);
167 7975
                        memcpy(vxp->t->dec, p, q - p);
168 7975
                        vxp->t->dec[q - p] = '\0';
169 7975
                        p = q;
170 7975
                        continue;
171
                }
172
173
                /* On to the next query */
174 775
                if (*p == '\n') {
175 750
                        vxp_append_token(vxp, EOI, p, p + 1);
176 750
                        p++;
177 750
                        continue;
178
                }
179
180
                /* Error */
181 25
                vxp_append_token(vxp, EOI, p, p + 1);
182 25
                VSB_cat(vxp->sb, "Syntax error ");
183 25
                vxp_ErrWhere(vxp, vxp->t, q - p);
184 25
                return;
185
        }
186
187
        /* Finished */
188 4225
        vxp_append_token(vxp, EOI, vxp->e, vxp->e);
189 4325
}
190
191
#ifdef VXP_DEBUG
192
#include <stdio.h>
193
void
194 75
vxp_PrintTokens(const struct vxp *vxp)
195
{
196
        struct token *t;
197
198 75
        fprintf(stderr, "Token list:\n");
199 75
        fprintf(stderr, "  %-5s %-20s %s\n", "TOK", "SUBSTR", "DECODED");
200 975
        VTAILQ_FOREACH(t, &vxp->tokens, list) {
201 900
                fprintf(stderr, "  ");
202 900
                fprintf(stderr, "%-5s", vxp_tnames[t->tok]);
203 900
                fprintf(stderr, " %-20.*s", (unsigned)(t->e - t->b), t->b);
204 900
                if (t->dec)
205 475
                        fprintf(stderr, " '%s'", t->dec);
206 900
                fprintf(stderr, "\n");
207 900
        }
208 75
        fprintf(stderr, "\n");
209 75
}
210
#endif /* VXP_DEBUG */