varnish-cache/lib/libvarnishapi/vxp_lexer.c
0
/*-
1
 * Copyright (c) 2006 Verdens Gang AS
2
 * Copyright (c) 2006-2015 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Martin Blix Grydeland <martin@varnish-software.com>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 *
30
 */
31
32
#include "config.h"
33
34
#include <ctype.h>
35
#include <stdlib.h>
36
#include <string.h>
37
#include <stdint.h>
38
#include <unistd.h> /* for MUSL */
39
40
#include "vdef.h"
41
#include "vas.h"
42
#include "vqueue.h"
43
44
#include "vre.h"
45
#include "vsb.h"
46
47
#include "vxp.h"
48
49
static void
50 33920
vxp_append_token(struct vxp *vxp, unsigned tok, const char *b, const char *e)
51
{
52
        struct token *t;
53
54 33920
        t = vxp_Alloc(vxp, sizeof *t);
55 33920
        AN(t);
56 33920
        t->tok = tok;
57 33920
        t->b = b;
58 33920
        t->e = e;
59 33920
        VTAILQ_INSERT_TAIL(&vxp->tokens, t, list);
60 33920
        vxp->t = t;
61 33920
}
62
63
/* Unquote and unescape string */
64
static void
65 1360
vxp_decstr(struct vxp *vxp)
66
{
67
        const char *b, *e, *p;
68
        char *s;
69 1360
        int esc = 0;
70
71 1360
        assert(vxp->t->tok == VAL);
72
73 1360
        b = vxp->t->b;
74 1360
        e = vxp->t->e;
75 1360
        assert(e - b >= 2);
76 1360
        b++;
77 1360
        e--;
78
79 1360
        s = vxp->t->dec = vxp_Alloc(vxp, (e - b) + 1);
80 1360
        AN(vxp->t->dec);
81 9760
        for (p = b; p < e; p++) {
82 8400
                if (!esc && *p == '\\') {
83 40
                        esc = 1;
84 40
                        continue;
85
                }
86 8360
                esc = 0;
87 8360
                *s++ = *p;
88 8360
        }
89 1360
        *s = '\0';
90 1360
}
91
92
/*
93
 * Build a token list
94
 */
95
96
void
97 7120
vxp_Lexer(struct vxp *vxp)
98
{
99
        const char *p, *q;
100
        unsigned u;
101
        char quote;
102
103 49320
        for (p = vxp->b; p < vxp->e; ) {
104
105
                /* Skip any space or tab */
106 42360
                if (isblank(*p)) {
107 14800
                        p++;
108 14800
                        continue;
109
                }
110
111 27560
                if (*p == '\\' && p[1] == '\n') {
112 80
                        p += 2;
113 80
                        continue;
114
                }
115
116
                /* Skip comments */
117 27480
                if (*p == '#') {
118 7280
                        while (p < vxp->e && *p != '\n')
119 6760
                                p++;
120 520
                        continue;
121
                }
122
123
                /* Match for the fixed tokens */
124 26960
                u = vxp_fixed_token(p, &q);
125 26960
                if (u != 0) {
126 11160
                        AN(q);
127 11160
                        vxp_append_token(vxp, u, p, q);
128 11160
                        p = q;
129 11160
                        continue;
130
                }
131
132
                /* Match quoted strings */
133 15800
                if (*p == '"' || *p == '\'') {
134 1480
                        quote = *p;
135 11080
                        for (q = p + 1; q < vxp->e; q++) {
136 11040
                                if (*q == '\\') {
137 120
                                        q++;
138 120
                                        if (q == vxp->e || *q == '\n')
139 40
                                                break;
140 11000
                                } else if (*q == '\n') {
141 40
                                        break;
142 10880
                                } else if (*q == quote) {
143 1360
                                        q++;
144 1360
                                        quote = '\0';
145 1360
                                        break;
146
                                }
147 9600
                        }
148 1480
                        vxp_append_token(vxp, VAL, p, q);
149 1480
                        if (quote != '\0') {
150 120
                                VSB_cat(vxp->sb, "Unterminated string ");
151 120
                                vxp_ErrWhere(vxp, vxp->t, q - p);
152 120
                                return;
153
                        }
154 1360
                        vxp_decstr(vxp);
155 1360
                        p = q;
156 1360
                        continue;
157
                }
158
159
                /* Match bareword */
160 14320
                if (isword(*p)) {
161 91280
                        for (q = p; q < vxp->e; q++)
162 86440
                                if (!isword(*q))
163 8240
                                        break;
164 13080
                        vxp_append_token(vxp, VAL, p, q);
165 13080
                        vxp->t->dec = vxp_Alloc(vxp, (q - p) + 1);
166 13080
                        AN(vxp->t->dec);
167 13080
                        memcpy(vxp->t->dec, p, q - p);
168 13080
                        vxp->t->dec[q - p] = '\0';
169 13080
                        p = q;
170 13080
                        continue;
171
                }
172
173
                /* On to the next query */
174 1240
                if (*p == '\n') {
175 1200
                        vxp_append_token(vxp, EOI, p, p + 1);
176 1200
                        p++;
177 1200
                        continue;
178
                }
179
180
                /* Error */
181 40
                vxp_append_token(vxp, EOI, p, p + 1);
182 40
                VSB_cat(vxp->sb, "Syntax error ");
183 40
                vxp_ErrWhere(vxp, vxp->t, q - p);
184 40
                return;
185
        }
186
187
        /* Finished */
188 6960
        vxp_append_token(vxp, EOI, vxp->e, vxp->e);
189 7120
}
190
191
#ifdef VXP_DEBUG
192
#include <stdio.h>
193
void
194 120
vxp_PrintTokens(const struct vxp *vxp)
195
{
196
        struct token *t;
197
198 120
        fprintf(stderr, "Token list:\n");
199 120
        fprintf(stderr, "  %-5s %-20s %s\n", "TOK", "SUBSTR", "DECODED");
200 1560
        VTAILQ_FOREACH(t, &vxp->tokens, list) {
201 1440
                fprintf(stderr, "  ");
202 1440
                fprintf(stderr, "%-5s", vxp_tnames[t->tok]);
203 1440
                fprintf(stderr, " %-20.*s", (unsigned)(t->e - t->b), t->b);
204 1440
                if (t->dec)
205 760
                        fprintf(stderr, " '%s'", t->dec);
206 1440
                fprintf(stderr, "\n");
207 1440
        }
208 120
        fprintf(stderr, "\n");
209 120
}
210
#endif /* VXP_DEBUG */