varnish-cache/lib/libvarnish/vre.c
0
/*-
1
 * Copyright (c) 2006-2011 Varnish Software AS
2
 * All rights reserved.
3
 *
4
 * Author: Tollef Fog Heen <tfheen@redpill-linpro.com>
5
 *
6
 * SPDX-License-Identifier: BSD-2-Clause
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "config.h"
31
32
#include <ctype.h>
33
#include <string.h>
34
#include <unistd.h>
35
36
#include "vdef.h"
37
38
#include "vas.h"        // XXX Flexelint "not used" - but req'ed for assert()
39
#include "vsb.h"
40
#include "miniobj.h"
41
42
#include "vre.h"
43
#include "vre_pcre2.h"
44
45
/*
 * When HAVE_PCRE2_SET_DEPTH_LIMIT is false, route calls to
 * pcre2_set_depth_limit() to pcre2_set_recursion_limit() instead.
 */
#if !HAVE_PCRE2_SET_DEPTH_LIMIT
#  define pcre2_set_depth_limit(ctx, lim) pcre2_set_recursion_limit(ctx, lim)
#endif

/*
 * Sentinel stored in (struct vre).re when the compiled pattern is not a
 * separate object but sits directly after the struct in the same buffer.
 */
#define VRE_PACKED_RE           ((pcre2_code *)(-1))
50
51
struct vre {
52
        unsigned                magic;
53
#define VRE_MAGIC               0xe83097dc
54
        pcre2_code              *re;
55
        pcre2_match_context     *re_ctx;
56
};
57
58
/*
59
 * We don't want to spread or even expose the majority of PCRE2 options
60
 * and errors so we establish our own symbols and implement hard linkage
61
 * to PCRE2 here.
62
 */
63
const int VRE_ERROR_NOMATCH = PCRE2_ERROR_NOMATCH;
64
65
const unsigned VRE_CASELESS = PCRE2_CASELESS;
66
67
vre_t *
68 361023
VRE_compile(const char *pattern, unsigned options,
69
    int *errptr, int *erroffset, unsigned jit)
70
{
71
        PCRE2_SIZE erroff;
72
        vre_t *v;
73
74 361023
        AN(pattern);
75 361023
        AN(errptr);
76 361023
        AN(erroffset);
77
78 361023
        *errptr = 0;
79 361023
        *erroffset = -1;
80
81 361023
        ALLOC_OBJ(v, VRE_MAGIC);
82 361023
        if (v == NULL) {
83 0
                *errptr = PCRE2_ERROR_NOMEMORY;
84 0
                return (NULL);
85
        }
86 722046
        v->re = pcre2_compile((PCRE2_SPTR8)pattern, PCRE2_ZERO_TERMINATED,
87 361023
            options, errptr, &erroff, NULL);
88 361023
        *erroffset = erroff;
89 361023
        if (v->re == NULL) {
90 140
                VRE_free(&v);
91 140
                return (NULL);
92
        }
93 360883
        v->re_ctx = pcre2_match_context_create(NULL);
94 360883
        if (v->re_ctx == NULL) {
95 0
                *errptr = PCRE2_ERROR_NOMEMORY;
96 0
                VRE_free(&v);
97 0
                return (NULL);
98
        }
99
#if USE_PCRE2_JIT
100 360883
        if (jit)
101 141251
                (void)pcre2_jit_compile(v->re, PCRE2_JIT_COMPLETE);
102
#else
103
        (void)jit;
104
#endif
105 360883
        return (v);
106 361023
}
107
108
int
109 168
VRE_error(struct vsb *vsb, int err)
110
{
111
        char buf[VRE_ERROR_LEN];
112
        int i;
113
114 168
        CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
115 168
        i = pcre2_get_error_message(err, (PCRE2_UCHAR *)buf, VRE_ERROR_LEN);
116 168
        if (i == PCRE2_ERROR_BADDATA) {
117 0
                VSB_printf(vsb, "unknown pcre2 error code (%d)", err);
118 0
                return (-1);
119
        }
120 168
        VSB_cat(vsb, buf);
121 168
        return (0);
122 168
}
123
124
pcre2_code *
125 201754
VRE_unpack(const vre_t *code)
126
{
127
128
        /* XXX: The ban code ensures that regex "lumps" are pointer-aligned,
129
         * but coming for example from a VMOD there is no guarantee. Should
130
         * we formally require that code is properly aligned?
131
         */
132 201754
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
133 201754
        if (code->re == VRE_PACKED_RE) {
134 644
                AZ(code->re_ctx);
135 644
                return (TRUST_ME(code + 1));
136
        }
137 201110
        return (code->re);
138 201754
}
139
140
static void
141 200682
vre_limit(const vre_t *code, const volatile struct vre_limits *lim)
142
{
143
144 200682
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
145
146 200682
        if (lim == NULL)
147 64562
                return;
148
149 136120
        assert(code->re != VRE_PACKED_RE);
150
151
        /* XXX: not reentrant */
152 136120
        AN(code->re_ctx);
153 136120
        AZ(pcre2_set_match_limit(code->re_ctx, lim->match));
154 136120
        AZ(pcre2_set_depth_limit(code->re_ctx, lim->depth));
155 200682
}
156
157
vre_t *
158 756
VRE_export(const vre_t *code, size_t *sz)
159
{
160
        pcre2_code *re;
161
        vre_t *exp;
162
163 756
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
164 756
        re = VRE_unpack(code);
165 756
        AZ(pcre2_pattern_info(re, PCRE2_INFO_SIZE, sz));
166
167 756
        exp = malloc(sizeof(*exp) + *sz);
168 756
        if (exp == NULL)
169 0
                return (NULL);
170
171 756
        INIT_OBJ(exp, VRE_MAGIC);
172 756
        exp->re = VRE_PACKED_RE;
173 756
        memcpy(exp + 1, re, *sz);
174 756
        *sz += sizeof(*exp);
175 756
        return (exp);
176 756
}
177
178
static int
179 201006
vre_capture(const vre_t *code, const char *subject, size_t length,
180
    size_t offset, int options, txt *groups, size_t *count,
181
    pcre2_match_data **datap)
182
{
183
        pcre2_match_data *data;
184
        pcre2_code *re;
185
        PCRE2_SIZE *ovector, b, e;
186
        size_t nov, g;
187
        int matches;
188
189 201006
        re = VRE_unpack(code);
190
191 201006
        if (datap != NULL && *datap != NULL) {
192 308
                data = *datap;
193 308
                *datap = NULL;
194 308
        } else {
195 200698
                data = pcre2_match_data_create_from_pattern(re, NULL);
196 200698
                AN(data);
197
        }
198
199 201006
        ovector = pcre2_get_ovector_pointer(data);
200 201006
        nov = 2L * pcre2_get_ovector_count(data);
201 606932
        for (g = 0; g < nov; g++)
202 405926
                ovector[g] = PCRE2_UNSET;
203
204 402012
        matches = pcre2_match(re, (PCRE2_SPTR)subject, length, offset,
205 201006
            options, data, code->re_ctx);
206
207 201006
        if (groups != NULL) {
208 2044
                AN(count);
209 2044
                AN(*count);
210 2044
                ovector = pcre2_get_ovector_pointer(data);
211 2044
                nov = vmin_t(size_t, pcre2_get_ovector_count(data), *count);
212 4816
                for (g = 0; g < nov; g++) {
213 2772
                        b = ovector[2 * g];
214 2772
                        e = ovector[2 * g + 1];
215 2772
                        if (b == PCRE2_UNSET) {
216 980
                                groups->b = groups->e = "";
217 980
                        } else {
218 1792
                                groups->b = subject + b;
219 1792
                                groups->e = subject + e;
220
                        }
221 2772
                        groups++;
222 2772
                }
223 2044
                *count = nov;
224 2044
        }
225
226 201006
        if (datap != NULL && matches > VRE_ERROR_NOMATCH)
227 1092
                *datap = data;
228
        else
229 199914
                pcre2_match_data_free(data);
230 201006
        return (matches);
231
}
232
233
int
234 198958
VRE_match(const vre_t *code, const char *subject, size_t length,
235
    int options, const volatile struct vre_limits *lim)
236
{
237
238 198958
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
239 198958
        AN(subject);
240
241 198958
        if (length == 0)
242 138454
                length = PCRE2_ZERO_TERMINATED;
243 198958
        vre_limit(code, lim);
244 198958
        return (vre_capture(code, subject, length, 0, options,
245
            NULL, NULL, NULL));
246
}
247
248
int
249 0
VRE_capture(const vre_t *code, const char *subject, size_t length, int options,
250
    txt *groups, size_t count, const volatile struct vre_limits *lim)
251
{
252
        int i;
253
254 0
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
255 0
        AN(subject);
256 0
        AN(groups);
257 0
        AN(count);
258
259 0
        if (length == 0)
260 0
                length = PCRE2_ZERO_TERMINATED;
261 0
        vre_limit(code, lim);
262 0
        i = vre_capture(code, subject, length, 0, options,
263 0
            groups, &count, NULL);
264
265 0
        if (i <= 0)
266 0
                return (i);
267 0
        return (count);
268 0
}
269
270
int
271 1736
VRE_sub(const vre_t *code, const char *subject, const char *replacement,
272
    struct vsb *vsb, const volatile struct vre_limits *lim, int all)
273
{
274 1736
        pcre2_match_data *data = NULL;
275
        txt groups[10];
276
        size_t count;
277 1736
        int i, offset = 0;
278
        const char *s;
279
        unsigned x;
280
281 1736
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
282 1736
        CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
283 1736
        AN(subject);
284 1736
        AN(replacement);
285
286 1736
        vre_limit(code, lim);
287 1736
        count = 10;
288 3472
        i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset, 0,
289 1736
            groups, &count, &data);
290
291 1736
        if (i <= VRE_ERROR_NOMATCH) {
292 700
                AZ(data);
293 700
                return (i);
294
        }
295
296 1036
        do {
297 1092
                AN(data); /* check reuse across successful captures */
298 1092
                AN(count);
299
300
                /* Copy prefix to match */
301 1092
                s = subject + offset;
302 1092
                VSB_bcat(vsb, s, pdiff(s, groups[0].b));
303 5880
                for (s = replacement; *s != '\0'; s++ ) {
304 4788
                        if (*s != '\\' || s[1] == '\0') {
305 3948
                                VSB_putc(vsb, *s);
306 3948
                                continue;
307
                        }
308 840
                        s++;
309 840
                        if (isdigit(*s)) {
310 728
                                x = *s - '0';
311 728
                                if (x >= count)
312 168
                                        continue;
313 560
                                VSB_bcat(vsb, groups[x].b, Tlen(groups[x]));
314 560
                                continue;
315
                        }
316 112
                        VSB_putc(vsb, *s);
317 112
                }
318 1092
                offset = pdiff(subject, groups[0].e);
319 1092
                if (!all)
320 784
                        break;
321 308
                count = 10;
322 616
                i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset,
323 308
                    PCRE2_NOTEMPTY, groups, &count, &data);
324
325 308
                if (i < VRE_ERROR_NOMATCH) {
326 0
                        AZ(data);
327 0
                        return (i);
328
                }
329 308
        } while (i != VRE_ERROR_NOMATCH);
330
331 1036
        if (data != NULL) {
332 784
                assert(i > VRE_ERROR_NOMATCH);
333 784
                AZ(all);
334 784
                pcre2_match_data_free(data);
335 784
        }
336
337
        /* Copy suffix to match */
338 1036
        VSB_cat(vsb, subject + offset);
339 1036
        return (1);
340 1736
}
341
342
void
343 273571
VRE_free(vre_t **vv)
344
{
345
        vre_t *v;
346
347 273571
        TAKE_OBJ_NOTNULL(v, vv, VRE_MAGIC);
348
349 273571
        if (v->re == VRE_PACKED_RE) {
350 756
                v->re = NULL;
351 756
                AZ(v->re_ctx);
352 756
        }
353
354 273571
        if (v->re_ctx != NULL)
355 272675
                pcre2_match_context_free(v->re_ctx);
356 273571
        if (v->re != NULL)
357 272675
                pcre2_code_free(v->re);
358 273571
        FREE_OBJ(v);
359 273571
}
360
361
void
362 168
VRE_quote(struct vsb *vsb, const char *src)
363
{
364
        const char *b, *e;
365
366 168
        CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
367 168
        if (src == NULL)
368 0
                return;
369 252
        for (b = src; (e = strstr(b, "\\E")) != NULL; b = e + 2)
370 84
                VSB_printf(vsb, "\\Q%.*s\\\\EE", (int)(e - b), b);
371 168
        if (*b != '\0')
372 84
                VSB_printf(vsb, "\\Q%s\\E", b);
373 168
}