varnish-cache/lib/libvarnish/vre.c
0
/*-
1
 * Copyright (c) 2006-2011 Varnish Software AS
2
 * All rights reserved.
3
 *
4
 * Author: Tollef Fog Heen <tfheen@redpill-linpro.com>
5
 *
6
 * SPDX-License-Identifier: BSD-2-Clause
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "config.h"
31
32
#include <ctype.h>
33
#include <string.h>
34
#include <unistd.h>
35
36
#include "vdef.h"
37
38
#include "vas.h"        // XXX Flexelint "not used" - but req'ed for assert()
39
#include "vsb.h"
40
#include "miniobj.h"
41
42
#include "vre.h"
43
#include "vre_pcre2.h"
44
45
/*
 * PCRE2 builds that lack pcre2_set_depth_limit() get it mapped onto
 * pcre2_set_recursion_limit(), which takes the same arguments.
 */
#if !HAVE_PCRE2_SET_DEPTH_LIMIT
#  define pcre2_set_depth_limit(r, d) pcre2_set_recursion_limit(r, d)
#endif

/*
 * Sentinel stored in vre->re to mark a "packed" regex, i.e. one whose
 * compiled code is stored directly behind the struct vre instead of
 * being referenced through the pointer.
 *
 * The whole expansion is parenthesized so that the macro behaves as a
 * single operand in any expression context (e.g. `sizeof VRE_PACKED_RE`).
 */
#define VRE_PACKED_RE           ((pcre2_code *)(-1))
50
51
struct vre {
52
        unsigned                magic;
53
#define VRE_MAGIC               0xe83097dc
54
        pcre2_code              *re;
55
        pcre2_match_context     *re_ctx;
56
};
57
58
/*
59
 * We don't want to spread or even expose the majority of PCRE2 options
60
 * and errors so we establish our own symbols and implement hard linkage
61
 * to PCRE2 here.
62
 */
63
const int VRE_ERROR_NOMATCH = PCRE2_ERROR_NOMATCH;
64
65
const unsigned VRE_CASELESS = PCRE2_CASELESS;
66
67
vre_t *
68 557200
VRE_compile(const char *pattern, unsigned options,
69
    int *errptr, int *erroffset, unsigned jit)
70
{
71
        PCRE2_SIZE erroff;
72
        vre_t *v;
73
74 557200
        AN(pattern);
75 557200
        AN(errptr);
76 557200
        AN(erroffset);
77
78 557200
        *errptr = 0;
79 557200
        *erroffset = -1;
80
81 557200
        ALLOC_OBJ(v, VRE_MAGIC);
82 557200
        if (v == NULL) {
83 0
                *errptr = PCRE2_ERROR_NOMEMORY;
84 0
                return (NULL);
85
        }
86 1114400
        v->re = pcre2_compile((PCRE2_SPTR8)pattern, PCRE2_ZERO_TERMINATED,
87 557200
            options, errptr, &erroff, NULL);
88 557200
        *erroffset = erroff;
89 557200
        if (v->re == NULL) {
90 200
                VRE_free(&v);
91 200
                return (NULL);
92
        }
93 557000
        v->re_ctx = pcre2_match_context_create(NULL);
94 557000
        if (v->re_ctx == NULL) {
95 0
                *errptr = PCRE2_ERROR_NOMEMORY;
96 0
                VRE_free(&v);
97 0
                return (NULL);
98
        }
99
#if USE_PCRE2_JIT
100 557000
        if (jit)
101 219040
                (void)pcre2_jit_compile(v->re, PCRE2_JIT_COMPLETE);
102
#else
103
        (void)jit;
104
#endif
105 557000
        return (v);
106 557200
}
107
108
int
109 240
VRE_error(struct vsb *vsb, int err)
110
{
111
        char buf[VRE_ERROR_LEN];
112
        int i;
113
114 240
        CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
115 240
        i = pcre2_get_error_message(err, (PCRE2_UCHAR *)buf, VRE_ERROR_LEN);
116 240
        if (i == PCRE2_ERROR_BADDATA) {
117 0
                VSB_printf(vsb, "unknown pcre2 error code (%d)", err);
118 0
                return (-1);
119
        }
120 240
        VSB_cat(vsb, buf);
121 240
        return (0);
122 240
}
123
124
pcre2_code *
125 309317
VRE_unpack(const vre_t *code)
126
{
127
128
        /* XXX: The ban code ensures that regex "lumps" are pointer-aligned,
129
         * but coming for example from a VMOD there is no guarantee. Should
130
         * we formally require that code is properly aligned?
131
         */
132 309317
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
133 309317
        if (code->re == VRE_PACKED_RE) {
134 920
                AZ(code->re_ctx);
135 920
                return (TRUST_ME(code + 1));
136
        }
137 308397
        return (code->re);
138 309317
}
139
140
static void
141 307693
vre_limit(const vre_t *code, const volatile struct vre_limits *lim)
142
{
143
144 307693
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
145
146 307693
        if (lim == NULL)
147 100424
                return;
148
149 207269
        assert(code->re != VRE_PACKED_RE);
150
151
        /* XXX: not reentrant */
152 207269
        AN(code->re_ctx);
153 207269
        AZ(pcre2_set_match_limit(code->re_ctx, lim->match));
154 207269
        AZ(pcre2_set_depth_limit(code->re_ctx, lim->depth));
155 307693
}
156
157
vre_t *
158 1160
VRE_export(const vre_t *code, size_t *sz)
159
{
160
        pcre2_code *re;
161
        vre_t *exp;
162
163 1160
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
164 1160
        re = VRE_unpack(code);
165 1160
        AZ(pcre2_pattern_info(re, PCRE2_INFO_SIZE, sz));
166
167 1160
        exp = malloc(sizeof(*exp) + *sz);
168 1160
        if (exp == NULL)
169 0
                return (NULL);
170
171 1160
        INIT_OBJ(exp, VRE_MAGIC);
172 1160
        exp->re = VRE_PACKED_RE;
173 1160
        memcpy(exp + 1, re, *sz);
174 1160
        *sz += sizeof(*exp);
175 1160
        return (exp);
176 1160
}
177
178
static int
179 308168
vre_capture(const vre_t *code, const char *subject, size_t length,
180
    size_t offset, int options, txt *groups, size_t *count,
181
    pcre2_match_data **datap)
182
{
183
        pcre2_match_data *data;
184
        pcre2_code *re;
185
        PCRE2_SIZE *ovector, b, e;
186
        size_t nov, g;
187
        int matches;
188
189 308168
        re = VRE_unpack(code);
190
191 308168
        if (datap != NULL && *datap != NULL) {
192 440
                data = *datap;
193 440
                *datap = NULL;
194 440
        } else {
195 307728
                data = pcre2_match_data_create_from_pattern(re, NULL);
196 307728
                AN(data);
197
        }
198
199 308168
        ovector = pcre2_get_ovector_pointer(data);
200 308168
        nov = 2L * pcre2_get_ovector_count(data);
201 930652
        for (g = 0; g < nov; g++)
202 622484
                ovector[g] = PCRE2_UNSET;
203
204 616336
        matches = pcre2_match(re, (PCRE2_SPTR)subject, length, offset,
205 308168
            options, data, code->re_ctx);
206
207 308168
        if (groups != NULL) {
208 2920
                AN(count);
209 2920
                AN(*count);
210 2920
                ovector = pcre2_get_ovector_pointer(data);
211 2920
                nov = vmin_t(size_t, pcre2_get_ovector_count(data), *count);
212 6880
                for (g = 0; g < nov; g++) {
213 3960
                        b = ovector[2 * g];
214 3960
                        e = ovector[2 * g + 1];
215 3960
                        if (b == PCRE2_UNSET) {
216 1400
                                groups->b = groups->e = "";
217 1400
                        } else {
218 2560
                                groups->b = subject + b;
219 2560
                                groups->e = subject + e;
220
                        }
221 3960
                        groups++;
222 3960
                }
223 2920
                *count = nov;
224 2920
        }
225
226 308168
        if (datap != NULL && matches > VRE_ERROR_NOMATCH)
227 1560
                *datap = data;
228
        else
229 306608
                pcre2_match_data_free(data);
230 308168
        return (matches);
231
}
232
233
int
234 305236
VRE_match(const vre_t *code, const char *subject, size_t length,
235
    int options, const volatile struct vre_limits *lim)
236
{
237
238 305236
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
239 305236
        AN(subject);
240
241 305236
        if (length == 0)
242 212210
                length = PCRE2_ZERO_TERMINATED;
243 305236
        vre_limit(code, lim);
244 305236
        return (vre_capture(code, subject, length, 0, options,
245
            NULL, NULL, NULL));
246
}
247
248
int
249 0
VRE_capture(const vre_t *code, const char *subject, size_t length, int options,
250
    txt *groups, size_t count, const volatile struct vre_limits *lim)
251
{
252
        int i;
253
254 0
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
255 0
        AN(subject);
256 0
        AN(groups);
257 0
        AN(count);
258
259 0
        if (length == 0)
260 0
                length = PCRE2_ZERO_TERMINATED;
261 0
        vre_limit(code, lim);
262 0
        i = vre_capture(code, subject, length, 0, options,
263 0
            groups, &count, NULL);
264
265 0
        if (i <= 0)
266 0
                return (i);
267 0
        return (count);
268 0
}
269
270
int
271 2480
VRE_sub(const vre_t *code, const char *subject, const char *replacement,
272
    struct vsb *vsb, const volatile struct vre_limits *lim, int all)
273
{
274 2480
        pcre2_match_data *data = NULL;
275
        txt groups[10];
276
        size_t count;
277 2480
        int i, offset = 0;
278
        const char *s, *e;
279
        unsigned x;
280
281 2480
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
282 2480
        CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
283 2480
        AN(subject);
284 2480
        AN(replacement);
285
286 2480
        vre_limit(code, lim);
287 2480
        count = 10;
288 4960
        i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset, 0,
289 2480
            groups, &count, &data);
290
291 2480
        if (i <= VRE_ERROR_NOMATCH) {
292 1000
                AZ(data);
293 1000
                return (i);
294
        }
295
296 1480
        do {
297 1560
                AN(data); /* check reuse across successful captures */
298 1560
                AN(count);
299
300
                /* Copy prefix to match */
301 1560
                s = subject + offset;
302 1560
                VSB_bcat(vsb, s, pdiff(s, groups[0].b));
303 8400
                for (s = e = replacement; *e != '\0'; e++ ) {
304 6840
                        if (*e != '\\' || e[1] == '\0')
305 5640
                                continue;
306 1200
                        VSB_bcat(vsb, s, pdiff(s, e));
307 1200
                        s = ++e;
308 1200
                        if (isdigit(*e)) {
309 1040
                                s++;
310 1040
                                x = *e - '0';
311 1040
                                if (x >= count)
312 240
                                        continue;
313 800
                                VSB_bcat(vsb, groups[x].b, Tlen(groups[x]));
314 800
                                continue;
315
                        }
316 160
                }
317 1560
                VSB_bcat(vsb, s, pdiff(s, e));
318 1560
                offset = pdiff(subject, groups[0].e);
319 1560
                if (!all)
320 1120
                        break;
321 440
                count = 10;
322 880
                i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset,
323 440
                    PCRE2_NOTEMPTY, groups, &count, &data);
324
325 440
                if (i < VRE_ERROR_NOMATCH) {
326 0
                        AZ(data);
327 0
                        return (i);
328
                }
329 440
        } while (i != VRE_ERROR_NOMATCH);
330
331 1480
        if (data != NULL) {
332 1120
                assert(i > VRE_ERROR_NOMATCH);
333 1120
                AZ(all);
334 1120
                pcre2_match_data_free(data);
335 1120
        }
336
337
        /* Copy suffix to match */
338 1480
        VSB_cat(vsb, subject + offset);
339 1480
        return (1);
340 2480
}
341
342
void
343 420541
VRE_free(vre_t **vv)
344
{
345
        vre_t *v;
346
347 420541
        TAKE_OBJ_NOTNULL(v, vv, VRE_MAGIC);
348
349 420541
        if (v->re == VRE_PACKED_RE) {
350 1160
                v->re = NULL;
351 1160
                AZ(v->re_ctx);
352 1160
        }
353
354 420541
        if (v->re_ctx != NULL)
355 419181
                pcre2_match_context_free(v->re_ctx);
356 420541
        if (v->re != NULL)
357 419181
                pcre2_code_free(v->re);
358 420541
        FREE_OBJ(v);
359 420541
}
360
361
void
362 240
VRE_quote(struct vsb *vsb, const char *src)
363
{
364
        const char *b, *e;
365
366 240
        CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
367 240
        if (src == NULL)
368 0
                return;
369 360
        for (b = src; (e = strstr(b, "\\E")) != NULL; b = e + 2)
370 120
                VSB_printf(vsb, "\\Q%.*s\\\\EE", (int)(e - b), b);
371 240
        if (*b != '\0')
372 120
                VSB_printf(vsb, "\\Q%s\\E", b);
373 240
}