varnish-cache/lib/libvarnish/vre.c
0
/*-
1
 * Copyright (c) 2006-2011 Varnish Software AS
2
 * All rights reserved.
3
 *
4
 * Author: Tollef Fog Heen <tfheen@redpill-linpro.com>
5
 *
6
 * SPDX-License-Identifier: BSD-2-Clause
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "config.h"
31
32
#include <ctype.h>
33
#include <string.h>
34
#include <unistd.h>
35
36
#include "vdef.h"
37
38
#include "vas.h"        // XXX Flexelint "not used" - but req'ed for assert()
39
#include "vsb.h"
40
#include "miniobj.h"
41
42
#include "vre.h"
43
#include "vre_pcre2.h"
44
45
/*
 * When the build did not detect pcre2_set_depth_limit(), map it onto
 * pcre2_set_recursion_limit(), which takes the same arguments.
 */
#if !HAVE_PCRE2_SET_DEPTH_LIMIT
#  define pcre2_set_depth_limit(r, d) pcre2_set_recursion_limit(r, d)
#endif

/*
 * Sentinel stored in vre.re to mark a "packed" regex, i.e. one whose
 * compiled PCRE2 code lives directly after the struct vre in memory
 * (see VRE_export() and VRE_unpack()).
 *
 * The expansion is fully parenthesized so that it behaves as a single
 * primary expression wherever it is used (e.g. as a sizeof operand).
 */
#define VRE_PACKED_RE           ((pcre2_code *)(-1))
50
51
struct vre {
52
        unsigned                magic;
53
#define VRE_MAGIC               0xe83097dc
54
        pcre2_code              *re;
55
        pcre2_match_context     *re_ctx;
56
};
57
58
/*
59
 * We don't want to spread or even expose the majority of PCRE2 options
60
 * and errors so we establish our own symbols and implement hard linkage
61
 * to PCRE2 here.
62
 */
63
const int VRE_ERROR_NOMATCH = PCRE2_ERROR_NOMATCH;
64
65
const unsigned VRE_CASELESS = PCRE2_CASELESS;
66
67
vre_t *
68 338050
VRE_compile(const char *pattern, unsigned options,
69
    int *errptr, int *erroffset, unsigned jit)
70
{
71
        PCRE2_SIZE erroff;
72
        vre_t *v;
73
74 338050
        AN(pattern);
75 338050
        AN(errptr);
76 338050
        AN(erroffset);
77
78 338050
        *errptr = 0;
79 338050
        *erroffset = -1;
80
81 338050
        ALLOC_OBJ(v, VRE_MAGIC);
82 338050
        if (v == NULL) {
83 0
                *errptr = PCRE2_ERROR_NOMEMORY;
84 0
                return (NULL);
85
        }
86 676100
        v->re = pcre2_compile((PCRE2_SPTR8)pattern, PCRE2_ZERO_TERMINATED,
87 338050
            options, errptr, &erroff, NULL);
88 338050
        *erroffset = erroff;
89 338050
        if (v->re == NULL) {
90 125
                VRE_free(&v);
91 125
                return (NULL);
92
        }
93 337925
        v->re_ctx = pcre2_match_context_create(NULL);
94 337925
        if (v->re_ctx == NULL) {
95 0
                *errptr = PCRE2_ERROR_NOMEMORY;
96 0
                VRE_free(&v);
97 0
                return (NULL);
98
        }
99
#if USE_PCRE2_JIT
100 337925
        if (jit)
101 132425
                (void)pcre2_jit_compile(v->re, PCRE2_JIT_COMPLETE);
102
#else
103
        (void)jit;
104
#endif
105 337925
        return (v);
106 338050
}
107
108
int
109 150
VRE_error(struct vsb *vsb, int err)
110
{
111
        char buf[VRE_ERROR_LEN];
112
        int i;
113
114 150
        CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
115 150
        i = pcre2_get_error_message(err, (PCRE2_UCHAR *)buf, VRE_ERROR_LEN);
116 150
        if (i == PCRE2_ERROR_BADDATA) {
117 0
                VSB_printf(vsb, "unknown pcre2 error code (%d)", err);
118 0
                return (-1);
119
        }
120 150
        VSB_cat(vsb, buf);
121 150
        return (0);
122 150
}
123
124
pcre2_code *
125 188805
VRE_unpack(const vre_t *code)
126
{
127
128
        /* XXX: The ban code ensures that regex "lumps" are pointer-aligned,
129
         * but coming for example from a VMOD there is no guarantee. Should
130
         * we formally require that code is properly aligned?
131
         */
132 188805
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
133 188805
        if (code->re == VRE_PACKED_RE) {
134 575
                AZ(code->re_ctx);
135 575
                return (TRUST_ME(code + 1));
136
        }
137 188230
        return (code->re);
138 188805
}
139
140
static void
141 187794
vre_limit(const vre_t *code, const volatile struct vre_limits *lim)
142
{
143
144 187794
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
145
146 187794
        if (lim == NULL)
147 61243
                return;
148
149 126551
        assert(code->re != VRE_PACKED_RE);
150
151
        /* XXX: not reentrant */
152 126551
        AN(code->re_ctx);
153 126551
        AZ(pcre2_set_match_limit(code->re_ctx, lim->match));
154 126551
        AZ(pcre2_set_depth_limit(code->re_ctx, lim->depth));
155 187794
}
156
157
vre_t *
158 725
VRE_export(const vre_t *code, size_t *sz)
159
{
160
        pcre2_code *re;
161
        vre_t *exp;
162
163 725
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
164 725
        re = VRE_unpack(code);
165 725
        AZ(pcre2_pattern_info(re, PCRE2_INFO_SIZE, sz));
166
167 725
        exp = malloc(sizeof(*exp) + *sz);
168 725
        if (exp == NULL)
169 0
                return (NULL);
170
171 725
        INIT_OBJ(exp, VRE_MAGIC);
172 725
        exp->re = VRE_PACKED_RE;
173 725
        memcpy(exp + 1, re, *sz);
174 725
        *sz += sizeof(*exp);
175 725
        return (exp);
176 725
}
177
178
static int
179 188086
vre_capture(const vre_t *code, const char *subject, size_t length,
180
    size_t offset, int options, txt *groups, size_t *count,
181
    pcre2_match_data **datap)
182
{
183
        pcre2_match_data *data;
184
        pcre2_code *re;
185
        PCRE2_SIZE *ovector, b, e;
186
        size_t nov, g;
187
        int matches;
188
189 188086
        re = VRE_unpack(code);
190
191 188086
        if (datap != NULL && *datap != NULL) {
192 275
                data = *datap;
193 275
                *datap = NULL;
194 275
        } else {
195 187811
                data = pcre2_match_data_create_from_pattern(re, NULL);
196 187811
                AN(data);
197
        }
198
199 188086
        ovector = pcre2_get_ovector_pointer(data);
200 188086
        nov = 2L * pcre2_get_ovector_count(data);
201 567802
        for (g = 0; g < nov; g++)
202 379716
                ovector[g] = PCRE2_UNSET;
203
204 376172
        matches = pcre2_match(re, (PCRE2_SPTR)subject, length, offset,
205 188086
            options, data, code->re_ctx);
206
207 188086
        if (groups != NULL) {
208 1825
                AN(count);
209 1825
                AN(*count);
210 1825
                ovector = pcre2_get_ovector_pointer(data);
211 1825
                nov = vmin_t(size_t, pcre2_get_ovector_count(data), *count);
212 4300
                for (g = 0; g < nov; g++) {
213 2475
                        b = ovector[2 * g];
214 2475
                        e = ovector[2 * g + 1];
215 2475
                        if (b == PCRE2_UNSET) {
216 875
                                groups->b = groups->e = "";
217 875
                        } else {
218 1600
                                groups->b = subject + b;
219 1600
                                groups->e = subject + e;
220
                        }
221 2475
                        groups++;
222 2475
                }
223 1825
                *count = nov;
224 1825
        }
225
226 188086
        if (datap != NULL && matches > VRE_ERROR_NOMATCH)
227 975
                *datap = data;
228
        else
229 187111
                pcre2_match_data_free(data);
230 188086
        return (matches);
231
}
232
233
int
234 186254
VRE_match(const vre_t *code, const char *subject, size_t length,
235
    int options, const volatile struct vre_limits *lim)
236
{
237
238 186254
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
239 186254
        AN(subject);
240
241 186254
        if (length == 0)
242 128985
                length = PCRE2_ZERO_TERMINATED;
243 186254
        vre_limit(code, lim);
244 186254
        return (vre_capture(code, subject, length, 0, options,
245
            NULL, NULL, NULL));
246
}
247
248
int
249 0
VRE_capture(const vre_t *code, const char *subject, size_t length, int options,
250
    txt *groups, size_t count, const volatile struct vre_limits *lim)
251
{
252
        int i;
253
254 0
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
255 0
        AN(subject);
256 0
        AN(groups);
257 0
        AN(count);
258
259 0
        if (length == 0)
260 0
                length = PCRE2_ZERO_TERMINATED;
261 0
        vre_limit(code, lim);
262 0
        i = vre_capture(code, subject, length, 0, options,
263 0
            groups, &count, NULL);
264
265 0
        if (i <= 0)
266 0
                return (i);
267 0
        return (count);
268 0
}
269
270
int
271 1550
VRE_sub(const vre_t *code, const char *subject, const char *replacement,
272
    struct vsb *vsb, const volatile struct vre_limits *lim, int all)
273
{
274 1550
        pcre2_match_data *data = NULL;
275
        txt groups[10];
276
        size_t count;
277 1550
        int i, offset = 0;
278
        const char *s, *e;
279
        unsigned x;
280
281 1550
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
282 1550
        CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
283 1550
        AN(subject);
284 1550
        AN(replacement);
285
286 1550
        vre_limit(code, lim);
287 1550
        count = 10;
288 3100
        i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset, 0,
289 1550
            groups, &count, &data);
290
291 1550
        if (i <= VRE_ERROR_NOMATCH) {
292 625
                AZ(data);
293 625
                return (i);
294
        }
295
296 925
        do {
297 975
                AN(data); /* check reuse across successful captures */
298 975
                AN(count);
299
300
                /* Copy prefix to match */
301 975
                s = subject + offset;
302 975
                VSB_bcat(vsb, s, pdiff(s, groups[0].b));
303 5250
                for (s = e = replacement; *e != '\0'; e++ ) {
304 4275
                        if (*e != '\\' || e[1] == '\0')
305 3525
                                continue;
306 750
                        VSB_bcat(vsb, s, pdiff(s, e));
307 750
                        s = ++e;
308 750
                        if (isdigit(*e)) {
309 650
                                s++;
310 650
                                x = *e - '0';
311 650
                                if (x >= count)
312 150
                                        continue;
313 500
                                VSB_bcat(vsb, groups[x].b, Tlen(groups[x]));
314 500
                                continue;
315
                        }
316 100
                }
317 975
                VSB_bcat(vsb, s, pdiff(s, e));
318 975
                offset = pdiff(subject, groups[0].e);
319 975
                if (!all)
320 700
                        break;
321 275
                count = 10;
322 550
                i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset,
323 275
                    PCRE2_NOTEMPTY, groups, &count, &data);
324
325 275
                if (i < VRE_ERROR_NOMATCH) {
326 0
                        AZ(data);
327 0
                        return (i);
328
                }
329 275
        } while (i != VRE_ERROR_NOMATCH);
330
331 925
        if (data != NULL) {
332 700
                assert(i > VRE_ERROR_NOMATCH);
333 700
                AZ(all);
334 700
                pcre2_match_data_free(data);
335 700
        }
336
337
        /* Copy suffix to match */
338 925
        VSB_cat(vsb, subject + offset);
339 925
        return (1);
340 1550
}
341
342
void
343 255825
VRE_free(vre_t **vv)
344
{
345
        vre_t *v;
346
347 255825
        TAKE_OBJ_NOTNULL(v, vv, VRE_MAGIC);
348
349 255825
        if (v->re == VRE_PACKED_RE) {
350 725
                v->re = NULL;
351 725
                AZ(v->re_ctx);
352 725
        }
353
354 255825
        if (v->re_ctx != NULL)
355 254975
                pcre2_match_context_free(v->re_ctx);
356 255825
        if (v->re != NULL)
357 254975
                pcre2_code_free(v->re);
358 255825
        FREE_OBJ(v);
359 255825
}
360
361
void
362 150
VRE_quote(struct vsb *vsb, const char *src)
363
{
364
        const char *b, *e;
365
366 150
        CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
367 150
        if (src == NULL)
368 0
                return;
369 225
        for (b = src; (e = strstr(b, "\\E")) != NULL; b = e + 2)
370 75
                VSB_printf(vsb, "\\Q%.*s\\\\EE", (int)(e - b), b);
371 150
        if (*b != '\0')
372 75
                VSB_printf(vsb, "\\Q%s\\E", b);
373 150
}