| | varnish-cache/lib/libvarnish/vre.c |
| 0 |
|
/*- |
| 1 |
|
* Copyright (c) 2006-2011 Varnish Software AS |
| 2 |
|
* All rights reserved. |
| 3 |
|
* |
| 4 |
|
* Author: Tollef Fog Heen <tfheen@redpill-linpro.com> |
| 5 |
|
* |
| 6 |
|
* SPDX-License-Identifier: BSD-2-Clause |
| 7 |
|
* |
| 8 |
|
* Redistribution and use in source and binary forms, with or without |
| 9 |
|
* modification, are permitted provided that the following conditions |
| 10 |
|
* are met: |
| 11 |
|
* 1. Redistributions of source code must retain the above copyright |
| 12 |
|
* notice, this list of conditions and the following disclaimer. |
| 13 |
|
* 2. Redistributions in binary form must reproduce the above copyright |
| 14 |
|
* notice, this list of conditions and the following disclaimer in the |
| 15 |
|
* documentation and/or other materials provided with the distribution. |
| 16 |
|
* |
| 17 |
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
| 18 |
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 20 |
|
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
| 21 |
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 22 |
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 23 |
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 24 |
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 25 |
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 26 |
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 27 |
|
* SUCH DAMAGE. |
| 28 |
|
*/ |
| 29 |
|
|
| 30 |
|
#include "config.h" |
| 31 |
|
|
| 32 |
|
#include <ctype.h> |
| 33 |
|
#include <string.h> |
| 34 |
|
#include <unistd.h> |
| 35 |
|
|
| 36 |
|
#include "vdef.h" |
| 37 |
|
|
| 38 |
|
#include "vas.h" // XXX Flexelint "not used" - but req'ed for assert() |
| 39 |
|
#include "vsb.h" |
| 40 |
|
#include "miniobj.h" |
| 41 |
|
|
| 42 |
|
#include "vre.h" |
| 43 |
|
#include "vre_pcre2.h" |
| 44 |
|
|
| 45 |
|
/* should be turned into an error sooner or later */ |
| 46 |
|
#if !defined(pcre2_set_depth_limit) |
| 47 |
|
# warning pcre2 missing pcre2_set_depth_limit - update recommended |
| 48 |
|
# define pcre2_set_depth_limit(r, d) pcre2_set_recursion_limit(r, d) |
| 49 |
|
#endif |
| 50 |
|
|
| 51 |
|
/*
 * Sentinel value for vre::re marking a "packed" regex, i.e. one produced
 * by VRE_export(): the compiled pattern bytes live inline, immediately
 * after the struct vre header (see VRE_unpack()).
 */
#define VRE_PACKED_RE (pcre2_code *)(-1)

struct vre {
	unsigned magic;
#define VRE_MAGIC 0xe83097dc
	pcre2_code *re;			/* compiled pattern, or VRE_PACKED_RE */
	pcre2_match_context *re_ctx;	/* per-regex match limits; NULL when packed */
};
| 59 |
|
|
| 60 |
|
/*
 * We don't want to spread or even expose the majority of PCRE2 options
 * and errors so we establish our own symbols and implement hard linkage
 * to PCRE2 here.
 */
/* returned by matching functions when the subject did not match */
const int VRE_ERROR_NOMATCH = PCRE2_ERROR_NOMATCH;

/* compile option: case-insensitive matching */
const unsigned VRE_CASELESS = PCRE2_CASELESS;
| 68 |
|
|
| 69 |
|
vre_t * |
| 70 |
14220 |
VRE_compile(const char *pattern, unsigned options, |
| 71 |
|
int *errptr, int *erroffset, unsigned jit) |
| 72 |
|
{ |
| 73 |
|
PCRE2_SIZE erroff; |
| 74 |
|
vre_t *v; |
| 75 |
|
|
| 76 |
14220 |
AN(pattern); |
| 77 |
14220 |
AN(errptr); |
| 78 |
14220 |
AN(erroffset); |
| 79 |
|
|
| 80 |
14220 |
*errptr = 0; |
| 81 |
14220 |
*erroffset = -1; |
| 82 |
|
|
| 83 |
14220 |
ALLOC_OBJ(v, VRE_MAGIC); |
| 84 |
14220 |
if (v == NULL) { |
| 85 |
0 |
*errptr = PCRE2_ERROR_NOMEMORY; |
| 86 |
0 |
return (NULL); |
| 87 |
|
} |
| 88 |
28440 |
v->re = pcre2_compile((PCRE2_SPTR8)pattern, PCRE2_ZERO_TERMINATED, |
| 89 |
14220 |
options, errptr, &erroff, NULL); |
| 90 |
14220 |
*erroffset = erroff; |
| 91 |
14220 |
if (v->re == NULL) { |
| 92 |
5 |
VRE_free(&v); |
| 93 |
5 |
return (NULL); |
| 94 |
|
} |
| 95 |
14215 |
v->re_ctx = pcre2_match_context_create(NULL); |
| 96 |
14215 |
if (v->re_ctx == NULL) { |
| 97 |
0 |
*errptr = PCRE2_ERROR_NOMEMORY; |
| 98 |
0 |
VRE_free(&v); |
| 99 |
0 |
return (NULL); |
| 100 |
|
} |
| 101 |
|
#if USE_PCRE2_JIT |
| 102 |
14215 |
if (jit) |
| 103 |
5606 |
(void)pcre2_jit_compile(v->re, PCRE2_JIT_COMPLETE); |
| 104 |
|
#else |
| 105 |
|
(void)jit; |
| 106 |
|
#endif |
| 107 |
14215 |
return (v); |
| 108 |
14220 |
} |
| 109 |
|
|
| 110 |
|
int |
| 111 |
6 |
VRE_error(struct vsb *vsb, int err) |
| 112 |
|
{ |
| 113 |
|
char buf[VRE_ERROR_LEN]; |
| 114 |
|
int i; |
| 115 |
|
|
| 116 |
6 |
CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC); |
| 117 |
6 |
i = pcre2_get_error_message(err, (PCRE2_UCHAR *)buf, VRE_ERROR_LEN); |
| 118 |
6 |
if (i == PCRE2_ERROR_BADDATA) { |
| 119 |
0 |
VSB_printf(vsb, "unknown pcre2 error code (%d)", err); |
| 120 |
0 |
return (-1); |
| 121 |
|
} |
| 122 |
6 |
VSB_cat(vsb, buf); |
| 123 |
6 |
return (0); |
| 124 |
6 |
} |
| 125 |
|
|
| 126 |
|
pcre2_code * |
| 127 |
8021 |
VRE_unpack(const vre_t *code) |
| 128 |
|
{ |
| 129 |
|
|
| 130 |
|
/* XXX: The ban code ensures that regex "lumps" are pointer-aligned, |
| 131 |
|
* but coming for example from a VMOD there is no guarantee. Should |
| 132 |
|
* we formally require that code is properly aligned? |
| 133 |
|
*/ |
| 134 |
8021 |
CHECK_OBJ_NOTNULL(code, VRE_MAGIC); |
| 135 |
8021 |
if (code->re == VRE_PACKED_RE) { |
| 136 |
23 |
AZ(code->re_ctx); |
| 137 |
23 |
return (TRUST_ME(code + 1)); |
| 138 |
|
} |
| 139 |
7998 |
return (code->re); |
| 140 |
8021 |
} |
| 141 |
|
|
| 142 |
|
static void |
| 143 |
7982 |
vre_limit(const vre_t *code, const volatile struct vre_limits *lim) |
| 144 |
|
{ |
| 145 |
|
|
| 146 |
7982 |
CHECK_OBJ_NOTNULL(code, VRE_MAGIC); |
| 147 |
|
|
| 148 |
7982 |
if (lim == NULL) |
| 149 |
2665 |
return; |
| 150 |
|
|
| 151 |
5317 |
assert(code->re != VRE_PACKED_RE); |
| 152 |
|
|
| 153 |
|
/* XXX: not reentrant */ |
| 154 |
5317 |
AN(code->re_ctx); |
| 155 |
5317 |
AZ(pcre2_set_match_limit(code->re_ctx, lim->match)); |
| 156 |
5317 |
AZ(pcre2_set_depth_limit(code->re_ctx, lim->depth)); |
| 157 |
7982 |
} |
| 158 |
|
|
| 159 |
|
vre_t * |
| 160 |
29 |
VRE_export(const vre_t *code, size_t *sz) |
| 161 |
|
{ |
| 162 |
|
pcre2_code *re; |
| 163 |
|
vre_t *exp; |
| 164 |
|
|
| 165 |
29 |
CHECK_OBJ_NOTNULL(code, VRE_MAGIC); |
| 166 |
29 |
re = VRE_unpack(code); |
| 167 |
29 |
AZ(pcre2_pattern_info(re, PCRE2_INFO_SIZE, sz)); |
| 168 |
|
|
| 169 |
29 |
exp = malloc(sizeof(*exp) + *sz); |
| 170 |
29 |
if (exp == NULL) |
| 171 |
0 |
return (NULL); |
| 172 |
|
|
| 173 |
29 |
INIT_OBJ(exp, VRE_MAGIC); |
| 174 |
29 |
exp->re = VRE_PACKED_RE; |
| 175 |
29 |
memcpy(exp + 1, re, *sz); |
| 176 |
29 |
*sz += sizeof(*exp); |
| 177 |
29 |
return (exp); |
| 178 |
29 |
} |
| 179 |
|
|
| 180 |
|
/*
 * Run a pcre2 match over subject and optionally extract capture groups.
 *
 * datap, when non-NULL, implements match-data reuse across calls: a
 * non-NULL *datap is consumed (ownership taken, *datap cleared), and on
 * a successful match the block is handed back through *datap; otherwise
 * it is freed here.  groups/count, when non-NULL, receive up to *count
 * capture groups as txt spans pointing into subject, and *count is
 * updated to the number actually filled in.  Returns the raw
 * pcre2_match() result: match count on success, VRE_ERROR_NOMATCH or
 * another negative pcre2 error code otherwise.
 */
static int
vre_capture(const vre_t *code, const char *subject, size_t length,
    size_t offset, int options, txt *groups, size_t *count,
    pcre2_match_data **datap)
{
	pcre2_match_data *data;
	pcre2_code *re;
	PCRE2_SIZE *ovector, b, e;
	size_t nov, g;
	int matches;

	re = VRE_unpack(code);

	if (datap != NULL && *datap != NULL) {
		/* Reuse the caller's match data block, taking ownership. */
		data = *datap;
		*datap = NULL;
	} else {
		data = pcre2_match_data_create_from_pattern(re, NULL);
		AN(data);
	}

	/*
	 * Clear every ovector slot up front so that groups not touched by
	 * this match (e.g. stale offsets in a reused block) read back as
	 * PCRE2_UNSET below.
	 */
	ovector = pcre2_get_ovector_pointer(data);
	nov = 2L * pcre2_get_ovector_count(data);
	for (g = 0; g < nov; g++)
		ovector[g] = PCRE2_UNSET;

	matches = pcre2_match(re, (PCRE2_SPTR)subject, length, offset,
	    options, data, code->re_ctx);

	if (groups != NULL) {
		AN(count);
		AN(*count);
		ovector = pcre2_get_ovector_pointer(data);
		/* Report no more groups than the caller has room for. */
		nov = vmin_t(size_t, pcre2_get_ovector_count(data), *count);
		for (g = 0; g < nov; g++) {
			b = ovector[2 * g];
			e = ovector[2 * g + 1];
			if (b == PCRE2_UNSET) {
				/* Unmatched group: map to the empty string. */
				groups->b = groups->e = "";
			} else {
				groups->b = subject + b;
				groups->e = subject + e;
			}
			groups++;
		}
		*count = nov;
	}

	/* Hand the block back for reuse only after a successful match. */
	if (datap != NULL && matches > VRE_ERROR_NOMATCH)
		*datap = data;
	else
		pcre2_match_data_free(data);
	return (matches);
}
| 234 |
|
|
| 235 |
|
int |
| 236 |
7921 |
VRE_match(const vre_t *code, const char *subject, size_t length, |
| 237 |
|
int options, const volatile struct vre_limits *lim) |
| 238 |
|
{ |
| 239 |
|
|
| 240 |
7921 |
CHECK_OBJ_NOTNULL(code, VRE_MAGIC); |
| 241 |
7921 |
AN(subject); |
| 242 |
|
|
| 243 |
7921 |
if (length == 0) |
| 244 |
5444 |
length = PCRE2_ZERO_TERMINATED; |
| 245 |
7921 |
vre_limit(code, lim); |
| 246 |
7921 |
return (vre_capture(code, subject, length, 0, options, |
| 247 |
|
NULL, NULL, NULL)); |
| 248 |
|
} |
| 249 |
|
|
| 250 |
|
int |
| 251 |
0 |
VRE_capture(const vre_t *code, const char *subject, size_t length, int options, |
| 252 |
|
txt *groups, size_t count, const volatile struct vre_limits *lim) |
| 253 |
|
{ |
| 254 |
|
int i; |
| 255 |
|
|
| 256 |
0 |
CHECK_OBJ_NOTNULL(code, VRE_MAGIC); |
| 257 |
0 |
AN(subject); |
| 258 |
0 |
AN(groups); |
| 259 |
0 |
AN(count); |
| 260 |
|
|
| 261 |
0 |
if (length == 0) |
| 262 |
0 |
length = PCRE2_ZERO_TERMINATED; |
| 263 |
0 |
vre_limit(code, lim); |
| 264 |
0 |
i = vre_capture(code, subject, length, 0, options, |
| 265 |
0 |
groups, &count, NULL); |
| 266 |
|
|
| 267 |
0 |
if (i <= 0) |
| 268 |
0 |
return (i); |
| 269 |
0 |
return (count); |
| 270 |
0 |
} |
| 271 |
|
|
| 272 |
|
int |
| 273 |
62 |
VRE_sub(const vre_t *code, const char *subject, const char *replacement, |
| 274 |
|
struct vsb *vsb, const volatile struct vre_limits *lim, int all) |
| 275 |
|
{ |
| 276 |
62 |
pcre2_match_data *data = NULL; |
| 277 |
|
txt groups[10]; |
| 278 |
|
size_t count; |
| 279 |
62 |
int i, offset = 0; |
| 280 |
|
const char *s, *e; |
| 281 |
|
unsigned x; |
| 282 |
|
|
| 283 |
62 |
CHECK_OBJ_NOTNULL(code, VRE_MAGIC); |
| 284 |
62 |
CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC); |
| 285 |
62 |
AN(subject); |
| 286 |
62 |
AN(replacement); |
| 287 |
|
|
| 288 |
62 |
vre_limit(code, lim); |
| 289 |
62 |
count = 10; |
| 290 |
124 |
i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset, 0, |
| 291 |
62 |
groups, &count, &data); |
| 292 |
|
|
| 293 |
62 |
if (i <= VRE_ERROR_NOMATCH) { |
| 294 |
25 |
AZ(data); |
| 295 |
25 |
return (i); |
| 296 |
|
} |
| 297 |
|
|
| 298 |
37 |
do { |
| 299 |
39 |
AN(data); /* check reuse across successful captures */ |
| 300 |
39 |
AN(count); |
| 301 |
|
|
| 302 |
|
/* Copy prefix to match */ |
| 303 |
39 |
s = subject + offset; |
| 304 |
39 |
VSB_bcat(vsb, s, pdiff(s, groups[0].b)); |
| 305 |
210 |
for (s = e = replacement; *e != '\0'; e++ ) { |
| 306 |
171 |
if (*e != '\\' || e[1] == '\0') |
| 307 |
141 |
continue; |
| 308 |
30 |
VSB_bcat(vsb, s, pdiff(s, e)); |
| 309 |
30 |
s = ++e; |
| 310 |
30 |
if (isdigit(*e)) { |
| 311 |
26 |
s++; |
| 312 |
26 |
x = *e - '0'; |
| 313 |
26 |
if (x >= count) |
| 314 |
6 |
continue; |
| 315 |
20 |
VSB_bcat(vsb, groups[x].b, Tlen(groups[x])); |
| 316 |
20 |
continue; |
| 317 |
|
} |
| 318 |
4 |
} |
| 319 |
39 |
VSB_bcat(vsb, s, pdiff(s, e)); |
| 320 |
39 |
offset = pdiff(subject, groups[0].e); |
| 321 |
39 |
if (!all) |
| 322 |
28 |
break; |
| 323 |
11 |
count = 10; |
| 324 |
22 |
i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset, |
| 325 |
11 |
PCRE2_NOTEMPTY, groups, &count, &data); |
| 326 |
|
|
| 327 |
11 |
if (i < VRE_ERROR_NOMATCH) { |
| 328 |
0 |
AZ(data); |
| 329 |
0 |
return (i); |
| 330 |
|
} |
| 331 |
11 |
} while (i != VRE_ERROR_NOMATCH); |
| 332 |
|
|
| 333 |
37 |
if (data != NULL) { |
| 334 |
28 |
assert(i > VRE_ERROR_NOMATCH); |
| 335 |
28 |
AZ(all); |
| 336 |
28 |
pcre2_match_data_free(data); |
| 337 |
28 |
} |
| 338 |
|
|
| 339 |
|
/* Copy suffix to match */ |
| 340 |
37 |
VSB_cat(vsb, subject + offset); |
| 341 |
37 |
return (1); |
| 342 |
62 |
} |
| 343 |
|
|
| 344 |
|
void |
| 345 |
10716 |
VRE_free(vre_t **vv) |
| 346 |
|
{ |
| 347 |
|
vre_t *v; |
| 348 |
|
|
| 349 |
10716 |
TAKE_OBJ_NOTNULL(v, vv, VRE_MAGIC); |
| 350 |
|
|
| 351 |
10716 |
if (v->re == VRE_PACKED_RE) { |
| 352 |
29 |
v->re = NULL; |
| 353 |
29 |
AZ(v->re_ctx); |
| 354 |
29 |
} |
| 355 |
|
|
| 356 |
10716 |
if (v->re_ctx != NULL) |
| 357 |
10682 |
pcre2_match_context_free(v->re_ctx); |
| 358 |
10716 |
if (v->re != NULL) |
| 359 |
10682 |
pcre2_code_free(v->re); |
| 360 |
10716 |
FREE_OBJ(v); |
| 361 |
10716 |
} |
| 362 |
|
|
| 363 |
|
void |
| 364 |
6 |
VRE_quote(struct vsb *vsb, const char *src) |
| 365 |
|
{ |
| 366 |
|
const char *b, *e; |
| 367 |
|
|
| 368 |
6 |
CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC); |
| 369 |
6 |
if (src == NULL) |
| 370 |
0 |
return; |
| 371 |
9 |
for (b = src; (e = strstr(b, "\\E")) != NULL; b = e + 2) |
| 372 |
3 |
VSB_printf(vsb, "\\Q%.*s\\\\EE", (int)(e - b), b); |
| 373 |
6 |
if (*b != '\0') |
| 374 |
3 |
VSB_printf(vsb, "\\Q%s\\E", b); |
| 375 |
6 |
} |