[master] 6014912e7 vre: Migrate to pcre2
Dridi Boukelmoune
dridi.boukelmoune at gmail.com
Tue Jul 6 17:51:06 UTC 2021
commit 6014912e74de7989e37b7be2737cef370b7147ba
Author: Dridi Boukelmoune <dridi.boukelmoune at gmail.com>
Date: Mon Jun 28 17:42:01 2021 +0200
vre: Migrate to pcre2
Now that VRE is the only regular expression API we use, we can migrate
its backend to pcre2. The existing 'pcre_*' parameters are also renamed
to reflect this migration, and 'pcre_match_limit_recursion' gets special
treatment and is renamed to pcre2_depth_limit.
This creates an additional API breakage in VRE: the `match_recursion`
field in `struct vre_limits` is renamed to `depth`. One last breakage is
the removal of `VRE_has_jit`, which was used only by the undocumented
`pcre_jit` varnishtest feature; that feature is removed as well, since
its only user was one test case (r01576) that no longer fails.
The pcre jit compilation feature was broken anyway: sealing it at
compile time will not reflect what VRE actually links to. Once we have
a test case needing the jit feature, we can introduce a better API for
that check.
There is one outstanding performance problem: the ovector that was
previously allocated on the stack now needs to be allocated from the
heap. It might be possible to implement a pcre2 context to fix that or
maybe pool them, but for now we have heap allocations on the critical
path. The VRE_sub() function makes sure to make a single ovector
allocation (technically a pcre2_match_data allocation) since it's the
only one guaranteed to loop on a single regular expression for the
`regsuball()` use case.
On the documentation front, the SmartOS installation instructions are
hidden for lack of a pcre2 package.
Closes #3616
Closes #3559
diff --git a/.circleci/Dockerfile b/.circleci/Dockerfile
index 7553611e0..71bbce5be 100644
--- a/.circleci/Dockerfile
+++ b/.circleci/Dockerfile
@@ -10,6 +10,6 @@ RUN set -e;\
libtool \
libunwind-devel \
make \
- pcre-devel \
+ pcre2-devel \
python3 \
python-sphinx
diff --git a/.circleci/Dockerfile.alpine b/.circleci/Dockerfile.alpine
index c09aa9d5a..b9749f284 100644
--- a/.circleci/Dockerfile.alpine
+++ b/.circleci/Dockerfile.alpine
@@ -14,7 +14,7 @@ RUN set -e; \
libtool \
libunwind-dev \
linux-headers \
- pcre-dev \
+ pcre2-dev \
py-docutils \
py3-sphinx \
tar
diff --git a/.circleci/Dockerfile.archlinux b/.circleci/Dockerfile.archlinux
index 1e6344354..c90140d43 100644
--- a/.circleci/Dockerfile.archlinux
+++ b/.circleci/Dockerfile.archlinux
@@ -10,7 +10,7 @@ RUN set -e; \
libtool \
libunwind \
linux-headers \
- pcre \
+ pcre2 \
python-docutils \
python-sphinx \
tar
diff --git a/.circleci/Dockerfile.ubuntu b/.circleci/Dockerfile.ubuntu
index cc50bd3a0..7c9920a19 100644
--- a/.circleci/Dockerfile.ubuntu
+++ b/.circleci/Dockerfile.ubuntu
@@ -15,7 +15,7 @@ RUN set -e; \
libedit-dev \
libjemalloc-dev \
libncurses-dev \
- libpcre3-dev \
+ libpcre2-dev \
libtool \
libunwind-dev \
pkg-config \
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 950f0b9cd..95df2a5e2 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -33,7 +33,7 @@ jobs:
libtool \
libunwind-devel \
make \
- pcre-devel \
+ pcre2-devel \
python3 \
python-sphinx
- checkout
@@ -201,7 +201,7 @@ jobs:
libtool \
libunwind-devel \
make \
- pcre-devel \
+ pcre2-devel \
python3 \
sudo
elif [ << parameters.dist >> = debian -o << parameters.dist >> = ubuntu ]; then
@@ -219,7 +219,7 @@ jobs:
libedit-dev \
libjemalloc-dev \
libncurses-dev \
- libpcre3-dev \
+ libpcre2-dev \
libtool \
libunwind-dev \
pkg-config \
@@ -239,7 +239,7 @@ jobs:
libtool \
libunwind-dev \
linux-headers \
- pcre-dev \
+ pcre2-dev \
py-docutils \
py3-sphinx \
tar \
@@ -258,7 +258,7 @@ jobs:
libtool \
libunwind \
linux-headers \
- pcre \
+ pcre2 \
python-docutils \
python-sphinx \
tar
diff --git a/.travis.yml b/.travis.yml
index af4c0b79a..be91d56d2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -21,7 +21,7 @@ jobs:
- python3-docutils
- python3-sphinx
- libunwind-dev
- - libpcre3-dev
+ - libpcre2-dev
before_script:
- ./autogen.sh
- ./configure --enable-maintainer-mode --with-unwind
diff --git a/bin/varnishd/cache/cache_vrt_re.c b/bin/varnishd/cache/cache_vrt_re.c
index 4cb5e7b17..4fc352364 100644
--- a/bin/varnishd/cache/cache_vrt_re.c
+++ b/bin/varnishd/cache/cache_vrt_re.c
@@ -46,7 +46,7 @@ VPI_re_init(vre_t **rep, const char *re)
/* This was already check-compiled by the VCL compiler */
t = VRE_compile(re, 0, &error, &erroroffset,
- cache_param->pcre_jit_compilation);
+ cache_param->pcre2_jit_compilation);
AN(t);
*rep = t;
}
diff --git a/bin/varnishd/flint.lnt b/bin/varnishd/flint.lnt
index 8f04936be..0d8f5c5e7 100644
--- a/bin/varnishd/flint.lnt
+++ b/bin/varnishd/flint.lnt
@@ -80,7 +80,7 @@
-emacro(835, O_LARGEFILE) // Info 835: A zero has been given as left argument to operator '<<'
-emacro(845, HTTPH) // Info 845: The left argument to operator '&&' is certain to be 0
--esym(773, PCRE_DATE) // Expression-like macro '___' not parenthesized
+-esym(773, PCRE2_DATE) // Expression-like macro '___' not parenthesized
//////////////
// Macros defined differently in each VMOD
diff --git a/bin/varnishtest/tests/r01576.vtc b/bin/varnishtest/tests/r01576.vtc
deleted file mode 100644
index 5dc0be6ec..000000000
--- a/bin/varnishtest/tests/r01576.vtc
+++ /dev/null
@@ -1,52 +0,0 @@
-varnishtest "Test recursive regexp's fail before consuming all the stack"
-
-feature pcre_jit
-
-# If you want to play around, uncomment the next lines and adjust
-# the length of the ABAB strings below to suit your needs.
-# Better yet: Rewrite your regexps to avoid this madness.
-
-# varnish v1 -arg "-p thread_pool_stack=48k"
-# varnish v1 -arg "-p pcre_match_limit=1000"
-# varnish v1 -arg "-p pcre_match_limit_recursion=89"
-
-# Approximate formula for FreeBSD/amd64:
-# pcre_match_limit_recursion = thread_pool_stack * 2 - 9
-
-# -p: use 64bit defaults also on 32bit
-varnish v1 -arg "-p workspace_client=64k -p http_req_size=32k" -vcl+backend {
- backend proforma none;
-
- sub vcl_recv {
- return (synth(200));
- }
- sub vcl_synth {
- # shamelessly copied from "bugzilla77 at gmail dot com"
- # https://bugs.php.net/bug.php?id=70110
- if (req.url ~ "^/(A{1,2}B)+$") {
- set resp.http.found = "1";
- }
- }
-} -start
-
-# This should succeed with default params and JIT/no-JIT
-client c1 {
- txreq -url /ABAABABAABABABAB
- rxresp
- expect resp.status == 200
- expect resp.http.found == 1
-} -run
-
-# PCRE_ERROR_RECURSIONLIMIT (-21)
-# PCRE_ERROR_JIT_STACKLIMIT (-27)
-logexpect l1 -v v1 {
- expect * * VCL_Error "Regexp matching returned -2[71]"
-} -start
-
-# This should fail with default params and JIT/no-JIT
-client c1 {
- txreq -url
- expect_close
-} -run
-
-logexpect l1 -wait
diff --git a/bin/varnishtest/tests/r01644.vtc b/bin/varnishtest/tests/r01644.vtc
index a6e4fbfa0..e5986d626 100644
--- a/bin/varnishtest/tests/r01644.vtc
+++ b/bin/varnishtest/tests/r01644.vtc
@@ -16,7 +16,7 @@ varnish v1 -vcl+backend {
} -start
-varnish v1 -cliok "param.set pcre_match_limit 100"
+varnish v1 -cliok "param.set pcre2_match_limit 100"
client c1 {
txreq
@@ -24,7 +24,7 @@ client c1 {
expect resp.http.foo == 100
} -run
-varnish v1 -cliok "param.set pcre_match_limit 200"
+varnish v1 -cliok "param.set pcre2_match_limit 200"
client c1 {
txreq
diff --git a/bin/varnishtest/tests/u00006.vtc b/bin/varnishtest/tests/u00006.vtc
index b2d7dcef9..e420aa18d 100644
--- a/bin/varnishtest/tests/u00006.vtc
+++ b/bin/varnishtest/tests/u00006.vtc
@@ -44,7 +44,7 @@ shell -err -expect {-I: "foo" matches zero tags} \
"varnishlog -I foo:bar"
shell -err -expect {-I: "Resp" is ambiguous} \
"varnishlog -I Resp:bar"
-shell -err -expect {-I: Regex error at position 4 (pcre error 14)} \
+shell -err -expect {-I: Regex error at position 4 (missing closing parenthesis)} \
{varnishlog -I "(foo"}
shell -err -expect "-t: Invalid argument" \
"varnishlog -t -1"
diff --git a/bin/varnishtest/tests/v00004.vtc b/bin/varnishtest/tests/v00004.vtc
index 8760279e5..805543c55 100644
--- a/bin/varnishtest/tests/v00004.vtc
+++ b/bin/varnishtest/tests/v00004.vtc
@@ -10,7 +10,7 @@ feature cmd {test -z "$GCOVPROG"}
# - 2019 header madness
# - 5 ESI levels down
# - 10 VCL subs down
-# - PCRE regsub
+# - PCRE2 regsub
server s1 {
rxreq
diff --git a/bin/varnishtest/vtc_misc.c b/bin/varnishtest/vtc_misc.c
index 5133ece37..eaf9a99a7 100644
--- a/bin/varnishtest/vtc_misc.c
+++ b/bin/varnishtest/vtc_misc.c
@@ -504,7 +504,6 @@ cmd_feature(CMD_ARGS)
FEATURE("ipv4", ipvx_works("127.0.0.1"));
FEATURE("ipv6", ipvx_works("[::1]"));
- FEATURE("pcre_jit", VRE_has_jit);
FEATURE("64bit", sizeof(void*) == 8);
FEATURE("disable_aslr", addr_no_randomize_works());
FEATURE("dns", dns_works());
diff --git a/configure.ac b/configure.ac
index 6a2874f8d..2b96413d6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -111,67 +111,50 @@ AC_SUBST(LIBM)
m4_ifndef([PKG_PROG_PKG_CONFIG], [m4_fatal([pkg.m4 missing, please install pkg-config])])
PKG_PROG_PKG_CONFIG
if test -n $PKG_CONFIG; then
- PKG_CHECK_MODULES([PCRE], [libpcre])
+ PKG_CHECK_MODULES([PCRE2], [libpcre2-8])
else
- AC_CHECK_PROG(PCRE_CONFIG, pcre-config, pcre-config)
- AC_ARG_WITH(pcre-config,
- AS_HELP_STRING([--with-pcre-config=PATH],
- [Location of PCRE pcre-config (auto)]),
- [pcre_config="$withval"],
- [pcre_config=""])
-
- if test "x$pcre_config" != "x" ; then
- AC_MSG_CHECKING(for $pcre_config)
-
- if test -f $pcre_config ; then
- PCRE_CONFIG=$pcre_config
+ AC_CHECK_PROG(PCRE2_CONFIG, pcre2-config, pcre2-config)
+ AC_ARG_WITH(pcre2-config,
+ AS_HELP_STRING([--with-pcre2-config=PATH],
+ [Location of PCRE2 pcre2-config (auto)]),
+ [pcre2_config="$withval"],
+ [pcre2_config=""])
+
+ if test "x$pcre2_config" != "x" ; then
+ AC_MSG_CHECKING(for $pcre2_config)
+
+ if test -f $pcre2_config ; then
+ PCRE2_CONFIG=$pcre2_config
AC_MSG_RESULT(yes)
else
AC_MSG_RESULT(no - searching PATH)
fi
fi
- if test "x$PCRE_CONFIG" = "x"; then
- AC_CHECK_PROGS(PCRE_CONFIG, pcre-config)
+ if test "x$PCRE2_CONFIG" = "x"; then
+ AC_CHECK_PROGS(PCRE2_CONFIG, pcre2-config)
fi
- PCRE_CFLAGS=`$PCRE_CONFIG --cflags`
- PCRE_LIBS=`$PCRE_CONFIG --libs`
+ PCRE2_CFLAGS=`$PCRE2_CONFIG --cflags`
+ PCRE2_LIBS=`$PCRE2_CONFIG --libs8`
fi
-AC_SUBST(PCRE_CFLAGS)
-AC_SUBST(PCRE_LIBS)
+AC_SUBST(PCRE2_CFLAGS)
+AC_SUBST(PCRE2_LIBS)
+AC_DEFINE([PCRE2_CODE_UNIT_WIDTH], [8], [Work with 8-bit characters for PCRE2])
-# --enable-pcre-jit
-AC_ARG_ENABLE(pcre-jit,
- AS_HELP_STRING([--enable-pcre-jit],
- [use the PCRE JIT compiler (default is YES)]),
+save_LIBS="${LIBS}"
+LIBS="${LIBS} ${PCRE2_LIBS}"
+AC_CHECK_FUNCS([pcre2_set_depth_limit_8], [
+ AC_DEFINE([HAVE_PCRE2_SET_DEPTH_LIMIT], [1], [Use pcre2_set_depth_limit()])
+])
+LIBS="${save_LIBS}"
+
+# --enable-pcre2-jit
+AC_ARG_ENABLE(pcre2-jit,
+ AS_HELP_STRING([--enable-pcre2-jit],
+ [use the PCRE2 JIT compiler (default is YES)]),
[],
- [enable_pcre_jit=yes])
-if test "$enable_pcre_jit" = yes; then
- AC_MSG_CHECKING(for PCRE JIT usability)
- save_CFLAGS="${CFLAGS}"
- CFLAGS="${CFLAGS} ${PCRE_CFLAGS}"
- save_LIBS="${LIBS}"
- LIBS="${LIBS} ${PCRE_LIBS}"
- AC_RUN_IFELSE(
- [AC_LANG_PROGRAM([[
-#include <pcre.h>
-#if PCRE_MAJOR != 8 || PCRE_MINOR < 32
-#error no jit
-#endif
- ]],[[
-const char *error;
-pcre *re;
-int erroroffset;
-re = pcre_compile(".", 0, &error, &erroroffset, NULL);
-if (!pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error))
- return (1);
- ]])],
- [AC_MSG_RESULT(yes)
- AC_DEFINE([USE_PCRE_JIT], [1], [Use the PCRE JIT compiler])
- ],
- [AC_MSG_RESULT(no)]
- )
- CFLAGS="${save_CFLAGS}"
- LIBS="${save_LIBS}"
+ [enable_pcre2_jit=yes])
+if test "$enable_pcre2_jit" = yes; then
+ AC_DEFINE([USE_PCRE2_JIT], [1], [Use the PCRE2 JIT compiler])
fi
diff --git a/doc/sphinx/installation/install_source.rst b/doc/sphinx/installation/install_source.rst
index ef67e00b3..a8e88b8b2 100644
--- a/doc/sphinx/installation/install_source.rst
+++ b/doc/sphinx/installation/install_source.rst
@@ -29,7 +29,7 @@ Build dependencies on FreeBSD
To get the dependencies required to build varnish from source
you can either::
- pkg install automake pkgconf py36-sphinx py36-docutils pcre libtool
+ pkg install automake pkgconf py36-sphinx py36-docutils pcre2 libtool
.. XXX does cpio need to be installed on FreeBSD?
@@ -60,7 +60,7 @@ them (replace ``sudo apt-get install`` if needed)::
libedit-dev \
libjemalloc-dev \
libncurses-dev \
- libpcre3-dev \
+ libpcre2-dev \
libtool \
pkg-config \
python3-docutils \
@@ -114,7 +114,7 @@ packages::
libtool \
libunwind-devel \
ncurses-devel \
- pcre-devel \
+ pcre2-devel \
pkgconfig \
python3-docutils \
cpio
@@ -176,7 +176,7 @@ As of Alpine 3, these steps should install the required dependencies:
libtool \
libunwind-dev \
linux-headers \
- pcre-dev \
+ pcre2-dev \
py-docutils \
py3-sphinx \
tar \
@@ -195,24 +195,26 @@ Then continue `Compiling Varnish`_, using the ``--with-unwind``
.. _Alpine Community Repository: https://wiki.alpinelinux.org/wiki/Enable_Community_Repository
-Build dependencies on a SmartOS Zone
-------------------------------------
-
-As of SmartOS pkgsrc 2019Q4, install the following packages::
-
- pkgin in autoconf automake editline libtool ncurses \
- pcre python37 py37-sphinx py37-docutils gmake gcc8 pkg-config
-
-*Note:* you will probably need to add ``/opt/local/gcc8/bin`` to
-``PATH`` in order to have ``gcc`` available.
-
-Optionally, to rebuild the svg files::
-
- pkgin in graphviz
-
-Optionally, to pull from a repository::
-
- pkgin in git
+.. XXX: no pcre2 on SmartOS
+..
+.. Build dependencies on a SmartOS Zone
+.. ------------------------------------
+..
+.. As of SmartOS pkgsrc 2019Q4, install the following packages::
+..
+.. pkgin in autoconf automake editline libtool ncurses \
+.. pcre python37 py37-sphinx py37-docutils gmake gcc8 pkg-config
+..
+.. *Note:* you will probably need to add ``/opt/local/gcc8/bin`` to
+.. ``PATH`` in order to have ``gcc`` available.
+..
+.. Optionally, to rebuild the svg files::
+..
+.. pkgin in graphviz
+..
+.. Optionally, to pull from a repository::
+..
+.. pkgin in git
Building on Solaris and other Solaris-ish OSes
----------------------------------------------
diff --git a/doc/sphinx/reference/vsl-query.rst b/doc/sphinx/reference/vsl-query.rst
index e7778c186..4bfd3f348 100644
--- a/doc/sphinx/reference/vsl-query.rst
+++ b/doc/sphinx/reference/vsl-query.rst
@@ -267,7 +267,7 @@ The following types of operands are available:
* Regular expression
- A PCRE regular expression. Valid for the regular expression
+ A PCRE2 regular expression. Valid for the regular expression
operators.
Boolean functions
diff --git a/doc/sphinx/users-guide/increasing-your-hitrate.rst b/doc/sphinx/users-guide/increasing-your-hitrate.rst
index 10f4c47fb..8c49e5a8b 100644
--- a/doc/sphinx/users-guide/increasing-your-hitrate.rst
+++ b/doc/sphinx/users-guide/increasing-your-hitrate.rst
@@ -154,7 +154,7 @@ difficult. Unfortunately Varnish doesn't have good tools for
manipulating the Cookies. We have to use regular expressions to do the
work. If you are familiar with regular expressions you'll understand
whats going on. If you aren't we recommend that you either pick up a book on
-the subject, read through the *pcrepattern* man page, or read through
+the subject, read through the *pcre2pattern* man page, or read through
one of many online guides.
Lets use the Varnish Software (VS) web as an example here. Very
diff --git a/doc/sphinx/users-guide/storage-backends.rst b/doc/sphinx/users-guide/storage-backends.rst
index 7464c74fc..700d47aaa 100644
--- a/doc/sphinx/users-guide/storage-backends.rst
+++ b/doc/sphinx/users-guide/storage-backends.rst
@@ -90,7 +90,7 @@ to indicate that `libumem`_ will not only be used for storage. Likely
reasons for this to be the case are:
* some library ``varnishd`` is linked against was linked against
- `libumem`_ (most likely ``libpcre``, check with ``ldd``)
+ `libumem`_ (most likely ``libpcre2-8``, check with ``ldd``)
* ``LD_PRELOAD_64=/usr/lib/amd64/libumem.so.1``,
``LD_PRELOAD_32=/usr/lib/libumem.so.1`` or
diff --git a/include/tbl/params.h b/include/tbl/params.h
index c6ba88eb3..66c14b1e6 100644
--- a/include/tbl/params.h
+++ b/include/tbl/params.h
@@ -707,14 +707,14 @@ PARAM_SIMPLE(
)
PARAM_SIMPLE(
- /* name */ pcre_jit_compilation,
+ /* name */ pcre2_jit_compilation,
/* type */ boolean,
/* min */ NULL,
/* max */ NULL,
/* def */ "on",
/* units */ "bool",
/* descr */
- "Use the pcre JIT compiler if available."
+ "Use the pcre2 JIT compiler if available."
)
PARAM_SIMPLE(
@@ -1615,47 +1615,46 @@ PARAM_VCC(
)
/*--------------------------------------------------------------------
- * PCRE parameters
+ * PCRE2 parameters
*/
-# define PARAM_PCRE(nm, pv, min, def, descr) \
+# define PARAM_PCRE2(nm, pv, min, def, descr) \
PARAM(, , nm, tweak_uint, &mgt_param.vre_limits.pv, \
min, NULL, def, NULL, descr)
-PARAM_PCRE(
- /* name */ pcre_match_limit,
+PARAM_PCRE2(
+ /* name */ pcre2_match_limit,
/* priv */ match,
/* min */ "1",
/* def */ "10000",
/* descr */
- "The limit for the number of calls to the internal match()"
- " function in pcre_exec().\n\n"
- "(See: PCRE_EXTRA_MATCH_LIMIT in pcre docs.)\n\n"
+ "The limit for the number of calls to the internal match"
+ " logic in pcre2_match().\n\n"
+ "(See: pcre2_set_match_limit() in pcre2 docs.)\n\n"
"This parameter limits how much CPU time"
" regular expression matching can soak up."
)
-PARAM_PCRE(
- /* name */ pcre_match_limit_recursion,
- /* priv */ match_recursion,
+PARAM_PCRE2(
+ /* name */ pcre2_depth_limit,
+ /* priv */ depth,
/* min */ "1",
/* def */ "20",
/* descr */
- "The recursion depth-limit for the internal match() function"
- " in a pcre_exec().\n\n"
- "(See: PCRE_EXTRA_MATCH_LIMIT_RECURSION in pcre docs.)\n\n"
+ "The recursion depth-limit for the internal match logic"
+ " in a pcre2_match().\n\n"
+ "(See: pcre2_set_depth_limit() in pcre2 docs.)\n\n"
"This puts an upper limit on the amount of stack used"
- " by PCRE for certain classes of regular expressions.\n\n"
+ " by PCRE2 for certain classes of regular expressions.\n\n"
"We have set the default value low in order to"
" prevent crashes, at the cost of possible regexp"
" matching failures.\n\n"
"Matching failures will show up in the log as VCL_Error"
- " messages with regexp errors -27 or -21.\n\n"
- "Testcase r01576 can be useful when tuning this parameter."
+ " messages with regexp errors -27 or -21."
)
# undef PARAM_ALL
-# undef PARAM_PCRE
+# undef PARAM_PCRE2
# undef PARAM_STRING
# undef PARAM_VCC
#endif /* defined(PARAM_ALL) */
diff --git a/include/vre.h b/include/vre.h
index 31dd339ab..c206078da 100644
--- a/include/vre.h
+++ b/include/vre.h
@@ -29,7 +29,7 @@
*
* Regular expression support
*
- * We wrap PCRE in VRE to make to make it feasible to use something else
+ * We wrap PCRE2 in VRE to make it feasible to use something else
* without hunting down stuff through out the Varnish source code.
*
*/
@@ -44,16 +44,15 @@ struct vsb;
struct vre_limits {
unsigned match;
- unsigned match_recursion;
+ unsigned depth;
};
typedef struct vre vre_t;
-/* This maps to PCRE error codes */
-#define VRE_ERROR_NOMATCH (-1)
+/* This maps to PCRE2 error codes */
+extern const int VRE_ERROR_NOMATCH;
-/* And those to PCRE options */
-extern const unsigned VRE_has_jit;
+/* And those to PCRE2 options */
extern const unsigned VRE_CASELESS;
vre_t *VRE_compile(const char *, unsigned, int *, int *, unsigned);
diff --git a/lib/libvarnish/Makefile.am b/lib/libvarnish/Makefile.am
index 6d27522fb..eef530795 100644
--- a/lib/libvarnish/Makefile.am
+++ b/lib/libvarnish/Makefile.am
@@ -3,7 +3,7 @@
AM_CPPFLAGS = \
-I$(top_srcdir)/include \
-I$(top_builddir)/include \
- @PCRE_CFLAGS@
+ @PCRE2_CFLAGS@
AM_CFLAGS = $(AM_LT_CFLAGS) @SAN_CFLAGS@
AM_LDFLAGS = $(AM_LT_LDFLAGS) @SAN_LDFLAGS@
@@ -44,7 +44,7 @@ libvarnish_la_SOURCES = \
vtim.c \
vus.c
-libvarnish_la_LIBADD = @PCRE_LIBS@
+libvarnish_la_LIBADD = @PCRE2_LIBS@
TESTS = vav_test vjsn_test vnum_c_test vbh_test vsb_test
diff --git a/lib/libvarnish/vre.c b/lib/libvarnish/vre.c
index f4e54f864..dd8eb70c3 100644
--- a/lib/libvarnish/vre.c
+++ b/lib/libvarnish/vre.c
@@ -30,13 +30,12 @@
#include "config.h"
-#include <pcre.h>
#include <ctype.h>
-#include <stdint.h>
-#include <stdio.h>
#include <string.h>
#include <unistd.h>
+#include <pcre2.h>
+
#include "vdef.h"
#include "vas.h" // XXX Flexelint "not used" - but req'ed for assert()
@@ -45,48 +44,41 @@
#include "vre.h"
-#if defined(USE_PCRE_JIT)
-#define VRE_STUDY_JIT_COMPILE PCRE_STUDY_JIT_COMPILE
-#else
-#define VRE_STUDY_JIT_COMPILE 0
-#endif
-
-const unsigned VRE_has_jit = VRE_STUDY_JIT_COMPILE;
-
-#if PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20)
-# define pcre_free_study pcre_free
+#if !HAVE_PCRE2_SET_DEPTH_LIMIT
+# define pcre2_set_depth_limit(r, d) pcre2_set_recursion_limit(r, d)
#endif
-#define VRE_PACKED_RE (pcre *)(-1)
+#define VRE_PACKED_RE (pcre2_code *)(-1)
struct vre {
unsigned magic;
#define VRE_MAGIC 0xe83097dc
- pcre *re;
- pcre_extra *re_extra;
- int my_extra;
+ pcre2_code *re;
+ pcre2_match_context *re_ctx;
};
/*
- * We don't want to spread or even expose the majority of PCRE options
- * so we establish our own options and implement hard linkage to PCRE
- * here.
+ * We don't want to spread or even expose the majority of PCRE2 options
+ * and errors so we establish our own symbols and implement hard linkage
+ * to PCRE2 here.
*/
-const unsigned VRE_CASELESS = PCRE_CASELESS;
+const int VRE_ERROR_NOMATCH = PCRE2_ERROR_NOMATCH;
+
+const unsigned VRE_CASELESS = PCRE2_CASELESS;
/*
* Even though we only have one for each case so far, keep track of masks
* to differentiate between compile and match options and enfore the hard
* VRE linkage.
*/
-#define VRE_MASK_COMPILE PCRE_CASELESS
+#define VRE_MASK_COMPILE PCRE2_CASELESS
#define VRE_MASK_MATCH 0
vre_t *
VRE_compile(const char *pattern, unsigned options,
int *errptr, int *erroffset, unsigned jit)
{
- const char *errstr = NULL;
+ PCRE2_SIZE erroff;
vre_t *v;
AN(pattern);
@@ -99,70 +91,85 @@ VRE_compile(const char *pattern, unsigned options,
ALLOC_OBJ(v, VRE_MAGIC);
if (v == NULL) {
- *errptr = PCRE_ERROR_NOMEMORY;
+ *errptr = PCRE2_ERROR_NOMEMORY;
return (NULL);
}
- AZ(options & (~VRE_MASK_COMPILE));
- v->re = pcre_compile2(pattern, options, errptr, &errstr, erroffset,
- NULL);
+ v->re = pcre2_compile((PCRE2_SPTR8)pattern, PCRE2_ZERO_TERMINATED,
+ options, errptr, &erroff, NULL);
+ *erroffset = erroff;
if (v->re == NULL) {
VRE_free(&v);
return (NULL);
}
-
- errstr = NULL;
- if (jit)
- v->re_extra = pcre_study(v->re, VRE_STUDY_JIT_COMPILE, &errstr);
-
- if (errstr != NULL) {
- *errptr = PCRE_ERROR_INTERNAL;
+ v->re_ctx = pcre2_match_context_create(NULL);
+ if (v->re_ctx == NULL) {
+ *errptr = PCRE2_ERROR_NOMEMORY;
VRE_free(&v);
return (NULL);
}
- if (v->re_extra == NULL) {
- /* allocate our own */
- v->re_extra = calloc(1, sizeof(pcre_extra));
- v->my_extra = 1;
- if (v->re_extra == NULL) {
- *errptr = PCRE_ERROR_NOMEMORY;
- VRE_free(&v);
- return (NULL);
- }
- }
+#if USE_PCRE2_JIT
+ if (jit)
+ (void)pcre2_jit_compile(v->re, 0);
+#else
+ (void)jit;
+#endif
return (v);
}
int
VRE_error(struct vsb *vsb, int err)
{
+ char buf[VRE_ERROR_LEN];
+ int i;
CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
- VSB_printf(vsb, "pcre error %d", err);
+ i = pcre2_get_error_message(err, (PCRE2_UCHAR *)buf, VRE_ERROR_LEN);
+ if (i == PCRE2_ERROR_BADDATA) {
+ VSB_printf(vsb, "unknown pcre2 error code (%d)", err);
+ return (-1);
+ }
+ VSB_cat(vsb, buf);
return (0);
}
-static pcre *
+static pcre2_code *
vre_unpack(const vre_t *code)
{
CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
if (code->re == VRE_PACKED_RE) {
- AZ(code->re_extra);
- AZ(code->my_extra);
+ AZ(code->re_ctx);
return (TRUST_ME(code + 1));
}
return (code->re);
}
+static void
+vre_limit(const vre_t *code, const volatile struct vre_limits *lim)
+{
+
+ CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
+
+ if (lim == NULL)
+ return;
+
+ assert(code->re != VRE_PACKED_RE);
+
+ /* XXX: not reentrant */
+ AN(code->re_ctx);
+ pcre2_set_match_limit(code->re_ctx, lim->match);
+ pcre2_set_depth_limit(code->re_ctx, lim->depth);
+}
+
vre_t *
VRE_export(const vre_t *code, size_t *sz)
{
- pcre *re;
+ pcre2_code *re;
vre_t *exp;
CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
re = vre_unpack(code);
- AZ(pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, sz));
+ AZ(pcre2_pattern_info(re, PCRE2_INFO_SIZE, sz));
exp = malloc(sizeof(*exp) + *sz);
if (exp == NULL)
@@ -176,73 +183,75 @@ VRE_export(const vre_t *code, size_t *sz)
}
static int
-vre_exec(const vre_t *code, const char *subject, int length,
- int startoffset, int options, int *ovector, int ovecsize,
- const volatile struct vre_limits *lim)
+vre_match(const vre_t *code, const char *subject, size_t length, size_t offset,
+ int options, pcre2_match_data **datap)
{
- pcre *re;
+ pcre2_match_data *data;
+ pcre2_code *re;
+ int matches;
- CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
- AN(ovector);
-
- if (lim != NULL) {
- /* XXX: not reentrant */
- AN(code->re_extra);
- code->re_extra->match_limit = lim->match;
- code->re_extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
- code->re_extra->match_limit_recursion = lim->match_recursion;
- code->re_extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- } else if (code->re_extra != NULL) {
- code->re_extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
- code->re_extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+ re = vre_unpack(code);
+
+ if (datap != NULL && *datap != NULL) {
+ data = *datap;
+ *datap = NULL;
+ } else {
+ data = pcre2_match_data_create_from_pattern(re, NULL);
+ AN(data);
}
- re = vre_unpack(code);
- return (pcre_exec(re, code->re_extra, subject, length,
- startoffset, options, ovector, ovecsize));
+ matches = pcre2_match(re, (PCRE2_SPTR)subject, length, offset,
+ options, data, NULL);
+
+ if (datap != NULL && matches > VRE_ERROR_NOMATCH)
+ *datap = data;
+ else
+ pcre2_match_data_free(data);
+ return (matches);
}
int
VRE_match(const vre_t *code, const char *subject, size_t length,
int options, const volatile struct vre_limits *lim)
{
- int ovector[30];
CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
AN(subject);
AZ(options & (~VRE_MASK_MATCH));
if (length == 0)
- length = strlen(subject);
-
- return (vre_exec(code, subject, length, 0, options,
- ovector, 30, lim));
+ length = PCRE2_ZERO_TERMINATED;
+ vre_limit(code, lim);
+ return (vre_match(code, subject, length, 0, options, NULL));
}
int
VRE_sub(const vre_t *code, const char *subject, const char *replacement,
struct vsb *vsb, const volatile struct vre_limits *lim, int all)
{
- int ovector[30];
+ pcre2_match_data *data = NULL;
+ PCRE2_SIZE *ovector;
int i, l;
const char *s;
unsigned x;
int offset = 0;
- size_t len;
CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
AN(subject);
AN(replacement);
- memset(ovector, 0, sizeof(ovector));
- len = strlen(subject);
- i = vre_exec(code, subject, len, 0, 0, ovector, 30, lim);
+ vre_limit(code, lim);
+ i = vre_match(code, subject, PCRE2_ZERO_TERMINATED, offset, 0, &data);
if (i <= VRE_ERROR_NOMATCH)
return (i);
do {
+ AN(data);
+ ovector = pcre2_get_ovector_pointer(data);
+ AN(ovector);
+
/* Copy prefix to match */
VSB_bcat(vsb, subject + offset, ovector[0] - offset);
for (s = replacement; *s != '\0'; s++ ) {
@@ -262,15 +271,20 @@ VRE_sub(const vre_t *code, const char *subject, const char *replacement,
offset = ovector[1];
if (!all)
break;
- memset(ovector, 0, sizeof(ovector));
- i = vre_exec(code, subject, len, offset, PCRE_NOTEMPTY,
- ovector, 30, lim);
- if (i < VRE_ERROR_NOMATCH )
+ i = vre_match(code, subject, PCRE2_ZERO_TERMINATED, offset,
+ PCRE2_NOTEMPTY, &data);
+ if (i < VRE_ERROR_NOMATCH)
return (i);
} while (i != VRE_ERROR_NOMATCH);
+ if (data != NULL) {
+ assert(i > VRE_ERROR_NOMATCH);
+ AZ(all);
+ pcre2_match_data_free(data);
+ }
+
/* Copy suffix to match */
- VSB_bcat(vsb, subject + offset, 1 + len - offset);
+ VSB_cat(vsb, subject + offset);
return (1);
}
@@ -284,18 +298,13 @@ VRE_free(vre_t **vv)
if (v->re == VRE_PACKED_RE) {
v->re = NULL;
- AZ(v->re_extra);
- AZ(v->my_extra);
+ AZ(v->re_ctx);
}
- if (v->re_extra != NULL) {
- if (v->my_extra)
- free(v->re_extra);
- else
- pcre_free_study(v->re_extra);
- }
+ if (v->re_ctx != NULL)
+ pcre2_match_context_free(v->re_ctx);
if (v->re != NULL)
- pcre_free(v->re);
+ pcre2_code_free(v->re);
FREE_OBJ(v);
}
diff --git a/vmod/vmod_std.vcc b/vmod/vmod_std.vcc
index fd0bf1a4a..a220b7872 100644
--- a/vmod/vmod_std.vcc
+++ b/vmod/vmod_std.vcc
@@ -137,7 +137,7 @@ following:
* a bracket expression such as ``[abc]`` or ``[!0-9]`` is interpreted
as a character class according to the rules of basic regular
- expressions (*not* `pcre(3)` regexen), except that ``!`` is used for
+ expressions (*not* `pcre2(3)` regexen), except that ``!`` is used for
character class negation instead of ``^``.
If *pathname* is ``true``, then the forward slash character ``/`` is
More information about the varnish-commit
mailing list