[master] 0e3667c5c vre: Extract a VRE_sub() function from VRT_regsub()

Dridi Boukelmoune dridi.boukelmoune at gmail.com
Mon Jul 5 15:49:05 UTC 2021


commit 0e3667c5c45b2605cbac25c43df0e2a0d219c4be
Author: Dridi Boukelmoune <dridi.boukelmoune at gmail.com>
Date:   Fri Jun 18 17:03:42 2021 +0200

    vre: Extract a VRE_sub() function from VRT_regsub()
    
    This gives us a clean separation of VCL and pcre interactions.

diff --git a/bin/varnishd/cache/cache_vrt_re.c b/bin/varnishd/cache/cache_vrt_re.c
index 63dfb9c07..886c35a8b 100644
--- a/bin/varnishd/cache/cache_vrt_re.c
+++ b/bin/varnishd/cache/cache_vrt_re.c
@@ -38,20 +38,6 @@
 #include "cache_varnishd.h"
 #include "vcc_interface.h"
 
-static void
-Tadd(char **b, char *e, const char *p, int l)
-{
-	assert((*b) <= e);
-
-	if (l <= 0) {
-	} if ((*b) + l < e) {
-		memcpy((*b), p, l);
-		(*b) += l;
-	} else {
-		(*b) = e;
-	}
-}
-
 void
 VPI_re_init(vre_t **rep, const char *re)
 {
@@ -93,19 +79,12 @@ VRT_re_match(VRT_CTX, const char *s, VCL_REGEX re)
 }
 
 VCL_STRING
-VRT_regsub(VRT_CTX, int all, VCL_STRING str, VCL_REGEX re,
-    VCL_STRING sub)
+VRT_regsub(VRT_CTX, int all, VCL_STRING str, VCL_REGEX re, VCL_STRING sub)
 {
-	int ovector[30];
-	int i, l;
-	char *res_b;
-	char *res_e;
-	char *b0;
-	const char *s;
-	unsigned u, x;
-	int options = 0;
-	int offset = 0;
-	size_t len;
+	struct vsb vsb[1];
+	const char *res;
+	uintptr_t snap;
+	int i;
 
 	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
 	AN(re);
@@ -113,65 +92,18 @@ VRT_regsub(VRT_CTX, int all, VCL_STRING str, VCL_REGEX re,
 		str = "";
 	if (sub == NULL)
 		sub = "";
-	memset(ovector, 0, sizeof(ovector));
-	len = strlen(str);
-	i = VRE_exec(re, str, len, 0, options, ovector, 30,
-	    &cache_param->vre_limits);
-
-	/* If it didn't match, we can return the original string */
-	if (i == VRE_ERROR_NOMATCH)
-		return (str);
-	if (i < VRE_ERROR_NOMATCH ) {
-		VRT_fail(ctx, "Regexp matching returned %d", i);
-		return (str);
-	}
-
-	u = WS_ReserveAll(ctx->ws);
-	res_e = res_b = b0 = WS_Reservation(ctx->ws);
-	res_e += u;
-
-	do {
-		/* Copy prefix to match */
-		Tadd(&res_b, res_e, str + offset, ovector[0] - offset);
-		for (s = sub ; *s != '\0'; s++ ) {
-			if (*s != '\\' || s[1] == '\0') {
-				if (res_b < res_e)
-					*res_b++ = *s;
-				continue;
-			}
-			s++;
-			if (isdigit(*s)) {
-				x = *s - '0';
-				l = ovector[2*x+1] - ovector[2*x];
-				Tadd(&res_b, res_e, str + ovector[2*x], l);
-				continue;
-			} else {
-				if (res_b < res_e)
-					*res_b++ = *s;
-			}
-		}
-		offset = ovector[1];
-		if (!all)
-			break;
-		memset(ovector, 0, sizeof(ovector));
-		options |= VRE_NOTEMPTY;
-		i = VRE_exec(re, str, len, offset, options, ovector, 30,
-		    &cache_param->vre_limits);
-		if (i < VRE_ERROR_NOMATCH ) {
-			WS_Release(ctx->ws, 0);
-			VRT_fail(ctx, "Regexp matching returned %d", i);
-			return (str);
-		}
-	} while (i != VRE_ERROR_NOMATCH);
 
-	/* Copy suffix to match */
-	Tadd(&res_b, res_e, str + offset, 1 + len - offset);
-	if (res_b >= res_e) {
-		WS_MarkOverflow(ctx->ws);
-		WS_Release(ctx->ws, 0);
-		return (str);
-	}
-	assert(res_b <= res_e);
-	WS_ReleaseP(ctx->ws, res_b);
-	return (b0);
+	snap = WS_Snapshot(ctx->ws);
+	WS_VSB_new(vsb, ctx->ws);
+	i = VRE_sub(re, str, sub, vsb, &cache_param->vre_limits, all);
+	res = WS_VSB_finish(vsb, ctx->ws, NULL);
+
+	if (i < VRE_ERROR_NOMATCH)
+		VRT_fail(ctx, "regsub: Regexp matching returned %d", i);
+	else if (res == NULL)
+		VRT_fail(ctx, "regsub: Out of workspace");
+	else if (i > 0)
+		return (res);
+	WS_Reset(ctx->ws, snap);
+	return (str);
 }
diff --git a/include/vre.h b/include/vre.h
index 6dcf0e593..27158663a 100644
--- a/include/vre.h
+++ b/include/vre.h
@@ -59,6 +59,8 @@ vre_t *VRE_compile(const char *, unsigned, const char **, int *);
 int VRE_exec(const vre_t *code, const char *subject, int length,
     int startoffset, int options, int *ovector, int ovecsize,
     const volatile struct vre_limits *lim);
+int VRE_sub(const vre_t *code, const char *subject, const char *replacement,
+    struct vsb *vsb, const volatile struct vre_limits *lim, int all);
 void VRE_free(vre_t **);
 void VRE_quote(struct vsb *, const char *);
 
diff --git a/lib/libvarnish/vre.c b/lib/libvarnish/vre.c
index 60254aa4b..e7a15946d 100644
--- a/lib/libvarnish/vre.c
+++ b/lib/libvarnish/vre.c
@@ -31,6 +31,7 @@
 #include "config.h"
 
 #include <pcre.h>
+#include <ctype.h>
 #include <string.h>
 #include <unistd.h>
 
@@ -143,6 +144,63 @@ VRE_exec(const vre_t *code, const char *subject, int length,
 	    startoffset, options, ovector, ovecsize));
 }
 
+/*
+ * Append subject to vsb with the first match of code (all of them if all
+ * is set) replaced, \0..\9 in replacement being expanded from ovector.
+ * Returns 1 on success, else the VRE_exec() result that ended it early.
+ */
+int
+VRE_sub(const vre_t *code, const char *subject, const char *replacement,
+    struct vsb *vsb, const volatile struct vre_limits *lim, int all)
+{
+	int ovector[30];
+	int i, l, options = 0, offset = 0;
+	const char *s;
+	unsigned x;
+	size_t len;
+
+	CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
+	CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
+	AN(subject);
+	AN(replacement);
+
+	memset(ovector, 0, sizeof(ovector));
+	len = strlen(subject);
+	i = VRE_exec(code, subject, len, 0, options, ovector, 30, lim);
+	if (i <= VRE_ERROR_NOMATCH)
+		return (i);
+
+	do {
+		/* Copy prefix to match */
+		VSB_bcat(vsb, subject + offset, ovector[0] - offset);
+		for (s = replacement; *s != '\0'; s++ ) {
+			if (*s != '\\' || s[1] == '\0') {
+				VSB_putc(vsb, *s);
+				continue;
+			}
+			s++;
+			if (isdigit((unsigned char)*s)) {
+				x = *s - '0';
+				l = ovector[2*x+1] - ovector[2*x];
+				VSB_bcat(vsb, subject + ovector[2*x], l);
+			} else
+				VSB_putc(vsb, *s);
+		}
+		offset = ovector[1];
+		if (!all)
+			break;
+		memset(ovector, 0, sizeof(ovector));
+		options |= VRE_NOTEMPTY;
+		i = VRE_exec(code, subject, len, offset, options, ovector, 30,
+		    lim);
+		if (i < VRE_ERROR_NOMATCH )
+			return (i);
+	} while (i != VRE_ERROR_NOMATCH);
+	/* Copy suffix to match, minus the NUL that is not part of the text */
+	VSB_cat(vsb, subject + offset);
+	return (1);
+}
+
 void
 VRE_free(vre_t **vv)
 {


More information about the varnish-commit mailing list