r1667 - in trunk/varnish-cache: bin/varnishd include lib/libvcl

phk at projects.linpro.no phk at projects.linpro.no
Tue Jul 10 23:30:47 CEST 2007


Author: phk
Date: 2007-07-10 23:30:47 +0200 (Tue, 10 Jul 2007)
New Revision: 1667

Modified:
   trunk/varnish-cache/bin/varnishd/cache_vrt_re.c
   trunk/varnish-cache/include/vrt.h
   trunk/varnish-cache/lib/libvcl/vcc_fixed_token.c
Log:
Add "regsub" support for string manipulation.

Notice this facility is subject to change!

"regsub" is short for regular expression substitution and it is probably
easiest to explain with some examples:

	sub vcl_recv {
		set req.url = regsub(req.url, "#.*", "");
	}

This will replace the request's URL with the output of the regsub() function.

regsub() takes three arguments: the string to be examined, a regular
expression and a replacement string.

In this case, everything from the first '#' onward (including the '#'
itself) is removed (replaced with nothing).

The replacement string recognizes the following magic sequences:
	&	- insert everything matched by the regexp
	$0	- ditto.
	$1	- replace with the first submatch of the regexp
	$2	- replace with the second submatch of the regexp
	...
	$9	- replace with the ninth submatch of the regexp

(The $0..$9 syntax was chosen over the \0...\9 syntax in order to avoid
a nightmare of escape characters in the VCL source code.  Arguments and
suggestions are welcome).

A more advanced example:

	set bereq.http.ClientIP = regsub(client.ip, "(.*):(.*)", "$2 $1");

The client.ip variable expands to IP:port number, for instance
	127.0.0.1:54662

The regular expression "(.*):(.*)" results in the following matches:
	& + $0		"127.0.0.1:54662"
	$1		"127.0.0.1"
	$2		"54662"

So the replacement string "$2 $1" results in "54662 127.0.0.1"

And the completed header which is sent to the backend will look like:

	"ClientIP: 54662 127.0.0.1"

An even more advanced example would be:

    set bereq.http.magic = "Client IP = " regsub(client.ip, ":", " port = ");

Where we also exploit the string concatenation ability of the "set" statement.

The result string is built in the request workspace, so you may need
to increase the workspace size if you do a lot of regsub()'s.

Currently there is no decent error handling for running out of workspace.


Modified: trunk/varnish-cache/bin/varnishd/cache_vrt_re.c
===================================================================
--- trunk/varnish-cache/bin/varnishd/cache_vrt_re.c	2007-07-10 20:43:24 UTC (rev 1666)
+++ trunk/varnish-cache/bin/varnishd/cache_vrt_re.c	2007-07-10 21:30:47 UTC (rev 1667)
@@ -35,6 +35,7 @@
 
 #include <stdio.h>
 #include <string.h>
+#include <ctype.h>
 #include <stdlib.h>
 #include <regex.h>
 
@@ -100,13 +101,72 @@
 	return (1);
 }
 
-char *
+const char *
 VRT_regsub(struct sess *sp, const char *str, void *re, const char *sub)
 {
-	static char foo[4] = "FOO";
-	(void)sp;
-	(void)str;
-	(void)re;
-	(void)sub;
-	return (foo);
+	regmatch_t pm[10];
+	regex_t *t;
+	int i, l;
+	char *b, *p, *e;
+	unsigned u, x;
+
+	AN(re);
+	t = re;
+	i = regexec(t, str, 10, pm, 0);
+
+	/* If it didn't match, we can return the original string */
+	if (i == REG_NOMATCH)
+		return(str);
+
+	u = WS_Reserve(sp->http->ws, 0);
+	e = p = b = sp->http->ws->f;
+	e += u;
+
+	/* Copy prefix to match */
+	if (pm[0].rm_so > 0) {
+		if (p + pm[0].rm_so < e)
+			memcpy(p, str, pm[0].rm_so);
+		p += pm[0].rm_so;
+	}
+
+	for ( ; *sub != '\0'; sub++ ) {
+		if (*sub == '&') {
+			l = pm[0].rm_eo - pm[0].rm_so;
+			if (l > 0) {
+				if (p + l < e)
+					memcpy(p, str + pm[0].rm_so, l);
+				p += l;
+			}
+		} else if (*sub == '$' && isdigit(sub[1])) {
+			x = sub[1] - '0';
+			sub++;
+			l = pm[x].rm_eo - pm[x].rm_so;
+			if (l > 0) {
+				if (p + l < e)
+					memcpy(p, str + pm[x].rm_so, l);
+				p += l;
+			}
+		} else {
+			if (p + 1 < e)
+				*p = *sub;
+			p++;
+		}
+	}
+
+	/* Copy suffix to match */
+	l = strlen(str + pm[0].rm_eo);
+	if (l > 0) {
+		if (p + l < e)
+			memcpy(p, str + pm[0].rm_eo, l);
+		p += l;
+	}
+	if (p + 1 < e)
+		*p++ = '\0';
+	xxxassert(p <= e);
+	if (p > e) {
+		WS_Release(sp->http->ws, 0);
+		return (str);
+	} 
+	WS_Release(sp->http->ws, p - b);
+	return (b);
 }

Modified: trunk/varnish-cache/include/vrt.h
===================================================================
--- trunk/varnish-cache/include/vrt.h	2007-07-10 20:43:24 UTC (rev 1666)
+++ trunk/varnish-cache/include/vrt.h	2007-07-10 21:30:47 UTC (rev 1667)
@@ -68,7 +68,7 @@
 void VRT_re_fini(void *);
 int VRT_re_match(const char *, void *re);
 int VRT_re_test(struct vsb *, const char *, int sub);
-char *VRT_regsub(struct sess *sp, const char *, void *, const char *);
+const char *VRT_regsub(struct sess *sp, const char *, void *, const char *);
 
 void VRT_count(struct sess *, unsigned);
 int VRT_rewrite(const char *, const char *);

Modified: trunk/varnish-cache/lib/libvcl/vcc_fixed_token.c
===================================================================
--- trunk/varnish-cache/lib/libvcl/vcc_fixed_token.c	2007-07-10 20:43:24 UTC (rev 1666)
+++ trunk/varnish-cache/lib/libvcl/vcc_fixed_token.c	2007-07-10 21:30:47 UTC (rev 1667)
@@ -424,7 +424,7 @@
 	vsb_cat(sb, "void VRT_re_fini(void *);\n");
 	vsb_cat(sb, "int VRT_re_match(const char *, void *re);\n");
 	vsb_cat(sb, "int VRT_re_test(struct vsb *, const char *, int sub);\n");
-	vsb_cat(sb, "char *VRT_regsub(struct sess *sp, const char *, void *, const char *);\n");
+	vsb_cat(sb, "const char *VRT_regsub(struct sess *sp, const char *, void *, const char *);\n");
 	vsb_cat(sb, "\n");
 	vsb_cat(sb, "void VRT_count(struct sess *, unsigned);\n");
 	vsb_cat(sb, "int VRT_rewrite(const char *, const char *);\n");




More information about the varnish-commit mailing list