[master] 21a5919 Initial merge of the shard director
Nils Goroll
nils.goroll at uplex.de
Mon Sep 12 18:15:19 CEST 2016
commit 21a5919d85220f44ef806c745a00e19942a0aec5
Author: Nils Goroll <nils.goroll at uplex.de>
Date: Mon Sep 12 17:38:32 2016 +0200
Initial merge of the shard director
- rename struct vmod_shard_shard -> vmod_directors_shard
- rename typedefs td_shard_* -> td_directors_*
- documentation reformatting
- remove vbe32dec copy
- include generated shard_parse_vcc_* files because enum parse code
generation should be included in vmodtool, if at all (do not want
an additional build dependency to perl)
- make key function a method
Source:
https://code.uplex.de/uplex-varnish/libvmod-vslp/tree/shard
0f8c3f0f52ca9911d4aed0ce5faf203ab7ff26b0
diff --git a/bin/varnishtest/tests/d00015.vtc b/bin/varnishtest/tests/d00015.vtc
new file mode 100644
index 0000000..c3aec0c
--- /dev/null
+++ b/bin/varnishtest/tests/d00015.vtc
@@ -0,0 +1,222 @@
+varnishtest "shard director reconfiguration in init"
+
+server s1 {
+} -start
+
+server s2 {
+} -start
+
+server s3 {
+ rxreq
+ txresp -body "xiuFi3Pe"
+} -start
+
+varnish v1 -vcl+backend {
+ import std;
+ import directors;
+
+ sub vcl_init {
+ new vd = directors.shard();
+ new vd2 = directors.shard();
+ vd.debug(3);
+
+ std.log("-- invalid replicas");
+ if (! vd.reconfigure(replicas=0)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- no changes - no debug output");
+ if (! vd.reconfigure(replicas=25)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- no backends");
+ if (! vd.clear()) {
+ std.log("clear failed");
+ }
+ if (! vd.reconfigure(replicas=25)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- one backend");
+ if (! vd.add_backend(s1)) {
+ std.log("add s1 failed");
+ }
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- no change - no output");
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- change, clear, no backends");
+ # change, then clear
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.clear();
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.clear();
+ if (! vd.reconfigure()) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- duplicate add");
+ vd.clear();
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.add_backend(s1);
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- duplicate add with idents");
+ vd.clear();
+ vd.add_backend(s1);
+ vd.add_backend(s1, ident="s1_1");
+ vd.add_backend(s1, ident="s1_2");
+ vd.add_backend(s2);
+ vd.add_backend(s2, ident="s1");
+ vd.add_backend(s2, ident="s1_1");
+ vd.add_backend(s2, ident="s1_2");
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- remove s1_2 specifically");
+ vd.remove_backend(ident="s1_2");
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- remove all instances of s1");
+ vd.remove_backend(s1);
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- re-add some - no 2nd director");
+ vd.clear();
+ vd.add_backend(s3, "1");
+ vd.add_backend(s3, "2");
+ vd.add_backend(s3, "3");
+ vd2.clear();
+ vd.add_backend(s3, "4");
+ vd.add_backend(s3, "5");
+ vd.add_backend(s3, "6");
+ vd.add_backend(s3, "7");
+ vd.add_backend(s3, "8");
+ vd.add_backend(s3, "9");
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- remove second-last");
+ vd.remove_backend(ident="8");
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- remove last");
+ vd.remove_backend(ident="9");
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- END");
+ }
+
+ sub vcl_recv {
+ set req.backend_hint = vd.backend();
+ return(pass);
+ }
+
+} -start
+
+logexpect l1 -v v1 -g raw -d 1 {
+ expect 0 0 CLI {^Rd vcl.load}
+
+ expect 0 0 VCL_Log {^-- invalid replicas$}
+ expect 0 0 Error {^shard vd: .reconfigure.. invalid replicas argument 0}
+ expect 0 0 VCL_Log {^reconfigure failed}
+
+ expect 0 0 VCL_Log {^-- no changes - no debug output$}
+
+ expect 0 0 VCL_Log {^-- no backends$}
+ expect 0 0 Error {^shard vd: .reconfigure.. no backends}
+ expect 0 0 VCL_Log {^reconfigure failed}
+
+ expect 0 0 VCL_Log {^-- one backend$}
+ expect 0 0 Debug {^shard:.*point = f08ad325, host = 0}
+
+ expect 0 0 VCL_Log {^-- no change - no output$}
+
+ expect 0 0 VCL_Log {^-- change, clear, no backends$}
+ expect 0 0 Error {^shard vd: .reconfigure.. no backends}
+ expect 0 0 VCL_Log {^reconfigure failed}
+
+ expect 0 0 VCL_Log {^-- duplicate add$}
+ expect 0 0 Error {^shard vd: .notice. backend s1 already exists - skipping$}
+ expect 0 0 Debug {^shard:.*point = 6e040182, host = 1}
+ expect 0 0 Debug {^shard:.*point = f08ad325, host = 0}
+
+ expect 0 0 VCL_Log {^-- duplicate add with idents$}
+ expect 0 0 Error {^shard vd: .notice. backend s1 already exists - skipping}
+ expect 0 0 Error {^shard vd: .notice. backend s1/s1_1 already exists - skipping}
+ expect 0 0 Error {^shard vd: .notice. backend s1/s1_2 already exists - skipping}
+ expect 0 0 Debug {^shard:.*point = 6e040182, host = 3}
+ expect 0 0 Debug {^shard:.*point = 732c7bbe, host = 2}
+ expect 0 0 Debug {^shard:.*point = bae80b0b, host = 1}
+ expect 0 0 Debug {^shard:.*point = f08ad325, host = 0}
+
+ expect 0 0 VCL_Log {^-- remove s1_2 specifically$}
+ expect 0 0 Debug {^shard:.*point = 6e040182, host = 2}
+ expect 0 0 Debug {^shard:.*point = bae80b0b, host = 1}
+ expect 0 0 Debug {^shard:.*point = f08ad325, host = 0}
+
+ expect 0 0 VCL_Log {^-- remove all instances of s1$}
+ expect 0 0 Debug {^shard:.*point = 6e040182, host = 0}
+
+ expect 0 0 VCL_Log {^-- re-add some - no 2nd director$}
+ expect 0 0 Error {^shard vd2: cannot change more than}
+ expect 0 0 Debug {^shard:.*point = 3d1fe97, host = 3}
+ expect 0 0 Debug {^shard:.*point = a25a43b, host = 6}
+ expect 0 0 Debug {^shard:.*point = 2b20d9a2, host = 1}
+ expect 0 0 Debug {^shard:.*point = 6337e62c, host = 8}
+ expect 0 0 Debug {^shard:.*point = c9803f17, host = 5}
+ expect 0 0 Debug {^shard:.*point = d51dafe6, host = 0}
+ expect 0 0 Debug {^shard:.*point = eb74a7d5, host = 4}
+ expect 0 0 Debug {^shard:.*point = f493ce58, host = 2}
+ expect 0 0 Debug {^shard:.*point = fc1a5162, host = 7}
+
+ expect 0 0 VCL_Log {^-- remove second-last$}
+ expect 0 0 Debug {^shard:.*point = 3d1fe97, host = 3}
+ expect 0 0 Debug {^shard:.*point = a25a43b, host = 6}
+ expect 0 0 Debug {^shard:.*point = 2b20d9a2, host = 1}
+ expect 0 0 Debug {^shard:.*point = 6337e62c, host = 7}
+ expect 0 0 Debug {^shard:.*point = c9803f17, host = 5}
+ expect 0 0 Debug {^shard:.*point = d51dafe6, host = 0}
+ expect 0 0 Debug {^shard:.*point = eb74a7d5, host = 4}
+ expect 0 0 Debug {^shard:.*point = f493ce58, host = 2}
+
+ expect 0 0 VCL_Log {^-- remove last$}
+ expect 0 0 Debug {^shard:.*point = 3d1fe97, host = 3}
+ expect 0 0 Debug {^shard:.*point = a25a43b, host = 6}
+ expect 0 0 Debug {^shard:.*point = 2b20d9a2, host = 1}
+ expect 0 0 Debug {^shard:.*point = c9803f17, host = 5}
+ expect 0 0 Debug {^shard:.*point = d51dafe6, host = 0}
+ expect 0 0 Debug {^shard:.*point = eb74a7d5, host = 4}
+ expect 0 0 Debug {^shard:.*point = f493ce58, host = 2}
+
+expect 0 0 VCL_Log {^-- END$}
+} -start
+
+client c1 {
+ txreq
+ rxresp
+ expect resp.body == "xiuFi3Pe"
+} -run
+
+logexpect l1 -wait
diff --git a/bin/varnishtest/tests/d00016.vtc b/bin/varnishtest/tests/d00016.vtc
new file mode 100644
index 0000000..2e5406d
--- /dev/null
+++ b/bin/varnishtest/tests/d00016.vtc
@@ -0,0 +1,222 @@
+varnishtest "shard director/int reconfiguration outside init"
+
+server s1 {
+} -start
+
+server s2 {
+} -start
+
+server s3 {
+ rxreq
+ txresp -body "xiuFi3Pe"
+} -start
+
+varnish v1 -vcl+backend {
+ import std;
+ import directors;
+
+ sub vcl_init {
+ new vd = directors.shard();
+ new vd2 = directors.shard();
+ vd.debug(3);
+ }
+
+ sub vcl_recv {
+ std.log("-- invalid replicas");
+ if (! vd.reconfigure(replicas=0)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- no changes - no debug output");
+ if (! vd.reconfigure(replicas=25)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- no backends");
+ if (! vd.clear()) {
+ std.log("clear failed");
+ }
+ if (! vd.reconfigure(replicas=25)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- one backend");
+ if (! vd.add_backend(s1)) {
+ std.log("add s1 failed");
+ }
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- no change - no output");
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- change, clear, no backends");
+ # change, then clear
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.clear();
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.clear();
+ if (! vd.reconfigure()) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- duplicate add");
+ vd.clear();
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.add_backend(s1);
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- duplicate add with idents");
+ vd.clear();
+ vd.add_backend(s1);
+ vd.add_backend(s1, ident="s1_1");
+ vd.add_backend(s1, ident="s1_2");
+ vd.add_backend(s2);
+ vd.add_backend(s2, ident="s1");
+ vd.add_backend(s2, ident="s1_1");
+ vd.add_backend(s2, ident="s1_2");
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- remove s1_2 specifically");
+ vd.remove_backend(ident="s1_2");
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- remove all instances of s1");
+ vd.remove_backend(s1);
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- re-add some - no 2nd director");
+ vd.clear();
+ vd.add_backend(s3, "1");
+ vd.add_backend(s3, "2");
+ vd.add_backend(s3, "3");
+ vd2.clear();
+ vd.add_backend(s3, "4");
+ vd.add_backend(s3, "5");
+ vd.add_backend(s3, "6");
+ vd.add_backend(s3, "7");
+ vd.add_backend(s3, "8");
+ vd.add_backend(s3, "9");
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- remove second-last");
+ vd.remove_backend(ident="8");
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- remove last");
+ vd.remove_backend(ident="9");
+ if (! vd.reconfigure(replicas=1)) {
+ std.log("reconfigure failed");
+ }
+
+ std.log("-- END");
+
+ set req.backend_hint = vd.backend();
+ return(pass);
+ }
+
+} -start
+
+# this is identical to v01100.vtc, but split into two logexps
+
+logexpect l1 -v v1 -g raw -d 1 {
+ expect * 1001 VCL_Log {^-- invalid replicas$}
+ expect 0 1001 Error {^shard vd: .reconfigure.. invalid replicas argument 0}
+ expect 0 1001 VCL_Log {^reconfigure failed}
+
+ expect 0 1001 VCL_Log {^-- no changes - no debug output$}
+
+ expect 0 1001 VCL_Log {^-- no backends$}
+ expect 0 1001 Error {^shard vd: .reconfigure.. no backends}
+ expect 0 1001 VCL_Log {^reconfigure failed}
+
+ expect 0 1001 VCL_Log {^-- one backend$}
+
+ expect 0 1001 VCL_Log {^-- no change - no output$}
+
+ expect 0 1001 VCL_Log {^-- change, clear, no backends$}
+ expect 0 1001 Error {^shard vd: .reconfigure.. no backends}
+ expect 0 1001 VCL_Log {^reconfigure failed}
+
+ expect 0 1001 VCL_Log {^-- duplicate add$}
+ expect 0 1001 Error {^shard vd: .notice. backend s1 already exists - skipping$}
+ expect 0 1001 VCL_Log {^-- duplicate add with idents$}
+ expect 0 1001 Error {^shard vd: .notice. backend s1 already exists - skipping}
+ expect 0 1001 Error {^shard vd: .notice. backend s1/s1_1 already exists - skipping}
+ expect 0 1001 Error {^shard vd: .notice. backend s1/s1_2 already exists - skipping}
+ expect 0 1001 VCL_Log {^-- remove s1_2 specifically$}
+ expect 0 1001 VCL_Log {^-- remove all instances of s1$}
+ expect 0 1001 VCL_Log {^-- re-add some - no 2nd director$}
+ expect 0 1001 Error {^shard vd2: cannot change more than}
+ expect 0 1001 VCL_Log {^-- remove second-last$}
+ expect 0 1001 VCL_Log {^-- remove last$}
+ expect 0 1001 VCL_Log {^-- END$}
+}
+
+logexpect l2 -v v1 -g raw -d 1 {
+ expect * 0 Debug {^shard:.*point = f08ad325, host = 0}
+ expect 0 0 Debug {^shard:.*point = 6e040182, host = 1}
+ expect 0 0 Debug {^shard:.*point = f08ad325, host = 0}
+ expect 0 0 Debug {^shard:.*point = 6e040182, host = 3}
+ expect 0 0 Debug {^shard:.*point = 732c7bbe, host = 2}
+ expect 0 0 Debug {^shard:.*point = bae80b0b, host = 1}
+ expect 0 0 Debug {^shard:.*point = f08ad325, host = 0}
+ expect 0 0 Debug {^shard:.*point = 6e040182, host = 2}
+ expect 0 0 Debug {^shard:.*point = bae80b0b, host = 1}
+ expect 0 0 Debug {^shard:.*point = f08ad325, host = 0}
+ expect 0 0 Debug {^shard:.*point = 6e040182, host = 0}
+ expect 0 0 Debug {^shard:.*point = 3d1fe97, host = 3}
+ expect 0 0 Debug {^shard:.*point = a25a43b, host = 6}
+ expect 0 0 Debug {^shard:.*point = 2b20d9a2, host = 1}
+ expect 0 0 Debug {^shard:.*point = 6337e62c, host = 8}
+ expect 0 0 Debug {^shard:.*point = c9803f17, host = 5}
+ expect 0 0 Debug {^shard:.*point = d51dafe6, host = 0}
+ expect 0 0 Debug {^shard:.*point = eb74a7d5, host = 4}
+ expect 0 0 Debug {^shard:.*point = f493ce58, host = 2}
+ expect 0 0 Debug {^shard:.*point = fc1a5162, host = 7}
+ expect 0 0 Debug {^shard:.*point = 3d1fe97, host = 3}
+ expect 0 0 Debug {^shard:.*point = a25a43b, host = 6}
+ expect 0 0 Debug {^shard:.*point = 2b20d9a2, host = 1}
+ expect 0 0 Debug {^shard:.*point = 6337e62c, host = 7}
+ expect 0 0 Debug {^shard:.*point = c9803f17, host = 5}
+ expect 0 0 Debug {^shard:.*point = d51dafe6, host = 0}
+ expect 0 0 Debug {^shard:.*point = eb74a7d5, host = 4}
+ expect 0 0 Debug {^shard:.*point = f493ce58, host = 2}
+ expect 0 0 Debug {^shard:.*point = 3d1fe97, host = 3}
+ expect 0 0 Debug {^shard:.*point = a25a43b, host = 6}
+ expect 0 0 Debug {^shard:.*point = 2b20d9a2, host = 1}
+ expect 0 0 Debug {^shard:.*point = c9803f17, host = 5}
+ expect 0 0 Debug {^shard:.*point = d51dafe6, host = 0}
+ expect 0 0 Debug {^shard:.*point = eb74a7d5, host = 4}
+ expect 0 0 Debug {^shard:.*point = f493ce58, host = 2}
+}
+
+logexpect l1 -start
+logexpect l2 -start
+
+client c1 {
+ txreq
+ rxresp
+ expect resp.body == "xiuFi3Pe"
+} -run
+
+logexpect l1 -wait
+logexpect l2 -wait
diff --git a/bin/varnishtest/tests/d00017.vtc b/bin/varnishtest/tests/d00017.vtc
new file mode 100644
index 0000000..d92cad5
--- /dev/null
+++ b/bin/varnishtest/tests/d00017.vtc
@@ -0,0 +1,158 @@
+varnishtest "shard director regression to vslp v01000"
+
+server s1 {
+ rxreq
+ txresp -body "ech3Ooj"
+} -start
+
+server s2 {
+ rxreq
+ txresp -body "ieQu2qua"
+} -start
+
+server s3 {
+ rxreq
+ txresp -body "xiuFi3Pe"
+} -start
+
+# VSLP:
+# picked preferred backend 0 for key 68b902f8
+# picked preferred backend 1 for key 39dc4614
+# picked preferred backend 2 for key c7793506
+#
+# shard:
+
+varnish v1 -vcl+backend {
+ import std;
+ import directors;
+
+ sub vcl_init {
+ new vd = directors.shard();
+ vd.debug(3);
+ if (! vd.add_backend(s1)) {
+ std.log("add s1 failed");
+ }
+ if (! vd.add_backend(s2)) {
+ std.log("add s2 failed");
+ }
+ if (! vd.add_backend(s3)) {
+ std.log("add s3 failed");
+ }
+ if (! vd.reconfigure(replicas=25)) {
+ std.log("reconfigure failed");
+ }
+ }
+
+ sub vcl_recv {
+ set req.backend_hint = vd.backend(by=KEY,
+ key=vd.key(req.url, CRC32));
+ return(pass);
+ }
+
+} -start
+
+# check that the hashcircle points are decremented by one compared to vslp
+#
+# generated from vslp test output
+# perl -ne </tmp/vslp_v0.log 'if (/VSLP hashcircle/) { my @f=split; $f[9] =~ s/\]/./; $f[13] =~ s/,//; $f[13]=hex($f[13]) - 1; $f[16] =~ s/\}//; printf ("\texpect 0 = Debug {^shard: hashcircle.* %s = .point = %8x, host = %2d.}\n", $f[9], $f[13], $f[16]); }' >/tmp/regression.logexp
+
+logexpect l1 -v v1 -g raw -d 1 {
+ expect 0 0 CLI "^Rd vcl.load"
+ expect 0 = Debug {^shard: hashcircle.* 0. = .point = 3b6b56a, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 1. = .point = 66986a7, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 2. = .point = 7e41e30, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 3. = .point = b749e7b, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 4. = .point = e543430, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 5. = .point = 10136c05, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 6. = .point = 102d847f, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 7. = .point = 1112f910, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 8. = .point = 1119a7c7, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 9. = .point = 22464ee9, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 10. = .point = 22b35675, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 11. = .point = 2363bebb, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 12. = .point = 259eeccf, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 13. = .point = 26f0c3e7, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 14. = .point = 28340f35, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 15. = .point = 285e8475, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 16. = .point = 28ec7a6f, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 17. = .point = 2da0e37b, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 18. = .point = 3392487a, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 19. = .point = 37597c4c, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 20. = .point = 4b1f5b22, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 21. = .point = 523723f2, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 22. = .point = 539234db, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 23. = .point = 564ca84f, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 24. = .point = 5d2df428, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 25. = .point = 5fa294ee, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 26. = .point = 60dded53, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 27. = .point = 6257bc27, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 28. = .point = 64014b25, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 29. = .point = 6bfd5a2d, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 30. = .point = 6e040182, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 31. = .point = 6e3819f7, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 32. = .point = 7232b381, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 33. = .point = 74c384ad, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 34. = .point = 83ce71ce, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 35. = .point = 888b6447, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 36. = .point = 8997c018, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 37. = .point = 8aa6b5b4, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 38. = .point = 8b47e6ac, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 39. = .point = 8bc8bc11, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 40. = .point = 8e2d3849, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 41. = .point = 8e7e012c, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 42. = .point = 99892987, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 43. = .point = 9a6f2f00, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 44. = .point = 9ef9125d, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 45. = .point = 9f33cd30, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 46. = .point = 9fc69b51, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 47. = .point = a19f99eb, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 48. = .point = a28b9595, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 49. = .point = a8afe9c4, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 50. = .point = ad923ad3, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 51. = .point = ae8946c6, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 52. = .point = b197e339, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 53. = .point = b3c305e6, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 54. = .point = b6bf43ea, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 55. = .point = b9004d3d, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 56. = .point = bbcc0bad, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 57. = .point = c2542a5d, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 58. = .point = c6c43fa7, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 59. = .point = c945958a, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 60. = .point = cbd9198a, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 61. = .point = d4c93105, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 62. = .point = d7de63b6, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 63. = .point = d937a7df, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 64. = .point = dac52229, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 65. = .point = db7840f0, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 66. = .point = dd5c6bef, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 67. = .point = dfd5333b, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 68. = .point = e991584c, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 69. = .point = ec8891c5, host = 1.}
+ expect 0 = Debug {^shard: hashcircle.* 70. = .point = ef6b4ab5, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 71. = .point = f08ad325, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 72. = .point = f3325ba2, host = 2.}
+ expect 0 = Debug {^shard: hashcircle.* 73. = .point = f6530dd1, host = 0.}
+ expect 0 = Debug {^shard: hashcircle.* 74. = .point = fc28e8d2, host = 2.}
+
+ expect 0 = CLI Loaded
+
+ expect * = Debug {^shard: lookup key 68b902f8 idx 29 host 0}
+ expect * = Debug {^shard: lookup key 39dc4614 idx 20 host 1}
+ expect * = Debug {^shard: lookup key c7793506 idx 59 host 2}
+} -start
+
+client c1 {
+ txreq -url /eishoSu2
+ rxresp
+ expect resp.body == "ech3Ooj"
+
+ txreq -url /Zainao9d
+ rxresp
+ expect resp.body == "ieQu2qua"
+
+ txreq -url /Aunah3uo
+ rxresp
+ expect resp.body == "xiuFi3Pe"
+} -run
+
+logexpect l1 -wait
diff --git a/bin/varnishtest/tests/d00018.vtc b/bin/varnishtest/tests/d00018.vtc
new file mode 100644
index 0000000..3ca77be
--- /dev/null
+++ b/bin/varnishtest/tests/d00018.vtc
@@ -0,0 +1,66 @@
+varnishtest "shard director/int key"
+
+server s1 {
+ rxreq
+ txresp -body "ech3Ooj"
+} -start
+
+server s2 {
+ rxreq
+ txresp -body "ieQu2qua"
+} -start
+
+server s3 {
+ rxreq
+ txresp -body "xiuFi3Pe"
+} -start
+
+varnish v1 -vcl+backend {
+ import std;
+ import directors;
+
+ sub vcl_init {
+ new vd = directors.shard();
+ if (! vd.add_backend(s1)) {
+ std.log("add s1 failed");
+ }
+ if (! vd.add_backend(s2)) {
+ std.log("add s2 failed");
+ }
+ if (! vd.add_backend(s3)) {
+ std.log("add s3 failed");
+ }
+ if (! vd.reconfigure(replicas=25)) {
+ std.log("reconfigure failed");
+ }
+ }
+
+ sub vcl_recv {
+ if(req.url == "/1") {
+ set req.backend_hint = vd.backend(by=KEY, key=1);
+ }
+ if(req.url == "/2") {
+ set req.backend_hint = vd.backend(by=KEY, key=2147483647);
+ }
+ if(req.url == "/3") {
+ set req.backend_hint = vd.backend(by=KEY, key=4294967295);
+ }
+ return(pass);
+ }
+
+} -start
+
+
+client c1 {
+ txreq -url /1
+ rxresp
+ expect resp.body == "ech3Ooj"
+
+ txreq -url /2
+ rxresp
+ expect resp.body == "ieQu2qua"
+
+ txreq -url /3
+ rxresp
+ expect resp.body == "xiuFi3Pe"
+} -run
diff --git a/bin/varnishtest/tests/d00019.vtc b/bin/varnishtest/tests/d00019.vtc
new file mode 100644
index 0000000..5635a1c
--- /dev/null
+++ b/bin/varnishtest/tests/d00019.vtc
@@ -0,0 +1,49 @@
+varnishtest "shard director SHA256 (default)"
+
+server s1 {
+ rxreq
+ txresp -body "ech3Ooj"
+} -start
+
+server s2 {
+ rxreq
+ txresp -body "ieQu2qua"
+} -start
+
+server s3 {
+ rxreq
+ txresp -body "xiuFi3Pe"
+} -start
+
+varnish v1 -vcl+backend {
+ import directors;
+
+ sub vcl_init {
+ new vd = directors.shard();
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.add_backend(s3);
+ vd.reconfigure(replicas=25);
+ }
+
+ sub vcl_recv {
+ set req.backend_hint = vd.backend();
+ return(pass);
+ }
+
+} -start
+
+
+client c1 {
+ txreq -url /Boo0aixe
+ rxresp
+ expect resp.body == "ech3Ooj"
+
+ txreq -url /eishoSu2
+ rxresp
+ expect resp.body == "ieQu2qua"
+
+ txreq -url /Aunah3uo
+ rxresp
+ expect resp.body == "xiuFi3Pe"
+} -run
diff --git a/bin/varnishtest/tests/d00020.vtc b/bin/varnishtest/tests/d00020.vtc
new file mode 100644
index 0000000..1fe91c7
--- /dev/null
+++ b/bin/varnishtest/tests/d00020.vtc
@@ -0,0 +1,50 @@
+varnishtest "shard director RS"
+
+server s1 {
+ rxreq
+ txresp -body "ech3Ooj"
+} -start
+
+server s2 {
+ rxreq
+ txresp -body "ieQu2qua"
+} -start
+
+server s3 {
+ rxreq
+ txresp -body "xiuFi3Pe"
+} -start
+
+varnish v1 -vcl+backend {
+ import directors;
+
+ sub vcl_init {
+ new vd = directors.shard();
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.add_backend(s3);
+ vd.reconfigure(replicas=25);
+ }
+
+ sub vcl_recv {
+ set req.backend_hint = vd.backend(by=KEY,
+ key=vd.key(req.url, alg=RS));
+ return(pass);
+ }
+
+} -start
+
+
+client c1 {
+ txreq -url /we0eeTho
+ rxresp
+ expect resp.body == "ech3Ooj"
+
+ txreq -url /mae8ooNu
+ rxresp
+ expect resp.body == "ieQu2qua"
+
+ txreq -url /oob3dahS
+ rxresp
+ expect resp.body == "xiuFi3Pe"
+} -run
diff --git a/bin/varnishtest/tests/d00021.vtc b/bin/varnishtest/tests/d00021.vtc
new file mode 100644
index 0000000..9258abb
--- /dev/null
+++ b/bin/varnishtest/tests/d00021.vtc
@@ -0,0 +1,81 @@
+varnishtest "shard director key function"
+
+server s1 {
+ rxreq
+ txresp -body "ech3Ooj"
+ rxreq
+ txresp -body "ech3Ooj"
+ rxreq
+ txresp -body "ech3Ooj"
+} -start
+
+server s2 {
+ rxreq
+ txresp -body "ieQu2qua"
+} -start
+
+server s3 {
+ rxreq
+ txresp -body "xiuFi3Pe"
+} -start
+
+varnish v1 -vcl+backend {
+ import directors;
+
+ sub vcl_init {
+ new vd = directors.shard();
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.add_backend(s3);
+ vd.reconfigure(25);
+ }
+
+ sub recv_sub {
+ set req.backend_hint = vd.backend(by=KEY,
+ key=vd.key(req.http.X-Hash, RS));
+ }
+
+ sub vcl_recv {
+ if(req.url == "/1") {
+ set req.backend_hint = vd.backend(by=KEY,
+ key=vd.key(alg=CRC32, string="/eishoSu2"));
+ } else if (req.url == "/2") {
+ set req.backend_hint = vd.backend(by=KEY,
+ key=vd.key("/eishoSu2"));
+ } else if (req.url == "/3") {
+ set req.http.X-Hash = "/oob3dahS";
+ call recv_sub;
+ } else if (req.url == "/null_by_string") {
+ set req.backend_hint = vd.backend(by=KEY,
+ key=vd.key(req.http.NonExistent));
+ } else if (req.url == "/null_by_string_hash") {
+ set req.backend_hint = vd.backend(by=KEY,
+ key=vd.key(req.http.NonExistent, SHA256));
+ }
+ return(pass);
+ }
+
+} -start
+
+
+client c1 {
+ txreq -url /1
+ rxresp
+ expect resp.body == "ech3Ooj"
+
+ txreq -url /2
+ rxresp
+ expect resp.body == "ieQu2qua"
+
+ txreq -url /3
+ rxresp
+ expect resp.body == "xiuFi3Pe"
+
+ txreq -url /null_by_string
+ rxresp
+ expect resp.body == "ech3Ooj"
+
+ txreq -url /null_by_string_hash
+ rxresp
+ expect resp.body == "ech3Ooj"
+} -run
diff --git a/bin/varnishtest/tests/d00022.vtc b/bin/varnishtest/tests/d00022.vtc
new file mode 100644
index 0000000..0aa1fad
--- /dev/null
+++ b/bin/varnishtest/tests/d00022.vtc
@@ -0,0 +1,77 @@
+varnishtest "shard director Restarts"
+
+server s1 {
+ rxreq
+ txresp -body "ech3Ooj"
+ rxreq
+ txresp -body "ech3Ooj"
+ rxreq
+ txresp -body "ech3Ooj"
+} -start
+
+server s2 {
+ rxreq
+ txresp -body "ieQu2qua"
+ rxreq
+ txresp -body "ieQu2qua"
+ rxreq
+ txresp -body "ieQu2qua"
+} -start
+
+server s3 {
+ rxreq
+ txresp -body "xiuFi3Pe"
+ rxreq
+ txresp -body "xiuFi3Pe"
+ rxreq
+ txresp -body "xiuFi3Pe"
+} -start
+
+varnish v1 -vcl+backend {
+ import directors;
+
+ sub vcl_init {
+ new vd = directors.shard();
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.add_backend(s3);
+ vd.reconfigure(25);
+ }
+
+ sub vcl_recv {
+ set req.backend_hint = vd.backend(by=KEY,
+ key=vd.key("/eishoSu2", CRC32),
+ alt=req.restarts,
+ healthy=ALL);
+
+ if(req.url == "/2" && req.restarts > 0) {
+ unset req.http.vrstart;
+ }
+ if(req.url == "/3" && req.restarts > 1) {
+ unset req.http.vrstart;
+ }
+ return(pass);
+ }
+
+ sub vcl_deliver {
+ if(req.http.vrstart) {
+ return(restart);
+ }
+ }
+
+} -start
+
+
+client c1 {
+ txreq -url /1
+ rxresp
+ expect resp.body == "ech3Ooj"
+
+ txreq -url /2 -hdr "vrstart: 1"
+ rxresp
+ expect resp.body == "ieQu2qua"
+
+ txreq -url /3 -hdr "vrstart: 1"
+ rxresp
+ expect resp.body == "xiuFi3Pe"
+} -run
diff --git a/bin/varnishtest/tests/d00023.vtc b/bin/varnishtest/tests/d00023.vtc
new file mode 100644
index 0000000..e427264
--- /dev/null
+++ b/bin/varnishtest/tests/d00023.vtc
@@ -0,0 +1,55 @@
+varnishtest "shard director Unhealthy"
+
+server s1 {
+ rxreq
+ txresp -body "ech3Ooj"
+} -start
+
+server s2 {
+ rxreq
+ expect req.http.healthy == "true"
+ txresp -body "ieQu2qua"
+} -start
+
+server s3 {
+ rxreq
+ expect req.http.healthy == "true"
+ txresp -body "xiuFi3Pe"
+} -start
+
+varnish v1 -vcl+backend {
+ import std;
+ import directors;
+
+ sub vcl_init {
+ new vd = directors.shard();
+ vd.debug(3);
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.add_backend(s3);
+ vd.reconfigure(25);
+ }
+
+ sub vcl_recv {
+ set req.backend_hint = vd.backend(by=KEY,
+ key=vd.key("/eishoSu2", CRC32));
+ set req.http.healthy = std.healthy(req.backend_hint);
+ return(pass);
+ }
+} -start
+
+varnish v1 -cliok "backend.set_health s1 sick"
+
+client c1 {
+ txreq
+ rxresp
+ expect resp.body == "ieQu2qua"
+} -run
+
+varnish v1 -cliok "backend.set_health s2 sick"
+
+client c1 {
+ txreq
+ rxresp
+ expect resp.body == "xiuFi3Pe"
+} -run
diff --git a/bin/varnishtest/tests/d00024.vtc b/bin/varnishtest/tests/d00024.vtc
new file mode 100644
index 0000000..57f76da
--- /dev/null
+++ b/bin/varnishtest/tests/d00024.vtc
@@ -0,0 +1,75 @@
+varnishtest "shard director Rampup Time"
+
+# NOTE: this test is timing sensitive by nature
+
+
+server s1 {
+ rxreq
+ txresp -body "ech3Ooj"
+} -start
+
+server s2 {
+ rxreq
+ txresp -body "ieQu2qua"
+ rxreq
+ txresp -body "ieQu2qua"
+} -start
+
+server s3 {
+ rxreq
+ txresp -body "xiuFi3Pe"
+} -start
+
+varnish v1 -vcl+backend {
+ import directors;
+
+ sub vcl_init {
+ new vd = directors.shard();
+ vd.debug(4);
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.add_backend(s3);
+ vd.set_rampup(15s);
+ vd.reconfigure(25);
+ }
+
+ sub vcl_recv {
+ set req.backend_hint = vd.backend(by=KEY,
+ key=vd.key(alg=CRC32, string="/eishoSu2"));
+ return(pass);
+ }
+} -start
+
+varnish v1 -cliok "debug.srandom"
+
+delay 8
+
+varnish v1 -cliok "backend.set_health s1 sick"
+
+# s1 down, s2 and s3 both in rampup - s2 chosen
+client c1 {
+ txreq -url /1
+ rxresp
+ expect resp.body == "ieQu2qua"
+} -run
+
+
+delay 8
+
+varnish v1 -cliok "backend.set_health s1 healthy"
+
+# s1 just came up, s2 chosen for rampup
+client c1 {
+ txreq -url /2
+ rxresp
+ expect resp.body == "ieQu2qua"
+} -run
+
+delay 16
+
+# s1 out of rampup
+client c1 {
+ txreq -url /3
+ rxresp
+ expect resp.body == "ech3Ooj"
+} -run
diff --git a/bin/varnishtest/tests/d00025.vtc b/bin/varnishtest/tests/d00025.vtc
new file mode 100644
index 0000000..bf06d68
--- /dev/null
+++ b/bin/varnishtest/tests/d00025.vtc
@@ -0,0 +1,29 @@
+varnishtest "shard - single backend works"
+
+server s1 {
+ rxreq
+ txresp -body "ech3Ooj"
+} -start
+
+varnish v1 -vcl+backend {
+ import directors;
+
+ sub vcl_init {
+ new vd = directors.shard();
+ vd.add_backend(s1);
+ vd.reconfigure(1);
+ }
+
+ sub vcl_recv {
+ set req.backend_hint = vd.backend();
+ return(pass);
+ }
+
+} -start
+
+
+client c1 {
+ txreq -url /eishoSu2
+ rxresp
+ expect resp.body == "ech3Ooj"
+} -run
diff --git a/bin/varnishtest/tests/d00026.vtc b/bin/varnishtest/tests/d00026.vtc
new file mode 100644
index 0000000..64d63bc
--- /dev/null
+++ b/bin/varnishtest/tests/d00026.vtc
@@ -0,0 +1,50 @@
+varnishtest "shard director - same as v01000.vtc but setting backend in fetch"
+
+server s1 {
+ rxreq
+ txresp -body "ech3Ooj"
+} -start
+
+server s2 {
+ rxreq
+ txresp -body "ieQu2qua"
+} -start
+
+server s3 {
+ rxreq
+ txresp -body "xiuFi3Pe"
+} -start
+
+varnish v1 -vcl+backend {
+ import directors;
+
+ sub vcl_init {
+ new vd = directors.shard();
+ vd.add_backend(s1);
+ vd.add_backend(s2);
+ vd.add_backend(s3);
+ vd.reconfigure(25);
+ }
+
+ sub vcl_backend_fetch {
+ set bereq.backend = vd.backend(by=KEY,
+ key=vd.key(bereq.url, CRC32));
+ return(fetch);
+ }
+
+} -start
+
+
+client c1 {
+ txreq -url /eishoSu2
+ rxresp
+ expect resp.body == "ech3Ooj"
+
+ txreq -url /Zainao9d
+ rxresp
+ expect resp.body == "ieQu2qua"
+
+ txreq -url /Aunah3uo
+ rxresp
+ expect resp.body == "xiuFi3Pe"
+} -run
diff --git a/doc/changes.rst b/doc/changes.rst
index 412cbb5..9bcafa6 100644
--- a/doc/changes.rst
+++ b/doc/changes.rst
@@ -25,6 +25,8 @@ Major items:
Documentation, counters and log entries still refer to the mechanism
as hit-for-pass and Hitpass, respectively.
+* new shard director for loadbalancing by consistent hashing
+
======================================
Varnish Cache 4.1.3-beta1 (2016-06-15)
======================================
diff --git a/lib/libvmod_directors/Makefile.am b/lib/libvmod_directors/Makefile.am
index dbd238a..76e6528 100644
--- a/lib/libvmod_directors/Makefile.am
+++ b/lib/libvmod_directors/Makefile.am
@@ -24,7 +24,18 @@ libvmod_directors_la_SOURCES = \
fall_back.c \
hash.c \
random.c \
- round_robin.c
+ round_robin.c \
+ vmod_shard.c \
+ shard_cfg.c \
+ shard_cfg.h \
+ shard_dir.c \
+ shard_dir.h \
+ shard_hash.c \
+ shard_hash.h \
+ shard_parse_vcc_enums.h \
+ shard_parse_vcc_enums.c \
+ crc32.c \
+ crc32.h
nodist_libvmod_directors_la_SOURCES = \
vcc_if.c \
diff --git a/lib/libvmod_directors/crc32.c b/lib/libvmod_directors/crc32.c
new file mode 100644
index 0000000..714b0f0
--- /dev/null
+++ b/lib/libvmod_directors/crc32.c
@@ -0,0 +1,112 @@
+/*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2008 Linpro AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: crc32.c 3599 2009-02-05 10:13:52Z tfheen $
+ *
+ * This CRC32 implementation is in the public domain.
+ */
+
+#include "config.h"
+#include "crc32.h"
+
+/*--------------------------------------------------------------------*/
+
+static const uint32_t crc32bits[] = {
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
+ 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+ 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
+ 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+ 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
+ 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
+ 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
+ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
+ 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
+ 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
+ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
+ 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
+ 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+ 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
+ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
+ 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
+ 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
+ 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
+ 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
+ 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
+ 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
+ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+ 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
+ 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
+ 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
+ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
+ 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
+ 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
+ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
+ 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
+ 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+ 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
+ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
+ 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
+ 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
+ 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
+ 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
+ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
+};
+
+uint32_t
+crc32(uint32_t crc, const void *p1, unsigned l)
+{
+ const unsigned char *p;
+
+ for (p = (const unsigned char*)p1; l-- > 0; p++)
+ crc = (crc >> 8) ^ crc32bits[(crc ^ *p) & 0xff];
+ return (crc);
+}
+
+uint32_t
+crc32_l(const void *p1, unsigned l)
+{
+ uint32_t crc;
+
+ crc = crc32(~0U, p1, l);
+ return (crc ^ ~0U);
+}
+
+#ifdef DEBUG
+
+#include <stdlib.h>
+int main(int argc, char *argv[]) {
+ if (argc > 1)
+ printf("%x\n", crc32_l(argv[1], strlen(argv[1])));
+}
+
+#endif
diff --git a/lib/libvmod_directors/crc32.h b/lib/libvmod_directors/crc32.h
new file mode 100644
index 0000000..39adf8a
--- /dev/null
+++ b/lib/libvmod_directors/crc32.h
@@ -0,0 +1,3 @@
+#include <stdint.h>
+uint32_t crc32(uint32_t crc, const void *p1, unsigned l);
+uint32_t crc32_l(const void *p1, unsigned l);
diff --git a/lib/libvmod_directors/shard_cfg.c b/lib/libvmod_directors/shard_cfg.c
new file mode 100644
index 0000000..ecfb936
--- /dev/null
+++ b/lib/libvmod_directors/shard_cfg.c
@@ -0,0 +1,663 @@
+/*-
+ * Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
+ * All rights reserved.
+ *
+ * Authors: Nils Goroll <nils.goroll at uplex.de>
+ * Geoffrey Simmons <geoff at uplex.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "cache/cache.h"
+#include "cache/cache_director.h"
+
+#include "vrt.h"
+
+#include "shard_dir.h"
+#include "shard_cfg.h"
+#include "shard_hash.h"
+
+enum shard_change_task_e {
+ _INVALID = 0,
+ CLEAR,
+ ADD_BE,
+ REMOVE_BE,
+ _SHARD_TASK_E_MAX
+};
+
+struct shard_change_task {
+ unsigned magic;
+#define SHARD_CHANGE_TASK_MAGIC 0x1e1168af
+ enum shard_change_task_e task;
+ void *priv;
+ VSTAILQ_ENTRY(shard_change_task) list;
+};
+
+struct shard_change {
+ unsigned magic;
+#define SHARD_CHANGE_MAGIC 0xdff5c9a6
+ const struct sharddir *shardd;
+ void *space;
+ VSTAILQ_HEAD(,shard_change_task) tasks;
+};
+
+struct backend_reconfig {
+ struct sharddir * const shardd;
+ int hint; // on number of backends after reconfig
+ int hole_n; // number of holes in backends array
+ int hole_i; // index hint on first hole
+};
+
+/*
+ * ============================================================
+ * change / task list
+ *
+ * for backend reconfiguration, we create a change list on the VCL workspace in
+ * a PRIV_TASK state, which we work in reconfigure.
+ *
+ * for now, we allow to only reconfigure one shard director at a time.
+ */
+
+static struct shard_change *
+shard_change_get(VRT_CTX, struct vmod_priv *priv,
+ const struct sharddir * const shardd)
+{
+ struct shard_change *change;
+
+ if (priv->priv) {
+ CAST_OBJ_NOTNULL(change, priv->priv, SHARD_CHANGE_MAGIC);
+ if (change->shardd == NULL) {
+ change->shardd = shardd;
+ VSTAILQ_INIT(&change->tasks);
+ } else if (change->shardd != shardd) {
+ shard_err0(ctx, shardd,
+ "cannot change more than one shard director "
+ "at a time");
+ return NULL;
+ }
+ return (change);
+ }
+
+ change = WS_Alloc(ctx->ws, sizeof(*change));
+ if (change == NULL) {
+ shard_err0(ctx, shardd, "could not get workspace");
+ return NULL;
+ }
+
+ INIT_OBJ(change, SHARD_CHANGE_MAGIC);
+ change->space = NULL;
+ change->shardd = shardd;
+ VSTAILQ_INIT(&change->tasks);
+ priv->priv = change;
+
+ return (change);
+}
+
+static void
+shard_change_finish(struct shard_change *change)
+{
+ CHECK_OBJ_NOTNULL(change, SHARD_CHANGE_MAGIC);
+
+ change->shardd = NULL;
+ VSTAILQ_INIT(&change->tasks);
+}
+
+static void
+shard_change_task_add(VRT_CTX, struct shard_change *change,
+ enum shard_change_task_e task_e, void *priv)
+{
+ struct shard_change_task *task;
+
+ CHECK_OBJ_NOTNULL(change, SHARD_CHANGE_MAGIC);
+
+ task = WS_Alloc(ctx->ws, sizeof(*task));
+ if (task == NULL) {
+ shard_err0(ctx, change->shardd,
+ "could not get workspace for task");
+ return;
+ }
+ INIT_OBJ(task, SHARD_CHANGE_TASK_MAGIC);
+ task->task = task_e;
+ task->priv = priv;
+ VSTAILQ_INSERT_TAIL(&change->tasks, task, list);
+}
+
+static inline VCL_BOOL
+shard_change_task_backend(VRT_CTX,
+ struct vmod_priv *priv, struct sharddir *shardd,
+ enum shard_change_task_e task_e, VCL_BACKEND be, VCL_STRING ident,
+ VCL_DURATION rampup)
+{
+ struct shard_change *change;
+ struct shard_backend *b;
+
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+ assert(task_e == ADD_BE || task_e == REMOVE_BE);
+
+ change = shard_change_get(ctx, priv, shardd);
+ if (change == NULL)
+ return 0;
+
+ b = WS_Alloc(ctx->ws, sizeof(*b));
+ if (b == NULL) {
+ shard_err(ctx, shardd, ".%s_backend() WS_Alloc() failed",
+ task_e == ADD_BE ? "add" : "remove");
+ return 0;
+ }
+
+ b->backend = be;
+ b->ident = ident != NULL && *ident != '\0' ? ident : NULL;
+ b->rampup = rampup;
+
+ shard_change_task_add(ctx, change, task_e, b);
+
+ return 1;
+}
+
+/*
+ * ============================================================
+ * director reconfiguration tasks
+ */
+VCL_BOOL
+shardcfg_add_backend(VRT_CTX, struct vmod_priv *priv, struct sharddir *shardd,
+ VCL_BACKEND be, VCL_STRING ident, VCL_DURATION rampup)
+{
+ AN(be);
+ return shard_change_task_backend(ctx, priv, shardd, ADD_BE,
+ be, ident, rampup);
+}
+
+VCL_BOOL
+shardcfg_remove_backend(VRT_CTX, struct vmod_priv *priv,
+ struct sharddir *shardd, VCL_BACKEND be, VCL_STRING ident)
+{
+ return shard_change_task_backend(ctx, priv, shardd, REMOVE_BE,
+ be, ident, 0);
+}
+
+VCL_BOOL
+shardcfg_clear(VRT_CTX, struct vmod_priv *priv, struct sharddir *shardd)
+{
+ struct shard_change *change;
+
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+
+ change = shard_change_get(ctx, priv, shardd);
+ if (change == NULL)
+ return 0;
+
+ shard_change_task_add(ctx, change, CLEAR, NULL);
+
+ return 1;
+}
+
+/*
+ * ============================================================
+ * consistent hashing circle init
+ */
+
+typedef int (*compar)( const void*, const void* );
+
+static int
+circlepoint_compare(struct shard_circlepoint *a, struct shard_circlepoint *b)
+{
+ return (a->point == b->point) ? 0 : ((a->point > b->point) ? 1 : -1);
+}
+
+static void
+shardcfg_hashcircle(struct sharddir *shardd, VCL_INT replicas, enum alg_e alg)
+{
+ int i, j;
+ const char *ident;
+ int len;
+
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+ AZ(shardd->hashcircle);
+
+ assert(shardd->n_backend > 0);
+ AN(shardd->backend);
+
+ shardd->hashcircle = calloc(shardd->n_backend * replicas,
+ sizeof(struct shard_circlepoint));
+ AN(shardd->hashcircle);
+
+ shardd->replicas = replicas;
+
+ for (i = 0; i < shardd->n_backend; i++) {
+ CHECK_OBJ_NOTNULL(shardd->backend[i].backend, DIRECTOR_MAGIC);
+
+ ident = shardd->backend[i].ident
+ ? shardd->backend[i].ident
+ : shardd->backend[i].backend->vcl_name;
+
+ assert(ident[0] != '\0');
+
+ len = strlen(ident) + log10(UINT32_MAX) + 2;
+
+ char s[len];
+
+ for (j = 0; j < replicas; j++) {
+ sprintf(s, "%s%d", ident, j);
+ shardd->hashcircle[i * replicas + j].point =
+ shard_hash_f[alg](s);
+ shardd->hashcircle[i * replicas + j].host = i;
+ }
+ /* not used in current interface */
+ shardd->backend[i].canon_point =
+ shardd->hashcircle[i * replicas].point;
+ }
+ qsort( (void *) shardd->hashcircle, shardd->n_backend * replicas,
+ sizeof (struct shard_circlepoint), (compar) circlepoint_compare);
+
+ if ((shardd->debug_flags & SHDBG_CIRCLE) == 0)
+ return;
+
+ for (i = 0; i < shardd->n_backend; i++)
+ for (j = 0; j < replicas; j++)
+ SHDBG(SHDBG_CIRCLE, shardd,
+ "hashcircle[%5ld] = "
+ "{point = %8x, host = %2d}\n",
+ i * replicas + j,
+ shardd->hashcircle[i * replicas + j].point,
+ shardd->hashcircle[i * replicas + j].host);
+}
+
+/*
+ * ============================================================
+ * configure the director backends
+ */
+
+static void
+shardcfg_backend_free(struct shard_backend *f)
+{
+ if (f->ident)
+ free (TRUST_ME(f->ident));
+ memset(f, 0, sizeof(*f));
+}
+
+static void
+shardcfg_backend_copyin(struct shard_backend *dst,
+ const struct shard_backend *src)
+{
+ dst->backend = src->backend;
+ dst->ident = src->ident ? strdup(src->ident) : NULL;
+ dst->rampup = src->rampup;
+ dst->canon_point = 0xffffffff;
+}
+
+static int
+shardcfg_backend_cmp(const struct shard_backend *a,
+ const struct shard_backend *b)
+{
+ const char *ai, *bi;
+
+ ai = a->ident;
+ bi = b->ident;
+
+ /* vcl_names are unique, so we can compare the backend pointers */
+ if (ai == NULL && bi == NULL)
+ return a->backend != b->backend;
+
+ if (ai == NULL)
+ ai = a->backend->vcl_name;
+
+ if (bi == NULL)
+ bi = b->backend->vcl_name;
+
+ return strcmp(ai, bi);
+}
+
+/* for removal, we delete all instances if the backend matches */
+static int
+shardcfg_backend_del_cmp(const struct shard_backend *task,
+ const struct shard_backend *b)
+{
+ if (task->backend && task->ident == NULL)
+ return task->backend != b->backend;
+
+ return shardcfg_backend_cmp(task, b);
+}
+
+static const struct shard_backend *
+shardcfg_backend_lookup(struct backend_reconfig *re,
+ const struct shard_backend *b)
+{
+ int i, max = re->shardd->n_backend + re->hole_n;
+ const struct shard_backend *bb = re->shardd->backend;
+
+ for (i = 0; i < max; i++)
+ if (! shardcfg_backend_cmp(b, &bb[i]))
+ return &bb[i];
+
+ return NULL;
+}
+
+static void
+shardcfg_backend_expand(struct backend_reconfig *re)
+{
+ int min = re->hint;
+
+ CHECK_OBJ_NOTNULL(re->shardd, SHARDDIR_MAGIC);
+
+ if (min < 16)
+ min = 16;
+
+ if (re->shardd->l_backend < min)
+ re->shardd->l_backend = min;
+ else
+ re->shardd->l_backend <<= 1;
+
+ if (re->shardd->backend)
+ re->shardd->backend = realloc(re->shardd->backend,
+ re->shardd->l_backend * sizeof *re->shardd->backend);
+ else
+ re->shardd->backend = malloc(
+ re->shardd->l_backend * sizeof *re->shardd->backend);
+
+ AN(re->shardd->backend);
+}
+
+static void
+shardcfg_backend_add(struct backend_reconfig *re,
+ const struct shard_backend *b)
+{
+ int i;
+ struct shard_backend *bb = re->shardd->backend;
+
+ if (re->hole_n == 0) {
+ if (re->shardd->n_backend >= re->shardd->l_backend) {
+ shardcfg_backend_expand(re);
+ bb = re->shardd->backend;
+ }
+ assert(re->shardd->n_backend < re->shardd->l_backend);
+ i = re->shardd->n_backend;
+ } else {
+ do {
+ if (! bb[re->hole_i].backend)
+ break;
+ } while (++(re->hole_i) < re->shardd->n_backend + re->hole_n);
+ assert(re->hole_i < re->shardd->n_backend + re->hole_n);
+
+ i = (re->hole_i)++;
+ (re->hole_n)--;
+ }
+
+ re->shardd->n_backend++;
+ shardcfg_backend_copyin(&bb[i], b);
+ return;
+}
+
+static void
+shardcfg_backend_clear(struct sharddir *shardd)
+{
+ int i;
+ for (i = 0; i < shardd->n_backend; i++)
+ shardcfg_backend_free(&shardd->backend[i]);
+ shardd->n_backend = 0;
+}
+
+
+static void
+shardcfg_backend_del(struct backend_reconfig *re,
+ const struct shard_backend *spec)
+{
+ int i, max = re->shardd->n_backend + re->hole_n;
+ struct shard_backend * const bb = re->shardd->backend;
+
+ for (i = 0; i < max; i++) {
+ if (shardcfg_backend_del_cmp(spec, &bb[i]))
+ continue;
+
+ shardcfg_backend_free(&bb[i]);
+ re->shardd->n_backend--;
+ if (i < re->shardd->n_backend + re->hole_n) {
+ (re->hole_n)++;
+ if (i < re->hole_i)
+ re->hole_i = i;
+ }
+ }
+}
+
+static void
+shardcfg_backend_finalize(struct backend_reconfig *re)
+{
+ int i;
+ struct shard_backend * const bb = re->shardd->backend;
+
+ while (re->hole_n > 0) {
+ // trim end
+ i = re->shardd->n_backend + re->hole_n - 1;
+ while (re->hole_n && bb[i].backend == NULL) {
+ (re->hole_n)--;
+ i--;
+ }
+
+ if (re->hole_n == 0)
+ break;
+
+ assert(re->hole_i < i);
+
+ do {
+ if (! bb[re->hole_i].backend)
+ break;
+ } while (++(re->hole_i) <= i);
+
+ assert(re->hole_i < i);
+ assert(bb[re->hole_i].backend == NULL);
+ assert(bb[i].backend != NULL);
+
+ memcpy(&bb[re->hole_i], &bb[i], sizeof(*bb));
+ memset(&bb[i], 0, sizeof(*bb));
+
+ (re->hole_n)--;
+ (re->hole_i)++;
+ }
+
+ assert(re->hole_n == 0);
+}
+
+/*
+ * ============================================================
+ * work the change tasks
+ */
+
+static void
+shardcfg_apply_change(VRT_CTX, struct sharddir *shardd,
+ const struct shard_change *change)
+{
+ struct shard_change_task *task, *clear;
+ const struct shard_backend *b;
+
+ struct backend_reconfig re = {
+ .shardd = shardd,
+ .hint = shardd->n_backend,
+ .hole_n = 0,
+ .hole_i = INT_MAX
+ };
+
+ // XXX assert sharddir_locked(shardd)
+
+ clear = NULL;
+ VSTAILQ_FOREACH(task, &change->tasks, list) {
+ CHECK_OBJ_NOTNULL(task, SHARD_CHANGE_TASK_MAGIC);
+ switch (task->task) {
+ case CLEAR:
+ clear = task;
+ re.hint = 0;
+ break;
+ case ADD_BE:
+ re.hint++;
+ break;
+ case REMOVE_BE:
+ re.hint--;
+ break;
+ default:
+ INCOMPL();
+ }
+ }
+
+ if (clear) {
+ shardcfg_backend_clear(shardd);
+ clear = VSTAILQ_NEXT(clear, list);
+ if (clear == NULL)
+ return;
+ }
+
+ task = clear;
+ VSTAILQ_FOREACH_FROM(task, &change->tasks, list) {
+ CHECK_OBJ_NOTNULL(task, SHARD_CHANGE_TASK_MAGIC);
+ switch (task->task) {
+ case CLEAR:
+ assert(task->task != CLEAR);
+ break;
+ case ADD_BE:
+ b = shardcfg_backend_lookup(&re, task->priv);
+
+ if (b == NULL) {
+ shardcfg_backend_add(&re, task->priv);
+ break;
+ }
+
+ const char * const ident = b->ident;
+
+ shard_err(ctx, shardd, "(notice) backend %s%s%s "
+ "already exists - skipping",
+ b->backend->vcl_name,
+ ident ? "/" : "",
+ ident ? ident : "");
+ break;
+ case REMOVE_BE:
+ shardcfg_backend_del(&re, task->priv);
+ break;
+ default:
+ INCOMPL();
+ }
+ }
+ shardcfg_backend_finalize(&re);
+}
+
+/*
+ * ============================================================
+ * top reconfiguration function
+ */
+
+VCL_BOOL
+shardcfg_reconfigure(VRT_CTX, struct vmod_priv *priv,
+ struct sharddir *shardd, VCL_INT replicas, enum alg_e alg)
+{
+ struct shard_change *change;
+
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+ if (replicas <= 0) {
+ shard_err(ctx, shardd,
+ ".reconfigure() invalid replicas argument %ld", replicas);
+ return 0;
+ }
+
+ change = shard_change_get(ctx, priv, shardd);
+ if (change == NULL)
+ return 0;
+
+ if (VSTAILQ_FIRST(&change->tasks) == NULL)
+ return 1;
+
+ sharddir_wrlock(shardd);
+
+ shardcfg_apply_change(ctx, shardd, change);
+ shard_change_finish(change);
+
+ if (shardd->hashcircle)
+ free(shardd->hashcircle);
+ shardd->hashcircle = NULL;
+
+ if (shardd->n_backend == 0) {
+ shard_err0(ctx, shardd, ".reconfigure() no backends");
+ sharddir_unlock(shardd);
+ return 0;
+ }
+
+ shardcfg_hashcircle(shardd, replicas, alg);
+ sharddir_unlock(shardd);
+ return (1);
+}
+
+/*
+ * ============================================================
+ * misc config related
+ */
+
+/* only for sharddir_delete() */
+void
+shardcfg_delete(struct sharddir *shardd)
+{
+ int i;
+
+ for (i = 0; i < shardd->n_backend; i++)
+ shardcfg_backend_free(&shardd->backend[i]);
+ if (shardd->backend)
+ free(shardd->backend);
+ if (shardd->hashcircle)
+ free(shardd->hashcircle);
+}
+
+VCL_VOID
+shardcfg_set_warmup(struct sharddir *shardd, VCL_REAL ratio)
+{
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+ assert(ratio >= 0 && ratio < 1);
+ sharddir_wrlock(shardd);
+ shardd->warmup = ratio;
+ sharddir_unlock(shardd);
+}
+
+VCL_VOID
+shardcfg_set_rampup(struct sharddir *shardd, VCL_DURATION duration)
+{
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+ assert(duration >= 0);
+ sharddir_wrlock(shardd);
+ shardd->rampup_duration = duration;
+ sharddir_unlock(shardd);
+}
+
+VCL_DURATION
+shardcfg_get_rampup(struct sharddir *shardd, int host)
+{
+ VCL_DURATION r;
+
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+ // assert sharddir_rdlock_held(shardd);
+ assert (host < shardd->n_backend);
+
+ // magic value for default
+ if (shardd->backend[host].rampup == 973279260)
+ r = shardd->rampup_duration;
+ else
+ r = shardd->backend[host].rampup;
+
+ return (r);
+}
diff --git a/lib/libvmod_directors/shard_cfg.h b/lib/libvmod_directors/shard_cfg.h
new file mode 100644
index 0000000..ca50ddd
--- /dev/null
+++ b/lib/libvmod_directors/shard_cfg.h
@@ -0,0 +1,40 @@
+/*-
+ * Copyright 2016 UPLEX - Nils Goroll Systemoptimierung
+ * All rights reserved.
+ *
+ * Author: Nils Goroll <nils.goroll at uplex.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+VCL_BOOL shardcfg_add_backend(VRT_CTX, struct vmod_priv *priv,
+ struct sharddir *shardd, VCL_BACKEND be, VCL_STRING ident,
+ VCL_DURATION rampup);
+VCL_BOOL shardcfg_remove_backend(VRT_CTX, struct vmod_priv *priv,
+ struct sharddir *shardd, VCL_BACKEND be, VCL_STRING ident);
+VCL_BOOL shardcfg_clear(VRT_CTX, struct vmod_priv *priv,
+ struct sharddir *shardd);
+VCL_BOOL shardcfg_reconfigure(VRT_CTX, struct vmod_priv *priv,
+ struct sharddir *shardd, VCL_INT replicas, enum alg_e alg_e);
+VCL_VOID shardcfg_set_warmup(struct sharddir *shardd, VCL_REAL ratio);
+VCL_VOID shardcfg_set_rampup(struct sharddir *shardd,
+ VCL_DURATION duration);
diff --git a/lib/libvmod_directors/shard_dir.c b/lib/libvmod_directors/shard_dir.c
new file mode 100644
index 0000000..57a9495
--- /dev/null
+++ b/lib/libvmod_directors/shard_dir.c
@@ -0,0 +1,395 @@
+/*-
+ * Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
+ * All rights reserved.
+ *
+ * Authors: Nils Goroll <nils.goroll at uplex.de>
+ * Geoffrey Simmons <geoff.simmons at uplex.de>
+ * Julian Wiesener <jw at uplex.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+
+#include "cache/cache.h"
+#include "cache/cache_director.h"
+
+#include "vrt.h"
+#include "vbm.h"
+#include "vrnd.h"
+
+#include "shard_dir.h"
+#include "shard_hash.h"
+
+struct shard_be_info {
+ int hostid;
+ unsigned healthy;
+ double changed; // when
+};
+
+/*
+ * circle walk state for shard_next
+ *
+ * pick* cut off the search after having seen all possible backends
+ */
+struct shard_state {
+ const struct vrt_ctx *ctx;
+ struct sharddir *shardd;
+ int idx;
+
+ struct vbitmap *picklist;
+ int pickcount;
+
+ struct shard_be_info previous;
+ struct shard_be_info last;
+};
+
+void
+sharddir_debug(struct sharddir *shardd, const uint32_t flags)
+{
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+ shardd->debug_flags = flags;
+}
+
+void
+sharddir_err(VRT_CTX, enum VSL_tag_e tag, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (ctx->vsl)
+ VSLbv(ctx->vsl, tag, fmt, ap);
+ else
+ VSLv(tag, 0, fmt, ap);
+ va_end(ap);
+}
+
+static int
+shard_lookup(const struct sharddir *shardd, const uint32_t key)
+{
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+
+ const int n = shardd->n_backend * shardd->replicas;
+ int idx = -1, high = n, low = 0, i;
+
+ do {
+ i = (high + low) >> 1;
+ if (shardd->hashcircle[i].point == key)
+ idx = i;
+ else if (i == n - 1)
+ idx = n - 1;
+ else if (shardd->hashcircle[i].point < key &&
+ shardd->hashcircle[i+1].point >= key)
+ idx = i + 1;
+ else if (shardd->hashcircle[i].point > key)
+ if (i == 0)
+ idx = 0;
+ else
+ high = i;
+ else
+ low = i;
+ } while (idx == -1);
+
+ return idx;
+}
+
+static int
+shard_next(struct shard_state *state, VCL_INT skip, VCL_BOOL healthy)
+{
+ int c, chosen = -1;
+ uint32_t ringsz;
+ VCL_BACKEND be;
+ double changed;
+ struct shard_be_info *sbe;
+
+ AN(state);
+ assert(state->idx >= 0);
+ CHECK_OBJ_NOTNULL(state->shardd, SHARDDIR_MAGIC);
+
+ if (state->pickcount >= state->shardd->n_backend)
+ return -1;
+
+ ringsz = state->shardd->n_backend * state->shardd->replicas;
+
+ while (state->pickcount < state->shardd->n_backend && skip >= 0) {
+
+ c = state->shardd->hashcircle[state->idx].host;
+
+ if (! vbit_test(state->picklist, c)) {
+
+ vbit_set(state->picklist, c);
+ state->pickcount++;
+
+ sbe = NULL;
+ be = state->shardd->backend[c].backend;
+ AN(be);
+ if (be->healthy(be, state->ctx->bo, &changed)) {
+ if (skip-- == 0) {
+ chosen = c;
+ sbe = &state->last;
+ } else {
+ sbe = &state->previous;
+ }
+
+ } else if (!healthy && skip-- == 0) {
+ chosen = c;
+ sbe = &state->last;
+ }
+ if (sbe == &state->last &&
+ state->last.hostid != -1)
+ memcpy(&state->previous, &state->last,
+ sizeof(state->previous));
+
+ if (sbe) {
+ sbe->hostid = c;
+ sbe->healthy = 1;
+ sbe->changed = changed;
+ }
+ if (chosen != -1)
+ break;
+ }
+
+ if (++(state->idx) == ringsz)
+ state->idx = 0;
+ }
+ return chosen;
+}
+
+void
+sharddir_new(struct sharddir **sharddp, const char *vcl_name)
+{
+ struct sharddir *shardd;
+
+ AN(vcl_name);
+ AN(sharddp);
+ AZ(*sharddp);
+ ALLOC_OBJ(shardd, SHARDDIR_MAGIC);
+ AN(shardd);
+ *sharddp = shardd;
+ shardd->name = vcl_name;
+ AZ(pthread_rwlock_init(&shardd->mtx, NULL));
+}
+
+void
+sharddir_delete(struct sharddir **sharddp)
+{
+ struct sharddir *shardd;
+
+ AN(sharddp);
+ shardd = *sharddp;
+ *sharddp = NULL;
+
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+ shardcfg_delete(shardd);
+ AZ(pthread_rwlock_destroy(&shardd->mtx));
+ FREE_OBJ(shardd);
+}
+
+void
+sharddir_rdlock(struct sharddir *shardd)
+{
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+ AZ(pthread_rwlock_rdlock(&shardd->mtx));
+}
+
+void
+sharddir_wrlock(struct sharddir *shardd)
+{
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+ AZ(pthread_rwlock_wrlock(&shardd->mtx));
+}
+
+void
+sharddir_unlock(struct sharddir *shardd)
+{
+ CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+ AZ(pthread_rwlock_unlock(&shardd->mtx));
+}
+
+/*
+ * Clamp the VCL-supplied `alt` parameter into [0, n_backend - 1],
+ * logging an Error record via shard_err() when the caller's value was
+ * out of range. Must only be called with n_backend > 0 (the caller
+ * checks this first); otherwise alt_max would underflow to -1.
+ */
+static inline void
+validate_alt(VRT_CTX, struct sharddir *shardd, VCL_INT *alt)
+{
+ const VCL_INT alt_max = shardd->n_backend - 1;
+
+ if (*alt < 0) {
+ shard_err(ctx, shardd,
+ "invalid negative parameter alt=%ld, set to 0", *alt);
+ *alt = 0;
+ } else if (*alt > alt_max) {
+ shard_err(ctx, shardd,
+ "parameter alt=%ld limited to %ld", *alt, alt_max);
+ *alt = alt_max;
+ }
+}
+
+/*
+ * Prepare the per-lookup iteration state used by shard_next().
+ *
+ * picklist marks hosts already visited on the hash ring; idx == -1
+ * means "no ring position yet" and hostid == -1 in previous/last
+ * means "no backend seen yet".
+ */
+static inline void
+init_state(struct shard_state *state,
+ VRT_CTX, struct sharddir *shardd, struct vbitmap *picklist)
+{
+ AN(picklist);
+
+ state->ctx = ctx;
+ state->shardd = shardd;
+ state->idx = -1;
+ state->picklist = picklist;
+ state->pickcount = 0;
+
+ /* healthy and changed only defined for hostid != -1 */
+ state->previous.hostid = -1;
+ state->last.hostid = -1;
+}
+
+/*
+ * core function for the director backend method
+ *
+ * Returns the chosen VCL backend, or NULL if no backend could be
+ * selected (no backends configured, or none acceptable).
+ *
+ * while other directors return a reference to their own backend object (on
+ * which varnish will call the resolve method to resolve to a non-director
+ * backend), this director immediately resolves in the backend method, to make
+ * the director choice visible in VCL
+ *
+ * consequences:
+ * - we need no own struct director
+ * - we can only respect a busy object when being called on the backend side,
+ *   which probably is, for all practical purposes, only relevant when the
+ *   saintmode vmod is used
+ *
+ * if we wanted to offer delayed resolution, we'd need something like
+ * per-request per-director state or we'd need to return a dynamically created
+ * director object. That should be straightforward once we got director
+ * refcounting #2072. Until then, we could create it on the workspace, but then
+ * we'd need to keep other directors from storing any references to our dynamic
+ * object for longer than the current task
+ *
+ */
+VCL_BACKEND
+sharddir_pick_be(VRT_CTX, struct sharddir *shardd,
+    uint32_t key, VCL_INT alt, VCL_REAL warmup, VCL_BOOL rampup,
+    enum healthy_e healthy)
+{
+	VCL_BACKEND be;
+	struct shard_state state;
+	/* NOTE(review): n_backend is read before the rdlock is taken -
+	 * presumably reconfiguration cannot run concurrently with
+	 * request handling here; verify against shardcfg locking */
+	unsigned picklist_sz = VBITMAP_SZ(shardd->n_backend);
+	char picklist_spc[picklist_sz];
+	VCL_DURATION chosen_r, alt_r;
+
+	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
+	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
+	AN(ctx->vsl);
+
+	sharddir_rdlock(shardd);
+	if (shardd->n_backend == 0) {
+		/*
+		 * bail out directly: must not jump to the err label,
+		 * which destroys state.picklist - state is not
+		 * initialized yet at this point
+		 */
+		shard_err0(ctx, shardd, "no backends");
+		sharddir_unlock(shardd);
+		return (NULL);
+	}
+
+	assert(shardd->hashcircle);
+
+	validate_alt(ctx, shardd, &alt);
+
+	init_state(&state, ctx, shardd, vbit_init(picklist_spc, picklist_sz));
+
+	state.idx = shard_lookup(shardd, key);
+	assert(state.idx >= 0);
+
+	SHDBG(SHDBG_LOOKUP, shardd, "lookup key %x idx %d host %d",
+	    key, state.idx, shardd->hashcircle[state.idx].host);
+
+	if (alt > 0) {
+		/* skip to the alt-th backend; fall back to the last
+		 * (healthy) backend skipped if we run out */
+		if (shard_next(&state, alt - 1, healthy == ALL) == -1) {
+			if (state.previous.hostid != -1) {
+				be = sharddir_backend(shardd,
+				    state.previous.hostid);
+				goto ok;
+			}
+			goto err;
+		}
+	}
+
+	if (shard_next(&state, 0, healthy != IGNORE) == -1) {
+		if (state.previous.hostid != -1) {
+			be = sharddir_backend(shardd, state.previous.hostid);
+			goto ok;
+		}
+		goto err;
+	}
+
+	be = sharddir_backend(shardd, state.last.hostid);
+
+	/* warmup == -1 means "use the director default" */
+	if (warmup == -1)
+		warmup = shardd->warmup;
+
+	/* short path for cases where we don't want rampup/warmup or can't */
+	if (alt > 0 || healthy == IGNORE || (! rampup && warmup == 0) ||
+	    shard_next(&state, 0, 0) == -1)
+		goto ok;
+
+	assert(alt == 0);
+	assert(state.previous.hostid >= 0);
+	assert(state.last.hostid >= 0);
+	assert(state.previous.hostid != state.last.hostid);
+	assert(be == sharddir_backend(shardd, state.previous.hostid));
+
+	chosen_r = shardcfg_get_rampup(shardd, state.previous.hostid);
+	alt_r = shardcfg_get_rampup(shardd, state.last.hostid);
+
+	SHDBG(SHDBG_RAMPWARM, shardd, "chosen host %d rampup %f changed %f",
+	    state.previous.hostid, chosen_r,
+	    ctx->now - state.previous.changed);
+	SHDBG(SHDBG_RAMPWARM, shardd, "alt host %d rampup %f changed %f",
+	    state.last.hostid, alt_r,
+	    ctx->now - state.last.changed);
+
+	if (ctx->now - state.previous.changed < chosen_r) {
+		/*
+		 * chosen host is in rampup
+		 * - no change if alternative host is also in rampup or the dice
+		 *   has rolled in favour of the chosen host
+		 */
+		if (! rampup ||
+		    ctx->now - state.last.changed < alt_r ||
+		    VRND_RandomTestableDouble() * chosen_r <
+		    (ctx->now - state.previous.changed))
+			goto ok;
+	} else {
+		/* chosen host not in rampup - warmup ? */
+		if (warmup == 0 || VRND_RandomTestableDouble() > warmup)
+			goto ok;
+	}
+
+	be = sharddir_backend(shardd, state.last.hostid);
+
+  ok:
+	AN(be);
+	sharddir_unlock(shardd);
+	vbit_destroy(state.picklist);
+	return (be);
+  err:
+	sharddir_unlock(shardd);
+	vbit_destroy(state.picklist);
+	return (NULL);
+}
diff --git a/lib/libvmod_directors/shard_dir.h b/lib/libvmod_directors/shard_dir.h
new file mode 100644
index 0000000..b66fca1
--- /dev/null
+++ b/lib/libvmod_directors/shard_dir.h
@@ -0,0 +1,117 @@
+/*-
+ * Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
+ * All rights reserved.
+ *
+ * Authors: Julian Wiesener <jw at uplex.de>
+ * Nils Goroll <slink at uplex.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "shard_parse_vcc_enums.h"
+
+struct vbitmap;
+
+/* one point on the consistent-hashing ring: hash value -> backend index */
+struct shard_circlepoint {
+ uint32_t point;
+ unsigned int host;
+};
+
+/* per-backend configuration, as added via shard.add_backend() */
+struct shard_backend {
+ VCL_BACKEND backend;
+ const char *ident; // XXX COPY IN !
+ VCL_DURATION rampup; /* slow-start period after going healthy */
+ uint32_t canon_point; /* presumably the backend's canonical hash
+ * value used by reconfigure - TODO confirm */
+};
+
+#define SHDBG_LOOKUP 1
+#define SHDBG_CIRCLE (1<<1)
+#define SHDBG_RAMPWARM (1<<2)
+
+/* shard director instance; mtx guards the configuration fields below it */
+struct sharddir {
+ unsigned magic;
+#define SHARDDIR_MAGIC 0xdbb7d59f
+ uint32_t debug_flags; /* SHDBG_* bits */
+
+ pthread_rwlock_t mtx;
+
+ const char *name; /* VCL name, stored by reference */
+
+ unsigned n_backend; /* number of backends in use */
+ unsigned l_backend; /* presumably allocated length of backend[]
+ * - TODO confirm against shard_cfg.c */
+ struct shard_backend *backend;
+
+ struct shard_circlepoint *hashcircle; /* built by reconfigure() */
+
+ VCL_DURATION rampup_duration; /* default rampup, see set_rampup() */
+ VCL_REAL warmup; /* default warmup probability, see set_warmup() */
+ VCL_INT replicas; /* ring points per backend */
+};
+
+/* Map a host index to its VCL backend; id must be in [0, n_backend). */
+static inline VCL_BACKEND
+sharddir_backend(const struct sharddir *shardd, int id)
+{
+ assert(id >= 0);
+ assert(id < shardd->n_backend);
+ return (shardd->backend[id].backend);
+}
+
+/* Map a host index to its identification string (hash ring input). */
+static inline const char *
+sharddir_backend_ident(const struct sharddir *shardd, int host)
+{
+ assert(host >= 0);
+ assert(host < shardd->n_backend);
+ return (shardd->backend[host].ident);
+}
+
+/* Emit a shard debug VSL record iff the given debug flag is enabled. */
+#define SHDBG(flag, shardd, ...) \
+ do { \
+ if ((shardd)->debug_flags & (flag)) \
+ VSL(SLT_Debug, 0, "shard: " __VA_ARGS__); \
+ } while (0)
+
+/* Log a formatted shard Error record, prefixed with the VCL name. */
+#define shard_err(ctx, shardd, fmt, ...) \
+ do { \
+ sharddir_err(ctx, SLT_Error, "shard %s: " fmt, \
+ (shardd)->name, __VA_ARGS__); \
+ } while (0)
+
+/* Like shard_err() for a plain string (no format arguments). */
+#define shard_err0(ctx, shardd, s) \
+ do { \
+ sharddir_err(ctx, SLT_Error, "shard %s: %s", \
+ (shardd)->name, (s)); \
+ } while (0)
+
+/* in shard_dir.c */
+void sharddir_debug(struct sharddir *shardd, const uint32_t flags);
+void sharddir_err(VRT_CTX, enum VSL_tag_e tag, const char *fmt, ...);
+void sharddir_new(struct sharddir **sharddp, const char *vcl_name);
+void sharddir_delete(struct sharddir **sharddp);
+void sharddir_init_hashcircle(struct sharddir *shardd, VCL_INT replicas);
+void sharddir_rdlock(struct sharddir *shardd);
+void sharddir_wrlock(struct sharddir *shardd);
+void sharddir_unlock(struct sharddir *shardd);
+VCL_BACKEND sharddir_pick_be(VRT_CTX, struct sharddir *, uint32_t, VCL_INT,
+ VCL_REAL, VCL_BOOL, enum healthy_e);
diff --git a/lib/libvmod_directors/shard_hash.c b/lib/libvmod_directors/shard_hash.c
new file mode 100644
index 0000000..9fc207e
--- /dev/null
+++ b/lib/libvmod_directors/shard_hash.c
@@ -0,0 +1,105 @@
+/*-
+ * Copyright 2009-2013 UPLEX - Nils Goroll Systemoptimierung
+ * All rights reserved.
+ *
+ * Authors: Nils Goroll <nils.goroll at uplex.de>
+ * Geoffrey Simmons <geoff.simmons at uplex.de>
+ * Julian Wiesener <jw at uplex.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include "cache/cache.h"
+
+#include "vrt.h"
+#include "crc32.h"
+#include "vsha256.h"
+
+#include "shard_parse_vcc_enums.h"
+#include "shard_hash.h"
+
+/*
+ * CRC32 of the string, finalized with the customary xor against ~0
+ * and incremented by one.
+ */
+static uint32_t __match_proto__(hash_func)
+shard_hash_crc32(VCL_STRING s)
+{
+	return ((crc32(~0U, (const unsigned char *)s, strlen(s)) ^ ~0U) + 1);
+}
+
+/* SHA256 the string and take 32 bits of the digest as the key. */
+static uint32_t __match_proto__(hash_func)
+shard_hash_sha256(VCL_STRING s)
+{
+ struct SHA256Context sha256;
+ union {
+ unsigned char digest[32];
+ uint32_t uint32_digest[8];
+ } sha256_digest;
+
+ SHA256_Init(&sha256);
+ SHA256_Update(&sha256, s, strlen(s));
+ SHA256_Final(sha256_digest.digest, &sha256);
+
+ /*
+ * use low 32 bits only
+ * XXX: Are these the best bits to pick?
+ */
+ return (sha256_digest.uint32_digest[7]);
+}
+
+/* string hash from Robert Sedgewick's 'Algorithms in C' book */
+static uint32_t __match_proto__(hash_func)
+shard_hash_rs(VCL_STRING s)
+{
+	const uint32_t mult_step = 378551;
+	uint32_t mult = 63689;
+	uint32_t h = 0;
+	const char *p;
+
+	for (p = s; *p != '\0'; p++) {
+		h = h * mult + (uint32_t)*p;
+		mult *= mult_step;
+	}
+	return (h);
+}
+
+/*
+ * Panic handler for the _ALG_E_INVALID table slot: parse_alg_e()
+ * rejects unknown algorithm names, so this must never be reached.
+ * (Fix: the panic message previously named the nonexistent
+ * identifier "_ALG_E_ENVALID".)
+ */
+static uint32_t __match_proto__(hash_func)
+_shard_hash_invalid(VCL_STRING s)
+{
+	(void) s;
+	WRONG("invalid hash fp _ALG_E_INVALID");
+	return (0);
+}
+
+/* hash function dispatch table, indexed by enum alg_e */
+hash_func shard_hash_f[_ALG_E_MAX] = {
+ [_ALG_E_INVALID] = _shard_hash_invalid,
+ [CRC32] = shard_hash_crc32,
+ [SHA256] = shard_hash_sha256,
+ [RS] = shard_hash_rs
+};
diff --git a/lib/libvmod_directors/shard_hash.h b/lib/libvmod_directors/shard_hash.h
new file mode 100644
index 0000000..5ee38ad
--- /dev/null
+++ b/lib/libvmod_directors/shard_hash.h
@@ -0,0 +1,30 @@
+/*-
+ * Copyright 2009-2013 UPLEX - Nils Goroll Systemoptimierung
+ * All rights reserved.
+ *
+ * Author: Julian Wiesener <jw at uplex.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+typedef uint32_t (*hash_func)(VCL_STRING);
+extern hash_func shard_hash_f[_ALG_E_MAX];
diff --git a/lib/libvmod_directors/shard_parse_vcc_enums.c b/lib/libvmod_directors/shard_parse_vcc_enums.c
new file mode 100644
index 0000000..5dec48e
--- /dev/null
+++ b/lib/libvmod_directors/shard_parse_vcc_enums.c
@@ -0,0 +1,146 @@
+/*
+ * for the time being, this code is auto-generated outside the varnishd source
+ * tree, see
+ * https://code.uplex.de/uplex-varnish/libvmod-vslp/blob/shard/src/gen_enum_parse.pl
+ *
+ * TODO: integrate in vmodtool.py or replace with something else
+ */
+
+#include "shard_parse_vcc_enums.h"
+#define term(c) ((c) == '\0')
+
+
+
+/*
+ * Map a VCL enum string to enum alg_e; returns _ALG_E_INVALID for
+ * anything else. Auto-generated character-trie matcher (see file
+ * header); p records the match length and is intentionally unused.
+ */
+enum alg_e parse_alg_e (const char *m) {
+ int p;
+ enum alg_e r;
+
+ switch (m[0]) {
+ case 'C': goto _0C; // CRC32
+ case 'R': goto _0R; // RS
+ case 'S': goto _0S; // SHA256
+ default: goto invalid;
+ }
+ _0C:
+ //CRC32
+ if ((m[1] == 'R') && (m[2] == 'C') && (m[3] == '3') && (m[4] == '2') && (term(m[5]))) {
+ r = CRC32;
+ p = 5;
+ goto ok;
+ }
+ goto invalid;
+ _0R:
+ //RS
+ if ((m[1] == 'S') && (term(m[2]))) {
+ r = RS;
+ p = 2;
+ goto ok;
+ }
+ goto invalid;
+ _0S:
+ //SHA256
+ if ((m[1] == 'H') && (m[2] == 'A') && (m[3] == '2') && (m[4] == '5') && (m[5] == '6') && (term(m[6]))) {
+ r = SHA256;
+ p = 6;
+ goto ok;
+ }
+ goto invalid;
+ ok:
+ return r;
+ invalid:
+ return _ALG_E_INVALID;
+ (void)p; /* unreachable; silences unused-variable warnings */
+}
+
+
+/*
+ * Map a VCL enum string to enum by_e; returns _BY_E_INVALID for
+ * anything else. Auto-generated matcher, same scheme as parse_alg_e().
+ */
+enum by_e parse_by_e (const char *m) {
+ int p;
+ enum by_e r;
+
+ switch (m[0]) {
+ case 'B': goto _0B; // BLOB
+ case 'H': goto _0H; // HASH
+ case 'K': goto _0K; // KEY
+ case 'U': goto _0U; // URL
+ default: goto invalid;
+ }
+ _0B:
+ //BLOB
+ if ((m[1] == 'L') && (m[2] == 'O') && (m[3] == 'B') && (term(m[4]))) {
+ r = BLOB;
+ p = 4;
+ goto ok;
+ }
+ goto invalid;
+ _0H:
+ //HASH
+ if ((m[1] == 'A') && (m[2] == 'S') && (m[3] == 'H') && (term(m[4]))) {
+ r = HASH;
+ p = 4;
+ goto ok;
+ }
+ goto invalid;
+ _0K:
+ //KEY
+ if ((m[1] == 'E') && (m[2] == 'Y') && (term(m[3]))) {
+ r = KEY;
+ p = 3;
+ goto ok;
+ }
+ goto invalid;
+ _0U:
+ //URL
+ if ((m[1] == 'R') && (m[2] == 'L') && (term(m[3]))) {
+ r = URL;
+ p = 3;
+ goto ok;
+ }
+ goto invalid;
+ ok:
+ return r;
+ invalid:
+ return _BY_E_INVALID;
+ (void)p; /* unreachable; silences unused-variable warnings */
+}
+
+
+/*
+ * Map a VCL enum string to enum healthy_e; returns _HEALTHY_E_INVALID
+ * for anything else. Auto-generated matcher, same scheme as
+ * parse_alg_e().
+ */
+enum healthy_e parse_healthy_e (const char *m) {
+ int p;
+ enum healthy_e r;
+
+ switch (m[0]) {
+ case 'A': goto _0A; // ALL
+ case 'C': goto _0C; // CHOSEN
+ case 'I': goto _0I; // IGNORE
+ default: goto invalid;
+ }
+ _0A:
+ //ALL
+ if ((m[1] == 'L') && (m[2] == 'L') && (term(m[3]))) {
+ r = ALL;
+ p = 3;
+ goto ok;
+ }
+ goto invalid;
+ _0C:
+ //CHOSEN
+ if ((m[1] == 'H') && (m[2] == 'O') && (m[3] == 'S') && (m[4] == 'E') && (m[5] == 'N') && (term(m[6]))) {
+ r = CHOSEN;
+ p = 6;
+ goto ok;
+ }
+ goto invalid;
+ _0I:
+ //IGNORE
+ if ((m[1] == 'G') && (m[2] == 'N') && (m[3] == 'O') && (m[4] == 'R') && (m[5] == 'E') && (term(m[6]))) {
+ r = IGNORE;
+ p = 6;
+ goto ok;
+ }
+ goto invalid;
+ ok:
+ return r;
+ invalid:
+ return _HEALTHY_E_INVALID;
+ (void)p; /* unreachable; silences unused-variable warnings */
+}
diff --git a/lib/libvmod_directors/shard_parse_vcc_enums.h b/lib/libvmod_directors/shard_parse_vcc_enums.h
new file mode 100644
index 0000000..aa097dc
--- /dev/null
+++ b/lib/libvmod_directors/shard_parse_vcc_enums.h
@@ -0,0 +1,42 @@
+/*
+ * for the time being, this code is auto-generated outside the varnishd source
+ * tree, see
+ * https://code.uplex.de/uplex-varnish/libvmod-vslp/blob/shard/src/gen_enum_parse.pl
+ *
+ * TODO: integrate in vmodtool.py or replace with something else
+ */
+
+/* hash algorithms accepted by shard.reconfigure() and shard.key() */
+enum alg_e {
+ _ALG_E_INVALID = 0,
+ CRC32,
+ SHA256,
+ RS,
+ _ALG_E_MAX
+};
+
+
+enum alg_e parse_alg_e (const char *);
+
+/* sources of the sharding key for shard.backend(by=...) */
+enum by_e {
+ _BY_E_INVALID = 0,
+ HASH,
+ URL,
+ KEY,
+ BLOB,
+ _BY_E_MAX
+};
+
+
+enum by_e parse_by_e (const char *);
+
+/* health-state handling modes for shard.backend(healthy=...) */
+enum healthy_e {
+ _HEALTHY_E_INVALID = 0,
+ CHOSEN,
+ IGNORE,
+ ALL,
+ _HEALTHY_E_MAX
+};
+
+
+enum healthy_e parse_healthy_e (const char *);
+
diff --git a/lib/libvmod_directors/vmod.vcc b/lib/libvmod_directors/vmod.vcc
index 4adbfba..4348ba1 100644
--- a/lib/libvmod_directors/vmod.vcc
+++ b/lib/libvmod_directors/vmod.vcc
@@ -1,8 +1,15 @@
#-
+# This document is licensed under the same licence as Varnish
+# itself. See LICENCE for details.
+#
# Copyright (c) 2013-2015 Varnish Software AS
+# Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
# All rights reserved.
#
-# Author: Poul-Henning Kamp <phk at FreeBSD.org>
+# Authors: Poul-Henning Kamp <phk at FreeBSD.org>
+# Julian Wiesener <jw at uplex.de>
+# Nils Goroll <slink at uplex.de>
+# Geoffrey Simmons <geoff at uplex.de>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -32,9 +39,9 @@ DESCRIPTION
`vmod_directors` enables backend load balancing in Varnish.
-The module implements a set of basic load balancing techniques, and
-also serves as an example on how one could extend the load balancing
-capabilities of Varnish.
+The module implements load balancing techniques, and also serves as an
+example on how one could extend the load balancing capabilities of
+Varnish.
To enable load balancing you must import this vmod (directors).
@@ -213,10 +220,302 @@ Example
# pick a backend based on the cookie header from the client
set req.backend_hint = vdir.backend(req.http.cookie);
-COPYRIGHT
-=========
+$Object shard()
+
+Create a shard director.
+
+Note that the shard director needs to be configured using at least one
+``shard.add_backend()`` call **followed by a**
+``shard.reconfigure()`` **call** before it can hand out backends.
+
+Introduction
+````````````
+
+The shard director selects backends by a key, which can be provided
+directly or derived from strings. For the same key, the shard director
+will always return the same backend, unless the backend configuration
+or health state changes. Conversely, for differing keys, the shard
+director will likely choose different backends. In the default
+configuration, unhealthy backends are not selected.
+
+The shard director resembles the hash director, but its main advantage
+is that, when the backend configuration or health states change, the
+association of keys to backends remains as stable as possible.
+
+In addition, the rampup and warmup features can help to further
+improve user-perceived response times.
+
+Sharding
+````````
+
+This basic technique allows for numerous applications like optimizing
+backend server cache efficiency, Varnish clustering or persisting
+sessions to servers without keeping any state, and, in particular,
+without the need to synchronize state between nodes of a cluster of
+Varnish servers:
+
+* Many applications use caches for data objects, so, in a cluster of
+ application servers, requesting similar objects from the same server
+ may help to optimize efficiency of such caches.
+
+ For example, sharding by URL or some `id` component of the url has
+ been shown to drastically improve the efficiency of many content
+ management systems.
+
+* As a special case of the previous example, in clusters of Varnish
+ servers without additional request distribution logic, each cache
+ will need to store all hot objects, so the effective cache size is
+ approximately the smallest cache size of any server in the cluster.
+
+ Sharding allows to segregate objects within the cluster such that
+ each object is only cached on one of the servers (or on one primary
+ and one backup, on a primary for long and others for short
+ etc...). Effectively, this will lead to a cache size in the order of
+ the sum of all individual caches, with the potential to drastically
+ increase efficiency (scales by the number of servers).
+
+* Another application is to implement persistence of backend requests,
+ such that all requests sharing a certain criterion (such as an IP
+ address or session ID) get forwarded to the same backend server.
+
+When used with clusters of varnish servers, the shard director will,
+if otherwise configured equally, make the same decision on all
+servers. In other words, requests sharing a common criterion used as
+the shard key will be balanced onto the same backend server(s) no
+matter which Varnish server handles the request.
+
+The drawbacks are:
+
+* the distribution of requests depends on the number of requests per
+ key and the uniformity of the distribution of key values. In short,
+ while this technique may lead to much better efficiency overall, it
+ may also lead to less good load balancing for specific cases.
+
+* When a backend server becomes unavailable, every persistence
+ technique has to reselect a new backend server, but this technique
+ will also switch back to the preferred server once it becomes
+ healthy again, so when used for persistence, it is generally less
+ stable compared to stateful techniques (which would continue to use
+ a selected server for as long as possible (or dictated by a TTL)).
+
+Method
+``````
+
+When ``.reconfigure()`` is called, a consistent hashing circular data
+structure gets built from hash values of "ident%d" (default ident
+being the backend name) for each backend and for a running number from
+1 to n (n is the number of `replicas`). Hashing creates the seemingly
+random order for placement of backends on the consistent hashing ring.
+
+When ``.backend()`` is called, a load balancing key gets generated
+unless provided. The smallest hash value in the circle is looked up
+that is larger than the key (searching clockwise and wrapping around
+as necessary). The backend for this hash value is the preferred
+backend for the given key.
+
+If a healthy backend is requested, the search is continued linearly on
+the ring as long as backends found are unhealthy, until all backends
+have been checked. The order of these "alternative backends" on the ring
+is likely to differ for different keys. Alternative backends can also
+be selected explicitly.
+
+On consistent hashing see:
+
+* http://www8.org/w8-papers/2a-webserver/caching/paper2.html
+* http://www.audioscrobbler.net/development/ketama/
+* svn://svn.audioscrobbler.net/misc/ketama
+* http://en.wikipedia.org/wiki/Consistent_hashing
+
+Error Reporting
+```````````````
+
+Failing methods should report errors to VSL with the Error tag, so
+when configuring the shard director, you are advised to check::
+
+ varnishlog -I Error:^shard
+
+$Method VOID .set_warmup(REAL probability=0.0)
+
+Set the default warmup probability. See the `warmup` parameter of
+``shard.backend()``.
+
+Default: 0.0 (no warmup)
+
+$Method VOID .set_rampup(DURATION duration=0)
+
+Set the default rampup duration. See `rampup` parameter of
+`shard.backend()`.
+
+Default: 0s (no rampup)
+
+$Method BOOL .add_backend(PRIV_TASK, BACKEND backend,
+ STRING ident=0, DURATION rampup=973279260)
+
+Add a backend `backend` to the director.
+
+`ident`: Optionally specify an identification string for this backend,
+which will be hashed by `shard.reconfigure()` to construct the
+consistent hashing ring. The identification string defaults to the
+backend name.
+
+`ident` allows adding multiple instances of the same backend.
+
+`rampup`: Optionally specify a rampup time for this backend. Defaults
+to the director rampup time.
+
+NOTE: Backend changes need to be finalized with `shard.reconfigure()`
+and are only supported on one shard director at a time.
+
+$Method BOOL .remove_backend(PRIV_TASK, BACKEND backend=0, STRING ident=0)
+
+Remove backend(s) from the director. Either `backend` or `ident` must
+be specified. `ident` removes a specific instance. If `backend` is
+given without `ident`, all instances of this backend are removed.
+
+NOTE: Backend changes need to be finalized with `shard.reconfigure()`
+and are only supported on one shard director at a time.
+
+$Method BOOL .clear(PRIV_TASK)
+
+Remove all backends from the director.
+
+NOTE: Backend changes need to be finalized with `shard.reconfigure()`
+and are only supported on one shard director at a time.
+
+$Method BOOL .reconfigure(PRIV_TASK, INT replicas=67,
+ ENUM { CRC32, SHA256, RS } alg="SHA256")
+
+Reconfigure the consistent hashing ring to reflect backend changes.
+
+This method must be called at least once before the director can be
+used.
+
+$Method INT .key(STRING string, ENUM { CRC32, SHA256, RS } alg="SHA256")
+
+Utility method to generate a sharding key for use with the
+``shard.backend()`` method by hashing `string` with hash algorithm
+`alg`.
+
+$Method BACKEND .backend(
+ ENUM {HASH, URL, KEY, BLOB} by="HASH",
+ INT key=0,
+ BLOB key_blob=0,
+ INT alt=0,
+ REAL warmup=-1,
+ BOOL rampup=1,
+ ENUM {CHOSEN, IGNORE, ALL} healthy="CHOSEN")
+
+
+Lookup a backend on the consistent hashing ring.
+
+This documentation uses the notion of an order of backends for a
+particular shard key. This order is deterministic but seemingly random
+as determined by the consistent hashing algorithm and is likely to
+differ for different keys, depending on the number of backends and the
+number of replicas. In particular, the backend order referred to here
+is _not_ the order given when backends are added.
+
+* `by` how to determine the sharding key
+
+ default: `HASH`
+
+ * `HASH`:
+
+ * when called in backend context: Use the varnish hash value as
+ set by `vcl_hash`
+
+ * when called in client content: hash `req.url`
+
+ * `URL`: hash req.url / bereq.url
+
+ * `KEY`: use the `key` argument
+
+ * `BLOB`: use the `key_blob` argument
+
+ * `key` lookup key with `by=KEY`
+
+ the `shard.key()` function may come handy to generate a sharding
+ key from custom strings.
+
+ * `key_blob` lookup key with `by=BLOB`
+
+ Currently, this uses the first 4 bytes from the given blob in
+ network byte order (big endian), left-padded with zeros for blobs
+ smaller than 4 bytes.
+
+* `alt` alternative backend selection
+
+ default: `0`
+
+ Select the `alt`-th alternative backend for the given `key`.
+
+ This is particularly useful for retries / restarts due to backend
+ errors: By setting `alt=req.restarts` or `alt=bereq.retries` with
+ healthy=ALL, another server gets selected.
+
+ The rampup and warmup features are only active for `alt==0`
+
+* `rampup` slow start for servers which just went healthy
+
+ default: `true`
+
+ If `alt==0` and the chosen backend is in its rampup period, with a
+ probability proportional to the fraction of time since the backend
+ became healthy to the rampup period, return the next alternative
+ backend, unless this is also in its rampup period.
+
+ The default rampup interval can be set per shard director using the
+ `set_rampup()` method or specifically per backend with the
+ `set_backend()` method.
+
+* `warmup` probabilistic alternative server selection
+
+ possible values: -1, 0..1
+
+ default: `-1`
+
+ `-1`: use the warmup probability from the director definition
+
+ Only used for `alt==0`: Sets the ratio of requests (0.0 to 1.0) that
+ goes to the next alternate backend to warm it up when the preferred
+ backend is healthy. Not active if any of the preferred or
+ alternative backend are in rampup.
+
+ `warmup=0.5` is a convenient way to spread the load for each key
+ over two backends under normal operating conditions.
+
+* `healthy`
+
+ default: `CHOSEN`
+
+ * CHOSEN: Return a healthy backend if possible.
+
+ For `alt==0`, return the first healthy backend or none.
+
+ For `alt > 0`, ignore the health state of backends skipped for
+ alternative backend selection, then return the next healthy
+ backend. If this does not exist, return the last healthy backend
+ of those skipped or none.
+
+ * IGNORE: Completely ignore backend health state
+
+ Just return the first or `alt`-th alternative backend, ignoring
+ health state. Ignore `rampup` and `warmup`.
+
+ * ALL: Check health state also for alternative backend selection
+
+ For `alt > 0`, return the `alt`-th alternative backend of all
+ those healthy, the last healthy backend found or none.
+
+$Method VOID .debug(INT)
+
+`intentionally undocumented`
+
+ACKNOWLEDGEMENTS
+================
-This document is licensed under the same licence as Varnish
-itself. See LICENCE for details.
+Development of a previous version of the shard director was partly sponsored
+by Deutsche Telekom AG – Products & Innovation.
-* Copyright (c) 2013-2015 Varnish Software AS
+Development of this version of the shard director was partly sponsored
+by BILD GmbH & Co KG.
diff --git a/lib/libvmod_directors/vmod_shard.c b/lib/libvmod_directors/vmod_shard.c
new file mode 100644
index 0000000..7acb59b
--- /dev/null
+++ b/lib/libvmod_directors/vmod_shard.c
@@ -0,0 +1,278 @@
+/*-
+ * Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
+ * All rights reserved.
+ *
+ * Authors: Julian Wiesener <jw at uplex.de>
+ * Nils Goroll <slink at uplex.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "cache/cache.h"
+
+#include "vrt.h"
+#include "vend.h"
+
+#include "vcc_if.h"
+#include "shard_dir.h"
+#include "shard_cfg.h"
+#include "shard_hash.h"
+
+/* Per-VCL-instance state of one shard director object. */
+struct vmod_directors_shard {
+	unsigned magic;
+#define VMOD_SHARD_SHARD_MAGIC 0x6e63e1bf
+	/* owned: created in vmod_shard__init(), freed in vmod_shard__fini() */
+	struct sharddir *shardd;
+};
+
+VCL_VOID __match_proto__(td_directors_shard__init)
+vmod_shard__init(VRT_CTX, struct vmod_directors_shard **vshardp,
+    const char *vcl_name)
+{
+	struct vmod_directors_shard *vshard;
+	VCL_INT via;
+	uint32_t before, after;
+
+	/*
+	 * The .key() method (see comment above vmod_shard_key()) round-trips
+	 * a uint32_t through VCL_INT.  Prove once, at object creation, that
+	 * VCL_INT is a wide enough container for that to be lossless.
+	 */
+	assert(sizeof(VCL_INT) >= sizeof(uint32_t));
+	before = UINT32_MAX;
+	via = (VCL_INT)before;
+	after = (uint32_t)via;
+	assert(before == after);
+
+	(void) ctx;
+	AN(vshardp);
+	AZ(*vshardp);
+	ALLOC_OBJ(vshard, VMOD_SHARD_SHARD_MAGIC);
+	AN(vshard);
+
+	*vshardp = vshard;
+	sharddir_new(&vshard->shardd, vcl_name);
+}
+
+VCL_VOID __match_proto__(td_directors_shard__fini)
+vmod_shard__fini(struct vmod_directors_shard **vshardp)
+{
+	struct vmod_directors_shard *vshard;
+
+	/* detach from the caller first, then validate and tear down */
+	vshard = *vshardp;
+	*vshardp = NULL;
+	CHECK_OBJ_NOTNULL(vshard, VMOD_SHARD_SHARD_MAGIC);
+	sharddir_delete(&vshard->shardd);
+	FREE_OBJ(vshard);
+}
+
+/*
+ * our key is a uint32_t, but VCL_INT is a (signed) long. We cast back and
+ * forth, asserting in vmod_shard__init() that VCL_INT is a large enough
+ * container
+ */
+VCL_INT __match_proto__(td_directors_shard_key)
+vmod_shard_key(VRT_CTX, struct vmod_directors_shard *vshard,
+    VCL_STRING s, VCL_ENUM alg_s)
+{
+	enum alg_e alg = parse_alg_e(alg_s);
+	hash_func hash_fp = shard_hash_f[alg];
+
+	(void) ctx;
+	(void) vshard;
+
+	/* a NULL VCL string hashes like the empty string */
+	return ((VCL_INT)hash_fp(s ? s : ""));
+}
+
+/*
+ * Set the director-wide default warmup probability.
+ * Values outside [0, 1) are rejected with a shard error log entry.
+ */
+VCL_VOID __match_proto__(td_directors_shard_set_warmup)
+vmod_shard_set_warmup(VRT_CTX, struct vmod_directors_shard *vshard,
+    VCL_REAL probability)
+{
+	CHECK_OBJ_NOTNULL(vshard, VMOD_SHARD_SHARD_MAGIC);
+	if (probability < 0 || probability >= 1) {
+		shard_err(ctx, vshard->shardd,
+		    ".set_warmup(%f) ignored", probability);
+		return;
+	}
+	shardcfg_set_warmup(vshard->shardd, probability);
+}
+
+/* Set the director-wide default rampup duration (passed to shardcfg). */
+VCL_VOID __match_proto__(td_directors_shard_set_rampup)
+vmod_shard_set_rampup(VRT_CTX, struct vmod_directors_shard *vshard,
+    VCL_DURATION duration)
+{
+	(void) ctx;
+	CHECK_OBJ_NOTNULL(vshard, VMOD_SHARD_SHARD_MAGIC);
+	shardcfg_set_rampup(vshard->shardd, duration);
+}
+
+/*
+ * Add a backend (with optional ident and per-backend rampup) to the
+ * director configuration.  Returns false and logs an error for a NULL
+ * backend; otherwise the result is whatever shardcfg_add_backend() reports.
+ */
+VCL_BOOL __match_proto__(td_directors_shard_add_backend)
+vmod_shard_add_backend(VRT_CTX, struct vmod_directors_shard *vshard,
+    struct vmod_priv *priv,
+    VCL_BACKEND be, VCL_STRING ident, VCL_DURATION rampup)
+{
+	CHECK_OBJ_NOTNULL(vshard, VMOD_SHARD_SHARD_MAGIC);
+
+	if (be == NULL) {
+		/* message names the actual VCL method (.add_backend) */
+		shard_err0(ctx, vshard->shardd,
+		    ".add_backend() NULL backend given");
+		return 0;
+	}
+
+	return shardcfg_add_backend(ctx, priv, vshard->shardd,
+	    be, ident, rampup);
+}
+
+/*
+ * Remove backend(s) by backend pointer and/or ident.  At least one of the
+ * two selectors must be given; otherwise an error is logged and false
+ * returned.
+ */
+VCL_BOOL __match_proto__(td_directors_shard_remove_backend)
+vmod_shard_remove_backend(VRT_CTX, struct vmod_directors_shard *vshard,
+    struct vmod_priv *priv,
+    VCL_BACKEND be, VCL_STRING ident)
+{
+	CHECK_OBJ_NOTNULL(vshard, VMOD_SHARD_SHARD_MAGIC);
+
+	if (be == NULL && ident == NULL) {
+		/* message names the actual VCL method (.remove_backend) */
+		shard_err0(ctx, vshard->shardd,
+		    ".remove_backend() at least one of backend "
+		    "and ident must be given");
+		return 0;
+	}
+
+	return shardcfg_remove_backend(ctx, priv, vshard->shardd,
+	    be, ident);
+}
+
+VCL_BOOL __match_proto__(td_directors_shard_clear)
+vmod_shard_clear(VRT_CTX, struct vmod_directors_shard *vshard,
+    struct vmod_priv *priv)
+{
+	VCL_BOOL ret;
+
+	/* drop all configured backends; shardcfg reports success */
+	CHECK_OBJ_NOTNULL(vshard, VMOD_SHARD_SHARD_MAGIC);
+	ret = shardcfg_clear(ctx, priv, vshard->shardd);
+	return (ret);
+}
+
+/*
+ * Rebuild the consistent-hash ring with the given replica count and
+ * hash algorithm.
+ */
+VCL_BOOL __match_proto__(td_directors_shard_reconfigure)
+vmod_shard_reconfigure(VRT_CTX, struct vmod_directors_shard *vshard,
+    struct vmod_priv *priv, VCL_INT replicas, VCL_ENUM alg_s)
+{
+	enum alg_e alg = parse_alg_e(alg_s);
+
+	/* consistency: every other method validates the object magic */
+	CHECK_OBJ_NOTNULL(vshard, VMOD_SHARD_SHARD_MAGIC);
+	return shardcfg_reconfigure(ctx, priv, vshard->shardd, replicas, alg);
+}
+
+/*
+ * Derive the 32-bit shard key for the current request according to `by`:
+ *
+ *  HASH: backend side, use the busyobj digest; client side, fall
+ *        through to URL
+ *  URL:  hash (SHA256) of the (be)req URL
+ *  KEY:  caller-supplied integer, truncated to 32 bits
+ *  BLOB: big-endian value of the first (up to) 4 blob bytes,
+ *        right-aligned and zero-padded if the blob is shorter
+ *
+ * For BLOB, the caller (vmod_shard_backend) has already ruled out
+ * NULL/empty blobs, hence the asserts.
+ */
+static inline uint32_t
+get_key(VRT_CTX, enum by_e by, VCL_INT key_int, VCL_BLOB key_blob)
+{
+	struct http *http;
+	uint8_t k[4] = { 0 };
+	const uint8_t *b;
+	int i, ki;
+
+	switch (by) {
+	case HASH:
+		if (ctx->bo) {
+			CHECK_OBJ_NOTNULL(ctx->bo, BUSYOBJ_MAGIC);
+			return (vbe32dec(ctx->bo->digest));
+		}
+		/* FALLTHROUGH */
+	case URL:
+		if (ctx->http_req) {
+			AN(http = ctx->http_req);
+		} else {
+			AN(ctx->http_bereq);
+			AN(http = ctx->http_bereq);
+		}
+		return (shard_hash_f[SHA256](http->hd[HTTP_HDR_URL].b));
+	case KEY:
+		return ((uint32_t)key_int);
+	case BLOB:
+		assert(key_blob);
+		assert(key_blob->len > 0);
+		assert(key_blob->priv != NULL);
+
+		/* right-align short blobs within the 4-byte buffer */
+		if (key_blob->len >= 4)
+			ki = 0;
+		else
+			ki = 4 - key_blob->len;
+
+		b = key_blob->priv;
+		for (i = 0; ki < 4; i++, ki++)
+			k[ki] = b[i];
+		assert(i <= key_blob->len);
+
+		return (vbe32dec(k));
+	default:
+		WRONG("by value");
+	}
+}
+
+/*
+ * Pick a backend for the current request.
+ *
+ * Validates that key/key_blob arguments are only used with their
+ * matching `by` mode, degrades by=BLOB with a missing/empty blob to
+ * key 0 (with an error log entry), then resolves the key via get_key()
+ * and delegates selection to sharddir_pick_be().
+ */
+VCL_BACKEND __match_proto__(td_directors_shard_backend)
+vmod_shard_backend(VRT_CTX, struct vmod_directors_shard *vshard,
+    VCL_ENUM by_s, VCL_INT key_int, VCL_BLOB key_blob, VCL_INT alt,
+    VCL_REAL warmup, VCL_BOOL rampup, VCL_ENUM healthy_s)
+{
+	enum by_e by = parse_by_e(by_s);
+	enum healthy_e healthy = parse_healthy_e(healthy_s);
+
+	uint32_t key;
+
+	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
+	CHECK_OBJ_NOTNULL(vshard, VMOD_SHARD_SHARD_MAGIC);
+
+	/* a key argument for a non-KEY mode is a caller error */
+	if (key_int && by != KEY) {
+		shard_err(ctx, vshard->shardd,
+		    "by=%s but key argument used", by_s);
+		return NULL;
+	}
+
+	/* likewise a blob argument for a non-BLOB mode */
+	if (key_blob && by != BLOB) {
+		shard_err(ctx, vshard->shardd,
+		    "by=%s but key_blob argument used", by_s);
+		return NULL;
+	}
+
+	/* empty/missing blob: log and fall back to key 0 (best effort) */
+	if (by == BLOB) {
+		if (key_blob == NULL ||
+		    key_blob->len <= 0 ||
+		    key_blob->priv == NULL) {
+			shard_err0(ctx, vshard->shardd,
+			    "by=BLOB but no or empty key_blob "
+			    "- using key 0");
+			by = KEY;
+			key_int = 0;
+		}
+	}
+
+	key = get_key(ctx, by, key_int, key_blob);
+
+	return (sharddir_pick_be(ctx, vshard->shardd,
+	    key, alt, warmup, rampup, healthy));
+}
+
+/* Set director debug flags ("intentionally undocumented" in the VCC). */
+VCL_VOID __match_proto__(td_directors_shard_debug)
+vmod_shard_debug(VRT_CTX, struct vmod_directors_shard *vshard,
+    VCL_INT i)
+{
+	CHECK_OBJ_NOTNULL(vshard, VMOD_SHARD_SHARD_MAGIC);
+
+	(void) ctx;
+	/* mask the VCL_INT down to 32 bits for the director core */
+	sharddir_debug(vshard->shardd, i & UINT32_MAX);
+}
More information about the varnish-commit
mailing list