| | varnish-cache/vmod/vmod_directors_shard_dir.c |
0 |
|
/*- |
1 |
|
* Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung |
2 |
|
* All rights reserved. |
3 |
|
* |
4 |
|
* Authors: Nils Goroll <nils.goroll@uplex.de> |
5 |
|
* Geoffrey Simmons <geoff.simmons@uplex.de> |
6 |
|
* Julian Wiesener <jw@uplex.de> |
7 |
|
* |
8 |
|
* SPDX-License-Identifier: BSD-2-Clause |
9 |
|
* |
10 |
|
* Redistribution and use in source and binary forms, with or without |
11 |
|
* modification, are permitted provided that the following conditions |
12 |
|
* are met: |
13 |
|
* 1. Redistributions of source code must retain the above copyright |
14 |
|
* notice, this list of conditions and the following disclaimer. |
15 |
|
* 2. Redistributions in binary form must reproduce the above copyright |
16 |
|
* notice, this list of conditions and the following disclaimer in the |
17 |
|
* documentation and/or other materials provided with the distribution. |
18 |
|
* |
19 |
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
20 |
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 |
|
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
23 |
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 |
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
25 |
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 |
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 |
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 |
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 |
|
* SUCH DAMAGE. |
30 |
|
*/ |
31 |
|
|
32 |
|
/*lint -e801 */ |
33 |
|
|
34 |
|
#include "config.h" |
35 |
|
|
36 |
|
#include <stdlib.h> |
37 |
|
#include <stdio.h> |
38 |
|
#include <time.h> |
39 |
|
#include <limits.h> |
40 |
|
|
41 |
|
#include "cache/cache.h" |
42 |
|
|
43 |
|
#include "vbm.h" |
44 |
|
#include "vrnd.h" |
45 |
|
|
46 |
|
#include "vcc_directors_if.h" |
47 |
|
#include "vmod_directors_shard_dir.h" |
48 |
|
|
49 |
|
/* Snapshot of one backend's health as observed during a circle walk. */
struct shard_be_info {
	unsigned	hostid;		/* index into shardd->backend[] */
	unsigned	healthy;	/* nonzero if seen healthy */
	double		changed;	/* when the health state last changed */
};
54 |
|
|
55 |
|
/*
 * circle walk state for shard_next
 *
 * The picklist bitmap / pickcount cut off the search after having seen
 * all possible backends, so a walk always terminates.
 */
struct shard_state {
	const struct vrt_ctx	*ctx;		/* request context for health checks */
	struct sharddir		*shardd;	/* director being walked */
	uint32_t		idx;		/* current position on the hash circle */

	struct vbitmap		*picklist;	/* hosts already considered */
	unsigned		pickcount;	/* number of distinct hosts seen */

	struct shard_be_info	previous;	/* next-to-last candidate picked */
	struct shard_be_info	last;		/* last (chosen) candidate */
};
71 |
|
|
72 |
|
/* Set the director's debug flag mask (consumed by the SHDBG macro). */
void
sharddir_debug(struct sharddir *shardd, const uint32_t flags)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	shardd->debug_flags = flags;
}
78 |
|
|
79 |
|
void |
80 |
840 |
sharddir_log(struct vsl_log *vsl, enum VSL_tag_e tag, const char *fmt, ...) |
81 |
|
{ |
82 |
|
va_list ap; |
83 |
|
|
84 |
840 |
va_start(ap, fmt); |
85 |
840 |
if (vsl != NULL) |
86 |
400 |
VSLbv(vsl, tag, fmt, ap); |
87 |
|
else |
88 |
440 |
VSLv(tag, NO_VXID, fmt, ap); |
89 |
840 |
va_end(ap); |
90 |
840 |
} |
91 |
|
|
92 |
|
/*
 * Binary search on the sorted hash circle: return the index of the first
 * circle point >= key, wrapping to the last slot when key is beyond all
 * points.  NOTE(review): return type is int while indices are uint32_t;
 * presumably n_points stays well below INT_MAX in practice — confirm.
 */
static int
shard_lookup(const struct sharddir *shardd, const uint32_t key)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);

	const uint32_t n = shardd->n_points;
	uint32_t i, idx = UINT32_MAX, high = n, low = 0;

	/* UINT32_MAX doubles as the "not found yet" sentinel below */
	assert (n < idx);

	do {
		i = (high + low) / 2 ;
		if (shardd->hashcircle[i].point == key)
			idx = i;
		else if (i == n - 1)
			/* key is past the last point: wrap to slot n-1 */
			idx = n - 1;
		else if (shardd->hashcircle[i].point < key &&
			 shardd->hashcircle[i+1].point >= key)
			/* key falls between point i and point i+1 */
			idx = i + 1;
		else if (shardd->hashcircle[i].point > key)
			if (i == 0)
				idx = 0;
			else
				high = i;
		else
			low = i;
	} while (idx == UINT32_MAX);

	return (idx);
}
122 |
|
|
123 |
|
/*
 * Walk the hash circle from state->idx, skipping `skip` eligible hosts,
 * and return the host id of the next pick, or -1 when all backends have
 * been seen.  A host is eligible when it is healthy, or — with
 * healthy == 0 — unconditionally.  The previously chosen candidate is
 * preserved in state->previous so callers can fall back to it.
 */
static int
shard_next(struct shard_state *state, VCL_INT skip, VCL_BOOL healthy)
{
	int c, chosen = -1;
	VCL_BACKEND be;
	vtim_real changed;
	struct shard_be_info *sbe;

	AN(state);
	CHECK_OBJ_NOTNULL(state->shardd, SHARDDIR_MAGIC);

	if (state->pickcount >= state->shardd->n_backend)
		return (-1);

	while (state->pickcount < state->shardd->n_backend && skip >= 0) {

		c = state->shardd->hashcircle[state->idx].host;

		/* each host is considered at most once per walk */
		if (!vbit_test(state->picklist, c)) {

			vbit_set(state->picklist, c);
			state->pickcount++;

			sbe = NULL;
			be = state->shardd->backend[c].backend;
			AN(be);
			if (VRT_Healthy(state->ctx, be, &changed)) {
				/* skip-- has the side effect of consuming
				 * one "skip credit" per eligible host */
				if (skip-- == 0) {
					chosen = c;
					sbe = &state->last;
				} else {
					sbe = &state->previous;
				}

			} else if (!healthy && skip-- == 0) {
				/* healthy==0: unhealthy hosts count too */
				chosen = c;
				sbe = &state->last;
			}
			/* shift last -> previous before overwriting last */
			if (sbe == &state->last &&
			    state->last.hostid != UINT_MAX)
				memcpy(&state->previous, &state->last,
				    sizeof(state->previous));

			if (sbe) {
				sbe->hostid = c;
				/* NOTE(review): healthy is set to 1 even when
				 * the pick came from the unhealthy branch
				 * above — presumably unused downstream;
				 * confirm before relying on this field */
				sbe->healthy = 1;
				sbe->changed = changed;
			}
			if (chosen != -1)
				break;
		}

		/* advance, wrapping around the circle */
		if (++(state->idx) == state->shardd->n_points)
			state->idx = 0;
	}
	return (chosen);
}
180 |
|
|
181 |
|
/*
 * Allocate and initialize a shard director.
 *
 * *sharddp must be NULL on entry and receives the new object; vcl_name
 * and param are retained by reference (caller keeps them alive).
 */
void
sharddir_new(struct sharddir **sharddp, const char *vcl_name,
    const struct vmod_directors_shard_param *param)
{
	struct sharddir *shardd;

	AN(vcl_name);
	AN(sharddp);
	AZ(*sharddp);
	ALLOC_OBJ(shardd, SHARDDIR_MAGIC);
	AN(shardd);
	*sharddp = shardd;
	shardd->name = vcl_name;
	shardd->param = param;
	AZ(pthread_rwlock_init(&shardd->mtx, NULL));
}
197 |
|
|
198 |
|
/* Replace the director's parameter set (stored by reference). */
void
sharddir_set_param(struct sharddir *shardd,
    const struct vmod_directors_shard_param *param)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	shardd->param = param;
}
205 |
|
|
206 |
|
/*
 * Tear down a shard director: release its configuration, destroy the
 * rwlock and free the object.  *sharddp is taken and set to NULL.
 */
void
sharddir_delete(struct sharddir **sharddp)
{
	struct sharddir *shardd;

	TAKE_OBJ_NOTNULL(shardd, sharddp, SHARDDIR_MAGIC);
	shardcfg_delete(shardd);
	AZ(pthread_rwlock_destroy(&shardd->mtx));
	FREE_OBJ(shardd);
}
216 |
|
|
217 |
|
/* Acquire the director's rwlock for reading. */
void
sharddir_rdlock(struct sharddir *shardd)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	AZ(pthread_rwlock_rdlock(&shardd->mtx));
}
223 |
|
|
224 |
|
/* Acquire the director's rwlock for writing. */
void
sharddir_wrlock(struct sharddir *shardd)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	AZ(pthread_rwlock_wrlock(&shardd->mtx));
}
230 |
|
|
231 |
|
/* Release the director's rwlock (read or write side). */
void
sharddir_unlock(struct sharddir *shardd)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	AZ(pthread_rwlock_unlock(&shardd->mtx));
}
237 |
|
|
238 |
|
static inline void |
239 |
4160 |
validate_alt(VRT_CTX, const struct sharddir *shardd, VCL_INT *alt) |
240 |
|
{ |
241 |
4160 |
const VCL_INT alt_max = shardd->n_backend - 1; |
242 |
|
|
243 |
4160 |
if (*alt < 0) { |
244 |
0 |
shard_err(ctx->vsl, shardd->name, |
245 |
|
"invalid negative parameter alt=%ld, set to 0", *alt); |
246 |
0 |
*alt = 0; |
247 |
4160 |
} else if (*alt > alt_max) { |
248 |
120 |
shard_err(ctx->vsl, shardd->name, |
249 |
|
"parameter alt=%ld limited to %ld", *alt, alt_max); |
250 |
120 |
*alt = alt_max; |
251 |
120 |
} |
252 |
4160 |
} |
253 |
|
|
254 |
|
static inline void |
255 |
4160 |
init_state(struct shard_state *state, |
256 |
|
VRT_CTX, struct sharddir *shardd, struct vbitmap *picklist) |
257 |
|
{ |
258 |
4160 |
AN(picklist); |
259 |
|
|
260 |
4160 |
state->ctx = ctx; |
261 |
4160 |
state->shardd = shardd; |
262 |
4160 |
state->idx = UINT32_MAX; |
263 |
4160 |
state->picklist = picklist; |
264 |
|
|
265 |
|
/* healhy and changed only defined for valid hostids */ |
266 |
4160 |
state->previous.hostid = UINT_MAX; |
267 |
4160 |
state->last.hostid = UINT_MAX; |
268 |
4160 |
} |
269 |
|
|
270 |
|
/* basically same as vdir_any_healthy |
271 |
|
* - XXX we should embed a vdir |
272 |
|
* - XXX should we return the health state of the actual backend |
273 |
|
* for healthy=IGNORE ? |
274 |
|
*/ |
275 |
|
VCL_BOOL |
276 |
640 |
sharddir_any_healthy(VRT_CTX, struct sharddir *shardd, VCL_TIME *changed) |
277 |
|
{ |
278 |
640 |
unsigned i, retval = 0; |
279 |
|
VCL_BACKEND be; |
280 |
|
vtim_real c; |
281 |
|
|
282 |
640 |
CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC); |
283 |
640 |
sharddir_rdlock(shardd); |
284 |
640 |
if (changed != NULL) |
285 |
280 |
*changed = 0; |
286 |
640 |
for (i = 0; i < shardd->n_backend; i++) { |
287 |
640 |
be = shardd->backend[i].backend; |
288 |
640 |
CHECK_OBJ_NOTNULL(be, DIRECTOR_MAGIC); |
289 |
640 |
retval = VRT_Healthy(ctx, be, &c); |
290 |
640 |
if (changed != NULL && c > *changed) |
291 |
280 |
*changed = c; |
292 |
640 |
if (retval) |
293 |
640 |
break; |
294 |
0 |
} |
295 |
640 |
sharddir_unlock(shardd); |
296 |
640 |
return (retval); |
297 |
|
} |
298 |
|
|
299 |
|
/*
 * core function for the director backend/resolve method
 *
 * With the director read-locked, map key onto the hash circle, skip
 * `alt` hosts, then apply the rampup/warmup probabilistic choice between
 * the chosen host and the next one on the circle.  Returns NULL only
 * when no candidate at all could be found.
 */

static VCL_BACKEND
sharddir_pick_be_locked(VRT_CTX, const struct sharddir *shardd, uint32_t key,
    VCL_INT alt, VCL_REAL warmup, VCL_BOOL rampup, VCL_ENUM healthy,
    struct shard_state *state)
{
	VCL_BACKEND be;
	VCL_DURATION chosen_r, alt_r;

	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	AN(ctx->vsl);
	assert(shardd->n_backend > 0);

	assert(shardd->hashcircle);

	/* clamp alt into [0, n_backend - 1] */
	validate_alt(ctx, shardd, &alt);

	state->idx = shard_lookup(shardd, key);
	assert(state->idx < UINT32_MAX);

	SHDBG(SHDBG_LOOKUP, shardd, "lookup key %x idx %u host %u",
	    key, state->idx, shardd->hashcircle[state->idx].host);

	/* skip alt hosts first; healthy==ALL requires them to be healthy */
	if (alt > 0) {
		if (shard_next(state, alt - 1,
		    healthy == VENUM(ALL) ? 1 : 0) == -1) {
			/* walk exhausted: fall back to the best we saw */
			if (state->previous.hostid != UINT_MAX) {
				be = sharddir_backend(shardd,
				    state->previous.hostid);
				AN(be);
				return (be);
			}
			return (NULL);
		}
	}

	/* pick the actual candidate (state->last) */
	if (shard_next(state, 0, healthy == VENUM(IGNORE) ? 0 : 1) == -1) {
		if (state->previous.hostid != UINT_MAX) {
			be = sharddir_backend(shardd, state->previous.hostid);
			AN(be);
			return (be);
		}
		return (NULL);
	}

	be = sharddir_backend(shardd, state->last.hostid);
	AN(be);

	/* warmup == -1 means "use the director's configured default" */
	if (warmup == -1)
		warmup = shardd->warmup;

	/* short path for cases where we don't want rampup/warmup, or can't:
	 * the final shard_next() fetches the alternative host to compare
	 * against; without one there is nothing to decide */
	if (alt > 0 || healthy == VENUM(IGNORE) || (!rampup && warmup == 0) ||
	    shard_next(state, 0, 1) == -1)
		return (be);

	/* after the extra shard_next(), the chosen host has shifted into
	 * state->previous and the alternative is state->last */
	assert(alt == 0);
	assert(state->previous.hostid != UINT_MAX);
	assert(state->last.hostid != UINT_MAX);
	assert(state->previous.hostid != state->last.hostid);
	assert(be == sharddir_backend(shardd, state->previous.hostid));

	chosen_r = shardcfg_get_rampup(shardd, state->previous.hostid);
	alt_r = shardcfg_get_rampup(shardd, state->last.hostid);

	SHDBG(SHDBG_RAMPWARM, shardd, "chosen host %u rampup %f changed %f",
	    state->previous.hostid, chosen_r,
	    ctx->now - state->previous.changed);
	SHDBG(SHDBG_RAMPWARM, shardd, "alt host %u rampup %f changed %f",
	    state->last.hostid, alt_r,
	    ctx->now - state->last.changed);

	if (ctx->now - state->previous.changed < chosen_r) {
		/*
		 * chosen host is in rampup
		 * - no change if alternative host is also in rampup or the dice
		 *   has rolled in favour of the chosen host
		 */
		if (!rampup ||
		    ctx->now - state->last.changed < alt_r ||
		    VRND_RandomTestableDouble() * chosen_r <
		    (ctx->now - state->previous.changed))
			return (be);
	} else {
		/* chosen host not in rampup - warmup ? */
		if (warmup == 0 || VRND_RandomTestableDouble() > warmup)
			return (be);
	}

	/* dice chose the alternative host */
	be = sharddir_backend(shardd, state->last.hostid);
	return (be);
}
395 |
|
|
396 |
|
/*
 * Public resolve entry point: take the read lock, set up the circle-walk
 * state with a stack-allocated picklist bitmap and delegate to
 * sharddir_pick_be_locked().  Returns NULL if the director has no
 * backends or no candidate could be found.
 */
VCL_BACKEND
sharddir_pick_be(VRT_CTX, struct sharddir *shardd, uint32_t key, VCL_INT alt,
    VCL_REAL warmup, VCL_BOOL rampup, VCL_ENUM healthy)
{
	VCL_BACKEND be;
	struct shard_state state[1];
	unsigned picklist_sz;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);

	sharddir_rdlock(shardd);

	if (shardd->n_backend == 0) {
		shard_err0(ctx->vsl, shardd->name, "no backends");
		sharddir_unlock(shardd);
		return (NULL);
	}

	/* VLA sized under the lock: n_backend is stable while held.
	 * NOTE(review): presumably n_backend is small enough that this
	 * cannot overflow the stack — confirm any configured limit. */
	picklist_sz = VBITMAP_SZ(shardd->n_backend);
	char picklist_spc[picklist_sz];

	memset(state, 0, sizeof(state));
	init_state(state, ctx, shardd, vbit_init(picklist_spc, picklist_sz));

	be = sharddir_pick_be_locked(ctx, shardd, key, alt, warmup, rampup,
	    healthy, state);
	sharddir_unlock(shardd);

	vbit_destroy(state->picklist);
	return (be);
}