| | varnish-cache/vmod/vmod_directors_shard_dir.c |
0 |
|
/*- |
1 |
|
* Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung |
2 |
|
* All rights reserved. |
3 |
|
* |
4 |
|
* Authors: Nils Goroll <nils.goroll@uplex.de> |
5 |
|
* Geoffrey Simmons <geoff.simmons@uplex.de> |
6 |
|
* Julian Wiesener <jw@uplex.de> |
7 |
|
* |
8 |
|
* SPDX-License-Identifier: BSD-2-Clause |
9 |
|
* |
10 |
|
* Redistribution and use in source and binary forms, with or without |
11 |
|
* modification, are permitted provided that the following conditions |
12 |
|
* are met: |
13 |
|
* 1. Redistributions of source code must retain the above copyright |
14 |
|
* notice, this list of conditions and the following disclaimer. |
15 |
|
* 2. Redistributions in binary form must reproduce the above copyright |
16 |
|
* notice, this list of conditions and the following disclaimer in the |
17 |
|
* documentation and/or other materials provided with the distribution. |
18 |
|
* |
19 |
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
20 |
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 |
|
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
23 |
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 |
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
25 |
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 |
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 |
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 |
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 |
|
* SUCH DAMAGE. |
30 |
|
*/ |
31 |
|
|
32 |
|
/*lint -e801 */ |
33 |
|
|
34 |
|
#include "config.h" |
35 |
|
|
36 |
|
#include <stdlib.h> |
37 |
|
#include <stdio.h> |
38 |
|
#include <time.h> |
39 |
|
#include <limits.h> |
40 |
|
|
41 |
|
#include "cache/cache.h" |
42 |
|
|
43 |
|
#include "vbm.h" |
44 |
|
#include "vrnd.h" |
45 |
|
|
46 |
|
#include "vcc_directors_if.h" |
47 |
|
#include "vmod_directors_shard_dir.h" |
48 |
|
|
49 |
|
/* Snapshot of one backend host taken while walking the hash circle;
 * filled in by shard_next(). */
struct shard_be_info {
	unsigned hostid;	/* index into shardd->backend[]; UINT_MAX = unset */
	unsigned healthy;
	double changed;		// when (time the health state last changed)
};
54 |
|
|
55 |
|
/*
 * circle walk state for shard_next
 *
 * pick* cut off the search after having seen all possible backends
 */
struct shard_state {
	const struct vrt_ctx *ctx;	/* request context for VRT_Healthy() */
	struct sharddir *shardd;	/* the director being resolved */
	uint32_t idx;			/* current position on the hash circle */

	struct vbitmap *picklist;	/* hosts already visited in this walk */
	unsigned pickcount;		/* number of hosts marked in picklist */

	/* last = most recently picked host; previous = a host seen
	 * earlier in the walk (used as fallback) -- see shard_next() */
	struct shard_be_info previous;
	struct shard_be_info last;
};
71 |
|
|
72 |
|
/* Set the debug flag bits (SHDBG_*) that enable SHDBG() log output. */
void
sharddir_debug(struct sharddir *shardd, const uint32_t flags)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	shardd->debug_flags = flags;
}
78 |
|
|
79 |
|
/*
 * Log a formatted message, either into the request's VSL buffer when
 * vsl is non-NULL, or into the global log without a transaction id
 * (NO_VXID) otherwise.
 */
void
sharddir_log(struct vsl_log *vsl, enum VSL_tag_e tag, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	if (vsl != NULL)
		VSLbv(vsl, tag, fmt, ap);
	else
		VSLv(tag, NO_VXID, fmt, ap);
	va_end(ap);
}
91 |
|
|
92 |
|
/*
 * Binary search on the sorted hash circle for the slot responsible for
 * key: the index of the first point >= key.  A key larger than every
 * point resolves to the last slot (n - 1); a key smaller than every
 * point resolves to slot 0.
 */
static int
shard_lookup(const struct sharddir *shardd, const uint32_t key)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);

	const uint32_t n = shardd->n_points;
	uint32_t i, idx = UINT32_MAX, high = n, low = 0;

	/* UINT32_MAX doubles as the "not found yet" sentinel below, so
	 * the circle must be strictly smaller */
	assert (n < idx);

	do {
		i = (high + low) / 2 ;
		if (shardd->hashcircle[i].point == key)
			idx = i;
		else if (i == n - 1)
			idx = n - 1;	/* key beyond the last point */
		else if (shardd->hashcircle[i].point < key &&
		    shardd->hashcircle[i+1].point >= key)
			idx = i + 1;	/* first point >= key */
		else if (shardd->hashcircle[i].point > key)
			if (i == 0)
				idx = 0;	/* key before the first point */
			else
				high = i;
		else
			low = i;
	} while (idx == UINT32_MAX);

	/* NOTE(review): idx is uint32_t but the return type is int;
	 * callers store the result back into a uint32_t, so this relies
	 * on n_points staying below INT_MAX -- confirm */
	return (idx);
}
122 |
|
|
123 |
|
/*
 * Walk the hash circle clockwise from state->idx and pick the next
 * eligible host.
 *
 * Every host is visited at most once per walk (tracked in the picklist
 * bitmap).  A healthy backend is always eligible; an unhealthy one only
 * when the healthy argument is 0.  skip eligible hosts are passed over
 * before one is chosen.
 *
 * Returns the chosen host id, or -1 once all backends have been seen
 * without a pick.  On success, state->last describes the chosen host
 * and state->previous the eligible host seen before it (if any).
 */
static int
shard_next(struct shard_state *state, VCL_INT skip, VCL_BOOL healthy)
{
	int c, chosen = -1;
	VCL_BACKEND be;
	vtim_real changed;
	struct shard_be_info *sbe;

	AN(state);
	CHECK_OBJ_NOTNULL(state->shardd, SHARDDIR_MAGIC);

	/* everything was already visited in an earlier call */
	if (state->pickcount >= state->shardd->n_backend)
		return (-1);

	while (state->pickcount < state->shardd->n_backend && skip >= 0) {

		c = state->shardd->hashcircle[state->idx].host;

		if (!vbit_test(state->picklist, c)) {

			vbit_set(state->picklist, c);
			state->pickcount++;

			sbe = NULL;
			be = state->shardd->backend[c].backend;
			AN(be);
			if (VRT_Healthy(state->ctx, be, &changed)) {
				if (skip-- == 0) {
					chosen = c;
					sbe = &state->last;
				} else {
					sbe = &state->previous;
				}

			} else if (!healthy && skip-- == 0) {
				/* unhealthy host acceptable */
				chosen = c;
				sbe = &state->last;
			}
			/* keep the prior pick as the fallback before
			 * overwriting state->last */
			if (sbe == &state->last &&
			    state->last.hostid != UINT_MAX)
				memcpy(&state->previous, &state->last,
				    sizeof(state->previous));

			if (sbe) {
				sbe->hostid = c;
				/* NOTE(review): set to 1 even when this
				 * host was taken via the unhealthy
				 * (!healthy) branch above; .healthy is
				 * not read anywhere in this file, so
				 * this looks benign -- confirm */
				sbe->healthy = 1;
				sbe->changed = changed;
			}
			if (chosen != -1)
				break;
		}

		/* advance around the circle, wrapping at n_points */
		if (++(state->idx) == state->shardd->n_points)
			state->idx = 0;
	}
	return (chosen);
}
180 |
|
|
181 |
|
/*
 * Allocate and initialize a new shard director.
 *
 * vcl_name and param are stored by reference, not copied.  *sharddp
 * must be NULL on entry and receives the new object.
 */
void
sharddir_new(struct sharddir **sharddp, const char *vcl_name,
    const struct vmod_directors_shard_param *param)
{
	struct sharddir *shardd;

	AN(vcl_name);
	AN(sharddp);
	AZ(*sharddp);
	ALLOC_OBJ(shardd, SHARDDIR_MAGIC);
	AN(shardd);
	*sharddp = shardd;
	shardd->name = vcl_name;
	shardd->param = param;
	PTOK(pthread_rwlock_init(&shardd->mtx, NULL));
}
197 |
|
|
198 |
|
/* Replace the director's default parameter set (stored by reference). */
void
sharddir_set_param(struct sharddir *shardd,
    const struct vmod_directors_shard_param *param)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	shardd->param = param;
}
205 |
|
|
206 |
|
/* Release the director's backends via shardcfg_backend_clear(); the
 * director object itself stays valid (see sharddir_delete()). */
void
sharddir_release(struct sharddir *shardd)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	shardcfg_backend_clear(shardd);
}
212 |
|
|
213 |
|
/* Destroy a director created with sharddir_new(); takes ownership of
 * and NULLs the caller's pointer (TAKE_OBJ_NOTNULL). */
void
sharddir_delete(struct sharddir **sharddp)
{
	struct sharddir *shardd;

	TAKE_OBJ_NOTNULL(shardd, sharddp, SHARDDIR_MAGIC);
	shardcfg_delete(shardd);
	PTOK(pthread_rwlock_destroy(&shardd->mtx));
	FREE_OBJ(shardd);
}
223 |
|
|
224 |
|
/* Take the director's rwlock for reading (resolve path). */
void
sharddir_rdlock(struct sharddir *shardd)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	PTOK(pthread_rwlock_rdlock(&shardd->mtx));
}
230 |
|
|
231 |
|
/* Take the director's rwlock for writing (reconfiguration path). */
void
sharddir_wrlock(struct sharddir *shardd)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	PTOK(pthread_rwlock_wrlock(&shardd->mtx));
}
237 |
|
|
238 |
|
/* Drop the director's rwlock (read or write side). */
void
sharddir_unlock(struct sharddir *shardd)
{
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	PTOK(pthread_rwlock_unlock(&shardd->mtx));
}
244 |
|
|
245 |
|
static inline void |
246 |
4160 |
validate_alt(VRT_CTX, const struct sharddir *shardd, VCL_INT *alt) |
247 |
|
{ |
248 |
4160 |
const VCL_INT alt_max = shardd->n_backend - 1; |
249 |
|
|
250 |
4160 |
if (*alt < 0) { |
251 |
0 |
shard_err(ctx->vsl, shardd->name, |
252 |
|
"invalid negative parameter alt=%ld, set to 0", *alt); |
253 |
0 |
*alt = 0; |
254 |
4160 |
} else if (*alt > alt_max) { |
255 |
120 |
shard_err(ctx->vsl, shardd->name, |
256 |
|
"parameter alt=%ld limited to %ld", *alt, alt_max); |
257 |
120 |
*alt = alt_max; |
258 |
120 |
} |
259 |
4160 |
} |
260 |
|
|
261 |
|
static inline void |
262 |
4160 |
init_state(struct shard_state *state, |
263 |
|
VRT_CTX, struct sharddir *shardd, struct vbitmap *picklist) |
264 |
|
{ |
265 |
4160 |
AN(picklist); |
266 |
|
|
267 |
4160 |
state->ctx = ctx; |
268 |
4160 |
state->shardd = shardd; |
269 |
4160 |
state->idx = UINT32_MAX; |
270 |
4160 |
state->picklist = picklist; |
271 |
|
|
272 |
|
/* healthy and changed only defined for valid hostids */ |
273 |
4160 |
state->previous.hostid = UINT_MAX; |
274 |
4160 |
state->last.hostid = UINT_MAX; |
275 |
4160 |
} |
276 |
|
|
277 |
|
/* basically same as vdir_any_healthy |
278 |
|
* - XXX we should embed a vdir |
279 |
|
* - XXX should we return the health state of the actual backend |
280 |
|
* for healthy=IGNORE ? |
281 |
|
*/ |
282 |
|
VCL_BOOL |
283 |
640 |
sharddir_any_healthy(VRT_CTX, struct sharddir *shardd, VCL_TIME *changed) |
284 |
|
{ |
285 |
640 |
unsigned i, retval = 0; |
286 |
|
VCL_BACKEND be; |
287 |
|
vtim_real c; |
288 |
|
|
289 |
640 |
CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC); |
290 |
640 |
sharddir_rdlock(shardd); |
291 |
640 |
if (changed != NULL) |
292 |
280 |
*changed = 0; |
293 |
640 |
for (i = 0; i < shardd->n_backend; i++) { |
294 |
640 |
be = shardd->backend[i].backend; |
295 |
640 |
CHECK_OBJ_NOTNULL(be, DIRECTOR_MAGIC); |
296 |
640 |
retval = VRT_Healthy(ctx, be, &c); |
297 |
640 |
if (changed != NULL && c > *changed) |
298 |
280 |
*changed = c; |
299 |
640 |
if (retval) |
300 |
640 |
break; |
301 |
0 |
} |
302 |
640 |
sharddir_unlock(shardd); |
303 |
640 |
return (retval); |
304 |
|
} |
305 |
|
|
306 |
|
/*
 * core function for the director backend/resolve method
 *
 * Called with the director's read lock held.  Resolution steps:
 *  1. clamp alt into range (validate_alt)
 *  2. locate the circle slot for key (shard_lookup)
 *  3. for alt > 0, first skip alt - 1 eligible hosts; with healthy=ALL
 *     only healthy hosts count while skipping
 *  4. pick the next host (unhealthy hosts acceptable for
 *     healthy=IGNORE); if the walk is exhausted, fall back to the last
 *     host recorded in state->previous, or return NULL
 *  5. unless disabled or not applicable, possibly divert to the next
 *     host based on the rampup/warmup probabilities
 */
static VCL_BACKEND
sharddir_pick_be_locked(VRT_CTX, const struct sharddir *shardd, uint32_t key,
    VCL_INT alt, VCL_REAL warmup, VCL_BOOL rampup, VCL_ENUM healthy,
    struct shard_state *state)
{
	VCL_BACKEND be;
	VCL_DURATION chosen_r, alt_r;

	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	AN(ctx->vsl);
	assert(shardd->n_backend > 0);

	assert(shardd->hashcircle);

	validate_alt(ctx, shardd, &alt);

	state->idx = shard_lookup(shardd, key);
	assert(state->idx < UINT32_MAX);

	SHDBG(SHDBG_LOOKUP, shardd, "lookup key %x idx %u host %u",
	    key, state->idx, shardd->hashcircle[state->idx].host);

	if (alt > 0) {
		/* skip alt - 1 hosts; for healthy=ALL only healthy
		 * hosts are counted while skipping */
		if (shard_next(state, alt - 1,
		    healthy == VENUM(ALL) ? 1 : 0) == -1) {
			if (state->previous.hostid != UINT_MAX) {
				be = sharddir_backend(shardd,
				    state->previous.hostid);
				AN(be);
				return (be);
			}
			return (NULL);
		}
	}

	/* the actual pick; unhealthy hosts acceptable for healthy=IGNORE */
	if (shard_next(state, 0, healthy == VENUM(IGNORE) ? 0 : 1) == -1) {
		if (state->previous.hostid != UINT_MAX) {
			be = sharddir_backend(shardd, state->previous.hostid);
			AN(be);
			return (be);
		}
		return (NULL);
	}

	be = sharddir_backend(shardd, state->last.hostid);
	AN(be);

	/* warmup == -1 selects the director's configured default */
	if (warmup == -1)
		warmup = shardd->warmup;

	/* short path for cases where we don't want rampup/warmup or
	 * can't (no alternative host available) */
	if (alt > 0 || healthy == VENUM(IGNORE) || (!rampup && warmup == 0) ||
	    shard_next(state, 0, 1) == -1)
		return (be);

	/* the shard_next() above shifted the pick into state->previous
	 * and put the potential diversion target into state->last */
	assert(alt == 0);
	assert(state->previous.hostid != UINT_MAX);
	assert(state->last.hostid != UINT_MAX);
	assert(state->previous.hostid != state->last.hostid);
	assert(be == sharddir_backend(shardd, state->previous.hostid));

	chosen_r = shardcfg_get_rampup(shardd, state->previous.hostid);
	alt_r = shardcfg_get_rampup(shardd, state->last.hostid);

	SHDBG(SHDBG_RAMPWARM, shardd, "chosen host %u rampup %f changed %f",
	    state->previous.hostid, chosen_r,
	    ctx->now - state->previous.changed);
	SHDBG(SHDBG_RAMPWARM, shardd, "alt host %u rampup %f changed %f",
	    state->last.hostid, alt_r,
	    ctx->now - state->last.changed);

	if (ctx->now - state->previous.changed < chosen_r) {
		/*
		 * chosen host is in rampup
		 * - no change if alternative host is also in rampup or the dice
		 *   has rolled in favour of the chosen host
		 */
		if (!rampup ||
		    ctx->now - state->last.changed < alt_r ||
		    VRND_RandomTestableDouble() * chosen_r <
		    (ctx->now - state->previous.changed))
			return (be);
	} else {
		/* chosen host not in rampup - warmup ? */
		if (warmup == 0 || VRND_RandomTestableDouble() > warmup)
			return (be);
	}

	/* divert to the alternative host */
	be = sharddir_backend(shardd, state->last.hostid);
	return (be);
}
402 |
|
|
403 |
|
/*
 * Public resolve entry point: pick the backend for key under the
 * director's read lock.
 *
 * Returns NULL when the director has no backends or no host could be
 * picked; see sharddir_pick_be_locked() for the selection logic.
 */
VCL_BACKEND
sharddir_pick_be(VRT_CTX, struct sharddir *shardd, uint32_t key, VCL_INT alt,
    VCL_REAL warmup, VCL_BOOL rampup, VCL_ENUM healthy)
{
	VCL_BACKEND be;
	struct shard_state state[1];
	unsigned picklist_sz;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
	CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);

	sharddir_rdlock(shardd);

	if (shardd->n_backend == 0) {
		shard_err0(ctx->vsl, shardd->name, "no backends");
		sharddir_unlock(shardd);
		return (NULL);
	}

	/* stack (VLA) bitmap sized for the current backend set; the
	 * read lock held above keeps that set stable for the walk */
	picklist_sz = VBITMAP_SZ(shardd->n_backend);
	char picklist_spc[picklist_sz];

	memset(state, 0, sizeof(state));
	init_state(state, ctx, shardd, vbit_init(picklist_spc, picklist_sz));

	be = sharddir_pick_be_locked(ctx, shardd, key, alt, warmup, rampup,
	    healthy, state);
	sharddir_unlock(shardd);

	vbit_destroy(state->picklist);
	return (be);
}