varnish-cache/vmod/vmod_directors_shard_dir.c
0
/*-
1
 * Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
2
 * All rights reserved.
3
 *
4
 * Authors: Nils Goroll <nils.goroll@uplex.de>
5
 *          Geoffrey Simmons <geoff.simmons@uplex.de>
6
 *          Julian Wiesener <jw@uplex.de>
7
 *
8
 * SPDX-License-Identifier: BSD-2-Clause
9
 *
10
 * Redistribution and use in source and binary forms, with or without
11
 * modification, are permitted provided that the following conditions
12
 * are met:
13
 * 1. Redistributions of source code must retain the above copyright
14
 *    notice, this list of conditions and the following disclaimer.
15
 * 2. Redistributions in binary form must reproduce the above copyright
16
 *    notice, this list of conditions and the following disclaimer in the
17
 *    documentation and/or other materials provided with the distribution.
18
 *
19
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
23
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29
 * SUCH DAMAGE.
30
 */
31
32
/*lint -e801 */
33
34
#include "config.h"
35
36
#include <stdlib.h>
37
#include <stdio.h>
38
#include <time.h>
39
#include <limits.h>
40
41
#include "cache/cache.h"
42
43
#include "vbm.h"
44
#include "vrnd.h"
45
46
#include "vcc_directors_if.h"
47
#include "vmod_directors_shard_dir.h"
48
49
/*
 * Snapshot of one backend as seen during a circle walk (see shard_next()).
 */
struct shard_be_info {
        /* index into shardd->backend[]; UINT_MAX means "not set" */
        unsigned        hostid;
        /* written as 1 by shard_next() whenever the record is filled in */
        unsigned        healthy;
        /* time of last health change as reported by VRT_Healthy() */
        double          changed;
};
54
55
/*
56
 * circle walk state for shard_next
57
 *
58
 * pick* cut off the search after having seen all possible backends
59
 */
60
struct shard_state {
61
        const struct vrt_ctx    *ctx;
62
        struct sharddir *shardd;
63
        uint32_t                idx;
64
65
        struct vbitmap          *picklist;
66
        unsigned                pickcount;
67
68
        struct shard_be_info    previous;
69
        struct shard_be_info    last;
70
};
71
72
void
73 280
sharddir_debug(struct sharddir *shardd, const uint32_t flags)
74
{
75 280
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
76 280
        shardd->debug_flags = flags;
77 280
}
78
79
void
80 840
sharddir_log(struct vsl_log *vsl, enum VSL_tag_e tag,  const char *fmt, ...)
81
{
82
        va_list ap;
83
84 840
        va_start(ap, fmt);
85 840
        if (vsl != NULL)
86 400
                VSLbv(vsl, tag, fmt, ap);
87
        else
88 440
                VSLv(tag, NO_VXID, fmt, ap);
89 840
        va_end(ap);
90 840
}
91
92
static int
93 4160
shard_lookup(const struct sharddir *shardd, const uint32_t key)
94
{
95 4160
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
96
97 4160
        const uint32_t n = shardd->n_points;
98 4160
        uint32_t i, idx = UINT32_MAX, high = n, low = 0;
99
100 4160
        assert (n < idx);
101
102 4160
        do {
103 25400
            i = (high + low) / 2 ;
104 25400
            if (shardd->hashcircle[i].point == key)
105 40
                idx = i;
106 25360
            else if (i == n - 1)
107 720
                idx = n - 1;
108 24640
            else if (shardd->hashcircle[i].point < key &&
109 13040
                     shardd->hashcircle[i+1].point >= key)
110 2720
                idx = i + 1;
111 21920
            else if (shardd->hashcircle[i].point > key)
112 23200
                if (i == 0)
113 680
                    idx = 0;
114
                else
115 10920
                    high = i;
116
            else
117 10320
                low = i;
118 25400
        } while (idx == UINT32_MAX);
119
120 4160
        return (idx);
121
}
122
123
static int
124 8320
shard_next(struct shard_state *state, VCL_INT skip, VCL_BOOL healthy)
125
{
126 8320
        int c, chosen = -1;
127
        VCL_BACKEND be;
128
        vtim_real changed;
129
        struct shard_be_info *sbe;
130
131 8320
        AN(state);
132 8320
        CHECK_OBJ_NOTNULL(state->shardd, SHARDDIR_MAGIC);
133
134 8320
        if (state->pickcount >= state->shardd->n_backend)
135 120
                return (-1);
136
137 13120
        while (state->pickcount < state->shardd->n_backend && skip >= 0) {
138
139 13080
                c = state->shardd->hashcircle[state->idx].host;
140
141 13080
                if (!vbit_test(state->picklist, c)) {
142
143 8800
                        vbit_set(state->picklist, c);
144 8800
                        state->pickcount++;
145
146 8800
                        sbe = NULL;
147 8800
                        be = state->shardd->backend[c].backend;
148 8800
                        AN(be);
149 8800
                        if (VRT_Healthy(state->ctx, be, &changed)) {
150 8560
                                if (skip-- == 0) {
151 8160
                                        chosen = c;
152 8160
                                        sbe = &state->last;
153 8160
                                } else {
154 400
                                        sbe = &state->previous;
155
                                }
156
157 8800
                        } else if (!healthy && skip-- == 0) {
158 0
                                chosen = c;
159 0
                                sbe = &state->last;
160 0
                        }
161 8800
                        if (sbe == &state->last &&
162 8160
                            state->last.hostid != UINT_MAX)
163 4000
                                memcpy(&state->previous, &state->last,
164
                                    sizeof(state->previous));
165
166 8800
                        if (sbe) {
167 8560
                                sbe->hostid = c;
168 8560
                                sbe->healthy = 1;
169 8560
                                sbe->changed = changed;
170 8560
                        }
171 8800
                        if (chosen != -1)
172 8160
                                break;
173 640
                }
174
175 4920
                if (++(state->idx) == state->shardd->n_points)
176 760
                        state->idx = 0;
177
        }
178 8200
        return (chosen);
179 8320
}
180
181
void
182 1000
sharddir_new(struct sharddir **sharddp, const char *vcl_name,
183
    const struct vmod_directors_shard_param *param)
184
{
185
        struct sharddir *shardd;
186
187 1000
        AN(vcl_name);
188 1000
        AN(sharddp);
189 1000
        AZ(*sharddp);
190 1000
        ALLOC_OBJ(shardd, SHARDDIR_MAGIC);
191 1000
        AN(shardd);
192 1000
        *sharddp = shardd;
193 1000
        shardd->name = vcl_name;
194 1000
        shardd->param = param;
195 1000
        PTOK(pthread_rwlock_init(&shardd->mtx, NULL));
196 1000
}
197
198
void
199 120
sharddir_set_param(struct sharddir *shardd,
200
    const struct vmod_directors_shard_param *param)
201
{
202 120
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
203 120
        shardd->param = param;
204 120
}
205
206
void
207 200
sharddir_release(struct sharddir *shardd)
208
{
209 200
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
210 200
        shardcfg_backend_clear(shardd);
211 200
}
212
213
void
214 200
sharddir_delete(struct sharddir **sharddp)
215
{
216
        struct sharddir *shardd;
217
218 200
        TAKE_OBJ_NOTNULL(shardd, sharddp, SHARDDIR_MAGIC);
219 200
        shardcfg_delete(shardd);
220 200
        PTOK(pthread_rwlock_destroy(&shardd->mtx));
221 200
        FREE_OBJ(shardd);
222 200
}
223
224
void
225 6000
sharddir_rdlock(struct sharddir *shardd)
226
{
227 6000
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
228 6000
        PTOK(pthread_rwlock_rdlock(&shardd->mtx));
229 6000
}
230
231
void
232 1760
sharddir_wrlock(struct sharddir *shardd)
233
{
234 1760
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
235 1760
        PTOK(pthread_rwlock_wrlock(&shardd->mtx));
236 1760
}
237
238
void
239 7760
sharddir_unlock(struct sharddir *shardd)
240
{
241 7760
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
242 7760
        PTOK(pthread_rwlock_unlock(&shardd->mtx));
243 7760
}
244
245
static inline void
246 4160
validate_alt(VRT_CTX, const struct sharddir *shardd, VCL_INT *alt)
247
{
248 4160
        const VCL_INT alt_max = shardd->n_backend - 1;
249
250 4160
        if (*alt < 0) {
251 0
                shard_err(ctx->vsl, shardd->name,
252
                    "invalid negative parameter alt=%ld, set to 0", *alt);
253 0
                *alt = 0;
254 4160
        } else if (*alt > alt_max) {
255 120
                shard_err(ctx->vsl, shardd->name,
256
                    "parameter alt=%ld limited to %ld", *alt, alt_max);
257 120
                *alt = alt_max;
258 120
        }
259 4160
}
260
261
static inline void
262 4160
init_state(struct shard_state *state,
263
    VRT_CTX, struct sharddir *shardd, struct vbitmap *picklist)
264
{
265 4160
        AN(picklist);
266
267 4160
        state->ctx = ctx;
268 4160
        state->shardd = shardd;
269 4160
        state->idx = UINT32_MAX;
270 4160
        state->picklist = picklist;
271
272
        /* healthy and changed only defined for valid hostids */
273 4160
        state->previous.hostid = UINT_MAX;
274 4160
        state->last.hostid = UINT_MAX;
275 4160
}
276
277
/* basically same as vdir_any_healthy
278
 * - XXX we should embed a vdir
279
 * - XXX should we return the health state of the actual backend
280
 *   for healthy=IGNORE ?
281
 */
282
VCL_BOOL
283 640
sharddir_any_healthy(VRT_CTX, struct sharddir *shardd, VCL_TIME *changed)
284
{
285 640
        unsigned i, retval = 0;
286
        VCL_BACKEND be;
287
        vtim_real c;
288
289 640
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
290 640
        sharddir_rdlock(shardd);
291 640
        if (changed != NULL)
292 280
                *changed = 0;
293 640
        for (i = 0; i < shardd->n_backend; i++) {
294 640
                be = shardd->backend[i].backend;
295 640
                CHECK_OBJ_NOTNULL(be, DIRECTOR_MAGIC);
296 640
                retval = VRT_Healthy(ctx, be, &c);
297 640
                if (changed != NULL && c > *changed)
298 280
                        *changed = c;
299 640
                if (retval)
300 640
                        break;
301 0
        }
302 640
        sharddir_unlock(shardd);
303 640
        return (retval);
304
}
305
306
/*
307
 * core function for the director backend/resolve method
308
 */
309
310
static VCL_BACKEND
311 4160
sharddir_pick_be_locked(VRT_CTX, const struct sharddir *shardd, uint32_t key,
312
    VCL_INT alt, VCL_REAL warmup, VCL_BOOL rampup, VCL_ENUM healthy,
313
    struct shard_state *state)
314
{
315
        VCL_BACKEND be;
316
        VCL_DURATION chosen_r, alt_r;
317
318 4160
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
319 4160
        CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
320 4160
        AN(ctx->vsl);
321 4160
        assert(shardd->n_backend > 0);
322
323 4160
        assert(shardd->hashcircle);
324
325 4160
        validate_alt(ctx, shardd, &alt);
326
327 4160
        state->idx = shard_lookup(shardd, key);
328 4160
        assert(state->idx < UINT32_MAX);
329
330 4160
        SHDBG(SHDBG_LOOKUP, shardd, "lookup key %x idx %u host %u",
331
            key, state->idx, shardd->hashcircle[state->idx].host);
332
333 4160
        if (alt > 0) {
334 3240
                if (shard_next(state, alt - 1,
335 2160
                    healthy == VENUM(ALL) ? 1 : 0) == -1) {
336 0
                        if (state->previous.hostid != UINT_MAX) {
337 0
                                be = sharddir_backend(shardd,
338 0
                                    state->previous.hostid);
339 0
                                AN(be);
340 0
                                return (be);
341
                        }
342 0
                        return (NULL);
343
                }
344 1080
        }
345
346 4160
        if (shard_next(state, 0, healthy == VENUM(IGNORE) ? 0 : 1) == -1) {
347 0
                if (state->previous.hostid != UINT_MAX) {
348 0
                        be = sharddir_backend(shardd, state->previous.hostid);
349 0
                        AN(be);
350 0
                        return (be);
351
                }
352 0
                return (NULL);
353
        }
354
355 4160
        be = sharddir_backend(shardd, state->last.hostid);
356 4160
        AN(be);
357
358 4160
        if (warmup == -1)
359 4120
                warmup = shardd->warmup;
360
361
        /* short path for cases we dont want ramup/warmup or can't */
362 4160
        if (alt > 0 || healthy == VENUM(IGNORE) || (!rampup && warmup == 0) ||
363 3080
            shard_next(state, 0, 1) == -1)
364 1240
                return (be);
365
366 2920
        assert(alt == 0);
367 2920
        assert(state->previous.hostid != UINT_MAX);
368 2920
        assert(state->last.hostid != UINT_MAX);
369 2920
        assert(state->previous.hostid != state->last.hostid);
370 2920
        assert(be == sharddir_backend(shardd, state->previous.hostid));
371
372 2920
        chosen_r = shardcfg_get_rampup(shardd, state->previous.hostid);
373 2920
        alt_r = shardcfg_get_rampup(shardd, state->last.hostid);
374
375 2920
        SHDBG(SHDBG_RAMPWARM, shardd, "chosen host %u rampup %f changed %f",
376
            state->previous.hostid, chosen_r,
377
            ctx->now - state->previous.changed);
378 2920
        SHDBG(SHDBG_RAMPWARM, shardd, "alt host %u rampup %f changed %f",
379
            state->last.hostid, alt_r,
380
            ctx->now - state->last.changed);
381
382 2920
        if (ctx->now - state->previous.changed < chosen_r) {
383
                /*
384
                 * chosen host is in rampup
385
                 * - no change if alternative host is also in rampup or the dice
386
                 *   has rolled in favour of the chosen host
387
                 */
388 120
                if (!rampup ||
389 80
                    ctx->now - state->last.changed < alt_r ||
390 80
                    VRND_RandomTestableDouble() * chosen_r <
391 40
                    (ctx->now - state->previous.changed))
392 40
                        return (be);
393 40
        } else {
394
                /* chosen host not in rampup - warmup ? */
395 2840
                if (warmup == 0 || VRND_RandomTestableDouble() > warmup)
396 2840
                        return (be);
397
        }
398
399 40
        be = sharddir_backend(shardd, state->last.hostid);
400 40
        return (be);
401 4160
}
402
403
VCL_BACKEND
404 4160
sharddir_pick_be(VRT_CTX, struct sharddir *shardd, uint32_t key, VCL_INT alt,
405
    VCL_REAL warmup, VCL_BOOL rampup, VCL_ENUM healthy)
406
{
407
        VCL_BACKEND be;
408
        struct shard_state state[1];
409
        unsigned picklist_sz;
410
411 4160
        CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
412 4160
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
413
414 4160
        sharddir_rdlock(shardd);
415
416 4160
        if (shardd->n_backend == 0) {
417 0
                shard_err0(ctx->vsl, shardd->name, "no backends");
418 0
                sharddir_unlock(shardd);
419 0
                return (NULL);
420
        }
421
422 4160
        picklist_sz = VBITMAP_SZ(shardd->n_backend);
423 4160
        char picklist_spc[picklist_sz];
424
425 4160
        memset(state, 0, sizeof(state));
426 4160
        init_state(state, ctx, shardd, vbit_init(picklist_spc, picklist_sz));
427
428 8320
        be = sharddir_pick_be_locked(ctx, shardd, key, alt, warmup, rampup,
429 4160
            healthy, state);
430 4160
        sharddir_unlock(shardd);
431
432 4160
        vbit_destroy(state->picklist);
433 4160
        return (be);
434 4160
}