varnish-cache/vmod/vmod_directors_shard_dir.c
/*-
 * Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
 * All rights reserved.
 *
 * Authors: Nils Goroll <nils.goroll@uplex.de>
 *          Geoffrey Simmons <geoff.simmons@uplex.de>
 *          Julian Wiesener <jw@uplex.de>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*lint -e801 */

#include "config.h"

#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <limits.h>

#include "cache/cache.h"

#include "vbm.h"
#include "vrnd.h"

#include "vcc_directors_if.h"
#include "vmod_directors_shard_dir.h"

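/* per-backend health snapshot recorded while walking the hash circle */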
struct shard_be_info {
        unsigned        hostid;
        unsigned        healthy;
        double          changed;        // when
};

/*
 * circle walk state for shard_next
 *
 * pick* cut off the search after having seen all possible backends
 */
struct shard_state {
        const struct vrt_ctx    *ctx;
        struct sharddir *shardd;
        uint32_t                idx;

        struct vbitmap          *picklist;
        unsigned                pickcount;

        struct shard_be_info    previous;
        struct shard_be_info    last;
};

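/* set the SHDBG debug flag mask for this director instance */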
void
sharddir_debug(struct sharddir *shardd, const uint32_t flags)
{
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
        shardd->debug_flags = flags;
}

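/* log to the request's VSL if we have one, to the global log otherwise */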
void
sharddir_log(struct vsl_log *vsl, enum VSL_tag_e tag, const char *fmt, ...)
{
        va_list ap;

        va_start(ap, fmt);
        if (vsl != NULL)
                VSLbv(vsl, tag, fmt, ap);
        else
                VSLv(tag, 0, fmt, ap);
        va_end(ap);
}

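/*
 * binary search on the hash circle: return the index of the first point
 * with a value >= key, clamped to 0 below the first point and to n - 1
 * beyond the last one
 */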
static int
shard_lookup(const struct sharddir *shardd, const uint32_t key)
{
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);

        const uint32_t n = shardd->n_points;
        uint32_t i, idx = UINT32_MAX, high = n, low = 0;

        assert (n < idx);

        do {
            i = (high + low) / 2;
            if (shardd->hashcircle[i].point == key)
                idx = i;
            else if (i == n - 1)
                idx = n - 1;
            else if (shardd->hashcircle[i].point < key &&
                     shardd->hashcircle[i+1].point >= key)
                idx = i + 1;
            else if (shardd->hashcircle[i].point > key)
                if (i == 0)
                    idx = 0;
                else
                    high = i;
            else
                low = i;
        } while (idx == UINT32_MAX);

        return (idx);
}

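/*
 * walk the hash circle clockwise from state->idx, visiting every host at
 * most once (tracked in the picklist bitmap) and skipping the first skip
 * eligible hosts; with healthy set, only healthy backends are eligible.
 * Returns the chosen host id, or -1 once all hosts have been seen.
 * state->last / state->previous keep the last two candidates considered.
 */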
static int
shard_next(struct shard_state *state, VCL_INT skip, VCL_BOOL healthy)
{
        int c, chosen = -1;
        VCL_BACKEND be;
        vtim_real changed;
        struct shard_be_info *sbe;

        AN(state);
        CHECK_OBJ_NOTNULL(state->shardd, SHARDDIR_MAGIC);

        if (state->pickcount >= state->shardd->n_backend)
                return (-1);

        while (state->pickcount < state->shardd->n_backend && skip >= 0) {

                c = state->shardd->hashcircle[state->idx].host;

                if (!vbit_test(state->picklist, c)) {

                        vbit_set(state->picklist, c);
                        state->pickcount++;

                        sbe = NULL;
                        be = state->shardd->backend[c].backend;
                        AN(be);
                        if (VRT_Healthy(state->ctx, be, &changed)) {
                                if (skip-- == 0) {
                                        chosen = c;
                                        sbe = &state->last;
                                } else {
                                        sbe = &state->previous;
                                }

                        } else if (!healthy && skip-- == 0) {
                                chosen = c;
                                sbe = &state->last;
                        }
                        if (sbe == &state->last &&
                            state->last.hostid != UINT_MAX)
                                memcpy(&state->previous, &state->last,
                                    sizeof(state->previous));

                        if (sbe) {
                                sbe->hostid = c;
                                sbe->healthy = 1;
                                sbe->changed = changed;
                        }
                        if (chosen != -1)
                                break;
                }

                if (++(state->idx) == state->shardd->n_points)
                        state->idx = 0;
        }
        return (chosen);
}

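/* allocate a new shard director and set up its lock; *sharddp must be NULL */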
void
sharddir_new(struct sharddir **sharddp, const char *vcl_name,
    const struct vmod_directors_shard_param *param)
{
        struct sharddir *shardd;

        AN(vcl_name);
        AN(sharddp);
        AZ(*sharddp);
        ALLOC_OBJ(shardd, SHARDDIR_MAGIC);
        AN(shardd);
        *sharddp = shardd;
        shardd->name = vcl_name;
        shardd->param = param;
        AZ(pthread_rwlock_init(&shardd->mtx, NULL));
}

void
sharddir_set_param(struct sharddir *shardd,
    const struct vmod_directors_shard_param *param)
{
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
        shardd->param = param;
}

void
sharddir_delete(struct sharddir **sharddp)
{
        struct sharddir *shardd;

        TAKE_OBJ_NOTNULL(shardd, sharddp, SHARDDIR_MAGIC);
        shardcfg_delete(shardd);
        AZ(pthread_rwlock_destroy(&shardd->mtx));
        FREE_OBJ(shardd);
}

void
sharddir_rdlock(struct sharddir *shardd)
{
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
        AZ(pthread_rwlock_rdlock(&shardd->mtx));
}

void
sharddir_wrlock(struct sharddir *shardd)
{
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
        AZ(pthread_rwlock_wrlock(&shardd->mtx));
}

void
sharddir_unlock(struct sharddir *shardd)
{
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
        AZ(pthread_rwlock_unlock(&shardd->mtx));
}

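/* clamp alt into [0, n_backend - 1], logging whenever it gets adjusted */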
static inline void
validate_alt(VRT_CTX, const struct sharddir *shardd, VCL_INT *alt)
{
        const VCL_INT alt_max = shardd->n_backend - 1;

        if (*alt < 0) {
                shard_err(ctx->vsl, shardd->name,
                    "invalid negative parameter alt=%ld, set to 0", *alt);
                *alt = 0;
        } else if (*alt > alt_max) {
                shard_err(ctx->vsl, shardd->name,
                    "parameter alt=%ld limited to %ld", *alt, alt_max);
                *alt = alt_max;
        }
}

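/* prepare the circle walk state; state->idx is filled in by shard_lookup() */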
static inline void
init_state(struct shard_state *state,
    VRT_CTX, struct sharddir *shardd, struct vbitmap *picklist)
{
        AN(picklist);

        state->ctx = ctx;
        state->shardd = shardd;
        state->idx = UINT32_MAX;
        state->picklist = picklist;

        /* healthy and changed only defined for valid hostids */
        state->previous.hostid = UINT_MAX;
        state->last.hostid = UINT_MAX;
}

/* basically same as vdir_any_healthy
 * - XXX we should embed a vdir
 * - XXX should we return the health state of the actual backend
 *   for healthy=IGNORE ?
 */
VCL_BOOL
sharddir_any_healthy(VRT_CTX, struct sharddir *shardd, VCL_TIME *changed)
{
        unsigned i, retval = 0;
        VCL_BACKEND be;
        vtim_real c;

        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
        sharddir_rdlock(shardd);
        if (changed != NULL)
                *changed = 0;
        for (i = 0; i < shardd->n_backend; i++) {
                be = shardd->backend[i].backend;
                CHECK_OBJ_NOTNULL(be, DIRECTOR_MAGIC);
                retval = VRT_Healthy(ctx, be, &c);
                if (changed != NULL && c > *changed)
                        *changed = c;
                if (retval)
                        break;
        }
        sharddir_unlock(shardd);
        return (retval);
}

/*
 * core function for the director backend/resolve method
 */

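/*
 * Outline, as implemented below:
 * 1. clamp alt and locate the starting point on the hash circle for key
 * 2. for alt > 0, skip the first alt - 1 eligible hosts
 * 3. pick the next eligible host; if the walk is exhausted, fall back to
 *    the previously seen candidate, or fail with NULL
 * 4. unless ruled out (alt > 0, healthy=IGNORE, neither rampup nor warmup),
 *    look at one more healthy host and possibly divert to it for
 *    rampup/warmup load spreading
 */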
static VCL_BACKEND
sharddir_pick_be_locked(VRT_CTX, const struct sharddir *shardd, uint32_t key,
    VCL_INT alt, VCL_REAL warmup, VCL_BOOL rampup, VCL_ENUM healthy,
    struct shard_state *state)
{
        VCL_BACKEND be;
        VCL_DURATION chosen_r, alt_r;

        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
        CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
        AN(ctx->vsl);
        assert(shardd->n_backend > 0);

        assert(shardd->hashcircle);

        validate_alt(ctx, shardd, &alt);

        state->idx = shard_lookup(shardd, key);
        assert(state->idx < UINT32_MAX);

        SHDBG(SHDBG_LOOKUP, shardd, "lookup key %x idx %u host %u",
            key, state->idx, shardd->hashcircle[state->idx].host);

        if (alt > 0) {
                if (shard_next(state, alt - 1,
                    healthy == VENUM(ALL) ? 1 : 0) == -1) {
                        if (state->previous.hostid != UINT_MAX) {
                                be = sharddir_backend(shardd,
                                    state->previous.hostid);
                                AN(be);
                                return (be);
                        }
                        return (NULL);
                }
        }

        if (shard_next(state, 0, healthy == VENUM(IGNORE) ? 0 : 1) == -1) {
                if (state->previous.hostid != UINT_MAX) {
                        be = sharddir_backend(shardd, state->previous.hostid);
                        AN(be);
                        return (be);
                }
                return (NULL);
        }

        be = sharddir_backend(shardd, state->last.hostid);
        AN(be);

        if (warmup == -1)
                warmup = shardd->warmup;

        /* short path for cases where we don't want rampup/warmup or can't */
        if (alt > 0 || healthy == VENUM(IGNORE) || (!rampup && warmup == 0) ||
            shard_next(state, 0, 1) == -1)
                return (be);

        assert(alt == 0);
        assert(state->previous.hostid != UINT_MAX);
        assert(state->last.hostid != UINT_MAX);
        assert(state->previous.hostid != state->last.hostid);
        assert(be == sharddir_backend(shardd, state->previous.hostid));

        chosen_r = shardcfg_get_rampup(shardd, state->previous.hostid);
        alt_r = shardcfg_get_rampup(shardd, state->last.hostid);

        SHDBG(SHDBG_RAMPWARM, shardd, "chosen host %u rampup %f changed %f",
            state->previous.hostid, chosen_r,
            ctx->now - state->previous.changed);
        SHDBG(SHDBG_RAMPWARM, shardd, "alt host %u rampup %f changed %f",
            state->last.hostid, alt_r,
            ctx->now - state->last.changed);

        if (ctx->now - state->previous.changed < chosen_r) {
                /*
                 * chosen host is in rampup
                 * - no change if the alternative host is also in rampup or
                 *   the dice have rolled in favour of the chosen host
                 */
                if (!rampup ||
                    ctx->now - state->last.changed < alt_r ||
                    VRND_RandomTestableDouble() * chosen_r <
                    (ctx->now - state->previous.changed))
                        return (be);
        } else {
                /* chosen host not in rampup - warmup ? */
                if (warmup == 0 || VRND_RandomTestableDouble() > warmup)
                        return (be);
        }

        be = sharddir_backend(shardd, state->last.hostid);
        return (be);
}

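/*
 * Entry point for backend resolution: take the read lock, set up the walk
 * state with an on-stack picklist bitmap, and delegate to
 * sharddir_pick_be_locked().
 *
 * For orientation, a minimal VCL sketch of how the shard director is
 * typically driven (illustrative only, not taken from this file; see the
 * vmod_directors documentation for the full interface):
 *
 *     sub vcl_init {
 *         new shard = directors.shard();
 *         shard.add_backend(be1);
 *         shard.add_backend(be2);
 *         shard.reconfigure();
 *     }
 *
 *     sub vcl_recv {
 *         set req.backend_hint = shard.backend();
 *     }
 */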
VCL_BACKEND
sharddir_pick_be(VRT_CTX, struct sharddir *shardd, uint32_t key, VCL_INT alt,
    VCL_REAL warmup, VCL_BOOL rampup, VCL_ENUM healthy)
{
        VCL_BACKEND be;
        struct shard_state state[1];
        unsigned picklist_sz;

        CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);

        sharddir_rdlock(shardd);

        if (shardd->n_backend == 0) {
                shard_err0(ctx->vsl, shardd->name, "no backends");
                sharddir_unlock(shardd);
                return (NULL);
        }

        picklist_sz = VBITMAP_SZ(shardd->n_backend);
        char picklist_spc[picklist_sz];

        memset(state, 0, sizeof(state));
        init_state(state, ctx, shardd, vbit_init(picklist_spc, picklist_sz));

        be = sharddir_pick_be_locked(ctx, shardd, key, alt, warmup, rampup,
            healthy, state);
        sharddir_unlock(shardd);

        vbit_destroy(state->picklist);
        return (be);
}