varnish-cache/lib/libvmod_directors/shard_dir.c
1
/*-
2
 * Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
3
 * All rights reserved.
4
 *
5
 * Authors: Nils Goroll <nils.goroll@uplex.de>
6
 *          Geoffrey Simmons <geoff.simmons@uplex.de>
7
 *          Julian Wiesener <jw@uplex.de>
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 */
30
31
/*lint -e801 */
32
33
#include "config.h"
34
35
#include <stdlib.h>
36
#include <stdio.h>
37
#include <time.h>
38
39
#include "cache/cache.h"
40
41
#include "vbm.h"
42
#include "vrnd.h"
43
#include "vsha256.h"
44
#include "vend.h"
45
46
#include "shard_dir.h"
47
48
struct shard_be_info {
49
        int             hostid;
50
        unsigned        healthy;
51
        double          changed;        // when
52
};
53
54
/*
55
 * circle walk state for shard_next
56
 *
57
 * pick* cut off the search after having seen all possible backends
58
 */
59
struct shard_state {
60
        const struct vrt_ctx    *ctx;
61
        struct sharddir *shardd;
62
        int                     idx;
63
64
        struct vbitmap          *picklist;
65
        int                     pickcount;
66
67
        struct shard_be_info    previous;
68
        struct shard_be_info    last;
69
};
70
71
void
72 6
sharddir_debug(struct sharddir *shardd, const uint32_t flags)
73
{
74 6
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
75 6
        shardd->debug_flags = flags;
76 6
}
77
78
void
79 17
sharddir_err(VRT_CTX, enum VSL_tag_e tag,  const char *fmt, ...)
80
{
81
        va_list ap;
82
83 17
        va_start(ap, fmt);
84 17
        if (ctx->vsl)
85 8
                VSLbv(ctx->vsl, tag, fmt, ap);
86
        else
87 9
                VSLv(tag, 0, fmt, ap);
88 17
        va_end(ap);
89 17
}
90
91
uint32_t
92 925
sharddir_sha256v(const char *s, va_list ap)
93
{
94
        struct VSHA256Context sha256;
95
        union {
96
                unsigned char digest[32];
97
                uint32_t uint32_digest[8];
98
        } sha256_digest;
99
        uint32_t r;
100
        const char *p;
101
102 925
        VSHA256_Init(&sha256);
103 925
        p = s;
104 3671
        while (p != vrt_magic_string_end) {
105 1821
                if (p != NULL && *p != '\0')
106 1821
                        VSHA256_Update(&sha256, p, strlen(p));
107 1821
                p = va_arg(ap, const char *);
108
        }
109 925
        VSHA256_Final(sha256_digest.digest, &sha256);
110
111
        /*
112
         * use low 32 bits only
113
         * XXX: Are these the best bits to pick?
114
         */
115 925
        vle32enc(&r, sha256_digest.uint32_digest[7]);
116 925
        return (r);
117
}
118
119
uint32_t
120 914
sharddir_sha256(const char *s, ...)
121
{
122
        va_list ap;
123
        uint32_t r;
124
125 914
        va_start(ap, s);
126 914
        r = sharddir_sha256v(s, ap);
127 914
        va_end(ap);
128
129 914
        return (r);
130
}
131
132
static int
133 38
shard_lookup(const struct sharddir *shardd, const uint32_t key)
134
{
135 38
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
136
137 38
        const int n = shardd->n_backend * shardd->replicas;
138 38
        int idx = -1, high = n, low = 0, i;
139
140
        do {
141 215
            i = (high + low) / 2 ;
142 215
            if (shardd->hashcircle[i].point == key)
143 0
                idx = i;
144 215
            else if (i == n - 1)
145 5
                idx = n - 1;
146 325
            else if (shardd->hashcircle[i].point < key &&
147 115
                     shardd->hashcircle[i+1].point >= key)
148 29
                idx = i + 1;
149 181
            else if (shardd->hashcircle[i].point > key)
150 95
                if (i == 0)
151 4
                    idx = 0;
152
                else
153 91
                    high = i;
154
            else
155 86
                low = i;
156 215
        } while (idx == -1);
157
158 38
        return idx;
159
}
160
161
static int
162 76
shard_next(struct shard_state *state, VCL_INT skip, VCL_BOOL healthy)
163
{
164 76
        int c, chosen = -1;
165
        uint32_t ringsz;
166
        VCL_BACKEND be;
167
        double changed;
168
        struct shard_be_info *sbe;
169
170 76
        AN(state);
171 76
        assert(state->idx >= 0);
172 76
        CHECK_OBJ_NOTNULL(state->shardd, SHARDDIR_MAGIC);
173
174 76
        if (state->pickcount >= state->shardd->n_backend)
175 2
                return -1;
176
177 74
        ringsz = state->shardd->n_backend * state->shardd->replicas;
178
179 193
        while (state->pickcount < state->shardd->n_backend && skip >= 0) {
180
181 119
                c = state->shardd->hashcircle[state->idx].host;
182
183 119
                if (!vbit_test(state->picklist, c)) {
184
185 79
                        vbit_set(state->picklist, c);
186 79
                        state->pickcount++;
187
188 79
                        sbe = NULL;
189 79
                        be = state->shardd->backend[c].backend;
190 79
                        AN(be);
191 79
                        if (VRT_Healthy(state->ctx, be, &changed)) {
192 75
                                if (skip-- == 0) {
193 74
                                        chosen = c;
194 74
                                        sbe = &state->last;
195
                                } else {
196 1
                                        sbe = &state->previous;
197
                                }
198
199 4
                        } else if (!healthy && skip-- == 0) {
200 0
                                chosen = c;
201 0
                                sbe = &state->last;
202
                        }
203 153
                        if (sbe == &state->last &&
204 74
                            state->last.hostid != -1)
205 36
                                memcpy(&state->previous, &state->last,
206
                                    sizeof(state->previous));
207
208 79
                        if (sbe) {
209 75
                                sbe->hostid = c;
210 75
                                sbe->healthy = 1;
211 75
                                sbe->changed = changed;
212
                        }
213 79
                        if (chosen != -1)
214 74
                                break;
215
                }
216
217 45
                if (++(state->idx) == ringsz)
218 6
                        state->idx = 0;
219
        }
220 74
        return chosen;
221
}
222
223
void
224 21
sharddir_new(struct sharddir **sharddp, const char *vcl_name,
225
    const struct vmod_directors_shard_param *param)
226
{
227
        struct sharddir *shardd;
228
229 21
        AN(vcl_name);
230 21
        AN(sharddp);
231 21
        AZ(*sharddp);
232 21
        ALLOC_OBJ(shardd, SHARDDIR_MAGIC);
233 21
        AN(shardd);
234 21
        *sharddp = shardd;
235 21
        shardd->name = vcl_name;
236 21
        shardd->param = param;
237 21
        AZ(pthread_rwlock_init(&shardd->mtx, NULL));
238 21
}
239
240
void
241 2
sharddir_set_param(struct sharddir *shardd,
242
    const struct vmod_directors_shard_param *param)
243
{
244 2
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
245 2
        shardd->param = param;
246 2
}
247
248
void
249 3
sharddir_delete(struct sharddir **sharddp)
250
{
251
        struct sharddir *shardd;
252
253 3
        TAKE_OBJ_NOTNULL(shardd, sharddp, SHARDDIR_MAGIC);
254 3
        shardcfg_delete(shardd);
255 3
        AZ(pthread_rwlock_destroy(&shardd->mtx));
256 3
        FREE_OBJ(shardd);
257 3
}
258
259
static void
260 47
sharddir_rdlock(struct sharddir *shardd)
261
{
262 47
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
263 47
        AZ(pthread_rwlock_rdlock(&shardd->mtx));
264 47
}
265
266
void
267 33
sharddir_wrlock(struct sharddir *shardd)
268
{
269 33
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
270 33
        AZ(pthread_rwlock_wrlock(&shardd->mtx));
271 33
}
272
273
void
274 80
sharddir_unlock(struct sharddir *shardd)
275
{
276 80
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
277 80
        AZ(pthread_rwlock_unlock(&shardd->mtx));
278 80
}
279
280
static inline void
281 38
validate_alt(VRT_CTX, const struct sharddir *shardd, VCL_INT *alt)
282
{
283 38
        const VCL_INT alt_max = shardd->n_backend - 1;
284
285 38
        if (*alt < 0) {
286 0
                shard_err(ctx, shardd,
287
                    "invalid negative parameter alt=%ld, set to 0", *alt);
288 0
                *alt = 0;
289 38
        } else if (*alt > alt_max) {
290 0
                shard_err(ctx, shardd,
291
                    "parameter alt=%ld limited to %ld", *alt, alt_max);
292 0
                *alt = alt_max;
293
        }
294 38
}
295
296
static inline void
297 38
init_state(struct shard_state *state,
298
    VRT_CTX, struct sharddir *shardd, struct vbitmap *picklist)
299
{
300 38
        AN(picklist);
301
302 38
        state->ctx = ctx;
303 38
        state->shardd = shardd;
304 38
        state->idx = -1;
305 38
        state->picklist = picklist;
306
307
        /* healhy and changed only defined for hostid != -1 */
308 38
        state->previous.hostid = -1;
309 38
        state->last.hostid = -1;
310 38
}
311
312
/* basically same as vdir_any_healthy
313
 * - XXX we should embed a vdir
314
 * - XXX should we return the health state of the actual backend
315
 *   for healthy=IGNORE ?
316
 */
317
VCL_BOOL
318 9
sharddir_any_healthy(VRT_CTX, struct sharddir *shardd, VCL_TIME *changed)
319
{
320 9
        unsigned retval = 0;
321
        VCL_BACKEND be;
322
        unsigned u;
323
        double c;
324
325 9
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
326 9
        sharddir_rdlock(shardd);
327 9
        if (changed != NULL)
328 3
                *changed = 0;
329 9
        for (u = 0; u < shardd->n_backend; u++) {
330 9
                be = shardd->backend[u].backend;
331 9
                CHECK_OBJ_NOTNULL(be, DIRECTOR_MAGIC);
332 9
                retval = VRT_Healthy(ctx, be, &c);
333 9
                if (changed != NULL && c > *changed)
334 3
                        *changed = c;
335 9
                if (retval)
336 9
                        break;
337
        }
338 9
        sharddir_unlock(shardd);
339 9
        return (retval);
340
}
341
/*
342
 * core function for the director backend/resolve method
343
 */
344
VCL_BACKEND
345 38
sharddir_pick_be(VRT_CTX, struct sharddir *shardd,
346
    uint32_t key, VCL_INT alt, VCL_REAL warmup, VCL_BOOL rampup,
347
    enum healthy_e healthy)
348 38
{
349
        VCL_BACKEND be;
350
        struct shard_state state;
351 38
        unsigned picklist_sz = VBITMAP_SZ(shardd->n_backend);
352 38
        char picklist_spc[picklist_sz];
353
        VCL_DURATION chosen_r, alt_r;
354
355 38
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
356 38
        CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
357 38
        AN(ctx->vsl);
358
359 38
        memset(&state, 0, sizeof(state));
360 38
        init_state(&state, ctx, shardd, vbit_init(picklist_spc, picklist_sz));
361
362 38
        sharddir_rdlock(shardd);
363 38
        if (shardd->n_backend == 0) {
364 0
                shard_err0(ctx, shardd, "no backends");
365 0
                goto err;
366
        }
367
368 38
        assert(shardd->hashcircle);
369
370 38
        validate_alt(ctx, shardd, &alt);
371
372 38
        state.idx = shard_lookup(shardd, key);
373 38
        assert(state.idx >= 0);
374
375 38
        SHDBG(SHDBG_LOOKUP, shardd, "lookup key %x idx %d host %u",
376
            key, state.idx, shardd->hashcircle[state.idx].host);
377
378 38
        if (alt > 0) {
379 3
                if (shard_next(&state, alt - 1, healthy == ALL ? 1 : 0) == -1) {
380 0
                        if (state.previous.hostid != -1) {
381 0
                                be = sharddir_backend(shardd,
382
                                    state.previous.hostid);
383 0
                                goto ok;
384
                        }
385 0
                        goto err;
386
                }
387
        }
388
389 38
        if (shard_next(&state, 0, healthy == IGNORE ? 0 : 1) == -1) {
390 0
                if (state.previous.hostid != -1) {
391 0
                        be = sharddir_backend(shardd, state.previous.hostid);
392 0
                        goto ok;
393
                }
394 0
                goto err;
395
        }
396
397 38
        be = sharddir_backend(shardd, state.last.hostid);
398
399 38
        if (warmup == -1)
400 38
                warmup = shardd->warmup;
401
402
        /* short path for cases we dont want ramup/warmup or can't */
403 73
        if (alt > 0 || healthy == IGNORE || (!rampup && warmup == 0) ||
404 35
            shard_next(&state, 0, 0) == -1)
405
                goto ok;
406
407 33
        assert(alt == 0);
408 33
        assert(state.previous.hostid >= 0);
409 33
        assert(state.last.hostid >= 0);
410 33
        assert(state.previous.hostid != state.last.hostid);
411 33
        assert(be == sharddir_backend(shardd, state.previous.hostid));
412
413 33
        chosen_r = shardcfg_get_rampup(shardd, state.previous.hostid);
414 33
        alt_r = shardcfg_get_rampup(shardd, state.last.hostid);
415
416 33
        SHDBG(SHDBG_RAMPWARM, shardd, "chosen host %d rampup %f changed %f",
417
            state.previous.hostid, chosen_r,
418
            ctx->now - state.previous.changed);
419 33
        SHDBG(SHDBG_RAMPWARM, shardd, "alt host %d rampup %f changed %f",
420
            state.last.hostid, alt_r,
421
            ctx->now - state.last.changed);
422
423 33
        if (ctx->now - state.previous.changed < chosen_r) {
424
                /*
425
                 * chosen host is in rampup
426
                 * - no change if alternative host is also in rampup or the dice
427
                 *   has rolled in favour of the chosen host
428
                 */
429 4
                if (!rampup ||
430 3
                    ctx->now - state.last.changed < alt_r ||
431 1
                    VRND_RandomTestableDouble() * chosen_r <
432 1
                     (ctx->now - state.previous.changed))
433
                        goto ok;
434
        } else {
435
                /* chosen host not in rampup - warmup ? */
436 31
                if (warmup == 0 || VRND_RandomTestableDouble() > warmup)
437
                        goto ok;
438
        }
439
440 1
        be = sharddir_backend(shardd, state.last.hostid);
441
442
  ok:
443 38
        AN(be);
444 38
        sharddir_unlock(shardd);
445 38
        vbit_destroy(state.picklist);
446 38
        return (be);
447
  err:
448 0
        sharddir_unlock(shardd);
449 0
        vbit_destroy(state.picklist);
450 0
        return NULL;
451
}