varnish-cache/lib/libvmod_directors/shard_dir.c
1
/*-
2
 * Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
3
 * All rights reserved.
4
 *
5
 * Authors: Nils Goroll <nils.goroll@uplex.de>
6
 *          Geoffrey Simmons <geoff.simmons@uplex.de>
7
 *          Julian Wiesener <jw@uplex.de>
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 */
30
31
/*lint -e801 */
32
33
#include "config.h"
34
35
#include <stdlib.h>
36
#include <stdio.h>
37
#include <time.h>
38
39
#include "cache/cache.h"
40
41
#include "vbm.h"
42
#include "vrnd.h"
43
#include "vsha256.h"
44
#include "vend.h"
45
46
#include "shard_dir.h"
47
48
struct shard_be_info {
49
        int             hostid;
50
        unsigned        healthy;
51
        double          changed;        // when
52
};
53
54
/*
55
 * circle walk state for shard_next
56
 *
57
 * pick* cut off the search after having seen all possible backends
58
 */
59
struct shard_state {
60
        const struct vrt_ctx    *ctx;
61
        struct sharddir *shardd;
62
        int                     idx;
63
64
        struct vbitmap          *picklist;
65
        int                     pickcount;
66
67
        struct shard_be_info    previous;
68
        struct shard_be_info    last;
69
};
70
71
void
72 12
sharddir_debug(struct sharddir *shardd, const uint32_t flags)
73
{
74 12
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
75 12
        shardd->debug_flags = flags;
76 12
}
77
78
void
79 34
sharddir_err(VRT_CTX, enum VSL_tag_e tag,  const char *fmt, ...)
80
{
81
        va_list ap;
82
83 34
        va_start(ap, fmt);
84 34
        if (ctx->vsl)
85 16
                VSLbv(ctx->vsl, tag, fmt, ap);
86
        else
87 18
                VSLv(tag, 0, fmt, ap);
88 34
        va_end(ap);
89 34
}
90
91
uint32_t
92 1850
sharddir_sha256v(const char *s, va_list ap)
93
{
94
        struct VSHA256Context sha256;
95
        union {
96
                unsigned char digest[32];
97
                uint32_t uint32_digest[8];
98
        } sha256_digest;
99
        uint32_t r;
100
        const char *p;
101
102 1850
        VSHA256_Init(&sha256);
103 1850
        p = s;
104 7342
        while (p != vrt_magic_string_end) {
105 3642
                if (p != NULL && *p != '\0')
106 3642
                        VSHA256_Update(&sha256, p, strlen(p));
107 3642
                p = va_arg(ap, const char *);
108
        }
109 1850
        VSHA256_Final(sha256_digest.digest, &sha256);
110
111
        /*
112
         * use low 32 bits only
113
         * XXX: Are these the best bits to pick?
114
         */
115 1850
        vle32enc(&r, sha256_digest.uint32_digest[7]);
116 1850
        return (r);
117
}
118
119
uint32_t
120 1828
sharddir_sha256(const char *s, ...)
121
{
122
        va_list ap;
123
        uint32_t r;
124
125 1828
        va_start(ap, s);
126 1828
        r = sharddir_sha256v(s, ap);
127 1828
        va_end(ap);
128
129 1828
        return (r);
130
}
131
132
static int
133 76
shard_lookup(const struct sharddir *shardd, const uint32_t key)
134
{
135 76
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
136
137 76
        const int n = shardd->n_backend * shardd->replicas;
138 76
        int idx = -1, high = n, low = 0, i;
139
140
        do {
141 430
            i = (high + low) / 2 ;
142 430
            if (shardd->hashcircle[i].point == key)
143 0
                idx = i;
144 430
            else if (i == n - 1)
145 10
                idx = n - 1;
146 650
            else if (shardd->hashcircle[i].point < key &&
147 230
                     shardd->hashcircle[i+1].point >= key)
148 58
                idx = i + 1;
149 362
            else if (shardd->hashcircle[i].point > key)
150 190
                if (i == 0)
151 8
                    idx = 0;
152
                else
153 182
                    high = i;
154
            else
155 172
                low = i;
156 430
        } while (idx == -1);
157
158 76
        return idx;
159
}
160
161
static int
162 152
shard_next(struct shard_state *state, VCL_INT skip, VCL_BOOL healthy)
163
{
164 152
        int c, chosen = -1;
165
        uint32_t ringsz;
166
        VCL_BACKEND be;
167
        double changed;
168
        struct shard_be_info *sbe;
169
170 152
        AN(state);
171 152
        assert(state->idx >= 0);
172 152
        CHECK_OBJ_NOTNULL(state->shardd, SHARDDIR_MAGIC);
173
174 152
        if (state->pickcount >= state->shardd->n_backend)
175 4
                return -1;
176
177 148
        ringsz = state->shardd->n_backend * state->shardd->replicas;
178
179 386
        while (state->pickcount < state->shardd->n_backend && skip >= 0) {
180
181 238
                c = state->shardd->hashcircle[state->idx].host;
182
183 238
                if (!vbit_test(state->picklist, c)) {
184
185 158
                        vbit_set(state->picklist, c);
186 158
                        state->pickcount++;
187
188 158
                        sbe = NULL;
189 158
                        be = state->shardd->backend[c].backend;
190 158
                        AN(be);
191 158
                        if (VRT_Healthy(state->ctx, be, &changed)) {
192 150
                                if (skip-- == 0) {
193 148
                                        chosen = c;
194 148
                                        sbe = &state->last;
195
                                } else {
196 2
                                        sbe = &state->previous;
197
                                }
198
199 8
                        } else if (!healthy && skip-- == 0) {
200 0
                                chosen = c;
201 0
                                sbe = &state->last;
202
                        }
203 306
                        if (sbe == &state->last &&
204 148
                            state->last.hostid != -1)
205 72
                                memcpy(&state->previous, &state->last,
206
                                    sizeof(state->previous));
207
208 158
                        if (sbe) {
209 150
                                sbe->hostid = c;
210 150
                                sbe->healthy = 1;
211 150
                                sbe->changed = changed;
212
                        }
213 158
                        if (chosen != -1)
214 148
                                break;
215
                }
216
217 90
                if (++(state->idx) == ringsz)
218 12
                        state->idx = 0;
219
        }
220 148
        return chosen;
221
}
222
223
void
224 42
sharddir_new(struct sharddir **sharddp, const char *vcl_name,
225
    const struct vmod_directors_shard_param *param)
226
{
227
        struct sharddir *shardd;
228
229 42
        AN(vcl_name);
230 42
        AN(sharddp);
231 42
        AZ(*sharddp);
232 42
        ALLOC_OBJ(shardd, SHARDDIR_MAGIC);
233 42
        AN(shardd);
234 42
        *sharddp = shardd;
235 42
        shardd->name = vcl_name;
236 42
        shardd->param = param;
237 42
        AZ(pthread_rwlock_init(&shardd->mtx, NULL));
238 42
}
239
240
void
241 4
sharddir_set_param(struct sharddir *shardd,
242
    const struct vmod_directors_shard_param *param)
243
{
244 4
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
245 4
        shardd->param = param;
246 4
}
247
248
void
249 6
sharddir_delete(struct sharddir **sharddp)
250
{
251
        struct sharddir *shardd;
252
253 6
        TAKE_OBJ_NOTNULL(shardd, sharddp, SHARDDIR_MAGIC);
254 6
        shardcfg_delete(shardd);
255 6
        AZ(pthread_rwlock_destroy(&shardd->mtx));
256 6
        FREE_OBJ(shardd);
257 6
}
258
259
static void
260 94
sharddir_rdlock(struct sharddir *shardd)
261
{
262 94
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
263 94
        AZ(pthread_rwlock_rdlock(&shardd->mtx));
264 94
}
265
266
void
267 66
sharddir_wrlock(struct sharddir *shardd)
268
{
269 66
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
270 66
        AZ(pthread_rwlock_wrlock(&shardd->mtx));
271 66
}
272
273
void
274 160
sharddir_unlock(struct sharddir *shardd)
275
{
276 160
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
277 160
        AZ(pthread_rwlock_unlock(&shardd->mtx));
278 160
}
279
280
static inline void
281 76
validate_alt(VRT_CTX, const struct sharddir *shardd, VCL_INT *alt)
282
{
283 76
        const VCL_INT alt_max = shardd->n_backend - 1;
284
285 76
        if (*alt < 0) {
286 0
                shard_err(ctx, shardd,
287
                    "invalid negative parameter alt=%ld, set to 0", *alt);
288 0
                *alt = 0;
289 76
        } else if (*alt > alt_max) {
290 0
                shard_err(ctx, shardd,
291
                    "parameter alt=%ld limited to %ld", *alt, alt_max);
292 0
                *alt = alt_max;
293
        }
294 76
}
295
296
static inline void
297 76
init_state(struct shard_state *state,
298
    VRT_CTX, struct sharddir *shardd, struct vbitmap *picklist)
299
{
300 76
        AN(picklist);
301
302 76
        state->ctx = ctx;
303 76
        state->shardd = shardd;
304 76
        state->idx = -1;
305 76
        state->picklist = picklist;
306
307
        /* healhy and changed only defined for hostid != -1 */
308 76
        state->previous.hostid = -1;
309 76
        state->last.hostid = -1;
310 76
}
311
312
/* basically same as vdir_any_healthy
313
 * - XXX we should embed a vdir
314
 * - XXX should we return the health state of the actual backend
315
 *   for healthy=IGNORE ?
316
 */
317
VCL_BOOL
318 18
sharddir_any_healthy(VRT_CTX, struct sharddir *shardd, VCL_TIME *changed)
319
{
320 18
        unsigned retval = 0;
321
        VCL_BACKEND be;
322
        unsigned u;
323
        double c;
324
325 18
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
326 18
        sharddir_rdlock(shardd);
327 18
        if (changed != NULL)
328 6
                *changed = 0;
329 18
        for (u = 0; u < shardd->n_backend; u++) {
330 18
                be = shardd->backend[u].backend;
331 18
                CHECK_OBJ_NOTNULL(be, DIRECTOR_MAGIC);
332 18
                retval = VRT_Healthy(ctx, be, &c);
333 18
                if (changed != NULL && c > *changed)
334 6
                        *changed = c;
335 18
                if (retval)
336 18
                        break;
337
        }
338 18
        sharddir_unlock(shardd);
339 18
        return (retval);
340
}
341
/*
342
 * core function for the director backend/resolve method
343
 */
344
VCL_BACKEND
345 76
sharddir_pick_be(VRT_CTX, struct sharddir *shardd,
346
    uint32_t key, VCL_INT alt, VCL_REAL warmup, VCL_BOOL rampup,
347
    enum healthy_e healthy)
348 76
{
349
        VCL_BACKEND be;
350
        struct shard_state state;
351 76
        unsigned picklist_sz = VBITMAP_SZ(shardd->n_backend);
352 76
        char picklist_spc[picklist_sz];
353
        VCL_DURATION chosen_r, alt_r;
354
355 76
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
356 76
        CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
357 76
        AN(ctx->vsl);
358
359 76
        memset(&state, 0, sizeof(state));
360 76
        init_state(&state, ctx, shardd, vbit_init(picklist_spc, picklist_sz));
361
362 76
        sharddir_rdlock(shardd);
363 76
        if (shardd->n_backend == 0) {
364 0
                shard_err0(ctx, shardd, "no backends");
365 0
                goto err;
366
        }
367
368 76
        assert(shardd->hashcircle);
369
370 76
        validate_alt(ctx, shardd, &alt);
371
372 76
        state.idx = shard_lookup(shardd, key);
373 76
        assert(state.idx >= 0);
374
375 76
        SHDBG(SHDBG_LOOKUP, shardd, "lookup key %x idx %d host %u",
376
            key, state.idx, shardd->hashcircle[state.idx].host);
377
378 76
        if (alt > 0) {
379 6
                if (shard_next(&state, alt - 1, healthy == ALL ? 1 : 0) == -1) {
380 0
                        if (state.previous.hostid != -1) {
381 0
                                be = sharddir_backend(shardd,
382
                                    state.previous.hostid);
383 0
                                goto ok;
384
                        }
385 0
                        goto err;
386
                }
387
        }
388
389 76
        if (shard_next(&state, 0, healthy == IGNORE ? 0 : 1) == -1) {
390 0
                if (state.previous.hostid != -1) {
391 0
                        be = sharddir_backend(shardd, state.previous.hostid);
392 0
                        goto ok;
393
                }
394 0
                goto err;
395
        }
396
397 76
        be = sharddir_backend(shardd, state.last.hostid);
398
399 76
        if (warmup == -1)
400 76
                warmup = shardd->warmup;
401
402
        /* short path for cases we dont want ramup/warmup or can't */
403 146
        if (alt > 0 || healthy == IGNORE || (!rampup && warmup == 0) ||
404 70
            shard_next(&state, 0, 0) == -1)
405
                goto ok;
406
407 66
        assert(alt == 0);
408 66
        assert(state.previous.hostid >= 0);
409 66
        assert(state.last.hostid >= 0);
410 66
        assert(state.previous.hostid != state.last.hostid);
411 66
        assert(be == sharddir_backend(shardd, state.previous.hostid));
412
413 66
        chosen_r = shardcfg_get_rampup(shardd, state.previous.hostid);
414 66
        alt_r = shardcfg_get_rampup(shardd, state.last.hostid);
415
416 66
        SHDBG(SHDBG_RAMPWARM, shardd, "chosen host %d rampup %f changed %f",
417
            state.previous.hostid, chosen_r,
418
            ctx->now - state.previous.changed);
419 66
        SHDBG(SHDBG_RAMPWARM, shardd, "alt host %d rampup %f changed %f",
420
            state.last.hostid, alt_r,
421
            ctx->now - state.last.changed);
422
423 66
        if (ctx->now - state.previous.changed < chosen_r) {
424
                /*
425
                 * chosen host is in rampup
426
                 * - no change if alternative host is also in rampup or the dice
427
                 *   has rolled in favour of the chosen host
428
                 */
429 8
                if (!rampup ||
430 6
                    ctx->now - state.last.changed < alt_r ||
431 2
                    VRND_RandomTestableDouble() * chosen_r <
432 2
                     (ctx->now - state.previous.changed))
433
                        goto ok;
434
        } else {
435
                /* chosen host not in rampup - warmup ? */
436 62
                if (warmup == 0 || VRND_RandomTestableDouble() > warmup)
437
                        goto ok;
438
        }
439
440 2
        be = sharddir_backend(shardd, state.last.hostid);
441
442
  ok:
443 76
        AN(be);
444 76
        sharddir_unlock(shardd);
445 76
        vbit_destroy(state.picklist);
446 76
        return (be);
447
  err:
448 0
        sharddir_unlock(shardd);
449 0
        vbit_destroy(state.picklist);
450 0
        return NULL;
451
}