varnish-cache/vmod/vmod_directors_shard_dir.c
1
/*-
2
 * Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
3
 * All rights reserved.
4
 *
5
 * Authors: Nils Goroll <nils.goroll@uplex.de>
6
 *          Geoffrey Simmons <geoff.simmons@uplex.de>
7
 *          Julian Wiesener <jw@uplex.de>
8
 *
9
 * SPDX-License-Identifier: BSD-2-Clause
10
 *
11
 * Redistribution and use in source and binary forms, with or without
12
 * modification, are permitted provided that the following conditions
13
 * are met:
14
 * 1. Redistributions of source code must retain the above copyright
15
 *    notice, this list of conditions and the following disclaimer.
16
 * 2. Redistributions in binary form must reproduce the above copyright
17
 *    notice, this list of conditions and the following disclaimer in the
18
 *    documentation and/or other materials provided with the distribution.
19
 *
20
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
24
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30
 * SUCH DAMAGE.
31
 */
32
33
/*lint -e801 */
34
35
#include "config.h"
36
37
#include <stdlib.h>
38
#include <stdio.h>
39
#include <time.h>
40
#include <limits.h>
41
42
#include "cache/cache.h"
43
44
#include "vbm.h"
45
#include "vrnd.h"
46
47
#include "vcc_directors_if.h"
48
#include "vmod_directors_shard_dir.h"
49
50
/*
 * What shard_next() remembers about one backend it has looked at.
 */
struct shard_be_info {
        /* index into the director's backend table, UINT_MAX while unset */
        unsigned int    hostid;
        /* health flag stored together with hostid */
        unsigned int    healthy;
        /* value VRT_Healthy() handed back through its "changed" argument */
        double          changed;
};
55
56
/*
 * circle walk state for shard_next
 *
 * picklist and pickcount cut off the search once all possible backends
 * have been seen
 */
61
struct shard_state {
62
        const struct vrt_ctx    *ctx;
63
        struct sharddir *shardd;
64
        uint32_t                idx;
65
66
        struct vbitmap          *picklist;
67
        unsigned                pickcount;
68
69
        struct shard_be_info    previous;
70
        struct shard_be_info    last;
71
};
72
73
void
74 147
sharddir_debug(struct sharddir *shardd, const uint32_t flags)
75
{
76 147
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
77 147
        shardd->debug_flags = flags;
78 147
}
79
80
void
81 441
sharddir_log(struct vsl_log *vsl, enum VSL_tag_e tag,  const char *fmt, ...)
82
{
83
        va_list ap;
84
85 441
        va_start(ap, fmt);
86 441
        if (vsl != NULL)
87 210
                VSLbv(vsl, tag, fmt, ap);
88
        else
89 231
                VSLv(tag, 0, fmt, ap);
90 441
        va_end(ap);
91 441
}
92
93
static int
94 2184
shard_lookup(const struct sharddir *shardd, const uint32_t key)
95
{
96 2184
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
97
98 2184
        const uint32_t n = shardd->n_points;
99 2184
        uint32_t i, idx = UINT32_MAX, high = n, low = 0;
100
101 2184
        assert (n < idx);
102
103 2184
        do {
104 13335
            i = (high + low) / 2 ;
105 13335
            if (shardd->hashcircle[i].point == key)
106 21
                idx = i;
107 13314
            else if (i == n - 1)
108 378
                idx = n - 1;
109 12936
            else if (shardd->hashcircle[i].point < key &&
110 6846
                     shardd->hashcircle[i+1].point >= key)
111 1428
                idx = i + 1;
112 11508
            else if (shardd->hashcircle[i].point > key)
113 12180
                if (i == 0)
114 357
                    idx = 0;
115
                else
116 5733
                    high = i;
117
            else
118 5418
                low = i;
119 13335
        } while (idx == UINT32_MAX);
120
121 2184
        return (idx);
122
}
123
124
static int
125 4368
shard_next(struct shard_state *state, VCL_INT skip, VCL_BOOL healthy)
126
{
127 4368
        int c, chosen = -1;
128
        VCL_BACKEND be;
129
        vtim_real changed;
130
        struct shard_be_info *sbe;
131
132 4368
        AN(state);
133 4368
        CHECK_OBJ_NOTNULL(state->shardd, SHARDDIR_MAGIC);
134
135 4368
        if (state->pickcount >= state->shardd->n_backend)
136 63
                return (-1);
137
138 6888
        while (state->pickcount < state->shardd->n_backend && skip >= 0) {
139
140 6867
                c = state->shardd->hashcircle[state->idx].host;
141
142 6867
                if (!vbit_test(state->picklist, c)) {
143
144 4620
                        vbit_set(state->picklist, c);
145 4620
                        state->pickcount++;
146
147 4620
                        sbe = NULL;
148 4620
                        be = state->shardd->backend[c].backend;
149 4620
                        AN(be);
150 4620
                        if (VRT_Healthy(state->ctx, be, &changed)) {
151 4494
                                if (skip-- == 0) {
152 4284
                                        chosen = c;
153 4284
                                        sbe = &state->last;
154 4284
                                } else {
155 210
                                        sbe = &state->previous;
156
                                }
157
158 4620
                        } else if (!healthy && skip-- == 0) {
159 0
                                chosen = c;
160 0
                                sbe = &state->last;
161 0
                        }
162 4620
                        if (sbe == &state->last &&
163 4284
                            state->last.hostid != UINT_MAX)
164 2100
                                memcpy(&state->previous, &state->last,
165
                                    sizeof(state->previous));
166
167 4620
                        if (sbe) {
168 4494
                                sbe->hostid = c;
169 4494
                                sbe->healthy = 1;
170 4494
                                sbe->changed = changed;
171 4494
                        }
172 4620
                        if (chosen != -1)
173 4284
                                break;
174 336
                }
175
176 2583
                if (++(state->idx) == state->shardd->n_points)
177 399
                        state->idx = 0;
178
        }
179 4305
        return (chosen);
180 4368
}
181
182
void
183 525
sharddir_new(struct sharddir **sharddp, const char *vcl_name,
184
    const struct vmod_directors_shard_param *param)
185
{
186
        struct sharddir *shardd;
187
188 525
        AN(vcl_name);
189 525
        AN(sharddp);
190 525
        AZ(*sharddp);
191 525
        ALLOC_OBJ(shardd, SHARDDIR_MAGIC);
192 525
        AN(shardd);
193 525
        *sharddp = shardd;
194 525
        shardd->name = vcl_name;
195 525
        shardd->param = param;
196 525
        AZ(pthread_rwlock_init(&shardd->mtx, NULL));
197 525
}
198
199
void
200 63
sharddir_set_param(struct sharddir *shardd,
201
    const struct vmod_directors_shard_param *param)
202
{
203 63
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
204 63
        shardd->param = param;
205 63
}
206
207
void
208 105
sharddir_delete(struct sharddir **sharddp)
209
{
210
        struct sharddir *shardd;
211
212 105
        TAKE_OBJ_NOTNULL(shardd, sharddp, SHARDDIR_MAGIC);
213 105
        shardcfg_delete(shardd);
214 105
        AZ(pthread_rwlock_destroy(&shardd->mtx));
215 105
        FREE_OBJ(shardd);
216 105
}
217
218
void
219 3150
sharddir_rdlock(struct sharddir *shardd)
220
{
221 3150
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
222 3150
        AZ(pthread_rwlock_rdlock(&shardd->mtx));
223 3150
}
224
225
void
226 924
sharddir_wrlock(struct sharddir *shardd)
227
{
228 924
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
229 924
        AZ(pthread_rwlock_wrlock(&shardd->mtx));
230 924
}
231
232
void
233 4074
sharddir_unlock(struct sharddir *shardd)
234
{
235 4074
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
236 4074
        AZ(pthread_rwlock_unlock(&shardd->mtx));
237 4074
}
238
239
static inline void
240 2184
validate_alt(VRT_CTX, const struct sharddir *shardd, VCL_INT *alt)
241
{
242 2184
        const VCL_INT alt_max = shardd->n_backend - 1;
243
244 2184
        if (*alt < 0) {
245 0
                shard_err(ctx->vsl, shardd->name,
246
                    "invalid negative parameter alt=%ld, set to 0", *alt);
247 0
                *alt = 0;
248 2184
        } else if (*alt > alt_max) {
249 63
                shard_err(ctx->vsl, shardd->name,
250
                    "parameter alt=%ld limited to %ld", *alt, alt_max);
251 63
                *alt = alt_max;
252 63
        }
253 2184
}
254
255
static inline void
256 2184
init_state(struct shard_state *state,
257
    VRT_CTX, struct sharddir *shardd, struct vbitmap *picklist)
258
{
259 2184
        AN(picklist);
260
261 2184
        state->ctx = ctx;
262 2184
        state->shardd = shardd;
263 2184
        state->idx = UINT32_MAX;
264 2184
        state->picklist = picklist;
265
266
        /* healhy and changed only defined for valid hostids */
267 2184
        state->previous.hostid = UINT_MAX;
268 2184
        state->last.hostid = UINT_MAX;
269 2184
}
270
271
/* basically same as vdir_any_healthy
272
 * - XXX we should embed a vdir
273
 * - XXX should we return the health state of the actual backend
274
 *   for healthy=IGNORE ?
275
 */
276
VCL_BOOL
277 336
sharddir_any_healthy(VRT_CTX, struct sharddir *shardd, VCL_TIME *changed)
278
{
279 336
        unsigned i, retval = 0;
280
        VCL_BACKEND be;
281
        vtim_real c;
282
283 336
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
284 336
        sharddir_rdlock(shardd);
285 336
        if (changed != NULL)
286 147
                *changed = 0;
287 336
        for (i = 0; i < shardd->n_backend; i++) {
288 336
                be = shardd->backend[i].backend;
289 336
                CHECK_OBJ_NOTNULL(be, DIRECTOR_MAGIC);
290 336
                retval = VRT_Healthy(ctx, be, &c);
291 336
                if (changed != NULL && c > *changed)
292 147
                        *changed = c;
293 336
                if (retval)
294 336
                        break;
295 0
        }
296 336
        sharddir_unlock(shardd);
297 336
        return (retval);
298
}
299
300
/*
301
 * core function for the director backend/resolve method
302
 */
303
304
static VCL_BACKEND
305 2184
sharddir_pick_be_locked(VRT_CTX, const struct sharddir *shardd, uint32_t key,
306
    VCL_INT alt, VCL_REAL warmup, VCL_BOOL rampup, VCL_ENUM healthy,
307
    struct shard_state *state)
308
{
309
        VCL_BACKEND be;
310
        VCL_DURATION chosen_r, alt_r;
311
312 2184
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
313 2184
        CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
314 2184
        AN(ctx->vsl);
315 2184
        assert(shardd->n_backend > 0);
316
317 2184
        assert(shardd->hashcircle);
318
319 2184
        validate_alt(ctx, shardd, &alt);
320
321 2184
        state->idx = shard_lookup(shardd, key);
322 2184
        assert(state->idx < UINT32_MAX);
323
324 2184
        SHDBG(SHDBG_LOOKUP, shardd, "lookup key %x idx %u host %u",
325
            key, state->idx, shardd->hashcircle[state->idx].host);
326
327 2184
        if (alt > 0) {
328 1701
                if (shard_next(state, alt - 1,
329 1134
                    healthy == VENUM(ALL) ? 1 : 0) == -1) {
330 0
                        if (state->previous.hostid != UINT_MAX) {
331 0
                                be = sharddir_backend(shardd,
332 0
                                    state->previous.hostid);
333 0
                                AN(be);
334 0
                                return (be);
335
                        }
336 0
                        return (NULL);
337
                }
338 567
        }
339
340 2184
        if (shard_next(state, 0, healthy == VENUM(IGNORE) ? 0 : 1) == -1) {
341 0
                if (state->previous.hostid != UINT_MAX) {
342 0
                        be = sharddir_backend(shardd, state->previous.hostid);
343 0
                        AN(be);
344 0
                        return (be);
345
                }
346 0
                return (NULL);
347
        }
348
349 2184
        be = sharddir_backend(shardd, state->last.hostid);
350 2184
        AN(be);
351
352 2184
        if (warmup == -1)
353 2163
                warmup = shardd->warmup;
354
355
        /* short path for cases we dont want ramup/warmup or can't */
356 2184
        if (alt > 0 || healthy == VENUM(IGNORE) || (!rampup && warmup == 0) ||
357 1617
            shard_next(state, 0, 1) == -1)
358 651
                return (be);
359
360 1533
        assert(alt == 0);
361 1533
        assert(state->previous.hostid != UINT_MAX);
362 1533
        assert(state->last.hostid != UINT_MAX);
363 1533
        assert(state->previous.hostid != state->last.hostid);
364 1533
        assert(be == sharddir_backend(shardd, state->previous.hostid));
365
366 1533
        chosen_r = shardcfg_get_rampup(shardd, state->previous.hostid);
367 1533
        alt_r = shardcfg_get_rampup(shardd, state->last.hostid);
368
369 1533
        SHDBG(SHDBG_RAMPWARM, shardd, "chosen host %u rampup %f changed %f",
370
            state->previous.hostid, chosen_r,
371
            ctx->now - state->previous.changed);
372 1533
        SHDBG(SHDBG_RAMPWARM, shardd, "alt host %u rampup %f changed %f",
373
            state->last.hostid, alt_r,
374
            ctx->now - state->last.changed);
375
376 1533
        if (ctx->now - state->previous.changed < chosen_r) {
377
                /*
378
                 * chosen host is in rampup
379
                 * - no change if alternative host is also in rampup or the dice
380
                 *   has rolled in favour of the chosen host
381
                 */
382 63
                if (!rampup ||
383 42
                    ctx->now - state->last.changed < alt_r ||
384 42
                    VRND_RandomTestableDouble() * chosen_r <
385 21
                    (ctx->now - state->previous.changed))
386 21
                        return (be);
387 21
        } else {
388
                /* chosen host not in rampup - warmup ? */
389 1491
                if (warmup == 0 || VRND_RandomTestableDouble() > warmup)
390 1491
                        return (be);
391
        }
392
393 21
        be = sharddir_backend(shardd, state->last.hostid);
394 21
        return (be);
395 2184
}
396
397
VCL_BACKEND
398 2184
sharddir_pick_be(VRT_CTX, struct sharddir *shardd, uint32_t key, VCL_INT alt,
399
    VCL_REAL warmup, VCL_BOOL rampup, VCL_ENUM healthy)
400
{
401
        VCL_BACKEND be;
402
        struct shard_state state[1];
403
        unsigned picklist_sz;
404
405 2184
        CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
406 2184
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
407
408 2184
        sharddir_rdlock(shardd);
409
410 2184
        if (shardd->n_backend == 0) {
411 0
                shard_err0(ctx->vsl, shardd->name, "no backends");
412 0
                sharddir_unlock(shardd);
413 0
                return (NULL);
414
        }
415
416 2184
        picklist_sz = VBITMAP_SZ(shardd->n_backend);
417 2184
        char picklist_spc[picklist_sz];
418
419 2184
        memset(state, 0, sizeof(state));
420 2184
        init_state(state, ctx, shardd, vbit_init(picklist_spc, picklist_sz));
421
422 4368
        be = sharddir_pick_be_locked(ctx, shardd, key, alt, warmup, rampup,
423 2184
            healthy, state);
424 2184
        sharddir_unlock(shardd);
425
426 2184
        vbit_destroy(state->picklist);
427 2184
        return (be);
428 2184
}