varnish-cache/lib/libvmod_directors/shard_dir.c
1
/*-
2
 * Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
3
 * All rights reserved.
4
 *
5
 * Authors: Nils Goroll <nils.goroll@uplex.de>
6
 *          Geoffrey Simmons <geoff.simmons@uplex.de>
7
 *          Julian Wiesener <jw@uplex.de>
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 */
30
31
/*lint -e801 */
32
33
#include "config.h"
34
35
#include <stdlib.h>
36
#include <stdio.h>
37
#include <time.h>
38
39
#include "cache/cache.h"
40
#include "cache/cache_director.h"
41
42
#include "vbm.h"
43
#include "vrnd.h"
44
45
#include "shard_dir.h"
46
47
/*
 * Snapshot of one backend as recorded by shard_next() during a circle walk.
 */
struct shard_be_info {
        /* index of the host in the director's backend array, -1 if unset */
        int             hostid;
        /* health flag stored by shard_next(); only valid if hostid != -1 */
        unsigned        healthy;
        /* "changed" value reported by the backend's healthy() callback */
        double          changed;
};
52
53
/*
54
 * circle walk state for shard_next
55
 *
56
 * pick* cut off the search after having seen all possible backends
57
 */
58
struct shard_state {
59
        const struct vrt_ctx    *ctx;
60
        struct sharddir *shardd;
61
        int                     idx;
62
63
        struct vbitmap          *picklist;
64
        int                     pickcount;
65
66
        struct shard_be_info    previous;
67
        struct shard_be_info    last;
68
};
69
70
void
71 5
sharddir_debug(struct sharddir *shardd, const uint32_t flags)
72
{
73 5
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
74 5
        shardd->debug_flags = flags;
75 5
}
76
77
void
78 16
sharddir_err(VRT_CTX, enum VSL_tag_e tag,  const char *fmt, ...)
79
{
80
        va_list ap;
81
82 16
        va_start(ap, fmt);
83 16
        if (ctx->vsl)
84 8
                VSLbv(ctx->vsl, tag, fmt, ap);
85
        else
86 8
                VSLv(tag, 0, fmt, ap);
87 16
        va_end(ap);
88 16
}
89
90
static int
91 37
shard_lookup(const struct sharddir *shardd, const uint32_t key)
92
{
93 37
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
94
95 37
        const int n = shardd->n_backend * shardd->replicas;
96 37
        int idx = -1, high = n, low = 0, i;
97
98
        do {
99 198
            i = (high + low) / 2 ;
100 198
            if (shardd->hashcircle[i].point == key)
101 0
                idx = i;
102 198
            else if (i == n - 1)
103 2
                idx = n - 1;
104 303
            else if (shardd->hashcircle[i].point < key &&
105 107
                     shardd->hashcircle[i+1].point >= key)
106 34
                idx = i + 1;
107 162
            else if (shardd->hashcircle[i].point > key)
108 89
                if (i == 0)
109 1
                    idx = 0;
110
                else
111 88
                    high = i;
112
            else
113 73
                low = i;
114 198
        } while (idx == -1);
115
116 37
        return idx;
117
}
118
119
static int
120 74
shard_next(struct shard_state *state, VCL_INT skip, VCL_BOOL healthy)
121
{
122 74
        int c, chosen = -1;
123
        uint32_t ringsz;
124
        VCL_BACKEND be;
125
        double changed;
126
        struct shard_be_info *sbe;
127
128 74
        AN(state);
129 74
        assert(state->idx >= 0);
130 74
        CHECK_OBJ_NOTNULL(state->shardd, SHARDDIR_MAGIC);
131
132 74
        if (state->pickcount >= state->shardd->n_backend)
133 2
                return -1;
134
135 72
        ringsz = state->shardd->n_backend * state->shardd->replicas;
136
137 192
        while (state->pickcount < state->shardd->n_backend && skip >= 0) {
138
139 120
                c = state->shardd->hashcircle[state->idx].host;
140
141 120
                if (!vbit_test(state->picklist, c)) {
142
143 77
                        vbit_set(state->picklist, c);
144 77
                        state->pickcount++;
145
146 77
                        sbe = NULL;
147 77
                        be = state->shardd->backend[c].backend;
148 77
                        AN(be);
149 77
                        if (be->healthy(be, state->ctx->bo, &changed)) {
150 73
                                if (skip-- == 0) {
151 72
                                        chosen = c;
152 72
                                        sbe = &state->last;
153
                                } else {
154 1
                                        sbe = &state->previous;
155
                                }
156
157 4
                        } else if (!healthy && skip-- == 0) {
158 0
                                chosen = c;
159 0
                                sbe = &state->last;
160
                        }
161 149
                        if (sbe == &state->last &&
162 72
                            state->last.hostid != -1)
163 35
                                memcpy(&state->previous, &state->last,
164
                                    sizeof(state->previous));
165
166 77
                        if (sbe) {
167 73
                                sbe->hostid = c;
168 73
                                sbe->healthy = 1;
169 73
                                sbe->changed = changed;
170
                        }
171 77
                        if (chosen != -1)
172 72
                                break;
173
                }
174
175 48
                if (++(state->idx) == ringsz)
176 3
                        state->idx = 0;
177
        }
178 72
        return chosen;
179
}
180
181
void
182 15
sharddir_new(struct sharddir **sharddp, const char *vcl_name)
183
{
184
        struct sharddir *shardd;
185
186 15
        AN(vcl_name);
187 15
        AN(sharddp);
188 15
        AZ(*sharddp);
189 15
        ALLOC_OBJ(shardd, SHARDDIR_MAGIC);
190 15
        AN(shardd);
191 15
        *sharddp = shardd;
192 15
        shardd->name = vcl_name;
193 15
        AZ(pthread_rwlock_init(&shardd->mtx, NULL));
194 15
}
195
196
void
197 0
sharddir_delete(struct sharddir **sharddp)
198
{
199
        struct sharddir *shardd;
200
201 0
        AN(sharddp);
202 0
        shardd = *sharddp;
203 0
        *sharddp = NULL;
204
205 0
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
206 0
        shardcfg_delete(shardd);
207 0
        AZ(pthread_rwlock_destroy(&shardd->mtx));
208 0
        FREE_OBJ(shardd);
209 0
}
210
211
static void
212 37
sharddir_rdlock(struct sharddir *shardd)
213
{
214 37
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
215 37
        AZ(pthread_rwlock_rdlock(&shardd->mtx));
216 37
}
217
218
void
219 32
sharddir_wrlock(struct sharddir *shardd)
220
{
221 32
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
222 32
        AZ(pthread_rwlock_wrlock(&shardd->mtx));
223 32
}
224
225
void
226 69
sharddir_unlock(struct sharddir *shardd)
227
{
228 69
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
229 69
        AZ(pthread_rwlock_unlock(&shardd->mtx));
230 69
}
231
232
static inline void
233 37
validate_alt(VRT_CTX, const struct sharddir *shardd, VCL_INT *alt)
234
{
235 37
        const VCL_INT alt_max = shardd->n_backend - 1;
236
237 37
        if (*alt < 0) {
238 0
                shard_err(ctx, shardd,
239
                    "invalid negative parameter alt=%ld, set to 0", *alt);
240 0
                *alt = 0;
241 37
        } else if (*alt > alt_max) {
242 0
                shard_err(ctx, shardd,
243
                    "parameter alt=%ld limited to %ld", *alt, alt_max);
244 0
                *alt = alt_max;
245
        }
246 37
}
247
248
static inline void
249 37
init_state(struct shard_state *state,
250
    VRT_CTX, struct sharddir *shardd, struct vbitmap *picklist)
251
{
252 37
        AN(picklist);
253
254 37
        state->ctx = ctx;
255 37
        state->shardd = shardd;
256 37
        state->idx = -1;
257 37
        state->picklist = picklist;
258
259
        /* healhy and changed only defined for hostid != -1 */
260 37
        state->previous.hostid = -1;
261 37
        state->last.hostid = -1;
262 37
}
263
264
/*
265
 * core function for the director backend method
266
 *
267
 * while other directors return a reference to their own backend object (on
268
 * which varnish will call the resolve method to resolve to a non-director
269
 * backend), this director immediately resolves in the backend method, to make
270
 * the director choice visible in VCL
271
 *
272
 * consequences:
273
 * - we need no own struct director
274
 * - we can only respect a busy object when being called on the backend side,
275
 *   which probably is, for all practical purposes, only relevant when the
276
 *   saintmode vmod is used
277
 *
278
 * if we wanted to offer delayed resolution, we'd need something like
279
 * per-request per-director state or we'd need to return a dynamically created
280
 * director object. That should be straight forward once we got director
281
 * refcounting #2072. Until then, we could create it on the workspace, but then
282
 * we'd need to keep other directors from storing any references to our dynamic
283
 * object for longer than the current task
284
 *
285
 */
286
VCL_BACKEND
287 37
sharddir_pick_be(VRT_CTX, struct sharddir *shardd,
288
    uint32_t key, VCL_INT alt, VCL_REAL warmup, VCL_BOOL rampup,
289
    enum healthy_e healthy)
290
{
291
        VCL_BACKEND be;
292
        struct shard_state state;
293 37
        unsigned picklist_sz = VBITMAP_SZ(shardd->n_backend);
294 37
        char picklist_spc[picklist_sz];
295
        VCL_DURATION chosen_r, alt_r;
296
297 37
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
298 37
        CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
299 37
        AN(ctx->vsl);
300
301 37
        memset(&state, 0, sizeof(state));
302 37
        init_state(&state, ctx, shardd, vbit_init(picklist_spc, picklist_sz));
303
304 37
        sharddir_rdlock(shardd);
305 37
        if (shardd->n_backend == 0) {
306 0
                shard_err0(ctx, shardd, "no backends");
307 0
                goto err;
308
        }
309
310 37
        assert(shardd->hashcircle);
311
312 37
        validate_alt(ctx, shardd, &alt);
313
314 37
        state.idx = shard_lookup(shardd, key);
315 37
        assert(state.idx >= 0);
316
317 37
        SHDBG(SHDBG_LOOKUP, shardd, "lookup key %x idx %d host %u",
318
            key, state.idx, shardd->hashcircle[state.idx].host);
319
320 37
        if (alt > 0) {
321 3
                if (shard_next(&state, alt - 1, healthy == ALL ? 1 : 0) == -1) {
322 0
                        if (state.previous.hostid != -1) {
323 0
                                be = sharddir_backend(shardd,
324
                                    state.previous.hostid);
325 0
                                goto ok;
326
                        }
327 0
                        goto err;
328
                }
329
        }
330
331 37
        if (shard_next(&state, 0, healthy == IGNORE ? 0 : 1) == -1) {
332 0
                if (state.previous.hostid != -1) {
333 0
                        be = sharddir_backend(shardd, state.previous.hostid);
334 0
                        goto ok;
335
                }
336 0
                goto err;
337
        }
338
339 37
        be = sharddir_backend(shardd, state.last.hostid);
340
341 37
        if (warmup == -1)
342 37
                warmup = shardd->warmup;
343
344
        /* short path for cases we dont want ramup/warmup or can't */
345 71
        if (alt > 0 || healthy == IGNORE || (!rampup && warmup == 0) ||
346 34
            shard_next(&state, 0, 0) == -1)
347
                goto ok;
348
349 32
        assert(alt == 0);
350 32
        assert(state.previous.hostid >= 0);
351 32
        assert(state.last.hostid >= 0);
352 32
        assert(state.previous.hostid != state.last.hostid);
353 32
        assert(be == sharddir_backend(shardd, state.previous.hostid));
354
355 32
        chosen_r = shardcfg_get_rampup(shardd, state.previous.hostid);
356 32
        alt_r = shardcfg_get_rampup(shardd, state.last.hostid);
357
358 32
        SHDBG(SHDBG_RAMPWARM, shardd, "chosen host %d rampup %f changed %f",
359
            state.previous.hostid, chosen_r,
360
            ctx->now - state.previous.changed);
361 32
        SHDBG(SHDBG_RAMPWARM, shardd, "alt host %d rampup %f changed %f",
362
            state.last.hostid, alt_r,
363
            ctx->now - state.last.changed);
364
365 32
        if (ctx->now - state.previous.changed < chosen_r) {
366
                /*
367
                 * chosen host is in rampup
368
                 * - no change if alternative host is also in rampup or the dice
369
                 *   has rolled in favour of the chosen host
370
                 */
371 4
                if (!rampup ||
372 3
                    ctx->now - state.last.changed < alt_r ||
373 1
                    VRND_RandomTestableDouble() * chosen_r <
374 1
                     (ctx->now - state.previous.changed))
375
                        goto ok;
376
        } else {
377
                /* chosen host not in rampup - warmup ? */
378 30
                if (warmup == 0 || VRND_RandomTestableDouble() > warmup)
379
                        goto ok;
380
        }
381
382 1
        be = sharddir_backend(shardd, state.last.hostid);
383
384
  ok:
385 37
        AN(be);
386 37
        sharddir_unlock(shardd);
387 37
        vbit_destroy(state.picklist);
388 37
        return (be);
389
  err:
390 0
        sharddir_unlock(shardd);
391 0
        vbit_destroy(state.picklist);
392 0
        return NULL;
393
}