varnish-cache/lib/libvmod_directors/shard_cfg.c
1
/*-
2
 * Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
3
 * All rights reserved.
4
 *
5
 * Authors: Nils Goroll <nils.goroll@uplex.de>
6
 *          Geoffrey Simmons <geoff@uplex.de>
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "config.h"
31
32
#include <limits.h>
33
#include <stdlib.h>
34
#include <stdio.h>
35
#include <string.h>
36
37
#include "cache/cache.h"
38
39
#include "shard_dir.h"
40
#include "shard_cfg.h"
41
42
/*lint -esym(749,  shard_change_task_e::*) */
43
enum shard_change_task_e {
44
        _INVALID = 0,
45
        CLEAR,
46
        ADD_BE,
47
        REMOVE_BE,
48
        _SHARD_TASK_E_MAX
49
};
50
51
struct shard_change_task {
52
        unsigned                                magic;
53
#define SHARD_CHANGE_TASK_MAGIC                 0x1e1168af
54
        enum shard_change_task_e                task;
55
        void                                    *priv;
56
        VSTAILQ_ENTRY(shard_change_task)        list;
57
};
58
59
struct shard_change {
60
        unsigned                                magic;
61
#define SHARD_CHANGE_MAGIC                      0xdff5c9a6
62
        const struct sharddir                   *shardd;
63
        void                                    *space;
64
        VSTAILQ_HEAD(,shard_change_task)        tasks;
65
};
66
67
struct backend_reconfig {
68
        struct sharddir * const shardd;
69
        unsigned                hint;   // on number of backends after reconfig
70
        unsigned                hole_n; // number of holes in backends array
71
        unsigned                hole_i; // index hint on first hole
72
};
73
74
/*
75
 * ============================================================
76
 * change / task list
77
 *
78
 * for backend reconfiguration, we create a change list on the VCL workspace in
79
 * a PRIV_TASK state, which we work in reconfigure.
80
 *
81
 * for now, we allow to only reconfigure one shard director at a time.
82
 */
83
84
static struct shard_change *
85 420
shard_change_get(VRT_CTX, struct vmod_priv *priv,
86
        const struct sharddir * const shardd)
87
{
88
        struct shard_change *change;
89
90 420
        if (priv->priv) {
91 381
                CAST_OBJ_NOTNULL(change, priv->priv, SHARD_CHANGE_MAGIC);
92 381
                if (change->shardd == NULL) {
93 57
                        change->shardd = shardd;
94 57
                        VSTAILQ_INIT(&change->tasks);
95 324
                } else if (change->shardd != shardd) {
96 6
                        shard_err0(ctx, shardd,
97
                            "cannot change more than one shard director "
98
                            "at a time");
99 6
                        return NULL;
100
                }
101 375
                return (change);
102
        }
103
104 39
        change = WS_Alloc(ctx->ws, sizeof(*change));
105 39
        if (change == NULL) {
106 0
                shard_err0(ctx, shardd, "could not get workspace");
107 0
                return NULL;
108
        }
109
110 39
        INIT_OBJ(change, SHARD_CHANGE_MAGIC);
111 39
        change->space = NULL;
112 39
        change->shardd = shardd;
113 39
        VSTAILQ_INIT(&change->tasks);
114 39
        priv->priv = change;
115
116 39
        return (change);
117
}
118
119
static void
120 96
shard_change_finish(struct shard_change *change)
121
{
122 96
        CHECK_OBJ_NOTNULL(change, SHARD_CHANGE_MAGIC);
123
124 96
        change->shardd = NULL;
125 96
        VSTAILQ_INIT(&change->tasks);
126 96
}
127
128
static void
129 306
shard_change_task_add(VRT_CTX, struct shard_change *change,
130
    enum shard_change_task_e task_e, void *priv)
131
{
132
        struct shard_change_task *task;
133
134 306
        CHECK_OBJ_NOTNULL(change, SHARD_CHANGE_MAGIC);
135
136 306
        task = WS_Alloc(ctx->ws, sizeof(*task));
137 306
        if (task == NULL) {
138 0
                shard_err0(ctx, change->shardd,
139
                    "could not get workspace for task");
140 0
                return;
141
        }
142 306
        INIT_OBJ(task, SHARD_CHANGE_TASK_MAGIC);
143 306
        task->task = task_e;
144 306
        task->priv = priv;
145 306
        VSTAILQ_INSERT_TAIL(&change->tasks, task, list);
146
}
147
148
static inline VCL_BOOL
149 270
shard_change_task_backend(VRT_CTX,
150
    struct vmod_priv *priv, const struct sharddir *shardd,
151
    enum shard_change_task_e task_e, VCL_BACKEND be, VCL_STRING ident,
152
    VCL_DURATION rampup)
153
{
154
        struct shard_change *change;
155
        struct shard_backend *b;
156
157 270
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
158 270
        assert(task_e == ADD_BE || task_e == REMOVE_BE);
159
160 270
        change = shard_change_get(ctx, priv, shardd);
161 270
        if (change == NULL)
162 0
                return 0;
163
164 270
        b = WS_Alloc(ctx->ws, sizeof(*b));
165 270
        if (b == NULL) {
166 0
                shard_err(ctx, shardd, ".%s_backend() WS_Alloc() failed",
167
                    task_e == ADD_BE ? "add" : "remove");
168 0
                return 0;
169
        }
170
171 270
        b->backend = be;
172 270
        b->ident = ident != NULL && *ident != '\0' ? ident : NULL;
173 270
        b->rampup = rampup;
174
175 270
        shard_change_task_add(ctx, change, task_e, b);
176
177 270
        return 1;
178
}
179
180
/*
181
 * ============================================================
182
 * director reconfiguration tasks
183
 */
184
VCL_BOOL
185 246
shardcfg_add_backend(VRT_CTX, struct vmod_priv *priv,
186
    const struct sharddir *shardd, VCL_BACKEND be, VCL_STRING ident,
187
    VCL_DURATION rampup)
188
{
189 246
        AN(be);
190 246
        return shard_change_task_backend(ctx, priv, shardd, ADD_BE,
191
            be, ident, rampup);
192
}
193
194
VCL_BOOL
195 24
shardcfg_remove_backend(VRT_CTX, struct vmod_priv *priv,
196
    const struct sharddir *shardd, VCL_BACKEND be, VCL_STRING ident)
197
{
198 24
        return shard_change_task_backend(ctx, priv, shardd, REMOVE_BE,
199
            be, ident, 0);
200
}
201
202
VCL_BOOL
203 42
shardcfg_clear(VRT_CTX, struct vmod_priv *priv, const struct sharddir *shardd)
204
{
205
        struct shard_change *change;
206
207 42
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
208
209 42
        change = shard_change_get(ctx, priv, shardd);
210 42
        if (change == NULL)
211 6
                return 0;
212
213 36
        shard_change_task_add(ctx, change, CLEAR, NULL);
214
215 36
        return 1;
216
}
217
218
/*
219
 * ============================================================
220
 * consistent hashing cirle init
221
 */
222
223
typedef int (*compar)( const void*, const void* );
224
225
static int
226 15384
circlepoint_compare(const struct shard_circlepoint *a,
227
    const struct shard_circlepoint *b)
228
{
229 15384
        return (a->point == b->point) ? 0 : ((a->point > b->point) ? 1 : -1);
230
}
231
232
static void
233 84
shardcfg_hashcircle(struct sharddir *shardd, VCL_INT replicas)
234 84
{
235
        int i, j;
236
        const char *ident;
237 84
        const int len = 12; // log10(UINT32_MAX) + 2;
238 84
        char s[len];
239
240 84
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
241 84
        AZ(shardd->hashcircle);
242
243 84
        assert(shardd->n_backend > 0);
244 84
        AN(shardd->backend);
245
246 84
        shardd->hashcircle = calloc(shardd->n_backend * replicas,
247
                sizeof(struct shard_circlepoint));
248 84
        AN(shardd->hashcircle);
249
250 84
        shardd->replicas = replicas;
251
252 396
        for (i = 0; i < shardd->n_backend; i++) {
253 312
                CHECK_OBJ_NOTNULL(shardd->backend[i].backend, DIRECTOR_MAGIC);
254
255 624
                ident = shardd->backend[i].ident
256 162
                    ? shardd->backend[i].ident
257 474
                    : VRT_BACKEND_string(shardd->backend[i].backend);
258
259 312
                AN(ident);
260 312
                assert(ident[0] != '\0');
261
262 3000
                for (j = 0; j < replicas; j++) {
263 2688
                        assert(snprintf(s, len, "%d", j) < len);
264 5376
                        shardd->hashcircle[i * replicas + j].point =
265 2688
                                sharddir_sha256(ident, s, vrt_magic_string_end);
266 2688
                        shardd->hashcircle[i * replicas + j].host = i;
267
                }
268
                /* not used in current interface */
269 624
                shardd->backend[i].canon_point =
270 312
                    shardd->hashcircle[i * replicas].point;
271
        }
272 84
        qsort( (void *) shardd->hashcircle, shardd->n_backend * replicas,
273
            sizeof (struct shard_circlepoint), (compar) circlepoint_compare);
274
275 84
        if ((shardd->debug_flags & SHDBG_CIRCLE) == 0)
276 30
                return;
277
278 282
        for (i = 0; i < shardd->n_backend; i++)
279 888
                for (j = 0; j < replicas; j++)
280 660
                        SHDBG(SHDBG_CIRCLE, shardd,
281
                            "hashcircle[%5jd] = "
282
                            "{point = %8x, host = %2u}\n",
283
                            (intmax_t)(i * replicas + j),
284
                            shardd->hashcircle[i * replicas + j].point,
285
                            shardd->hashcircle[i * replicas + j].host);
286
}
287
288
/*
289
 * ============================================================
290
 * configure the director backends
291
 */
292
293
static void
294 54
shardcfg_backend_free(struct shard_backend *f)
295
{
296 54
        if (f->freeptr)
297 24
                free (f->freeptr);
298 54
        memset(f, 0, sizeof(*f));
299 54
}
300
301
static void
302 198
shardcfg_backend_copyin(struct shard_backend *dst,
303
    const struct shard_backend *src)
304
{
305 198
        dst->backend = src->backend;
306 198
        dst->ident = src->ident ? strdup(src->ident) : NULL;
307 198
        dst->rampup = src->rampup;
308 198
        dst->canon_point = 0xffffffff;
309 198
}
310
311
static int
312 525
shardcfg_backend_cmp(const struct shard_backend *a,
313
    const struct shard_backend *b)
314
{
315
        const char *ai, *bi;
316
317 525
        ai = a->ident;
318 525
        bi = b->ident;
319
320 525
        assert(ai || a->backend);
321 525
        assert(bi || b->backend);
322
323
        /* vcl_names are unique, so we can compare the backend pointers */
324 525
        if (ai == NULL && bi == NULL)
325 117
                return a->backend != b->backend;
326
327 408
        if (ai == NULL)
328 12
                ai = VRT_BACKEND_string(a->backend);
329
330 408
        if (bi == NULL)
331 42
                bi = VRT_BACKEND_string(b->backend);
332
333 408
        AN(ai);
334 408
        AN(bi);
335 408
        return strcmp(ai, bi);
336
}
337
338
/* for removal, we delete all instances if the backend matches */
339
static int
340 144
shardcfg_backend_del_cmp(const struct shard_backend *task,
341
    const struct shard_backend *b)
342
{
343 144
        assert(task->backend || task->ident);
344
345 144
        if (task->ident == NULL)
346 18
                return task->backend != b->backend;
347
348 126
        return shardcfg_backend_cmp(task, b);
349
}
350
351
static const struct shard_backend *
352 222
shardcfg_backend_lookup(const struct backend_reconfig *re,
353
    const struct shard_backend *b)
354
{
355 222
        unsigned i, max = re->shardd->n_backend + re->hole_n;
356 222
        const struct shard_backend *bb = re->shardd->backend;
357
358 597
        for (i = 0; i < max; i++)
359 399
                if (!shardcfg_backend_cmp(b, &bb[i]))
360 24
                        return &bb[i];
361
362 198
        return NULL;
363
}
364
365
static void
366 42
shardcfg_backend_expand(const struct backend_reconfig *re)
367
{
368 42
        unsigned min = re->hint;
369
370 42
        CHECK_OBJ_NOTNULL(re->shardd, SHARDDIR_MAGIC);
371
372 42
        if (min < 16)
373 42
                min = 16;
374
375 42
        if (re->shardd->l_backend < min)
376 42
                re->shardd->l_backend = min;
377
        else
378 0
                re->shardd->l_backend <<= 1;
379
380 42
        if (re->shardd->backend)
381 0
                re->shardd->backend = realloc(re->shardd->backend,
382 0
                    re->shardd->l_backend * sizeof *re->shardd->backend);
383
        else
384 84
                re->shardd->backend = malloc(
385 42
                    re->shardd->l_backend * sizeof *re->shardd->backend);
386
387 42
        AN(re->shardd->backend);
388 42
}
389
390
static void
391 198
shardcfg_backend_add(struct backend_reconfig *re,
392
    const struct shard_backend *b)
393
{
394
        unsigned i;
395 198
        struct shard_backend *bb = re->shardd->backend;
396
397 198
        if (re->hole_n == 0) {
398 198
                if (re->shardd->n_backend >= re->shardd->l_backend) {
399 42
                        shardcfg_backend_expand(re);
400 42
                        bb = re->shardd->backend;
401
                }
402 198
                assert(re->shardd->n_backend < re->shardd->l_backend);
403 198
                i = re->shardd->n_backend;
404
        } else {
405
                do {
406 0
                        if (!bb[re->hole_i].backend)
407 0
                                break;
408 0
                } while (++(re->hole_i) < re->shardd->n_backend + re->hole_n);
409 0
                assert(re->hole_i < re->shardd->n_backend + re->hole_n);
410
411 0
                i = (re->hole_i)++;
412 0
                (re->hole_n)--;
413
        }
414
415 198
        re->shardd->n_backend++;
416 198
        shardcfg_backend_copyin(&bb[i], b);
417 198
}
418
419
static void
420 30
shardcfg_backend_clear(struct sharddir *shardd)
421
{
422
        int i;
423 54
        for (i = 0; i < shardd->n_backend; i++)
424 24
                shardcfg_backend_free(&shardd->backend[i]);
425 30
        shardd->n_backend = 0;
426 30
}
427
428
429
static void
430 24
shardcfg_backend_del(struct backend_reconfig *re,
431
    const struct shard_backend *spec)
432
{
433 24
        unsigned i, max = re->shardd->n_backend + re->hole_n;
434 24
        struct shard_backend * const bb = re->shardd->backend;
435
436 168
        for (i = 0; i < max; i++) {
437 144
                if (shardcfg_backend_del_cmp(spec, &bb[i]))
438 114
                        continue;
439
440 30
                shardcfg_backend_free(&bb[i]);
441 30
                re->shardd->n_backend--;
442 30
                if (i < re->shardd->n_backend + re->hole_n) {
443 24
                        (re->hole_n)++;
444 24
                        if (i < re->hole_i)
445 18
                                re->hole_i = i;
446
                }
447
        }
448 24
}
449
450
static void
451 84
shardcfg_backend_finalize(struct backend_reconfig *re)
452
{
453
        unsigned i;
454 84
        struct shard_backend * const bb = re->shardd->backend;
455
456 186
        while (re->hole_n > 0) {
457
                // trim end
458 24
                i = re->shardd->n_backend + re->hole_n - 1;
459 54
                while (re->hole_n && bb[i].backend == NULL) {
460 6
                        (re->hole_n)--;
461 6
                        i--;
462
                }
463
464 24
                if (re->hole_n == 0)
465 6
                        break;
466
467 18
                assert(re->hole_i < i);
468
469
                do {
470 18
                        if (!bb[re->hole_i].backend)
471 18
                                break;
472 0
                } while (++(re->hole_i) <= i);
473
474 18
                assert(re->hole_i < i);
475 18
                assert(bb[re->hole_i].backend == NULL);
476 18
                assert(bb[i].backend != NULL);
477
478 18
                memcpy(&bb[re->hole_i], &bb[i], sizeof(*bb));
479 18
                memset(&bb[i], 0, sizeof(*bb));
480
481 18
                (re->hole_n)--;
482 18
                (re->hole_i)++;
483
        }
484
485 84
        assert(re->hole_n == 0);
486 84
}
487
488
/*
489
 * ============================================================
490
 * work the change tasks
491
 */
492
493
static void
494 96
shardcfg_apply_change(VRT_CTX, struct sharddir *shardd,
495
    const struct shard_change *change)
496
{
497
        struct shard_change_task *task, *clear;
498
        const struct shard_backend *b;
499
500 192
        struct backend_reconfig re = {
501
                .shardd = shardd,
502 96
                .hint = shardd->n_backend,
503
                .hole_n = 0,
504
                .hole_i = INT_MAX
505
        };
506
507
        // XXX assert sharddir_locked(shardd)
508
509 96
        clear = NULL;
510 402
        VSTAILQ_FOREACH(task, &change->tasks, list) {
511 306
                CHECK_OBJ_NOTNULL(task, SHARD_CHANGE_TASK_MAGIC);
512 306
                switch (task->task) {
513
                case CLEAR:
514 36
                        clear = task;
515 36
                        re.hint = 0;
516 36
                        break;
517
                case ADD_BE:
518 246
                        re.hint++;
519 246
                        break;
520
                case REMOVE_BE:
521 24
                        re.hint--;
522 24
                        break;
523
                default:
524 0
                        INCOMPL();
525
                }
526
        }
527
528 96
        if (clear) {
529 30
                shardcfg_backend_clear(shardd);
530 30
                clear = VSTAILQ_NEXT(clear, list);
531 30
                if (clear == NULL)
532 12
                        return;
533
        }
534
535 84
        task = clear;
536 330
        VSTAILQ_FOREACH_FROM(task, &change->tasks, list) {
537 246
                CHECK_OBJ_NOTNULL(task, SHARD_CHANGE_TASK_MAGIC);
538 246
                switch (task->task) {
539
                case CLEAR:
540 0
                        assert(task->task != CLEAR);
541 0
                        break;
542
                case ADD_BE:
543 222
                        b = shardcfg_backend_lookup(&re, task->priv);
544
545 222
                        if (b == NULL) {
546 198
                                shardcfg_backend_add(&re, task->priv);
547 198
                                break;
548
                        }
549
550 24
                        const char * const ident = b->ident;
551
552 24
                        shard_err(ctx, shardd, "(notice) backend %s%s%s "
553
                            "already exists - skipping",
554
                            VRT_BACKEND_string(b->backend),
555
                            ident ? "/" : "",
556
                            ident ? ident : "");
557 24
                        break;
558
                case REMOVE_BE:
559 24
                        shardcfg_backend_del(&re, task->priv);
560 24
                        break;
561
                default:
562 0
                        INCOMPL();
563
                }
564
        }
565 84
        shardcfg_backend_finalize(&re);
566
}
567
568
/*
569
 * ============================================================
570
 * top reconfiguration function
571
 */
572
573
VCL_BOOL
574 114
shardcfg_reconfigure(VRT_CTX, struct vmod_priv *priv,
575
    struct sharddir *shardd, VCL_INT replicas)
576
{
577
        struct shard_change *change;
578
579 114
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
580 114
        if (replicas <= 0) {
581 6
                shard_err(ctx, shardd,
582
                    ".reconfigure() invalid replicas argument %ld", replicas);
583 6
                return 0;
584
        }
585
586 108
        change = shard_change_get(ctx, priv, shardd);
587 108
        if (change == NULL)
588 0
                return 0;
589
590 108
        if (VSTAILQ_FIRST(&change->tasks) == NULL)
591 12
                return 1;
592
593 96
        sharddir_wrlock(shardd);
594
595 96
        shardcfg_apply_change(ctx, shardd, change);
596 96
        shard_change_finish(change);
597
598 96
        if (shardd->hashcircle)
599 42
                free(shardd->hashcircle);
600 96
        shardd->hashcircle = NULL;
601
602 96
        if (shardd->n_backend == 0) {
603 12
                shard_err0(ctx, shardd, ".reconfigure() no backends");
604 12
                sharddir_unlock(shardd);
605 12
                return 0;
606
        }
607
608 84
        shardcfg_hashcircle(shardd, replicas);
609 84
        sharddir_unlock(shardd);
610 84
        return (1);
611
}
612
613
/*
614
 * ============================================================
615
 * misc config related
616
 */
617
618
/* only for sharddir_delete() */
619
void
620 9
shardcfg_delete(const struct sharddir *shardd)
621
{
622
        int i;
623
624 9
        for (i = 0; i < shardd->n_backend; i++)
625 0
                shardcfg_backend_free(&shardd->backend[i]);
626 9
        if (shardd->backend)
627 0
                free(shardd->backend);
628 9
        if (shardd->hashcircle)
629 0
                free(shardd->hashcircle);
630 9
}
631
632
VCL_VOID
633 0
shardcfg_set_warmup(struct sharddir *shardd, VCL_REAL ratio)
634
{
635 0
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
636 0
        assert(ratio >= 0 && ratio < 1);
637 0
        sharddir_wrlock(shardd);
638 0
        shardd->warmup = ratio;
639 0
        sharddir_unlock(shardd);
640 0
}
641
642
VCL_VOID
643 3
shardcfg_set_rampup(struct sharddir *shardd, VCL_DURATION duration)
644
{
645 3
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
646 3
        assert(duration >= 0);
647 3
        sharddir_wrlock(shardd);
648 3
        shardd->rampup_duration = duration;
649 3
        sharddir_unlock(shardd);
650 3
}
651
652
VCL_DURATION
653 198
shardcfg_get_rampup(const struct sharddir *shardd, int host)
654
{
655
        VCL_DURATION r;
656
657 198
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
658
        // assert sharddir_rdlock_held(shardd);
659 198
        assert (host < shardd->n_backend);
660
661 198
        if (isnan(shardd->backend[host].rampup))
662 198
                r = shardd->rampup_duration;
663
        else
664 0
                r = shardd->backend[host].rampup;
665
666 198
        return (r);
667
}