varnish-cache/lib/libvmod_directors/shard_cfg.c
1
/*-
2
 * Copyright 2009-2016 UPLEX - Nils Goroll Systemoptimierung
3
 * All rights reserved.
4
 *
5
 * Authors: Nils Goroll <nils.goroll@uplex.de>
6
 *          Geoffrey Simmons <geoff@uplex.de>
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "config.h"
31
32
#include <limits.h>
33
#include <stdlib.h>
34
#include <stdio.h>
35
#include <string.h>
36
37
#include "cache/cache.h"
38
#include "cache/cache_director.h"
39
40
#include "shard_dir.h"
41
#include "shard_cfg.h"
42
#include "shard_hash.h"
43
44
/*lint -esym(749,  shard_change_task_e::*) */
45
enum shard_change_task_e {
46
        _INVALID = 0,
47
        CLEAR,
48
        ADD_BE,
49
        REMOVE_BE,
50
        _SHARD_TASK_E_MAX
51
};
52
53
struct shard_change_task {
54
        unsigned                                magic;
55
#define SHARD_CHANGE_TASK_MAGIC                 0x1e1168af
56
        enum shard_change_task_e                task;
57
        void                                    *priv;
58
        VSTAILQ_ENTRY(shard_change_task)        list;
59
};
60
61
struct shard_change {
62
        unsigned                                magic;
63
#define SHARD_CHANGE_MAGIC                      0xdff5c9a6
64
        const struct sharddir                   *shardd;
65
        void                                    *space;
66
        VSTAILQ_HEAD(,shard_change_task)        tasks;
67
};
68
69
struct backend_reconfig {
70
        struct sharddir * const shardd;
71
        int                     hint;   // on number of backends after reconfig
72
        int                     hole_n; // number of holes in backends array
73
        int                     hole_i; // index hint on first hole
74
};
75
76
/*
77
 * ============================================================
78
 * change / task list
79
 *
80
 * for backend reconfiguration, we create a change list on the VCL workspace in
81
 * a PRIV_TASK state, which we work in reconfigure.
82
 *
83
 * for now, we allow to only reconfigure one shard director at a time.
84
 */
85
86
static struct shard_change *
87 272
shard_change_get(VRT_CTX, struct vmod_priv *priv,
88
        const struct sharddir * const shardd)
89
{
90
        struct shard_change *change;
91
92 272
        if (priv->priv) {
93 246
                CAST_OBJ_NOTNULL(change, priv->priv, SHARD_CHANGE_MAGIC);
94 246
                if (change->shardd == NULL) {
95 36
                        change->shardd = shardd;
96 36
                        VSTAILQ_INIT(&change->tasks);
97 210
                } else if (change->shardd != shardd) {
98 4
                        shard_err0(ctx, shardd,
99
                            "cannot change more than one shard director "
100
                            "at a time");
101 4
                        return NULL;
102
                }
103 242
                return (change);
104
        }
105
106 26
        change = WS_Alloc(ctx->ws, sizeof(*change));
107 26
        if (change == NULL) {
108 0
                shard_err0(ctx, shardd, "could not get workspace");
109 0
                return NULL;
110
        }
111
112 26
        INIT_OBJ(change, SHARD_CHANGE_MAGIC);
113 26
        change->space = NULL;
114 26
        change->shardd = shardd;
115 26
        VSTAILQ_INIT(&change->tasks);
116 26
        priv->priv = change;
117
118 26
        return (change);
119
}
120
121
static void
122 62
shard_change_finish(struct shard_change *change)
123
{
124 62
        CHECK_OBJ_NOTNULL(change, SHARD_CHANGE_MAGIC);
125
126 62
        change->shardd = NULL;
127 62
        VSTAILQ_INIT(&change->tasks);
128 62
}
129
130
static void
131 198
shard_change_task_add(VRT_CTX, struct shard_change *change,
132
    enum shard_change_task_e task_e, void *priv)
133
{
134
        struct shard_change_task *task;
135
136 198
        CHECK_OBJ_NOTNULL(change, SHARD_CHANGE_MAGIC);
137
138 198
        task = WS_Alloc(ctx->ws, sizeof(*task));
139 198
        if (task == NULL) {
140 0
                shard_err0(ctx, change->shardd,
141
                    "could not get workspace for task");
142 198
                return;
143
        }
144 198
        INIT_OBJ(task, SHARD_CHANGE_TASK_MAGIC);
145 198
        task->task = task_e;
146 198
        task->priv = priv;
147 198
        VSTAILQ_INSERT_TAIL(&change->tasks, task, list);
148
}
149
150
static inline VCL_BOOL
151 174
shard_change_task_backend(VRT_CTX,
152
    struct vmod_priv *priv, const struct sharddir *shardd,
153
    enum shard_change_task_e task_e, VCL_BACKEND be, VCL_STRING ident,
154
    VCL_DURATION rampup)
155
{
156
        struct shard_change *change;
157
        struct shard_backend *b;
158
159 174
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
160 174
        assert(task_e == ADD_BE || task_e == REMOVE_BE);
161
162 174
        change = shard_change_get(ctx, priv, shardd);
163 174
        if (change == NULL)
164 0
                return 0;
165
166 174
        b = WS_Alloc(ctx->ws, sizeof(*b));
167 174
        if (b == NULL) {
168 0
                shard_err(ctx, shardd, ".%s_backend() WS_Alloc() failed",
169
                    task_e == ADD_BE ? "add" : "remove");
170 0
                return 0;
171
        }
172
173 174
        b->backend = be;
174 174
        b->ident = ident != NULL && *ident != '\0' ? ident : NULL;
175 174
        b->rampup = rampup;
176
177 174
        shard_change_task_add(ctx, change, task_e, b);
178
179 174
        return 1;
180
}
181
182
/*
183
 * ============================================================
184
 * director reconfiguration tasks
185
 */
186
VCL_BOOL
187 158
shardcfg_add_backend(VRT_CTX, struct vmod_priv *priv,
188
    const struct sharddir *shardd, VCL_BACKEND be, VCL_STRING ident,
189
    VCL_DURATION rampup)
190
{
191 158
        AN(be);
192 158
        return shard_change_task_backend(ctx, priv, shardd, ADD_BE,
193
            be, ident, rampup);
194
}
195
196
VCL_BOOL
197 16
shardcfg_remove_backend(VRT_CTX, struct vmod_priv *priv,
198
    const struct sharddir *shardd, VCL_BACKEND be, VCL_STRING ident)
199
{
200 16
        return shard_change_task_backend(ctx, priv, shardd, REMOVE_BE,
201
            be, ident, 0);
202
}
203
204
VCL_BOOL
205 28
shardcfg_clear(VRT_CTX, struct vmod_priv *priv, const struct sharddir *shardd)
206
{
207
        struct shard_change *change;
208
209 28
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
210
211 28
        change = shard_change_get(ctx, priv, shardd);
212 28
        if (change == NULL)
213 4
                return 0;
214
215 24
        shard_change_task_add(ctx, change, CLEAR, NULL);
216
217 24
        return 1;
218
}
219
220
/*
221
 * ============================================================
222
 * consistent hashing cirle init
223
 */
224
225
typedef int (*compar)( const void*, const void* );
226
227
static int
228 9352
circlepoint_compare(const struct shard_circlepoint *a,
229
    const struct shard_circlepoint *b)
230
{
231 9352
        return (a->point == b->point) ? 0 : ((a->point > b->point) ? 1 : -1);
232
}
233
234
static void
235 54
shardcfg_hashcircle(struct sharddir *shardd, VCL_INT replicas, enum alg_e alg)
236
{
237
        int i, j;
238
        const char *ident;
239
        int len;
240
241 54
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
242 54
        AZ(shardd->hashcircle);
243
244 54
        assert(shardd->n_backend > 0);
245 54
        AN(shardd->backend);
246
247 54
        shardd->hashcircle = calloc(shardd->n_backend * replicas,
248
                sizeof(struct shard_circlepoint));
249 54
        AN(shardd->hashcircle);
250
251 54
        shardd->replicas = replicas;
252
253 256
        for (i = 0; i < shardd->n_backend; i++) {
254 202
                CHECK_OBJ_NOTNULL(shardd->backend[i].backend, DIRECTOR_MAGIC);
255
256 404
                ident = shardd->backend[i].ident
257 108
                    ? shardd->backend[i].ident
258 310
                    : shardd->backend[i].backend->vcl_name;
259
260 202
                assert(ident[0] != '\0');
261
262 202
                len = strlen(ident) + 12; // log10(UINT32_MAX) + 2;
263
264 202
                char s[len];
265
266 1844
                for (j = 0; j < replicas; j++) {
267 1642
                        assert(snprintf(s, len, "%s%d", ident, j) < len);
268 3284
                        shardd->hashcircle[i * replicas + j].point =
269 1642
                            shard_hash_f[alg](s);
270 1642
                        shardd->hashcircle[i * replicas + j].host = i;
271
                }
272
                /* not used in current interface */
273 404
                shardd->backend[i].canon_point =
274 202
                    shardd->hashcircle[i * replicas].point;
275
        }
276 54
        qsort( (void *) shardd->hashcircle, shardd->n_backend * replicas,
277
            sizeof (struct shard_circlepoint), (compar) circlepoint_compare);
278
279 54
        if ((shardd->debug_flags & SHDBG_CIRCLE) == 0)
280 72
                return;
281
282 188
        for (i = 0; i < shardd->n_backend; i++)
283 592
                for (j = 0; j < replicas; j++)
284 440
                        SHDBG(SHDBG_CIRCLE, shardd,
285
                            "hashcircle[%5ld] = "
286
                            "{point = %8x, host = %2u}\n",
287
                            i * replicas + j,
288
                            shardd->hashcircle[i * replicas + j].point,
289
                            shardd->hashcircle[i * replicas + j].host);
290
}
291
292
/*
293
 * ============================================================
294
 * configure the director backends
295
 */
296
297
static void
298 36
shardcfg_backend_free(struct shard_backend *f)
299
{
300 36
        if (f->ident)
301 16
                free (TRUST_ME(f->ident));
302 36
        memset(f, 0, sizeof(*f));
303 36
}
304
305
static void
306 126
shardcfg_backend_copyin(struct shard_backend *dst,
307
    const struct shard_backend *src)
308
{
309 126
        dst->backend = src->backend;
310 126
        dst->ident = src->ident ? strdup(src->ident) : NULL;
311 126
        dst->rampup = src->rampup;
312 126
        dst->canon_point = 0xffffffff;
313 126
}
314
315
static int
316 344
shardcfg_backend_cmp(const struct shard_backend *a,
317
    const struct shard_backend *b)
318
{
319
        const char *ai, *bi;
320
321 344
        ai = a->ident;
322 344
        bi = b->ident;
323
324 344
        assert(ai || a->backend);
325 344
        assert(bi || b->backend);
326
327
        /* vcl_names are unique, so we can compare the backend pointers */
328 344
        if (ai == NULL && bi == NULL)
329 72
                return a->backend != b->backend;
330
331 272
        if (ai == NULL)
332 8
                ai = a->backend->vcl_name;
333
334 272
        if (bi == NULL)
335 28
                bi = b->backend->vcl_name;
336
337 272
        return strcmp(ai, bi);
338
}
339
340
/* for removal, we delete all instances if the backend matches */
341
static int
342 96
shardcfg_backend_del_cmp(const struct shard_backend *task,
343
    const struct shard_backend *b)
344
{
345 96
        assert(task->backend || task->ident);
346
347 96
        if (task->ident == NULL)
348 12
                return task->backend != b->backend;
349
350 84
        return shardcfg_backend_cmp(task, b);
351
}
352
353
static const struct shard_backend *
354 142
shardcfg_backend_lookup(const struct backend_reconfig *re,
355
    const struct shard_backend *b)
356
{
357 142
        int i, max = re->shardd->n_backend + re->hole_n;
358 142
        const struct shard_backend *bb = re->shardd->backend;
359
360 386
        for (i = 0; i < max; i++)
361 260
                if (!shardcfg_backend_cmp(b, &bb[i]))
362 16
                        return &bb[i];
363
364 126
        return NULL;
365
}
366
367
static void
368 26
shardcfg_backend_expand(const struct backend_reconfig *re)
369
{
370 26
        int min = re->hint;
371
372 26
        CHECK_OBJ_NOTNULL(re->shardd, SHARDDIR_MAGIC);
373
374 26
        if (min < 16)
375 26
                min = 16;
376
377 26
        if (re->shardd->l_backend < min)
378 26
                re->shardd->l_backend = min;
379
        else
380 0
                re->shardd->l_backend <<= 1;
381
382 26
        if (re->shardd->backend)
383 0
                re->shardd->backend = realloc(re->shardd->backend,
384 0
                    re->shardd->l_backend * sizeof *re->shardd->backend);
385
        else
386 52
                re->shardd->backend = malloc(
387 26
                    re->shardd->l_backend * sizeof *re->shardd->backend);
388
389 26
        AN(re->shardd->backend);
390 26
}
391
392
static void
393 126
shardcfg_backend_add(struct backend_reconfig *re,
394
    const struct shard_backend *b)
395
{
396
        int i;
397 126
        struct shard_backend *bb = re->shardd->backend;
398
399 126
        if (re->hole_n == 0) {
400 126
                if (re->shardd->n_backend >= re->shardd->l_backend) {
401 26
                        shardcfg_backend_expand(re);
402 26
                        bb = re->shardd->backend;
403
                }
404 126
                assert(re->shardd->n_backend < re->shardd->l_backend);
405 126
                i = re->shardd->n_backend;
406
        } else {
407
                do {
408 0
                        if (!bb[re->hole_i].backend)
409 0
                                break;
410 0
                } while (++(re->hole_i) < re->shardd->n_backend + re->hole_n);
411 0
                assert(re->hole_i < re->shardd->n_backend + re->hole_n);
412
413 0
                i = (re->hole_i)++;
414 0
                (re->hole_n)--;
415
        }
416
417 126
        re->shardd->n_backend++;
418 126
        shardcfg_backend_copyin(&bb[i], b);
419 126
}
420
421
static void
422 20
shardcfg_backend_clear(struct sharddir *shardd)
423
{
424
        int i;
425 36
        for (i = 0; i < shardd->n_backend; i++)
426 16
                shardcfg_backend_free(&shardd->backend[i]);
427 20
        shardd->n_backend = 0;
428 20
}
429
430
431
static void
432 16
shardcfg_backend_del(struct backend_reconfig *re,
433
    const struct shard_backend *spec)
434
{
435 16
        int i, max = re->shardd->n_backend + re->hole_n;
436 16
        struct shard_backend * const bb = re->shardd->backend;
437
438 112
        for (i = 0; i < max; i++) {
439 96
                if (shardcfg_backend_del_cmp(spec, &bb[i]))
440 76
                        continue;
441
442 20
                shardcfg_backend_free(&bb[i]);
443 20
                re->shardd->n_backend--;
444 20
                if (i < re->shardd->n_backend + re->hole_n) {
445 16
                        (re->hole_n)++;
446 16
                        if (i < re->hole_i)
447 12
                                re->hole_i = i;
448
                }
449
        }
450 16
}
451
452
static void
453 54
shardcfg_backend_finalize(struct backend_reconfig *re)
454
{
455
        int i;
456 54
        struct shard_backend * const bb = re->shardd->backend;
457
458 120
        while (re->hole_n > 0) {
459
                // trim end
460 16
                i = re->shardd->n_backend + re->hole_n - 1;
461 36
                while (re->hole_n && bb[i].backend == NULL) {
462 4
                        (re->hole_n)--;
463 4
                        i--;
464
                }
465
466 16
                if (re->hole_n == 0)
467 4
                        break;
468
469 12
                assert(re->hole_i < i);
470
471
                do {
472 12
                        if (!bb[re->hole_i].backend)
473 12
                                break;
474 0
                } while (++(re->hole_i) <= i);
475
476 12
                assert(re->hole_i < i);
477 12
                assert(bb[re->hole_i].backend == NULL);
478 12
                assert(bb[i].backend != NULL);
479
480 12
                memcpy(&bb[re->hole_i], &bb[i], sizeof(*bb));
481 12
                memset(&bb[i], 0, sizeof(*bb));
482
483 12
                (re->hole_n)--;
484 12
                (re->hole_i)++;
485
        }
486
487 54
        assert(re->hole_n == 0);
488 54
}
489
490
/*
491
 * ============================================================
492
 * work the change tasks
493
 */
494
495
static void
496 62
shardcfg_apply_change(VRT_CTX, struct sharddir *shardd,
497
    const struct shard_change *change)
498
{
499
        struct shard_change_task *task, *clear;
500
        const struct shard_backend *b;
501
502 124
        struct backend_reconfig re = {
503
                .shardd = shardd,
504 62
                .hint = shardd->n_backend,
505
                .hole_n = 0,
506
                .hole_i = INT_MAX
507
        };
508
509
        // XXX assert sharddir_locked(shardd)
510
511 62
        clear = NULL;
512 260
        VSTAILQ_FOREACH(task, &change->tasks, list) {
513 198
                CHECK_OBJ_NOTNULL(task, SHARD_CHANGE_TASK_MAGIC);
514 198
                switch (task->task) {
515
                case CLEAR:
516 24
                        clear = task;
517 24
                        re.hint = 0;
518 24
                        break;
519
                case ADD_BE:
520 158
                        re.hint++;
521 158
                        break;
522
                case REMOVE_BE:
523 16
                        re.hint--;
524 16
                        break;
525
                default:
526 0
                        INCOMPL();
527
                }
528
        }
529
530 62
        if (clear) {
531 20
                shardcfg_backend_clear(shardd);
532 20
                clear = VSTAILQ_NEXT(clear, list);
533 20
                if (clear == NULL)
534 70
                        return;
535
        }
536
537 54
        task = clear;
538 212
        VSTAILQ_FOREACH_FROM(task, &change->tasks, list) {
539 158
                CHECK_OBJ_NOTNULL(task, SHARD_CHANGE_TASK_MAGIC);
540 158
                switch (task->task) {
541
                case CLEAR:
542 0
                        assert(task->task != CLEAR);
543 0
                        break;
544
                case ADD_BE:
545 142
                        b = shardcfg_backend_lookup(&re, task->priv);
546
547 142
                        if (b == NULL) {
548 126
                                shardcfg_backend_add(&re, task->priv);
549 126
                                break;
550
                        }
551
552 16
                        const char * const ident = b->ident;
553
554 16
                        shard_err(ctx, shardd, "(notice) backend %s%s%s "
555
                            "already exists - skipping",
556
                            b->backend->vcl_name,
557
                            ident ? "/" : "",
558
                            ident ? ident : "");
559 16
                        break;
560
                case REMOVE_BE:
561 16
                        shardcfg_backend_del(&re, task->priv);
562 16
                        break;
563
                default:
564 0
                        INCOMPL();
565
                }
566
        }
567 54
        shardcfg_backend_finalize(&re);
568
}
569
570
/*
571
 * ============================================================
572
 * top reconfiguration function
573
 */
574
575
VCL_BOOL
576 74
shardcfg_reconfigure(VRT_CTX, struct vmod_priv *priv,
577
    struct sharddir *shardd, VCL_INT replicas, enum alg_e alg)
578
{
579
        struct shard_change *change;
580
581 74
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
582 74
        if (replicas <= 0) {
583 4
                shard_err(ctx, shardd,
584
                    ".reconfigure() invalid replicas argument %ld", replicas);
585 4
                return 0;
586
        }
587
588 70
        change = shard_change_get(ctx, priv, shardd);
589 70
        if (change == NULL)
590 0
                return 0;
591
592 70
        if (VSTAILQ_FIRST(&change->tasks) == NULL)
593 8
                return 1;
594
595 62
        sharddir_wrlock(shardd);
596
597 62
        shardcfg_apply_change(ctx, shardd, change);
598 62
        shard_change_finish(change);
599
600 62
        if (shardd->hashcircle)
601 28
                free(shardd->hashcircle);
602 62
        shardd->hashcircle = NULL;
603
604 62
        if (shardd->n_backend == 0) {
605 8
                shard_err0(ctx, shardd, ".reconfigure() no backends");
606 8
                sharddir_unlock(shardd);
607 8
                return 0;
608
        }
609
610 54
        shardcfg_hashcircle(shardd, replicas, alg);
611 54
        sharddir_unlock(shardd);
612 54
        return (1);
613
}
614
615
/*
616
 * ============================================================
617
 * misc config related
618
 */
619
620
/* only for sharddir_delete() */
621
void
622 0
shardcfg_delete(const struct sharddir *shardd)
623
{
624
        int i;
625
626 0
        for (i = 0; i < shardd->n_backend; i++)
627 0
                shardcfg_backend_free(&shardd->backend[i]);
628 0
        if (shardd->backend)
629 0
                free(shardd->backend);
630 0
        if (shardd->hashcircle)
631 0
                free(shardd->hashcircle);
632 0
}
633
634
VCL_VOID
635 0
shardcfg_set_warmup(struct sharddir *shardd, VCL_REAL ratio)
636
{
637 0
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
638 0
        assert(ratio >= 0 && ratio < 1);
639 0
        sharddir_wrlock(shardd);
640 0
        shardd->warmup = ratio;
641 0
        sharddir_unlock(shardd);
642 0
}
643
644
VCL_VOID
645 2
shardcfg_set_rampup(struct sharddir *shardd, VCL_DURATION duration)
646
{
647 2
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
648 2
        assert(duration >= 0);
649 2
        sharddir_wrlock(shardd);
650 2
        shardd->rampup_duration = duration;
651 2
        sharddir_unlock(shardd);
652 2
}
653
654
VCL_DURATION
655 128
shardcfg_get_rampup(const struct sharddir *shardd, int host)
656
{
657
        VCL_DURATION r;
658
659 128
        CHECK_OBJ_NOTNULL(shardd, SHARDDIR_MAGIC);
660
        // assert sharddir_rdlock_held(shardd);
661 128
        assert (host < shardd->n_backend);
662
663
        // magic value for default
664 128
        if (shardd->backend[host].rampup == 973279260)
665 128
                r = shardd->rampup_duration;
666
        else
667 0
                r = shardd->backend[host].rampup;
668
669 128
        return (r);
670
}