varnish-cache/bin/varnishd/storage/storage_persistent_silo.c

/*-
 * Copyright (c) 2008-2011 Varnish Software AS
 * All rights reserved.
 *
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Persistent storage method
 *
 * XXX: Before we start the client or maybe after it stops, we should give the
 * XXX: stevedores a chance to examine their storage for consistency.
 *
 */

#include "config.h"

#include <stdio.h>
#include <stdlib.h>

#include "cache/cache_varnishd.h"

#include "vsha256.h"
#include "vend.h"
#include "vtim.h"

#include "cache/cache_objhead.h"

#include "storage/storage.h"
#include "storage/storage_simple.h"
#include "storage/storage_persistent.h"

/*
 * We use the top bit to mark objects still needing fixup
 * In theory this may need to be platform dependent
 */

#define NEED_FIXUP      (1U << 31)

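/*
 * The flag lives in the top bit of oc->stobj->priv2; the lower bits hold
 * the (1-based) index of the object's smp_object slot in its segment
 * (see smp_init_oc() and smp_find_so()).  smp_load_seg() sets the flag on
 * resurrected objects and smp_sml_getobj() clears it once the object has
 * been checked and fixed up.
 */
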
/*--------------------------------------------------------------------
 * Write the segmentlist back to the silo.
 *
 * We write the first copy, sync it synchronously, then write the
 * second copy and sync it synchronously.
 *
 * Provided the kernel doesn't lie, that means we will always have
 * at least one valid copy in the silo.
 */

static void
smp_save_seg(const struct smp_sc *sc, struct smp_signspace *spc)
{
        struct smp_segptr *ss;
        struct smp_seg *sg;
        uint64_t length;

        Lck_AssertHeld(&sc->mtx);
        smp_reset_signspace(spc);
        ss = SIGNSPACE_DATA(spc);
        length = 0;
        VTAILQ_FOREACH(sg, &sc->segments, list) {
                assert(sg->p.offset < sc->mediasize);
                assert(sg->p.offset + sg->p.length <= sc->mediasize);
                *ss = sg->p;
                ss++;
                length += sizeof *ss;
        }
        smp_append_signspace(spc, length);
        smp_sync_sign(&spc->ctx);
}

void
smp_save_segs(struct smp_sc *sc)
{
        struct smp_seg *sg, *sg2;

        Lck_AssertHeld(&sc->mtx);

        /*
         * Remove empty segments from the front of the list
         * before we write the segments to disk.
         */
        VTAILQ_FOREACH_SAFE(sg, &sc->segments, list, sg2) {
                if (sg->nobj > 0)
                        break;
                if (sg == sc->cur_seg)
                        continue;
                VTAILQ_REMOVE(&sc->segments, sg, list);
                AN(VTAILQ_EMPTY(&sg->objcores));
                FREE_OBJ(sg);
        }
        smp_save_seg(sc, &sc->seg1);
        smp_save_seg(sc, &sc->seg2);
}

/*--------------------------------------------------------------------
 * Load segments
 *
 * The overall objective is to register the existence of an object, based
 * only on the minimally sized struct smp_object, without causing the
 * main object to be faulted in.
 *
 * XXX: We can test this by mprotecting the main body of the segment
 * XXX: until the first fixup happens, or even just over this loop.
 * XXX: However, that requires that the smp_objects start further
 * XXX: into the segment than a page, so that they do not get hit
 * XXX: by the protection.
 */

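/*
 * Objects resurrected here are registered by their hash and smp_object
 * entry only; they are counted as n_vampireobject until their first
 * access fixes them up in smp_sml_getobj().
 */
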
void
smp_load_seg(struct worker *wrk, const struct smp_sc *sc,
    struct smp_seg *sg)
{
        struct smp_object *so;
        struct objcore *oc;
        struct ban *ban;
        uint32_t no;
        double t_now = VTIM_real();
        struct smp_signctx ctx[1];

        ASSERT_SILO_THREAD(sc);
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC);
        assert(sg->flags & SMP_SEG_MUSTLOAD);
        sg->flags &= ~SMP_SEG_MUSTLOAD;
        AN(sg->p.offset);
        if (sg->p.objlist == 0)
                return;
        smp_def_sign(sc, ctx, sg->p.offset, "SEGHEAD");
        if (smp_chk_sign(ctx))
                return;

        /* test SEGTAIL */
        /* test OBJIDX */
        so = (void*)(sc->base + sg->p.objlist);
        sg->objs = so;
        no = sg->p.lobjlist;
        /* Clear the bogus "hold" count */
        sg->nobj = 0;
        for (;no > 0; so++,no--) {
                if (EXP_WHEN(so) < t_now)
                        continue;
                ban = BAN_FindBan(so->ban);
                AN(ban);
                oc = ObjNew(wrk);
                oc->stobj->stevedore = sc->parent;
                smp_init_oc(oc, sg, no);
                VTAILQ_INSERT_TAIL(&sg->objcores, oc, lru_list);
                oc->stobj->priv2 |= NEED_FIXUP;
                EXP_COPY(oc, so);
                sg->nobj++;
                oc->refcnt++;
                HSH_Insert(wrk, so->hash, oc, ban);
                AN(oc->ban);
                HSH_DerefBoc(wrk, oc);  // XXX Keep it a stream resurrection?
                (void)HSH_DerefObjCore(wrk, &oc, HSH_RUSH_POLICY);
                wrk->stats->n_vampireobject++;
        }
        Pool_Sumstat(wrk);
        sg->flags |= SMP_SEG_LOADED;
}

/*--------------------------------------------------------------------
 * Create a new segment
 */

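/*
 * The segment is carved out at sc->free_offset, wrapping back to the start
 * of the silo payload if it would run past the end; if it would then
 * overlap the oldest segment, no segment is created and sc->cur_seg stays
 * NULL.  Allocation points are set up at both ends of the segment:
 * sc->next_bot just above the SEGHEAD signature and sc->next_top just
 * below the SEGTAIL, with sg->objs (the smp_object index) kept at the top.
 */
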
void
smp_new_seg(struct smp_sc *sc)
{
        struct smp_seg tmpsg;
        struct smp_seg *sg;

        AZ(sc->cur_seg);
        Lck_AssertHeld(&sc->mtx);

        /* XXX: find where it goes in silo */

        INIT_OBJ(&tmpsg, SMP_SEG_MAGIC);
        tmpsg.sc = sc;
        tmpsg.p.offset = sc->free_offset;
        /* XXX: align */
        assert(tmpsg.p.offset >= sc->ident->stuff[SMP_SPC_STUFF]);
        assert(tmpsg.p.offset < sc->mediasize);

        tmpsg.p.length = sc->aim_segl;
        tmpsg.p.length = RDN2(tmpsg.p.length, 8);

        if (smp_segend(&tmpsg) > sc->mediasize)
                /* XXX: Consider truncation in this case */
                tmpsg.p.offset = sc->ident->stuff[SMP_SPC_STUFF];

        assert(smp_segend(&tmpsg) <= sc->mediasize);

        sg = VTAILQ_FIRST(&sc->segments);
        if (sg != NULL && tmpsg.p.offset <= sg->p.offset) {
                if (smp_segend(&tmpsg) > sg->p.offset)
                        /* No more space, return (cur_seg will be NULL) */
                        /* XXX: Consider truncation instead of failing */
                        return;
                assert(smp_segend(&tmpsg) <= sg->p.offset);
        }

        if (tmpsg.p.offset == sc->ident->stuff[SMP_SPC_STUFF])
                printf("Wrapped silo\n");

        ALLOC_OBJ(sg, SMP_SEG_MAGIC);
        if (sg == NULL)
                return;
        *sg = tmpsg;
        VTAILQ_INIT(&sg->objcores);

        sg->p.offset = IRNUP(sc, sg->p.offset);
        sg->p.length -= sg->p.offset - tmpsg.p.offset;
        sg->p.length = IRNDN(sc, sg->p.length);
        assert(sg->p.offset + sg->p.length <= tmpsg.p.offset + tmpsg.p.length);
        sc->free_offset = sg->p.offset + sg->p.length;

        VTAILQ_INSERT_TAIL(&sc->segments, sg, list);

        /* Neuter the new segment in case there is an old one there */
        AN(sg->p.offset);
        smp_def_sign(sc, sg->ctx, sg->p.offset, "SEGHEAD");
        smp_reset_sign(sg->ctx);
        smp_sync_sign(sg->ctx);

        /* Set up our allocation points */
        sc->cur_seg = sg;
        sc->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE);
        sc->next_top = smp_segend(sg);
        sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE);
        IASSERTALIGN(sc, sc->next_bot);
        IASSERTALIGN(sc, sc->next_top);
        sg->objs = (void*)(sc->base + sc->next_top);
}

/*--------------------------------------------------------------------
 * Close a segment
 */

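/*
 * An empty segment is simply deleted.  Otherwise the smp_object index is
 * compacted down next to the allocations if there is room, the (empty)
 * OBJIDX and SEGTAIL signatures are written, and the segment list is
 * saved so the segment can be found again after a restart.
 */
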
void
smp_close_seg(struct smp_sc *sc, struct smp_seg *sg)
{
        uint64_t left, dst, len;
        void *dp;

        Lck_AssertHeld(&sc->mtx);

        CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC);
        assert(sg == sc->cur_seg);
        AN(sg->p.offset);
        sc->cur_seg = NULL;

        if (sg->nalloc == 0) {
                /* If segment is empty, delete instead */
                VTAILQ_REMOVE(&sc->segments, sg, list);
                assert(sg->p.offset >= sc->ident->stuff[SMP_SPC_STUFF]);
                assert(sg->p.offset < sc->mediasize);
                sc->free_offset = sg->p.offset;
                AN(VTAILQ_EMPTY(&sg->objcores));
                FREE_OBJ(sg);
                return;
        }

        /*
         * If there is enough space left that we can move the smp_objects
         * down without overwriting the present copy, we will do so to
         * compact the segment.
         */
        left = smp_spaceleft(sc, sg);
        len = sizeof(struct smp_object) * sg->p.lobjlist;
        if (len < left) {
                dst = sc->next_bot + IRNUP(sc, SMP_SIGN_SPACE);
                dp = sc->base + dst;
                assert((uintptr_t)dp + len < (uintptr_t)sg->objs);
                memcpy(dp, sg->objs, len);
                sc->next_top = dst;
                sg->objs = dp;
                sg->p.length = (sc->next_top - sg->p.offset)
                     + len + IRNUP(sc, SMP_SIGN_SPACE);
                (void)smp_spaceleft(sc, sg);    /* for the asserts */

        }

        /* Update the segment header */
        sg->p.objlist = sc->next_top;

        /* Write the (empty) OBJIDX signature */
        sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE);
        assert(sc->next_top >= sc->next_bot);
        smp_def_sign(sc, sg->ctx, sc->next_top, "OBJIDX");
        smp_reset_sign(sg->ctx);
        smp_sync_sign(sg->ctx);

        /* Write the (empty) SEGTAIL signature */
        smp_def_sign(sc, sg->ctx,
            sg->p.offset + sg->p.length - IRNUP(sc, SMP_SIGN_SPACE), "SEGTAIL");
        smp_reset_sign(sg->ctx);
        smp_sync_sign(sg->ctx);

        /* Save segment list */
        smp_save_segs(sc);
        sc->free_offset = smp_segend(sg);
}

/*---------------------------------------------------------------------
 */

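/*
 * Translate an objcore's priv2 value to its smp_object slot.  priv2 is a
 * 1-based index counting backwards from sg->p.lobjlist, so index 1 maps
 * to the last entry of sg->objs[].
 */
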
static struct smp_object *
smp_find_so(const struct smp_seg *sg, unsigned priv2)
{
        struct smp_object *so;

        priv2 &= ~NEED_FIXUP;
        assert(priv2 > 0);
        assert(priv2 <= sg->p.lobjlist);
        so = &sg->objs[sg->p.lobjlist - priv2];
        return (so);
}

/*---------------------------------------------------------------------
 * Check if a given storage structure is valid to use
 */

static int
smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg,
    const struct storage *st)
{
        struct smp_seg *sg2;
        const uint8_t *pst;
        uint64_t o;

        (void)sg;               /* XXX: faster: Start search from here */
        pst = (const void *)st;

        if (pst < (sc->base + sc->ident->stuff[SMP_SPC_STUFF]))
                return (0x01);          /* Before silo payload start */
        if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF]))
                return (0x02);          /* After silo end */

        o = pst - sc->base;

        /* Find which segment contains the storage structure */
        VTAILQ_FOREACH(sg2, &sc->segments, list)
                if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist)
                        break;
        if (sg2 == NULL)
                return (0x04);          /* No claiming segment */
        if (!(sg2->flags & SMP_SEG_LOADED))
                return (0x08);          /* Claiming segment not loaded */

        /* It is now safe to access the storage structure */
        if (st->magic != STORAGE_MAGIC)
                return (0x10);          /* Not enough magic */

        if (o + st->space >= sg2->p.objlist)
                return (0x20);          /* Allocation not inside segment */

        if (st->len > st->space)
                return (0x40);          /* Plain bad... */

        /*
         * XXX: We could patch up st->stevedore and st->priv here
         * XXX: but if things go right, we will never need them.
         */
        return (0);
}

/*---------------------------------------------------------------------
 * objcore methods for persistent objects
 */

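/*
 * Fetching the object body of a resurrected ("vampire") object triggers a
 * one-time fixup under the silo lock: the storage chain is sanity checked,
 * the statistics move it from n_vampireobject to n_object, and NEED_FIXUP
 * is cleared.  If the check fails, the object is expired via EXP_ZERO().
 */
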
struct object * v_matchproto_(sml_getobj_f)
smp_sml_getobj(struct worker *wrk, struct objcore *oc)
{
        struct object *o;
        struct smp_seg *sg;
        struct smp_object *so;
        struct storage *st;
        uint64_t l;
        int bad;

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
        AN(oc->stobj->stevedore);

        CAST_OBJ_NOTNULL(sg, oc->stobj->priv, SMP_SEG_MAGIC);
        so = smp_find_so(sg, oc->stobj->priv2);

        o = (void*)(sg->sc->base + so->ptr);
        /*
         * The object may not be in this segment since we allocate it
         * in a separate operation from the smp_object.  We could check
         * that it is in a later segment, but that would be complicated.
         * XXX: For now, be happy if it is inside the silo
         */
        ASSERT_PTR_IN_SILO(sg->sc, o);
        CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);

        /*
         * If this flag is not set, it will not be, and the lock is not
         * needed to test it.
         */
        if (!(oc->stobj->priv2 & NEED_FIXUP))
                return (o);

        Lck_Lock(&sg->sc->mtx);
        /* Check again, we might have raced. */
        if (oc->stobj->priv2 & NEED_FIXUP) {
                /* We trust caller to have a refcnt for us */

                bad = 0;
                l = 0;
                VTAILQ_FOREACH(st, &o->list, list) {
                        bad |= smp_loaded_st(sg->sc, sg, st);
                        if (bad)
                                break;
                        l += st->len;
                }
                if (l != vbe64dec(o->fa_len))
                        bad |= 0x100;

                if (bad) {
                        EXP_ZERO(oc);
                        EXP_ZERO(so);
                }

                sg->nfixed++;
                wrk->stats->n_object++;
                wrk->stats->n_vampireobject--;
                oc->stobj->priv2 &= ~NEED_FIXUP;
        }
        Lck_Unlock(&sg->sc->mtx);
        return (o);
}

void v_matchproto_(objfree_f)
smp_oc_objfree(struct worker *wrk, struct objcore *oc)
{
        struct smp_seg *sg;
        struct smp_object *so;

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);

        CAST_OBJ_NOTNULL(sg, oc->stobj->priv, SMP_SEG_MAGIC);
        so = smp_find_so(sg, oc->stobj->priv2);

        Lck_Lock(&sg->sc->mtx);
        EXP_ZERO(so);
        so->ptr = 0;

        assert(sg->nobj > 0);
        sg->nobj--;
        if (oc->stobj->priv2 & NEED_FIXUP) {
                wrk->stats->n_vampireobject--;
        } else {
                assert(sg->nfixed > 0);
                sg->nfixed--;
                wrk->stats->n_object--;
        }
        VTAILQ_REMOVE(&sg->objcores, oc, lru_list);

        Lck_Unlock(&sg->sc->mtx);
        memset(oc->stobj, 0, sizeof oc->stobj);
}

/*--------------------------------------------------------------------*/

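/*
 * Record which segment and which (1-based) smp_object slot an objcore
 * belongs to.
 */
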
void
smp_init_oc(struct objcore *oc, struct smp_seg *sg, unsigned objidx)
{

        AZ(objidx & NEED_FIXUP);
        oc->stobj->priv = sg;
        oc->stobj->priv2 = objidx;
}

/*--------------------------------------------------------------------*/

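/*
 * Propagate ban and TTL changes from the objcore to its on-disk smp_object
 * so they survive a restart.  The silo lock is only needed while the
 * segment is still the one open for allocation, to avoid racing
 * smp_close_seg().
 */
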
void v_matchproto_(obj_event_f)
smp_oc_event(struct worker *wrk, void *priv, struct objcore *oc, unsigned ev)
{
        struct stevedore *st;
        struct smp_seg *sg;
        struct smp_object *so;

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        CAST_OBJ_NOTNULL(st, priv, STEVEDORE_MAGIC);
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);

        if (oc->stobj->stevedore != st)
                return;

        CAST_OBJ_NOTNULL(sg, oc->stobj->priv, SMP_SEG_MAGIC);
        CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC);
        so = smp_find_so(sg, oc->stobj->priv2);

        if (sg == sg->sc->cur_seg) {
                /* Lock necessary, we might race close_seg */
                Lck_Lock(&sg->sc->mtx);
                if (ev & (OEV_BANCHG|OEV_INSERT))
                        so->ban = BAN_Time(oc->ban);
                if (ev & (OEV_TTLCHG|OEV_INSERT))
                        EXP_COPY(so, oc);
                Lck_Unlock(&sg->sc->mtx);
        } else {
                if (ev & (OEV_BANCHG|OEV_INSERT))
                        so->ban = BAN_Time(oc->ban);
                if (ev & (OEV_TTLCHG|OEV_INSERT))
                        EXP_COPY(so, oc);
        }
}