varnish-cache/bin/varnishd/storage/storage_persistent.h
1
/*-
2
 * Copyright (c) 2008-2011 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * Redistribution and use in source and binary forms, with or without
8
 * modification, are permitted provided that the following conditions
9
 * are met:
10
 * 1. Redistributions of source code must retain the above copyright
11
 *    notice, this list of conditions and the following disclaimer.
12
 * 2. Redistributions in binary form must reproduce the above copyright
13
 *    notice, this list of conditions and the following disclaimer in the
14
 *    documentation and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
20
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
 * SUCH DAMAGE.
27
 *
28
 * Persistent storage method
29
 *
30
 * XXX: Before we start the client or maybe after it stops, we should give the
31
 * XXX: stevedores a chance to examine their storage for consistency.
32
 *
33
 * XXX: Do we ever free the LRU-lists ?
34
 */
35
36
/*
37
 *
38
 * Overall layout:
39
 *
40
 *      struct smp_ident;               Identification and geometry
41
 *      sha256[...]                     checksum of same
42
 *
43
 *      struct smp_sign;
44
 *      banspace_1;                     First ban-space
45
 *      sha256[...]                     checksum of same
46
 *
47
 *      struct smp_sign;
48
 *      banspace_2;                     Second ban-space
49
 *      sha256[...]                     checksum of same
50
 *
51
 *      struct smp_sign;
52
 *      struct smp_segment_1[N];        First Segment table
53
 *      sha256[...]                     checksum of same
54
 *
55
 *      struct smp_sign;
56
 *      struct smp_segment_2[N];        Second Segment table
57
 *      sha256[...]                     checksum of same
58
 *
59
 *      N segments {
60
 *              struct smp_sign;
61
 *              struct smp_object[M]    Objects in segment
62
 *              sha256[...]             checksum of same
63
 *              objspace
64
 *      }
65
 *
66
 */
67
68
/*
69
 * The identblock is located in the first sector of the storage space.
70
 * This is written once and not subsequently modified in normal operation.
71
 * It is immediately followed by a SHA256sum of the structure, as stored.
72
 */
73
74
struct smp_ident {
75
        char                    ident[32];      /* Human readable ident
                                                 * so people and programs
                                                 * can tell what the file
                                                 * or device contains.
                                                 */
80
81
        uint32_t                byte_order;     /* 0x12345678 */
82
83
        uint32_t                size;           /* sizeof(struct smp_ident) */
84
85
        uint32_t                major_version;
86
87
        /*
         * NOTE(review): smp_sign, smp_signctx and smp_sc each carry a
         * "unique" member as well; presumably they are matched against
         * this one - confirm.
         */
        uint32_t                unique;
88
89
        uint32_t                align;          /* alignment in silo */
90
91
        uint32_t                granularity;    /* smallest ... in bytes */
92
93
        uint64_t                mediasize;      /* ... in bytes */
94
95
        /*
         * Entry n and entry n + 1 bracket region n: smp_stuff_len()
         * returns stuff[n + 1] - stuff[n] minus SMP_SIGN_SPACE and only
         * accepts n < SMP_END_STUFF, so SMP_END_STUFF is never a region
         * of its own, only an upper bound.
         */
        uint64_t                stuff[6];       /* pointers to stuff */
96
#define SMP_BAN1_STUFF          0
97
#define SMP_BAN2_STUFF          1
98
#define SMP_SEG1_STUFF          2
99
#define SMP_SEG2_STUFF          3
100
#define SMP_SPC_STUFF           4
101
#define SMP_END_STUFF           5
102
        /*
         * Members add up to 32 + 6 * 4 + 8 + 6 * 8 = 112 bytes, and every
         * uint64_t member starts on a multiple of 8 (offset 56 onwards),
         * which matches SMP_IDENT_SIZE.
         */
};
103
104
/*
105
 * The size of smp_ident should be fixed and constant across all platforms.
106
 * We enforce that with the following #define and an assert in smp_init()
107
 */
108
#define SMP_IDENT_SIZE          112     /* 32 + 6 * 4 + 8 + 6 * 8 */
109
110
/*
 * 31 characters plus the terminating NUL, i.e. exactly the 32 bytes of
 * smp_ident.ident (presumably where it gets stored - confirm against the
 * code that writes the ident block).
 */
#define SMP_IDENT_STRING        "Varnish Persistent Storage Silo"
111
112
/*
113
 * This is used to sign various bits on the disk.
114
 */
115
116
/*
 * Signature header.  The signed data starts immediately after this
 * structure (SIGN_DATA() is "ss + 1") and runs for "length" bytes.
 *
 * NOTE(review): unlike smp_object there is no explicit filler between
 * "unique" (uint32_t) and "mapped" (uint64_t), so the layout depends on
 * the ABI's uint64_t alignment - confirm this is intended.
 */
struct smp_sign {
117
        char                    ident[8];
118
        uint32_t                unique;
119
        uint64_t                mapped;
120
        /* The length field is the length of the signed data only
         * (does not include struct smp_sign) */
122
        uint64_t                length;         /* NB: Must be last */
123
};
124
125
/*
 * Bytes consumed by signing a region on top of its payload: the header
 * plus VSHA256_LEN bytes (VSHA256_LEN is defined elsewhere).
 */
#define SMP_SIGN_SPACE          (sizeof(struct smp_sign) + VSHA256_LEN)
126
127
/*
128
 * A segment pointer.
129
 */
130
131
/*
 * Describes where one segment and its object list live.  "offset" and
 * "objlist" are relative to the silo, "length" is relative to "offset";
 * offset + length is what smp_segend() returns.
 */
struct smp_segptr {
132
        uint64_t                offset;         /* rel to silo */
133
        uint64_t                length;         /* rel to offset */
134
        uint64_t                objlist;        /* rel to silo */
135
        uint32_t                lobjlist;       /* len of objlist */
136
};
137
138
/*
139
 * An object descriptor
140
 *
141
 * A positive ttl is obj.ttl with obj.grace being NAN
142
 * A negative ttl is - (obj.ttl + obj.grace)
143
 */
144
145
/*
 * Members add up to 32 + 8 + 4 * 4 + 8 + 8 = 72 bytes; __filler__ pads
 * the three floats so that "ban" starts at offset 56, a multiple of 8.
 *
 * NOTE(review): the comment preceding this struct describes folding
 * grace into the sign of ttl, yet a separate "grace" member exists -
 * confirm which description is current.
 */
struct smp_object {
146
        uint8_t                 hash[32];       /* really: DIGEST_LEN */
147
        double                  t_origin;
148
        float                   ttl;
149
        float                   grace;
150
        float                   keep;
151
        uint32_t                __filler__;     /* -> align/8 on 32bit */
152
        double                  ban;
153
        uint64_t                ptr;            /* rel to silo */
154
};
155
156
/*
 * Assert that the calling thread (pthread_self()) is the one recorded in
 * (sc)->thread.  Wrapped in do { } while (0) so it is used as a statement.
 */
#define ASSERT_SILO_THREAD(sc) \
157
    do {assert(pthread_equal(pthread_self(), (sc)->thread));} while (0)
158
159
/*
160
 * Context for a signature.
161
 *
162
 * A signature is a sequence of bytes in the silo, signed by a SHA256 hash
163
 * which follows the bytes.
164
 *
165
 * The context structure allows us to append to a signature without
166
 * recalculating the entire SHA256 hash.
167
 */
168
169
struct smp_signctx {
170
        /* Signature header; SIGN_DATA() treats the bytes right after it
         * as the signed data and SIGN_END() adds ss->length to that. */
        struct smp_sign         *ss;
171
        /* SHA256 state kept so a signature can be appended to without
         * rehashing everything. */
        struct VSHA256Context   ctx;
172
        uint32_t                unique;
173
        const char              *id;
174
};
175
176
/*
177
 * A space wrapped by a signature
178
 *
179
 * A signspace is a chunk of the silo that is wrapped by a
180
 * signature. It has attributes for size, so range checking can be
181
 * performed.
182
 *
183
 */
184
185
struct smp_signspace {
186
        struct smp_signctx      ctx;
187
        uint8_t                 *start;
188
        /* SIGNSPACE_FREE() computes size - ctx.ss->length, so this is in
         * the same unit as the signed data length. */
        uint64_t                size;
189
};
190
191
/* Forward declaration: smp_seg points back at its silo context. */
struct smp_sc;
192
193
/* XXX: name confusion with on-media version ? */
194
struct smp_seg {
195
        unsigned                magic;
196
#define SMP_SEG_MAGIC           0x45c61895
197
198
        struct smp_sc           *sc;
199
        VTAILQ_HEAD(,objcore)   objcores;
200
201
        VTAILQ_ENTRY(smp_seg)   list;           /* on smp_sc.segments */
202
203
        /* Placement of the segment; smp_segend() and smp_spaceleft()
         * read p.offset and p.length. */
        struct smp_segptr       p;
204
205
        unsigned                flags;
206
#define SMP_SEG_MUSTLOAD        (1 << 0)
207
#define SMP_SEG_LOADED          (1 << 1)
208
209
        uint32_t                nobj;           /* Number of objects */
210
        uint32_t                nalloc;         /* Allocations */
211
        uint32_t                nfixed;         /* How many fixed objects */
212
213
        /* Only for open segment */
214
        struct smp_object       *objs;          /* objdesc array */
215
        struct smp_signctx      ctx[1];
216
};
217
218
/* List head type for smp_seg, used for smp_sc.segments. */
VTAILQ_HEAD(smp_seghead, smp_seg);
219
220
/*
 * In-core context of one silo.
 */
struct smp_sc {
221
        unsigned                magic;
222
#define SMP_SC_MAGIC            0x7b73af0a
223
        struct stevedore        *parent;
224
225
        pthread_t               bgthread;
226
        unsigned                flags;
227
#define SMP_SC_LOADED           (1 << 0)
228
#define SMP_SC_STOP             (1 << 1)
229
230
        /* NOTE(review): both "parent" and "stevedore" point at a struct
         * stevedore; how they differ is not visible here. */
        const struct stevedore  *stevedore;
231
        int                     fd;
232
        const char              *filename;
233
        /* Signed off_t here, uint64_t in smp_ident.mediasize. */
        off_t                   mediasize;
234
        /* Alignment used by PRNUP(), IRNDN(), IRNUP(), IASSERTALIGN(). */
        uintptr_t               align;
235
        uint32_t                granularity;
236
        uint32_t                unique;
237
238
        /* ASSERT_PTR_IN_SILO() accepts [base, base + mediasize). */
        uint8_t                 *base;
239
240
        /* smp_stuff_len() reads ident->stuff[]. */
        struct smp_ident        *ident;
241
242
        struct smp_seghead      segments;
243
        struct smp_seg          *cur_seg;
244
        /* smp_spaceleft() reports next_top - next_bot minus the aligned
         * SMP_SIGN_SPACE, and asserts next_bot is aligned. */
        uint64_t                next_bot;       /* next alloc address bottom */
245
        uint64_t                next_top;       /* next alloc address top */
246
247
        uint64_t                free_offset;
248
249
        /* The thread ASSERT_SILO_THREAD() compares pthread_self() with. */
        pthread_t               thread;
250
251
        VTAILQ_ENTRY(smp_sc)    list;
252
253
        /* Signature context for the ident block plus one signed space
         * per ban list and segment table (cf. SMP_*_STUFF indexes). */
        struct smp_signctx      idn;
254
        struct smp_signspace    ban1;
255
        struct smp_signspace    ban2;
256
        struct smp_signspace    seg1;
257
        struct smp_signspace    seg2;
258
259
        struct lock             mtx;
260
261
        /* Cleaner metrics */
262
263
        unsigned                min_nseg;
264
        unsigned                aim_nseg;
265
        unsigned                max_nseg;
266
267
        uint64_t                min_segl;
268
        uint64_t                aim_segl;
269
        uint64_t                max_segl;
270
271
        uint64_t                free_reserve;
272
};
273
274
/*--------------------------------------------------------------------*/
275
276
/*
 * Pointer round up: convert x to uintptr_t, round it up to the silo
 * alignment (sc)->align with RUP2() and hand it back as a void pointer.
 *
 * RUP2()/RDN2() are defined elsewhere; presumably they require the
 * alignment to be a power of two - confirm.
 *
 * The sc argument is parenthesized in all macros below so that compound
 * expressions such as "p + 1" or "&silos[i]" expand correctly.
 */
#define PRNUP(sc, x)    ((void*)RUP2((uintptr_t)(x), (sc)->align))

/*
 * Integer round down / round up to the silo alignment, and an assert
 * that x is already aligned.  IASSERTALIGN() evaluates x twice, so do
 * not pass an expression with side effects.
 */
#define IRNDN(sc, x)    RDN2((x), (sc)->align)
#define IRNUP(sc, x)    RUP2((x), (sc)->align)
#define IASSERTALIGN(sc, x)     assert(IRNDN(sc, x) == (x))
283
284
/*--------------------------------------------------------------------*/
285
286
/*
 * Assert that ptr lies in [(sc)->base, (sc)->base + (sc)->mediasize).
 * Both arguments are evaluated more than once.
 */
#define ASSERT_PTR_IN_SILO(sc, ptr) \
287
        assert((const void*)(ptr) >= (const void*)((sc)->base) && \
288
            (const void*)(ptr) < (const void *)((sc)->base + (sc)->mediasize))
289
290
/*--------------------------------------------------------------------*/
291
292
/* First byte after the struct smp_sign header of a signature context. */
#define SIGN_DATA(ctx)  ((void *)((ctx)->ss + 1))
293
/* SIGN_DATA() advanced by ss->length bytes, i.e. one past the signed data. */
#define SIGN_END(ctx)   ((void *)((int8_t *)SIGN_DATA(ctx) + (ctx)->ss->length))
294
295
/* The same two positions, taken from a signspace's embedded context. */
#define SIGNSPACE_DATA(spc)     (SIGN_DATA(&(spc)->ctx))
296
#define SIGNSPACE_FRONT(spc)    (SIGN_END(&(spc)->ctx))
297
/* Length of the signed data currently recorded in the signature header. */
#define SIGNSPACE_LEN(spc)      ((spc)->ctx.ss->length)
298
/* size minus recorded length; unsigned, so it wraps if length > size. */
#define SIGNSPACE_FREE(spc)     ((spc)->size - SIGNSPACE_LEN(spc))
299
300
/* storage_persistent_mgt.c */
301
302
void smp_mgt_init(struct stevedore *parent, int ac, char * const *av);
303
304
/* storage_persistent_silo.c */
305
306
/* Note: smp_load_seg() takes the silo context as const, the other
 * segment functions below take it mutable. */
void smp_load_seg(struct worker *, const struct smp_sc *sc, struct smp_seg *sg);
307
void smp_new_seg(struct smp_sc *sc);
308
void smp_close_seg(struct smp_sc *sc, struct smp_seg *sg);
309
void smp_init_oc(struct objcore *oc, struct smp_seg *sg, unsigned objidx);
310
void smp_save_segs(struct smp_sc *sc);
311
/* Declared through sml_getobj_f, presumably a function typedef defined
 * elsewhere - confirm. */
sml_getobj_f smp_sml_getobj;
312
void smp_oc_objfree(struct worker *, struct objcore *);
313
/* Declared through obj_event_f, presumably a function typedef defined
 * elsewhere - confirm. */
obj_event_f smp_oc_event;
314
315
/* storage_persistent_subr.c */
316
317
/* Operations on a single signature context (struct smp_signctx). */
void smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx,
318
    uint64_t off, const char *id);
319
int smp_chk_sign(struct smp_signctx *ctx);
320
void smp_reset_sign(struct smp_signctx *ctx);
321
void smp_sync_sign(const struct smp_signctx *ctx);
322
323
/* Operations on a signed space (struct smp_signspace).  Note that
 * smp_append_signspace() takes a 32 bit length although the signature
 * header stores a 64 bit one. */
int smp_chk_signspace(struct smp_signspace *spc);
324
void smp_append_signspace(struct smp_signspace *spc, uint32_t len);
325
void smp_reset_signspace(struct smp_signspace *spc);
326
void smp_copy_signspace(struct smp_signspace *dst,
327
                        const struct smp_signspace *src);
328
329
/* Whole-silo operations. */
void smp_newsilo(struct smp_sc *sc);
330
int smp_valid_silo(struct smp_sc *sc);
331
332
/*--------------------------------------------------------------------
333
 * Calculate payload of some stuff
334
 */
335
336
static inline uint64_t
337 922
smp_stuff_len(const struct smp_sc *sc, unsigned stuff)
338
{
339
        uint64_t l;
340
341 922
        assert(stuff < SMP_END_STUFF);
342 922
        l = sc->ident->stuff[stuff + 1] - sc->ident->stuff[stuff];
343 922
        l -= SMP_SIGN_SPACE;
344 922
        return (l);
345
}
346
347
static inline uint64_t
348 142
smp_segend(const struct smp_seg *sg)
349
{
350
351 142
        return (sg->p.offset + sg->p.length);
352
}
353
354
static inline uint64_t
355 92
smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg)
356
{
357
358 92
        IASSERTALIGN(sc, sc->next_bot);
359 92
        assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE));
360 92
        assert(sc->next_bot >= sg->p.offset);
361 92
        assert(sc->next_top < sg->p.offset + sg->p.length);
362 92
        return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE));
363
}