varnish-cache/bin/varnishd/storage/storage_persistent.h
0
/*-
1
 * Copyright (c) 2008-2011 Varnish Software AS
2
 * All rights reserved.
3
 *
4
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
5
 *
6
 * SPDX-License-Identifier: BSD-2-Clause
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 *
29
 * Persistent storage method
30
 *
31
 * XXX: Before we start the client or maybe after it stops, we should give the
32
 * XXX: stevedores a chance to examine their storage for consistency.
33
 *
34
 * XXX: Do we ever free the LRU-lists ?
35
 */
36
37
/*
38
 *
39
 * Overall layout:
40
 *
41
 *      struct smp_ident;               Identification and geometry
42
 *      sha256[...]                     checksum of same
43
 *
44
 *      struct smp_sign;
45
 *      banspace_1;                     First ban-space
46
 *      sha256[...]                     checksum of same
47
 *
48
 *      struct smp_sign;
49
 *      banspace_2;                     Second ban-space
50
 *      sha256[...]                     checksum of same
51
 *
52
 *      struct smp_sign;
53
 *      struct smp_segment_1[N];        First Segment table
54
 *      sha256[...]                     checksum of same
55
 *
56
 *      struct smp_sign;
57
 *      struct smp_segment_2[N];        Second Segment table
58
 *      sha256[...]                     checksum of same
59
 *
60
 *      N segments {
61
 *              struct smp_sign;
62
 *              struct smp_object[M]    Objects in segment
63
 *              sha256[...]             checksum of same
64
 *              objspace
65
 *      }
66
 *
67
 */
68
69
/*
70
 * The identblock is located in the first sector of the storage space.
71
 * This is written once and not subsequently modified in normal operation.
72
 * It is immediately followed by a SHA256sum of the structure, as stored.
73
 */
74
75
struct smp_ident {
        /*
         * Human readable ident so people and programs can tell what
         * the file or device contains.
         */
        char                    ident[32];

        /* 0x12345678 */
        uint32_t                byte_order;

        /* sizeof(struct smp_ident) */
        uint32_t                size;

        uint32_t                major_version;

        uint32_t                unique;

        /* alignment in silo */
        uint32_t                align;

        /* smallest ... in bytes */
        uint32_t                granularity;

        /* ... in bytes */
        uint64_t                mediasize;

        /* pointers to stuff, indexed by the SMP_*_STUFF values below */
        uint64_t                stuff[6];
#define SMP_BAN1_STUFF          0
#define SMP_BAN2_STUFF          1
#define SMP_SEG1_STUFF          2
#define SMP_SEG2_STUFF          3
#define SMP_SPC_STUFF           4
#define SMP_END_STUFF           5
};

/*
 * The size of smp_ident should be fixed and constant across all platforms.
 * We enforce that with the following #define and an assert in smp_init()
 */
#define SMP_IDENT_SIZE          112

/* Including its terminating NUL this is exactly 32 bytes long */
#define SMP_IDENT_STRING        "Varnish Persistent Storage Silo"
112
113
/*
114
 * This is used to sign various bits on the disk.
115
 */
116
117
struct smp_sign {
        char                    ident[8];
        uint32_t                unique;
        uint64_t                mapped;

        /*
         * Length of the signed data only; struct smp_sign itself is
         * not included.
         *
         * NB: Must be last
         */
        uint64_t                length;
};

/* One struct smp_sign header plus VSHA256_LEN bytes */
#define SMP_SIGN_SPACE          (sizeof(struct smp_sign) + VSHA256_LEN)
127
128
/*
129
 * A segment pointer.
130
 */
131
132
struct smp_segptr {
        /* rel to silo */
        uint64_t                offset;

        /* rel to offset */
        uint64_t                length;

        /* rel to silo */
        uint64_t                objlist;

        /* len of objlist */
        uint32_t                lobjlist;
};
138
139
/*
140
 * An object descriptor
141
 *
142
 * A positive ttl is obj.ttl with obj.grace being NAN
143
 * A negative ttl is - (obj.ttl + obj.grace)
144
 */
145
146
struct smp_object {
        /* really: DIGEST_LEN */
        uint8_t                 hash[32];

        double                  t_origin;
        float                   ttl;
        float                   grace;
        float                   keep;

        /* -> align/8 on 32bit */
        uint32_t                __filler__;

        double                  ban;

        /* rel to silo */
        uint64_t                ptr;
};
156
157
/* Assert that the calling thread is the one stored in (sc)->thread */
#define ASSERT_SILO_THREAD(sc)                                          \
        do {                                                            \
                assert(pthread_equal(pthread_self(), (sc)->thread));    \
        } while (0)
159
160
/*
161
 * Context for a signature.
162
 *
163
 * A signature is a sequence of bytes in the silo, signed by a SHA256 hash
164
 * which follows the bytes.
165
 *
166
 * The context structure allows us to append to a signature without
167
 * recalculating the entire SHA256 hash.
168
 */
169
170
struct smp_signctx {
171
        struct smp_sign         *ss;
172
        struct VSHA256Context   ctx;
173
        uint32_t                unique;
174
        const char              *id;
175
};
176
177
/*
178
 * A space wrapped by a signature
179
 *
180
 * A signspace is a chunk of the silo that is wrapped by a
181
 * signature. It has attributes for size, so range checking can be
182
 * performed.
183
 *
184
 */
185
186
struct smp_signspace {
187
        struct smp_signctx      ctx;
188
        uint8_t                 *start;
189
        uint64_t                size;
190
};
191
192
struct smp_sc;
193
194
/* XXX: name confusion with on-media version ? */
195
struct smp_seg {
196
        unsigned                magic;
197
#define SMP_SEG_MAGIC           0x45c61895
198
199
        struct smp_sc           *sc;
200
        VTAILQ_HEAD(,objcore)   objcores;
201
202
        VTAILQ_ENTRY(smp_seg)   list;           /* on smp_sc.smp_segments */
203
204
        struct smp_segptr       p;
205
206
        unsigned                flags;
207
#define SMP_SEG_MUSTLOAD        (1 << 0)
208
#define SMP_SEG_LOADED          (1 << 1)
209
210
        uint32_t                nobj;           /* Number of objects */
211
        uint32_t                nalloc;         /* Allocations */
212
        uint32_t                nfixed;         /* How many fixed objects */
213
214
        /* Only for open segment */
215
        struct smp_object       *objs;          /* objdesc array */
216
        struct smp_signctx      ctx[1];
217
};
218
219
VTAILQ_HEAD(smp_seghead, smp_seg);
220
221
struct smp_sc {
222
        unsigned                magic;
223
#define SMP_SC_MAGIC            0x7b73af0a
224
        struct stevedore        *parent;
225
226
        pthread_t               bgthread;
227
        unsigned                flags;
228
#define SMP_SC_LOADED           (1 << 0)
229
#define SMP_SC_STOP             (1 << 1)
230
231
        const struct stevedore  *stevedore;
232
        int                     fd;
233
        const char              *filename;
234
        uint64_t                mediasize;
235
        uintptr_t               align;
236
        uint32_t                granularity;
237
        uint32_t                unique;
238
239
        uint8_t                 *base;
240
241
        struct smp_ident        *ident;
242
243
        struct smp_seghead      segments;
244
        struct smp_seg          *cur_seg;
245
        uint64_t                next_bot;       /* next alloc address bottom */
246
        uint64_t                next_top;       /* next alloc address top */
247
248
        uint64_t                free_offset;
249
250
        pthread_t               thread;
251
252
        VTAILQ_ENTRY(smp_sc)    list;
253
254
        struct smp_signctx      idn;
255
        struct smp_signspace    ban1;
256
        struct smp_signspace    ban2;
257
        struct smp_signspace    seg1;
258
        struct smp_signspace    seg2;
259
260
        struct lock             mtx;
261
262
        /* Cleaner metrics */
263
264
        unsigned                min_nseg;
265
        unsigned                aim_nseg;
266
        unsigned                max_nseg;
267
268
        uint64_t                min_segl;
269
        uint64_t                aim_segl;
270
        uint64_t                max_segl;
271
272
        uint64_t                free_reserve;
273
};
274
275
/*--------------------------------------------------------------------*/
276
277
/*
 * Pointer round up to the silo alignment, (sc)->align.
 *
 * The sc argument is parenthesized in every expansion so that any
 * pointer-valued expression (e.g. &silo) can be passed safely.
 */
#define PRNUP(sc, x)    ((void*)RUP2((uintptr_t)(x), (sc)->align))

/*
 * Integer round down/up to the silo alignment, and an assert that a
 * value is already aligned.
 *
 * NB: IASSERTALIGN() expands x twice, do not pass expressions with
 * side effects.
 */
#define IRNDN(sc, x)    RDN2(x, (sc)->align)
#define IRNUP(sc, x)    RUP2(x, (sc)->align)
#define IASSERTALIGN(sc, x)     assert(IRNDN(sc, x) == (x))
284
285
/*--------------------------------------------------------------------*/
286
287
/*
 * Assert that ptr lies in [(sc)->base, (sc)->base + (sc)->mediasize).
 */
#define ASSERT_PTR_IN_SILO(sc, ptr)                                     \
        assert(                                                         \
            (const void*)(ptr) >= (const void*)((sc)->base) &&          \
            (const void*)(ptr) <                                        \
                (const void *)((sc)->base + (sc)->mediasize))
290
291
/*--------------------------------------------------------------------*/
292
293
/* First byte after the struct smp_sign header that (ctx)->ss points at */
#define SIGN_DATA(ctx)          ((void *)((ctx)->ss + 1))

/* First byte past the signed data: ss->length bytes after SIGN_DATA() */
#define SIGN_END(ctx)                                                   \
        ((void *)((int8_t *)SIGN_DATA(ctx) + (ctx)->ss->length))

/* The same two positions, for the signctx embedded in a signspace */
#define SIGNSPACE_DATA(spc)     (SIGN_DATA(&(spc)->ctx))
#define SIGNSPACE_FRONT(spc)    (SIGN_END(&(spc)->ctx))

/* Signed length of a signspace, and what remains of (spc)->size */
#define SIGNSPACE_LEN(spc)      ((spc)->ctx.ss->length)
#define SIGNSPACE_FREE(spc)     ((spc)->size - SIGNSPACE_LEN(spc))
300
301
/*
 * storage_persistent_mgt.c
 */

void smp_mgt_init(struct stevedore *parent, int ac,
    char * const *av);
304
305
/* storage_persistent_silo.c */
306
307
void smp_load_seg(struct worker *, const struct smp_sc *sc, struct smp_seg *sg);
308
void smp_new_seg(struct smp_sc *sc);
309
void smp_close_seg(struct smp_sc *sc, struct smp_seg *sg);
310
void smp_init_oc(struct objcore *oc, struct smp_seg *sg, unsigned objidx);
311
void smp_save_segs(struct smp_sc *sc);
312
sml_getobj_f smp_sml_getobj;
313
void smp_oc_objfree(struct worker *, struct objcore *);
314
obj_event_f smp_oc_event;
315
316
/*
 * storage_persistent_subr.c
 */

/* Operations on a struct smp_signctx */
void smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx,
    uint64_t off, const char *id);
int smp_chk_sign(struct smp_signctx *ctx);
void smp_reset_sign(struct smp_signctx *ctx);
void smp_sync_sign(const struct smp_signctx *ctx);

/* Operations on a struct smp_signspace */
int smp_chk_signspace(struct smp_signspace *spc);
void smp_append_signspace(struct smp_signspace *spc, uint32_t len);
void smp_reset_signspace(struct smp_signspace *spc);
void smp_copy_signspace(struct smp_signspace *dst,
    const struct smp_signspace *src);

/* Operations on a whole silo */
void smp_newsilo(struct smp_sc *sc);
int smp_valid_silo(struct smp_sc *sc);
332
333
/*--------------------------------------------------------------------
334
 * Calculate payload of some stuff
335
 */
336
337
static inline uint64_t
338 53680
smp_stuff_len(const struct smp_sc *sc, unsigned stuff)
339
{
340
        uint64_t l;
341
342 53680
        assert(stuff < SMP_END_STUFF);
343 53680
        l = sc->ident->stuff[stuff + 1] - sc->ident->stuff[stuff];
344 53680
        l -= SMP_SIGN_SPACE;
345 53680
        return (l);
346
}
347
348
static inline uint64_t
349 5680
smp_segend(const struct smp_seg *sg)
350
{
351
352 5680
        return (sg->p.offset + sg->p.length);
353
}
354
355
static inline uint64_t
356 3760
smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg)
357
{
358
359 3760
        IASSERTALIGN(sc, sc->next_bot);
360 3760
        assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE));
361 3760
        assert(sc->next_bot >= sg->p.offset);
362 3760
        assert(sc->next_top < sg->p.offset + sg->p.length);
363 3760
        return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE));
364
}