varnish-cache/bin/varnishd/storage/storage_file.c
1
/*-
2
 * Copyright (c) 2006 Verdens Gang AS
3
 * Copyright (c) 2006-2011 Varnish Software AS
4
 * All rights reserved.
5
 *
6
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 *
29
 * Storage method based on mmap'ed file
30
 */
31
32
#include "config.h"
33
34
#include "cache/cache_varnishd.h"
35
#include "common/heritage.h"
36
37
#include <sys/mman.h>
38
39
#include <errno.h>
40
#include <stdio.h>
41
#include <stdlib.h>
42
43
#include "storage/storage.h"
44
#include "storage/storage_simple.h"
45
46
#include "vnum.h"
47
#include "vfil.h"
48
49
#include "VSC_smf.h"
50
51
#ifndef MAP_NOCORE
52
#define MAP_NOCORE 0 /* XXX Linux */
53
#endif
54
55
#ifndef MAP_NOSYNC
56
#define MAP_NOSYNC 0 /* XXX Linux */
57
#endif
58
59
#define MINPAGES                128
60
61
/*
62
 * Number of buckets on free-list.
63
 *
64
 * Last bucket is "larger than" so choose number so that the second
65
 * to last bucket matches the 128k CHUNKSIZE in cache_fetch.c when
66
 * using the a 4K minimal page size
67
 */
68
#define NBUCKET                 (128 / 4 + 1)
69
70
static struct VSC_lck *lck_smf;
71
72
/*--------------------------------------------------------------------*/
73
74
VTAILQ_HEAD(smfhead, smf);
75
76
struct smf {
77
        unsigned                magic;
78
#define SMF_MAGIC               0x0927a8a0
79
        struct storage          s;
80
        struct smf_sc           *sc;
81
82
        int                     alloc;
83
84
        off_t                   size;
85
        off_t                   offset;
86
        unsigned char           *ptr;
87
88
        VTAILQ_ENTRY(smf)       order;
89
        VTAILQ_ENTRY(smf)       status;
90
        struct smfhead          *flist;
91
};
92
93
struct smf_sc {
94
        unsigned                magic;
95
#define SMF_SC_MAGIC            0x52962ee7
96
        struct lock             mtx;
97
        struct VSC_smf          *stats;
98
99
        const char              *filename;
100
        int                     fd;
101
        unsigned                pagesize;
102
        uintmax_t               filesize;
103
        int                     advice;
104
        struct smfhead          order;
105
        struct smfhead          free[NBUCKET];
106
        struct smfhead          used;
107
};
108
109
/*--------------------------------------------------------------------*/
110
111
static void
112 8
smf_init(struct stevedore *parent, int ac, char * const *av)
113
{
114
        const char *size, *fn, *r;
115
        struct smf_sc *sc;
116
        unsigned u;
117
        uintmax_t page_size;
118 8
        int advice = MADV_RANDOM;
119
120 8
        AZ(av[ac]);
121
122 8
        size = NULL;
123 8
        page_size = getpagesize();
124
125 8
        if (ac > 4)
126 0
                ARGV_ERR("(-sfile) too many arguments\n");
127 8
        if (ac < 1 || *av[0] == '\0')
128 0
                ARGV_ERR("(-sfile) path is mandatory\n");
129 8
        fn = av[0];
130 8
        if (ac > 1 && *av[1] != '\0')
131 8
                size = av[1];
132 8
        if (ac > 2 && *av[2] != '\0') {
133
134 0
                r = VNUM_2bytes(av[2], &page_size, 0);
135 0
                if (r != NULL)
136 0
                        ARGV_ERR("(-sfile) granularity \"%s\": %s\n", av[2], r);
137
        }
138 8
        if (ac > 3) {
139 0
                if (!strcmp(av[3], "normal"))
140 0
                        advice = MADV_NORMAL;
141 0
                else if (!strcmp(av[3], "random"))
142 0
                        advice = MADV_RANDOM;
143 0
                else if (!strcmp(av[3], "sequential"))
144 0
                        advice = MADV_SEQUENTIAL;
145
                else
146 0
                        ARGV_ERR("(-s file) invalid advice: \"%s\"", av[3]);
147
        }
148
149 8
        AN(fn);
150
151 8
        ALLOC_OBJ(sc, SMF_SC_MAGIC);
152 8
        XXXAN(sc);
153 8
        VTAILQ_INIT(&sc->order);
154 272
        for (u = 0; u < NBUCKET; u++)
155 264
                VTAILQ_INIT(&sc->free[u]);
156 8
        VTAILQ_INIT(&sc->used);
157 8
        sc->pagesize = page_size;
158 8
        sc->advice = advice;
159 8
        parent->priv = sc;
160
161 8
        (void)STV_GetFile(fn, &sc->fd, &sc->filename, "-sfile");
162 8
        MCH_Fd_Inherit(sc->fd, "storage_file");
163 8
        sc->filesize = STV_FileSize(sc->fd, size, &sc->pagesize, "-sfile");
164 8
        if (VFIL_allocate(sc->fd, (off_t)sc->filesize, 0))
165 0
                ARGV_ERR("(-sfile) allocation error: %s\n", strerror(errno));
166 8
}
167
168
/*--------------------------------------------------------------------
169
 * Insert/Remove from correct freelist
170
 */
171
172
static void
173 120
insfree(struct smf_sc *sc, struct smf *sp)
174
{
175
        size_t b;
176
        struct smf *sp2;
177
        size_t ns;
178
179 120
        AZ(sp->alloc);
180 120
        assert(sp->flist == NULL);
181 120
        Lck_AssertHeld(&sc->mtx);
182 120
        b = sp->size / sc->pagesize;
183 120
        if (b >= NBUCKET) {
184 96
                b = NBUCKET - 1;
185 96
                sc->stats->g_smf_large++;
186
        } else {
187 24
                sc->stats->g_smf_frag++;
188
        }
189 120
        sp->flist = &sc->free[b];
190 120
        ns = b * sc->pagesize;
191 120
        VTAILQ_FOREACH(sp2, sp->flist, status) {
192 20
                assert(sp2->size >= ns);
193 20
                AZ(sp2->alloc);
194 20
                assert(sp2->flist == sp->flist);
195 20
                if (sp->offset < sp2->offset)
196 20
                        break;
197
        }
198 120
        if (sp2 == NULL)
199 100
                VTAILQ_INSERT_TAIL(sp->flist, sp, status);
200
        else
201 20
                VTAILQ_INSERT_BEFORE(sp2, sp, status);
202 120
}
203
204
static void
205 108
remfree(const struct smf_sc *sc, struct smf *sp)
206
{
207
        size_t b;
208
209 108
        AZ(sp->alloc);
210 108
        assert(sp->flist != NULL);
211 108
        Lck_AssertHeld(&sc->mtx);
212 108
        b = sp->size / sc->pagesize;
213 108
        if (b >= NBUCKET) {
214 88
                b = NBUCKET - 1;
215 88
                sc->stats->g_smf_large--;
216
        } else {
217 20
                sc->stats->g_smf_frag--;
218
        }
219 108
        assert(sp->flist == &sc->free[b]);
220 108
        VTAILQ_REMOVE(sp->flist, sp, status);
221 108
        sp->flist = NULL;
222 108
}
223
224
/*--------------------------------------------------------------------
225
 * Allocate a range from the first free range that is large enough.
226
 */
227
228
static struct smf *
229 60
alloc_smf(struct smf_sc *sc, size_t bytes)
230
{
231
        struct smf *sp, *sp2;
232
        size_t b;
233
234 60
        AZ(bytes % sc->pagesize);
235 60
        b = bytes / sc->pagesize;
236 60
        if (b >= NBUCKET)
237 0
                b = NBUCKET - 1;
238 60
        sp = NULL;
239 1812
        for (; b < NBUCKET - 1; b++) {
240 1752
                sp = VTAILQ_FIRST(&sc->free[b]);
241 1752
                if (sp != NULL)
242 0
                        break;
243
        }
244 60
        if (sp == NULL) {
245 60
                VTAILQ_FOREACH(sp, &sc->free[NBUCKET -1], status)
246 60
                        if (sp->size >= bytes)
247 60
                                break;
248
        }
249 60
        if (sp == NULL)
250 0
                return (sp);
251
252 60
        assert(sp->size >= bytes);
253 60
        remfree(sc, sp);
254
255 60
        if (sp->size == bytes) {
256 0
                sp->alloc = 1;
257 0
                VTAILQ_INSERT_TAIL(&sc->used, sp, status);
258 0
                return (sp);
259
        }
260
261
        /* Split from front */
262 60
        sp2 = malloc(sizeof *sp2);
263 60
        XXXAN(sp2);
264 60
        sc->stats->g_smf++;
265 60
        *sp2 = *sp;
266
267 60
        sp->offset += bytes;
268 60
        sp->ptr += bytes;
269 60
        sp->size -= bytes;
270
271 60
        sp2->size = bytes;
272 60
        sp2->alloc = 1;
273 60
        VTAILQ_INSERT_BEFORE(sp, sp2, order);
274 60
        VTAILQ_INSERT_TAIL(&sc->used, sp2, status);
275 60
        insfree(sc, sp);
276 60
        return (sp2);
277
}
278
279
/*--------------------------------------------------------------------
280
 * Free a range.  Attempt merge forward and backward, then sort into
281
 * free list according to age.
282
 */
283
284
static void
285 60
free_smf(struct smf *sp)
286
{
287
        struct smf *sp2;
288 60
        struct smf_sc *sc = sp->sc;
289
290 60
        CHECK_OBJ_NOTNULL(sp, SMF_MAGIC);
291 60
        AN(sp->alloc);
292 60
        assert(sp->size > 0);
293 60
        AZ(sp->size % sc->pagesize);
294 60
        VTAILQ_REMOVE(&sc->used, sp, status);
295 60
        sp->alloc = 0;
296
297 60
        sp2 = VTAILQ_NEXT(sp, order);
298 112
        if (sp2 != NULL &&
299 66
            sp2->alloc == 0 &&
300 28
            (sp2->ptr == sp->ptr + sp->size) &&
301 14
            (sp2->offset == sp->offset + sp->size)) {
302 14
                sp->size += sp2->size;
303 14
                VTAILQ_REMOVE(&sc->order, sp2, order);
304 14
                remfree(sc, sp2);
305 14
                free(sp2);
306 14
                sc->stats->g_smf--;
307
        }
308
309 60
        sp2 = VTAILQ_PREV(sp, smfhead, order);
310 106
        if (sp2 != NULL &&
311 80
            sp2->alloc == 0 &&
312 68
            (sp->ptr == sp2->ptr + sp2->size) &&
313 34
            (sp->offset == sp2->offset + sp2->size)) {
314 34
                remfree(sc, sp2);
315 34
                sp2->size += sp->size;
316 34
                VTAILQ_REMOVE(&sc->order, sp, order);
317 34
                free(sp);
318 34
                sc->stats->g_smf--;
319 34
                sp = sp2;
320
        }
321
322 60
        insfree(sc, sp);
323 60
}
324
325
/*--------------------------------------------------------------------
326
 * Insert a newly created range as busy, then free it to do any collapses
327
 */
328
329
static void
330 8
new_smf(struct smf_sc *sc, unsigned char *ptr, off_t off, size_t len)
331
{
332
        struct smf *sp, *sp2;
333
334 8
        AZ(len % sc->pagesize);
335 8
        ALLOC_OBJ(sp, SMF_MAGIC);
336 8
        XXXAN(sp);
337 8
        sp->s.magic = STORAGE_MAGIC;
338 8
        sc->stats->g_smf++;
339
340 8
        sp->sc = sc;
341 8
        sp->size = len;
342 8
        sp->ptr = ptr;
343 8
        sp->offset = off;
344 8
        sp->alloc = 1;
345
346 8
        VTAILQ_FOREACH(sp2, &sc->order, order) {
347 0
                if (sp->ptr < sp2->ptr) {
348 0
                        VTAILQ_INSERT_BEFORE(sp2, sp, order);
349 0
                        break;
350
                }
351
        }
352 8
        if (sp2 == NULL)
353 8
                VTAILQ_INSERT_TAIL(&sc->order, sp, order);
354
355 8
        VTAILQ_INSERT_HEAD(&sc->used, sp, status);
356
357 8
        free_smf(sp);
358 8
}
359
360
/*--------------------------------------------------------------------*/
361
362
/*
363
 * XXX: This may be too aggressive and soak up too much address room.
364
 * XXX: On the other hand, the user, directly or implicitly asked us to
365
 * XXX: use this much storage, so we should make a decent effort.
366
 * XXX: worst case (I think), malloc will fail.
367
 */
368
369
static void
370 8
smf_open_chunk(struct smf_sc *sc, off_t sz, off_t off, off_t *fail, off_t *sum)
371
{
372
        void *p;
373
        off_t h;
374
375 8
        AN(sz);
376 8
        AZ(sz % sc->pagesize);
377
378 8
        if (*fail < (uintmax_t)sc->pagesize * MINPAGES)
379 0
                return;
380
381 8
        if (sz > 0 && sz < *fail && sz < SSIZE_MAX) {
382 8
                p = mmap(NULL, sz, PROT_READ|PROT_WRITE,
383
                    MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, off);
384 8
                if (p != MAP_FAILED) {
385 8
                        (void)madvise(p, sz, sc->advice);
386 8
                        (*sum) += sz;
387 8
                        new_smf(sc, p, off, sz);
388 8
                        return;
389
                }
390
        }
391
392 0
        if (sz < *fail)
393 0
                *fail = sz;
394
395 0
        h = sz / 2;
396
        if (h > SSIZE_MAX)
397
                h = SSIZE_MAX;
398 0
        h -= (h % sc->pagesize);
399
400 0
        smf_open_chunk(sc, h, off, fail, sum);
401 0
        smf_open_chunk(sc, sz - h, off + h, fail, sum);
402
}
403
404
static void v_matchproto_(storage_open_f)
405 8
smf_open(struct stevedore *st)
406
{
407
        struct smf_sc *sc;
408 8
        off_t fail = 1 << 30;   /* XXX: where is OFF_T_MAX ? */
409 8
        off_t sum = 0;
410
411 8
        ASSERT_CLI();
412 8
        st->lru = LRU_Alloc();
413 8
        if (lck_smf == NULL)
414 8
                lck_smf = Lck_CreateClass("smf");
415 8
        CAST_OBJ_NOTNULL(sc, st->priv, SMF_SC_MAGIC);
416 8
        sc->stats = VSC_smf_New(st->ident);
417 8
        Lck_New(&sc->mtx, lck_smf);
418 8
        Lck_Lock(&sc->mtx);
419 8
        smf_open_chunk(sc, sc->filesize, 0, &fail, &sum);
420 8
        Lck_Unlock(&sc->mtx);
421 8
        printf("SMF.%s mmap'ed %ju bytes of %ju\n",
422
            st->ident, (uintmax_t)sum, sc->filesize);
423
424
        /* XXX */
425 8
        if (sum < MINPAGES * (off_t)getpagesize())
426 0
                exit(4);
427
428 8
        sc->stats->g_space += sc->filesize;
429 8
}
430
431
/*--------------------------------------------------------------------*/
432
433
static struct storage * v_matchproto_(sml_alloc_f)
434 60
smf_alloc(const struct stevedore *st, size_t size)
435
{
436
        struct smf *smf;
437
        struct smf_sc *sc;
438
439 60
        CAST_OBJ_NOTNULL(sc, st->priv, SMF_SC_MAGIC);
440 60
        assert(size > 0);
441 60
        size += (sc->pagesize - 1UL);
442 60
        size &= ~(sc->pagesize - 1UL);
443 60
        Lck_Lock(&sc->mtx);
444 60
        sc->stats->c_req++;
445 60
        smf = alloc_smf(sc, size);
446 60
        if (smf == NULL) {
447 0
                sc->stats->c_fail++;
448 0
                Lck_Unlock(&sc->mtx);
449 0
                return (NULL);
450
        }
451 60
        CHECK_OBJ_NOTNULL(smf, SMF_MAGIC);
452 60
        sc->stats->g_alloc++;
453 60
        sc->stats->c_bytes += smf->size;
454 60
        sc->stats->g_bytes += smf->size;
455 60
        sc->stats->g_space -= smf->size;
456 60
        Lck_Unlock(&sc->mtx);
457 60
        CHECK_OBJ_NOTNULL(&smf->s, STORAGE_MAGIC);      /*lint !e774 */
458 60
        XXXAN(smf);
459 60
        assert(smf->size == size);
460 60
        smf->s.space = size;
461 60
        smf->s.priv = smf;
462 60
        smf->s.ptr = smf->ptr;
463 60
        smf->s.len = 0;
464 60
        return (&smf->s);
465
}
466
467
/*--------------------------------------------------------------------*/
468
469
static void v_matchproto_(sml_free_f)
470 52
smf_free(struct storage *s)
471
{
472
        struct smf *smf;
473
        struct smf_sc *sc;
474
475 52
        CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
476 52
        CAST_OBJ_NOTNULL(smf, s->priv, SMF_MAGIC);
477 52
        sc = smf->sc;
478 52
        Lck_Lock(&sc->mtx);
479 52
        sc->stats->g_alloc--;
480 52
        sc->stats->c_freed += smf->size;
481 52
        sc->stats->g_bytes -= smf->size;
482 52
        sc->stats->g_space += smf->size;
483 52
        free_smf(smf);
484 52
        Lck_Unlock(&sc->mtx);
485 52
}
486
487
/*--------------------------------------------------------------------*/
488
489
const struct stevedore smf_stevedore = {
490
        .magic          =       STEVEDORE_MAGIC,
491
        .name           =       "file",
492
        .init           =       smf_init,
493
        .open           =       smf_open,
494
        .sml_alloc      =       smf_alloc,
495
        .sml_free       =       smf_free,
496
        .allocobj       =       SML_allocobj,
497
        .panic          =       SML_panic,
498
        .methods        =       &SML_methods,
499
};
500
501
#ifdef INCLUDE_TEST_DRIVER
502
503
void vca_flush(struct sess *sp) {}
504
505
#define N       100
506
#define M       (128*1024)
507
508
struct storage *s[N];
509
510
static void
511
dumpit(void)
512
{
513
        struct smf_sc *sc = smf_stevedore.priv;
514
        struct smf *s;
515
516
        return (0);
517
        printf("----------------\n");
518
        printf("Order:\n");
519
        VTAILQ_FOREACH(s, &sc->order, order) {
520
                printf("%10p %12ju %12ju %12ju\n",
521
                    s, s->offset, s->size, s->offset + s->size);
522
        }
523
        printf("Used:\n");
524
        VTAILQ_FOREACH(s, &sc->used, status) {
525
                printf("%10p %12ju %12ju %12ju\n",
526
                    s, s->offset, s->size, s->offset + s->size);
527
        }
528
        printf("Free:\n");
529
        VTAILQ_FOREACH(s, &sc->free, status) {
530
                printf("%10p %12ju %12ju %12ju\n",
531
                    s, s->offset, s->size, s->offset + s->size);
532
        }
533
        printf("================\n");
534
}
535
536
int
537
main(int argc, char **argv)
538
{
539
        int i, j;
540
541
        setbuf(stdout, NULL);
542
        smf_init(&smf_stevedore, "");
543
        smf_open(&smf_stevedore);
544
        while (1) {
545
                dumpit();
546
                i = random() % N;
547
                do
548
                        j = random() % M;
549
                while (j == 0);
550
                if (s[i] == NULL) {
551
                        s[i] = smf_alloc(&smf_stevedore, j);
552
                        printf("A %10p %12d\n", s[i], j);
553
                } else {
554
                        smf_free(s[i]);
555
                        printf("D %10p\n", s[i]);
556
                        s[i] = NULL;
557
                }
558
        }
559
}
560
561
#endif /* INCLUDE_TEST_DRIVER */