varnish-cache/bin/varnishd/storage/storage_file.c
0
/*-
1
 * Copyright (c) 2006 Verdens Gang AS
2
 * Copyright (c) 2006-2011 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 *
30
 * Storage method based on mmap'ed file
31
 */
32
33
#include "config.h"
34
35
#include "cache/cache_varnishd.h"
36
#include "common/heritage.h"
37
38
#include <sys/mman.h>
39
40
#include <stdio.h>
41
#include <stdlib.h>
42
43
#include "storage/storage.h"
44
#include "storage/storage_simple.h"
45
46
#include "vnum.h"
47
#include "vfil.h"
48
49
#include "VSC_smf.h"
50
51
/*
 * Fallbacks for mmap(2) flags that are not available everywhere.
 * A missing flag is mapped onto a local equivalent when one exists and
 * is otherwise defined as 0, so it can still be OR'ed into the flags.
 */
#if !defined(MAP_NOCORE) && defined(MAP_CONCEAL)
#  define MAP_NOCORE MAP_CONCEAL /* XXX OpenBSD */
#elif !defined(MAP_NOCORE)
#  define MAP_NOCORE 0 /* XXX Linux */
#endif

#if !defined(MAP_NOSYNC)
#  define MAP_NOSYNC 0 /* XXX Linux */
#endif
62
63
/* Smallest usable storage, in pages; smf_open() rejects anything less */
#define MINPAGES                128
64
65
/*
66
 * Number of buckets on free-list.
67
 *
68
 * Last bucket is "larger than" so choose number so that the second
69
 * to last bucket matches the 128k CHUNKSIZE in cache_fetch.c when
70
 * using a 4K minimal page size
71
 */
72
#define NBUCKET                 (128 / 4 + 1)
73
74
/* Lock class used for smf_sc.mtx; created on the first smf_open() call */
static struct VSC_lck *lck_smf;
75
76
/*--------------------------------------------------------------------*/
77
78
/* Tail queue head type for lists of struct smf */
VTAILQ_HEAD(smfhead, smf);
79
80
struct smf {
81
        unsigned                magic;
82
#define SMF_MAGIC               0x0927a8a0
83
        struct storage          s;
84
        struct smf_sc           *sc;
85
86
        int                     alloc;
87
88
        off_t                   size;
89
        off_t                   offset;
90
        unsigned char           *ptr;
91
92
        VTAILQ_ENTRY(smf)       order;
93
        VTAILQ_ENTRY(smf)       status;
94
        struct smfhead          *flist;
95
};
96
97
struct smf_sc {
98
        unsigned                magic;
99
#define SMF_SC_MAGIC            0x52962ee7
100
        struct lock             mtx;
101
        struct VSC_smf          *stats;
102
103
        const char              *filename;
104
        int                     fd;
105
        unsigned                pagesize;
106
        uintmax_t               filesize;
107
        int                     advice;
108
        struct smfhead          order;
109
        struct smfhead          free[NBUCKET];
110
        struct smfhead          used;
111
};
112
113
/*--------------------------------------------------------------------*/
114
115
static void v_matchproto_(storage_init_f)
116 200
smf_init(struct stevedore *parent, int ac, char * const *av)
117
{
118
        const char *size, *fn, *r;
119
        struct smf_sc *sc;
120
        unsigned u;
121
        uintmax_t page_size;
122 200
        int advice = MADV_RANDOM;
123
124 200
        AZ(av[ac]);
125
126 200
        size = NULL;
127 200
        page_size = getpagesize();
128
129 200
        if (ac > 4)
130 0
                ARGV_ERR("(-sfile) too many arguments\n");
131 200
        if (ac < 1 || *av[0] == '\0')
132 0
                ARGV_ERR("(-sfile) path is mandatory\n");
133 200
        fn = av[0];
134 200
        if (ac > 1 && *av[1] != '\0')
135 200
                size = av[1];
136 200
        if (ac > 2 && *av[2] != '\0') {
137
138 25
                r = VNUM_2bytes(av[2], &page_size, 0);
139 25
                if (r != NULL)
140 25
                        ARGV_ERR("(-sfile) granularity \"%s\": %s\n", av[2], r);
141 0
        }
142 175
        if (ac > 3) {
143 25
                if (!strcmp(av[3], "normal"))
144 0
                        advice = MADV_NORMAL;
145 25
                else if (!strcmp(av[3], "random"))
146 0
                        advice = MADV_RANDOM;
147 25
                else if (!strcmp(av[3], "sequential"))
148 0
                        advice = MADV_SEQUENTIAL;
149
                else
150 25
                        ARGV_ERR("(-s file) invalid advice: \"%s\"", av[3]);
151 0
        }
152
153 150
        AN(fn);
154
155 150
        ALLOC_OBJ(sc, SMF_SC_MAGIC);
156 150
        XXXAN(sc);
157 150
        VTAILQ_INIT(&sc->order);
158 5100
        for (u = 0; u < NBUCKET; u++)
159 4950
                VTAILQ_INIT(&sc->free[u]);
160 150
        VTAILQ_INIT(&sc->used);
161 150
        sc->pagesize = page_size;
162 150
        sc->advice = advice;
163 150
        parent->priv = sc;
164
165 150
        (void)STV_GetFile(fn, &sc->fd, &sc->filename, "-sfile");
166 150
        MCH_Fd_Inherit(sc->fd, "storage_file");
167 150
        sc->filesize = STV_FileSize(sc->fd, size, &sc->pagesize, "-sfile");
168 150
        if (VFIL_allocate(sc->fd, (off_t)sc->filesize, 0))
169 0
                ARGV_ERR("(-sfile) allocation error: %s\n", VAS_errtxt(errno));
170 150
}
171
172
/*--------------------------------------------------------------------
173
 * Insert/Remove from correct freelist
174
 */
175
176
static void
177 1475
insfree(struct smf_sc *sc, struct smf *sp)
178
{
179
        off_t b, ns;
180
        struct smf *sp2;
181
182 1475
        AZ(sp->alloc);
183 1475
        assert(sp->flist == NULL);
184 1475
        Lck_AssertHeld(&sc->mtx);
185 1475
        b = sp->size / sc->pagesize;
186 1475
        if (b >= NBUCKET) {
187 1375
                b = NBUCKET - 1;
188 1375
                sc->stats->g_smf_large++;
189 1375
        } else {
190 100
                sc->stats->g_smf_frag++;
191
        }
192 1475
        sp->flist = &sc->free[b];
193 1475
        ns = b * sc->pagesize;
194 1475
        VTAILQ_FOREACH(sp2, sp->flist, status) {
195 0
                assert(sp2->size >= ns);
196 0
                AZ(sp2->alloc);
197 0
                assert(sp2->flist == sp->flist);
198 0
                if (sp->offset < sp2->offset)
199 0
                        break;
200 0
        }
201 1475
        if (sp2 == NULL)
202 1475
                VTAILQ_INSERT_TAIL(sp->flist, sp, status);
203
        else
204 0
                VTAILQ_INSERT_BEFORE(sp2, sp, status);
205 1475
}
206
207
static void
208 1325
remfree(const struct smf_sc *sc, struct smf *sp)
209
{
210
        size_t b;
211
212 1325
        AZ(sp->alloc);
213 1325
        assert(sp->flist != NULL);
214 1325
        Lck_AssertHeld(&sc->mtx);
215 1325
        b = sp->size / sc->pagesize;
216 1325
        if (b >= NBUCKET) {
217 1250
                b = NBUCKET - 1;
218 1250
                sc->stats->g_smf_large--;
219 1250
        } else {
220 75
                sc->stats->g_smf_frag--;
221
        }
222 1325
        assert(sp->flist == &sc->free[b]);
223 1325
        VTAILQ_REMOVE(sp->flist, sp, status);
224 1325
        sp->flist = NULL;
225 1325
}
226
227
/*--------------------------------------------------------------------
228
 * Allocate a range from the first free range that is large enough.
229
 */
230
231
static struct smf *
232 700
alloc_smf(struct smf_sc *sc, off_t bytes)
233
{
234
        struct smf *sp, *sp2;
235
        off_t b;
236
237 700
        AZ(bytes % sc->pagesize);
238 700
        b = bytes / sc->pagesize;
239 700
        if (b >= NBUCKET)
240 0
                b = NBUCKET - 1;
241 700
        sp = NULL;
242 21050
        for (; b < NBUCKET - 1; b++) {
243 20350
                sp = VTAILQ_FIRST(&sc->free[b]);
244 20350
                if (sp != NULL)
245 0
                        break;
246 20350
        }
247 700
        if (sp == NULL) {
248 700
                VTAILQ_FOREACH(sp, &sc->free[NBUCKET -1], status)
249 700
                        if (sp->size >= bytes)
250 700
                                break;
251 700
        }
252 700
        if (sp == NULL)
253 0
                return (sp);
254
255 700
        assert(sp->size >= bytes);
256 700
        remfree(sc, sp);
257
258 700
        if (sp->size == bytes) {
259 0
                sp->alloc = 1;
260 0
                VTAILQ_INSERT_TAIL(&sc->used, sp, status);
261 0
                return (sp);
262
        }
263
264
        /* Split from front */
265 700
        sp2 = malloc(sizeof *sp2);
266 700
        XXXAN(sp2);
267 700
        sc->stats->g_smf++;
268 700
        *sp2 = *sp;
269
270 700
        sp->offset += bytes;
271 700
        sp->ptr += bytes;
272 700
        sp->size -= bytes;
273
274 700
        sp2->size = bytes;
275 700
        sp2->alloc = 1;
276 700
        VTAILQ_INSERT_BEFORE(sp, sp2, order);
277 700
        VTAILQ_INSERT_TAIL(&sc->used, sp2, status);
278 700
        insfree(sc, sp);
279 700
        return (sp2);
280 700
}
281
282
/*--------------------------------------------------------------------
283
 * Free a range.  Attempt merge forward and backward, then sort into
284
 * free list according to age.
285
 */
286
287
static void
288 775
free_smf(struct smf *sp)
289
{
290
        struct smf *sp2;
291 775
        struct smf_sc *sc = sp->sc;
292
293 775
        CHECK_OBJ_NOTNULL(sp, SMF_MAGIC);
294 775
        AN(sp->alloc);
295 775
        assert(sp->size > 0);
296 775
        AZ(sp->size % sc->pagesize);
297 775
        VTAILQ_REMOVE(&sc->used, sp, status);
298 775
        sp->alloc = 0;
299
300 775
        sp2 = VTAILQ_NEXT(sp, order);
301 1325
        if (sp2 != NULL &&
302 650
            sp2->alloc == 0 &&
303 550
            (sp2->ptr == sp->ptr + sp->size) &&
304 550
            (sp2->offset == sp->offset + sp->size)) {
305 550
                sp->size += sp2->size;
306 550
                VTAILQ_REMOVE(&sc->order, sp2, order);
307 550
                remfree(sc, sp2);
308 550
                free(sp2);
309 550
                sc->stats->g_smf--;
310 550
        }
311
312 775
        sp2 = VTAILQ_PREV(sp, smfhead, order);
313 850
        if (sp2 != NULL &&
314 575
            sp2->alloc == 0 &&
315 75
            (sp->ptr == sp2->ptr + sp2->size) &&
316 75
            (sp->offset == sp2->offset + sp2->size)) {
317 75
                remfree(sc, sp2);
318 75
                sp2->size += sp->size;
319 75
                VTAILQ_REMOVE(&sc->order, sp, order);
320 75
                free(sp);
321 75
                sc->stats->g_smf--;
322 75
                sp = sp2;
323 75
        }
324
325 775
        insfree(sc, sp);
326 775
}
327
328
/*--------------------------------------------------------------------
329
 * Insert a newly created range as busy, then free it to do any collapses
330
 */
331
332
static void
333 125
new_smf(struct smf_sc *sc, unsigned char *ptr, off_t off, size_t len)
334
{
335
        struct smf *sp, *sp2;
336
337 125
        AZ(len % sc->pagesize);
338 125
        ALLOC_OBJ(sp, SMF_MAGIC);
339 125
        XXXAN(sp);
340 125
        sp->s.magic = STORAGE_MAGIC;
341 125
        sc->stats->g_smf++;
342
343 125
        sp->sc = sc;
344 125
        sp->size = len;
345 125
        sp->ptr = ptr;
346 125
        sp->offset = off;
347 125
        sp->alloc = 1;
348
349 125
        VTAILQ_FOREACH(sp2, &sc->order, order) {
350 0
                if (sp->ptr < sp2->ptr) {
351 0
                        VTAILQ_INSERT_BEFORE(sp2, sp, order);
352 0
                        break;
353
                }
354 0
        }
355 125
        if (sp2 == NULL)
356 125
                VTAILQ_INSERT_TAIL(&sc->order, sp, order);
357
358 125
        VTAILQ_INSERT_HEAD(&sc->used, sp, status);
359
360 125
        free_smf(sp);
361 125
}
362
363
/*--------------------------------------------------------------------*/
364
365
/*
366
 * XXX: This may be too aggressive and soak up too much address room.
367
 * XXX: On the other hand, the user, directly or implicitly asked us to
368
 * XXX: use this much storage, so we should make a decent effort.
369
 * XXX: worst case (I think), malloc will fail.
370
 */
371
372
static void
373 125
smf_open_chunk(struct smf_sc *sc, off_t sz, off_t off, off_t *fail, off_t *sum)
374
{
375
        void *p;
376
        off_t h;
377
378 125
        AN(sz);
379 125
        AZ(sz % sc->pagesize);
380
381 125
        if (*fail < (off_t)sc->pagesize * MINPAGES)
382 0
                return;
383
384 125
        if (sz > 0 && sz < *fail && sz < SSIZE_MAX) {
385 250
                p = mmap(NULL, sz, PROT_READ|PROT_WRITE,
386 125
                    MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, off);
387 125
                if (p != MAP_FAILED) {
388 125
                        (void)madvise(p, sz, sc->advice);
389 125
                        (*sum) += sz;
390 125
                        new_smf(sc, p, off, sz);
391 125
                        return;
392
                }
393 0
        }
394
395 0
        if (sz < *fail)
396 0
                *fail = sz;
397
398 0
        h = sz / 2;
399 0
        h -= (h % sc->pagesize);
400
401 0
        smf_open_chunk(sc, h, off, fail, sum);
402 0
        smf_open_chunk(sc, sz - h, off + h, fail, sum);
403 125
}
404
405
static void v_matchproto_(storage_open_f)
406 125
smf_open(struct stevedore *st)
407
{
408
        struct smf_sc *sc;
409 125
        off_t fail = 1 << 30;   /* XXX: where is OFF_T_MAX ? */
410 125
        off_t sum = 0;
411
412 125
        ASSERT_CLI();
413 125
        st->lru = LRU_Alloc();
414 125
        if (lck_smf == NULL)
415 100
                lck_smf = Lck_CreateClass(NULL, "smf");
416 125
        CAST_OBJ_NOTNULL(sc, st->priv, SMF_SC_MAGIC);
417 125
        sc->stats = VSC_smf_New(NULL, NULL, st->ident);
418 125
        Lck_New(&sc->mtx, lck_smf);
419 125
        Lck_Lock(&sc->mtx);
420 125
        smf_open_chunk(sc, sc->filesize, 0, &fail, &sum);
421 125
        Lck_Unlock(&sc->mtx);
422 125
        if (sum < MINPAGES * (off_t)getpagesize()) {
423 0
                ARGV_ERR(
424
                    "-sfile too small for this architecture,"
425
                    " minimum size is %jd MB\n",
426
                    (MINPAGES * (intmax_t)getpagesize()) / (1<<20)
427
                );
428 0
        }
429 125
        printf("SMF.%s mmap'ed %ju bytes of %ju\n",
430 125
            st->ident, (uintmax_t)sum, sc->filesize);
431
432
        /* XXX */
433 125
        if (sum < MINPAGES * (off_t)getpagesize())
434 0
                exit(4);
435
436 125
        sc->stats->g_space += sc->filesize;
437 125
}
438
439
/*--------------------------------------------------------------------*/
440
441
static struct storage * v_matchproto_(sml_alloc_f)
442 700
smf_alloc(const struct stevedore *st, size_t sz)
443
{
444
        struct smf *smf;
445
        struct smf_sc *sc;
446
        off_t size;
447
448 700
        CAST_OBJ_NOTNULL(sc, st->priv, SMF_SC_MAGIC);
449 700
        assert(sz > 0);
450
        // XXX missing OFF_T_MAX
451 700
        size = (off_t)sz;
452 700
        size += (sc->pagesize - 1UL);
453 700
        size &= ~(sc->pagesize - 1UL);
454 700
        Lck_Lock(&sc->mtx);
455 700
        sc->stats->c_req++;
456 700
        smf = alloc_smf(sc, size);
457 700
        if (smf == NULL) {
458 0
                sc->stats->c_fail++;
459 0
                Lck_Unlock(&sc->mtx);
460 0
                return (NULL);
461
        }
462 700
        CHECK_OBJ_NOTNULL(smf, SMF_MAGIC);
463 700
        sc->stats->g_alloc++;
464 700
        sc->stats->c_bytes += smf->size;
465 700
        sc->stats->g_bytes += smf->size;
466 700
        sc->stats->g_space -= smf->size;
467 700
        Lck_Unlock(&sc->mtx);
468 700
        CHECK_OBJ_NOTNULL(&smf->s, STORAGE_MAGIC);      /*lint !e774 */
469 700
        XXXAN(smf);
470 700
        assert(smf->size == size);
471 700
        smf->s.space = size;
472 700
        smf->s.priv = smf;
473 700
        smf->s.ptr = smf->ptr;
474 700
        smf->s.len = 0;
475 700
        return (&smf->s);
476 700
}
477
478
/*--------------------------------------------------------------------*/
479
480
static void v_matchproto_(sml_free_f)
481 650
smf_free(struct storage *s)
482
{
483
        struct smf *smf;
484
        struct smf_sc *sc;
485
486 650
        CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
487 650
        CAST_OBJ_NOTNULL(smf, s->priv, SMF_MAGIC);
488 650
        sc = smf->sc;
489 650
        Lck_Lock(&sc->mtx);
490 650
        sc->stats->g_alloc--;
491 650
        sc->stats->c_freed += smf->size;
492 650
        sc->stats->g_bytes -= smf->size;
493 650
        sc->stats->g_space += smf->size;
494 650
        free_smf(smf);
495 650
        Lck_Unlock(&sc->mtx);
496 650
}
497
498
/*--------------------------------------------------------------------*/
499
500
const struct stevedore smf_stevedore = {
501
        .magic          =       STEVEDORE_MAGIC,
502
        .name           =       "file",
503
        .init           =       smf_init,
504
        .open           =       smf_open,
505
        .sml_alloc      =       smf_alloc,
506
        .sml_free       =       smf_free,
507
        .allocobj       =       SML_allocobj,
508
        .panic          =       SML_panic,
509
        .methods        =       &SML_methods,
510
        .allocbuf       =       SML_AllocBuf,
511
        .freebuf        =       SML_FreeBuf,
512
};