varnish-cache/bin/varnishd/storage/storage_file.c
0
/*-
1
 * Copyright (c) 2006 Verdens Gang AS
2
 * Copyright (c) 2006-2011 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 *
30
 * Storage method based on mmap'ed file
31
 */
32
33
#include "config.h"
34
35
#include "cache/cache_varnishd.h"
36
#include "common/heritage.h"
37
38
#include <sys/mman.h>
39
40
#include <stdio.h>
41
#include <stdlib.h>
42
43
#include "storage/storage.h"
44
#include "storage/storage_simple.h"
45
46
#include "vnum.h"
47
#include "vfil.h"
48
49
#include "VSC_smf.h"
50
51
/*
 * Fallbacks for mmap(2) flags that not every platform provides.
 * A flag defined as 0 contributes nothing when OR'ed into the flag word.
 */
#if !defined(MAP_NOCORE) && defined(MAP_CONCEAL)
#  define MAP_NOCORE    MAP_CONCEAL     /* XXX OpenBSD */
#elif !defined(MAP_NOCORE)
#  define MAP_NOCORE    0               /* XXX Linux */
#endif

#if !defined(MAP_NOSYNC)
#  define MAP_NOSYNC    0               /* XXX Linux */
#endif
62
63
/*
 * Minimum number of pages: smf_open() refuses to run with less than this
 * much mapped storage, and smf_open_chunk() stops trying once mmap()
 * attempts smaller than this many pages have failed.
 */
#define MINPAGES                128

/*
 * Number of buckets on free-list.
 *
 * The last bucket is the "this size or larger" bucket, so the count is
 * chosen such that the second to last bucket matches the 128k CHUNKSIZE
 * in cache_fetch.c when using a 4K minimal page size.
 */
#define NBUCKET                 ((128 / 4) + 1)

/* Lock class for smf_sc.mtx, created by the first call to smf_open() */
static struct VSC_lck *lck_smf;
75
76
/*--------------------------------------------------------------------*/
77
78
VTAILQ_HEAD(smfhead, smf);
79
80
struct smf {
81
        unsigned                magic;
82
#define SMF_MAGIC               0x0927a8a0
83
        struct storage          s;
84
        struct smf_sc           *sc;
85
86
        int                     alloc;
87
88
        off_t                   size;
89
        off_t                   offset;
90
        unsigned char           *ptr;
91
92
        VTAILQ_ENTRY(smf)       order;
93
        VTAILQ_ENTRY(smf)       status;
94
        struct smfhead          *flist;
95
};
96
97
struct smf_sc {
98
        unsigned                magic;
99
#define SMF_SC_MAGIC            0x52962ee7
100
        struct lock             mtx;
101
        struct VSC_smf          *stats;
102
103
        const char              *filename;
104
        int                     fd;
105
        unsigned                pagesize;
106
        uintmax_t               filesize;
107
        int                     advice;
108
        struct smfhead          order;
109
        struct smfhead          free[NBUCKET];
110
        struct smfhead          used;
111
};
112
113
/*--------------------------------------------------------------------*/
114
115
static void v_matchproto_(storage_init_f)
116 320
smf_init(struct stevedore *parent, int ac, char * const *av)
117
{
118
        const char *size, *fn, *r;
119
        struct smf_sc *sc;
120
        unsigned u;
121
        uintmax_t page_size;
122 320
        int advice = MADV_RANDOM;
123
124 320
        AZ(av[ac]);
125
126 320
        size = NULL;
127 320
        page_size = getpagesize();
128
129 320
        if (ac > 4)
130 0
                ARGV_ERR("(-sfile) too many arguments\n");
131 320
        if (ac < 1 || *av[0] == '\0')
132 0
                ARGV_ERR("(-sfile) path is mandatory\n");
133 320
        fn = av[0];
134 320
        if (ac > 1 && *av[1] != '\0')
135 320
                size = av[1];
136 320
        if (ac > 2 && *av[2] != '\0') {
137
138 40
                r = VNUM_2bytes(av[2], &page_size, 0);
139 40
                if (r != NULL)
140 40
                        ARGV_ERR("(-sfile) granularity \"%s\": %s\n", av[2], r);
141 0
        }
142 280
        if (ac > 3) {
143 40
                if (!strcmp(av[3], "normal"))
144 0
                        advice = MADV_NORMAL;
145 40
                else if (!strcmp(av[3], "random"))
146 0
                        advice = MADV_RANDOM;
147 40
                else if (!strcmp(av[3], "sequential"))
148 0
                        advice = MADV_SEQUENTIAL;
149
                else
150 40
                        ARGV_ERR("(-s file) invalid advice: \"%s\"", av[3]);
151 0
        }
152
153 240
        AN(fn);
154
155 240
        ALLOC_OBJ(sc, SMF_SC_MAGIC);
156 240
        XXXAN(sc);
157 240
        VTAILQ_INIT(&sc->order);
158 8160
        for (u = 0; u < NBUCKET; u++)
159 7920
                VTAILQ_INIT(&sc->free[u]);
160 240
        VTAILQ_INIT(&sc->used);
161 240
        sc->pagesize = page_size;
162 240
        sc->advice = advice;
163 240
        parent->priv = sc;
164
165 240
        (void)STV_GetFile(fn, &sc->fd, &sc->filename, "-sfile");
166 240
        MCH_Fd_Inherit(sc->fd, "storage_file");
167 240
        sc->filesize = STV_FileSize(sc->fd, size, &sc->pagesize, "-sfile");
168 240
        if (VFIL_allocate(sc->fd, (off_t)sc->filesize, 0))
169 0
                ARGV_ERR("(-sfile) allocation error: %s\n", VAS_errtxt(errno));
170 240
}
171
172
/*--------------------------------------------------------------------
173
 * Insert/Remove from correct freelist
174
 */
175
176
static void
177 2360
insfree(struct smf_sc *sc, struct smf *sp)
178
{
179
        off_t b, ns;
180
        struct smf *sp2;
181
182 2360
        AZ(sp->alloc);
183 2360
        assert(sp->flist == NULL);
184 2360
        Lck_AssertHeld(&sc->mtx);
185 2360
        b = sp->size / sc->pagesize;
186 2360
        if (b >= NBUCKET) {
187 2200
                b = NBUCKET - 1;
188 2200
                sc->stats->g_smf_large++;
189 2200
        } else {
190 160
                sc->stats->g_smf_frag++;
191
        }
192 2360
        sp->flist = &sc->free[b];
193 2360
        ns = b * sc->pagesize;
194 2360
        VTAILQ_FOREACH(sp2, sp->flist, status) {
195 0
                assert(sp2->size >= ns);
196 0
                AZ(sp2->alloc);
197 0
                assert(sp2->flist == sp->flist);
198 0
                if (sp->offset < sp2->offset)
199 0
                        break;
200 0
        }
201 2360
        if (sp2 == NULL)
202 2360
                VTAILQ_INSERT_TAIL(sp->flist, sp, status);
203
        else
204 0
                VTAILQ_INSERT_BEFORE(sp2, sp, status);
205 2360
}
206
207
static void
208 2120
remfree(const struct smf_sc *sc, struct smf *sp)
209
{
210
        size_t b;
211
212 2120
        AZ(sp->alloc);
213 2120
        assert(sp->flist != NULL);
214 2120
        Lck_AssertHeld(&sc->mtx);
215 2120
        b = sp->size / sc->pagesize;
216 2120
        if (b >= NBUCKET) {
217 2000
                b = NBUCKET - 1;
218 2000
                sc->stats->g_smf_large--;
219 2000
        } else {
220 120
                sc->stats->g_smf_frag--;
221
        }
222 2120
        assert(sp->flist == &sc->free[b]);
223 2120
        VTAILQ_REMOVE(sp->flist, sp, status);
224 2120
        sp->flist = NULL;
225 2120
}
226
227
/*--------------------------------------------------------------------
228
 * Allocate a range from the first free range that is large enough.
229
 */
230
231
static struct smf *
232 1120
alloc_smf(struct smf_sc *sc, off_t bytes)
233
{
234
        struct smf *sp, *sp2;
235
        off_t b;
236
237 1120
        AZ(bytes % sc->pagesize);
238 1120
        b = bytes / sc->pagesize;
239 1120
        if (b >= NBUCKET)
240 0
                b = NBUCKET - 1;
241 1120
        sp = NULL;
242 33680
        for (; b < NBUCKET - 1; b++) {
243 32560
                sp = VTAILQ_FIRST(&sc->free[b]);
244 32560
                if (sp != NULL)
245 0
                        break;
246 32560
        }
247 1120
        if (sp == NULL) {
248 1120
                VTAILQ_FOREACH(sp, &sc->free[NBUCKET -1], status)
249 1120
                        if (sp->size >= bytes)
250 1120
                                break;
251 1120
        }
252 1120
        if (sp == NULL)
253 0
                return (sp);
254
255 1120
        assert(sp->size >= bytes);
256 1120
        remfree(sc, sp);
257
258 1120
        if (sp->size == bytes) {
259 0
                sp->alloc = 1;
260 0
                VTAILQ_INSERT_TAIL(&sc->used, sp, status);
261 0
                return (sp);
262
        }
263
264
        /* Split from front */
265 1120
        sp2 = malloc(sizeof *sp2);
266 1120
        XXXAN(sp2);
267 1120
        sc->stats->g_smf++;
268 1120
        *sp2 = *sp;
269
270 1120
        sp->offset += bytes;
271 1120
        sp->ptr += bytes;
272 1120
        sp->size -= bytes;
273
274 1120
        sp2->size = bytes;
275 1120
        sp2->alloc = 1;
276 1120
        VTAILQ_INSERT_BEFORE(sp, sp2, order);
277 1120
        VTAILQ_INSERT_TAIL(&sc->used, sp2, status);
278 1120
        insfree(sc, sp);
279 1120
        return (sp2);
280 1120
}
281
282
/*--------------------------------------------------------------------
283
 * Free a range.  Attempt merge forward and backward, then sort into
284
 * free list according to offset.
285
 */
286
287
static void
288 1240
free_smf(struct smf *sp)
289
{
290
        struct smf *sp2;
291 1240
        struct smf_sc *sc = sp->sc;
292
293 1240
        CHECK_OBJ_NOTNULL(sp, SMF_MAGIC);
294 1240
        AN(sp->alloc);
295 1240
        assert(sp->size > 0);
296 1240
        AZ(sp->size % sc->pagesize);
297 1240
        VTAILQ_REMOVE(&sc->used, sp, status);
298 1240
        sp->alloc = 0;
299
300 1240
        sp2 = VTAILQ_NEXT(sp, order);
301 2120
        if (sp2 != NULL &&
302 1040
            sp2->alloc == 0 &&
303 880
            (sp2->ptr == sp->ptr + sp->size) &&
304 880
            (sp2->offset == sp->offset + sp->size)) {
305 880
                sp->size += sp2->size;
306 880
                VTAILQ_REMOVE(&sc->order, sp2, order);
307 880
                remfree(sc, sp2);
308 880
                free(sp2);
309 880
                sc->stats->g_smf--;
310 880
        }
311
312 1240
        sp2 = VTAILQ_PREV(sp, smfhead, order);
313 1360
        if (sp2 != NULL &&
314 920
            sp2->alloc == 0 &&
315 120
            (sp->ptr == sp2->ptr + sp2->size) &&
316 120
            (sp->offset == sp2->offset + sp2->size)) {
317 120
                remfree(sc, sp2);
318 120
                sp2->size += sp->size;
319 120
                VTAILQ_REMOVE(&sc->order, sp, order);
320 120
                free(sp);
321 120
                sc->stats->g_smf--;
322 120
                sp = sp2;
323 120
        }
324
325 1240
        insfree(sc, sp);
326 1240
}
327
328
/*--------------------------------------------------------------------
329
 * Insert a newly created range as busy, then free it to do any collapses
330
 */
331
332
static void
333 200
new_smf(struct smf_sc *sc, unsigned char *ptr, off_t off, size_t len)
334
{
335
        struct smf *sp, *sp2;
336
337 200
        AZ(len % sc->pagesize);
338 200
        ALLOC_OBJ(sp, SMF_MAGIC);
339 200
        XXXAN(sp);
340 200
        sp->s.magic = STORAGE_MAGIC;
341 200
        sc->stats->g_smf++;
342
343 200
        sp->sc = sc;
344 200
        sp->size = len;
345 200
        sp->ptr = ptr;
346 200
        sp->offset = off;
347 200
        sp->alloc = 1;
348
349 200
        VTAILQ_FOREACH(sp2, &sc->order, order) {
350 0
                if (sp->ptr < sp2->ptr) {
351 0
                        VTAILQ_INSERT_BEFORE(sp2, sp, order);
352 0
                        break;
353
                }
354 0
        }
355 200
        if (sp2 == NULL)
356 200
                VTAILQ_INSERT_TAIL(&sc->order, sp, order);
357
358 200
        VTAILQ_INSERT_HEAD(&sc->used, sp, status);
359
360 200
        free_smf(sp);
361 200
}
362
363
/*--------------------------------------------------------------------*/
364
365
/*
366
 * XXX: This may be too aggressive and soak up too much address room.
367
 * XXX: On the other hand, the user, directly or implicitly asked us to
368
 * XXX: use this much storage, so we should make a decent effort.
369
 * XXX: worst case (I think), malloc will fail.
370
 */
371
372
static void
373 200
smf_open_chunk(struct smf_sc *sc, off_t sz, off_t off, off_t *fail, off_t *sum)
374
{
375
        void *p;
376
        off_t h;
377
378 200
        AN(sz);
379 200
        AZ(sz % sc->pagesize);
380
381 200
        if (*fail < (off_t)sc->pagesize * MINPAGES)
382 0
                return;
383
384 200
        if (sz > 0 && sz < *fail && sz < SSIZE_MAX) {
385 400
                p = mmap(NULL, sz, PROT_READ|PROT_WRITE,
386 200
                    MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, off);
387 200
                if (p != MAP_FAILED) {
388 200
                        (void)madvise(p, sz, sc->advice);
389 200
                        (*sum) += sz;
390 200
                        new_smf(sc, p, off, sz);
391 200
                        return;
392
                }
393 0
        }
394
395 0
        if (sz < *fail)
396 0
                *fail = sz;
397
398 0
        h = sz / 2;
399 0
        h -= (h % sc->pagesize);
400
401 0
        smf_open_chunk(sc, h, off, fail, sum);
402 0
        smf_open_chunk(sc, sz - h, off + h, fail, sum);
403 200
}
404
405
static void v_matchproto_(storage_open_f)
406 200
smf_open(struct stevedore *st)
407
{
408
        struct smf_sc *sc;
409 200
        off_t fail = 1 << 30;   /* XXX: where is OFF_T_MAX ? */
410 200
        off_t sum = 0;
411
412 200
        ASSERT_CLI();
413 200
        st->lru = LRU_Alloc();
414 200
        if (lck_smf == NULL)
415 160
                lck_smf = Lck_CreateClass(NULL, "smf");
416 200
        CAST_OBJ_NOTNULL(sc, st->priv, SMF_SC_MAGIC);
417 200
        sc->stats = VSC_smf_New(NULL, NULL, st->ident);
418 200
        Lck_New(&sc->mtx, lck_smf);
419 200
        Lck_Lock(&sc->mtx);
420 200
        smf_open_chunk(sc, sc->filesize, 0, &fail, &sum);
421 200
        Lck_Unlock(&sc->mtx);
422 200
        if (sum < MINPAGES * (off_t)getpagesize()) {
423 0
                ARGV_ERR(
424
                    "-sfile too small for this architecture,"
425
                    " minimum size is %jd MB\n",
426
                    (MINPAGES * (intmax_t)getpagesize()) / (1<<20)
427
                );
428 0
        }
429 200
        printf("SMF.%s mmap'ed %ju bytes of %ju\n",
430 200
            st->ident, (uintmax_t)sum, sc->filesize);
431
432
        /* XXX */
433 200
        if (sum < MINPAGES * (off_t)getpagesize())
434 0
                exit(4);
435
436 200
        sc->stats->g_space += sc->filesize;
437 200
}
438
439
/*--------------------------------------------------------------------*/
440
441
static struct storage * v_matchproto_(sml_alloc_f)
442 1120
smf_alloc(const struct stevedore *st, size_t sz)
443
{
444
        struct smf *smf;
445
        struct smf_sc *sc;
446
        off_t size;
447
448 1120
        CAST_OBJ_NOTNULL(sc, st->priv, SMF_SC_MAGIC);
449 1120
        assert(sz > 0);
450
        // XXX missing OFF_T_MAX
451 1120
        size = (off_t)sz;
452 1120
        size += (sc->pagesize - 1UL);
453 1120
        size &= ~(sc->pagesize - 1UL);
454 1120
        Lck_Lock(&sc->mtx);
455 1120
        sc->stats->c_req++;
456 1120
        smf = alloc_smf(sc, size);
457 1120
        if (smf == NULL) {
458 0
                sc->stats->c_fail++;
459 0
                Lck_Unlock(&sc->mtx);
460 0
                return (NULL);
461
        }
462 1120
        CHECK_OBJ_NOTNULL(smf, SMF_MAGIC);
463 1120
        sc->stats->g_alloc++;
464 1120
        sc->stats->c_bytes += smf->size;
465 1120
        sc->stats->g_bytes += smf->size;
466 1120
        sc->stats->g_space -= smf->size;
467 1120
        Lck_Unlock(&sc->mtx);
468 1120
        CHECK_OBJ_NOTNULL(&smf->s, STORAGE_MAGIC);      /*lint !e774 */
469 1120
        XXXAN(smf);
470 1120
        assert(smf->size == size);
471 1120
        smf->s.space = size;
472 1120
        smf->s.priv = smf;
473 1120
        smf->s.ptr = smf->ptr;
474 1120
        smf->s.len = 0;
475 1120
        return (&smf->s);
476 1120
}
477
478
/*--------------------------------------------------------------------*/
479
480
static void v_matchproto_(sml_free_f)
481 1040
smf_free(struct storage *s)
482
{
483
        struct smf *smf;
484
        struct smf_sc *sc;
485
486 1040
        CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
487 1040
        CAST_OBJ_NOTNULL(smf, s->priv, SMF_MAGIC);
488 1040
        sc = smf->sc;
489 1040
        Lck_Lock(&sc->mtx);
490 1040
        sc->stats->g_alloc--;
491 1040
        sc->stats->c_freed += smf->size;
492 1040
        sc->stats->g_bytes -= smf->size;
493 1040
        sc->stats->g_space += smf->size;
494 1040
        free_smf(smf);
495 1040
        Lck_Unlock(&sc->mtx);
496 1040
}
497
498
/*--------------------------------------------------------------------*/
499
500
const struct stevedore smf_stevedore = {
501
        .magic          =       STEVEDORE_MAGIC,
502
        .name           =       "file",
503
        .init           =       smf_init,
504
        .open           =       smf_open,
505
        .sml_alloc      =       smf_alloc,
506
        .sml_free       =       smf_free,
507
        .allocobj       =       SML_allocobj,
508
        .panic          =       SML_panic,
509
        .methods        =       &SML_methods,
510
        .allocbuf       =       SML_AllocBuf,
511
        .freebuf        =       SML_FreeBuf,
512
};