varnish-cache/bin/varnishd/common/common_vsmw.c
/*-
 * Copyright (c) 2010-2011 Varnish Software AS
 * All rights reserved.
 *
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * VSM stuff common to manager and child.
 *
 */

#include "config.h"

#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>

#include "vdef.h"
#include "vas.h"
#include "vsb.h"
#include "miniobj.h"
#include "vqueue.h"

#include "vfil.h"
#include "vrnd.h"

#include "heritage.h"
#include "vsmw.h"

#ifndef MAP_HASSEMAPHORE
#  define MAP_HASSEMAPHORE 0 /* XXX Linux */
#endif

#ifndef MAP_NOSYNC
#  define MAP_NOSYNC 0 /* XXX Linux */
#endif

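/*
 * Locking is left to the embedding process: vsmw_lock and vsmw_unlock
 * default to the no-op function below and can be pointed at real
 * lock/unlock functions before multiple threads touch the VSMW state.
 * vsmw_haslock only records whether the (logical) lock is held, so that
 * vsmw_assert_lock() can catch unlocked callers.
 *
 * A minimal sketch of how an embedder might hook in a pthread mutex;
 * the mutex and function names are illustrative, not part of this file:
 *
 *      static pthread_mutex_t my_vsm_mtx = PTHREAD_MUTEX_INITIALIZER;
 *      static void my_vsm_lock(void) { AZ(pthread_mutex_lock(&my_vsm_mtx)); }
 *      static void my_vsm_unlock(void) { AZ(pthread_mutex_unlock(&my_vsm_mtx)); }
 *
 *      vsmw_lock = my_vsm_lock;
 *      vsmw_unlock = my_vsm_unlock;
 */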
static void v_matchproto_(vsm_lock_f)
vsmw_dummy_lock(void)
{
}

static int vsmw_haslock;
vsm_lock_f *vsmw_lock = vsmw_dummy_lock;
vsm_lock_f *vsmw_unlock = vsmw_dummy_lock;

#define vsmw_assert_lock()      AN(vsmw_haslock)

#define vsmw_do_lock() vsmw_do_lock_(__func__, __LINE__)

#define vsmw_do_lock_(f, l)                             \
        do {                                    \
                vsmw_lock();                    \
                AZ(vsmw_haslock);               \
                vsmw_haslock = 1;               \
        } while(0)

#define vsmw_do_unlock() vsmw_do_unlock_(__func__, __LINE__)
#define vsmw_do_unlock_(f, l)                           \
        do {                                    \
                AN(vsmw_haslock);               \
                vsmw_haslock = 0;               \
                vsmw_unlock();                  \
        } while(0)

/*--------------------------------------------------------------------*/

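/*
 * Data model (as implemented below): a struct vsmw manages a directory
 * (vdirfd) containing an index file (idx) plus a number of "clusters",
 * each of which is a file mmap'ed into memory.  Segments (struct vsmwseg)
 * are named, non-overlapping ranges carved out of a cluster; every
 * segment addition or removal is recorded in the index file so that
 * readers can map (category, id) to (file, offset, length).
 */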
struct vsmw_cluster {
        unsigned                        magic;
#define VSMW_CLUSTER_MAGIC              0x28b74c00

        VTAILQ_ENTRY(vsmw_cluster)      list;
        struct vsmwseg                  *cseg;
        char                            *fn;
        size_t                          len;
        void                            *ptr;
        size_t                          next;
        int                             refs;
        int                             named;
};

struct vsmwseg {
        unsigned                        magic;
#define VSMWSEG_MAGIC                   0x7e4ccaea
        VTAILQ_ENTRY(vsmwseg)           list;
        struct vsmw_cluster             *cluster;

        char                            *category;
        size_t                          off;
        size_t                          len;
        char                            *id;
        void                            *ptr;
};

struct vsmw {
        unsigned                        magic;
#define VSMW_MAGIC                      0xc2ca2cd9
        int                             vdirfd;
        int                             mode;
        char                            *idx;
        VTAILQ_HEAD(, vsmw_cluster)     clusters;
        VTAILQ_HEAD(, vsmwseg)          segs;
        struct vsb                      *vsb;
        pid_t                           pid;
        time_t                          birth;
        uint64_t                        nsegs;
        uint64_t                        nsubs;
};

/* Allocations in clusters never start at offset zero */
#define VSM_CLUSTER_OFFSET 16

/*--------------------------------------------------------------------*/

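/*
 * Index file format, as written by vsmw_idx_head() and vsmw_fmt_index()
 * below: a header line
 *
 *      # <pid> <birth-time>
 *
 * followed by one line per event
 *
 *      <act> <cluster-file> <offset> <length> <category> <id>
 *
 * where <act> is '+' for a segment that was added and '-' for one that
 * was removed.  Consumers presumably replay the lines in order to
 * reconstruct the set of live segments.
 */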
static void
vsmw_idx_head(const struct vsmw *vsmw, int fd)
{
        char buf[64];

        bprintf(buf, "# %jd %jd\n", (intmax_t)vsmw->pid, (intmax_t)vsmw->birth);
        // XXX handle ENOSPC? #2764
        assert(write(fd, buf, strlen(buf)) == strlen(buf));
}

#define ASSERT_SEG_STR(x) do {                  \
                AN(x);                          \
                AZ(strchr(x, '\n'));            \
        } while (0);

static void
vsmw_fmt_index(const struct vsmw *vsmw, const struct vsmwseg *seg, char act)
{

        vsmw_assert_lock();
        CHECK_OBJ_NOTNULL(vsmw, VSMW_MAGIC);
        CHECK_OBJ_NOTNULL(seg, VSMWSEG_MAGIC);
        AN(seg->cluster);
        ASSERT_SEG_STR(seg->category);
        ASSERT_SEG_STR(seg->id);

        VSB_printf(vsmw->vsb, "%c %s %zu %zu %s %s\n",
            act,
            seg->cluster->fn,
            seg->off,
            seg->len,
            seg->category,
            seg->id);
}

/*--------------------------------------------------------------------*/

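/*
 * vsmw_mkent() leaves a candidate filename of the form
 * "_.<pfx>.<16 random hex digits>" in vsmw->vsb, looping until it finds
 * a name that does not yet exist in vsmw->vdirfd (openat() failing with
 * ENOENT).
 */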
static void
vsmw_mkent(const struct vsmw *vsmw, const char *pfx)
{
        int fd;
        uint64_t rn;

        AN(pfx);
        vsmw_assert_lock();
        while (1) {
                VSB_clear(vsmw->vsb);
                VSB_printf(vsmw->vsb, "_.%s", pfx);
                AZ(VRND_RandomCrypto(&rn, sizeof rn));
                VSB_printf(vsmw->vsb, ".%016jx", (uintmax_t)rn);
                AZ(VSB_finish(vsmw->vsb));
                fd = openat(vsmw->vdirfd, VSB_data(vsmw->vsb), O_RDONLY);
                if (fd < 0 && errno == ENOENT)
                        return;
                if (fd >= 0)
                        closefd(&fd);
        }
}

/*--------------------------------------------------------------------*/

static void
vsmw_append_record(struct vsmw *vsmw, struct vsmwseg *seg, char act)
{
        int fd;

        vsmw_assert_lock();
        CHECK_OBJ_NOTNULL(vsmw, VSMW_MAGIC);
        CHECK_OBJ_NOTNULL(seg, VSMWSEG_MAGIC);
        fd = openat(vsmw->vdirfd, vsmw->idx, O_APPEND | O_WRONLY);
        assert(fd >= 0);
        VSB_clear(vsmw->vsb);
        vsmw_fmt_index(vsmw, seg, act);
        AZ(VSB_finish(vsmw->vsb));
        XXXAZ(VSB_tofile(vsmw->vsb, fd)); // XXX handle ENOSPC? #2764
        closefd(&fd);
}

/*--------------------------------------------------------------------*/

static void
vsmw_addseg(struct vsmw *vsmw, struct vsmwseg *seg)
{

        vsmw_assert_lock();
        VTAILQ_INSERT_TAIL(&vsmw->segs, seg, list);
        vsmw_append_record(vsmw, seg, '+');
        vsmw->nsegs++;
}

/*--------------------------------------------------------------------*/

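/*
 * Removing a segment normally just appends a '-' record to the index,
 * but once the number of such deletions reaches at least ten and at
 * least half the number of live segments, the index is rewritten from
 * scratch: a fresh file gets only '+' records for the surviving
 * segments and is then renameat()'ed over the old index, resetting the
 * deletion counter.
 */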
static void
vsmw_delseg(struct vsmw *vsmw, struct vsmwseg *seg)
{
        char *t = NULL;
        int fd;
        struct vsmwseg *s2;

        vsmw_assert_lock();
        CHECK_OBJ_NOTNULL(vsmw, VSMW_MAGIC);
        CHECK_OBJ_NOTNULL(seg, VSMWSEG_MAGIC);

        VTAILQ_REMOVE(&vsmw->segs, seg, list);

        vsmw->nsegs--;
        if (vsmw->nsubs < 10 || vsmw->nsubs * 2 < vsmw->nsegs) {
                vsmw_append_record(vsmw, seg, '-');
                vsmw->nsubs++;
        } else {
                vsmw_mkent(vsmw, vsmw->idx);
                REPLACE(t, VSB_data(vsmw->vsb));
                fd = openat(vsmw->vdirfd,
                    t, O_WRONLY|O_CREAT|O_EXCL, vsmw->mode);
                assert(fd >= 0);
                vsmw_idx_head(vsmw, fd);
                VSB_clear(vsmw->vsb);
                VTAILQ_FOREACH(s2, &vsmw->segs, list)
                        vsmw_fmt_index(vsmw, s2, '+');
                AZ(VSB_finish(vsmw->vsb));
                XXXAZ(VSB_tofile(vsmw->vsb, fd)); // XXX handle ENOSPC? #2764
                closefd(&fd);
                AZ(renameat(vsmw->vdirfd, t, vsmw->vdirfd, vsmw->idx));
                REPLACE(t, NULL);
                vsmw->nsubs = 0;
        }
        REPLACE(seg->category, NULL);
        REPLACE(seg->id, NULL);
        FREE_OBJ(seg);
}

/*--------------------------------------------------------------------*/

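/*
 * A new cluster is backed by a freshly created file in vsmw->vdirfd:
 * the requested length is rounded up to whole pages, the file is
 * preallocated with VFIL_allocate() and then mmap'ed shared and
 * read/write.  The mlock() is best effort only; its return value is
 * deliberately ignored.
 */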
static struct vsmw_cluster *
vsmw_newcluster(struct vsmw *vsmw, size_t len, const char *pfx)
{
        struct vsmw_cluster *vc;
        int fd;
        size_t ps;

        vsmw_assert_lock();
        ALLOC_OBJ(vc, VSMW_CLUSTER_MAGIC);
        AN(vc);

        vsmw_mkent(vsmw, pfx);
        REPLACE(vc->fn, VSB_data(vsmw->vsb));

        VTAILQ_INSERT_TAIL(&vsmw->clusters, vc, list);

        ps = getpagesize();
        len = RUP2(len, ps);
        vc->len = len;

        fd = openat(vsmw->vdirfd, vc->fn,
            O_RDWR | O_CREAT | O_EXCL, vsmw->mode);
        assert(fd >= 0);

        AZ(VFIL_allocate(fd, (off_t)len, 1));

        vc->ptr = (void *)mmap(NULL, len,
            PROT_READ|PROT_WRITE,
            MAP_HASSEMAPHORE | MAP_NOSYNC | MAP_SHARED,
            fd, 0);

        closefd(&fd);
        assert(vc->ptr != MAP_FAILED);
        (void)mlock(vc->ptr, len);

        return (vc);
}

struct vsmw_cluster *
VSMW_NewCluster(struct vsmw *vsmw, size_t len, const char *pfx)
{
        struct vsmw_cluster *vc;
        struct vsmwseg *seg;

        vsmw_do_lock();
        vc = vsmw_newcluster(vsmw, len + VSM_CLUSTER_OFFSET, pfx);
        AN(vc);
        vc->next += VSM_CLUSTER_OFFSET;

        ALLOC_OBJ(seg, VSMWSEG_MAGIC);
        AN(seg);
        vc->cseg = seg;
        seg->len = vc->len;
        seg->cluster = vc;
        REPLACE(seg->category, "");
        REPLACE(seg->id, "");
        vc->refs++;
        vc->named = 1;
        vsmw_addseg(vsmw, seg);

        vsmw_do_unlock();
        return (vc);
}

static void
vsmw_DestroyCluster_locked(struct vsmw *vsmw, struct vsmw_cluster *vc)
{

        vsmw_assert_lock();
        CHECK_OBJ_NOTNULL(vsmw, VSMW_MAGIC);
        CHECK_OBJ_NOTNULL(vc, VSMW_CLUSTER_MAGIC);

        AZ(vc->refs);

        AZ(munmap(vc->ptr, vc->len));
        if (vc->named)
                vsmw_delseg(vsmw, vc->cseg);
        vc->cseg = 0;

        VTAILQ_REMOVE(&vsmw->clusters, vc, list);
        if (unlinkat(vsmw->vdirfd, vc->fn, 0))
                assert (errno == ENOENT);
        REPLACE(vc->fn, NULL);
        FREE_OBJ(vc);
}

void
VSMW_DestroyCluster(struct vsmw *vsmw, struct vsmw_cluster **vsmcp)
{
        struct vsmw_cluster *vc;

        TAKE_OBJ_NOTNULL(vc, vsmcp, VSMW_CLUSTER_MAGIC);

        vsmw_do_lock();
        if (--vc->refs == 0)
                vsmw_DestroyCluster_locked(vsmw, vc);
        vsmw_do_unlock();
}

/*--------------------------------------------------------------------*/

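/*
 * VSMW_Allocv()/VSMW_Allocf() hand out a segment of at least `payload'
 * bytes, either inside the caller-supplied cluster or, when vc is NULL,
 * inside a new single-segment cluster.  The printf-style arguments form
 * the segment id.  A hypothetical caller (type and names are
 * illustrative only) might do:
 *
 *      struct foo_stats *st;
 *
 *      st = VSMW_Allocf(vsmw, NULL, "Foo", sizeof *st, "foo.%s", name);
 *      AN(st);
 */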
void *
VSMW_Allocv(struct vsmw *vsmw, struct vsmw_cluster *vc,
    const char *category, size_t payload,
    const char *fmt, va_list va)
{
        struct vsmwseg *seg;

        vsmw_do_lock();
        CHECK_OBJ_NOTNULL(vsmw, VSMW_MAGIC);

        ALLOC_OBJ(seg, VSMWSEG_MAGIC);
        AN(seg);
        REPLACE(seg->category, category);
        seg->len = PRNDUP(payload);

        VSB_clear(vsmw->vsb);
        VSB_vprintf(vsmw->vsb, fmt, va);
        AZ(VSB_finish(vsmw->vsb));
        REPLACE(seg->id, VSB_data(vsmw->vsb));

        if (vc == NULL)
                vc = vsmw_newcluster(vsmw, seg->len, category);
        AN(vc);
        vc->refs++;

        seg->cluster = vc;
        seg->off = vc->next;
        vc->next += seg->len;
        assert(vc->next <= vc->len);
        seg->ptr = seg->off + (char*)vc->ptr;

        vsmw_addseg(vsmw, seg);

        vsmw_do_unlock();
        return (seg->ptr);
}

void *
VSMW_Allocf(struct vsmw *vsmw, struct vsmw_cluster *vc,
    const char *category, size_t len, const char *fmt, ...)
{
        va_list ap;
        void *p;

        va_start(ap, fmt);
        p = VSMW_Allocv(vsmw, vc, category, len, fmt, ap);
        va_end(ap);
        return (p);
}

/*--------------------------------------------------------------------*/

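/*
 * Freeing is by pointer: the segment list is searched linearly for the
 * segment whose ptr matches *pp, the segment is removed (and recorded
 * in the index), and the owning cluster is destroyed once its
 * reference count drops to zero.
 */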
void
VSMW_Free(struct vsmw *vsmw, void **pp)
{
        struct vsmwseg *seg;
        struct vsmw_cluster *cp;

        vsmw_do_lock();
        CHECK_OBJ_NOTNULL(vsmw, VSMW_MAGIC);
        AN(pp);
        VTAILQ_FOREACH(seg, &vsmw->segs, list)
                if (seg->ptr == *pp)
                        break;
        AN(seg);
        *pp = NULL;

        cp = seg->cluster;
        CHECK_OBJ_NOTNULL(cp, VSMW_CLUSTER_MAGIC);
        assert(cp->refs > 0);

        vsmw_delseg(vsmw, seg);

        if (!--cp->refs)
                vsmw_DestroyCluster_locked(vsmw, cp);
        vsmw_do_unlock();
}

/*--------------------------------------------------------------------*/

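/*
 * Lifecycle: VSMW_New() takes ownership of an already opened directory
 * file descriptor, removes any stale index file of the given name and
 * writes a fresh one containing only the "# <pid> <birth>" header.
 * VSMW_Destroy() deletes all remaining segments, unlinks the index and
 * closes the directory fd.
 */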
struct vsmw *
VSMW_New(int vdirfd, int mode, const char *idxname)
{
        struct vsmw *vsmw;
        int fd;

        assert(vdirfd > 0);
        assert(mode > 0);
        AN(idxname);

        vsmw_do_lock();
        ALLOC_OBJ(vsmw, VSMW_MAGIC);
        AN(vsmw);

        VTAILQ_INIT(&vsmw->segs);
        VTAILQ_INIT(&vsmw->clusters);
        vsmw->vsb = VSB_new_auto();
        AN(vsmw->vsb);
        REPLACE(vsmw->idx, idxname);
        vsmw->mode = mode;
        vsmw->vdirfd = vdirfd;
        vsmw->pid = getpid();
        vsmw->birth = time(NULL);

        if (unlinkat(vdirfd, vsmw->idx, 0))
                assert (errno == ENOENT);
        fd = openat(vdirfd,
            vsmw->idx, O_APPEND | O_WRONLY | O_CREAT, vsmw->mode);
        assert(fd >= 0);
        vsmw_idx_head(vsmw, fd);
        closefd(&fd);

        vsmw_do_unlock();
        return (vsmw);
}

void
VSMW_Destroy(struct vsmw **pp)
{
        struct vsmw *vsmw;
        struct vsmwseg *seg, *s2;

        vsmw_do_lock();
        TAKE_OBJ_NOTNULL(vsmw, pp, VSMW_MAGIC);
        VTAILQ_FOREACH_SAFE(seg, &vsmw->segs, list, s2)
                vsmw_delseg(vsmw, seg);
        if (unlinkat(vsmw->vdirfd, vsmw->idx, 0))
                assert (errno == ENOENT);
        REPLACE(vsmw->idx, NULL);
        VSB_destroy(&vsmw->vsb);
        closefd(&vsmw->vdirfd);
        FREE_OBJ(vsmw);
        vsmw_do_unlock();
}