varnish-cache/bin/varnishd/cache/cache_hash.c
1
/*-
2
 * Copyright (c) 2006 Verdens Gang AS
3
 * Copyright (c) 2006-2015 Varnish Software AS
4
 * All rights reserved.
5
 *
6
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 *
29
 * This is the central hash-table code, it relies on a chosen hash
30
 * implementation only for the actual hashing, all the housekeeping
31
 * happens here.
32
 *
33
 * We have two kinds of structures, objecthead and object.  An objecthead
34
 * corresponds to a given (Host:, URL) tupple, and the objects hung from
35
 * the objecthead may represent various variations (ie: Vary: header,
36
 * different TTL etc) instances of that web-entity.
37
 *
38
 * Each objecthead has a mutex which locks both its own fields, the
39
 * list of objects and fields in the objects.
40
 *
41
 * The hash implementation must supply a reference count facility on
42
 * the objecthead, and return with a reference held after a lookup.
43
 *
44
 * Lookups in the hash implementation returns with a ref held and each
45
 * object hung from the objhead holds a ref as well.
46
 *
47
 * Objects have refcounts which are locked by the objecthead mutex.
48
 *
49
 * New objects are always marked busy, and they can go from busy to
50
 * not busy only once.
51
 */
52
53
#include "config.h"
54
55
#include <stdio.h>
56
#include <stdlib.h>
57
58
#include "cache_varnishd.h"
59
60
#include "cache/cache_objhead.h"
61
#include "cache/cache_transport.h"
62
63
#include "hash/hash_slinger.h"
64
65
#include "vsha256.h"
66
67
/*
 * Transient collection of requests picked off an objhead's waiting
 * list while the objhead mutex is held (hsh_rush1), to be rescheduled
 * after the mutex has been released (hsh_rush2).
 */
struct rush {
	unsigned		magic;
#define RUSH_MAGIC		0xa1af5f01
	VTAILQ_HEAD(,req)	reqs;	/* requests to wake up */
};
72
73
static const struct hash_slinger *hash;
74
static struct objhead *private_oh;
75
76
static void hsh_rush1(const struct worker *, struct objhead *,
77
    struct rush *, int);
78
static void hsh_rush2(struct worker *, struct rush *);
79
static int HSH_DerefObjHead(struct worker *wrk, struct objhead **poh);
80
81
/*---------------------------------------------------------------------*/
82
83
static struct objhead *
84 7502
hsh_newobjhead(void)
85
{
86
        struct objhead *oh;
87
88 7502
        ALLOC_OBJ(oh, OBJHEAD_MAGIC);
89 7502
        XXXAN(oh);
90 7502
        oh->refcnt = 1;
91 7502
        VTAILQ_INIT(&oh->objcs);
92 7502
        VTAILQ_INIT(&oh->waitinglist);
93 7502
        Lck_New(&oh->mtx, lck_objhdr);
94 7502
        return (oh);
95
}
96
97
/*---------------------------------------------------------------------*/
98
/* Precreate an objhead and object for later use */
99
static void
100 7259
hsh_prealloc(struct worker *wrk)
101
{
102
103 7259
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
104
105 7259
        if (wrk->nobjcore == NULL)
106 5258
                wrk->nobjcore = ObjNew(wrk);
107 7259
        CHECK_OBJ_NOTNULL(wrk->nobjcore, OBJCORE_MAGIC);
108
109 7259
        if (wrk->nobjhead == NULL) {
110 4750
                wrk->nobjhead = hsh_newobjhead();
111 4750
                wrk->stats->n_objecthead++;
112
        }
113 7259
        CHECK_OBJ_NOTNULL(wrk->nobjhead, OBJHEAD_MAGIC);
114
115 7259
        if (hash->prep != NULL)
116 7235
                hash->prep(wrk);
117 7259
}
118
119
/*---------------------------------------------------------------------*/
120
121
struct objcore *
122 3380
HSH_Private(const struct worker *wrk)
123
{
124
        struct objcore *oc;
125
126 3380
        CHECK_OBJ_NOTNULL(private_oh, OBJHEAD_MAGIC);
127
128 3380
        oc = ObjNew(wrk);
129 3380
        AN(oc);
130 3380
        oc->refcnt = 1;
131 3380
        oc->objhead = private_oh;
132 3380
        oc->flags |= OC_F_PRIVATE;
133 3380
        Lck_Lock(&private_oh->mtx);
134 3380
        VTAILQ_INSERT_TAIL(&private_oh->objcs, oc, hsh_list);
135 3380
        private_oh->refcnt++;
136 3380
        Lck_Unlock(&private_oh->mtx);
137 3380
        return (oc);
138
}
139
140
/*---------------------------------------------------------------------*/
141
void
142 92
HSH_Cleanup(struct worker *wrk)
143
{
144
145 92
        if (wrk->nobjcore != NULL)
146 1
                ObjDestroy(wrk, &wrk->nobjcore);
147
148 92
        if (wrk->nobjhead != NULL) {
149 1
                Lck_Delete(&wrk->nobjhead->mtx);
150 1
                FREE_OBJ(wrk->nobjhead);
151 1
                wrk->nobjhead = NULL;
152 1
                wrk->stats->n_objecthead--;
153
        }
154 92
        if (wrk->nhashpriv != NULL) {
155
                /* XXX: If needed, add slinger method for this */
156 8
                free(wrk->nhashpriv);
157 8
                wrk->nhashpriv = NULL;
158
        }
159 92
}
160
161
void
162 0
HSH_DeleteObjHead(const struct worker *wrk, struct objhead *oh)
163
{
164
165 0
        AZ(oh->refcnt);
166 0
        assert(VTAILQ_EMPTY(&oh->objcs));
167 0
        assert(VTAILQ_EMPTY(&oh->waitinglist));
168 0
        Lck_Delete(&oh->mtx);
169 0
        wrk->stats->n_objecthead--;
170 0
        FREE_OBJ(oh);
171 0
}
172
173
void
174 39432
HSH_AddString(struct req *req, void *ctx, const char *str)
175
{
176
177 39432
        CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
178 39432
        AN(ctx);
179 39432
        if (str != NULL) {
180 19716
                VSHA256_Update(ctx, str, strlen(str));
181 19716
                VSLb(req->vsl, SLT_Hash, "%s", str);
182
        } else
183 19716
                VSHA256_Update(ctx, &str, 1);
184 39432
}
185
186
/*---------------------------------------------------------------------
187
 * This is a debugging hack to enable testing of boundary conditions
188
 * in the hash algorithm.
189
 * We trap the first 9 different digests and translate them to different
190
 * digests with edge bit conditions
191
 */
192
193
/*
 * Translation table for the DBG_HASHEDGE debug hack: .was records the
 * first distinct digests seen at runtime, .now holds the replacement
 * digests exercising edge-bit conditions (low/high bits of first and
 * last byte).
 */
static struct hsh_magiclist {
	unsigned char was[VSHA256_LEN];
	unsigned char now[VSHA256_LEN];
} hsh_magiclist[] = {
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 } },
	{ .now = {	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
};

/* Number of entries in the translation table */
#define HSH_NMAGIC (sizeof hsh_magiclist / sizeof hsh_magiclist[0])
236
237
static void
238 72
hsh_testmagic(void *result)
239
{
240
        int i, j;
241
        static int nused = 0;
242
243 360
        for (i = 0; i < nused; i++)
244 324
                if (!memcmp(hsh_magiclist[i].was, result, VSHA256_LEN))
245 36
                        break;
246 72
        if (i == nused && i < HSH_NMAGIC)
247 36
                memcpy(hsh_magiclist[nused++].was, result, VSHA256_LEN);
248 72
        if (i == nused)
249 0
                return;
250 72
        assert(i < HSH_NMAGIC);
251 72
        fprintf(stderr, "HASHMAGIC: <");
252 2376
        for (j = 0; j < VSHA256_LEN; j++)
253 2304
                fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
254 72
        fprintf(stderr, "> -> <");
255 72
        memcpy(result, hsh_magiclist[i].now, VSHA256_LEN);
256 2376
        for (j = 0; j < VSHA256_LEN; j++)
257 2304
                fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
258 72
        fprintf(stderr, ">\n");
259
}
260
261
/*---------------------------------------------------------------------
262
 * Insert an object which magically appears out of nowhere or, more likely,
263
 * comes off some persistent storage device.
264
 * Insert it with a reference held.
265
 */
266
267
/*
 * Insert a preexisting object (typically off persistent storage) into
 * the cache.  The caller's single busy reference is consumed by the
 * insertion; an extra reference is taken for the expiry machinery.
 */
void
HSH_Insert(struct worker *wrk, const void *digest, struct objcore *oc,
    struct ban *ban)
{
	struct objhead *oh;
	struct rush rush;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	AN(digest);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	AN(ban);
	AN(oc->flags & OC_F_BUSY);
	AZ(oc->flags & OC_F_PRIVATE);
	assert(oc->refcnt == 1);
	INIT_OBJ(&rush, RUSH_MAGIC);

	hsh_prealloc(wrk);

	/* Find (or create) the objhead; returns with oh->mtx held */
	AN(wrk->nobjhead);
	oh = hash->lookup(wrk, digest, &wrk->nobjhead);
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	Lck_AssertHeld(&oh->mtx);
	assert(oh->refcnt > 0);

	/* Mark object busy and insert (precreated) objcore in
	   objecthead. The new object inherits our objhead reference. */
	oc->objhead = oh;
	VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
	oc->refcnt++;				// For EXP_Insert
	Lck_Unlock(&oh->mtx);

	/* Attach the ban and hand the object to the expiry machinery,
	   outside the objhead mutex */
	BAN_RefBan(oc, ban);
	AN(oc->ban);
	EXP_Insert(wrk, oc);

	/* Move the object first in the oh list, unbusy it and run the
	   waitinglist if necessary */
	Lck_Lock(&oh->mtx);
	VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
	VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
	oc->flags &= ~OC_F_BUSY;
	if (!VTAILQ_EMPTY(&oh->waitinglist))
		hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY);
	Lck_Unlock(&oh->mtx);
	/* Wake up any waiters after dropping the mutex */
	hsh_rush2(wrk, &rush);
}
313
314
/*---------------------------------------------------------------------
315
 */
316
317
static struct objcore *
318 4532
hsh_insert_busyobj(struct worker *wrk, struct objhead *oh)
319
{
320
        struct objcore *oc;
321
322 4532
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
323 4532
        Lck_AssertHeld(&oh->mtx);
324
325 4532
        oc = wrk->nobjcore;
326 4532
        wrk->nobjcore = NULL;
327 4532
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
328
329 4532
        AN(oc->flags & OC_F_BUSY);
330 4532
        oc->refcnt = 1;         /* Owned by busyobj */
331 4532
        oc->objhead = oh;
332 4532
        VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
333 4532
        return (oc);
334
}
335
336
/*---------------------------------------------------------------------
337
 */
338
339
/*
 * The central cache lookup.
 *
 * Returns one of:
 *   HSH_HIT      - *ocp is a usable object (referenced).
 *   HSH_MISS     - *bocp is a new busy objcore to fetch into; *ocp may
 *                  additionally hold a stale/hitpass-related object.
 *   HSH_EXPBUSY  - *ocp is a graced stale object, *bocp a busy objcore.
 *   HSH_EXP      - *ocp is a graced stale object, somebody else is
 *                  already fetching.
 *   HSH_BUSY     - the req was parked on the objhead waiting list.
 *
 * On HSH_BUSY the objhead reference is stashed in req->hash_objhead
 * and this function is re-entered when the req is rushed.
 */
enum lookup_e
HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp,
    int always_insert)
{
	struct worker *wrk;
	struct objhead *oh;
	struct objcore *oc;
	struct objcore *exp_oc;
	vtim_real exp_t_origin;
	int busy_found;
	enum lookup_e retval;
	const uint8_t *vary;

	AN(ocp);
	*ocp = NULL;
	AN(bocp);
	*bocp = NULL;

	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
	wrk = req->wrk;
	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC);
	AN(hash);

	hsh_prealloc(wrk);
	if (DO_DEBUG(DBG_HASHEDGE))
		hsh_testmagic(req->digest);

	if (req->hash_objhead != NULL) {
		/*
		 * This req came off the waiting list, and brings an
		 * oh refcnt with it.
		 */
		CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC);
		oh = req->hash_objhead;
		Lck_Lock(&oh->mtx);
		req->hash_objhead = NULL;
	} else {
		AN(wrk->nobjhead);
		oh = hash->lookup(wrk, req->digest, &wrk->nobjhead);
	}

	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	Lck_AssertHeld(&oh->mtx);

	if (always_insert) {
		/* XXX: should we do predictive Vary in this case ? */
		/* Insert new objcore in objecthead and release mutex */
		*bocp = hsh_insert_busyobj(wrk, oh);
		/* NB: no deref of objhead, new object inherits reference */
		Lck_Unlock(&oh->mtx);
		return (HSH_MISS);
	}

	assert(oh->refcnt > 0);
	busy_found = 0;
	exp_oc = NULL;
	exp_t_origin = 0.0;
	/* Walk every objcore on this objhead, looking for a usable one */
	VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
		/* Must be at least our own ref + the objcore we examine */
		assert(oh->refcnt > 1);
		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
		assert(oc->objhead == oh);
		assert(oc->refcnt > 0);

		if (oc->flags & OC_F_DYING)
			continue;
		if (oc->flags & OC_F_FAILED)
			continue;

		CHECK_OBJ_ORNULL(oc->boc, BOC_MAGIC);
		/* A fetch in progress that has not yet started streaming */
		if (oc->boc != NULL && oc->boc->state < BOS_STREAM) {
			if (req->hash_ignore_busy)
				continue;

			if (oc->boc->vary != NULL &&
			    !VRY_Match(req, oc->boc->vary))
				continue;

			busy_found = 1;
			continue;
		}

		if (oc->ttl <= 0.)
			continue;

		/* Banned objects are flagged dying and skipped */
		if (BAN_CheckObject(wrk, oc, req)) {
			oc->flags |= OC_F_DYING;
			EXP_Remove(oc);
			continue;
		}

		if (ObjHasAttr(wrk, oc, OA_VARY)) {
			vary = ObjGetAttr(wrk, oc, OA_VARY, NULL);
			AN(vary);
			if (!VRY_Match(req, vary))
				continue;
		}

		if (EXP_Ttl(req, oc) > req->t_req) {
			/* If still valid, use it */
			assert(oh->refcnt > 1);
			assert(oc->objhead == oh);
			if (oc->flags & OC_F_HFP) {
				/* hit-for-pass: treat as a miss, no oc */
				wrk->stats->cache_hitpass++;
				VSLb(req->vsl, SLT_HitPass, "%u %.6f",
				    ObjGetXID(wrk, oc), EXP_Dttl(req, oc));
				oc = NULL;
				retval = HSH_MISS;
			} else if (oc->flags & OC_F_PASS) {
				/* hit-for-miss: start a fetch alongside */
				wrk->stats->cache_hitmiss++;
				VSLb(req->vsl, SLT_HitMiss, "%u %.6f",
				    ObjGetXID(wrk, oc), EXP_Dttl(req, oc));
				*bocp = hsh_insert_busyobj(wrk, oh);
				oc->refcnt++;
				retval = HSH_MISS;
			} else {
				oc->refcnt++;
				if (oc->hits < LONG_MAX)
					oc->hits++;
				retval = HSH_HIT;
			}
			Lck_Unlock(&oh->mtx);
			*ocp = oc;
			/* Without a busy objcore the oh ref is ours to drop */
			if (*bocp == NULL)
				assert(HSH_DerefObjHead(wrk, &oh));
			return (retval);
		}
		if (EXP_Ttl(NULL, oc) < req->t_req && /* ignore req.ttl */
		    oc->t_origin > exp_t_origin) {
			/* record the newest object */
			exp_oc = oc;
			exp_t_origin = oc->t_origin;
		}
	}

	/* A stale hit-for-miss object cannot be served stale */
	if (exp_oc != NULL && exp_oc->flags & OC_F_PASS) {
		wrk->stats->cache_hitmiss++;
		VSLb(req->vsl, SLT_HitMiss, "%u %.6f", ObjGetXID(wrk, exp_oc),
		    EXP_Dttl(req, exp_oc));
		exp_oc = NULL;
		busy_found = 0;
	}

	if (!busy_found) {
		/* Insert objcore in objecthead */
		*bocp = hsh_insert_busyobj(wrk, oh);

		if (exp_oc != NULL) {
			assert(oh->refcnt > 1);
			assert(exp_oc->objhead == oh);
			exp_oc->refcnt++;
			Lck_Unlock(&oh->mtx);
			*ocp = exp_oc;

			if (EXP_Ttl_grace(req, exp_oc) < req->t_req) {
				retval = HSH_MISS;
			} else {
				if (exp_oc->hits < LONG_MAX)
					exp_oc->hits++;
				retval = HSH_EXPBUSY;
			}
		} else {
			Lck_Unlock(&oh->mtx);
			retval = HSH_MISS;
		}
		return (retval);
	}

	AN(busy_found);
	if (exp_oc != NULL && EXP_Ttl_grace(req, exp_oc) >= req->t_req) {
		/* we do not wait on the busy object if in grace */
		assert(oh->refcnt > 1);
		assert(exp_oc->objhead == oh);
		exp_oc->refcnt++;
		Lck_Unlock(&oh->mtx);
		*ocp = exp_oc;

		assert(HSH_DerefObjHead(wrk, &oh));
		if (exp_oc->hits < LONG_MAX)
			exp_oc->hits++;
		return (HSH_EXP);
	}

	/* There are one or more busy objects, wait for them */

	AZ(req->hash_ignore_busy);

	VTAILQ_INSERT_TAIL(&oh->waitinglist, req, w_list);
	if (DO_DEBUG(DBG_WAITINGLIST))
		VSLb(req->vsl, SLT_Debug, "on waiting list <%p>", oh);

	wrk->stats->busy_sleep++;
	/*
	 * The objhead reference transfers to the sess, we get it
	 * back when the sess comes off the waiting list and
	 * calls us again
	 */
	req->hash_objhead = oh;
	req->wrk = NULL;
	req->waitinglist = 1;
	Lck_Unlock(&oh->mtx);
	return (HSH_BUSY);
}
544
545
/*---------------------------------------------------------------------
546
 * Pick the req's we are going to rush from the waiting list
547
 */
548
549
static void
550 95
hsh_rush1(const struct worker *wrk, struct objhead *oh, struct rush *r, int max)
551
{
552
        unsigned u;
553
        struct req *req;
554
555 95
        if (max == 0)
556 0
                return;
557 95
        if (max == HSH_RUSH_POLICY)
558 95
                max = cache_param->rush_exponent;
559 95
        assert(max > 0);
560
561 95
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
562 95
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
563 95
        CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
564 95
        VTAILQ_INIT(&r->reqs);
565 95
        Lck_AssertHeld(&oh->mtx);
566 206
        for (u = 0; u < max; u++) {
567 198
                req = VTAILQ_FIRST(&oh->waitinglist);
568 198
                if (req == NULL)
569 87
                        break;
570 111
                CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
571 111
                wrk->stats->busy_wakeup++;
572 111
                AZ(req->wrk);
573 111
                VTAILQ_REMOVE(&oh->waitinglist, req, w_list);
574 111
                VTAILQ_INSERT_TAIL(&r->reqs, req, w_list);
575 111
                req->waitinglist = 0;
576
        }
577
}
578
579
/*---------------------------------------------------------------------
580
 * Rush req's that came from waiting list.
581
 */
582
583
static void
584 25460
hsh_rush2(struct worker *wrk, struct rush *r)
585
{
586
        struct req *req;
587
588 25460
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
589 25460
        CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
590
591 51031
        while (!VTAILQ_EMPTY(&r->reqs)) {
592 111
                req = VTAILQ_FIRST(&r->reqs);
593 111
                CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
594 111
                VTAILQ_REMOVE(&r->reqs, req, w_list);
595 111
                DSL(DBG_WAITINGLIST, req->vsl->wid, "off waiting list");
596 111
                if (req->transport->reembark != NULL) {
597
                        // For ESI includes
598 4
                        req->transport->reembark(wrk, req);
599
                } else {
600
                        /*
601
                         * We ignore the queue limits which apply to new
602
                         * requests because if we fail to reschedule there
603
                         * may be vmod_privs to cleanup and we need a proper
604
                         * workerthread for that.
605
                         */
606 107
                        AZ(Pool_Task(req->sp->pool, &req->task, TASK_QUEUE_RUSH));
607
                }
608
        }
609 25460
}
610
611
/*---------------------------------------------------------------------
612
 * Purge an entire objhead
613
 */
614
615
/*
 * Purge an entire objhead: rearm the expiry of every (non-busy,
 * non-dying) objcore hung off it.  The candidate objcores are staged
 * in the worker's aws workspace in batches; if the workspace cannot
 * hold them all, the objhead list is walked again (OC_F_PURGED marks
 * the ones already handled).  Returns the number of objcores purged.
 */
unsigned
HSH_Purge(struct worker *wrk, struct objhead *oh, vtim_real ttl_now,
    vtim_dur ttl, vtim_dur grace, vtim_dur keep)
{
	struct objcore *oc, **ocp;
	unsigned spc, ospc, nobj, n, n_tot = 0;
	int more = 0;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	ospc = WS_Reserve(wrk->aws, 0);
	assert(ospc >= sizeof *ocp);
	/*
	 * Because of "soft" purges, there might be oc's in the list that has
	 * the OC_F_PURGED flag set. We do not want to let these slip through,
	 * so we need to clear the flag before entering the do..while loop.
	 */
	Lck_Lock(&oh->mtx);
	assert(oh->refcnt > 0);
	VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
		assert(oc->objhead == oh);
		oc->flags &= ~OC_F_PURGED;
	}
	Lck_Unlock(&oh->mtx);

	do {
		more = 0;
		spc = ospc;
		nobj = 0;
		/* Stage objcore pointers in the reserved workspace */
		ocp = (void*)wrk->aws->f;
		Lck_Lock(&oh->mtx);
		assert(oh->refcnt > 0);
		VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
			CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
			assert(oc->objhead == oh);
			if (oc->flags & OC_F_BUSY) {
				/*
				 * We cannot purge busy objects here, because
				 * their owners have special rights to them,
				 * and may nuke them without concern for the
				 * refcount, which by definition always must
				 * be one, so they don't check.
				 */
				continue;
			}
			if (oc->flags & OC_F_DYING)
				continue;
			if (oc->flags & OC_F_PURGED) {
				/*
				 * We have already called EXP_Rearm on this
				 * object, and we do not want to do it
				 * again. Plus the space in the ocp array may
				 * be limited.
				 */
				continue;
			}
			if (spc < sizeof *ocp) {
				/* Iterate if aws is not big enough */
				more = 1;
				break;
			}
			/* Hold a ref so the oc survives until rearm below */
			oc->refcnt++;
			spc -= sizeof *ocp;
			ocp[nobj++] = oc;
			oc->flags |= OC_F_PURGED;
		}
		Lck_Unlock(&oh->mtx);

		/* Rearm the staged objcores outside the objhead mutex */
		for (n = 0; n < nobj; n++) {
			oc = ocp[n];
			CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
			EXP_Rearm(oc, ttl_now, ttl, grace, keep);
			(void)HSH_DerefObjCore(wrk, &oc, 0);
		}
		n_tot += nobj;
	} while (more);
	WS_Release(wrk->aws, 0);
	Pool_PurgeStat(n_tot);
	return (n_tot);
}
696
697
/*---------------------------------------------------------------------
698
 * Fail an objcore
699
 */
700
701
void
702 116
HSH_Fail(struct objcore *oc)
703
{
704
        struct objhead *oh;
705
706 116
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
707 116
        oh = oc->objhead;
708 116
        CHECK_OBJ(oh, OBJHEAD_MAGIC);
709
710
        /*
711
         * We have to have either a busy bit, so that HSH_Lookup
712
         * will not consider this oc, or an object hung of the oc
713
         * so that it can consider it.
714
         */
715 116
        assert((oc->flags & OC_F_BUSY) || (oc->stobj->stevedore != NULL));
716
717 116
        Lck_Lock(&oh->mtx);
718 116
        oc->flags |= OC_F_FAILED;
719 116
        Lck_Unlock(&oh->mtx);
720 116
}
721
722
/*---------------------------------------------------------------------
723
 * Abandon a fetch we will not need
724
 */
725
726
void
727 1301
HSH_Abandon(struct objcore *oc)
728
{
729
        struct objhead *oh;
730
731 1301
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
732 1301
        oh = oc->objhead;
733 1301
        CHECK_OBJ(oh, OBJHEAD_MAGIC);
734
735 1301
        Lck_Lock(&oh->mtx);
736 1301
        oc->flags |= OC_F_ABANDON;
737 1301
        Lck_Unlock(&oh->mtx);
738 1301
}
739
740
/*---------------------------------------------------------------------
741
 * Unbusy an objcore when the object is completely fetched.
742
 */
743
744
/*
 * Unbusy an objcore when the object is completely fetched: attach a
 * ban (non-private only), move the oc to the head of the objhead list,
 * clear the busy bit and wake up anybody parked on the waiting list.
 * The EXP_Insert reference is taken under the mutex but EXP_Insert
 * itself runs after the mutex is dropped.
 */
void
HSH_Unbusy(struct worker *wrk, struct objcore *oc)
{
	struct objhead *oh;
	struct rush rush;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	oh = oc->objhead;
	CHECK_OBJ(oh, OBJHEAD_MAGIC);
	INIT_OBJ(&rush, RUSH_MAGIC);

	AN(oc->stobj->stevedore);
	AN(oc->flags & OC_F_BUSY);
	assert(oh->refcnt > 0);
	assert(oc->refcnt > 0);

	if (!(oc->flags & OC_F_PRIVATE)) {
		BAN_NewObjCore(oc);
		AN(oc->ban);
	}

	/* XXX: pretouch neighbors on oh->objcs to prevent page-on under mtx */
	Lck_Lock(&oh->mtx);
	assert(oh->refcnt > 0);
	assert(oc->refcnt > 0);
	if (!(oc->flags & OC_F_PRIVATE))
		oc->refcnt++;			// For EXP_Insert
	/* XXX: strictly speaking, we should sort in Date: order. */
	VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
	VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
	oc->flags &= ~OC_F_BUSY;
	if (!VTAILQ_EMPTY(&oh->waitinglist))
		hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY);
	Lck_Unlock(&oh->mtx);
	if (!(oc->flags & OC_F_PRIVATE))
		EXP_Insert(wrk, oc);
	/* Wake up waiters after dropping the mutex */
	hsh_rush2(wrk, &rush);
}
783
784
/*====================================================================
785
 * HSH_Kill()
786
 *
787
 * It's dead Jim, kick it...
788
 */
789
790
void
791 880
HSH_Kill(struct objcore *oc)
792
{
793
794 880
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
795 880
        CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);
796
797 880
        Lck_Lock(&oc->objhead->mtx);
798 880
        oc->flags |= OC_F_DYING;
799 880
        Lck_Unlock(&oc->objhead->mtx);
800 880
        EXP_Remove(oc);
801 880
}
802
803
/*====================================================================
804
 * HSH_Snipe()
805
 *
806
 * If objcore is idle, gain a ref and mark it dead.
807
 */
808
809
/*
 * If the objcore is idle (our caller holds the only reference), gain
 * an extra ref and mark it dead.  Returns non-zero on success.
 */
int
HSH_Snipe(const struct worker *wrk, struct objcore *oc)
{
	int retval = 0;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);

	/*
	 * Unlocked pre-check plus trylock, then the refcnt and DYING
	 * flag are re-checked under the mutex before committing.
	 */
	if (oc->refcnt == 1 && !Lck_Trylock(&oc->objhead->mtx)) {
		if (oc->refcnt == 1 && !(oc->flags & OC_F_DYING)) {
			oc->flags |= OC_F_DYING;
			oc->refcnt++;
			retval = 1;
		}
		Lck_Unlock(&oc->objhead->mtx);
	}
	/* EXP_Remove outside the mutex */
	if (retval)
		EXP_Remove(oc);
	return (retval);
}
830
831
832
/*---------------------------------------------------------------------
833
 * Gain a reference on an objcore
834
 */
835
836
void
837 6780
HSH_Ref(struct objcore *oc)
838
{
839
        struct objhead *oh;
840
841 6780
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
842 6780
        oh = oc->objhead;
843 6780
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
844 6780
        Lck_Lock(&oh->mtx);
845 6780
        assert(oc->refcnt > 0);
846 6780
        oc->refcnt++;
847 6780
        Lck_Unlock(&oh->mtx);
848 6780
}
849
850
/*---------------------------------------------------------------------
851
 * Gain a reference on the busyobj, if the objcore has one
852
 */
853
854
struct boc *
855 22947
HSH_RefBoc(const struct objcore *oc)
856
{
857
        struct objhead *oh;
858
        struct boc *boc;
859
860 22947
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
861 22947
        oh = oc->objhead;
862 22947
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
863 22947
        if (oc->boc == NULL)
864 10694
                return (NULL);
865 12253
        Lck_Lock(&oh->mtx);
866 12254
        assert(oc->refcnt > 0);
867 12254
        boc = oc->boc;
868 12254
        CHECK_OBJ_ORNULL(boc, BOC_MAGIC);
869 12254
        if (boc != NULL) {
870 12251
                assert(boc->refcount > 0);
871 12251
                if (boc->state < BOS_FINISHED)
872 12057
                        boc->refcount++;
873
                else
874 194
                        boc = NULL;
875
        }
876 12254
        Lck_Unlock(&oh->mtx);
877 12254
        return (boc);
878
}
879
880
void
881 19907
HSH_DerefBoc(struct worker *wrk, struct objcore *oc)
882
{
883
        struct boc *boc;
884
        unsigned r;
885
886 19907
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
887 19907
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
888 19907
        boc = oc->boc;
889 19907
        CHECK_OBJ_NOTNULL(boc, BOC_MAGIC);
890 19907
        Lck_Lock(&oc->objhead->mtx);
891 19909
        assert(oc->refcnt > 0);
892 19909
        assert(boc->refcount > 0);
893 19909
        r = --boc->refcount;
894 19909
        if (r == 0)
895 7856
                oc->boc = NULL;
896 19909
        Lck_Unlock(&oc->objhead->mtx);
897 19908
        if (r == 0)
898 7856
                ObjBocDone(wrk, oc, &boc);
899 19908
}
900
901
/*--------------------------------------------------------------------
 * Dereference objcore
 *
 * Returns zero if target was destroyed.
 */

int
HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax)
{
	struct objcore *oc;
	struct objhead *oh;
	struct rush rush;
	unsigned r;

	AN(ocp);
	/* Take ownership of the caller's pointer so it cannot be
	 * used again after the refcount may have dropped to zero. */
	oc = *ocp;
	*ocp = NULL;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	assert(oc->refcnt > 0);
	INIT_OBJ(&rush, RUSH_MAGIC);

	oh = oc->objhead;
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);

	Lck_Lock(&oh->mtx);
	assert(oh->refcnt > 0);
	r = --oc->refcnt;
	/* Last reference: unhook the objcore from the objhead list
	 * while the lock is still held. */
	if (!r)
		VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
	/* Collect up to rushmax waiters under the lock ... */
	if (!VTAILQ_EMPTY(&oh->waitinglist))
		hsh_rush1(wrk, oh, &rush, rushmax);
	Lck_Unlock(&oh->mtx);
	/* ... but wake them up only after the lock is released. */
	hsh_rush2(wrk, &rush);
	if (r != 0)
		return (r);

	/* We dropped the last reference: tear the objcore down. */
	AZ(oc->exp_flags);

	BAN_DestroyObj(oc);
	AZ(oc->ban);

	/* Release storage first, then the objcore itself. */
	if (oc->stobj->stevedore != NULL)
		ObjFreeObj(wrk, oc);
	ObjDestroy(wrk, &oc);

	/* Drop our ref on the objhead */
	assert(oh->refcnt > 0);
	(void)HSH_DerefObjHead(wrk, &oh);
	return (0);
}
953
954
static int
HSH_DerefObjHead(struct worker *wrk, struct objhead **poh)
{
	struct objhead *oh;
	struct rush rush;
	int r;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC);
	INIT_OBJ(&rush, RUSH_MAGIC);

	/* The private objhead is shared and never destroyed; just
	 * drop one of its (always multiple) references. */
	if (oh == private_oh) {
		assert(VTAILQ_EMPTY(&oh->waitinglist));
		Lck_Lock(&oh->mtx);
		assert(oh->refcnt > 1);
		oh->refcnt--;
		Lck_Unlock(&oh->mtx);
		return(1);
	}

	/*
	 * Make absolutely certain that we do not let the final ref
	 * disappear until the waitinglist is empty.
	 * This is necessary because the req's on the waiting list do
	 * not hold any ref on the objhead of their own, and we cannot
	 * just make them hold the same refs as objcore, that would
	 * confuse hashers.
	 */
	Lck_Lock(&oh->mtx);
	while (oh->refcnt == 1 && !VTAILQ_EMPTY(&oh->waitinglist)) {
		/* Drain waiters in batches: collect under the lock,
		 * wake outside it, then re-take the lock and re-check. */
		hsh_rush1(wrk, oh, &rush, HSH_RUSH_ALL);
		Lck_Unlock(&oh->mtx);
		hsh_rush2(wrk, &rush);
		Lck_Lock(&oh->mtx);
	}
	Lck_Unlock(&oh->mtx);

	assert(oh->refcnt > 0);
	/* Let the hash implementation drop its ref; destroy the
	 * objhead if that was the last one. */
	r = hash->deref(oh);
	if (!r)
		HSH_DeleteObjHead(wrk, oh);
	return (r);
}
997
998
void
999 2752
HSH_Init(const struct hash_slinger *slinger)
1000
{
1001
1002
        assert(DIGEST_LEN == VSHA256_LEN);      /* avoid #include pollution */
1003 2752
        hash = slinger;
1004 2752
        if (hash->start != NULL)
1005 2752
                hash->start();
1006 2752
        private_oh = hsh_newobjhead();
1007 2752
        private_oh->refcnt = 1;
1008 2752
}