varnish-cache/bin/varnishd/cache/cache_hash.c
1
/*-
2
 * Copyright (c) 2006 Verdens Gang AS
3
 * Copyright (c) 2006-2015 Varnish Software AS
4
 * All rights reserved.
5
 *
6
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 *
29
 * This is the central hash-table code, it relies on a chosen hash
30
 * implementation only for the actual hashing, all the housekeeping
31
 * happens here.
32
 *
33
 * We have two kinds of structures, objecthead and object.  An objecthead
34
 * corresponds to a given (Host:, URL) tupple, and the objects hung from
35
 * the objecthead may represent various variations (ie: Vary: header,
36
 * different TTL etc) instances of that web-entity.
37
 *
38
 * Each objecthead has a mutex which locks both its own fields, the
39
 * list of objects and fields in the objects.
40
 *
41
 * The hash implementation must supply a reference count facility on
42
 * the objecthead, and return with a reference held after a lookup.
43
 *
44
 * Lookups in the hash implementation returns with a ref held and each
45
 * object hung from the objhead holds a ref as well.
46
 *
47
 * Objects have refcounts which are locked by the objecthead mutex.
48
 *
49
 * New objects are always marked busy, and they can go from busy to
50
 * not busy only once.
51
 */
52
53
#include "config.h"
54
55
#include <stdio.h>
56
#include <stdlib.h>
57
58
#include "cache_varnishd.h"
59
60
#include "cache/cache_objhead.h"
61
#include "cache/cache_transport.h"
62
63
#include "hash/hash_slinger.h"
64
65
#include "vsha256.h"
66
67
struct rush {
68
        unsigned                magic;
69
#define RUSH_MAGIC              0xa1af5f01
70
        VTAILQ_HEAD(,req)       reqs;
71
};
72
73
static const struct hash_slinger *hash;
74
static struct objhead *private_oh;
75
76
static void hsh_rush1(const struct worker *, struct objhead *,
77
    struct rush *, int);
78
static void hsh_rush2(struct worker *, struct rush *);
79
80
/*---------------------------------------------------------------------*/
81
82
static struct objhead *
83 5606
hsh_newobjhead(void)
84
{
85
        struct objhead *oh;
86
87 5606
        ALLOC_OBJ(oh, OBJHEAD_MAGIC);
88 5606
        XXXAN(oh);
89 5606
        oh->refcnt = 1;
90 5606
        VTAILQ_INIT(&oh->objcs);
91 5606
        VTAILQ_INIT(&oh->waitinglist);
92 5606
        Lck_New(&oh->mtx, lck_objhdr);
93 5606
        return (oh);
94
}
95
96
/*---------------------------------------------------------------------*/
97
/* Precreate an objhead and object for later use */
98
static void
99 5408
hsh_prealloc(struct worker *wrk)
100
{
101
102 5408
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
103
104 5408
        if (wrk->nobjcore == NULL)
105 3930
                wrk->nobjcore = ObjNew(wrk);
106 5408
        CHECK_OBJ_NOTNULL(wrk->nobjcore, OBJCORE_MAGIC);
107
108 5408
        if (wrk->nobjhead == NULL) {
109 3551
                wrk->nobjhead = hsh_newobjhead();
110 3551
                wrk->stats->n_objecthead++;
111
        }
112 5408
        CHECK_OBJ_NOTNULL(wrk->nobjhead, OBJHEAD_MAGIC);
113
114 5408
        if (hash->prep != NULL)
115 5390
                hash->prep(wrk);
116 5408
}
117
118
/*---------------------------------------------------------------------*/
119
120
struct objcore *
121 2457
HSH_Private(const struct worker *wrk)
122
{
123
        struct objcore *oc;
124
125 2457
        CHECK_OBJ_NOTNULL(private_oh, OBJHEAD_MAGIC);
126
127 2457
        oc = ObjNew(wrk);
128 2457
        AN(oc);
129 2457
        oc->refcnt = 1;
130 2457
        oc->objhead = private_oh;
131 2457
        oc->flags |= OC_F_PRIVATE;
132 2457
        Lck_Lock(&private_oh->mtx);
133 2457
        VTAILQ_INSERT_TAIL(&private_oh->objcs, oc, hsh_list);
134 2457
        private_oh->refcnt++;
135 2457
        Lck_Unlock(&private_oh->mtx);
136 2457
        return (oc);
137
}
138
139
/*---------------------------------------------------------------------*/
140
void
141 69
HSH_Cleanup(struct worker *wrk)
142
{
143
144 69
        if (wrk->nobjcore != NULL)
145 0
                ObjDestroy(wrk, &wrk->nobjcore);
146
147 69
        if (wrk->nobjhead != NULL) {
148 0
                Lck_Delete(&wrk->nobjhead->mtx);
149 0
                FREE_OBJ(wrk->nobjhead);
150 0
                wrk->nobjhead = NULL;
151 0
                wrk->stats->n_objecthead--;
152
        }
153 69
        if (wrk->nhashpriv != NULL) {
154
                /* XXX: If needed, add slinger method for this */
155 6
                free(wrk->nhashpriv);
156 6
                wrk->nhashpriv = NULL;
157
        }
158 69
}
159
160
void
161 0
HSH_DeleteObjHead(const struct worker *wrk, struct objhead *oh)
162
{
163
164 0
        AZ(oh->refcnt);
165 0
        assert(VTAILQ_EMPTY(&oh->objcs));
166 0
        assert(VTAILQ_EMPTY(&oh->waitinglist));
167 0
        Lck_Delete(&oh->mtx);
168 0
        wrk->stats->n_objecthead--;
169 0
        FREE_OBJ(oh);
170 0
}
171
172
void
173 29184
HSH_AddString(struct req *req, void *ctx, const char *str)
174
{
175
176 29184
        CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
177 29184
        AN(ctx);
178 29184
        if (str != NULL) {
179 14593
                VSHA256_Update(ctx, str, strlen(str));
180 14592
                VSLb(req->vsl, SLT_Hash, "%s", str);
181
        } else
182 14591
                VSHA256_Update(ctx, &str, 1);
183 29187
}
184
185
/*---------------------------------------------------------------------
186
 * This is a debugging hack to enable testing of boundary conditions
187
 * in the hash algorithm.
188
 * We trap the first 9 different digests and translate them to different
189
 * digests with edge bit conditions
190
 */
191
192
static struct hsh_magiclist {
193
        unsigned char was[VSHA256_LEN];
194
        unsigned char now[VSHA256_LEN];
195
} hsh_magiclist[] = {
196
        { .now = {      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
197
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
198
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
199
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
200
        { .now = {      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
201
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
202
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
203
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } },
204
        { .now = {      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
205
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
206
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
207
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } },
208
        { .now = {      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
209
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
210
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
211
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40 } },
212
        { .now = {      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
213
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
214
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
215
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 } },
216
        { .now = {      0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
217
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
218
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
219
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
220
        { .now = {      0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
221
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
222
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
223
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
224
        { .now = {      0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
225
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
226
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
227
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
228
        { .now = {      0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
229
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
230
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
231
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
232
};
233
234
#define HSH_NMAGIC (sizeof hsh_magiclist / sizeof hsh_magiclist[0])
235
236
static void
237 54
hsh_testmagic(void *result)
238
{
239
        int i, j;
240
        static int nused = 0;
241
242 270
        for (i = 0; i < nused; i++)
243 243
                if (!memcmp(hsh_magiclist[i].was, result, VSHA256_LEN))
244 27
                        break;
245 54
        if (i == nused && i < HSH_NMAGIC)
246 27
                memcpy(hsh_magiclist[nused++].was, result, VSHA256_LEN);
247 54
        if (i == nused)
248 0
                return;
249 54
        assert(i < HSH_NMAGIC);
250 54
        fprintf(stderr, "HASHMAGIC: <");
251 1782
        for (j = 0; j < VSHA256_LEN; j++)
252 1728
                fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
253 54
        fprintf(stderr, "> -> <");
254 54
        memcpy(result, hsh_magiclist[i].now, VSHA256_LEN);
255 1782
        for (j = 0; j < VSHA256_LEN; j++)
256 1728
                fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
257 54
        fprintf(stderr, ">\n");
258
}
259
260
/*---------------------------------------------------------------------
261
 * Insert an object which magically appears out of nowhere or, more likely,
262
 * comes off some persistent storage device.
263
 * Insert it with a reference held.
264
 */
265
266
void
267 48
HSH_Insert(struct worker *wrk, const void *digest, struct objcore *oc,
268
    struct ban *ban)
269
{
270
        struct objhead *oh;
271
        struct rush rush;
272
273 48
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
274 48
        AN(digest);
275 48
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
276 48
        AN(ban);
277 48
        AN(oc->flags & OC_F_BUSY);
278 48
        AZ(oc->flags & OC_F_PRIVATE);
279 48
        assert(oc->refcnt == 1);
280 48
        INIT_OBJ(&rush, RUSH_MAGIC);
281
282 48
        hsh_prealloc(wrk);
283
284 48
        AN(wrk->nobjhead);
285 48
        oh = hash->lookup(wrk, digest, &wrk->nobjhead);
286 48
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
287 48
        Lck_AssertHeld(&oh->mtx);
288 48
        assert(oh->refcnt > 0);
289
290
        /* Mark object busy and insert (precreated) objcore in
291
           objecthead. The new object inherits our objhead reference. */
292 48
        oc->objhead = oh;
293 48
        VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
294 48
        oc->refcnt++;                           // For EXP_Insert
295 48
        Lck_Unlock(&oh->mtx);
296
297 48
        BAN_RefBan(oc, ban);
298 48
        AN(oc->ban);
299 48
        EXP_Insert(wrk, oc);
300
301
        /* Move the object first in the oh list, unbusy it and run the
302
           waitinglist if necessary */
303 48
        Lck_Lock(&oh->mtx);
304 48
        VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
305 48
        VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
306 48
        oc->flags &= ~OC_F_BUSY;
307 48
        if (!VTAILQ_EMPTY(&oh->waitinglist))
308 0
                hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY);
309 48
        Lck_Unlock(&oh->mtx);
310 48
        hsh_rush2(wrk, &rush);
311 48
}
312
313
/*---------------------------------------------------------------------
314
 */
315
316
static struct objcore *
317 3384
hsh_insert_busyobj(struct worker *wrk, struct objhead *oh)
318
{
319
        struct objcore *oc;
320
321 3384
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
322 3384
        Lck_AssertHeld(&oh->mtx);
323
324 3384
        oc = wrk->nobjcore;
325 3384
        wrk->nobjcore = NULL;
326 3384
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
327
328 3384
        AN(oc->flags & OC_F_BUSY);
329 3384
        oc->refcnt = 1;         /* Owned by busyobj */
330 3384
        oc->objhead = oh;
331 3384
        VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
332 3384
        return (oc);
333
}
334
335
/*---------------------------------------------------------------------
336
 */
337
338
enum lookup_e
339 5360
HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp,
340
    int always_insert)
341
{
342
        struct worker *wrk;
343
        struct objhead *oh;
344
        struct objcore *oc;
345
        struct objcore *exp_oc;
346
        double exp_t_origin;
347
        int busy_found;
348
        enum lookup_e retval;
349
        const uint8_t *vary;
350
351 5360
        AN(ocp);
352 5360
        *ocp = NULL;
353 5360
        AN(bocp);
354 5360
        *bocp = NULL;
355
356 5360
        CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
357 5360
        wrk = req->wrk;
358 5360
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
359 5360
        CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC);
360 5360
        AN(hash);
361
362 5360
        hsh_prealloc(wrk);
363 5360
        if (DO_DEBUG(DBG_HASHEDGE))
364 54
                hsh_testmagic(req->digest);
365
366 5360
        if (req->hash_objhead != NULL) {
367
                /*
368
                 * This req came off the waiting list, and brings an
369
                 * oh refcnt with it.
370
                 */
371 83
                CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC);
372 83
                oh = req->hash_objhead;
373 83
                Lck_Lock(&oh->mtx);
374 83
                req->hash_objhead = NULL;
375
        } else {
376 5277
                AN(wrk->nobjhead);
377 5277
                oh = hash->lookup(wrk, req->digest, &wrk->nobjhead);
378
        }
379
380 5360
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
381 5360
        Lck_AssertHeld(&oh->mtx);
382
383 5360
        if (always_insert) {
384
                /* XXX: should we do predictive Vary in this case ? */
385
                /* Insert new objcore in objecthead and release mutex */
386 33
                *bocp = hsh_insert_busyobj(wrk, oh);
387
                /* NB: no deref of objhead, new object inherits reference */
388 33
                Lck_Unlock(&oh->mtx);
389 33
                return (HSH_MISS);
390
        }
391
392 5327
        assert(oh->refcnt > 0);
393 5327
        busy_found = 0;
394 5327
        exp_oc = NULL;
395 5327
        exp_t_origin = 0.0;
396 13488
        VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
397
                /* Must be at least our own ref + the objcore we examine */
398 10105
                assert(oh->refcnt > 1);
399 10105
                CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
400 10105
                assert(oc->objhead == oh);
401 10105
                assert(oc->refcnt > 0);
402
403 10105
                if (oc->flags & OC_F_DYING)
404 0
                        continue;
405 10105
                if (oc->flags & OC_F_FAILED)
406 0
                        continue;
407
408 10105
                if (oc->boc != NULL && oc->boc->state < BOS_STREAM) {
409 107
                        CHECK_OBJ_ORNULL(oc->boc, BOC_MAGIC);
410
411 107
                        if (req->hash_ignore_busy)
412 3
                                continue;
413
414 107
                        if (oc->boc->vary != NULL &&
415 3
                            !VRY_Match(req, oc->boc->vary))
416 3
                                continue;
417
418 101
                        busy_found = 1;
419 101
                        continue;
420
                }
421
422 9998
                if (oc->ttl <= 0.)
423 0
                        continue;
424
425 9998
                if (BAN_CheckObject(wrk, oc, req)) {
426 87
                        oc->flags |= OC_F_DYING;
427 87
                        EXP_Remove(oc);
428 87
                        continue;
429
                }
430
431 9911
                if (ObjHasAttr(wrk, oc, OA_VARY)) {
432 7995
                        vary = ObjGetAttr(wrk, oc, OA_VARY, NULL);
433 7995
                        AN(vary);
434 7995
                        if (!VRY_Match(req, vary))
435 7812
                                continue;
436
                }
437
438 2099
                if (EXP_Ttl(req, oc) > req->t_req) {
439
                        /* If still valid, use it */
440 1944
                        assert(oh->refcnt > 1);
441 1944
                        assert(oc->objhead == oh);
442 1944
                        if (oc->flags & OC_F_HFP) {
443 12
                                wrk->stats->cache_hitpass++;
444 12
                                VSLb(req->vsl, SLT_HitPass, "%u %.6f",
445 12
                                    ObjGetXID(wrk, oc), EXP_Dttl(req, oc));
446 12
                                oc = NULL;
447 1932
                        } else if (oc->flags & OC_F_PASS) {
448 66
                                wrk->stats->cache_hitmiss++;
449 66
                                VSLb(req->vsl, SLT_HitMiss, "%u %.6f",
450 66
                                    ObjGetXID(wrk, oc), EXP_Dttl(req, oc));
451 66
                                oc = NULL;
452 66
                                *bocp = hsh_insert_busyobj(wrk, oh);
453
                        } else {
454 1866
                                oc->refcnt++;
455 1866
                                if (oc->hits < LONG_MAX)
456 1866
                                        oc->hits++;
457
                        }
458 1944
                        Lck_Unlock(&oh->mtx);
459 1944
                        if (oc == NULL)
460 78
                                return (HSH_MISS);
461 1866
                        assert(HSH_DerefObjHead(wrk, &oh));
462 1866
                        *ocp = oc;
463 1866
                        return (HSH_HIT);
464
                }
465 290
                if (EXP_Ttl(NULL, oc) < req->t_req && /* ignore req.ttl */
466 135
                    oc->t_origin > exp_t_origin) {
467
                        /* record the newest object */
468 132
                        exp_oc = oc;
469 132
                        exp_t_origin = oc->t_origin;
470
                }
471
        }
472
473 3383
        if (exp_oc != NULL && exp_oc->flags & OC_F_PASS) {
474 12
                wrk->stats->cache_hitmiss++;
475 12
                VSLb(req->vsl, SLT_HitMiss, "%u %.6f", ObjGetXID(wrk, exp_oc),
476 12
                    EXP_Dttl(req, exp_oc));
477 12
                exp_oc = NULL;
478 12
                busy_found = 0;
479
        }
480
481 3383
        if (!busy_found) {
482
                /* Insert objcore in objecthead */
483 3285
                *bocp = hsh_insert_busyobj(wrk, oh);
484
485 3285
                if (exp_oc != NULL) {
486 102
                        assert(oh->refcnt > 1);
487 102
                        assert(exp_oc->objhead == oh);
488 102
                        exp_oc->refcnt++;
489 102
                        Lck_Unlock(&oh->mtx);
490 102
                        *ocp = exp_oc;
491
492 102
                        if (EXP_Ttl_grace(req, exp_oc) < req->t_req) {
493 57
                                retval = HSH_MISS;
494
                        } else {
495 45
                                if (exp_oc->hits < LONG_MAX)
496 45
                                        exp_oc->hits++;
497 45
                                retval = HSH_EXPBUSY;
498
                        }
499
                } else {
500 3183
                        Lck_Unlock(&oh->mtx);
501 3183
                        retval = HSH_MISS;
502
                }
503
504 3285
                return (retval);
505
        }
506
507 98
        AN(busy_found);
508 98
        if (exp_oc != NULL && EXP_Ttl_grace(req, exp_oc) >= req->t_req) {
509
                /* we do not wait on the busy object if in grace */
510 9
                assert(oh->refcnt > 1);
511 9
                assert(exp_oc->objhead == oh);
512 9
                exp_oc->refcnt++;
513 9
                Lck_Unlock(&oh->mtx);
514 9
                *ocp = exp_oc;
515
516 9
                assert(HSH_DerefObjHead(wrk, &oh));
517 9
                if (exp_oc->hits < LONG_MAX)
518 9
                        exp_oc->hits++;
519 9
                return (HSH_EXP);
520
        }
521
522
        /* There are one or more busy objects, wait for them */
523
524 89
        AZ(req->hash_ignore_busy);
525
526 89
        VTAILQ_INSERT_TAIL(&oh->waitinglist, req, w_list);
527 89
        if (DO_DEBUG(DBG_WAITINGLIST))
528 9
                VSLb(req->vsl, SLT_Debug, "on waiting list <%p>", oh);
529
530 89
        wrk->stats->busy_sleep++;
531
        /*
532
         * The objhead reference transfers to the sess, we get it
533
         * back when the sess comes off the waiting list and
534
         * calls us again
535
         */
536 89
        req->hash_objhead = oh;
537 89
        req->wrk = NULL;
538 89
        req->waitinglist = 1;
539 89
        Lck_Unlock(&oh->mtx);
540 89
        return (HSH_BUSY);
541
}
542
543
/*---------------------------------------------------------------------
544
 * Pick the req's we are going to rush from the waiting list
545
 */
546
547
static void
548 77
hsh_rush1(const struct worker *wrk, struct objhead *oh, struct rush *r, int max)
549
{
550
        unsigned u;
551
        struct req *req;
552
553 77
        if (max == 0)
554 0
                return;
555 77
        if (max == HSH_RUSH_POLICY)
556 77
                max = cache_param->rush_exponent;
557 77
        assert(max > 0);
558
559 77
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
560 77
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
561 77
        CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
562 77
        VTAILQ_INIT(&r->reqs);
563 77
        Lck_AssertHeld(&oh->mtx);
564 166
        for (u = 0; u < max; u++) {
565 160
                req = VTAILQ_FIRST(&oh->waitinglist);
566 160
                if (req == NULL)
567 71
                        break;
568 89
                CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
569 89
                wrk->stats->busy_wakeup++;
570 89
                AZ(req->wrk);
571 89
                VTAILQ_REMOVE(&oh->waitinglist, req, w_list);
572 89
                VTAILQ_INSERT_TAIL(&r->reqs, req, w_list);
573 89
                req->waitinglist = 0;
574
        }
575
}
576
577
/*---------------------------------------------------------------------
578
 * Rush req's that came from waiting list.
579
 */
580
581
static void
582 18765
hsh_rush2(struct worker *wrk, struct rush *r)
583
{
584
        struct req *req;
585
586 18765
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
587 18765
        CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
588
589 37619
        while (!VTAILQ_EMPTY(&r->reqs)) {
590 89
                req = VTAILQ_FIRST(&r->reqs);
591 89
                CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
592 89
                VTAILQ_REMOVE(&r->reqs, req, w_list);
593 89
                DSL(DBG_WAITINGLIST, req->vsl->wid, "off waiting list");
594 89
                AN(req->transport->reembark);
595 89
                req->transport->reembark(wrk, req);
596
        }
597 18765
}
598
599
/*---------------------------------------------------------------------
600
 * Purge an entire objhead
601
 */
602
603
unsigned
604 39
HSH_Purge(struct worker *wrk, struct objhead *oh, double ttl_now, double ttl,
605
double grace, double keep)
606
{
607
        struct objcore *oc, **ocp;
608 39
        unsigned spc, ospc, nobj, n, n_tot = 0;
609 39
        int more = 0;
610
611 39
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
612 39
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
613 39
        ospc = WS_Reserve(wrk->aws, 0);
614 39
        assert(ospc >= sizeof *ocp);
615
        /*
616
         * Because of "soft" purges, there might be oc's in the list that has
617
         * the OC_F_PURGED flag set. We do not want to let these slip through,
618
         * so we need to clear the flag before entering the do..while loop.
619
         */
620 39
        Lck_Lock(&oh->mtx);
621 39
        assert(oh->refcnt > 0);
622 321
        VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
623 282
                CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
624 282
                assert(oc->objhead == oh);
625 282
                oc->flags &= ~OC_F_PURGED;
626
        }
627 39
        Lck_Unlock(&oh->mtx);
628
629
        do {
630 42
                more = 0;
631 42
                spc = ospc;
632 42
                nobj = 0;
633 42
                ocp = (void*)wrk->aws->f;
634 42
                Lck_Lock(&oh->mtx);
635 42
                assert(oh->refcnt > 0);
636 513
                VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
637 474
                        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
638 474
                        assert(oc->objhead == oh);
639 474
                        if (oc->flags & OC_F_BUSY) {
640
                                /*
641
                                 * We cannot purge busy objects here, because
642
                                 * their owners have special rights to them,
643
                                 * and may nuke them without concern for the
644
                                 * refcount, which by definition always must
645
                                 * be one, so they don't check.
646
                                 */
647 36
                                continue;
648
                        }
649 438
                        if (oc->flags & OC_F_DYING)
650 0
                                continue;
651 438
                        if (oc->flags & OC_F_PURGED) {
652
                                /*
653
                                 * We have already called EXP_Rearm on this
654
                                 * object, and we do not want to do it
655
                                 * again. Plus the space in the ocp array may
656
                                 * be limited.
657
                                 */
658 189
                                continue;
659
                        }
660 249
                        if (spc < sizeof *ocp) {
661
                                /* Iterate if aws is not big enough */
662 3
                                more = 1;
663 3
                                break;
664
                        }
665 246
                        oc->refcnt++;
666 246
                        spc -= sizeof *ocp;
667 246
                        ocp[nobj++] = oc;
668 246
                        oc->flags |= OC_F_PURGED;
669
                }
670 42
                Lck_Unlock(&oh->mtx);
671
672 288
                for (n = 0; n < nobj; n++) {
673 246
                        oc = ocp[n];
674 246
                        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
675 246
                        EXP_Rearm(oc, ttl_now, ttl, grace, keep);
676 246
                        (void)HSH_DerefObjCore(wrk, &oc, 0);
677
                }
678 42
                n_tot += nobj;
679 42
        } while (more);
680 39
        WS_Release(wrk->aws, 0);
681 39
        Pool_PurgeStat(n_tot);
682 39
        return (n_tot);
683
}
684
685
/*---------------------------------------------------------------------
686
 * Fail an objcore
687
 */
688
689
void
690 87
HSH_Fail(struct objcore *oc)
691
{
692
        struct objhead *oh;
693
694 87
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
695 87
        oh = oc->objhead;
696 87
        CHECK_OBJ(oh, OBJHEAD_MAGIC);
697
698
        /*
699
         * We have to have either a busy bit, so that HSH_Lookup
700
         * will not consider this oc, or an object hung of the oc
701
         * so that it can consider it.
702
         */
703 87
        assert((oc->flags & OC_F_BUSY) || (oc->stobj->stevedore != NULL));
704
705 87
        Lck_Lock(&oh->mtx);
706 87
        oc->flags |= OC_F_FAILED;
707 87
        Lck_Unlock(&oh->mtx);
708 87
}
709
710
/*---------------------------------------------------------------------
711
 * Abandon a fetch we will not need
712
 */
713
714
void
715 969
HSH_Abandon(struct objcore *oc)
716
{
717
        struct objhead *oh;
718
719 969
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
720 969
        oh = oc->objhead;
721 969
        CHECK_OBJ(oh, OBJHEAD_MAGIC);
722
723 969
        Lck_Lock(&oh->mtx);
724 969
        oc->flags |= OC_F_ABANDON;
725 969
        Lck_Unlock(&oh->mtx);
726 969
}
727
728
/*---------------------------------------------------------------------
729
 * Unbusy an objcore when the object is completely fetched.
730
 */
731
732
void
733 4845
HSH_Unbusy(struct worker *wrk, struct objcore *oc)
734
{
735
        struct objhead *oh;
736
        struct rush rush;
737
738 4845
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
739 4845
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
740 4845
        oh = oc->objhead;
741 4845
        CHECK_OBJ(oh, OBJHEAD_MAGIC);
742 4845
        INIT_OBJ(&rush, RUSH_MAGIC);
743
744 4845
        AN(oc->stobj->stevedore);
745 4845
        AN(oc->flags & OC_F_BUSY);
746 4845
        assert(oh->refcnt > 0);
747 4845
        assert(oc->refcnt > 0);
748
749 4845
        if (!(oc->flags & OC_F_PRIVATE)) {
750 3246
                BAN_NewObjCore(oc);
751 3246
                AN(oc->ban);
752
        }
753
754
        /* XXX: pretouch neighbors on oh->objcs to prevent page-on under mtx */
755 4845
        Lck_Lock(&oh->mtx);
756 4845
        assert(oh->refcnt > 0);
757 4845
        assert(oc->refcnt > 0);
758 4845
        if (!(oc->flags & OC_F_PRIVATE))
759 3246
                oc->refcnt++;                   // For EXP_Insert
760
        /* XXX: strictly speaking, we should sort in Date: order. */
761 4845
        VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
762 4845
        VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
763 4845
        oc->flags &= ~OC_F_BUSY;
764 4845
        if (!VTAILQ_EMPTY(&oh->waitinglist))
765 45
                hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY);
766 4845
        Lck_Unlock(&oh->mtx);
767 4845
        if (!(oc->flags & OC_F_PRIVATE))
768 3246
                EXP_Insert(wrk, oc);
769 4845
        hsh_rush2(wrk, &rush);
770 4845
}
771
772
/*====================================================================
773
 * HSH_Kill()
774
 *
775
 * It's dead Jim, kick it...
776
 */
777
778
void
779 600
HSH_Kill(struct objcore *oc)
780
{
781
782 600
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
783 600
        CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);
784
785 600
        Lck_Lock(&oc->objhead->mtx);
786 600
        oc->flags |= OC_F_DYING;
787 600
        Lck_Unlock(&oc->objhead->mtx);
788 600
        EXP_Remove(oc);
789 600
}
790
791
/*====================================================================
792
 * HSH_Snipe()
793
 *
794
 * If objcore is idle, gain a ref and mark it dead.
795
 */
796
797
int
798 24
HSH_Snipe(const struct worker *wrk, struct objcore *oc)
799
{
800 24
        int retval = 0;
801
802 24
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
803 24
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
804 24
        CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);
805
806 24
        if (oc->refcnt == 1 && !Lck_Trylock(&oc->objhead->mtx)) {
807 24
                if (oc->refcnt == 1 && !(oc->flags & OC_F_DYING)) {
808 24
                        oc->flags |= OC_F_DYING;
809 24
                        oc->refcnt++;
810 24
                        retval = 1;
811
                }
812 24
                Lck_Unlock(&oc->objhead->mtx);
813
        }
814 24
        if (retval)
815 24
                EXP_Remove(oc);
816 24
        return (retval);
817
}
818
819
820
/*---------------------------------------------------------------------
821
 * Gain a reference on an objcore
822
 */
823
824
void
825 5003
HSH_Ref(struct objcore *oc)
826
{
827
        struct objhead *oh;
828
829 5003
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
830 5003
        oh = oc->objhead;
831 5003
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
832 5003
        Lck_Lock(&oh->mtx);
833 5004
        assert(oc->refcnt > 0);
834 5004
        oc->refcnt++;
835 5004
        Lck_Unlock(&oh->mtx);
836 5004
}
837
838
/*---------------------------------------------------------------------
839
 * Gain a reference on the busyobj, if the objcore has one
840
 */
841
842
struct boc *
843 17013
HSH_RefBoc(const struct objcore *oc)
844
{
845
        struct objhead *oh;
846
        struct boc *boc;
847
848 17013
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
849 17013
        oh = oc->objhead;
850 17013
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
851 17013
        if (oc->boc == NULL)
852 7861
                return (NULL);
853 9152
        Lck_Lock(&oh->mtx);
854 9152
        assert(oc->refcnt > 0);
855 9152
        boc = oc->boc;
856 9152
        CHECK_OBJ_ORNULL(boc, BOC_MAGIC);
857 9152
        if (boc != NULL) {
858 9149
                assert(boc->refcount > 0);
859 9149
                if (boc->state < BOS_FINISHED)
860 9011
                        boc->refcount++;
861
                else
862 138
                        boc = NULL;
863
        }
864 9152
        Lck_Unlock(&oh->mtx);
865 9152
        return (boc);
866
}
void
869 14806
HSH_DerefBoc(struct worker *wrk, struct objcore *oc)
870
{
871
        struct boc *boc;
872
        unsigned r;
873
874 14806
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
875 14806
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
876 14806
        boc = oc->boc;
877 14806
        CHECK_OBJ_NOTNULL(boc, BOC_MAGIC);
878 14806
        Lck_Lock(&oc->objhead->mtx);
879 14807
        assert(oc->refcnt > 0);
880 14807
        assert(boc->refcount > 0);
881 14807
        r = --boc->refcount;
882 14807
        if (r == 0)
883 5799
                oc->boc = NULL;
884 14807
        Lck_Unlock(&oc->objhead->mtx);
885 14807
        if (r == 0)
886 5799
                ObjBocDone(wrk, oc, &boc);
887 14807
}

/*--------------------------------------------------------------------
 * Dereference objcore
 *
 * Returns zero if target was destroyed.
 */

int
HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax)
{
	struct objcore *oc;
	struct objhead *oh;
	struct rush rush;
	unsigned r;

	/* Consume the caller's pointer so it cannot be used again. */
	AN(ocp);
	oc = *ocp;
	*ocp = NULL;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	assert(oc->refcnt > 0);
	INIT_OBJ(&rush, RUSH_MAGIC);

	oh = oc->objhead;
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);

	/* Under the objhead lock: drop our ref, unhook the objcore from
	 * the objhead's list if that was the last one, and collect any
	 * waitinglist entries (at most rushmax) to be woken. */
	Lck_Lock(&oh->mtx);
	assert(oh->refcnt > 0);
	r = --oc->refcnt;
	if (!r)
		VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
	if (!VTAILQ_EMPTY(&oh->waitinglist))
		hsh_rush1(wrk, oh, &rush, rushmax);
	Lck_Unlock(&oh->mtx);
	/* Actually wake the collected waiters, outside the lock. */
	hsh_rush2(wrk, &rush);
	if (r != 0)
		return (r);

	/* Last ref is gone: tear the objcore down.  It must already be
	 * out of the expiry machinery at this point. */
	AZ(oc->exp_flags);

	BAN_DestroyObj(oc);
	AZ(oc->ban);

	/* Free the stored object body, if one was ever allocated. */
	if (oc->stobj->stevedore != NULL)
		ObjFreeObj(wrk, oc);
	ObjDestroy(wrk, &oc);

	/* Drop our ref on the objhead */
	assert(oh->refcnt > 0);
	(void)HSH_DerefObjHead(wrk, &oh);
	return (0);
}
int
HSH_DerefObjHead(struct worker *wrk, struct objhead **poh)
{
	struct objhead *oh;
	struct rush rush;
	int r;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC);
	INIT_OBJ(&rush, RUSH_MAGIC);

	/* The shared private objhead is never handed to the hasher and
	 * never goes away; just drop one ref and report it still alive. */
	if (oh == private_oh) {
		assert(VTAILQ_EMPTY(&oh->waitinglist));
		Lck_Lock(&oh->mtx);
		assert(oh->refcnt > 1);
		oh->refcnt--;
		Lck_Unlock(&oh->mtx);
		return(1);
	}

	/*
	 * Make absolutely certain that we do not let the final ref
	 * disappear until the waitinglist is empty.
	 * This is necessary because the req's on the waiting list do
	 * not hold any ref on the objhead of their own, and we cannot
	 * just make them hold the same refs as an objcore would, that
	 * would confuse hashers.
	 */
	Lck_Lock(&oh->mtx);
	while (oh->refcnt == 1 && !VTAILQ_EMPTY(&oh->waitinglist)) {
		/* Collect waiters under the lock, wake them outside it,
		 * then re-take the lock and re-check. */
		hsh_rush1(wrk, oh, &rush, HSH_RUSH_ALL);
		Lck_Unlock(&oh->mtx);
		hsh_rush2(wrk, &rush);
		Lck_Lock(&oh->mtx);
	}
	Lck_Unlock(&oh->mtx);

	/* Hand the final deref to the hasher; it destroys the objhead
	 * via HSH_DeleteObjHead when this was the last ref. */
	assert(oh->refcnt > 0);
	r = hash->deref(oh);
	if (!r)
		HSH_DeleteObjHead(wrk, oh);
	return (r);
}
void
987 2055
HSH_Init(const struct hash_slinger *slinger)
988
{
989
990
        assert(DIGEST_LEN == VSHA256_LEN);      /* avoid #include pollution */
991 2055
        hash = slinger;
992 2055
        if (hash->start != NULL)
993 2055
                hash->start();
994 2055
        private_oh = hsh_newobjhead();
995 2055
        private_oh->refcnt = 1;
996 2055
}