varnish-cache/bin/varnishd/cache/cache_hash.c
/*-
 * Copyright (c) 2006 Verdens Gang AS
 * Copyright (c) 2006-2015 Varnish Software AS
 * All rights reserved.
 *
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This is the central hash-table code; it relies on the chosen hash
 * implementation only for the actual hashing, all the housekeeping
 * happens here.
 *
 * We have two kinds of structures, objecthead and object.  An objecthead
 * corresponds to a given (Host:, URL) tuple, and the objects hung from
 * the objecthead represent variants (ie: different Vary: headers,
 * TTLs etc) of that web-entity.
 *
 * Each objecthead has a mutex which locks both its own fields, the
 * list of objects and the fields in the objects.
 *
 * The hash implementation must supply a reference count facility on
 * the objecthead, and return with a reference held after a lookup.
 *
 * Lookups in the hash implementation return with a ref held, and each
 * object hung from the objhead holds a ref as well.
 *
 * Objects have refcounts which are locked by the objecthead mutex.
 *
 * New objects are always marked busy, and they can go from busy to
 * not busy only once.
 */
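
/*
 * Rough sketch of the structures described above (illustrative only):
 *
 *      objhead (one per hash digest; refcounted; owns ->mtx)
 *        |-- objcore (variant #1; refcounted; possibly busy)
 *        |-- objcore (variant #2; refcounted)
 *        `-- waitinglist (reqs parked behind a busy objcore)
 *
 * Each objcore on ->objcs holds a reference on the objhead, and a hash
 * lookup returns with an additional reference held.
 */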

#include "config.h"

#include <stdio.h>
#include <stdlib.h>

#include "cache_varnishd.h"

#include "cache/cache_objhead.h"
#include "cache/cache_transport.h"

#include "hash/hash_slinger.h"

#include "vsha256.h"
#include "vtim.h"

struct rush {
        unsigned                magic;
#define RUSH_MAGIC              0xa1af5f01
        VTAILQ_HEAD(,req)       reqs;
};

static const struct hash_slinger *hash;
static struct objhead *private_oh;

static void hsh_rush1(const struct worker *, struct objhead *,
    struct rush *, int);
static void hsh_rush2(struct worker *, struct rush *);

/*---------------------------------------------------------------------*/
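/*
 * Allocate a new objhead with a single reference, empty object and
 * waiting lists, and its own mutex.
 */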
static struct objhead *
hsh_newobjhead(void)
{
        struct objhead *oh;

        ALLOC_OBJ(oh, OBJHEAD_MAGIC);
        XXXAN(oh);
        oh->refcnt = 1;
        VTAILQ_INIT(&oh->objcs);
        VTAILQ_INIT(&oh->waitinglist);
        Lck_New(&oh->mtx, lck_objhdr);
        return (oh);
}

/*---------------------------------------------------------------------*/
/* Precreate an objhead and object for later use */
static void
hsh_prealloc(struct worker *wrk)
{

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);

        if (wrk->nobjcore == NULL)
                wrk->nobjcore = ObjNew(wrk);
        CHECK_OBJ_NOTNULL(wrk->nobjcore, OBJCORE_MAGIC);

        if (wrk->nobjhead == NULL) {
                wrk->nobjhead = hsh_newobjhead();
                wrk->stats->n_objecthead++;
        }
        CHECK_OBJ_NOTNULL(wrk->nobjhead, OBJHEAD_MAGIC);

        if (hash->prep != NULL)
                hash->prep(wrk);
}

/*---------------------------------------------------------------------*/

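/*
 * Create a private (OC_F_PRIVATE) objcore on the shared private_oh
 * objhead; such objects are not reachable via cache lookups.
 */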
struct objcore *
HSH_Private(const struct worker *wrk)
{
        struct objcore *oc;

        CHECK_OBJ_NOTNULL(private_oh, OBJHEAD_MAGIC);

        oc = ObjNew(wrk);
        AN(oc);
        oc->refcnt = 1;
        oc->objhead = private_oh;
        oc->flags |= OC_F_PRIVATE;
        Lck_Lock(&private_oh->mtx);
        VTAILQ_INSERT_TAIL(&private_oh->objcs, oc, hsh_list);
        private_oh->refcnt++;
        Lck_Unlock(&private_oh->mtx);
        return (oc);
}

/*---------------------------------------------------------------------*/
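/*
 * Release a worker's preallocated objcore, objhead and hash private
 * state.
 */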
void
HSH_Cleanup(struct worker *wrk)
{

        if (wrk->nobjcore != NULL)
                ObjDestroy(wrk, &wrk->nobjcore);

        if (wrk->nobjhead != NULL) {
                Lck_Delete(&wrk->nobjhead->mtx);
                FREE_OBJ(wrk->nobjhead);
                wrk->nobjhead = NULL;
                wrk->stats->n_objecthead--;
        }
        if (wrk->nhashpriv != NULL) {
                /* XXX: If needed, add slinger method for this */
                free(wrk->nhashpriv);
                wrk->nhashpriv = NULL;
        }
}
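/*
 * Dispose of an objhead once its reference count has dropped to zero
 * and it holds no objects and no waiting requests.
 */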
void
HSH_DeleteObjHead(const struct worker *wrk, struct objhead *oh)
{

        AZ(oh->refcnt);
        assert(VTAILQ_EMPTY(&oh->objcs));
        assert(VTAILQ_EMPTY(&oh->waitinglist));
        Lck_Delete(&oh->mtx);
        wrk->stats->n_objecthead--;
        FREE_OBJ(oh);
}
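/*
 * Feed a string into the lookup digest and log it as a Hash record;
 * a NULL str instead hashes a single separator byte to mark a field
 * boundary.
 */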
void
HSH_AddString(struct req *req, void *ctx, const char *str)
{

        CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
        AN(ctx);
        if (str != NULL) {
                VSHA256_Update(ctx, str, strlen(str));
                VSLb(req->vsl, SLT_Hash, "%s", str);
        } else
                VSHA256_Update(ctx, &str, 1);
}

/*---------------------------------------------------------------------
 * This is a debugging hack to enable testing of boundary conditions
 * in the hash algorithm.
 * We trap the first 9 different digests and translate them to different
 * digests with edge bit conditions
 */

static struct hsh_magiclist {
        unsigned char was[VSHA256_LEN];
        unsigned char now[VSHA256_LEN];
} hsh_magiclist[] = {
        { .now = {      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
        { .now = {      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } },
        { .now = {      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } },
        { .now = {      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40 } },
        { .now = {      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 } },
        { .now = {      0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
        { .now = {      0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
        { .now = {      0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
        { .now = {      0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
};

#define HSH_NMAGIC (sizeof hsh_magiclist / sizeof hsh_magiclist[0])

static void
hsh_testmagic(void *result)
{
        int i, j;
        static int nused = 0;

        for (i = 0; i < nused; i++)
                if (!memcmp(hsh_magiclist[i].was, result, VSHA256_LEN))
                        break;
        if (i == nused && i < HSH_NMAGIC)
                memcpy(hsh_magiclist[nused++].was, result, VSHA256_LEN);
        if (i == nused)
                return;
        assert(i < HSH_NMAGIC);
        fprintf(stderr, "HASHMAGIC: <");
        for (j = 0; j < VSHA256_LEN; j++)
                fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
        fprintf(stderr, "> -> <");
        memcpy(result, hsh_magiclist[i].now, VSHA256_LEN);
        for (j = 0; j < VSHA256_LEN; j++)
                fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
        fprintf(stderr, ">\n");
}

/*---------------------------------------------------------------------
 * Insert an object which magically appears out of nowhere or, more likely,
 * comes off some persistent storage device.
 * Insert it with a reference held.
 */

void
HSH_Insert(struct worker *wrk, const void *digest, struct objcore *oc,
    struct ban *ban)
{
        struct objhead *oh;
        struct rush rush;

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        AN(digest);
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
        AN(ban);
        AN(oc->flags & OC_F_BUSY);
        AZ(oc->flags & OC_F_PRIVATE);
        assert(oc->refcnt == 1);
        INIT_OBJ(&rush, RUSH_MAGIC);

        hsh_prealloc(wrk);

        AN(wrk->nobjhead);
        oh = hash->lookup(wrk, digest, &wrk->nobjhead);
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
        Lck_AssertHeld(&oh->mtx);
        assert(oh->refcnt > 0);

        /* Mark object busy and insert (precreated) objcore in
           objecthead. The new object inherits our objhead reference. */
        oc->objhead = oh;
        VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
        oc->refcnt++;                           // For EXP_Insert
        Lck_Unlock(&oh->mtx);

        BAN_RefBan(oc, ban);
        AN(oc->ban);
        EXP_Insert(wrk, oc);

        /* Move the object first in the oh list, unbusy it and run the
           waitinglist if necessary */
        Lck_Lock(&oh->mtx);
        VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
        VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
        oc->flags &= ~OC_F_BUSY;
        if (!VTAILQ_EMPTY(&oh->waitinglist))
                hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY);
        Lck_Unlock(&oh->mtx);
        hsh_rush2(wrk, &rush);
}

/*---------------------------------------------------------------------
 */

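/*
 * Hang the worker's preallocated objcore, still marked busy, off the
 * objhead. The objhead mutex must be held; the single reference is
 * owned by the busyobj.
 */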
static struct objcore *
hsh_insert_busyobj(struct worker *wrk, struct objhead *oh)
{
        struct objcore *oc;

        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
        Lck_AssertHeld(&oh->mtx);

        oc = wrk->nobjcore;
        wrk->nobjcore = NULL;
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);

        AN(oc->flags & OC_F_BUSY);
        oc->refcnt = 1;         /* Owned by busyobj */
        oc->objhead = oh;
        VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
        return (oc);
}

/*---------------------------------------------------------------------
 */

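/*
 * Look up the req's digest and classify the outcome:
 *
 *	HSH_MISS	nothing usable was found; *bocp carries a new busy
 *			objcore (except for hit-for-pass, where no busy
 *			objcore is created)
 *	HSH_HIT		a fresh object was found; *ocp holds a reference
 *	HSH_EXPBUSY	only an expired object was found; *ocp refers to it
 *			and *bocp carries a new busy objcore to refresh it
 *	HSH_EXP		an expired object was found while another fetch is
 *			already in progress; *ocp refers to it
 *	HSH_BUSY	only busy objects were found; the req was parked on
 *			the objhead's waiting list
 */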
enum lookup_e
HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp,
    int always_insert)
{
        struct worker *wrk;
        struct objhead *oh;
        struct objcore *oc;
        struct objcore *exp_oc;
        double exp_t_origin;
        int busy_found;
        enum lookup_e retval;
        const uint8_t *vary;

        AN(ocp);
        *ocp = NULL;
        AN(bocp);
        *bocp = NULL;

        CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
        wrk = req->wrk;
        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC);
        AN(hash);

        hsh_prealloc(wrk);
        if (DO_DEBUG(DBG_HASHEDGE))
                hsh_testmagic(req->digest);

        if (req->hash_objhead != NULL) {
                /*
                 * This req came off the waiting list, and brings an
                 * oh refcnt with it.
                 */
                CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC);
                oh = req->hash_objhead;
                Lck_Lock(&oh->mtx);
                req->hash_objhead = NULL;
        } else {
                AN(wrk->nobjhead);
                oh = hash->lookup(wrk, req->digest, &wrk->nobjhead);
        }

        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
        Lck_AssertHeld(&oh->mtx);

        if (always_insert) {
                /* XXX: should we do predictive Vary in this case ? */
                /* Insert new objcore in objecthead and release mutex */
                *bocp = hsh_insert_busyobj(wrk, oh);
                /* NB: no deref of objhead, new object inherits reference */
                Lck_Unlock(&oh->mtx);
                return (HSH_MISS);
        }

        assert(oh->refcnt > 0);
        busy_found = 0;
        exp_oc = NULL;
        exp_t_origin = 0.0;
        VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
                /* Must be at least our own ref + the objcore we examine */
                assert(oh->refcnt > 1);
                CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
                assert(oc->objhead == oh);
                assert(oc->refcnt > 0);

                if (oc->flags & OC_F_DYING)
                        continue;
                if (oc->flags & OC_F_FAILED)
                        continue;

                if (oc->boc != NULL && oc->boc->state < BOS_STREAM) {
                        CHECK_OBJ_ORNULL(oc->boc, BOC_MAGIC);

                        if (req->hash_ignore_busy)
                                continue;

                        if (oc->boc->vary != NULL &&
                            !VRY_Match(req, oc->boc->vary))
                                continue;

                        busy_found = 1;
                        continue;
                }

                if (oc->ttl <= 0.)
                        continue;

                if (BAN_CheckObject(wrk, oc, req)) {
                        oc->flags |= OC_F_DYING;
                        EXP_Remove(oc);
                        continue;
                }

                if (ObjHasAttr(wrk, oc, OA_VARY)) {
                        vary = ObjGetAttr(wrk, oc, OA_VARY, NULL);
                        AN(vary);
                        if (!VRY_Match(req, vary))
                                continue;
                }

                if (EXP_Ttl(req, oc) >= req->t_req) {
                        /* If still valid, use it */
                        assert(oh->refcnt > 1);
                        assert(oc->objhead == oh);
                        if (oc->flags & OC_F_HFP) {
                                wrk->stats->cache_hitpass++;
                                VSLb(req->vsl, SLT_HitPass, "%u %.6f",
                                    ObjGetXID(wrk, oc), EXP_Dttl(req, oc));
                                oc = NULL;
                        } else if (oc->flags & OC_F_PASS) {
                                wrk->stats->cache_hitmiss++;
                                VSLb(req->vsl, SLT_HitMiss, "%u %.6f",
                                    ObjGetXID(wrk, oc), EXP_Dttl(req, oc));
                                oc = NULL;
                                *bocp = hsh_insert_busyobj(wrk, oh);
                        } else {
                                oc->refcnt++;
                                if (oc->hits < LONG_MAX)
                                        oc->hits++;
                        }
                        Lck_Unlock(&oh->mtx);
                        if (oc == NULL)
                                return (HSH_MISS);
                        assert(HSH_DerefObjHead(wrk, &oh));
                        *ocp = oc;
                        return (HSH_HIT);
                }
                if (EXP_Ttl(NULL, oc) < req->t_req && /* ignore req.ttl */
                    oc->t_origin > exp_t_origin) {
                        /* record the newest object */
                        exp_oc = oc;
                        exp_t_origin = oc->t_origin;
                }
        }

        if (exp_oc != NULL && exp_oc->flags & OC_F_PASS) {
                wrk->stats->cache_hitmiss++;
                VSLb(req->vsl, SLT_HitMiss, "%u %.6f", ObjGetXID(wrk, exp_oc),
                    EXP_Dttl(req, exp_oc));
                exp_oc = NULL;
                busy_found = 0;
        }

        if (exp_oc != NULL) {
                assert(oh->refcnt > 1);
                assert(exp_oc->objhead == oh);
                exp_oc->refcnt++;

                if (!busy_found) {
                        *bocp = hsh_insert_busyobj(wrk, oh);
                        retval = HSH_EXPBUSY;
                } else {
                        AZ(req->hash_ignore_busy);
                        retval = HSH_EXP;
                }
                if (exp_oc->hits < LONG_MAX)
                        exp_oc->hits++;
                Lck_Unlock(&oh->mtx);
                if (retval == HSH_EXP)
                        assert(HSH_DerefObjHead(wrk, &oh));
                *ocp = exp_oc;
                return (retval);
        }

        if (!busy_found) {
                /* Insert objcore in objecthead and release mutex */
                *bocp = hsh_insert_busyobj(wrk, oh);
                /* NB: no deref of objhead, new object inherits reference */
                Lck_Unlock(&oh->mtx);
                return (HSH_MISS);
        }

        /* There are one or more busy objects, wait for them */

        AZ(req->hash_ignore_busy);

        VTAILQ_INSERT_TAIL(&oh->waitinglist, req, w_list);
        if (DO_DEBUG(DBG_WAITINGLIST))
                VSLb(req->vsl, SLT_Debug, "on waiting list <%p>", oh);

        wrk->stats->busy_sleep++;
        /*
         * The objhead reference transfers to the sess, we get it
         * back when the sess comes off the waiting list and
         * calls us again
         */
        req->hash_objhead = oh;
        req->wrk = NULL;
        req->waitinglist = 1;
        Lck_Unlock(&oh->mtx);
        return (HSH_BUSY);
}

/*---------------------------------------------------------------------
 * Pick the reqs we are going to rush from the waiting list
 */

static void
hsh_rush1(const struct worker *wrk, struct objhead *oh, struct rush *r, int max)
{
        unsigned u;
        struct req *req;

        if (max == 0)
                return;
        if (max == HSH_RUSH_POLICY)
                max = cache_param->rush_exponent;
        assert(max > 0);

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
        CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
        VTAILQ_INIT(&r->reqs);
        Lck_AssertHeld(&oh->mtx);
        for (u = 0; u < max; u++) {
                req = VTAILQ_FIRST(&oh->waitinglist);
                if (req == NULL)
                        break;
                CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
                wrk->stats->busy_wakeup++;
                AZ(req->wrk);
                VTAILQ_REMOVE(&oh->waitinglist, req, w_list);
                VTAILQ_INSERT_TAIL(&r->reqs, req, w_list);
                req->waitinglist = 0;
        }
}

/*---------------------------------------------------------------------
 * Rush reqs that came off the waiting list.
 */

static void
hsh_rush2(struct worker *wrk, struct rush *r)
{
        struct req *req;

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);

        while (!VTAILQ_EMPTY(&r->reqs)) {
                req = VTAILQ_FIRST(&r->reqs);
                CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
                VTAILQ_REMOVE(&r->reqs, req, w_list);
                DSL(DBG_WAITINGLIST, req->vsl->wid, "off waiting list");
                AN(req->transport->reembark);
                req->transport->reembark(wrk, req);
        }
}

/*---------------------------------------------------------------------
 * Purge an entire objhead
 */

unsigned
HSH_Purge(struct worker *wrk, struct objhead *oh, double ttl, double grace,
double keep)
{
        struct objcore *oc, **ocp;
        unsigned spc, ospc, nobj, n, n_tot = 0;
        int more = 0;
        double now;

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
        ospc = WS_Reserve(wrk->aws, 0);
        assert(ospc >= sizeof *ocp);
        /*
         * Because of "soft" purges, there might be oc's in the list that have
         * the OC_F_PURGED flag set. We do not want to let these slip through,
         * so we need to clear the flag before entering the do..while loop.
         */
        Lck_Lock(&oh->mtx);
        assert(oh->refcnt > 0);
        VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
                CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
                assert(oc->objhead == oh);
                oc->flags &= ~OC_F_PURGED;
        }
        Lck_Unlock(&oh->mtx);

        do {
                more = 0;
                spc = ospc;
                nobj = 0;
                ocp = (void*)wrk->aws->f;
                Lck_Lock(&oh->mtx);
                assert(oh->refcnt > 0);
                now = VTIM_real();
                VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
                        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
                        assert(oc->objhead == oh);
                        if (oc->flags & OC_F_BUSY) {
                                /*
                                 * We cannot purge busy objects here, because
                                 * their owners have special rights to them,
                                 * and may nuke them without concern for the
                                 * refcount, which by definition always must
                                 * be one, so they don't check.
                                 */
                                continue;
                        }
                        if (oc->flags & OC_F_DYING)
                                continue;
                        if (oc->flags & OC_F_PURGED) {
                                /*
                                 * We have already called EXP_Rearm on this
                                 * object, and we do not want to do it
                                 * again. Plus the space in the ocp array may
                                 * be limited.
                                 */
                                continue;
                        }
                        if (spc < sizeof *ocp) {
                                /* Iterate if aws is not big enough */
                                more = 1;
                                break;
                        }
                        oc->refcnt++;
                        spc -= sizeof *ocp;
                        ocp[nobj++] = oc;
                        oc->flags |= OC_F_PURGED;
                }
                Lck_Unlock(&oh->mtx);

                for (n = 0; n < nobj; n++) {
                        oc = ocp[n];
                        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
                        EXP_Rearm(oc, now, ttl, grace, keep);
                        (void)HSH_DerefObjCore(wrk, &oc, 0);
                }
                n_tot += nobj;
        } while (more);
        WS_Release(wrk->aws, 0);
        Pool_PurgeStat(n_tot);
        return (n_tot);
}

/*---------------------------------------------------------------------
 * Fail an objcore
 */

void
HSH_Fail(struct objcore *oc)
{
        struct objhead *oh;

        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
        oh = oc->objhead;
        CHECK_OBJ(oh, OBJHEAD_MAGIC);

        /*
         * We have to have either a busy bit, so that HSH_Lookup
         * will not consider this oc, or an object hung off the oc
         * so that it can consider it.
         */
        assert((oc->flags & OC_F_BUSY) || (oc->stobj->stevedore != NULL));

        Lck_Lock(&oh->mtx);
        oc->flags |= OC_F_FAILED;
        Lck_Unlock(&oh->mtx);
}

/*---------------------------------------------------------------------
 * Abandon a fetch we will not need
 */

void
HSH_Abandon(struct objcore *oc)
{
        struct objhead *oh;

        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
        oh = oc->objhead;
        CHECK_OBJ(oh, OBJHEAD_MAGIC);

        Lck_Lock(&oh->mtx);
        oc->flags |= OC_F_ABANDON;
        Lck_Unlock(&oh->mtx);
}

/*---------------------------------------------------------------------
 * Unbusy an objcore when the object is completely fetched.
 */

void
HSH_Unbusy(struct worker *wrk, struct objcore *oc)
{
        struct objhead *oh;
        struct rush rush;

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
        oh = oc->objhead;
        CHECK_OBJ(oh, OBJHEAD_MAGIC);
        INIT_OBJ(&rush, RUSH_MAGIC);

        AN(oc->stobj->stevedore);
        AN(oc->flags & OC_F_BUSY);
        assert(oh->refcnt > 0);
        assert(oc->refcnt > 0);

        if (!(oc->flags & OC_F_PRIVATE)) {
                BAN_NewObjCore(oc);
                AN(oc->ban);
        }

        /* XXX: pretouch neighbors on oh->objcs to prevent page-on under mtx */
        Lck_Lock(&oh->mtx);
        assert(oh->refcnt > 0);
        assert(oc->refcnt > 0);
        if (!(oc->flags & OC_F_PRIVATE))
                oc->refcnt++;                   // For EXP_Insert
        /* XXX: strictly speaking, we should sort in Date: order. */
        VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
        VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
        oc->flags &= ~OC_F_BUSY;
        if (!VTAILQ_EMPTY(&oh->waitinglist))
                hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY);
        Lck_Unlock(&oh->mtx);
        if (!(oc->flags & OC_F_PRIVATE))
                EXP_Insert(wrk, oc);
        hsh_rush2(wrk, &rush);
}

/*====================================================================
 * HSH_Kill()
 *
 * It's dead Jim, kick it...
 */

void
HSH_Kill(struct objcore *oc)
{

        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
        CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);

        Lck_Lock(&oc->objhead->mtx);
        oc->flags |= OC_F_DYING;
        Lck_Unlock(&oc->objhead->mtx);
        EXP_Remove(oc);
}

/*====================================================================
 * HSH_Snipe()
 *
 * If objcore is idle, gain a ref and mark it dead.
 */

int
HSH_Snipe(const struct worker *wrk, struct objcore *oc)
{
        int retval = 0;

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
        CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);

        if (oc->refcnt == 1 && !Lck_Trylock(&oc->objhead->mtx)) {
                if (oc->refcnt == 1 && !(oc->flags & OC_F_DYING)) {
                        oc->flags |= OC_F_DYING;
                        oc->refcnt++;
                        retval = 1;
                }
                Lck_Unlock(&oc->objhead->mtx);
        }
        if (retval)
                EXP_Remove(oc);
        return (retval);
}

/*---------------------------------------------------------------------
 * Gain a reference on an objcore
 */

void
HSH_Ref(struct objcore *oc)
{
        struct objhead *oh;

        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
        oh = oc->objhead;
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
        Lck_Lock(&oh->mtx);
        assert(oc->refcnt > 0);
        oc->refcnt++;
        Lck_Unlock(&oh->mtx);
}

/*---------------------------------------------------------------------
 * Gain a reference on the busyobj, if the objcore has one
 */

struct boc *
HSH_RefBoc(const struct objcore *oc)
{
        struct objhead *oh;
        struct boc *boc;

        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
        oh = oc->objhead;
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
        if (oc->boc == NULL)
                return (NULL);
        Lck_Lock(&oh->mtx);
        assert(oc->refcnt > 0);
        boc = oc->boc;
        CHECK_OBJ_ORNULL(boc, BOC_MAGIC);
        if (boc != NULL) {
                assert(boc->refcount > 0);
                if (boc->state < BOS_FINISHED)
                        boc->refcount++;
                else
                        boc = NULL;
        }
        Lck_Unlock(&oh->mtx);
        return (boc);
}
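/*
 * Drop a reference on the objcore's boc; the last reference detaches
 * the boc from the objcore and finishes it off via ObjBocDone().
 */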
void
HSH_DerefBoc(struct worker *wrk, struct objcore *oc)
{
        struct boc *boc;
        unsigned r;

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
        boc = oc->boc;
        CHECK_OBJ_NOTNULL(boc, BOC_MAGIC);
        Lck_Lock(&oc->objhead->mtx);
        assert(oc->refcnt > 0);
        assert(boc->refcount > 0);
        r = --boc->refcount;
        if (r == 0)
                oc->boc = NULL;
        Lck_Unlock(&oc->objhead->mtx);
        if (r == 0)
                ObjBocDone(wrk, oc, &boc);
}

/*--------------------------------------------------------------------
 * Dereference objcore
 *
 * Returns zero if target was destroyed.
 */

int
HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax)
{
        struct objcore *oc;
        struct objhead *oh;
        struct rush rush;
        unsigned r;

        AN(ocp);
        oc = *ocp;
        *ocp = NULL;

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
        assert(oc->refcnt > 0);
        INIT_OBJ(&rush, RUSH_MAGIC);

        oh = oc->objhead;
        CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);

        Lck_Lock(&oh->mtx);
        assert(oh->refcnt > 0);
        r = --oc->refcnt;
        if (!r)
                VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
        if (!VTAILQ_EMPTY(&oh->waitinglist))
                hsh_rush1(wrk, oh, &rush, rushmax);
        Lck_Unlock(&oh->mtx);
        hsh_rush2(wrk, &rush);
        if (r != 0)
                return (r);

        AZ(oc->exp_flags);

        BAN_DestroyObj(oc);
        AZ(oc->ban);

        if (oc->stobj->stevedore != NULL)
                ObjFreeObj(wrk, oc);
        ObjDestroy(wrk, &oc);

        /* Drop our ref on the objhead */
        assert(oh->refcnt > 0);
        (void)HSH_DerefObjHead(wrk, &oh);
        return (0);
}
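/*
 * Drop a reference on an objhead. The private objhead is only
 * refcounted here; for hashed objheads the final reference is not
 * released until the waiting list has been rushed empty, and the hash
 * implementation decides whether the objhead can be deleted.
 */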
int
HSH_DerefObjHead(struct worker *wrk, struct objhead **poh)
{
        struct objhead *oh;
        struct rush rush;
        int r;

        CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
        TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC);
        INIT_OBJ(&rush, RUSH_MAGIC);

        if (oh == private_oh) {
                assert(VTAILQ_EMPTY(&oh->waitinglist));
                Lck_Lock(&oh->mtx);
                assert(oh->refcnt > 1);
                oh->refcnt--;
                Lck_Unlock(&oh->mtx);
                return (1);
        }

        /*
         * Make absolutely certain that we do not let the final ref
         * disappear until the waitinglist is empty.
         * This is necessary because the reqs on the waiting list do
         * not hold any ref on the objhead of their own, and we cannot
         * just make them hold the same refs as the objcores; that
         * would confuse hashers.
         */
        Lck_Lock(&oh->mtx);
        while (oh->refcnt == 1 && !VTAILQ_EMPTY(&oh->waitinglist)) {
                hsh_rush1(wrk, oh, &rush, HSH_RUSH_ALL);
                Lck_Unlock(&oh->mtx);
                hsh_rush2(wrk, &rush);
                Lck_Lock(&oh->mtx);
        }
        Lck_Unlock(&oh->mtx);

        assert(oh->refcnt > 0);
        r = hash->deref(oh);
        if (!r)
                HSH_DeleteObjHead(wrk, oh);
        return (r);
}
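/*
 * Select the hash implementation, start it, and create the objhead
 * used for private (non-hashed) objects.
 */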
void
HSH_Init(const struct hash_slinger *slinger)
{

        assert(DIGEST_LEN == VSHA256_LEN);      /* avoid #include pollution */
        hash = slinger;
        if (hash->start != NULL)
                hash->start();
        private_oh = hsh_newobjhead();
        private_oh->refcnt = 1;
}