varnish-cache/lib/libvarnish/binary_heap.c
1
/*-
2
 * Copyright (c) 2006 Verdens Gang AS
3
 * Copyright (c) 2006-2011 Varnish Software AS
4
 * All rights reserved.
5
 *
6
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 *
29
 * Implementation of a binary heap API
30
 *
31
 * See also:
32
 *      http://dl.acm.org/citation.cfm?doid=1785414.1785434
33
 *      (or: http://queue.acm.org/detail.cfm?id=1814327)
34
 */
35
36
#include "config.h"
37
38
#include <limits.h>
39
#include <stdint.h>
40
#include <stdlib.h>
41
#include <unistd.h>
42
43
#include "vdef.h"
44
#include "vas.h"
45
#include "binary_heap.h"
46
47
/* Parameters --------------------------------------------------------*/
48
49
/*
50
 * The number of elements in a row has to be a compromise between
51
 * wasted space and number of memory allocations.
52
 * With 64k objects per row, there will be at least 5...10 seconds
53
 * between row additions on a very busy server.
54
 * At the same time, the worst case amount of wasted memory is kept
55
 * at a reasonable 1 MB -- two rows on 64bit system.
56
 * Finally, but without practical significance: 16 bits should be
57
 * easier for the compiler to optimize.
58
 */
59
#define ROW_SHIFT               16
60
61
62
#undef PARANOIA
63
64
/* Private definitions -----------------------------------------------*/
65
66
#define ROOT_IDX                1
67
68
#define ROW_WIDTH               (1 << ROW_SHIFT)
69
70
/*lint -emacro(572, ROW) shift 0 >> by 16 */
71
/*lint -emacro(835, ROW) 0 left of >> */
72
/*lint -emacro(778, ROW) const >> evaluates to zero */
73
#define ROW(b, n)               ((b)->array[(n) >> ROW_SHIFT])
74
75
/*lint -emacro(835, A) 0 left of & */
76
#define A(b, n)                 ROW(b, n)[(n) & (ROW_WIDTH - 1)]
77
78
struct binheap {
79
        unsigned                magic;
80
#define BINHEAP_MAGIC           0xf581581aU     /* from /dev/random */
81
        void                    *priv;
82
        binheap_cmp_t           *cmp;
83
        binheap_update_t        *update;
84
        void                    ***array;
85
        unsigned                rows;
86
        unsigned                length;
87
        unsigned                next;
88
        unsigned                page_size;
89
        unsigned                page_mask;
90
        unsigned                page_shift;
91
};
92
93
#define VM_AWARE
94
95
#ifdef VM_AWARE
96
97
static  unsigned
98 4587308
parent(const struct binheap *bh, unsigned u)
99
{
100
        unsigned po;
101
        unsigned v;
102
103 4587308
        assert(u != UINT_MAX);
104 4587308
        po = u & bh->page_mask;
105
106 4587308
        if (u < bh->page_size || po > 3) {
107 4567630
                v = (u & ~bh->page_mask) | (po >> 1);
108 19678
        } else if (po < 2) {
109 9716
                v = (u - bh->page_size) >> bh->page_shift;
110 9716
                v += v & ~(bh->page_mask >> 1);
111 9716
                v |= bh->page_size / 2;
112
        } else {
113 9962
                v = u - 2;
114
        }
115 4587308
        return (v);
116
}
117
118
static void
119 59798699
child(const struct binheap *bh, unsigned u, unsigned *a, unsigned *b)
120
{
121
        uintmax_t uu;
122
123 59798699
        if (u > bh->page_mask && (u & (bh->page_mask - 1)) == 0) {
124
                /* First two elements are magical except on the first page */
125 3788300
                *a = *b = u + 2;
126 56010399
        } else if (u & (bh->page_size >> 1)) {
127
                /* The bottom row is even more magical */
128 7791310
                *a = (u & ~bh->page_mask) >> 1;
129 7791310
                *a |= u & (bh->page_mask >> 1);
130 7791310
                *a += 1;
131 7791310
                uu = (uintmax_t)*a << bh->page_shift;
132 7791310
                *a = uu;
133 7791310
                if (*a == uu) {
134 7791294
                        *b = *a + 1;
135
                } else {
136
                        /*
137
                         * An unsigned is not big enough: clamp instead
138
                         * of truncating.  We do not support adding
139
                         * more than UINT_MAX elements anyway, so this
140
                         * is without consequence.
141
                         */
142 16
                        *a = UINT_MAX;
143 16
                        *b = UINT_MAX;
144
                }
145
        } else {
146
                /* The rest is as usual, only inside the page */
147 48219089
                *a = u + (u & bh->page_mask);
148 48219089
                *b = *a + 1;
149
        }
150
#ifdef PARANOIA
151
        assert(*a > 0);
152
        assert(*b > 0);
153
        if (*a != UINT_MAX) {
154
                assert(parent(bh, *a) == u);
155
                assert(parent(bh, *b) == u);
156
        }
157
#endif
158 59798699
}
159
160
161
#else
162
163
static unsigned
164
parent(const struct binheap *bh, unsigned u)
165
{
166
167
        (void)bh;
168
        return (u / 2);
169
}
170
171
static void
172
child(const struct binheap *bh, unsigned u, unsigned *a, unsigned *b)
173
{
174
175
        (void)bh;
176
        *a = u * 2;
177
        *b = *a + 1;
178
}
179
180
#endif
181
182
/* Implementation ----------------------------------------------------*/
183
184
static void
185 7550
binheap_addrow(struct binheap *bh)
186
{
187
        unsigned u;
188
189
        /* First make sure we have space for another row */
190 7550
        if (&ROW(bh, bh->length) >= bh->array + bh->rows) {
191 4
                u = bh->rows * 2;
192 4
                bh->array = realloc(bh->array, sizeof(*bh->array) * u);
193 4
                assert(bh->array != NULL);
194
195
                /* NULL out new pointers */
196 14
                while (bh->rows < u)
197 6
                        bh->array[bh->rows++] = NULL;
198
        }
199 7550
        assert(ROW(bh, bh->length) == NULL);
200 7550
        ROW(bh, bh->length) = malloc(sizeof(**bh->array) * ROW_WIDTH);
201 7550
        assert(ROW(bh, bh->length));
202 7550
        bh->length += ROW_WIDTH;
203 7550
}
204
205
struct binheap *
206 7540
binheap_new(void *priv, binheap_cmp_t *cmp_f, binheap_update_t *update_f)
207
{
208
        struct binheap *bh;
209
        unsigned u;
210
211 7540
        bh = calloc(1, sizeof *bh);
212 7540
        if (bh == NULL)
213 0
                return (bh);
214 7540
        bh->priv = priv;
215
216 7540
        bh->page_size = (unsigned)getpagesize() / sizeof (void *);
217 7540
        bh->page_mask = bh->page_size - 1;
218 7540
        AZ(bh->page_size & bh->page_mask);      /* power of two */
219 7540
        for (u = 1; (1U << u) != bh->page_size; u++)
220
                ;
221 7540
        bh->page_shift = u;
222 7540
        assert(bh->page_size <= (sizeof(**bh->array) * ROW_WIDTH));
223
224 7540
        bh->cmp = cmp_f;
225 7540
        bh->update = update_f;
226 7540
        bh->next = ROOT_IDX;
227
#ifdef TEST_DRIVER
228 2
        bh->rows = 1;           /* A tiny-ish number */
229
#else
230 7538
        bh->rows = 16;          /* A tiny-ish number */
231
#endif
232 7540
        bh->array = calloc(bh->rows, sizeof *bh->array);
233 7540
        assert(bh->array != NULL);
234 7540
        binheap_addrow(bh);
235 7540
        A(bh, ROOT_IDX) = NULL;
236 7540
        bh->magic = BINHEAP_MAGIC;
237 7540
        return (bh);
238
}
239
240
static void
241 117209390
binheap_update(const struct binheap *bh, unsigned u)
242
{
243 117209390
        assert(bh != NULL);
244 117209390
        assert(bh->magic == BINHEAP_MAGIC);
245 117209390
        assert(u < bh->next);
246 117209390
        assert(A(bh, u) != NULL);
247 117209390
        if (bh->update != NULL)
248 117209390
                bh->update(bh->priv, A(bh, u), u);
249 117209391
}
250
251
static void
252 55793961
binhead_swap(const struct binheap *bh, unsigned u, unsigned v)
253
{
254
        void *p;
255
256 55793961
        assert(bh != NULL);
257 55793961
        assert(bh->magic == BINHEAP_MAGIC);
258 55793961
        assert(u < bh->next);
259 55793961
        assert(A(bh, u) != NULL);
260 55793961
        assert(v < bh->next);
261 55793961
        assert(A(bh, v) != NULL);
262 55793961
        p = A(bh, u);
263 55793961
        A(bh, u) = A(bh, v);
264 55793961
        A(bh, v) = p;
265 55793961
        binheap_update(bh, u);
266 55793961
        binheap_update(bh, v);
267 55793961
}
268
269
static unsigned
270 7109061
binheap_trickleup(const struct binheap *bh, unsigned u)
271
{
272
        unsigned v;
273
274 7109061
        assert(bh != NULL); assert(bh->magic == BINHEAP_MAGIC);
275 7109061
        assert(u < bh->next);
276 7109061
        assert(A(bh, u) != NULL);
277
278 14247907
        while (u > ROOT_IDX) {
279 4587308
                assert(u < bh->next);
280 4587308
                assert(A(bh, u) != NULL);
281 4587308
                v = parent(bh, u);
282 4587308
                assert(v < u);
283 4587308
                assert(v < bh->next);
284 4587308
                assert(A(bh, v) != NULL);
285 4587308
                if (!bh->cmp(bh->priv, A(bh, u), A(bh, v)))
286 4557523
                        break;
287 29785
                binhead_swap(bh, u, v);
288 29785
                u = v;
289
        }
290 7109061
        return (u);
291
}
292
293
static unsigned
294 4034337
binheap_trickledown(const struct binheap *bh, unsigned u)
295
{
296
        unsigned v1, v2;
297
298 4034337
        assert(bh != NULL);
299 4034337
        assert(bh->magic == BINHEAP_MAGIC);
300 4034337
        assert(u < bh->next);
301 4034337
        assert(A(bh, u) != NULL);
302
303
        while (1) {
304 59798513
                assert(u < bh->next);
305 59798513
                assert(A(bh, u) != NULL);
306 59798513
                child(bh, u, &v1, &v2);
307 59798513
                assert(v1 > 0);
308 59798513
                assert(v2 > 0);
309 59798513
                assert(v1 <= v2);
310
311 59798513
                if (v1 >= bh->next)
312 4033650
                        return (u);
313
314 55764863
                assert(A(bh, v1) != NULL);
315 55764863
                if (v1 != v2 && v2 < bh->next) {
316 51962358
                        assert(A(bh, v2) != NULL);
317 51962358
                        if (bh->cmp(bh->priv, A(bh, v2), A(bh, v1)))
318 6844
                                v1 = v2;
319
                }
320 55764863
                assert(v1 < bh->next);
321 55764863
                assert(A(bh, v1) != NULL);
322 55764863
                if (bh->cmp(bh->priv, A(bh, u), A(bh, v1)))
323 687
                        return (u);
324 55764176
                binhead_swap(bh, u, v1);
325 55764176
                u = v1;
326 55764176
        }
327
}
328
329
void
330 3074725
binheap_insert(struct binheap *bh, void *p)
331
{
332
        unsigned u;
333
334 3074725
        assert(bh != NULL);
335 3074725
        assert(bh->magic == BINHEAP_MAGIC);
336 3074725
        assert(bh->length >= bh->next);
337 3074725
        if (bh->length == bh->next)
338 10
                binheap_addrow(bh);
339 3074725
        assert(bh->length > bh->next);
340 3074725
        u = bh->next++;
341 3074725
        A(bh, u) = p;
342 3074725
        binheap_update(bh, u);
343 3074725
        (void)binheap_trickleup(bh, u);
344 3074723
        assert(u < bh->next);
345 3074723
        assert(A(bh, u) != NULL);
346 3074723
}
347
348
349
#ifdef PARANOIA
350
static void
351
chk(const struct binheap *bh)
352
{
353
        unsigned u, v;
354
355
        for (u = 2; u < bh->next; u++) {
356
                v = parent(bh, u);
357
                AZ(bh->cmp(bh->priv, A(bh, u), A(bh, v)));
358
        }
359
}
360
#endif
361
362
void *
363 3117950
binheap_root(const struct binheap *bh)
364
{
365
366 3117950
        assert(bh != NULL);
367 3117950
        assert(bh->magic == BINHEAP_MAGIC);
368
#ifdef PARANOIA
369
        chk(bh);
370
#endif
371 3117950
        return (A(bh, ROOT_IDX));
372
}
373
374
/*
375
 * It may seem counter-intuitive that we delete by replacement with
376
 * the tail object. "That's almost certain to not belong there, in
377
 * particular when we delete the root ?" is the typical reaction.
378
 *
379
 * If we tried to trickle up into the empty position, we would,
380
 * eventually, end up with a hole in the bottom row, at which point
381
 * we would move the tail object there.
382
 * But there is no guarantee that the tail object would not need to
383
 * trickle up from that position, in fact, it might be the new root
384
 * of this half of the subtree.
385
 * The total number of operations is guaranteed to be at least
386
 * N{height} downward selections, because we have to get the hole
387
 * all the way down, but in addition to that, we may get up to
388
 * N{height}-1 upward trickles.
389
 *
390
 * When we fill the hole with the tail object, the worst case is
391
 * that it trickles all the way up to of this half-tree, or down
392
 * to become the tail object again.
393
 *
394
 * In other words worst case is N{height} up or downward trickles.
395
 * But there is a decent chance that it does not make it all the way.
396
 */
397
398
void
399 2551410
binheap_delete(struct binheap *bh, unsigned idx)
400
{
401
402 2551410
        assert(bh != NULL);
403 2551410
        assert(bh->magic == BINHEAP_MAGIC);
404 2551410
        assert(bh->next > ROOT_IDX);
405 2551410
        assert(idx < bh->next);
406 2551410
        assert(idx > 0);
407 2551410
        assert(A(bh, idx) != NULL);
408 2551410
        bh->update(bh->priv, A(bh, idx), BINHEAP_NOIDX);
409 2551411
        if (idx == --bh->next) {
410 4667
                A(bh, bh->next) = NULL;
411 2556078
                return;
412
        }
413 2546744
        A(bh, idx) = A(bh, bh->next);
414 2546744
        A(bh, bh->next) = NULL;
415 2546744
        binheap_update(bh, idx);
416 2546744
        idx = binheap_trickleup(bh, idx);
417 2546744
        assert(idx < bh->next);
418 2546744
        assert(idx > 0);
419 2546744
        assert(A(bh, idx) != NULL);
420 2546744
        idx = binheap_trickledown(bh, idx);
421 2546744
        assert(idx < bh->next);
422 2546744
        assert(idx > 0);
423 2546744
        assert(A(bh, idx) != NULL);
424
425
        /*
426
         * We keep a hysteresis of one full row before we start to
427
         * return space to the OS to avoid silly behaviour around
428
         * row boundaries.
429
         */
430 2546744
        if (bh->next + 2 * ROW_WIDTH <= bh->length) {
431 4
                free(ROW(bh, bh->length - 1));
432 4
                ROW(bh, bh->length - 1) = NULL;
433 4
                bh->length -= ROW_WIDTH;
434
        }
435
}
436
437
/*
438
 * Move an item up/down after changing its key value
439
 */
440
441
void
442 1487593
binheap_reorder(const struct binheap *bh, unsigned idx)
443
{
444
445 1487593
        assert(bh != NULL);
446 1487593
        assert(bh->magic == BINHEAP_MAGIC);
447 1487593
        assert(bh->next > ROOT_IDX);
448 1487593
        assert(idx < bh->next);
449 1487593
        assert(idx > 0);
450 1487593
        assert(A(bh, idx) != NULL);
451 1487593
        idx = binheap_trickleup(bh, idx);
452 1487593
        assert(idx < bh->next);
453 1487593
        assert(idx > 0);
454 1487593
        assert(A(bh, idx) != NULL);
455 1487593
        idx = binheap_trickledown(bh, idx);
456 1487593
        assert(idx < bh->next);
457 1487593
        assert(idx > 0);
458 1487593
        assert(A(bh, idx) != NULL);
459 1487593
}
460
461
#ifdef TEST_DRIVER
462
463
#include <stdio.h>
464
#include <string.h>
465
466
#include "vrnd.h"
467
#include "miniobj.h"
468
469
/* Test driver -------------------------------------------------------*/
470
471
struct foo {
472
        unsigned        magic;
473
#define FOO_MAGIC       0x23239823
474
        unsigned        idx;
475
        unsigned        key;
476
        unsigned        n;
477
};
478
479
#define M 500083        /* Number of operations */
480
#define N 131101        /* Number of items */
481
#define R -1            /* Random modulus */
482
483
struct foo *ff[N];
484
485
static int v_matchproto_(binheap_cmp_t)
486 112179502
cmp(void *priv, const void *a, const void *b)
487
{
488
        const struct foo *fa, *fb;
489
490
        (void)priv;
491 112179502
        CAST_OBJ_NOTNULL(fa, a, FOO_MAGIC);
492 112179502
        CAST_OBJ_NOTNULL(fb, b, FOO_MAGIC);
493 112179502
        return (fa->key < fb->key);
494
}
495
496
static void v_matchproto_(binheap_update_t)
497 119529820
update(void *priv, void *a, unsigned u)
498
{
499
        struct foo *fa;
500
501
        (void)priv;
502 119529820
        CAST_OBJ_NOTNULL(fa, a, FOO_MAGIC);
503 119529820
        fa->idx = u;
504 119529820
}
505
506
#ifdef CHECK2
507
static void
508
chk2(struct binheap *bh)
509
{
510
        unsigned u, v;
511
        struct foo *fa, *fb;
512
513
        for (u = 2; u < bh->next; u++) {
514
                v = parent(bh, u);
515
                fa = A(bh, u);
516
                fb = A(bh, v);
517
                assert(fa->key >= fb->key);
518
        }
519
}
520
#endif
521
522
int
523 2
main(void)
524
{
525
        struct binheap *bh;
526
        unsigned j, u, v, lr, n;
527
        struct foo *fp;
528
529 2
        VRND_SeedAll();
530 2
        VRND_SeedTestable(1);
531 2
        bh = binheap_new(NULL, cmp, update);
532 64
        for (n = 2; n; n += n) {
533 62
                child(bh, n - 1, &u, &v);
534 62
                child(bh, n, &u, &v);
535 62
                child(bh, n + 1, &u, &v);
536
        }
537
538 2
        lr = 0; /* unconfuse some compilers... */
539
540 6
        for (j = 0; j < 2; j++) {
541
                /* First insert our N elements */
542 524408
                for (u = 0; u < N; u++) {
543 524404
                        lr = VRND_RandomTestable() % R;
544 524404
                        ALLOC_OBJ(ff[u], FOO_MAGIC);
545 524404
                        assert(ff[u] != NULL);
546 524404
                        ff[u]->key = lr;
547 524404
                        ff[u]->n = u;
548 524404
                        binheap_insert(bh, ff[u]);
549
550 524404
                        fp = binheap_root(bh);
551 524404
                        assert(fp->idx == 1);
552 524404
                        assert(fp->key <= lr);
553
                }
554 4
                fprintf(stderr, "%d inserts OK\n", N);
555
                /* For M cycles, pick the root, insert new */
556 2000336
                for (u = 0; u < M; u++) {
557 2000332
                        fp = binheap_root(bh);
558 2000332
                        CHECK_OBJ_NOTNULL(fp, FOO_MAGIC);
559 2000332
                        assert(fp->idx == 1);
560
561
                        /*
562
                         * It cannot possibly be larger than the last
563
                         * value we added
564
                         */
565 2000332
                        assert(fp->key <= lr);
566 2000332
                        binheap_delete(bh, fp->idx);
567
568 2000332
                        n = fp->n;
569 2000332
                        ALLOC_OBJ(ff[n], FOO_MAGIC);
570 2000332
                        assert(ff[n] != NULL);
571 2000332
                        FREE_OBJ(fp);
572 2000332
                        fp = ff[n];
573 2000332
                        fp->n = n;
574
575 2000332
                        lr = VRND_RandomTestable() % R;
576 2000332
                        fp->key = lr;
577 2000332
                        binheap_insert(bh, fp);
578
                }
579 4
                fprintf(stderr, "%d replacements OK\n", M);
580
                /* The remove everything */
581 4
                lr = 0;
582 524408
                for (u = 0; u < N; u++) {
583 524404
                        fp = binheap_root(bh);
584 524404
                        CHECK_OBJ_NOTNULL(fp, FOO_MAGIC);
585 524404
                        assert(fp->idx == 1);
586 524404
                        assert(fp->key >= lr);
587 524404
                        lr = fp->key;
588 524404
                        binheap_delete(bh, fp->idx);
589 524404
                        ff[fp->n] = NULL;
590 524404
                        FREE_OBJ(fp);
591
                }
592 4
                fprintf(stderr, "%d removes OK\n", N);
593
594 2000336
                for (u = 0; u < M; u++) {
595 2000332
                        v = VRND_RandomTestable() % N;
596 2000332
                        if (ff[v] != NULL) {
597 1487430
                                CHECK_OBJ_NOTNULL(ff[v], FOO_MAGIC);
598 1487430
                                AN(ff[v]->idx);
599 1487430
                                if (ff[v]->key & 1) {
600 0
                                        binheap_delete(bh, ff[v]->idx);
601 0
                                        assert(ff[v]->idx == BINHEAP_NOIDX);
602 0
                                        FREE_OBJ(ff[v]);
603 0
                                        ff[v] = NULL;
604
                                } else {
605 1487430
                                        ff[v]->key = VRND_RandomTestable() % R;
606 1487430
                                        binheap_reorder(bh, ff[v]->idx);
607
                                }
608
                        } else {
609 512902
                                ALLOC_OBJ(ff[v], FOO_MAGIC);
610 512902
                                assert(ff[v] != NULL);
611 512902
                                ff[v]->key = VRND_RandomTestable() % R;
612 512902
                                binheap_insert(bh, ff[v]);
613 512902
                                CHECK_OBJ_NOTNULL(ff[v], FOO_MAGIC);
614 512902
                                AN(ff[v]->idx);
615
                        }
616
#ifdef CHECK2
617
                        chk2(bh);
618
#endif
619
                }
620 4
                fprintf(stderr, "%d updates OK\n", M);
621
        }
622 2
        return (0);
623
}
624
#endif