varnish-cache/bin/varnishd/http1/cache_http1_line.c
/*-
 * Copyright (c) 2006 Verdens Gang AS
 * Copyright (c) 2006-2011 Varnish Software AS
 * All rights reserved.
 *
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Write data to fd
 * We try to use writev() if possible, in order to minimize the number
 * of syscalls made and packets sent.  It also just might allow the
 * worker thread to complete the request without holding stuff locked.
 *
 * XXX: the chunked header (generated in Flush) and tail (EndChunk)
 *      are not accounted for in the size_t returned.  Obvious ideas:
 *      - add a size_t return value to Flush and EndChunk
 *      - base accounting on (struct v1l).cnt
 */
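
/*
 * Rough usage sketch (illustrative only; ws, fd, vsl and send_timeout
 * are assumed to come from the caller's session and parameters):
 *
 *      struct v1l *v1l;
 *      stream_close_t sc;
 *      uint64_t cnt;
 *
 *      v1l = V1L_Open(ws, &fd, vsl, VTIM_real() + send_timeout, 0);
 *      if (v1l != NULL) {
 *              (void)V1L_Write(v1l, "HTTP/1.1 200 OK\r\n", -1);
 *              sc = V1L_Close(&v1l, &cnt);     // flushes pending iovecs
 *      }
 */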

#include "config.h"

#include <sys/uio.h>
#include "cache/cache_varnishd.h"
#include "cache/cache_filter.h"

#include <stdio.h>

#include "cache_http1.h"
#include "vtim.h"

/*--------------------------------------------------------------------*/

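/*
 * Bookkeeping, as used below: siov is the allocated size of the iov
 * array, niov the number of entries currently filled and liov the
 * number of bytes they hold.  Under chunked encoding, ciov indexes
 * the iovec reserved for the chunk header and cliov counts the bytes
 * of the current chunk.
 */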
struct v1l {
        unsigned                magic;
#define V1L_MAGIC               0x2f2142e5
        int                     *wfd;
        stream_close_t          werr;   /* valid after V1L_Flush() */
        struct iovec            *iov;
        int                     siov;
        int                     niov;
        size_t                  liov;
        size_t                  cliov;
        int                     ciov;   /* Chunked header marker */
        vtim_real               deadline;
        struct vsl_log          *vsl;
        uint64_t                cnt;    /* Flushed byte count */
        struct ws               *ws;
        uintptr_t               ws_snap;
        void                    **vdp_priv;
};


/*--------------------------------------------------------------------
 * For niov == 0, reserve the workspace for the maximum number of iovs;
 * otherwise, for up to niov.
 */
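
/*
 * WS_ReserveLumps() below grabs as many iovec-sized lumps as the
 * workspace will hold; after capping to IOV_MAX and the caller's niov,
 * WS_Release() keeps just the iovecs actually retained and hands the
 * rest of the reservation back.
 */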

struct v1l *
V1L_Open(struct ws *ws, int *fd, struct vsl_log *vsl,
    vtim_real deadline, unsigned niov)
{
        struct v1l *v1l;
        unsigned u;
        uintptr_t ws_snap;
        size_t sz;

        if (WS_Overflowed(ws))
                return (NULL);

        if (niov != 0)
                assert(niov >= 3);

        ws_snap = WS_Snapshot(ws);

        v1l = WS_Alloc(ws, sizeof *v1l);
        if (v1l == NULL)
                return (NULL);
        INIT_OBJ(v1l, V1L_MAGIC);

        v1l->ws = ws;
        v1l->ws_snap = ws_snap;

        u = WS_ReserveLumps(ws, sizeof(struct iovec));
        if (u < 3) {
                /* Must have at least 3 in case of chunked encoding */
                WS_Release(ws, 0);
                WS_MarkOverflow(ws);
                return (NULL);
        }
        if (u > IOV_MAX)
                u = IOV_MAX;
        if (niov != 0 && u > niov)
                u = niov;
        v1l->iov = WS_Reservation(ws);
        v1l->siov = (int)u;
        v1l->ciov = (int)u;
        v1l->wfd = fd;
        v1l->deadline = deadline;
        v1l->vsl = vsl;
        v1l->werr = SC_NULL;

        sz = u * sizeof(struct iovec);
        assert(sz < UINT_MAX);
        WS_Release(ws, (unsigned)sz);
        return (v1l);
}

void
V1L_NoRollback(struct v1l *v1l)
{

        CHECK_OBJ_NOTNULL(v1l, V1L_MAGIC);
        v1l->ws_snap = 0;
}

stream_close_t
V1L_Close(struct v1l **v1lp, uint64_t *cnt)
{
        struct v1l *v1l;
        struct ws *ws;
        uintptr_t ws_snap;
        stream_close_t sc;

        AN(cnt);
        TAKE_OBJ_NOTNULL(v1l, v1lp, V1L_MAGIC);
        if (v1l->vdp_priv != NULL) {
                assert(*v1l->vdp_priv == v1l);
                *v1l->vdp_priv = NULL;
        }
        sc = V1L_Flush(v1l);
        *cnt = v1l->cnt;
        ws = v1l->ws;
        ws_snap = v1l->ws_snap;
        ZERO_OBJ(v1l, sizeof *v1l);
        if (ws_snap != 0)
                WS_Rollback(ws, ws_snap);
        return (sc);
}

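/*
 * Drop the first abytes bytes from the iov array after a short
 * writev().  Worked example: with iovecs of 10, 20 and 30 bytes and
 * abytes == 25, j == 0 only advances used to 10; the cutoff falls in
 * the second iovec, which is trimmed to its last 5 bytes, and the
 * remaining two iovecs are memmove()d to the front (niov 3 -> 2).
 */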
static void
v1l_prune(struct v1l *v1l, ssize_t abytes)
{
        size_t used = 0;
        size_t sz, bytes, used_here;
        int j;

        assert(abytes > 0);
        bytes = (size_t)abytes;

        for (j = 0; j < v1l->niov; j++) {
                if (used + v1l->iov[j].iov_len > bytes) {
                        /* Cutoff is in this iov */
                        used_here = bytes - used;
                        v1l->iov[j].iov_len -= used_here;
                        v1l->iov[j].iov_base =
                            (char*)v1l->iov[j].iov_base + used_here;
                        sz = (unsigned)v1l->niov - (unsigned)j;
                        sz *= sizeof(struct iovec);
                        memmove(v1l->iov, &v1l->iov[j], sz);
                        v1l->niov -= j;
                        assert(v1l->liov >= bytes);
                        v1l->liov -= bytes;
                        return;
                }
                used += v1l->iov[j].iov_len;
        }
        AZ(v1l->liov);
}

stream_close_t
V1L_Flush(struct v1l *v1l)
{
        ssize_t i;
        size_t sz;
        int err;
        char cbuf[32];

        CHECK_OBJ_NOTNULL(v1l, V1L_MAGIC);
        CHECK_OBJ_NOTNULL(v1l->werr, STREAM_CLOSE_MAGIC);
        AN(v1l->wfd);

        assert(v1l->niov <= v1l->siov);

        if (*v1l->wfd >= 0 && v1l->liov > 0 && v1l->werr == SC_NULL) {
                if (v1l->ciov < v1l->siov && v1l->cliov > 0) {
                        /* Add chunk head & tail */
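                        /*
                         * For cliov == 0x1234, cbuf becomes "001234\r\n":
                         * the head iovec sends the whole string ahead of
                         * the payload, and the tail iovec set up below
                         * reuses its final CRLF as the chunk terminator
                         * after the payload.
                         */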
                        bprintf(cbuf, "00%zx\r\n", v1l->cliov);
                        sz = strlen(cbuf);
                        v1l->iov[v1l->ciov].iov_base = cbuf;
                        v1l->iov[v1l->ciov].iov_len = sz;
                        v1l->liov += sz;

                        /* This is OK, because siov was --'ed */
                        v1l->iov[v1l->niov].iov_base = cbuf + sz - 2;
                        v1l->iov[v1l->niov++].iov_len = 2;
                        v1l->liov += 2;
                } else if (v1l->ciov < v1l->siov) {
                        v1l->iov[v1l->ciov].iov_base = cbuf;
                        v1l->iov[v1l->ciov].iov_len = 0;
                }

                i = 0;
                err = 0;
                do {
                        if (VTIM_real() > v1l->deadline) {
                                VSLb(v1l->vsl, SLT_Debug,
                                    "Hit total send timeout, "
                                    "wrote = %zd/%zd; not retrying",
                                    i, v1l->liov);
                                i = -1;
                                break;
                        }

                        i = writev(*v1l->wfd, v1l->iov, v1l->niov);
                        if (i > 0) {
                                v1l->cnt += (size_t)i;
                                if ((size_t)i == v1l->liov)
                                        break;
                        }

                        /* We hit a timeout, and some data may have been
                         * sent: remove the sent data from the start of
                         * the I/O vector, then retry.
                         *
                         * XXX: Add a "minimum sent data per timeout"
                         * counter to prevent slowloris attacks.
                         */

                        err = errno;

                        if (err == EWOULDBLOCK) {
                                VSLb(v1l->vsl, SLT_Debug,
                                    "Hit idle send timeout, "
                                    "wrote = %zd/%zd; retrying",
                                    i, v1l->liov);
                        }

                        if (i > 0)
                                v1l_prune(v1l, i);
                } while (i > 0 || err == EWOULDBLOCK);

                if (i <= 0) {
                        VSLb(v1l->vsl, SLT_Debug,
                            "Write error, retval = %zd, len = %zd, errno = %s",
                            i, v1l->liov, VAS_errtxt(err));
                        assert(v1l->werr == SC_NULL);
                        if (err == EPIPE)
                                v1l->werr = SC_REM_CLOSE;
                        else
                                v1l->werr = SC_TX_ERROR;
                        errno = err;
                }
        }
        v1l->liov = 0;
        v1l->cliov = 0;
        v1l->niov = 0;
        if (v1l->ciov < v1l->siov)
                v1l->ciov = v1l->niov++;
        CHECK_OBJ_NOTNULL(v1l->werr, STREAM_CLOSE_MAGIC);
        return (v1l->werr);
}

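/*
 * Queue a write of alen bytes at ptr; alen == -1 means strlen(ptr).
 * The data is only referenced, not copied, so it must stay valid
 * until the next V1L_Flush().  Flushes by itself when the iovec
 * array runs full.
 */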
size_t
V1L_Write(struct v1l *v1l, const void *ptr, ssize_t alen)
{
        size_t len = 0;

        CHECK_OBJ_NOTNULL(v1l, V1L_MAGIC);
        AN(v1l->wfd);
        if (alen == 0 || *v1l->wfd < 0)
                return (0);
        if (alen > 0)
                len = (size_t)alen;
        else if (alen == -1)
                len = strlen(ptr);
        else
                WRONG("alen");

        assert(v1l->niov < v1l->siov);
        v1l->iov[v1l->niov].iov_base = TRUST_ME(ptr);
        v1l->iov[v1l->niov].iov_len = len;
        v1l->liov += len;
        v1l->niov++;
        v1l->cliov += len;
        if (v1l->niov >= v1l->siov) {
                (void)V1L_Flush(v1l);
                VSC_C_main->http1_iovs_flush++;
        }
        return (len);
}

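/*
 * Reserve an iovec for the chunked header: siov-- keeps one slot in
 * hand, and ciov marks where V1L_Flush() will patch in the hex chunk
 * header once the chunk's size is known.
 */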
void
V1L_Chunked(struct v1l *v1l)
{

        CHECK_OBJ_NOTNULL(v1l, V1L_MAGIC);

        assert(v1l->ciov == v1l->siov);
        assert(v1l->siov >= 3);
        /*
         * If there is no space for a chunk header, a chunk of data and
         * a chunk tail, we might as well flush right away.
         */
        if (v1l->niov + 3 >= v1l->siov) {
                (void)V1L_Flush(v1l);
                VSC_C_main->http1_iovs_flush++;
        }
        v1l->siov--;
        v1l->ciov = v1l->niov++;
        v1l->cliov = 0;
        assert(v1l->ciov < v1l->siov);
        assert(v1l->niov < v1l->siov);
}

/*
 * XXX: It is not worth the complexity to attempt to get the
 * XXX: end of chunk into the V1L_Flush(), because most of the time,
 * XXX: if not always, that is a no-op anyway, because the calling
 * XXX: code already called V1L_Flush() to release local storage.
 */

void
V1L_EndChunk(struct v1l *v1l)
{

        CHECK_OBJ_NOTNULL(v1l, V1L_MAGIC);

        assert(v1l->ciov < v1l->siov);
        (void)V1L_Flush(v1l);
        v1l->siov++;
        v1l->ciov = v1l->siov;
        v1l->niov = 0;
        v1l->cliov = 0;
        (void)V1L_Write(v1l, "0\r\n\r\n", -1);
}

/*--------------------------------------------------------------------
 * VDP using V1L
 */

/* remember priv pointer for V1L_Close() to clear */
static int v_matchproto_(vdp_init_f)
v1l_init(VRT_CTX, struct vdp_ctx *vdc, void **priv)
{
        struct v1l *v1l;

        (void) ctx;
        (void) vdc;
        AN(priv);
        CAST_OBJ_NOTNULL(v1l, *priv, V1L_MAGIC);

        v1l->vdp_priv = priv;
        return (0);
}

static int v_matchproto_(vdp_bytes_f)
v1l_bytes(struct vdp_ctx *vdc, enum vdp_action act, void **priv,
    const void *ptr, ssize_t len)
{
        size_t wl = 0;

        CHECK_OBJ_NOTNULL(vdc, VDP_CTX_MAGIC);
        AN(priv);

        AZ(vdc->nxt);           /* always at the bottom of the pile */

        if (len > 0)
                wl = V1L_Write(*priv, ptr, len);
        if (act > VDP_NULL && V1L_Flush(*priv) != SC_NULL)
                return (-1);
        if ((size_t)len != wl)
                return (-1);
        return (0);
}

/*--------------------------------------------------------------------
 * VDPIO using V1L
 *
 * This is deliberately half-baked to reduce work in progress while
 * heading towards VAI/VDPIO: we update the v1l with the scarab, which
 * we return unmodified.
 */

/* remember priv pointer for V1L_Close() to clear */
static int v_matchproto_(vpio_init_f)
v1l_io_init(VRT_CTX, struct vdp_ctx *vdc, void **priv, int capacity)
{
        struct v1l *v1l;

        (void) ctx;
        (void) vdc;
        AN(priv);

        CAST_OBJ_NOTNULL(v1l, *priv, V1L_MAGIC);

        v1l->vdp_priv = priv;
        return (capacity);
}

static int v_matchproto_(vpio_init_f)
v1l_io_upgrade(VRT_CTX, struct vdp_ctx *vdc, void **priv, int capacity)
{
        return (v1l_io_init(ctx, vdc, priv, capacity));
}

/*
 * API note
 *
 * This VDP is special in that it does not transform data, but prepares
 * the write.  From the perspective of VDPIO, its current state is only
 * transitional.
 *
 * Because the VDP prepares the actual writes but the caller needs to
 * return the scarab's leases, the caller is required to empty the
 * scarab after V1L_Flush()'ing.
 */

static int v_matchproto_(vdpio_lease_f)
v1l_io_lease(struct vdp_ctx *vdc, struct vdp_entry *this, struct vscarab *scarab)
{
        struct v1l *v1l;
        struct viov *v;
        int r;

        CHECK_OBJ_NOTNULL(vdc, VDP_CTX_MAGIC);
        CHECK_OBJ_NOTNULL(this, VDP_ENTRY_MAGIC);
        CAST_OBJ_NOTNULL(v1l, this->priv, V1L_MAGIC);
        VSCARAB_CHECK(scarab);
        AZ(scarab->used);       // see note above
        this->calls++;
        r = vdpio_pull(vdc, this, scarab);
        if (r < 0)
                return (r);
        VSCARAB_FOREACH(v, scarab)
                this->bytes_in += V1L_Write(v1l, v->iov.iov_base, v->iov.iov_len);
        return (r);
}

const struct vdp * const VDP_v1l = &(struct vdp){
        .name =         "V1B",
        .init =         v1l_init,
        .bytes =        v1l_bytes,

        .io_init =      v1l_io_init,
        .io_upgrade =   v1l_io_upgrade,
        .io_lease =     v1l_io_lease,
};