[experimental-ims] ace762d Merge with trunk as of 2011-11-22
Geoff Simmons
geoff at varnish-cache.org
Mon Jan 9 21:52:42 CET 2012
commit ace762d59b261f22d14849677e297e70e4f6b1d0
Merge: 71ee192 aed74d6
Author: Geoff Simmons <geoff at uplex.de>
Date: Fri Nov 25 11:17:52 2011 +0100
Merge with trunk as of 2011-11-22
diff --cc bin/varnishd/cache/cache.h
index 0000000,4b66309..4df2e3d
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache.h
+++ b/bin/varnishd/cache/cache.h
@@@ -1,0 -1,1032 +1,1040 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+ /*
+ * This macro can be used in .h files to isolate bits that the manager
+ * should not (need to) see, such as pthread mutexes etc.
+ */
+ #define VARNISH_CACHE_CHILD 1
+
+ #include "common/common.h"
+
+ #include "vapi/vsc_int.h"
+ #include "vapi/vsl_int.h"
+
+ #include <sys/socket.h>
+
+ #include <pthread.h>
+ #ifdef HAVE_PTHREAD_NP_H
+ #include <pthread_np.h>
+ #endif
+ #include <stdarg.h>
+ #include <string.h>
+ #include <limits.h>
+ #include <unistd.h>
+
+ #if defined(HAVE_EPOLL_CTL)
+ #include <sys/epoll.h>
+ #endif
+
+
+ #include "common/params.h"
+
+ enum body_status {
+ #define BODYSTATUS(U,l) BS_##U,
+ #include "tbl/body_status.h"
+ #undef BODYSTATUS
+ };
+
+ static inline const char *
+ body_status(enum body_status e)
+ {
+ switch(e) {
+ #define BODYSTATUS(U,l) case BS_##U: return (#l);
+ #include "tbl/body_status.h"
+ #undef BODYSTATUS
+ default:
+ return ("?");
+ }
+ }
+
+ /*
+ * NB: HDR_STATUS is only used in cache_http.c, everybody else uses the
+ * http->status integer field.
+ */
+
+ enum {
+ /* Fields from the first line of HTTP proto */
+ HTTP_HDR_REQ,
+ HTTP_HDR_URL,
+ HTTP_HDR_PROTO,
+ HTTP_HDR_STATUS,
+ HTTP_HDR_RESPONSE,
+ /* HTTP header lines */
+ HTTP_HDR_FIRST,
+ };
+
+ struct SHA256Context;
+ struct VSC_C_lck;
+ struct ban;
+ struct busyobj;
+ struct cli;
+ struct cli_proto;
+ struct director;
+ struct iovec;
+ struct objcore;
+ struct object;
+ struct objhead;
+ struct pool;
+ struct sess;
+ struct sesspool;
+ struct vbc;
+ struct vef_priv;
+ struct vrt_backend;
+ struct vsb;
+ struct waitinglist;
+ struct worker;
+
+ #define DIGEST_LEN 32
+
+ /*--------------------------------------------------------------------*/
+
+ typedef struct {
+ char *b;
+ char *e;
+ } txt;
+
+ /*--------------------------------------------------------------------*/
+
+ enum step {
+ #define STEP(l, u) STP_##u,
+ #include "tbl/steps.h"
+ #undef STEP
+ };
+
+ /*--------------------------------------------------------------------*/
+ struct lock { void *priv; }; // Opaque
+
+ /*--------------------------------------------------------------------
+ * Workspace structure for quick memory allocation.
+ */
+
+ struct ws {
+ unsigned magic;
+ #define WS_MAGIC 0x35fac554
+ unsigned overflow; /* workspace overflowed */
+ const char *id; /* identity */
+ char *s; /* (S)tart of buffer */
+ char *f; /* (F)ree pointer */
+ char *r; /* (R)eserved length */
+ char *e; /* (E)nd of buffer */
+ };
+
+ /*--------------------------------------------------------------------
+ * HTTP Request/Response/Header handling structure.
+ */
+
+ enum httpwhence {
+ HTTP_Rx = 1,
+ HTTP_Tx = 2,
+ HTTP_Obj = 3
+ };
+
+ /* NB: remember to update http_Copy() if you add fields */
+ struct http {
+ unsigned magic;
+ #define HTTP_MAGIC 0x6428b5c9
+
+ enum httpwhence logtag;
+
+ struct ws *ws;
+ txt *hd;
+ unsigned char *hdf;
+ #define HDF_FILTER (1 << 0) /* Filtered by Connection */
+ uint16_t shd; /* Size of hd space */
+ uint16_t nhd; /* Next free hd */
+ uint16_t status;
+ uint8_t protover;
+ uint8_t conds; /* If-* headers present */
+ };
+
+ /*--------------------------------------------------------------------
+ * HTTP Protocol connection structure
+ */
+
+ struct http_conn {
+ unsigned magic;
+ #define HTTP_CONN_MAGIC 0x3e19edd1
+
+ int fd;
+ unsigned vsl_id;
+ unsigned maxbytes;
+ unsigned maxhdr;
+ struct ws *ws;
+ txt rxbuf;
+ txt pipeline;
+ };
+
+ /*--------------------------------------------------------------------*/
+
+ struct acct {
+ double first;
+ #define ACCT(foo) uint64_t foo;
+ #include "tbl/acct_fields.h"
+ #undef ACCT
+ };
+
+ /*--------------------------------------------------------------------*/
+
+ #define L0(t, n)
+ #define L1(t, n) t n;
+ #define VSC_F(n, t, l, f, e,d) L##l(t, n)
+ #define VSC_DO_MAIN
+ struct dstat {
+ #include "tbl/vsc_fields.h"
+ };
+ #undef VSC_F
+ #undef VSC_DO_MAIN
+ #undef L0
+ #undef L1
+
+ /* Fetch processors --------------------------------------------------*/
+
+ typedef void vfp_begin_f(struct worker *, size_t );
+ typedef int vfp_bytes_f(struct worker *, struct http_conn *, ssize_t);
+ typedef int vfp_end_f(struct worker *);
+
+ struct vfp {
+ vfp_begin_f *begin;
+ vfp_bytes_f *bytes;
+ vfp_end_f *end;
+ };
+
+ extern struct vfp vfp_gunzip;
+ extern struct vfp vfp_gzip;
+ extern struct vfp vfp_testgzip;
+ extern struct vfp vfp_esi;
+
+ /*--------------------------------------------------------------------*/
+
+ struct exp {
+ double ttl;
+ double grace;
+ double keep;
+ double age;
+ double entered;
+ };
+
+ /*--------------------------------------------------------------------*/
+
+ struct wrw {
+ int *wfd;
+ unsigned werr; /* valid after WRW_Flush() */
+ struct iovec *iov;
+ unsigned siov;
+ unsigned niov;
+ ssize_t liov;
+ ssize_t cliov;
+ unsigned ciov; /* Chunked header marker */
+ };
+
+ /*--------------------------------------------------------------------*/
+
+ struct stream_ctx {
+ unsigned magic;
+ #define STREAM_CTX_MAGIC 0x8213728b
+
+ struct vgz *vgz;
+ void *obuf;
+ ssize_t obuf_len;
+ ssize_t obuf_ptr;
+
+ /* Next byte we will take from storage */
+ ssize_t stream_next;
+
+ /* First byte of storage if we free it as we go (pass) */
+ ssize_t stream_front;
+ };
+
+ /*--------------------------------------------------------------------*/
+
+ struct wrk_accept {
+ unsigned magic;
+ #define WRK_ACCEPT_MAGIC 0x8c4b4d59
+
+ /* Accept stuff */
+ struct sockaddr_storage acceptaddr;
+ socklen_t acceptaddrlen;
+ int acceptsock;
+ struct listen_sock *acceptlsock;
+ };
+
+ /*--------------------------------------------------------------------*/
+
+ struct worker {
+ unsigned magic;
+ #define WORKER_MAGIC 0x6391adcf
+ struct pool *pool;
+ struct objhead *nobjhead;
+ struct objcore *nobjcore;
+ struct waitinglist *nwaitinglist;
+ struct busyobj *nbusyobj;
+ void *nhashpriv;
+ struct dstat stats;
+
+ /* Pool stuff */
+ double lastused;
+
+ struct wrw wrw;
+
+ pthread_cond_t cond;
+
+ VTAILQ_ENTRY(worker) list;
+ struct sess *sp;
+
+ struct VCL_conf *vcl;
+
+ uint32_t *wlb, *wlp, *wle;
+ unsigned wlr;
+
+ /* Lookup stuff */
+ struct SHA256Context *sha256ctx;
+
+ struct http_conn htc[1];
+ struct ws ws[1];
+ struct http *bereq;
+ struct http *beresp;
+ struct http *resp;
+
+ struct exp exp;
+
+ /* This is only here so VRT can find it */
+ const char *storage_hint;
+
+ /* Fetch stuff */
+ struct vbc *vbc;
+ struct object *fetch_obj;
+ enum body_status body_status;
+ struct vfp *vfp;
+ struct vgz *vgz_rx;
+ struct vef_priv *vef_priv;
+ unsigned fetch_failed;
+ unsigned do_stream;
+ unsigned do_esi;
+ unsigned do_gzip;
+ unsigned is_gzip;
+ unsigned do_gunzip;
+ unsigned is_gunzip;
+ unsigned do_close;
+ char *h_content_length;
+
+ /* Stream state */
+ struct stream_ctx *sctx;
+
+ /* ESI stuff */
+ struct vep_state *vep;
+ int gzip_resp;
+ ssize_t l_crc;
+ uint32_t crc;
+
+ /* Timeouts */
+ double connect_timeout;
+ double first_byte_timeout;
+ double between_bytes_timeout;
+
+ /* Delivery mode */
+ unsigned res_mode;
+ #define RES_LEN (1<<1)
+ #define RES_EOF (1<<2)
+ #define RES_CHUNKED (1<<3)
+ #define RES_ESI (1<<4)
+ #define RES_ESI_CHILD (1<<5)
+ #define RES_GUNZIP (1<<6)
+
+ /* Temporary accounting */
+ struct acct acct_tmp;
+ };
+
+ /* LRU ---------------------------------------------------------------*/
+
+ struct lru {
+ unsigned magic;
+ #define LRU_MAGIC 0x3fec7bb0
+ VTAILQ_HEAD(,objcore) lru_head;
+ struct lock mtx;
+ };
+
+ /* Storage -----------------------------------------------------------*/
+
+ struct storage {
+ unsigned magic;
+ #define STORAGE_MAGIC 0x1a4e51c0
+
+ #ifdef SENDFILE_WORKS
+ int fd;
+ off_t where;
+ #endif
+
+ VTAILQ_ENTRY(storage) list;
+ struct stevedore *stevedore;
+ void *priv;
+
+ unsigned char *ptr;
+ unsigned len;
+ unsigned space;
+ };
+
+ /* Object core structure ---------------------------------------------
+ * Objects have sideways references in the binary heap and the LRU list
+ * and we want to avoid paging in a lot of objects just to move them up
+ * or down the binheap or to move a unrelated object on the LRU list.
+ * To avoid this we use a proxy object, objcore, to hold the relevant
+ * housekeeping fields parts of an object.
+ */
+
+ typedef struct object *getobj_f(struct worker *wrk, struct objcore *oc);
+ typedef void updatemeta_f(struct objcore *oc);
+ typedef void freeobj_f(struct objcore *oc);
+ typedef struct lru *getlru_f(const struct objcore *oc);
+
+ struct objcore_methods {
+ getobj_f *getobj;
+ updatemeta_f *updatemeta;
+ freeobj_f *freeobj;
+ getlru_f *getlru;
+ };
+
+ struct objcore {
+ unsigned magic;
+ #define OBJCORE_MAGIC 0x4d301302
+ unsigned refcnt;
+ struct objcore_methods *methods;
+ void *priv;
+ unsigned priv2;
+ struct objhead *objhead;
+ struct busyobj *busyobj;
+ double timer_when;
+ unsigned flags;
+ #define OC_F_BUSY (1<<1)
+ #define OC_F_PASS (1<<2)
+ #define OC_F_LRUDONTMOVE (1<<4)
+ #define OC_F_PRIV (1<<5) /* Stevedore private flag */
+ #define OC_F_LURK (3<<6) /* Ban-lurker-color */
+ unsigned timer_idx;
+ VTAILQ_ENTRY(objcore) list;
+ VTAILQ_ENTRY(objcore) lru_list;
+ VTAILQ_ENTRY(objcore) ban_list;
+ struct ban *ban;
+ };
+
+ static inline struct object *
+ oc_getobj(struct worker *wrk, struct objcore *oc)
+ {
+
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ AZ(oc->flags & OC_F_BUSY);
+ AN(oc->methods);
+ AN(oc->methods->getobj);
+ return (oc->methods->getobj(wrk, oc));
+ }
+
+ static inline void
+ oc_updatemeta(struct objcore *oc)
+ {
+
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ AN(oc->methods);
+ if (oc->methods->updatemeta != NULL)
+ oc->methods->updatemeta(oc);
+ }
+
+ static inline void
+ oc_freeobj(struct objcore *oc)
+ {
+
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ AN(oc->methods);
+ AN(oc->methods->freeobj);
+ oc->methods->freeobj(oc);
+ }
+
+ static inline struct lru *
+ oc_getlru(const struct objcore *oc)
+ {
+
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ AN(oc->methods);
+ AN(oc->methods->getlru);
+ return (oc->methods->getlru(oc));
+ }
+
+ /* Busy Object structure ---------------------------------------------*/
+
+ struct busyobj {
+ unsigned magic;
+ #define BUSYOBJ_MAGIC 0x23b95567
+ uint8_t *vary;
+ };
+
+ /* Object structure --------------------------------------------------*/
+
+ VTAILQ_HEAD(storagehead, storage);
+
+ struct object {
+ unsigned magic;
+ #define OBJECT_MAGIC 0x32851d42
+ unsigned xid;
+ struct storage *objstore;
+ struct objcore *objcore;
+
+ struct ws ws_o[1];
+
+ uint8_t *vary;
+ unsigned hits;
+ uint16_t response;
+
+ /* XXX: make bitmap */
+ uint8_t gziped;
+ /* Bit positions in the gzip stream */
+ ssize_t gzip_start;
+ ssize_t gzip_last;
+ ssize_t gzip_stop;
+
+ ssize_t len;
+
+ struct exp exp;
+
+ double last_modified;
+ double last_lru;
+
+ struct http *http;
+
+ struct storagehead store;
+
+ struct storage *esidata;
+
+ double last_use;
+
+ };
+
+ /* -------------------------------------------------------------------*/
+
+ struct sess {
+ unsigned magic;
+ #define SESS_MAGIC 0x2c2f9c5a
+ int fd;
+ unsigned vsl_id;
+ unsigned xid;
+
+ int restarts;
+ int esi_level;
+ int disable_esi;
+
+ uint8_t hash_ignore_busy;
+ uint8_t hash_always_miss;
+
+ struct worker *wrk;
+
+ socklen_t sockaddrlen;
+ socklen_t mysockaddrlen;
+ struct sockaddr_storage sockaddr;
+ struct sockaddr_storage mysockaddr;
+ struct listen_sock *mylsock;
+
+ /* formatted ascii client address */
+ char *addr;
+ char *port;
+ char *client_identity;
+
+ /* HTTP request */
+ const char *doclose;
+ struct http *http;
+ struct http *http0;
+
+ struct ws ws[1];
+ char *ws_ses; /* WS above session data */
+ char *ws_req; /* WS above request data */
+
+ unsigned char digest[DIGEST_LEN];
+
+ /* Built Vary string */
+ uint8_t *vary_b;
+ uint8_t *vary_l;
+ uint8_t *vary_e;
+
+ struct http_conn htc[1];
+
+ /* Timestamps, all on TIM_real() timescale */
+ double t_open;
+ double t_req;
+ double t_resp;
+ double t_end;
+
+ /* Acceptable grace period */
+ struct exp exp;
+
+ enum step step;
+ unsigned cur_method;
+ unsigned handling;
+ unsigned char sendbody;
+ unsigned char wantbody;
+ uint16_t err_code;
+ const char *err_reason;
+
+ VTAILQ_ENTRY(sess) list;
+
+ struct director *director;
+ struct object *obj;
+ struct objcore *objcore;
+ struct VCL_conf *vcl;
+
++ struct object *stale_obj;
+ /* The busy objhead we sleep on */
+ struct objhead *hash_objhead;
+
+ /* Various internal stuff */
+ struct sessmem *mem;
+
+ VTAILQ_ENTRY(sess) poollist;
+ uint64_t req_bodybytes;
+ struct acct acct_ses;
+
+ #if defined(HAVE_EPOLL_CTL)
+ struct epoll_event ev;
+ #endif
+ };
+
+ /* Prototypes etc ----------------------------------------------------*/
+
+ /* cache_acceptor.c */
+ void VCA_Prep(struct sess *sp);
+ void VCA_Init(void);
+ void VCA_Shutdown(void);
+ int VCA_Accept(struct listen_sock *ls, struct wrk_accept *wa);
+ void VCA_SetupSess(struct worker *w);
+ void VCA_FailSess(struct worker *w);
+
+ /* cache_backend.c */
+ void VBE_UseHealth(const struct director *vdi);
+
+ struct vbc *VDI_GetFd(const struct director *, struct sess *sp);
+ int VDI_Healthy(const struct director *, const struct sess *sp);
+ void VDI_CloseFd(struct worker *wrk);
+ void VDI_RecycleFd(struct worker *wrk);
+ void VDI_AddHostHeader(const struct sess *sp);
+ void VBE_Poll(void);
+
+ /* cache_backend_cfg.c */
+ void VBE_Init(void);
+ struct backend *VBE_AddBackend(struct cli *cli, const struct vrt_backend *vb);
+
+ /* cache_backend_poll.c */
+ void VBP_Init(void);
+
+ /* cache_ban.c */
+ struct ban *BAN_New(void);
+ int BAN_AddTest(struct cli *, struct ban *, const char *, const char *,
+ const char *);
+ void BAN_Free(struct ban *b);
+ void BAN_Insert(struct ban *b);
+ void BAN_Init(void);
+ void BAN_NewObjCore(struct objcore *oc);
+ void BAN_DestroyObj(struct objcore *oc);
+ int BAN_CheckObject(struct object *o, const struct sess *sp);
+ void BAN_Reload(const uint8_t *ban, unsigned len);
+ struct ban *BAN_TailRef(void);
+ void BAN_Compile(void);
+ struct ban *BAN_RefBan(struct objcore *oc, double t0, const struct ban *tail);
+ void BAN_TailDeref(struct ban **ban);
+ double BAN_Time(const struct ban *ban);
+
+ /* cache_center.c [CNT] */
+ void CNT_Session(struct sess *sp);
+ void CNT_Init(void);
+
+ /* cache_cli.c [CLI] */
+ void CLI_Init(void);
+ void CLI_Run(void);
+ void CLI_AddFuncs(struct cli_proto *p);
+ extern pthread_t cli_thread;
+ #define ASSERT_CLI() do {assert(pthread_self() == cli_thread);} while (0)
+
+ /* cache_expiry.c */
+ void EXP_Clr(struct exp *e);
+ double EXP_Get_ttl(const struct exp *e);
+ double EXP_Get_grace(const struct exp *e);
+ double EXP_Get_keep(const struct exp *e);
+ void EXP_Set_ttl(struct exp *e, double v);
+ void EXP_Set_grace(struct exp *e, double v);
+ void EXP_Set_keep(struct exp *e, double v);
+
+ double EXP_Ttl(const struct sess *, const struct object*);
+ double EXP_Grace(const struct sess *, const struct object*);
++double EXP_Keep(const struct sess *, const struct object*);
+ void EXP_Insert(struct object *o);
+ void EXP_Inject(struct objcore *oc, struct lru *lru, double when);
+ void EXP_Init(void);
+ void EXP_Rearm(const struct object *o);
+ int EXP_Touch(struct objcore *oc);
+ int EXP_NukeOne(struct worker *w, struct lru *lru);
+
+ /* cache_fetch.c */
+ struct storage *FetchStorage(struct worker *w, ssize_t sz);
+ int FetchError(struct worker *w, const char *error);
+ int FetchError2(struct worker *w, const char *error, const char *more);
+ int FetchHdr(struct sess *sp);
+ int FetchBody(struct worker *w, struct object *obj);
+ int FetchReqBody(struct sess *sp);
+ void Fetch_Init(void);
+
+ /* cache_gzip.c */
+ struct vgz;
+
+ enum vgz_flag { VGZ_NORMAL, VGZ_ALIGN, VGZ_RESET, VGZ_FINISH };
+ struct vgz *VGZ_NewUngzip(struct worker *wrk, const char *id);
+ struct vgz *VGZ_NewGzip(struct worker *wrk, const char *id);
+ void VGZ_Ibuf(struct vgz *, const void *, ssize_t len);
+ int VGZ_IbufEmpty(const struct vgz *vg);
+ void VGZ_Obuf(struct vgz *, void *, ssize_t len);
+ int VGZ_ObufFull(const struct vgz *vg);
+ int VGZ_ObufStorage(struct worker *w, struct vgz *vg);
+ int VGZ_Gzip(struct vgz *, const void **, size_t *len, enum vgz_flag);
+ int VGZ_Gunzip(struct vgz *, const void **, size_t *len);
+ int VGZ_Destroy(struct vgz **, int vsl_id);
+ void VGZ_UpdateObj(const struct vgz*, struct object *);
+ int VGZ_WrwGunzip(struct worker *w, struct vgz *, const void *ibuf,
+ ssize_t ibufl, char *obuf, ssize_t obufl, ssize_t *obufp);
+
+ /* Return values */
+ #define VGZ_ERROR -1
+ #define VGZ_OK 0
+ #define VGZ_END 1
+ #define VGZ_STUCK 2
+
+ /* cache_http.c */
+ unsigned HTTP_estimate(unsigned nhttp);
+ void HTTP_Copy(struct http *to, const struct http * const fm);
+ struct http *HTTP_create(void *p, uint16_t nhttp);
+ const char *http_StatusMessage(unsigned);
+ unsigned http_EstimateWS(const struct http *fm, unsigned how, uint16_t *nhd);
+ void HTTP_Init(void);
+ void http_ClrHeader(struct http *to);
+ unsigned http_Write(struct worker *w, unsigned vsl_id, const struct http *hp,
+ int resp);
+ void http_CopyResp(struct http *to, const struct http *fm);
+ void http_SetResp(struct http *to, const char *proto, uint16_t status,
+ const char *response);
+ void http_FilterFields(struct worker *w, unsigned vsl_id, struct http *to,
+ const struct http *fm, unsigned how);
+ void http_FilterHeader(const struct sess *sp, unsigned how);
++
++/* Check if a refresh should be done */
++void http_CheckRefresh(struct sess *sp);
++/* Check if we got 304 response */
++void http_Check304(struct sess *sp);
++
+ void http_PutProtocol(struct worker *w, unsigned vsl_id, const struct http *to,
+ const char *protocol);
+ void http_PutStatus(struct http *to, uint16_t status);
+ void http_PutResponse(struct worker *w, unsigned vsl_id, const struct http *to,
+ const char *response);
+ void http_PrintfHeader(struct worker *w, unsigned vsl_id, struct http *to,
+ const char *fmt, ...);
+ void http_SetHeader(struct worker *w, unsigned vsl_id, struct http *to,
+ const char *hdr);
+ void http_SetH(const struct http *to, unsigned n, const char *fm);
+ void http_ForceGet(const struct http *to);
+ void http_Setup(struct http *ht, struct ws *ws);
+ int http_GetHdr(const struct http *hp, const char *hdr, char **ptr);
+ int http_GetHdrData(const struct http *hp, const char *hdr,
+ const char *field, char **ptr);
+ int http_GetHdrField(const struct http *hp, const char *hdr,
+ const char *field, char **ptr);
+ double http_GetHdrQ(const struct http *hp, const char *hdr, const char *field);
+ uint16_t http_GetStatus(const struct http *hp);
+ const char *http_GetReq(const struct http *hp);
+ int http_HdrIs(const struct http *hp, const char *hdr, const char *val);
+ uint16_t http_DissectRequest(struct sess *sp);
+ uint16_t http_DissectResponse(struct worker *w, const struct http_conn *htc,
+ struct http *sp);
+ const char *http_DoConnection(const struct http *hp);
+ void http_CopyHome(struct worker *w, unsigned vsl_id, const struct http *hp);
+ void http_Unset(struct http *hp, const char *hdr);
+ void http_CollectHdr(struct http *hp, const char *hdr);
+
+ /* cache_httpconn.c */
+ void HTC_Init(struct http_conn *htc, struct ws *ws, int fd, unsigned vsl_id,
+ unsigned maxbytes, unsigned maxhdr);
+ int HTC_Reinit(struct http_conn *htc);
+ int HTC_Rx(struct http_conn *htc);
+ ssize_t HTC_Read(struct worker *w, struct http_conn *htc, void *d, size_t len);
+ int HTC_Complete(struct http_conn *htc);
+
+ #define HTTPH(a, b, c, d, e, f, g) extern char b[];
+ #include "tbl/http_headers.h"
+ #undef HTTPH
+
+ /* cache_main.c */
+ extern volatile struct params * cache_param;
+ void THR_SetName(const char *name);
+ const char* THR_GetName(void);
+ void THR_SetSession(const struct sess *sp);
+ const struct sess * THR_GetSession(void);
+
+ /* cache_lck.c */
+
+ /* Internal functions, call only through macros below */
+ void Lck__Lock(struct lock *lck, const char *p, const char *f, int l);
+ void Lck__Unlock(struct lock *lck, const char *p, const char *f, int l);
+ int Lck__Trylock(struct lock *lck, const char *p, const char *f, int l);
+ void Lck__New(struct lock *lck, struct VSC_C_lck *, const char *);
+ void Lck__Assert(const struct lock *lck, int held);
+
+ /* public interface: */
+ void LCK_Init(void);
+ void Lck_Delete(struct lock *lck);
+ int Lck_CondWait(pthread_cond_t *cond, struct lock *lck, struct timespec *ts);
+
+ #define Lck_New(a, b) Lck__New(a, b, #b)
+ #define Lck_Lock(a) Lck__Lock(a, __func__, __FILE__, __LINE__)
+ #define Lck_Unlock(a) Lck__Unlock(a, __func__, __FILE__, __LINE__)
+ #define Lck_Trylock(a) Lck__Trylock(a, __func__, __FILE__, __LINE__)
+ #define Lck_AssertHeld(a) Lck__Assert(a, 1)
+
+ #define LOCK(nam) extern struct VSC_C_lck *lck_##nam;
+ #include "tbl/locks.h"
+ #undef LOCK
+
+ /* cache_panic.c */
+ void PAN_Init(void);
+
+ /* cache_pipe.c */
+ void PipeSession(struct sess *sp);
+
+ /* cache_pool.c */
+ void Pool_Init(void);
+ void Pool_Work_Thread(void *priv, struct worker *w);
+ void Pool_Wait(struct sess *sp);
+ int Pool_Schedule(struct pool *pp, struct sess *sp);
+
+ #define WRW_IsReleased(w) ((w)->wrw.wfd == NULL)
+ int WRW_Error(const struct worker *w);
+ void WRW_Chunked(struct worker *w);
+ void WRW_EndChunk(struct worker *w);
+ void WRW_Reserve(struct worker *w, int *fd);
+ unsigned WRW_Flush(struct worker *w);
+ unsigned WRW_FlushRelease(struct worker *w);
+ unsigned WRW_Write(struct worker *w, const void *ptr, int len);
+ unsigned WRW_WriteH(struct worker *w, const txt *hh, const char *suf);
+ #ifdef SENDFILE_WORKS
+ void WRW_Sendfile(struct worker *w, int fd, off_t off, unsigned len);
+ #endif /* SENDFILE_WORKS */
+
+ /* cache_session.c [SES] */
+ struct sess *SES_New(struct worker *wrk, struct sesspool *pp);
+ struct sess *SES_Alloc(void);
+ void SES_Close(struct sess *sp, const char *reason);
+ void SES_Delete(struct sess *sp, const char *reason);
+ void SES_Charge(struct sess *sp);
+ struct sesspool *SES_NewPool(struct pool *pp);
+ void SES_DeletePool(struct sesspool *sp, struct worker *wrk);
+ int SES_Schedule(struct sess *sp);
+
+
+ /* cache_shmlog.c */
+ extern struct VSC_C_main *VSC_C_main;
+ void VSL_Init(void);
+ void *VSM_Alloc(unsigned size, const char *class, const char *type,
+ const char *ident);
+ void VSM_Free(void *ptr);
+ #ifdef VSL_ENDMARKER
+ void VSL(enum VSL_tag_e tag, int id, const char *fmt, ...);
+ void WSLR(struct worker *w, enum VSL_tag_e tag, int id, txt t);
+ void WSL(struct worker *w, enum VSL_tag_e tag, int id, const char *fmt, ...);
+ void WSLB(struct worker *w, enum VSL_tag_e tag, const char *fmt, ...);
+
+ void WSL_Flush(struct worker *w, int overflow);
+
+ #define DSL(flag, tag, id, ...) \
+ do { \
+ if (cache_param->diag_bitmap & (flag)) \
+ VSL((tag), (id), __VA_ARGS__); \
+ } while (0)
+
+ #define WSP(sess, tag, ...) \
+ WSL((sess)->wrk, tag, (sess)->vsl_id, __VA_ARGS__)
+
+ #define WSPR(sess, tag, txt) \
+ WSLR((sess)->wrk, tag, (sess)->vsl_id, txt)
+
+ #define INCOMPL() do { \
+ VSL(SLT_Debug, 0, "INCOMPLETE AT: %s(%d)", __func__, __LINE__); \
+ fprintf(stderr, \
+ "INCOMPLETE AT: %s(%d)\n", \
+ (const char *)__func__, __LINE__); \
+ abort(); \
+ } while (0)
+ #endif
+
+ /* cache_response.c */
+ void RES_BuildHttp(const struct sess *sp);
+ void RES_WriteObj(struct sess *sp);
+ void RES_StreamStart(struct sess *sp);
+ void RES_StreamEnd(struct sess *sp);
+ void RES_StreamPoll(struct worker *);
+
+ /* cache_vary.c */
+ struct vsb *VRY_Create(const struct sess *sp, const struct http *hp);
+ int VRY_Match(struct sess *sp, const uint8_t *vary);
+ void VRY_Validate(const uint8_t *vary);
+
+ /* cache_vcl.c */
+ void VCL_Init(void);
+ void VCL_Refresh(struct VCL_conf **vcc);
+ void VCL_Rel(struct VCL_conf **vcc);
+ void VCL_Poll(void);
+ const char *VCL_Return_Name(unsigned method);
+
+ #define VCL_MET_MAC(l,u,b) void VCL_##l##_method(struct sess *);
+ #include "tbl/vcl_returns.h"
+ #undef VCL_MET_MAC
+
+ /* cache_vrt.c */
+
+ char *VRT_String(struct ws *ws, const char *h, const char *p, va_list ap);
+ char *VRT_StringList(char *d, unsigned dl, const char *p, va_list ap);
+
+ void ESI_Deliver(struct sess *);
+ void ESI_DeliverChild(const struct sess *);
+
+ /* cache_vrt_vmod.c */
+ void VMOD_Init(void);
+
+ /* cache_wrk.c */
+
+ void WRK_Init(void);
+ int WRK_TrySumStat(struct worker *w);
+ void WRK_SumStat(struct worker *w);
+ void *WRK_thread(void *priv);
+ typedef void *bgthread_t(struct sess *, void *priv);
+ void WRK_BgThread(pthread_t *thr, const char *name, bgthread_t *func,
+ void *priv);
+
+ /* cache_ws.c */
+
+ void WS_Init(struct ws *ws, const char *id, void *space, unsigned len);
+ unsigned WS_Reserve(struct ws *ws, unsigned bytes);
+ void WS_Release(struct ws *ws, unsigned bytes);
+ void WS_ReleaseP(struct ws *ws, char *ptr);
+ void WS_Assert(const struct ws *ws);
+ void WS_Reset(struct ws *ws, char *p);
+ char *WS_Alloc(struct ws *ws, unsigned bytes);
+ char *WS_Dup(struct ws *ws, const char *);
+ char *WS_Snapshot(struct ws *ws);
+ unsigned WS_Free(const struct ws *ws);
+
+ /* rfc2616.c */
+ void RFC2616_Ttl(const struct sess *sp);
+ enum body_status RFC2616_Body(const struct sess *sp);
+ unsigned RFC2616_Req_Gzip(const struct sess *sp);
+ int RFC2616_Do_Cond(const struct sess *sp);
+
+ /* stevedore.c */
+ struct object *STV_NewObject(struct sess *sp, const char *hint, unsigned len,
+ struct exp *, uint16_t nhttp);
+ struct storage *STV_alloc(struct worker *w, size_t size);
+ void STV_trim(struct storage *st, size_t size);
+ void STV_free(struct storage *st);
+ void STV_open(void);
+ void STV_close(void);
+ void STV_Freestore(struct object *o);
+
+ /* storage_synth.c */
+ struct vsb *SMS_Makesynth(struct object *obj);
+ void SMS_Finish(struct object *obj);
+ void SMS_Init(void);
+
+ /* storage_persistent.c */
+ void SMP_Init(void);
+ void SMP_Ready(void);
+ void SMP_NewBan(const uint8_t *ban, unsigned len);
+
+ /*
+ * A normal pointer difference is signed, but we never want a negative value
+ * so this little tool will make sure we don't get that.
+ */
+
+ static inline unsigned
+ pdiff(const void *b, const void *e)
+ {
+
+ assert(b <= e);
+ return
+ ((unsigned)((const unsigned char *)e - (const unsigned char *)b));
+ }
+
+ static inline void
+ Tcheck(const txt t)
+ {
+
+ AN(t.b);
+ AN(t.e);
+ assert(t.b <= t.e);
+ }
+
+ /*
+ * unsigned length of a txt
+ */
+
+ static inline unsigned
+ Tlen(const txt t)
+ {
+
+ Tcheck(t);
+ return ((unsigned)(t.e - t.b));
+ }
+
+ static inline void
+ Tadd(txt *t, const char *p, int l)
+ {
+ Tcheck(*t);
+
+ if (l <= 0) {
+ } if (t->b + l < t->e) {
+ memcpy(t->b, p, l);
+ t->b += l;
+ } else {
+ t->b = t->e;
+ }
+ }
+
+ static inline void
+ AssertObjBusy(const struct object *o)
+ {
+ AN(o->objcore);
+ AN (o->objcore->flags & OC_F_BUSY);
+ }
+
+ static inline void
+ AssertObjCorePassOrBusy(const struct objcore *oc)
+ {
+ if (oc != NULL)
+ AN (oc->flags & OC_F_BUSY);
+ }
diff --cc bin/varnishd/cache/cache_center.c
index 0000000,e42fac8..7b8dc8a
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_center.c
+++ b/bin/varnishd/cache/cache_center.c
@@@ -1,0 -1,1691 +1,1724 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This file contains the central state machine for pushing requests.
+ *
+ * We cannot just use direct calls because it is possible to kick a
+ * request back to the lookup stage (usually after a rewrite). The
+ * state engine also allows us to break the processing up into some
+ * logical chunks which improves readability a little bit.
+ *
+ * Since the states are rather nasty in detail, I have decided to embed
+ * a dot(1) graph in the source code comments. So to see the big picture,
+ * extract the DOT lines and run through dot(1), for instance with the
+ * command:
+ * sed -n '/^DOT/s///p' cache_center.c | dot -Tps > /tmp/_.ps
+ */
+
+ /*
+ DOT digraph vcl_center {
+ xDOT page="8.2,11.5"
+ DOT size="7.2,10.5"
+ DOT margin="0.5"
+ DOT center="1"
+ DOT acceptor [
+ DOT shape=hexagon
+ DOT label="Request received"
+ DOT ]
+ DOT ERROR [shape=plaintext]
+ DOT RESTART [shape=plaintext]
+ DOT acceptor -> start [style=bold,color=green]
+ */
+
+ #include "config.h"
+
+ #include <math.h>
+ #include <poll.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ #include "cache.h"
+
+ #include "hash/hash_slinger.h"
+ #include "vcl.h"
+ #include "vcli_priv.h"
+ #include "vsha256.h"
+ #include "vtcp.h"
+ #include "vtim.h"
+
+ #ifndef HAVE_SRANDOMDEV
+ #include "compat/srandomdev.h"
+ #endif
+
+ static unsigned xids;
+
+ /*--------------------------------------------------------------------
+ * WAIT
+ * Wait (briefly) until we have a full request in our htc.
+ */
+
+ static int
+ cnt_wait(struct sess *sp)
+ {
+ int i;
+ struct pollfd pfd[1];
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ AZ(sp->vcl);
+ AZ(sp->obj);
+ assert(sp->xid == 0);
+
+ i = HTC_Complete(sp->htc);
+ if (i == 0 && cache_param->session_linger > 0) {
+ pfd[0].fd = sp->fd;
+ pfd[0].events = POLLIN;
+ pfd[0].revents = 0;
+ i = poll(pfd, 1, cache_param->session_linger);
+ if (i)
+ i = HTC_Rx(sp->htc);
+ }
+ if (i == 0) {
+ WSP(sp, SLT_Debug, "herding");
+ sp->wrk->stats.sess_herd++;
+ SES_Charge(sp);
+ sp->wrk = NULL;
+ Pool_Wait(sp);
+ return (1);
+ }
+ if (i == 1) {
+ sp->step = STP_START;
+ return (0);
+ }
+ if (i == -2) {
+ SES_Close(sp, "overflow");
+ return (0);
+ }
+ if (i == -1 && Tlen(sp->htc->rxbuf) == 0 &&
+ (errno == 0 || errno == ECONNRESET))
+ SES_Close(sp, "EOF");
+ else
+ SES_Close(sp, "error");
+ sp->step = STP_DONE;
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * We have a refcounted object on the session, now deliver it.
+ *
+ DOT subgraph xcluster_prepresp {
+ DOT prepresp [
+ DOT shape=ellipse
+ DOT label="Filter obj.->resp."
+ DOT ]
+ DOT vcl_deliver [
+ DOT shape=record
+ DOT label="vcl_deliver()|resp."
+ DOT ]
+ DOT prepresp -> vcl_deliver [style=bold,color=green]
+ DOT prepresp -> vcl_deliver [style=bold,color=cyan]
+ DOT prepresp -> vcl_deliver [style=bold,color=red]
+ DOT prepresp -> vcl_deliver [style=bold,color=blue,]
+ DOT vcl_deliver -> deliver [style=bold,color=green,label=deliver]
+ DOT vcl_deliver -> deliver [style=bold,color=red]
+ DOT vcl_deliver -> deliver [style=bold,color=blue]
+ DOT vcl_deliver -> errdeliver [label="error"]
+ DOT errdeliver [label="ERROR",shape=plaintext]
+ DOT vcl_deliver -> rstdeliver [label="restart",color=purple]
+ DOT rstdeliver [label="RESTART",shape=plaintext]
+ DOT vcl_deliver -> streambody [style=bold,color=cyan,label="deliver"]
+ DOT }
+ *
+ */
+
+ static int
+ cnt_prepresp(struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+
+ if (sp->wrk->do_stream)
+ AssertObjCorePassOrBusy(sp->obj->objcore);
+
+ sp->wrk->res_mode = 0;
+
+ if ((sp->wrk->h_content_length != NULL || !sp->wrk->do_stream) &&
+ !sp->wrk->do_gzip && !sp->wrk->do_gunzip)
+ sp->wrk->res_mode |= RES_LEN;
+
+ if (!sp->disable_esi && sp->obj->esidata != NULL) {
+ /* In ESI mode, we don't know the aggregate length */
+ sp->wrk->res_mode &= ~RES_LEN;
+ sp->wrk->res_mode |= RES_ESI;
+ }
+
+ if (sp->esi_level > 0) {
+ sp->wrk->res_mode &= ~RES_LEN;
+ sp->wrk->res_mode |= RES_ESI_CHILD;
+ }
+
+ if (cache_param->http_gzip_support && sp->obj->gziped &&
+ !RFC2616_Req_Gzip(sp)) {
+ /*
+ * We don't know what it uncompresses to
+ * XXX: we could cache that
+ */
+ sp->wrk->res_mode &= ~RES_LEN;
+ sp->wrk->res_mode |= RES_GUNZIP;
+ }
+
+ if (!(sp->wrk->res_mode & (RES_LEN|RES_CHUNKED|RES_EOF))) {
+ if (sp->obj->len == 0 && !sp->wrk->do_stream)
+ /*
+ * If the object is empty, neither ESI nor GUNZIP
+ * can make it any different size
+ */
+ sp->wrk->res_mode |= RES_LEN;
+ else if (!sp->wantbody) {
+ /* Nothing */
+ } else if (sp->http->protover >= 11) {
+ sp->wrk->res_mode |= RES_CHUNKED;
+ } else {
+ sp->wrk->res_mode |= RES_EOF;
+ sp->doclose = "EOF mode";
+ }
+ }
+
+ sp->t_resp = VTIM_real();
+ if (sp->obj->objcore != NULL) {
+ if ((sp->t_resp - sp->obj->last_lru) > cache_param->lru_timeout &&
+ EXP_Touch(sp->obj->objcore))
+ sp->obj->last_lru = sp->t_resp;
+ sp->obj->last_use = sp->t_resp; /* XXX: locking ? */
+ }
+ http_Setup(sp->wrk->resp, sp->wrk->ws);
+ RES_BuildHttp(sp);
+ VCL_deliver_method(sp);
+ switch (sp->handling) {
+ case VCL_RET_DELIVER:
+ break;
+ case VCL_RET_RESTART:
+ if (sp->restarts >= cache_param->max_restarts)
+ break;
+ if (sp->wrk->do_stream) {
+ VDI_CloseFd(sp->wrk);
+ HSH_Drop(sp);
+ } else {
+ (void)HSH_Deref(sp->wrk, NULL, &sp->obj);
+ }
+ AZ(sp->obj);
+ sp->restarts++;
+ sp->director = NULL;
+ sp->wrk->h_content_length = NULL;
+ http_Setup(sp->wrk->bereq, NULL);
+ http_Setup(sp->wrk->beresp, NULL);
+ http_Setup(sp->wrk->resp, NULL);
+ sp->step = STP_RECV;
+ return (0);
+ default:
+ WRONG("Illegal action in vcl_deliver{}");
+ }
+ if (sp->wrk->do_stream) {
+ AssertObjCorePassOrBusy(sp->obj->objcore);
+ sp->step = STP_STREAMBODY;
+ } else {
+ sp->step = STP_DELIVER;
+ }
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * Deliver an already stored object
+ *
+ DOT subgraph xcluster_deliver {
+ DOT deliver [
+ DOT shape=ellipse
+ DOT label="Send body"
+ DOT ]
+ DOT }
+ DOT deliver -> DONE [style=bold,color=green]
+ DOT deliver -> DONE [style=bold,color=red]
+ DOT deliver -> DONE [style=bold,color=blue]
+ *
+ */
+
+ static int
+ cnt_deliver(struct sess *sp)
+ {
+
+ sp->director = NULL;
+ sp->restarts = 0;
+
+ RES_WriteObj(sp);
+
+ assert(WRW_IsReleased(sp->wrk));
+ assert(sp->wrk->wrw.ciov == sp->wrk->wrw.siov);
+ (void)HSH_Deref(sp->wrk, NULL, &sp->obj);
+ http_Setup(sp->wrk->resp, NULL);
+ sp->step = STP_DONE;
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * This is the final state, figure out if we should close or recycle
+ * the client connection
+ *
+ DOT DONE [
+ DOT shape=hexagon
+ DOT label="Request completed"
+ DOT ]
+ */
+
+ static int
+ cnt_done(struct sess *sp)
+ {
+ double dh, dp, da;
+ int i;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_ORNULL(sp->vcl, VCL_CONF_MAGIC);
+
+ AZ(sp->obj);
+ AZ(sp->wrk->vbc);
+ sp->director = NULL;
+ sp->restarts = 0;
+
+ sp->wrk->do_esi = 0;
+ sp->wrk->do_gunzip = 0;
+ sp->wrk->do_gzip = 0;
+ sp->wrk->do_stream = 0;
+ sp->wrk->is_gunzip = 0;
+ sp->wrk->is_gzip = 0;
+
+ if (sp->vcl != NULL && sp->esi_level == 0) {
+ if (sp->wrk->vcl != NULL)
+ VCL_Rel(&sp->wrk->vcl);
+ sp->wrk->vcl = sp->vcl;
+ sp->vcl = NULL;
+ }
+
+ SES_Charge(sp);
+
+ sp->t_end = VTIM_real();
+ sp->wrk->lastused = sp->t_end;
+ if (sp->xid == 0) {
+ sp->t_req = sp->t_end;
+ sp->t_resp = sp->t_end;
+ } else if (sp->esi_level == 0) {
+ dp = sp->t_resp - sp->t_req;
+ da = sp->t_end - sp->t_resp;
+ dh = sp->t_req - sp->t_open;
+ /* XXX: Add StatReq == StatSess */
+ /* XXX: Workaround for pipe */
+ if (sp->fd >= 0) {
+ WSP(sp, SLT_Length, "%ju",
+ (uintmax_t)sp->req_bodybytes);
+ }
+ WSP(sp, SLT_ReqEnd, "%u %.9f %.9f %.9f %.9f %.9f",
+ sp->xid, sp->t_req, sp->t_end, dh, dp, da);
+ }
+ sp->xid = 0;
+ sp->t_open = sp->t_end;
+ sp->t_resp = NAN;
+ WSL_Flush(sp->wrk, 0);
+
+ /* If we did an ESI include, don't mess up our state */
+ if (sp->esi_level > 0)
+ return (1);
+
+ sp->req_bodybytes = 0;
+
+ sp->t_req = NAN;
+ sp->hash_always_miss = 0;
+ sp->hash_ignore_busy = 0;
+
+ if (sp->fd >= 0 && sp->doclose != NULL) {
+ /*
+ * This is an orderly close of the connection; ditch nolinger
+ * before we close, to get queued data transmitted.
+ */
+ // XXX: not yet (void)VTCP_linger(sp->fd, 0);
+ SES_Close(sp, sp->doclose);
+ }
+
+ if (sp->fd < 0) {
+ sp->wrk->stats.sess_closed++;
+ SES_Delete(sp, NULL);
+ return (1);
+ }
+
+ if (sp->wrk->stats.client_req >= cache_param->wthread_stats_rate)
+ WRK_SumStat(sp->wrk);
+ /* Reset the workspace to the session-watermark */
+ WS_Reset(sp->ws, sp->ws_ses);
+ WS_Reset(sp->wrk->ws, NULL);
+
+ i = HTC_Reinit(sp->htc);
+ if (i == 1) {
+ sp->wrk->stats.sess_pipeline++;
+ sp->step = STP_START;
+ return (0);
+ }
+ if (Tlen(sp->htc->rxbuf)) {
+ sp->wrk->stats.sess_readahead++;
+ sp->step = STP_WAIT;
+ return (0);
+ }
+ if (cache_param->session_linger > 0) {
+ sp->wrk->stats.sess_linger++;
+ sp->step = STP_WAIT;
+ return (0);
+ }
+ sp->wrk->stats.sess_herd++;
+ sp->wrk = NULL;
+ Pool_Wait(sp);
+ return (1);
+ }
+
+ /*--------------------------------------------------------------------
+ * Emit an error
+ *
+ DOT subgraph xcluster_error {
+ DOT vcl_error [
+ DOT shape=record
+ DOT label="vcl_error()|resp."
+ DOT ]
+ DOT ERROR -> vcl_error
+ DOT vcl_error-> prepresp [label=deliver]
+ DOT }
+ DOT vcl_error-> rsterr [label="restart",color=purple]
+ DOT rsterr [label="RESTART",shape=plaintext]
+ */
+
+ static int
+ cnt_error(struct sess *sp)
+ {
+ struct worker *w;
+ struct http *h;
+ char date[40];
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+
+ sp->wrk->do_esi = 0;
+ sp->wrk->is_gzip = 0;
+ sp->wrk->is_gunzip = 0;
+ sp->wrk->do_gzip = 0;
+ sp->wrk->do_gunzip = 0;
+ sp->wrk->do_stream = 0;
+
+ w = sp->wrk;
+ if (sp->obj == NULL) {
+ HSH_Prealloc(sp);
+ EXP_Clr(&w->exp);
+ sp->obj = STV_NewObject(sp, NULL, cache_param->http_resp_size,
+ &w->exp, (uint16_t)cache_param->http_max_hdr);
+ if (sp->obj == NULL)
+ sp->obj = STV_NewObject(sp, TRANSIENT_STORAGE,
+ cache_param->http_resp_size, &w->exp,
+ (uint16_t)cache_param->http_max_hdr);
+ if (sp->obj == NULL) {
+ sp->doclose = "Out of objects";
+ sp->director = NULL;
+ sp->wrk->h_content_length = NULL;
+ http_Setup(sp->wrk->beresp, NULL);
+ http_Setup(sp->wrk->bereq, NULL);
+ sp->step = STP_DONE;
+ return(0);
+ }
+ AN(sp->obj);
+ sp->obj->xid = sp->xid;
+ sp->obj->exp.entered = sp->t_req;
+ } else {
+ /* XXX: Null the headers ? */
+ }
+ CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
+ h = sp->obj->http;
+
+ if (sp->err_code < 100 || sp->err_code > 999)
+ sp->err_code = 501;
+
+ http_PutProtocol(w, sp->vsl_id, h, "HTTP/1.1");
+ http_PutStatus(h, sp->err_code);
+ VTIM_format(VTIM_real(), date);
+ http_PrintfHeader(w, sp->vsl_id, h, "Date: %s", date);
+ http_SetHeader(w, sp->vsl_id, h, "Server: Varnish");
+
+ if (sp->err_reason != NULL)
+ http_PutResponse(w, sp->vsl_id, h, sp->err_reason);
+ else
+ http_PutResponse(w, sp->vsl_id, h,
+ http_StatusMessage(sp->err_code));
+ VCL_error_method(sp);
+
+ if (sp->handling == VCL_RET_RESTART &&
+ sp->restarts < cache_param->max_restarts) {
+ HSH_Drop(sp);
+ sp->director = NULL;
+ sp->restarts++;
+ sp->step = STP_RECV;
+ return (0);
+ } else if (sp->handling == VCL_RET_RESTART)
+ sp->handling = VCL_RET_DELIVER;
+
+
+ /* We always close when we take this path */
+ sp->doclose = "error";
+ sp->wantbody = 1;
+
+ assert(sp->handling == VCL_RET_DELIVER);
+ sp->err_code = 0;
+ sp->err_reason = NULL;
+ http_Setup(sp->wrk->bereq, NULL);
+ sp->step = STP_PREPRESP;
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * Fetch response headers from the backend
+ *
+ DOT subgraph xcluster_fetch {
+ DOT fetch [
+ DOT shape=ellipse
+ DOT label="fetch hdr\nfrom backend\n(find obj.ttl)"
+ DOT ]
+ DOT vcl_fetch [
+ DOT shape=record
+ DOT label="vcl_fetch()|req.\nbereq.\nberesp."
+ DOT ]
+ DOT fetch -> vcl_fetch [style=bold,color=blue]
+ DOT fetch -> vcl_fetch [style=bold,color=red]
+ DOT fetch_pass [
+ DOT shape=ellipse
+ DOT label="obj.f.pass=true"
+ DOT ]
+ DOT vcl_fetch -> fetch_pass [label="hit_for_pass",style=bold,color=red]
+ DOT }
+ DOT fetch_pass -> fetchbody [style=bold,color=red]
+ DOT vcl_fetch -> fetchbody [label="deliver",style=bold,color=blue]
+ DOT vcl_fetch -> rstfetch [label="restart",color=purple]
+ DOT rstfetch [label="RESTART",shape=plaintext]
+ DOT fetch -> errfetch
+ DOT vcl_fetch -> errfetch [label="error"]
+ DOT errfetch [label="ERROR",shape=plaintext]
+ */
+
+ static int
+ cnt_fetch(struct sess *sp)
+ {
+ int i;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+
+ AN(sp->director);
+ AZ(sp->wrk->vbc);
+ AZ(sp->wrk->h_content_length);
+ AZ(sp->wrk->do_close);
+ AZ(sp->wrk->storage_hint);
+
+ http_Setup(sp->wrk->beresp, sp->wrk->ws);
+
+ i = FetchHdr(sp);
+ /*
+ * If we recycle a backend connection, there is a finite chance
+ * that the backend closed it before we get a request to it.
+ * Do a single retry in that case.
+ */
+ if (i == 1) {
+ VSC_C_main->backend_retry++;
+ i = FetchHdr(sp);
+ }
+
+ if (i) {
+ sp->handling = VCL_RET_ERROR;
+ sp->err_code = 503;
+ } else {
+ /*
+ * These two headers can be spread over multiple actual headers
+ * and we rely on their content outside of VCL, so collect them
+ * into one line here.
+ */
+ http_CollectHdr(sp->wrk->beresp, H_Cache_Control);
+ http_CollectHdr(sp->wrk->beresp, H_Vary);
+
+ /*
+ * Figure out how the fetch is supposed to happen, before the
+ * headers are adulterated by VCL
+ * NB: Also sets other sp->wrk variables
+ */
+ sp->wrk->body_status = RFC2616_Body(sp);
+
+ sp->err_code = http_GetStatus(sp->wrk->beresp);
+
+ /*
+ * What does RFC2616 think about TTL ?
+ */
+ EXP_Clr(&sp->wrk->exp);
+ sp->wrk->exp.entered = VTIM_real();
+ RFC2616_Ttl(sp);
++ sp->wrk->exp.keep = cache_param->default_keep;
+
+ /* pass from vclrecv{} has negative TTL */
+ if (sp->objcore == NULL)
+ sp->wrk->exp.ttl = -1.;
+
+ AZ(sp->wrk->do_esi);
+
+ VCL_fetch_method(sp);
+
+ switch (sp->handling) {
+ case VCL_RET_HIT_FOR_PASS:
+ if (sp->objcore != NULL)
+ sp->objcore->flags |= OC_F_PASS;
+ sp->step = STP_FETCHBODY;
+ return (0);
+ case VCL_RET_DELIVER:
+ AssertObjCorePassOrBusy(sp->objcore);
+ sp->step = STP_FETCHBODY;
+ return (0);
+ default:
+ break;
+ }
+
+ /* We are not going to fetch the body, Close the connection */
+ VDI_CloseFd(sp->wrk);
+ }
+
+ /* Clean up partial fetch */
+ AZ(sp->wrk->vbc);
+
+ if (sp->objcore != NULL) {
+ CHECK_OBJ_NOTNULL(sp->objcore, OBJCORE_MAGIC);
+ AZ(HSH_Deref(sp->wrk, sp->objcore, NULL));
+ sp->objcore = NULL;
+ }
+ http_Setup(sp->wrk->bereq, NULL);
+ http_Setup(sp->wrk->beresp, NULL);
+ sp->wrk->h_content_length = NULL;
+ sp->director = NULL;
+ sp->wrk->storage_hint = NULL;
+
+ switch (sp->handling) {
+ case VCL_RET_RESTART:
+ sp->restarts++;
+ sp->step = STP_RECV;
+ return (0);
+ case VCL_RET_ERROR:
+ sp->step = STP_ERROR;
+ return (0);
+ default:
+ WRONG("Illegal action in vcl_fetch{}");
+ }
+ }
+
+ /*--------------------------------------------------------------------
+ * Fetch response body from the backend
+ *
+ DOT subgraph xcluster_body {
+ DOT fetchbody [
+ DOT shape=diamond
+ DOT label="stream ?"
+ DOT ]
+ DOT fetchbody2 [
+ DOT shape=ellipse
+ DOT label="fetch body\nfrom backend\n"
+ DOT ]
+ DOT }
+ DOT fetchbody -> fetchbody2 [label=no,style=bold,color=red]
+ DOT fetchbody -> fetchbody2 [style=bold,color=blue]
+ DOT fetchbody -> prepresp [label=yes,style=bold,color=cyan]
+ DOT fetchbody2 -> prepresp [style=bold,color=red]
+ DOT fetchbody2 -> prepresp [style=bold,color=blue]
+ */
+
+
+ static int
+ cnt_fetchbody(struct sess *sp)
+ {
+ int i;
+ struct http *hp, *hp2;
+ char *b;
- uint16_t nhttp;
++ uint16_t nhttp, stale_nhttp;
+ unsigned l;
+ struct vsb *vary = NULL;
+ int varyl = 0, pass;
+
+ assert(sp->handling == VCL_RET_HIT_FOR_PASS ||
+ sp->handling == VCL_RET_DELIVER);
+
+ if (sp->objcore == NULL) {
+ /* This is a pass from vcl_recv */
+ pass = 1;
+ /* VCL may have fiddled this, but that doesn't help */
+ sp->wrk->exp.ttl = -1.;
+ } else if (sp->handling == VCL_RET_HIT_FOR_PASS) {
+ /* pass from vcl_fetch{} -> hit-for-pass */
+ /* XXX: the bereq was not filtered pass... */
+ pass = 1;
+ } else {
+ /* regular object */
+ pass = 0;
+ }
+
+ /*
+ * The VCL variables beresp.do_g[un]zip tells us how we want the
+ * object processed before it is stored.
+ *
+ * The backend Content-Encoding header tells us what we are going
+ * to receive, which we classify in the following three classes:
+ *
+ * "Content-Encoding: gzip" --> object is gzip'ed.
+ * no Content-Encoding --> object is not gzip'ed.
+ * anything else --> do nothing wrt gzip
+ *
+ */
+
+ AZ(sp->wrk->vfp);
+
+ /* We do nothing unless the param is set */
+ if (!cache_param->http_gzip_support)
+ sp->wrk->do_gzip = sp->wrk->do_gunzip = 0;
+
+ sp->wrk->is_gzip =
+ http_HdrIs(sp->wrk->beresp, H_Content_Encoding, "gzip");
+
+ sp->wrk->is_gunzip =
+ !http_GetHdr(sp->wrk->beresp, H_Content_Encoding, NULL);
+
+ /* It can't be both */
+ assert(sp->wrk->is_gzip == 0 || sp->wrk->is_gunzip == 0);
+
+ /* We won't gunzip unless it is gzip'ed */
+ if (sp->wrk->do_gunzip && !sp->wrk->is_gzip)
+ sp->wrk->do_gunzip = 0;
+
+ /* If we do gunzip, remove the C-E header */
+ if (sp->wrk->do_gunzip)
+ http_Unset(sp->wrk->beresp, H_Content_Encoding);
+
+ /* We won't gzip unless it is ungzipped */
+ if (sp->wrk->do_gzip && !sp->wrk->is_gunzip)
+ sp->wrk->do_gzip = 0;
+
+ /* If we do gzip, add the C-E header */
+ if (sp->wrk->do_gzip)
+ http_SetHeader(sp->wrk, sp->vsl_id, sp->wrk->beresp,
+ "Content-Encoding: gzip");
+
+ /* But we can't do both at the same time */
+ assert(sp->wrk->do_gzip == 0 || sp->wrk->do_gunzip == 0);
+
+ /* ESI takes precedence and handles gzip/gunzip itself */
+ if (sp->wrk->do_esi)
+ sp->wrk->vfp = &vfp_esi;
+ else if (sp->wrk->do_gunzip)
+ sp->wrk->vfp = &vfp_gunzip;
+ else if (sp->wrk->do_gzip)
+ sp->wrk->vfp = &vfp_gzip;
+ else if (sp->wrk->is_gzip)
+ sp->wrk->vfp = &vfp_testgzip;
+
+ if (sp->wrk->do_esi || sp->esi_level > 0)
+ sp->wrk->do_stream = 0;
+ if (!sp->wantbody)
+ sp->wrk->do_stream = 0;
+
+ l = http_EstimateWS(sp->wrk->beresp,
+ pass ? HTTPH_R_PASS : HTTPH_A_INS, &nhttp);
++ if (sp->stale_obj) {
++ l += http_EstimateWS(sp->stale_obj->http, 0, &stale_nhttp);
++ nhttp += stale_nhttp;
++ }
+
+ /* Create Vary instructions */
+ if (sp->objcore != NULL) {
+ CHECK_OBJ_NOTNULL(sp->objcore, OBJCORE_MAGIC);
+ vary = VRY_Create(sp, sp->wrk->beresp);
+ if (vary != NULL) {
+ varyl = VSB_len(vary);
+ assert(varyl > 0);
+ l += varyl;
+ }
+ }
+
+ /*
+ * Space for producing a Content-Length: header including padding
+ * A billion gigabytes is enough for anybody.
+ */
+ l += strlen("Content-Length: XxxXxxXxxXxxXxxXxx") + sizeof(void *);
+
+ if (sp->wrk->exp.ttl < cache_param->shortlived || sp->objcore == NULL)
+ sp->wrk->storage_hint = TRANSIENT_STORAGE;
+
+ sp->obj = STV_NewObject(sp, sp->wrk->storage_hint, l,
+ &sp->wrk->exp, nhttp);
+ if (sp->obj == NULL) {
+ /*
+ * Try to salvage the transaction by allocating a
+ * shortlived object on Transient storage.
+ */
+ sp->obj = STV_NewObject(sp, TRANSIENT_STORAGE, l,
+ &sp->wrk->exp, nhttp);
+ if (sp->wrk->exp.ttl > cache_param->shortlived)
+ sp->wrk->exp.ttl = cache_param->shortlived;
+ sp->wrk->exp.grace = 0.0;
+ sp->wrk->exp.keep = 0.0;
+ }
+ if (sp->obj == NULL) {
+ sp->err_code = 503;
+ sp->step = STP_ERROR;
+ VDI_CloseFd(sp->wrk);
+ return (0);
+ }
+ CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
++ sp->obj->exp.keep = sp->wrk->exp.keep;
+
+ sp->wrk->storage_hint = NULL;
+
+ if (sp->wrk->do_gzip || (sp->wrk->is_gzip && !sp->wrk->do_gunzip))
+ sp->obj->gziped = 1;
+
+ if (vary != NULL) {
+ sp->obj->vary =
+ (void *)WS_Alloc(sp->obj->http->ws, varyl);
+ AN(sp->obj->vary);
+ memcpy(sp->obj->vary, VSB_data(vary), varyl);
+ VRY_Validate(sp->obj->vary);
+ VSB_delete(vary);
+ }
+
+ sp->obj->xid = sp->xid;
+ sp->obj->response = sp->err_code;
+ WS_Assert(sp->obj->ws_o);
+
+ /* Filter into object */
+ hp = sp->wrk->beresp;
+ hp2 = sp->obj->http;
+
+ hp2->logtag = HTTP_Obj;
+ http_CopyResp(hp2, hp);
++
+ http_FilterFields(sp->wrk, sp->vsl_id, hp2, hp,
+ pass ? HTTPH_R_PASS : HTTPH_A_INS);
++
++ /*
++ * If we found a candidate for conditional backend request, attempt it
++ * now. If backend responds with 304, http_Check304() merges stale_obj
++ * into sp->obj, any other response is handled as usual. In either case,
++ * the stale_obj is no longer needed in the cache, so discard it.
++ */
++ if (sp->stale_obj) {
++ http_Check304(sp);
++ if (sp->wrk->beresp->status == 304)
++ assert(sp->obj->http->status == 200);
++ EXP_Clr(&sp->stale_obj->exp);
++ EXP_Rearm(sp->stale_obj);
++ HSH_Deref(sp->wrk, NULL, &sp->stale_obj);
++ AZ(sp->stale_obj);
++ }
+ http_CopyHome(sp->wrk, sp->vsl_id, hp2);
+
- if (http_GetHdr(hp, H_Last_Modified, &b))
++ if (http_GetHdr(hp, H_Last_Modified, &b)
++ || http_GetHdr(sp->obj->http, H_Last_Modified, &b))
+ sp->obj->last_modified = VTIM_parse(b);
+ else
+ sp->obj->last_modified = floor(sp->wrk->exp.entered);
+
+ assert(WRW_IsReleased(sp->wrk));
+
+ /*
+ * If we can deliver a 304 reply, we don't bother streaming.
+ * Notice that vcl_deliver{} could still nuke the headers
+ * that allow the 304, in which case we return 200 non-stream.
+ */
+ if (sp->obj->response == 200 &&
+ sp->http->conds &&
+ RFC2616_Do_Cond(sp))
+ sp->wrk->do_stream = 0;
+
+ AssertObjCorePassOrBusy(sp->obj->objcore);
+
+ if (sp->wrk->do_stream) {
+ sp->step = STP_PREPRESP;
+ return (0);
+ }
+
+ /* Use unmodified headers*/
+ i = FetchBody(sp->wrk, sp->obj);
+
+ sp->wrk->h_content_length = NULL;
+
+ http_Setup(sp->wrk->bereq, NULL);
+ http_Setup(sp->wrk->beresp, NULL);
+ sp->wrk->vfp = NULL;
+ assert(WRW_IsReleased(sp->wrk));
+ AZ(sp->wrk->vbc);
+ AN(sp->director);
+
+ if (i) {
+ HSH_Drop(sp);
+ AZ(sp->obj);
+ sp->err_code = 503;
+ sp->step = STP_ERROR;
+ return (0);
+ }
+
+ if (sp->obj->objcore != NULL) {
+ EXP_Insert(sp->obj);
+ AN(sp->obj->objcore);
+ AN(sp->obj->objcore->ban);
+ HSH_Unbusy(sp);
+ }
+ sp->wrk->acct_tmp.fetch++;
+ sp->step = STP_PREPRESP;
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * Stream the body as we fetch it
+ DOT subgraph xstreambody {
+ DOT streambody [
+ DOT shape=ellipse
+ DOT label="streaming\nfetch/deliver"
+ DOT ]
+ DOT }
+ DOT streambody -> DONE [style=bold,color=cyan]
+ */
+
+ static int
+ cnt_streambody(struct sess *sp)
+ {
+ int i;
+ struct stream_ctx sctx;
+ uint8_t obuf[sp->wrk->res_mode & RES_GUNZIP ?
+ cache_param->gzip_stack_buffer : 1];
+
+ memset(&sctx, 0, sizeof sctx);
+ sctx.magic = STREAM_CTX_MAGIC;
+ AZ(sp->wrk->sctx);
+ sp->wrk->sctx = &sctx;
+
+ if (sp->wrk->res_mode & RES_GUNZIP) {
+ sctx.vgz = VGZ_NewUngzip(sp->wrk, "U S -");
+ sctx.obuf = obuf;
+ sctx.obuf_len = sizeof (obuf);
+ }
+
+ RES_StreamStart(sp);
+
+ AssertObjCorePassOrBusy(sp->obj->objcore);
+
+ i = FetchBody(sp->wrk, sp->obj);
+
+ sp->wrk->h_content_length = NULL;
+
+ http_Setup(sp->wrk->bereq, NULL);
+ http_Setup(sp->wrk->beresp, NULL);
+ sp->wrk->vfp = NULL;
+ AZ(sp->wrk->vbc);
+ AN(sp->director);
+
+ if (!i && sp->obj->objcore != NULL) {
+ EXP_Insert(sp->obj);
+ AN(sp->obj->objcore);
+ AN(sp->obj->objcore->ban);
+ HSH_Unbusy(sp);
+ } else {
+ sp->doclose = "Stream error";
+ }
+ sp->wrk->acct_tmp.fetch++;
+ sp->director = NULL;
+ sp->restarts = 0;
+
+ RES_StreamEnd(sp);
+ if (sp->wrk->res_mode & RES_GUNZIP)
+ (void)VGZ_Destroy(&sctx.vgz, sp->vsl_id);
+
+ sp->wrk->sctx = NULL;
+ assert(WRW_IsReleased(sp->wrk));
+ assert(sp->wrk->wrw.ciov == sp->wrk->wrw.siov);
+ (void)HSH_Deref(sp->wrk, NULL, &sp->obj);
+ http_Setup(sp->wrk->resp, NULL);
+ sp->step = STP_DONE;
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * The very first request
+ */
+ static int
+ cnt_first(struct sess *sp)
+ {
+
+ /*
+ * XXX: If we don't have acceptfilters we are somewhat subject
+ * XXX: to DoS'ing here. One remedy would be to set a shorter
+ * XXX: SO_RCVTIMEO and once we have received something here
+ * XXX: increase it to the normal value.
+ */
+
+ assert(sp->xid == 0);
+ assert(sp->restarts == 0);
+ VCA_Prep(sp);
+
+ /* Record the session watermark */
+ sp->ws_ses = WS_Snapshot(sp->ws);
+
+ /* Receive a HTTP protocol request */
+ HTC_Init(sp->htc, sp->ws, sp->fd, sp->vsl_id, cache_param->http_req_size,
+ cache_param->http_req_hdr_len);
+ sp->wrk->lastused = sp->t_open;
+ sp->wrk->acct_tmp.sess++;
+
+ sp->step = STP_WAIT;
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * HIT
+ * We had a cache hit. Ask VCL, then march off as instructed.
+ *
+ DOT subgraph xcluster_hit {
+ DOT hit [
+ DOT shape=record
+ DOT label="vcl_hit()|req.\nobj."
+ DOT ]
+ DOT }
+ DOT hit -> err_hit [label="error"]
+ DOT err_hit [label="ERROR",shape=plaintext]
+ DOT hit -> rst_hit [label="restart",color=purple]
+ DOT rst_hit [label="RESTART",shape=plaintext]
+ DOT hit -> pass [label=pass,style=bold,color=red]
+ DOT hit -> prepresp [label="deliver",style=bold,color=green]
+ */
+
+ static int
+ cnt_hit(struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+
+ assert(!(sp->obj->objcore->flags & OC_F_PASS));
+
+ AZ(sp->wrk->do_stream);
+
+ VCL_hit_method(sp);
+
+ if (sp->handling == VCL_RET_DELIVER) {
+ /* Dispose of any body part of the request */
+ (void)FetchReqBody(sp);
+ AZ(sp->wrk->bereq->ws);
+ AZ(sp->wrk->beresp->ws);
+ sp->step = STP_PREPRESP;
+ return (0);
+ }
+
+ /* Drop our object, we won't need it */
+ (void)HSH_Deref(sp->wrk, NULL, &sp->obj);
+ sp->objcore = NULL;
+
+ switch(sp->handling) {
+ case VCL_RET_PASS:
+ sp->step = STP_PASS;
+ return (0);
+ case VCL_RET_ERROR:
+ sp->step = STP_ERROR;
+ return (0);
+ case VCL_RET_RESTART:
+ sp->director = NULL;
+ sp->restarts++;
+ sp->step = STP_RECV;
+ return (0);
+ default:
+ WRONG("Illegal action in vcl_hit{}");
+ }
+ }
+
+ /*--------------------------------------------------------------------
+ * LOOKUP
+ * Hash things together and look object up in hash-table.
+ *
+ * LOOKUP consists of two substates so that we can reenter if we
+ * encounter a busy object.
+ *
+ DOT subgraph xcluster_lookup {
+ DOT hash [
+ DOT shape=record
+ DOT label="vcl_hash()|req."
+ DOT ]
+ DOT lookup [
+ DOT shape=diamond
+ DOT label="obj in cache ?\ncreate if not"
+ DOT ]
+ DOT lookup2 [
+ DOT shape=diamond
+ DOT label="obj.f.pass ?"
+ DOT ]
+ DOT hash -> lookup [label="hash",style=bold,color=green]
+ DOT lookup -> lookup2 [label="yes",style=bold,color=green]
+ DOT }
+ DOT lookup2 -> hit [label="no", style=bold,color=green]
+ DOT lookup2 -> pass [label="yes",style=bold,color=red]
+ DOT lookup -> miss [label="no",style=bold,color=blue]
+ */
+
+ static int
+ cnt_lookup(struct sess *sp)
+ {
+ struct objcore *oc;
+ struct object *o;
+ struct objhead *oh;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+
+ if (sp->hash_objhead == NULL) {
+ /* Not a waiting list return */
+ AZ(sp->vary_b);
+ AZ(sp->vary_l);
+ AZ(sp->vary_e);
+ (void)WS_Reserve(sp->ws, 0);
+ } else {
+ AN(sp->ws->r);
+ }
+ sp->vary_b = (void*)sp->ws->f;
+ sp->vary_e = (void*)sp->ws->r;
+ sp->vary_b[2] = '\0';
+
+ oc = HSH_Lookup(sp, &oh);
+
+ if (oc == NULL) {
+ /*
+ * We lost the session to a busy object, disembark the
+ * worker thread. The hash code will restart the session,
+ * still in STP_LOOKUP, later when the busy object isn't.
+ * NB: Do not access sp any more !
+ */
+ return (1);
+ }
+
+
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+
+ /* If we inserted a new object it's a miss */
+ if (oc->flags & OC_F_BUSY) {
+ sp->wrk->stats.cache_miss++;
+
+ if (sp->vary_l != NULL) {
+ assert(oc->busyobj->vary == sp->vary_b);
+ VRY_Validate(oc->busyobj->vary);
+ WS_ReleaseP(sp->ws, (void*)sp->vary_l);
+ } else {
+ AZ(oc->busyobj->vary);
+ WS_Release(sp->ws, 0);
+ }
+ sp->vary_b = NULL;
+ sp->vary_l = NULL;
+ sp->vary_e = NULL;
+
+ sp->objcore = oc;
+ sp->step = STP_MISS;
+ return (0);
+ }
+
+ o = oc_getobj(sp->wrk, oc);
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ sp->obj = o;
+
+ WS_Release(sp->ws, 0);
+ sp->vary_b = NULL;
+ sp->vary_l = NULL;
+ sp->vary_e = NULL;
+
+ if (oc->flags & OC_F_PASS) {
+ sp->wrk->stats.cache_hitpass++;
+ WSP(sp, SLT_HitPass, "%u", sp->obj->xid);
+ (void)HSH_Deref(sp->wrk, NULL, &sp->obj);
++ if (sp->stale_obj != NULL)
++ (void)HSH_Deref(sp->wrk, NULL, &sp->stale_obj);
+ sp->objcore = NULL;
+ sp->step = STP_PASS;
+ return (0);
+ }
+
+ sp->wrk->stats.cache_hit++;
+ WSP(sp, SLT_Hit, "%u", sp->obj->xid);
+ sp->step = STP_HIT;
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * We had a miss, ask VCL, proceed as instructed
+ *
+ DOT subgraph xcluster_miss {
+ DOT miss [
+ DOT shape=ellipse
+ DOT label="filter req.->bereq."
+ DOT ]
+ DOT vcl_miss [
+ DOT shape=record
+ DOT label="vcl_miss()|req.\nbereq."
+ DOT ]
+ DOT miss -> vcl_miss [style=bold,color=blue]
+ DOT }
+ DOT vcl_miss -> rst_miss [label="restart",color=purple]
+ DOT rst_miss [label="RESTART",shape=plaintext]
+ DOT vcl_miss -> err_miss [label="error"]
+ DOT err_miss [label="ERROR",shape=plaintext]
+ DOT vcl_miss -> fetch [label="fetch",style=bold,color=blue]
+ DOT vcl_miss -> pass [label="pass",style=bold,color=red]
+ DOT
+ */
+
+ static int
+ cnt_miss(struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+
+ AZ(sp->obj);
+ AN(sp->objcore);
+ WS_Reset(sp->wrk->ws, NULL);
+ http_Setup(sp->wrk->bereq, sp->wrk->ws);
+ http_FilterHeader(sp, HTTPH_R_FETCH);
+ http_ForceGet(sp->wrk->bereq);
+ if (cache_param->http_gzip_support) {
+ /*
+ * We always ask the backend for gzip, even if the
+ * client doesn't grok it. We will uncompress for
+ * the minority of clients which don't.
+ */
+ http_Unset(sp->wrk->bereq, H_Accept_Encoding);
+ http_SetHeader(sp->wrk, sp->vsl_id, sp->wrk->bereq,
+ "Accept-Encoding: gzip");
+ }
+ sp->wrk->connect_timeout = 0;
+ sp->wrk->first_byte_timeout = 0;
+ sp->wrk->between_bytes_timeout = 0;
++
++ /* If a candidate for a conditional backend request was found,
++ * add If-Modified-Since and/or If-None-Match to the bereq.
++ */
++ if (sp->stale_obj)
++ http_CheckRefresh(sp);
++
+ VCL_miss_method(sp);
+ switch(sp->handling) {
+ case VCL_RET_ERROR:
+ AZ(HSH_Deref(sp->wrk, sp->objcore, NULL));
+ sp->objcore = NULL;
+ http_Setup(sp->wrk->bereq, NULL);
+ sp->step = STP_ERROR;
+ return (0);
+ case VCL_RET_PASS:
+ AZ(HSH_Deref(sp->wrk, sp->objcore, NULL));
+ sp->objcore = NULL;
+ sp->step = STP_PASS;
+ return (0);
+ case VCL_RET_FETCH:
+ sp->step = STP_FETCH;
+ return (0);
+ case VCL_RET_RESTART:
+ AZ(HSH_Deref(sp->wrk, sp->objcore, NULL));
+ sp->objcore = NULL;
+ INCOMPL();
+ default:
+ WRONG("Illegal action in vcl_miss{}");
+ }
+ }
+
+ /*--------------------------------------------------------------------
+ * Start pass processing by getting headers from backend, then
+ * continue in passbody.
+ *
+ DOT subgraph xcluster_pass {
+ DOT pass [
+ DOT shape=ellipse
+ DOT label="deref obj."
+ DOT ]
+ DOT pass2 [
+ DOT shape=ellipse
+ DOT label="filter req.->bereq."
+ DOT ]
+ DOT vcl_pass [
+ DOT shape=record
+ DOT label="vcl_pass()|req.\nbereq."
+ DOT ]
+ DOT pass_do [
+ DOT shape=ellipse
+ DOT label="create anon object\n"
+ DOT ]
+ DOT pass -> pass2 [style=bold, color=red]
+ DOT pass2 -> vcl_pass [style=bold, color=red]
+ DOT vcl_pass -> pass_do [label="pass"] [style=bold, color=red]
+ DOT }
+ DOT pass_do -> fetch [style=bold, color=red]
+ DOT vcl_pass -> rst_pass [label="restart",color=purple]
+ DOT rst_pass [label="RESTART",shape=plaintext]
+ DOT vcl_pass -> err_pass [label="error"]
+ DOT err_pass [label="ERROR",shape=plaintext]
+ */
+
+ static int
+ cnt_pass(struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+ AZ(sp->obj);
+
+ WS_Reset(sp->wrk->ws, NULL);
+ http_Setup(sp->wrk->bereq, sp->wrk->ws);
+ http_FilterHeader(sp, HTTPH_R_PASS);
+
+ sp->wrk->connect_timeout = 0;
+ sp->wrk->first_byte_timeout = 0;
+ sp->wrk->between_bytes_timeout = 0;
+ VCL_pass_method(sp);
+ if (sp->handling == VCL_RET_ERROR) {
+ http_Setup(sp->wrk->bereq, NULL);
+ sp->step = STP_ERROR;
+ return (0);
+ }
+ assert(sp->handling == VCL_RET_PASS);
+ sp->wrk->acct_tmp.pass++;
+ sp->sendbody = 1;
+ sp->step = STP_FETCH;
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * Ship the request header to the backend unchanged, then pipe
+ * until one of the ends close the connection.
+ *
+ DOT subgraph xcluster_pipe {
+ DOT pipe [
+ DOT shape=ellipse
+ DOT label="Filter req.->bereq."
+ DOT ]
+ DOT vcl_pipe [
+ DOT shape=record
+ DOT label="vcl_pipe()|req.\nbereq\."
+ DOT ]
+ DOT pipe_do [
+ DOT shape=ellipse
+ DOT label="send bereq.\npipe until close"
+ DOT ]
+ DOT vcl_pipe -> pipe_do [label="pipe",style=bold,color=orange]
+ DOT pipe -> vcl_pipe [style=bold,color=orange]
+ DOT }
+ DOT pipe_do -> DONE [style=bold,color=orange]
+ DOT vcl_pipe -> err_pipe [label="error"]
+ DOT err_pipe [label="ERROR",shape=plaintext]
+ */
+
+ static int
+ cnt_pipe(struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+
+ sp->wrk->acct_tmp.pipe++;
+ WS_Reset(sp->wrk->ws, NULL);
+ http_Setup(sp->wrk->bereq, sp->wrk->ws);
+ http_FilterHeader(sp, HTTPH_R_PIPE);
+
+ VCL_pipe_method(sp);
+
+ if (sp->handling == VCL_RET_ERROR)
+ INCOMPL();
+ assert(sp->handling == VCL_RET_PIPE);
+
+ PipeSession(sp);
+ assert(WRW_IsReleased(sp->wrk));
+ http_Setup(sp->wrk->bereq, NULL);
+ sp->step = STP_DONE;
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * RECV
+ * We have a complete request, set everything up and start it.
+ *
+ DOT subgraph xcluster_recv {
+ DOT recv [
+ DOT shape=record
+ DOT label="vcl_recv()|req."
+ DOT ]
+ DOT }
+ DOT RESTART -> recv
+ DOT recv -> pipe [label="pipe",style=bold,color=orange]
+ DOT recv -> pass2 [label="pass",style=bold,color=red]
+ DOT recv -> err_recv [label="error"]
+ DOT err_recv [label="ERROR",shape=plaintext]
+ DOT recv -> hash [label="lookup",style=bold,color=green]
+ */
+
+ static int
+ cnt_recv(struct sess *sp)
+ {
+ unsigned recv_handling;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->vcl, VCL_CONF_MAGIC);
+ AZ(sp->obj);
+ assert(sp->wrk->wrw.ciov == sp->wrk->wrw.siov);
+
+ /* By default we use the first backend */
+ AZ(sp->director);
+ sp->director = sp->vcl->director[0];
+ AN(sp->director);
+
+ sp->disable_esi = 0;
+ sp->hash_always_miss = 0;
+ sp->hash_ignore_busy = 0;
+ sp->client_identity = NULL;
+
+ http_CollectHdr(sp->http, H_Cache_Control);
+
+ VCL_recv_method(sp);
+ recv_handling = sp->handling;
+
+ if (sp->restarts >= cache_param->max_restarts) {
+ if (sp->err_code == 0)
+ sp->err_code = 503;
+ sp->step = STP_ERROR;
+ return (0);
+ }
+
+ /* Zap these, in case we came here through restart */
+ sp->wrk->do_esi = 0;
+ sp->wrk->is_gzip = 0;
+ sp->wrk->is_gunzip = 0;
+ sp->wrk->do_gzip = 0;
+ sp->wrk->do_gunzip = 0;
+ sp->wrk->do_stream = 0;
+
+ if (cache_param->http_gzip_support &&
+ (recv_handling != VCL_RET_PIPE) &&
+ (recv_handling != VCL_RET_PASS)) {
+ if (RFC2616_Req_Gzip(sp)) {
+ http_Unset(sp->http, H_Accept_Encoding);
+ http_SetHeader(sp->wrk, sp->vsl_id, sp->http,
+ "Accept-Encoding: gzip");
+ } else {
+ http_Unset(sp->http, H_Accept_Encoding);
+ }
+ }
+
+ SHA256_Init(sp->wrk->sha256ctx);
+ VCL_hash_method(sp);
+ assert(sp->handling == VCL_RET_HASH);
+ SHA256_Final(sp->digest, sp->wrk->sha256ctx);
+
+ if (!strcmp(sp->http->hd[HTTP_HDR_REQ].b, "HEAD"))
+ sp->wantbody = 0;
+ else
+ sp->wantbody = 1;
+
+ sp->sendbody = 0;
+ switch(recv_handling) {
+ case VCL_RET_LOOKUP:
+ /* XXX: discard req body, if any */
+ sp->step = STP_LOOKUP;
+ return (0);
+ case VCL_RET_PIPE:
+ if (sp->esi_level > 0) {
+ /* XXX: VSL something */
+ INCOMPL();
+ /* sp->step = STP_DONE; */
+ return (1);
+ }
+ sp->step = STP_PIPE;
+ return (0);
+ case VCL_RET_PASS:
+ sp->step = STP_PASS;
+ return (0);
+ case VCL_RET_ERROR:
+ /* XXX: discard req body, if any */
+ sp->step = STP_ERROR;
+ return (0);
+ default:
+ WRONG("Illegal action in vcl_recv{}");
+ }
+ }
+
+ /*--------------------------------------------------------------------
+ * START
+ * Handle a request, wherever it came from recv/restart.
+ *
+ DOT start [shape=box,label="Dissect request"]
+ DOT start -> recv [style=bold,color=green]
+ */
+
+ static int
+ cnt_start(struct sess *sp)
+ {
+ uint16_t done;
+ char *p;
+ const char *r = "HTTP/1.1 100 Continue\r\n\r\n";
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ AZ(sp->restarts);
+ AZ(sp->obj);
+ AZ(sp->vcl);
+
+ /* Update stats of various sorts */
+ sp->wrk->stats.client_req++;
+ sp->t_req = VTIM_real();
+ sp->wrk->lastused = sp->t_req;
+ sp->wrk->acct_tmp.req++;
+
+ /* Assign XID and log */
+ sp->xid = ++xids; /* XXX not locked */
+ WSP(sp, SLT_ReqStart, "%s %s %u", sp->addr, sp->port, sp->xid);
+
+ /* Borrow VCL reference from worker thread */
+ VCL_Refresh(&sp->wrk->vcl);
+ sp->vcl = sp->wrk->vcl;
+ sp->wrk->vcl = NULL;
+
+ http_Setup(sp->http, sp->ws);
+ done = http_DissectRequest(sp);
+
+ /* If we could not even parse the request, just close */
+ if (done == 400) {
+ sp->step = STP_DONE;
+ SES_Close(sp, "junk");
+ return (0);
+ }
+
+ /* Catch request snapshot */
+ sp->ws_req = WS_Snapshot(sp->ws);
+
+ /* Catch original request, before modification */
+ HTTP_Copy(sp->http0, sp->http);
+
+ if (done != 0) {
+ sp->err_code = done;
+ sp->step = STP_ERROR;
+ return (0);
+ }
+
+ sp->doclose = http_DoConnection(sp->http);
+
+ /* XXX: Handle TRACE & OPTIONS of Max-Forwards = 0 */
+
+ /*
+ * Handle Expect headers
+ */
+ if (http_GetHdr(sp->http, H_Expect, &p)) {
+ if (strcasecmp(p, "100-continue")) {
+ sp->err_code = 417;
+ sp->step = STP_ERROR;
+ return (0);
+ }
+
+ /* XXX: Don't bother with write failures for now */
+ (void)write(sp->fd, r, strlen(r));
+ /* XXX: When we do ESI includes, this is not removed
+ * XXX: because we use http0 as our basis. Believed
+ * XXX: safe, but potentially confusing.
+ */
+ http_Unset(sp->http, H_Expect);
+ }
+
+ sp->step = STP_RECV;
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * Central state engine dispatcher.
+ *
+ * Kick the session around until it has had enough.
+ *
+ */
+
+ static void
+ cnt_diag(struct sess *sp, const char *state)
+ {
+ if (sp->wrk != NULL) {
+ WSP(sp, SLT_Debug, "thr %p STP_%s sp %p obj %p vcl %p",
+ pthread_self(), state, sp, sp->obj, sp->vcl);
+ WSL_Flush(sp->wrk, 0);
+ } else {
+ VSL(SLT_Debug, sp->vsl_id,
+ "thr %p STP_%s sp %p obj %p vcl %p",
+ pthread_self(), state, sp, sp->obj, sp->vcl);
+ }
+ }
+
+ void
+ CNT_Session(struct sess *sp)
+ {
+ int done;
+ struct worker *w;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ w = sp->wrk;
+ CHECK_OBJ_NOTNULL(w, WORKER_MAGIC);
+
+ /*
+ * Possible entrance states
+ */
+ assert(
+ sp->step == STP_FIRST ||
+ sp->step == STP_START ||
+ sp->step == STP_LOOKUP ||
+ sp->step == STP_RECV);
+
+ AZ(w->do_stream);
+ AZ(w->is_gzip);
+ AZ(w->do_gzip);
+ AZ(w->is_gunzip);
+ AZ(w->do_gunzip);
+ AZ(w->do_esi);
+
+ /*
+ * Whenever we come in from the acceptor or waiter, we need to set
+ * blocking mode, but there is no point in setting it when we come from
+ * ESI or when a parked sessions returns.
+ * It would be simpler to do this in the acceptor or waiter, but we'd
+ * rather do the syscall in the worker thread.
+ * On systems which return errors for ioctl, we close early
+ */
+ if ((sp->step == STP_FIRST || sp->step == STP_START) &&
+ VTCP_blocking(sp->fd)) {
+ if (errno == ECONNRESET)
+ SES_Close(sp, "remote closed");
+ else
+ SES_Close(sp, "error");
+ sp->step = STP_DONE;
+ }
+
+ /*
+ * NB: Once done is set, we can no longer touch sp!
+ */
+ for (done = 0; !done; ) {
+ assert(sp->wrk == w);
+ /*
+ * This is a good place to be paranoid about the various
+ * pointers still pointing to the things we expect.
+ */
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_ORNULL(sp->obj, OBJECT_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->wrk, WORKER_MAGIC);
+ CHECK_OBJ_ORNULL(w->nobjhead, OBJHEAD_MAGIC);
+ WS_Assert(w->ws);
+
+ switch (sp->step) {
+ #define STEP(l,u) \
+ case STP_##u: \
+ if (cache_param->diag_bitmap & 0x01) \
+ cnt_diag(sp, #u); \
+ done = cnt_##l(sp); \
+ break;
+ #include "tbl/steps.h"
+ #undef STEP
+ default:
+ WRONG("State engine misfire");
+ }
+ WS_Assert(w->ws);
+ CHECK_OBJ_ORNULL(w->nobjhead, OBJHEAD_MAGIC);
+ }
+ WSL_Flush(w, 0);
+ AZ(w->do_stream);
+ AZ(w->is_gzip);
+ AZ(w->do_gzip);
+ AZ(w->is_gunzip);
+ AZ(w->do_gunzip);
+ AZ(w->do_esi);
+ #define ACCT(foo) AZ(w->acct_tmp.foo);
+ #include "tbl/acct_fields.h"
+ #undef ACCT
+ assert(WRW_IsReleased(w));
+ }
+
+ /*
+ DOT }
+ */
+
+ /*--------------------------------------------------------------------
+ * Debugging aids
+ */
+
+ static void
+ cli_debug_xid(struct cli *cli, const char * const *av, void *priv)
+ {
+ (void)priv;
+ if (av[2] != NULL)
+ xids = strtoul(av[2], NULL, 0);
+ VCLI_Out(cli, "XID is %u", xids);
+ }
+
+ /*
+ * Default to seed=1, this is the only seed value POSIXl guarantees will
+ * result in a reproducible random number sequence.
+ */
+ static void
+ cli_debug_srandom(struct cli *cli, const char * const *av, void *priv)
+ {
+ (void)priv;
+ unsigned seed = 1;
+
+ if (av[2] != NULL)
+ seed = strtoul(av[2], NULL, 0);
+ srandom(seed);
+ srand48(random());
+ VCLI_Out(cli, "Random(3) seeded with %lu", seed);
+ }
+
+ static struct cli_proto debug_cmds[] = {
+ { "debug.xid", "debug.xid",
+ "\tExamine or set XID\n", 0, 1, "d", cli_debug_xid },
+ { "debug.srandom", "debug.srandom",
+ "\tSeed the random(3) function\n", 0, 1, "d", cli_debug_srandom },
+ { NULL }
+ };
+
+ /*--------------------------------------------------------------------
+ *
+ */
+
+ void
+ CNT_Init(void)
+ {
+
+ srandomdev();
+ srand48(random());
+ xids = random();
+ CLI_AddFuncs(debug_cmds);
+ }
+
+
diff --cc bin/varnishd/cache/cache_expire.c
index 0000000,23e3fc6..31fd41a
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_expire.c
+++ b/bin/varnishd/cache/cache_expire.c
@@@ -1,0 -1,490 +1,490 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * LRU and object timer handling.
+ *
+ * We have two data structures, a LRU-list and a binary heap for the timers
+ * and two ways to kill objects: TTL-timeouts and LRU cleanups.
+ *
+ * Any object on the LRU is also on the binheap and vice versa.
+ *
+ * We hold a single object reference for both data structures.
+ *
+ * An attempted overview:
+ *
+ * EXP_Ttl() EXP_Grace() EXP_Keep()
+ * | | |
+ * entered v v |
+ * | +--------------->+ |
+ * v | grace |
+ * +---------------------->+ |
+ * ttl | v
+ * +---------------------------->+
+ * keep
+ *
+ */
+
+ #include "config.h"
+
+ #include <math.h>
+
+ #include "cache.h"
+
+ #include "binary_heap.h"
+ #include "hash/hash_slinger.h"
+ #include "vtim.h"
+
+ static pthread_t exp_thread;
+ static struct binheap *exp_heap;
+ static struct lock exp_mtx;
+
+ /*--------------------------------------------------------------------
+ * struct exp manipulations
+ *
+ * The Get/Set functions encapsulate the mutual magic between the
+ * fields in one single place.
+ */
+
+ void
+ EXP_Clr(struct exp *e)
+ {
+
+ e->ttl = -1;
+ e->grace = -1;
+ e->keep = -1;
+ e->age = 0;
+ e->entered = 0;
+ }
+
+ #define EXP_ACCESS(fld, low_val, extra) \
+ double \
+ EXP_Get_##fld(const struct exp *e) \
+ { \
+ return (e->fld > 0. ? e->fld : low_val); \
+ } \
+ \
+ void \
+ EXP_Set_##fld(struct exp *e, double v) \
+ { \
+ if (v > 0.) \
+ e->fld = v; \
+ else { \
+ e->fld = -1.; \
+ extra; \
+ } \
+ } \
+
+ EXP_ACCESS(ttl, -1., (e->grace = e->keep = -1.))
+ EXP_ACCESS(grace, 0., )
+ EXP_ACCESS(keep, 0.,)
+
+ /*--------------------------------------------------------------------
+ * Calculate an objects effective keep, grace or ttl time, suitably
+ * adjusted for defaults and by per-session limits.
+ */
+
-static double
++double
+ EXP_Keep(const struct sess *sp, const struct object *o)
+ {
+ double r;
+
+ r = (double)cache_param->default_keep;
+ if (o->exp.keep > 0.)
+ r = o->exp.keep;
+ if (sp != NULL && sp->exp.keep > 0. && sp->exp.keep < r)
+ r = sp->exp.keep;
+ return (EXP_Ttl(sp, o) + r);
+ }
+
+ double
+ EXP_Grace(const struct sess *sp, const struct object *o)
+ {
+ double r;
+
+ r = (double)cache_param->default_grace;
+ if (o->exp.grace >= 0.)
+ r = o->exp.grace;
+ if (sp != NULL && sp->exp.grace > 0. && sp->exp.grace < r)
+ r = sp->exp.grace;
+ return (EXP_Ttl(sp, o) + r);
+ }
+
+ double
+ EXP_Ttl(const struct sess *sp, const struct object *o)
+ {
+ double r;
+
+ r = o->exp.ttl;
+ if (sp != NULL && sp->exp.ttl > 0. && sp->exp.ttl < r)
+ r = sp->exp.ttl;
+ return (o->exp.entered + r);
+ }
+
+ /*--------------------------------------------------------------------
+ * When & why does the timer fire for this object ?
+ */
+
+ static int
+ update_object_when(const struct object *o)
+ {
+ struct objcore *oc;
+ double when, w2;
+
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ oc = o->objcore;
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ Lck_AssertHeld(&exp_mtx);
+
+ when = EXP_Keep(NULL, o);
+ w2 = EXP_Grace(NULL, o);
+ if (w2 > when)
+ when = w2;
+ assert(!isnan(when));
+ if (when == oc->timer_when)
+ return (0);
+ oc->timer_when = when;
+ return (1);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ exp_insert(struct objcore *oc, struct lru *lru)
+ {
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+
+ Lck_AssertHeld(&lru->mtx);
+ Lck_AssertHeld(&exp_mtx);
+ assert(oc->timer_idx == BINHEAP_NOIDX);
+ binheap_insert(exp_heap, oc);
+ assert(oc->timer_idx != BINHEAP_NOIDX);
+ VTAILQ_INSERT_TAIL(&lru->lru_head, oc, lru_list);
+ }
+
+ /*--------------------------------------------------------------------
+ * Object has been added to cache, record in lru & binheap.
+ *
+ * The objcore comes with a reference, which we inherit.
+ */
+
+ void
+ EXP_Inject(struct objcore *oc, struct lru *lru, double when)
+ {
+
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+
+ Lck_Lock(&lru->mtx);
+ Lck_Lock(&exp_mtx);
+ oc->timer_when = when;
+ exp_insert(oc, lru);
+ Lck_Unlock(&exp_mtx);
+ Lck_Unlock(&lru->mtx);
+ }
+
+ /*--------------------------------------------------------------------
+ * Object has been added to cache, record in lru & binheap.
+ *
+ * We grab a reference to the object, which will keep it around until
+ * we decide its time to let it go.
+ */
+
+ void
+ EXP_Insert(struct object *o)
+ {
+ struct objcore *oc;
+ struct lru *lru;
+
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ oc = o->objcore;
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ AssertObjBusy(o);
+ HSH_Ref(oc);
+
+ assert(o->exp.entered != 0 && !isnan(o->exp.entered));
+ o->last_lru = o->exp.entered;
+
+ lru = oc_getlru(oc);
+ CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+ Lck_Lock(&lru->mtx);
+ Lck_Lock(&exp_mtx);
+ (void)update_object_when(o);
+ exp_insert(oc, lru);
+ Lck_Unlock(&exp_mtx);
+ Lck_Unlock(&lru->mtx);
+ oc_updatemeta(oc);
+ }
+
+ /*--------------------------------------------------------------------
+ * Object was used, move to tail of LRU list.
+ *
+ * To avoid the exp_mtx becoming a hotspot, we only attempt to move
+ * objects if they have not been moved recently and if the lock is available.
+ * This optimization obviously leaves the LRU list imperfectly sorted.
+ */
+
+ int
+ EXP_Touch(struct objcore *oc)
+ {
+ struct lru *lru;
+
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+
+ /*
+ * For -spersistent we don't move objects on the lru list. Each
+ * segment has its own LRU list, and the order on it is not material
+ * for anything. The code below would move the objects to the
+ * LRU list of the currently open segment, which would prevent
+ * the cleaner from doing its job.
+ */
+ if (oc->flags & OC_F_LRUDONTMOVE)
+ return (0);
+
+ lru = oc_getlru(oc);
+ CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+
+ /*
+ * We only need the LRU lock here. The locking order is LRU->EXP
+ * so we can trust the content of the oc->timer_idx without the
+ * EXP lock. Since each lru list has its own lock, this should
+ * reduce contention a fair bit
+ */
+ if (Lck_Trylock(&lru->mtx))
+ return (0);
+
+ if (oc->timer_idx != BINHEAP_NOIDX) {
+ VTAILQ_REMOVE(&lru->lru_head, oc, lru_list);
+ VTAILQ_INSERT_TAIL(&lru->lru_head, oc, lru_list);
+ VSC_C_main->n_lru_moved++;
+ }
+ Lck_Unlock(&lru->mtx);
+ return (1);
+ }
+
+ /*--------------------------------------------------------------------
+ * We have changed one or more of the object timers, shuffle it
+ * accordingly in the binheap
+ *
+ * The VCL code can send us here on a non-cached object, just return.
+ *
+ * XXX: special case check for ttl = 0 ?
+ */
+
+ void
+ EXP_Rearm(const struct object *o)
+ {
+ struct objcore *oc;
+ struct lru *lru;
+
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ oc = o->objcore;
+ if (oc == NULL)
+ return;
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ lru = oc_getlru(oc);
+ Lck_Lock(&lru->mtx);
+ Lck_Lock(&exp_mtx);
+ /*
+ * The hang-man might have this object of the binheap while
+ * tending to a timer. If so, we do not muck with it here.
+ */
+ if (oc->timer_idx != BINHEAP_NOIDX && update_object_when(o)) {
+ assert(oc->timer_idx != BINHEAP_NOIDX);
+ binheap_reorder(exp_heap, oc->timer_idx);
+ assert(oc->timer_idx != BINHEAP_NOIDX);
+ }
+ Lck_Unlock(&exp_mtx);
+ Lck_Unlock(&lru->mtx);
+ oc_updatemeta(oc);
+ }
+
+ /*--------------------------------------------------------------------
+ * This thread monitors the root of the binary heap and whenever an
+ * object expires, accounting also for graceability, it is killed.
+ */
+
+ static void * __match_proto__(void *start_routine(void *))
+ exp_timer(struct sess *sp, void *priv)
+ {
+ struct objcore *oc;
+ struct lru *lru;
+ double t;
+ struct object *o;
+
+ (void)priv;
+ t = VTIM_real();
+ oc = NULL;
+ while (1) {
+ if (oc == NULL) {
+ WSL_Flush(sp->wrk, 0);
+ WRK_SumStat(sp->wrk);
+ VTIM_sleep(cache_param->expiry_sleep);
+ t = VTIM_real();
+ }
+
+ Lck_Lock(&exp_mtx);
+ oc = binheap_root(exp_heap);
+ if (oc == NULL) {
+ Lck_Unlock(&exp_mtx);
+ continue;
+ }
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+
+ /*
+ * We may have expired so many objects that our timestamp
+ * got out of date, refresh it and check again.
+ */
+ if (oc->timer_when > t)
+ t = VTIM_real();
+ if (oc->timer_when > t) {
+ Lck_Unlock(&exp_mtx);
+ oc = NULL;
+ continue;
+ }
+
+ /*
+ * It's time...
+ * Technically we should drop the exp_mtx, get the lru->mtx
+ * get the exp_mtx again and then check that the oc is still
+ * on the binheap. We take the shorter route and try to
+ * get the lru->mtx and punt if we fail.
+ */
+
+ lru = oc_getlru(oc);
+ CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+ if (Lck_Trylock(&lru->mtx)) {
+ Lck_Unlock(&exp_mtx);
+ oc = NULL;
+ continue;
+ }
+
+ /* Remove from binheap */
+ assert(oc->timer_idx != BINHEAP_NOIDX);
+ binheap_delete(exp_heap, oc->timer_idx);
+ assert(oc->timer_idx == BINHEAP_NOIDX);
+
+ /* And from LRU */
+ lru = oc_getlru(oc);
+ VTAILQ_REMOVE(&lru->lru_head, oc, lru_list);
+
+ Lck_Unlock(&exp_mtx);
+ Lck_Unlock(&lru->mtx);
+
+ VSC_C_main->n_expired++;
+
+ CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);
+ o = oc_getobj(sp->wrk, oc);
+ WSL(sp->wrk, SLT_ExpKill, 0, "%u %.0f",
+ o->xid, EXP_Ttl(NULL, o) - t);
+ (void)HSH_Deref(sp->wrk, oc, NULL);
+ }
+ NEEDLESS_RETURN(NULL);
+ }
+
+ /*--------------------------------------------------------------------
+ * Attempt to make space by nuking the oldest object on the LRU list
+ * which isn't in use.
+ * Returns: 1: did, 0: didn't, -1: can't
+ */
+
+ int
+ EXP_NukeOne(struct worker *w, struct lru *lru)
+ {
+ struct objcore *oc;
+ struct object *o;
+
+ /* Find the first currently unused object on the LRU. */
+ Lck_Lock(&lru->mtx);
+ Lck_Lock(&exp_mtx);
+ VTAILQ_FOREACH(oc, &lru->lru_head, lru_list) {
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ assert (oc->timer_idx != BINHEAP_NOIDX);
+ /*
+ * It wont release any space if we cannot release the last
+ * reference, besides, if somebody else has a reference,
+ * it's a bad idea to nuke this object anyway.
+ */
+ if (oc->refcnt == 1)
+ break;
+ }
+ if (oc != NULL) {
+ VTAILQ_REMOVE(&lru->lru_head, oc, lru_list);
+ binheap_delete(exp_heap, oc->timer_idx);
+ assert(oc->timer_idx == BINHEAP_NOIDX);
+ VSC_C_main->n_lru_nuked++;
+ }
+ Lck_Unlock(&exp_mtx);
+ Lck_Unlock(&lru->mtx);
+
+ if (oc == NULL)
+ return (-1);
+
+ /* XXX: bad idea for -spersistent */
+ o = oc_getobj(w, oc);
+ WSL(w, SLT_ExpKill, 0, "%u LRU", o->xid);
+ (void)HSH_Deref(w, NULL, &o);
+ return (1);
+ }
+
+ /*--------------------------------------------------------------------
+ * BinHeap helper functions for objcore.
+ */
+
+ static int
+ object_cmp(void *priv, void *a, void *b)
+ {
+ struct objcore *aa, *bb;
+
+ (void)priv;
+ CAST_OBJ_NOTNULL(aa, a, OBJCORE_MAGIC);
+ CAST_OBJ_NOTNULL(bb, b, OBJCORE_MAGIC);
+ return (aa->timer_when < bb->timer_when);
+ }
+
+ static void
+ object_update(void *priv, void *p, unsigned u)
+ {
+ struct objcore *oc;
+
+ (void)priv;
+ CAST_OBJ_NOTNULL(oc, p, OBJCORE_MAGIC);
+ oc->timer_idx = u;
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ EXP_Init(void)
+ {
+
+ Lck_New(&exp_mtx, lck_exp);
+ exp_heap = binheap_new(NULL, object_cmp, object_update);
+ XXXAN(exp_heap);
+ WRK_BgThread(&exp_thread, "cache-timeout", exp_timer, NULL);
+ }
diff --cc bin/varnishd/cache/cache_fetch.c
index 0000000,a5c0323..cc46222
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_fetch.c
+++ b/bin/varnishd/cache/cache_fetch.c
@@@ -1,0 -1,645 +1,650 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+ #include "config.h"
+
+ #include <inttypes.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ #include "cache.h"
+
+ #include "cache_backend.h"
+ #include "vcli_priv.h"
+ #include "vct.h"
+ #include "vtcp.h"
+
+ static unsigned fetchfrag;
+
+ /*--------------------------------------------------------------------
+ * We want to issue the first error we encounter on fetching and
+ * supress the rest. This function does that.
+ *
+ * Other code is allowed to look at w->fetch_failed to bail out
+ *
+ * For convenience, always return -1
+ */
+
+ int
+ FetchError2(struct worker *w, const char *error, const char *more)
+ {
+
+ CHECK_OBJ_NOTNULL(w, WORKER_MAGIC);
+ if (!w->fetch_failed) {
+ if (more == NULL)
+ WSLB(w, SLT_FetchError, "%s", error);
+ else
+ WSLB(w, SLT_FetchError, "%s: %s", error, more);
+ }
+ w->fetch_failed = 1;
+ return (-1);
+ }
+
+ int
+ FetchError(struct worker *w, const char *error)
+ {
+ return(FetchError2(w, error, NULL));
+ }
+
+ /*--------------------------------------------------------------------
+ * VFP_NOP
+ *
+ * This fetch-processor does nothing but store the object.
+ * It also documents the API
+ */
+
+ /*--------------------------------------------------------------------
+ * VFP_BEGIN
+ *
+ * Called to set up stuff.
+ *
+ * 'estimate' is the estimate of the number of bytes we expect to receive,
+ * as seen on the socket, or zero if unknown.
+ */
+ static void __match_proto__()
+ vfp_nop_begin(struct worker *w, size_t estimate)
+ {
+
+ if (estimate > 0)
+ (void)FetchStorage(w, estimate);
+ }
+
+ /*--------------------------------------------------------------------
+ * VFP_BYTES
+ *
+ * Process (up to) 'bytes' from the socket.
+ *
+ * Return -1 on error, issue FetchError()
+ * will not be called again, once error happens.
+ * Return 0 on EOF on socket even if bytes not reached.
+ * Return 1 when 'bytes' have been processed.
+ */
+
+ static int __match_proto__()
+ vfp_nop_bytes(struct worker *w, struct http_conn *htc, ssize_t bytes)
+ {
+ ssize_t l, wl;
+ struct storage *st;
+
+ AZ(w->fetch_failed);
+ while (bytes > 0) {
+ st = FetchStorage(w, 0);
+ if (st == NULL)
+ return(-1);
+ l = st->space - st->len;
+ if (l > bytes)
+ l = bytes;
+ wl = HTC_Read(w, htc, st->ptr + st->len, l);
+ if (wl <= 0)
+ return (wl);
+ st->len += wl;
+ w->fetch_obj->len += wl;
+ bytes -= wl;
+ if (w->do_stream)
+ RES_StreamPoll(w);
+ }
+ return (1);
+ }
+
+ /*--------------------------------------------------------------------
+ * VFP_END
+ *
+ * Finish & cleanup
+ *
+ * Return -1 for error
+ * Return 0 for OK
+ */
+
+ static int __match_proto__()
+ vfp_nop_end(struct worker *w)
+ {
+ struct storage *st;
+
+ st = VTAILQ_LAST(&w->fetch_obj->store, storagehead);
+ if (st == NULL)
+ return (0);
+
+ if (st->len == 0) {
+ VTAILQ_REMOVE(&w->fetch_obj->store, st, list);
+ STV_free(st);
+ return (0);
+ }
+ if (st->len < st->space)
+ STV_trim(st, st->len);
+ return (0);
+ }
+
+ static struct vfp vfp_nop = {
+ .begin = vfp_nop_begin,
+ .bytes = vfp_nop_bytes,
+ .end = vfp_nop_end,
+ };
+
+ /*--------------------------------------------------------------------
+ * Fetch Storage to put object into.
+ *
+ */
+
+ struct storage *
+ FetchStorage(struct worker *w, ssize_t sz)
+ {
+ ssize_t l;
+ struct storage *st;
+ struct object *obj;
+
+ obj = w->fetch_obj;
+ CHECK_OBJ_NOTNULL(obj, OBJECT_MAGIC);
+ st = VTAILQ_LAST(&obj->store, storagehead);
+ if (st != NULL && st->len < st->space)
+ return (st);
+
+ l = fetchfrag;
+ if (l == 0)
+ l = sz;
+ if (l == 0)
+ l = cache_param->fetch_chunksize;
+ st = STV_alloc(w, l);
+ if (st == NULL) {
+ (void)FetchError(w, "Could not get storage");
+ return (NULL);
+ }
+ AZ(st->len);
+ VTAILQ_INSERT_TAIL(&obj->store, st, list);
+ return (st);
+ }
+
+ /*--------------------------------------------------------------------
+ * Convert a string to a size_t safely
+ */
+
+ static ssize_t
+ fetch_number(const char *nbr, int radix)
+ {
+ uintmax_t cll;
+ ssize_t cl;
+ char *q;
+
+ if (*nbr == '\0')
+ return (-1);
+ cll = strtoumax(nbr, &q, radix);
+ if (q == NULL || *q != '\0')
+ return (-1);
+
+ cl = (ssize_t)cll;
+ if((uintmax_t)cl != cll) /* Protect against bogusly large values */
+ return (-1);
+ return (cl);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static int
+ fetch_straight(struct worker *w, struct http_conn *htc, ssize_t cl)
+ {
+ int i;
+
+ assert(w->body_status == BS_LENGTH);
+
+ if (cl < 0) {
+ return (FetchError(w, "straight length field bogus"));
+ } else if (cl == 0)
+ return (0);
+
+ i = w->vfp->bytes(w, htc, cl);
+ if (i <= 0)
+ return (FetchError(w, "straight insufficient bytes"));
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * Read a chunked HTTP object.
+ *
+ * XXX: Reading one byte at a time is pretty pessimal.
+ */
+
+ static int
+ fetch_chunked(struct worker *w, struct http_conn *htc)
+ {
+ int i;
+ char buf[20]; /* XXX: 20 is arbitrary */
+ unsigned u;
+ ssize_t cl;
+
+ assert(w->body_status == BS_CHUNKED);
+ do {
+ /* Skip leading whitespace */
+ do {
+ if (HTC_Read(w, htc, buf, 1) <= 0)
+ return (-1);
+ } while (vct_islws(buf[0]));
+
+ if (!vct_ishex(buf[0]))
+ return (FetchError(w,"chunked header non-hex"));
+
+ /* Collect hex digits, skipping leading zeros */
+ for (u = 1; u < sizeof buf; u++) {
+ do {
+ if (HTC_Read(w, htc, buf + u, 1) <= 0)
+ return (-1);
+ } while (u == 1 && buf[0] == '0' && buf[u] == '0');
+ if (!vct_ishex(buf[u]))
+ break;
+ }
+
+ if (u >= sizeof buf)
+ return (FetchError(w,"chunked header too long"));
+
+ /* Skip trailing white space */
+ while(vct_islws(buf[u]) && buf[u] != '\n')
+ if (HTC_Read(w, htc, buf + u, 1) <= 0)
+ return (-1);
+
+ if (buf[u] != '\n')
+ return (FetchError(w,"chunked header no NL"));
+
+ buf[u] = '\0';
+ cl = fetch_number(buf, 16);
+ if (cl < 0)
+ return (FetchError(w,"chunked header number syntax"));
+
+ if (cl > 0 && w->vfp->bytes(w, htc, cl) <= 0)
+ return (-1);
+
+ i = HTC_Read(w, htc, buf, 1);
+ if (i <= 0)
+ return (-1);
+ if (buf[0] == '\r' && HTC_Read(w, htc, buf, 1) <= 0)
+ return (-1);
+ if (buf[0] != '\n')
+ return (FetchError(w,"chunked tail no NL"));
+ } while (cl > 0);
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static int
+ fetch_eof(struct worker *w, struct http_conn *htc)
+ {
+ int i;
+
+ assert(w->body_status == BS_EOF);
+ i = w->vfp->bytes(w, htc, SSIZE_MAX);
+ if (i < 0)
+ return (-1);
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * Fetch any body attached to the incoming request, and either write it
+ * to the backend (if we pass) or discard it (anything else).
+ * This is mainly a separate function to isolate the stack buffer and
+ * to contain the complexity when we start handling chunked encoding.
+ */
+
+ int
+ FetchReqBody(struct sess *sp)
+ {
+ unsigned long content_length;
+ char buf[8192];
+ char *ptr, *endp;
+ int rdcnt;
+
+ if (http_GetHdr(sp->http, H_Content_Length, &ptr)) {
+
+ content_length = strtoul(ptr, &endp, 10);
+ /* XXX should check result of conversion */
+ while (content_length) {
+ if (content_length > sizeof buf)
+ rdcnt = sizeof buf;
+ else
+ rdcnt = content_length;
+ rdcnt = HTC_Read(sp->wrk, sp->htc, buf, rdcnt);
+ if (rdcnt <= 0)
+ return (1);
+ content_length -= rdcnt;
+ if (!sp->sendbody)
+ continue;
+ (void)WRW_Write(sp->wrk, buf, rdcnt); /* XXX: stats ? */
+ if (WRW_Flush(sp->wrk))
+ return (2);
+ }
+ }
+ if (http_GetHdr(sp->http, H_Transfer_Encoding, NULL)) {
+ /* XXX: Handle chunked encoding. */
+ WSP(sp, SLT_Debug, "Transfer-Encoding in request");
+ return (1);
+ }
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * Send request, and receive the HTTP protocol response, but not the
+ * response body.
+ *
+ * Return value:
+ * -1 failure, not retryable
+ * 0 success
+ * 1 failure which can be retried.
+ */
+
+ int
+ FetchHdr(struct sess *sp)
+ {
+ struct vbc *vc;
+ struct worker *w;
+ char *b;
+ struct http *hp;
+ int retry = -1;
+ int i;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->wrk, WORKER_MAGIC);
+ w = sp->wrk;
+
+ AN(sp->director);
+ AZ(sp->obj);
+
+ if (sp->objcore != NULL) { /* pass has no objcore */
+ CHECK_OBJ_NOTNULL(sp->objcore, OBJCORE_MAGIC);
+ AN(sp->objcore->flags & OC_F_BUSY);
+ }
+
+ hp = w->bereq;
+
+ sp->wrk->vbc = VDI_GetFd(NULL, sp);
+ if (sp->wrk->vbc == NULL) {
+ WSP(sp, SLT_FetchError, "no backend connection");
+ return (-1);
+ }
+ vc = sp->wrk->vbc;
+ if (vc->recycled)
+ retry = 1;
+
+ /*
+ * Now that we know our backend, we can set a default Host:
+ * header if one is necessary. This cannot be done in the VCL
+ * because the backend may be chosen by a director.
+ */
+ if (!http_GetHdr(hp, H_Host, &b))
+ VDI_AddHostHeader(sp);
+
+ (void)VTCP_blocking(vc->fd); /* XXX: we should timeout instead */
+ WRW_Reserve(w, &vc->fd);
+ (void)http_Write(w, vc->vsl_id, hp, 0); /* XXX: stats ? */
+
+ /* Deal with any message-body the request might have */
+ i = FetchReqBody(sp);
+ if (WRW_FlushRelease(w) || i > 0) {
+ WSP(sp, SLT_FetchError, "backend write error: %d (%s)",
+ errno, strerror(errno));
+ VDI_CloseFd(sp->wrk);
+ /* XXX: other cleanup ? */
+ return (retry);
+ }
+
+ /* Checkpoint the vsl.here */
+ WSL_Flush(w, 0);
+
+ /* XXX is this the right place? */
+ VSC_C_main->backend_req++;
+
+ /* Receive response */
+
+ HTC_Init(w->htc, w->ws, vc->fd, vc->vsl_id, cache_param->http_resp_size,
+ cache_param->http_resp_hdr_len);
+
+ VTCP_set_read_timeout(vc->fd, vc->first_byte_timeout);
+
+ i = HTC_Rx(w->htc);
+
+ if (i < 0) {
+ WSP(sp, SLT_FetchError, "http first read error: %d %d (%s)",
+ i, errno, strerror(errno));
+ VDI_CloseFd(sp->wrk);
+ /* XXX: other cleanup ? */
+ /* Retryable if we never received anything */
+ return (i == -1 ? retry : -1);
+ }
+
+ VTCP_set_read_timeout(vc->fd, vc->between_bytes_timeout);
+
+ while (i == 0) {
+ i = HTC_Rx(w->htc);
+ if (i < 0) {
+ WSP(sp, SLT_FetchError,
+ "http first read error: %d %d (%s)",
+ i, errno, strerror(errno));
+ VDI_CloseFd(sp->wrk);
+ /* XXX: other cleanup ? */
+ return (-1);
+ }
+ }
+
+ hp = w->beresp;
+
+ if (http_DissectResponse(w, w->htc, hp)) {
+ WSP(sp, SLT_FetchError, "http format error");
+ VDI_CloseFd(sp->wrk);
+ /* XXX: other cleanup ? */
+ return (-1);
+ }
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ int
+ FetchBody(struct worker *w, struct object *obj)
+ {
+ int cls;
+ struct storage *st;
+ int mklen;
+ ssize_t cl;
+
+ CHECK_OBJ_NOTNULL(w, WORKER_MAGIC);
+ AZ(w->fetch_obj);
+ CHECK_OBJ_NOTNULL(w->vbc, VBC_MAGIC);
+ CHECK_OBJ_NOTNULL(obj, OBJECT_MAGIC);
+ CHECK_OBJ_NOTNULL(obj->http, HTTP_MAGIC);
+
+ if (w->vfp == NULL)
+ w->vfp = &vfp_nop;
+
+ AssertObjCorePassOrBusy(obj->objcore);
+
+ AZ(w->vgz_rx);
- AZ(VTAILQ_FIRST(&obj->store));
++
++ /* If we've freshened from another object and got a "Not Modified"
++ * response, then we have already duped the other object's body.
++ */
++ if (w->beresp->status != 304)
++ AZ(VTAILQ_FIRST(&obj->store));
+
+ w->fetch_obj = obj;
+ w->fetch_failed = 0;
+
+ /* XXX: pick up estimate from objdr ? */
+ cl = 0;
+ switch (w->body_status) {
+ case BS_NONE:
+ cls = 0;
+ mklen = 0;
+ break;
+ case BS_ZERO:
+ cls = 0;
+ mklen = 1;
+ break;
+ case BS_LENGTH:
+ cl = fetch_number( w->h_content_length, 10);
+ w->vfp->begin(w, cl > 0 ? cl : 0);
+ cls = fetch_straight(w, w->htc, cl);
+ mklen = 1;
+ if (w->vfp->end(w))
+ cls = -1;
+ break;
+ case BS_CHUNKED:
+ w->vfp->begin(w, cl);
+ cls = fetch_chunked(w, w->htc);
+ mklen = 1;
+ if (w->vfp->end(w))
+ cls = -1;
+ break;
+ case BS_EOF:
+ w->vfp->begin(w, cl);
+ cls = fetch_eof(w, w->htc);
+ mklen = 1;
+ if (w->vfp->end(w))
+ cls = -1;
+ break;
+ case BS_ERROR:
+ cls = 1;
+ mklen = 0;
+ break;
+ default:
+ cls = 0;
+ mklen = 0;
+ INCOMPL();
+ }
+ AZ(w->vgz_rx);
+
+ /*
+ * It is OK for ->end to just leave the last storage segment
+ * sitting on w->storage, we will always call vfp_nop_end()
+ * to get it trimmed or thrown out if empty.
+ */
+ AZ(vfp_nop_end(w));
+
+ w->fetch_obj = NULL;
+
+ WSLB(w, SLT_Fetch_Body, "%u(%s) cls %d mklen %u",
+ w->body_status, body_status(w->body_status),
+ cls, mklen);
+
+ if (w->body_status == BS_ERROR) {
+ VDI_CloseFd(w);
+ return (__LINE__);
+ }
+
+ if (cls < 0) {
+ w->stats.fetch_failed++;
+ /* XXX: Wouldn't this store automatically be released ? */
+ while (!VTAILQ_EMPTY(&obj->store)) {
+ st = VTAILQ_FIRST(&obj->store);
+ VTAILQ_REMOVE(&obj->store, st, list);
+ STV_free(st);
+ }
+ VDI_CloseFd(w);
+ obj->len = 0;
+ return (__LINE__);
+ }
+ AZ(w->fetch_failed);
+
+ if (cls == 0 && w->do_close)
+ cls = 1;
+
+ WSLB(w, SLT_Length, "%u", obj->len);
+
+ {
+ /* Sanity check fetch methods accounting */
+ ssize_t uu;
+
+ uu = 0;
+ VTAILQ_FOREACH(st, &obj->store, list)
+ uu += st->len;
+ if (w->do_stream)
+ /* Streaming might have started freeing stuff */
+ assert (uu <= obj->len);
+
+ else
+ assert(uu == obj->len);
+ }
+
+ if (mklen > 0) {
+ http_Unset(obj->http, H_Content_Length);
+ http_PrintfHeader(w, w->vbc->vsl_id, obj->http,
+ "Content-Length: %jd", (intmax_t)obj->len);
+ }
+
+ if (cls)
+ VDI_CloseFd(w);
+ else
+ VDI_RecycleFd(w);
+
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * Debugging aids
+ */
+
+ static void
+ debug_fragfetch(struct cli *cli, const char * const *av, void *priv)
+ {
+ (void)priv;
+ (void)cli;
+ fetchfrag = strtoul(av[2], NULL, 0);
+ }
+
+ static struct cli_proto debug_cmds[] = {
+ { "debug.fragfetch", "debug.fragfetch",
+ "\tEnable fetch fragmentation\n", 1, 1, "d", debug_fragfetch },
+ { NULL }
+ };
+
+ /*--------------------------------------------------------------------
+ *
+ */
+
+ void
+ Fetch_Init(void)
+ {
+
+ CLI_AddFuncs(debug_cmds);
+ }
diff --cc bin/varnishd/cache/cache_hash.c
index 0000000,db865de..5251f6d
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_hash.c
+++ b/bin/varnishd/cache/cache_hash.c
@@@ -1,0 -1,752 +1,789 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This is the central hash-table code, it relies on a chosen hash
+ * implementation only for the actual hashing, all the housekeeping
+ * happens here.
+ *
+ * We have two kinds of structures, objecthead and object. An objecthead
+ * corresponds to a given (Host:, URL) tupple, and the objects hung from
+ * the objecthead may represent various variations (ie: Vary: header,
+ * different TTL etc) instances of that web-entity.
+ *
+ * Each objecthead has a mutex which locks both its own fields, the
+ * list of objects and fields in the objects.
+ *
+ * The hash implementation must supply a reference count facility on
+ * the objecthead, and return with a reference held after a lookup.
+ *
+ * Lookups in the hash implementation returns with a ref held and each
+ * object hung from the objhead holds a ref as well.
+ *
+ * Objects have refcounts which are locked by the objecthead mutex.
+ *
+ * New objects are always marked busy, and they can go from busy to
+ * not busy only once.
+ */
+
+ #include "config.h"
+
+ #include <math.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ #include "cache.h"
+
+ #include "hash/hash_slinger.h"
+ #include "vsha256.h"
+
+ static const struct hash_slinger *hash;
+
+ /*---------------------------------------------------------------------*/
+ /* Precreate an objhead and object for later use */
+ void
+ HSH_Prealloc(const struct sess *sp)
+ {
+ struct worker *w;
+ struct objhead *oh;
+ struct objcore *oc;
+ struct waitinglist *wl;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->wrk, WORKER_MAGIC);
+ w = sp->wrk;
+
+ if (w->nobjcore == NULL) {
+ ALLOC_OBJ(oc, OBJCORE_MAGIC);
+ XXXAN(oc);
+ w->nobjcore = oc;
+ w->stats.n_objectcore++;
+ oc->flags |= OC_F_BUSY;
+ }
+ CHECK_OBJ_NOTNULL(w->nobjcore, OBJCORE_MAGIC);
+
+ if (w->nobjhead == NULL) {
+ ALLOC_OBJ(oh, OBJHEAD_MAGIC);
+ XXXAN(oh);
+ oh->refcnt = 1;
+ VTAILQ_INIT(&oh->objcs);
+ Lck_New(&oh->mtx, lck_objhdr);
+ w->nobjhead = oh;
+ w->stats.n_objecthead++;
+ }
+ CHECK_OBJ_NOTNULL(w->nobjhead, OBJHEAD_MAGIC);
+
+ if (w->nwaitinglist == NULL) {
+ ALLOC_OBJ(wl, WAITINGLIST_MAGIC);
+ XXXAN(wl);
+ VTAILQ_INIT(&wl->list);
+ w->nwaitinglist = wl;
+ w->stats.n_waitinglist++;
+ }
+ CHECK_OBJ_NOTNULL(w->nwaitinglist, WAITINGLIST_MAGIC);
+
+ if (w->nbusyobj == NULL) {
+ ALLOC_OBJ(w->nbusyobj, BUSYOBJ_MAGIC);
+ XXXAN(w->nbusyobj);
+ }
+
+ if (hash->prep != NULL)
+ hash->prep(sp);
+ }
+
+ void
+ HSH_Cleanup(struct worker *w)
+ {
+
+ if (w->nobjcore != NULL) {
+ FREE_OBJ(w->nobjcore);
+ w->stats.n_objectcore--;
+ w->nobjcore = NULL;
+ }
+ if (w->nobjhead != NULL) {
+ Lck_Delete(&w->nobjhead->mtx);
+ FREE_OBJ(w->nobjhead);
+ w->nobjhead = NULL;
+ w->stats.n_objecthead--;
+ }
+ if (w->nwaitinglist != NULL) {
+ FREE_OBJ(w->nwaitinglist);
+ w->nwaitinglist = NULL;
+ }
+ if (w->nhashpriv != NULL) {
+ /* XXX: If needed, add slinger method for this */
+ free(w->nhashpriv);
+ w->nhashpriv = NULL;
+ }
+ if (w->nbusyobj != NULL) {
+ FREE_OBJ(w->nbusyobj);
+ w->nbusyobj = NULL;
+ }
+ }
+
+ void
+ HSH_DeleteObjHead(struct worker *w, struct objhead *oh)
+ {
+
+ AZ(oh->refcnt);
+ assert(VTAILQ_EMPTY(&oh->objcs));
+ Lck_Delete(&oh->mtx);
+ w->stats.n_objecthead--;
+ FREE_OBJ(oh);
+ }
+
+ void
+ HSH_AddString(const struct sess *sp, const char *str)
+ {
+ int l;
+
+ if (str == NULL)
+ str = "";
+ l = strlen(str);
+
+ SHA256_Update(sp->wrk->sha256ctx, str, l);
+ SHA256_Update(sp->wrk->sha256ctx, "#", 1);
+
+ if (cache_param->log_hash)
+ WSP(sp, SLT_Hash, "%s", str);
+ }
+
+ /*---------------------------------------------------------------------
+ * This is a debugging hack to enable testing of boundary conditions
+ * in the hash algorithm.
+ * We trap the first 9 different digests and translate them to different
+ * digests with edge bit conditions
+ */
+
+ static struct hsh_magiclist {
+ unsigned char was[SHA256_LEN];
+ unsigned char now[SHA256_LEN];
+ } hsh_magiclist[] = {
+ { .now = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ { .now = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } },
+ { .now = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } },
+ { .now = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40 } },
+ { .now = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 } },
+ { .now = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ { .now = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ { .now = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ { .now = { 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
+ };
+
+ #define HSH_NMAGIC (sizeof hsh_magiclist / sizeof hsh_magiclist[0])
+
+ static void
+ hsh_testmagic(void *result)
+ {
+ int i, j;
+ static int nused = 0;
+
+ for (i = 0; i < nused; i++)
+ if (!memcmp(hsh_magiclist[i].was, result, SHA256_LEN))
+ break;
+ if (i == nused && i < HSH_NMAGIC)
+ memcpy(hsh_magiclist[nused++].was, result, SHA256_LEN);
+ if (i == nused)
+ return;
+ assert(i < HSH_NMAGIC);
+ fprintf(stderr, "HASHMAGIC: <");
+ for (j = 0; j < SHA256_LEN; j++)
+ fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
+ fprintf(stderr, "> -> <");
+ memcpy(result, hsh_magiclist[i].now, SHA256_LEN);
+ for (j = 0; j < SHA256_LEN; j++)
+ fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
+ fprintf(stderr, ">\n");
+ }
+
+ /*---------------------------------------------------------------------
+ * Insert an object which magically appears out of nowhere or, more likely,
+ * comes off some persistent storage device.
+ * Return it with a reference held.
+ */
+
+ struct objcore *
+ HSH_Insert(const struct sess *sp)
+ {
+ struct worker *w;
+ struct objhead *oh;
+ struct objcore *oc;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->wrk, WORKER_MAGIC);
+ AN(hash);
+ w = sp->wrk;
+
+ HSH_Prealloc(sp);
+ if (cache_param->diag_bitmap & 0x80000000)
+ hsh_testmagic(sp->wrk->nobjhead->digest);
+
+ AZ(sp->hash_objhead);
+ AN(w->nobjhead);
+ oh = hash->lookup(sp, w->nobjhead);
+ CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+ if (oh == w->nobjhead)
+ w->nobjhead = NULL;
+ Lck_Lock(&oh->mtx);
+ assert(oh->refcnt > 0);
+
+ /* Insert (precreated) objcore in objecthead */
+ oc = w->nobjcore;
+ w->nobjcore = NULL;
+ oc->refcnt = 1;
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ AZ(oc->flags & OC_F_BUSY);
+
+ VTAILQ_INSERT_HEAD(&oh->objcs, oc, list);
+ /* NB: do not deref objhead the new object inherits our reference */
+ oc->objhead = oh;
+ Lck_Unlock(&oh->mtx);
+ sp->wrk->stats.n_vampireobject++;
+ return (oc);
+ }
+
+ /*---------------------------------------------------------------------
+ */
+
+ struct objcore *
+ HSH_Lookup(struct sess *sp, struct objhead **poh)
+ {
+ struct worker *w;
+ struct objhead *oh;
+ struct objcore *oc;
+ struct objcore *busy_oc, *grace_oc;
+ struct object *o;
- double grace_ttl;
++ struct object *stale_o; /* for freshness check */
++ double grace_ttl, stale_ttl;
++ char *p;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->wrk, WORKER_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->http, HTTP_MAGIC);
+ AN(sp->director);
+ AN(hash);
++ AZ(sp->stale_obj);
+ w = sp->wrk;
+
+ HSH_Prealloc(sp);
+ memcpy(sp->wrk->nobjhead->digest, sp->digest, sizeof sp->digest);
+ if (cache_param->diag_bitmap & 0x80000000)
+ hsh_testmagic(sp->wrk->nobjhead->digest);
+
+ if (sp->hash_objhead != NULL) {
+ /*
+ * This sess came off the waiting list, and brings a
+ * oh refcnt with it.
+ */
+ CHECK_OBJ_NOTNULL(sp->hash_objhead, OBJHEAD_MAGIC);
+ oh = sp->hash_objhead;
+ sp->hash_objhead = NULL;
+ } else {
+ AN(w->nobjhead);
+ oh = hash->lookup(sp, w->nobjhead);
+ if (oh == w->nobjhead)
+ w->nobjhead = NULL;
+ }
+
+ CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+ Lck_Lock(&oh->mtx);
+ assert(oh->refcnt > 0);
+ busy_oc = NULL;
+ grace_oc = NULL;
++ stale_o = NULL; /* for freshness check */
+ grace_ttl = NAN;
++ stale_ttl = NAN;
+ VTAILQ_FOREACH(oc, &oh->objcs, list) {
+ /* Must be at least our own ref + the objcore we examine */
+ assert(oh->refcnt > 1);
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ assert(oc->objhead == oh);
+
+ if (oc->flags & OC_F_BUSY) {
+ CHECK_OBJ_NOTNULL(oc->busyobj, BUSYOBJ_MAGIC);
+ if (sp->hash_ignore_busy)
+ continue;
+
+ if (oc->busyobj->vary != NULL &&
+ !VRY_Match(sp, oc->busyobj->vary))
+ continue;
+
+ busy_oc = oc;
+ continue;
+ }
+
+ o = oc_getobj(sp->wrk, oc);
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+
- if (o->exp.ttl <= 0.)
++ if (o->exp.ttl <= 0. && o->exp.grace <= 0.
++ && o->exp.keep <= 0.)
+ continue;
+ if (BAN_CheckObject(o, sp))
+ continue;
+ if (o->vary != NULL && !VRY_Match(sp, o->vary))
+ continue;
+
+ /* If still valid, use it */
+ if (EXP_Ttl(sp, o) >= sp->t_req)
+ break;
+
+ /*
+ * Remember any matching objects inside their grace period
+ * and if there are several, use the least expired one.
+ */
+ if (EXP_Grace(sp, o) >= sp->t_req) {
+ if (grace_oc == NULL ||
+ grace_ttl < o->exp.entered + o->exp.ttl) {
+ grace_oc = oc;
+ grace_ttl = o->exp.entered + o->exp.ttl;
+ }
+ }
++
++ /* At this point we know:
++ * - o's TTL has elapsed
++ * - o is not busy or banned,
++ * - o is not a Vary match.
++ * The object may be used for a conditional backend request if
++ * - the keep time has not elapsed, and
++ * - it has a Last-Modified and/or an ETag header.
++ * If there are several, use the least expired one.
++ */
++ if (EXP_Keep(sp, o) >= sp->t_req
++ && (http_GetHdr(o->http, H_Last_Modified, &p)
++ || http_GetHdr(o->http, H_ETag, &p)))
++ if (stale_o == NULL ||
++ stale_ttl < o->exp.entered + o->exp.ttl) {
++ stale_o = o;
++ stale_ttl = o->exp.entered + o->exp.ttl;
++ }
++
+ }
+
+ /*
+ * If we have seen a busy object or the backend is unhealthy, and
+ * we have an object in grace, use it, if req.grace is also
+ * satisified.
+ * XXX: Interesting footnote: The busy object might be for a
+ * XXX: different "Vary:" than we sought. We have no way of knowing
+ * XXX: this until the object is unbusy'ed, so in practice we
+ * XXX: serialize fetch of all Vary's if grace is possible.
+ */
+
+ AZ(sp->objcore);
+ sp->objcore = grace_oc; /* XXX: Hack-ish */
+ if (oc == NULL /* We found no live object */
+ && grace_oc != NULL /* There is a grace candidate */
+ && (busy_oc != NULL /* Somebody else is already busy */
+ || !VDI_Healthy(sp->director, sp))) {
+ /* Or it is impossible to fetch */
+ o = oc_getobj(sp->wrk, grace_oc);
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ oc = grace_oc;
+ }
+ sp->objcore = NULL;
+
+ if (oc != NULL && !sp->hash_always_miss) {
+ o = oc_getobj(sp->wrk, oc);
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ assert(oc->objhead == oh);
+
+ /* We found an object we like */
+ oc->refcnt++;
+ if (o->hits < INT_MAX)
+ o->hits++;
+ assert(oh->refcnt > 1);
+ Lck_Unlock(&oh->mtx);
+ assert(hash->deref(oh));
+ *poh = oh;
+ return (oc);
+ }
+
+ if (busy_oc != NULL) {
+ /* There are one or more busy objects, wait for them */
+ if (sp->esi_level == 0) {
+ CHECK_OBJ_NOTNULL(sp->wrk->nwaitinglist,
+ WAITINGLIST_MAGIC);
+ if (oh->waitinglist == NULL) {
+ oh->waitinglist = sp->wrk->nwaitinglist;
+ sp->wrk->nwaitinglist = NULL;
+ }
+ VTAILQ_INSERT_TAIL(&oh->waitinglist->list, sp, list);
+ }
+ if (cache_param->diag_bitmap & 0x20)
+ WSP(sp, SLT_Debug,
+ "on waiting list <%p>", oh);
+ SES_Charge(sp);
+ /*
+ * The objhead reference transfers to the sess, we get it
+ * back when the sess comes off the waiting list and
+ * calls us again
+ */
+ sp->hash_objhead = oh;
+ sp->wrk = NULL;
+ Lck_Unlock(&oh->mtx);
+ return (NULL);
+ }
+
++ /* If we're not serving a valid or graced object and we saved stale_o,
++ * it is a candidate for the conditional backend request. */
++ AZ(oc && !sp->hash_always_miss);
++ AZ(busy_oc);
++ if (stale_o != NULL) {
++ AZ(stale_o->objcore->flags & OC_F_BUSY);
++ CHECK_OBJ_NOTNULL(stale_o->objcore, OBJCORE_MAGIC);
++ Lck_AssertHeld(&oh->mtx);
++ stale_o->objcore->refcnt++;
++ sp->stale_obj = stale_o;
++ }
++
+ /* Insert (precreated) objcore in objecthead */
+ oc = w->nobjcore;
+ w->nobjcore = NULL;
+ AN(oc->flags & OC_F_BUSY);
+ oc->refcnt = 1;
+
+ /* XXX: clear w->nbusyobj before use */
+ VRY_Validate(sp->vary_b);
+ if (sp->vary_l != NULL)
+ w->nbusyobj->vary = sp->vary_b;
+ else
+ w->nbusyobj->vary = NULL;
+ oc->busyobj = w->nbusyobj;
+ w->nbusyobj = NULL;
+
+ /*
+ * Busy objects go on the tail, so they will not trip up searches.
+ * HSH_Unbusy() will move them to the front.
+ */
+ VTAILQ_INSERT_TAIL(&oh->objcs, oc, list);
+ oc->objhead = oh;
+ /* NB: do not deref objhead the new object inherits our reference */
+ Lck_Unlock(&oh->mtx);
+ *poh = oh;
+ return (oc);
+ }
+
+ /*---------------------------------------------------------------------
+ */
+
+ static void
+ hsh_rush(struct objhead *oh)
+ {
+ unsigned u;
+ struct sess *sp;
+ struct waitinglist *wl;
+
+ CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+ Lck_AssertHeld(&oh->mtx);
+ wl = oh->waitinglist;
+ CHECK_OBJ_NOTNULL(wl, WAITINGLIST_MAGIC);
+ for (u = 0; u < cache_param->rush_exponent; u++) {
+ sp = VTAILQ_FIRST(&wl->list);
+ if (sp == NULL)
+ break;
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ AZ(sp->wrk);
+ VTAILQ_REMOVE(&wl->list, sp, list);
+ DSL(0x20, SLT_Debug, sp->vsl_id, "off waiting list");
+ if (SES_Schedule(sp)) {
+ /*
+ * We could not schedule the session, leave the
+ * rest on the busy list.
+ */
+ break;
+ }
+ }
+ if (VTAILQ_EMPTY(&wl->list)) {
+ oh->waitinglist = NULL;
+ FREE_OBJ(wl);
+ }
+ }
+
+ /*---------------------------------------------------------------------
+ * Purge an entire objhead
+ */
+
+ void
+ HSH_Purge(const struct sess *sp, struct objhead *oh, double ttl, double grace)
+ {
+ struct objcore *oc, **ocp;
+ unsigned spc, nobj, n;
+ struct object *o;
+
+ CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+ spc = WS_Reserve(sp->wrk->ws, 0);
+ ocp = (void*)sp->wrk->ws->f;
+ Lck_Lock(&oh->mtx);
+ assert(oh->refcnt > 0);
+ nobj = 0;
+ VTAILQ_FOREACH(oc, &oh->objcs, list) {
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ assert(oc->objhead == oh);
+ if (oc->flags & OC_F_BUSY) {
+ /*
+ * We cannot purge busy objects here, because their
+ * owners have special rights to them, and may nuke
+ * them without concern for the refcount, which by
+ * definition always must be one, so they don't check.
+ */
+ continue;
+ }
+
+ (void)oc_getobj(sp->wrk, oc); /* XXX: still needed ? */
+
+ xxxassert(spc >= sizeof *ocp);
+ oc->refcnt++;
+ spc -= sizeof *ocp;
+ ocp[nobj++] = oc;
+ }
+ Lck_Unlock(&oh->mtx);
+
+ /* NB: inverse test to catch NAN also */
+ if (!(ttl > 0.))
+ ttl = -1.;
+ if (!(grace > 0.))
+ grace = -1.;
+ for (n = 0; n < nobj; n++) {
+ oc = ocp[n];
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ o = oc_getobj(sp->wrk, oc);
+ if (o == NULL)
+ continue;
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ o->exp.ttl = ttl;
+ o->exp.grace = grace;
+ EXP_Rearm(o);
+ (void)HSH_Deref(sp->wrk, NULL, &o);
+ }
+ WS_Release(sp->wrk->ws, 0);
+ }
+
+
+ /*---------------------------------------------------------------------
+ * Kill a busy object we don't need anyway.
+ * There may be sessions on the waiting list, so we cannot just blow
+ * it out of the water.
+ */
+
+ void
+ HSH_Drop(struct sess *sp)
+ {
+ struct object *o;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ o = sp->obj;
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ AssertObjCorePassOrBusy(o->objcore);
+ o->exp.ttl = -1.;
+ if (o->objcore != NULL) /* Pass has no objcore */
+ HSH_Unbusy(sp);
+ (void)HSH_Deref(sp->wrk, NULL, &sp->obj);
+ }
+
+ void
+ HSH_Unbusy(const struct sess *sp)
+ {
+ struct object *o;
+ struct objhead *oh;
+ struct objcore *oc;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ o = sp->obj;
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ oc = o->objcore;
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ oh = oc->objhead;
+ CHECK_OBJ(oh, OBJHEAD_MAGIC);
+
+ AssertObjBusy(o);
+ AN(oc->ban);
+ assert(oc->refcnt > 0);
+ assert(oh->refcnt > 0);
+ if (o->ws_o->overflow)
+ sp->wrk->stats.n_objoverflow++;
+ if (cache_param->diag_bitmap & 0x40)
+ WSP(sp, SLT_Debug,
+ "Object %u workspace free %u", o->xid, WS_Free(o->ws_o));
+
+ /* XXX: pretouch neighbors on oh->objcs to prevent page-on under mtx */
+ Lck_Lock(&oh->mtx);
+ assert(oh->refcnt > 0);
+ /* XXX: strictly speaking, we should sort in Date: order. */
+ VTAILQ_REMOVE(&oh->objcs, oc, list);
+ VTAILQ_INSERT_HEAD(&oh->objcs, oc, list);
+ oc->flags &= ~OC_F_BUSY;
+ AZ(sp->wrk->nbusyobj);
+ sp->wrk->nbusyobj = oc->busyobj;
+ oc->busyobj = NULL;
+ if (oh->waitinglist != NULL)
+ hsh_rush(oh);
+ AN(oc->ban);
+ Lck_Unlock(&oh->mtx);
+ assert(oc_getobj(sp->wrk, oc) == o);
+ }
+
+ void
+ HSH_Ref(struct objcore *oc)
+ {
+ struct objhead *oh;
+
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ oh = oc->objhead;
+ CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+ Lck_Lock(&oh->mtx);
+ assert(oc->refcnt > 0);
+ oc->refcnt++;
+ Lck_Unlock(&oh->mtx);
+ }
+
+ /*--------------------------------------------------------------------
+ * Dereference objcore and or object
+ *
+ * Can deal with:
+ * bare objcore (incomplete fetch)
+ * bare object (pass)
+ * object with objcore
+ * XXX later: objcore with object (?)
+ *
+ * But you can only supply one of the two arguments at a time.
+ *
+ * Returns zero if target was destroyed.
+ */
+
+ int
+ HSH_Deref(struct worker *w, struct objcore *oc, struct object **oo)
+ {
+ struct object *o = NULL;
+ struct objhead *oh;
+ unsigned r;
+
+ /* Only one arg at a time */
+ assert(oc == NULL || oo == NULL);
+
+ if (oo != NULL) {
+ o = *oo;
+ *oo = NULL;
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ oc = o->objcore;
+ }
+
+ if (o != NULL && oc == NULL) {
+ /*
+ * A pass object with neither objcore nor objhdr reference.
+ * -> simply free the (Transient) storage
+ */
+ STV_Freestore(o);
+ STV_free(o->objstore);
+ w->stats.n_object--;
+ return (0);
+ }
+
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+
+ oh = oc->objhead;
+ CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+
+ Lck_Lock(&oh->mtx);
+ assert(oh->refcnt > 0);
+ assert(oc->refcnt > 0);
+ r = --oc->refcnt;
+ if (!r)
+ VTAILQ_REMOVE(&oh->objcs, oc, list);
+ else {
+ /* Must have an object */
+ AN(oc->methods);
+ }
+ if (oh->waitinglist != NULL)
+ hsh_rush(oh);
+ Lck_Unlock(&oh->mtx);
+ if (r != 0)
+ return (r);
+
+ BAN_DestroyObj(oc);
+ AZ(oc->ban);
+
+ if (oc->flags & OC_F_BUSY) {
+ CHECK_OBJ_NOTNULL(oc->busyobj, BUSYOBJ_MAGIC);
+ if (w->nbusyobj == NULL)
+ w->nbusyobj = oc->busyobj;
+ else
+ FREE_OBJ(oc->busyobj);
+ oc->busyobj = NULL;
+ }
+ AZ(oc->busyobj);
+
+ if (oc->methods != NULL) {
+ oc_freeobj(oc);
+ w->stats.n_object--;
+ }
+ FREE_OBJ(oc);
+
+ w->stats.n_objectcore--;
+ /* Drop our ref on the objhead */
+ assert(oh->refcnt > 0);
+ if (hash->deref(oh))
+ return (0);
+ HSH_DeleteObjHead(w, oh);
+ return (0);
+ }
+
+ void
+ HSH_Init(const struct hash_slinger *slinger)
+ {
+
+ assert(DIGEST_LEN == SHA256_LEN); /* avoid #include pollution */
+ hash = slinger;
+ if (hash->start != NULL)
+ hash->start();
+ }
diff --cc bin/varnishd/cache/cache_http.c
index 0000000,784eb28..b937d64
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_http.c
+++ b/bin/varnishd/cache/cache_http.c
@@@ -1,0 -1,1119 +1,1236 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * HTTP request storage and manipulation
+ */
+
+ #include "config.h"
+
+ #include <stdio.h>
+
+ #include "cache.h"
++#include "storage/storage.h"
+
+ #include "vct.h"
+
+ #define HTTPH(a, b, c, d, e, f, g) char b[] = "*" a ":";
+ #include "tbl/http_headers.h"
+ #undef HTTPH
+
+ /*lint -save -e773 not () */
+ #define LOGMTX2(ax, bx, cx) [bx] = SLT_##ax##cx
+
+ #define LOGMTX1(ax) { \
+ LOGMTX2(ax, HTTP_HDR_REQ, Request), \
+ LOGMTX2(ax, HTTP_HDR_RESPONSE, Response), \
+ LOGMTX2(ax, HTTP_HDR_STATUS, Status), \
+ LOGMTX2(ax, HTTP_HDR_URL, URL), \
+ LOGMTX2(ax, HTTP_HDR_PROTO, Protocol), \
+ LOGMTX2(ax, HTTP_HDR_FIRST, Header), \
+ }
+
+ static const enum VSL_tag_e logmtx[][HTTP_HDR_FIRST + 1] = {
+ [HTTP_Rx] = LOGMTX1(Rx),
+ [HTTP_Tx] = LOGMTX1(Tx),
+ [HTTP_Obj] = LOGMTX1(Obj)
+ };
+ /*lint -restore */
+
++void http_FilterMissingFields(struct worker *w, int fd, struct http *to,
++ const struct http *fm);
++
+ static enum VSL_tag_e
+ http2shmlog(const struct http *hp, int t)
+ {
+
+ CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
+ if (t > HTTP_HDR_FIRST)
+ t = HTTP_HDR_FIRST;
+ assert(hp->logtag >= HTTP_Rx && hp->logtag <= HTTP_Obj); /*lint !e685*/
+ assert(t >= HTTP_HDR_REQ && t <= HTTP_HDR_FIRST);
+ return (logmtx[hp->logtag][t]);
+ }
+
+ static void
+ WSLH(struct worker *w, unsigned vsl_id, const struct http *hp, unsigned hdr)
+ {
+
+ AN(vsl_id & (VSL_CLIENTMARKER|VSL_BACKENDMARKER));
+ WSLR(w, http2shmlog(hp, hdr), vsl_id, hp->hd[hdr]);
+ }
+
+ /*--------------------------------------------------------------------*/
+ /* List of canonical HTTP response code names from RFC2616 */
+
+ static struct http_msg {
+ unsigned nbr;
+ const char *txt;
+ } http_msg[] = {
+ #define HTTP_RESP(n, t) { n, t},
+ #include "tbl/http_response.h"
+ { 0, NULL }
+ };
+
+ const char *
+ http_StatusMessage(unsigned status)
+ {
+ struct http_msg *mp;
+
+ assert(status >= 100 && status <= 999);
+ for (mp = http_msg; mp->nbr != 0 && mp->nbr <= status; mp++)
+ if (mp->nbr == status)
+ return (mp->txt);
+ return ("Unknown Error");
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ unsigned
+ HTTP_estimate(unsigned nhttp)
+ {
+
+ /* XXX: We trust the structs to size-aligned as necessary */
+ return (sizeof (struct http) + (sizeof (txt) + 1) * nhttp);
+ }
+
+ struct http *
+ HTTP_create(void *p, uint16_t nhttp)
+ {
+ struct http *hp;
+
+ hp = p;
+ hp->magic = HTTP_MAGIC;
+ hp->hd = (void*)(hp + 1);
+ hp->shd = nhttp;
+ hp->hdf = (void*)(hp->hd + nhttp);
+ return (hp);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ http_Setup(struct http *hp, struct ws *ws)
+ {
+ uint16_t shd;
+ txt *hd;
+ unsigned char *hdf;
+
+ /* XXX: This is not elegant, is it efficient ? */
+ shd = hp->shd;
+ hd = hp->hd;
+ hdf = hp->hdf;
+ memset(hp, 0, sizeof *hp);
+ memset(hd, 0, sizeof *hd * shd);
+ memset(hdf, 0, sizeof *hdf * shd);
+ hp->magic = HTTP_MAGIC;
+ hp->ws = ws;
+ hp->nhd = HTTP_HDR_FIRST;
+ hp->shd = shd;
+ hp->hd = hd;
+ hp->hdf = hdf;
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static int
+ http_IsHdr(const txt *hh, const char *hdr)
+ {
+ unsigned l;
+
+ Tcheck(*hh);
+ AN(hdr);
+ l = hdr[0];
+ assert(l == strlen(hdr + 1));
+ assert(hdr[l] == ':');
+ hdr++;
+ return (!strncasecmp(hdr, hh->b, l));
+ }
+
+ /*--------------------------------------------------------------------
+ * This function collapses multiple headerlines of the same name.
+ * The lines are joined with a comma, according to [rfc2616, 4.2bot, p32]
+ */
+
+ void
+ http_CollectHdr(struct http *hp, const char *hdr)
+ {
+ unsigned u, v, ml, f = 0, x;
+ char *b = NULL, *e = NULL;
+
+ for (u = HTTP_HDR_FIRST; u < hp->nhd; u++) {
+ while (u < hp->nhd && http_IsHdr(&hp->hd[u], hdr)) {
+ Tcheck(hp->hd[u]);
+ if (f == 0) {
+ /* Found first header, just record the fact */
+ f = u;
+ break;
+ }
+ if (b == NULL) {
+ /* Found second header, start our collection */
+ ml = WS_Reserve(hp->ws, 0);
+ b = hp->ws->f;
+ e = b + ml;
+ x = Tlen(hp->hd[f]);
+ if (b + x < e) {
+ memcpy(b, hp->hd[f].b, x);
+ b += x;
+ } else
+ b = e;
+ }
+
+ AN(b);
+ AN(e);
+
+ /* Append the Nth header we found */
+ if (b < e)
+ *b++ = ',';
+ x = Tlen(hp->hd[u]) - *hdr;
+ if (b + x < e) {
+ memcpy(b, hp->hd[u].b + *hdr, x);
+ b += x;
+ } else
+ b = e;
+
+ /* Shift remaining headers up one slot */
+ for (v = u; v < hp->nhd - 1; v++)
+ hp->hd[v] = hp->hd[v + 1];
+ hp->nhd--;
+ }
+
+ }
+ if (b == NULL)
+ return;
+ AN(e);
+ if (b >= e) {
+ WS_Release(hp->ws, 0);
+ return;
+ }
+ *b = '\0';
+ hp->hd[f].b = hp->ws->f;
+ hp->hd[f].e = b;
+ WS_ReleaseP(hp->ws, b + 1);
+ }
+
+
+ /*--------------------------------------------------------------------*/
+
+ static unsigned
+ http_findhdr(const struct http *hp, unsigned l, const char *hdr)
+ {
+ unsigned u;
+
+ for (u = HTTP_HDR_FIRST; u < hp->nhd; u++) {
+ Tcheck(hp->hd[u]);
+ if (hp->hd[u].e < hp->hd[u].b + l + 1)
+ continue;
+ if (hp->hd[u].b[l] != ':')
+ continue;
+ if (strncasecmp(hdr, hp->hd[u].b, l))
+ continue;
+ return (u);
+ }
+ return (0);
+ }
+
+ int
+ http_GetHdr(const struct http *hp, const char *hdr, char **ptr)
+ {
+ unsigned u, l;
+ char *p;
+
+ l = hdr[0];
+ diagnostic(l == strlen(hdr + 1));
+ assert(hdr[l] == ':');
+ hdr++;
+ u = http_findhdr(hp, l - 1, hdr);
+ if (u == 0) {
+ if (ptr != NULL)
+ *ptr = NULL;
+ return (0);
+ }
+ if (ptr != NULL) {
+ p = hp->hd[u].b + l;
+ while (vct_issp(*p))
+ p++;
+ *ptr = p;
+ }
+ return (1);
+ }
+
+
+ /*--------------------------------------------------------------------
+ * Find a given data element in a header according to RFC2616's #rule
+ * (section 2.1, p15)
+ */
+
+ int
+ http_GetHdrData(const struct http *hp, const char *hdr,
+ const char *field, char **ptr)
+ {
+ char *h, *e;
+ unsigned fl;
+
+ if (ptr != NULL)
+ *ptr = NULL;
+ if (!http_GetHdr(hp, hdr, &h))
+ return (0);
+ AN(h);
+ e = strchr(h, '\0');
+ fl = strlen(field);
+ while (h + fl <= e) {
+ /* Skip leading whitespace and commas */
+ if (vct_islws(*h) || *h == ',') {
+ h++;
+ continue;
+ }
+ /* Check for substrings before memcmp() */
+ if ((h + fl == e || vct_issepctl(h[fl])) &&
+ !memcmp(h, field, fl)) {
+ if (ptr != NULL) {
+ h += fl;
+ while (vct_islws(*h))
+ h++;
+ *ptr = h;
+ }
+ return (1);
+ }
+ /* Skip until end of header or comma */
+ while (*h && *h != ',')
+ h++;
+ }
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * Find a given headerfields Q value.
+ */
+
+ double
+ http_GetHdrQ(const struct http *hp, const char *hdr, const char *field)
+ {
+ char *h;
+ int i;
+ double a, b;
+
+ h = NULL;
+ i = http_GetHdrData(hp, hdr, field, &h);
+ if (!i)
+ return (0.);
+
+ if (h == NULL)
+ return (1.);
+ /* Skip whitespace, looking for '=' */
+ while (*h && vct_issp(*h))
+ h++;
+ if (*h++ != ';')
+ return (1.);
+ while (*h && vct_issp(*h))
+ h++;
+ if (*h++ != 'q')
+ return (1.);
+ while (*h && vct_issp(*h))
+ h++;
+ if (*h++ != '=')
+ return (1.);
+ while (*h && vct_issp(*h))
+ h++;
+ a = 0.;
+ while (vct_isdigit(*h)) {
+ a *= 10.;
+ a += *h - '0';
+ h++;
+ }
+ if (*h++ != '.')
+ return (a);
+ b = .1;
+ while (vct_isdigit(*h)) {
+ a += b * (*h - '0');
+ b *= .1;
+ h++;
+ }
+ return (a);
+ }
+
+ /*--------------------------------------------------------------------
+ * Find a given headerfields value.
+ */
+
+ int
+ http_GetHdrField(const struct http *hp, const char *hdr,
+ const char *field, char **ptr)
+ {
+ char *h;
+ int i;
+
+ if (ptr != NULL)
+ *ptr = NULL;
+
+ h = NULL;
+ i = http_GetHdrData(hp, hdr, field, &h);
+ if (!i)
+ return (i);
+
+ if (ptr != NULL && h != NULL) {
+ /* Skip whitespace, looking for '=' */
+ while (*h && vct_issp(*h))
+ h++;
+ if (*h == '=') {
+ h++;
+ while (*h && vct_issp(*h))
+ h++;
+ *ptr = h;
+ }
+ }
+ return (i);
+ }
+
+ /*--------------------------------------------------------------------
+ * XXX: redo with http_GetHdrField() ?
+ */
+
+ const char *
+ http_DoConnection(const struct http *hp)
+ {
+ char *p, *q;
+ const char *ret;
+ unsigned u;
+
+ if (!http_GetHdr(hp, H_Connection, &p)) {
+ if (hp->protover < 11)
+ return ("not HTTP/1.1");
+ return (NULL);
+ }
+ ret = NULL;
+ AN(p);
+ for (; *p; p++) {
+ if (vct_issp(*p))
+ continue;
+ if (*p == ',')
+ continue;
+ for (q = p + 1; *q; q++)
+ if (*q == ',' || vct_issp(*q))
+ break;
+ u = pdiff(p, q);
+ if (u == 5 && !strncasecmp(p, "close", u))
+ ret = "Connection: close";
+ u = http_findhdr(hp, u, p);
+ if (u != 0)
+ hp->hdf[u] |= HDF_FILTER;
+ if (!*q)
+ break;
+ p = q;
+ }
+ return (ret);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ int
+ http_HdrIs(const struct http *hp, const char *hdr, const char *val)
+ {
+ char *p;
+
+ if (!http_GetHdr(hp, hdr, &p))
+ return (0);
+ AN(p);
+ if (!strcasecmp(p, val))
+ return (1);
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ uint16_t
+ http_GetStatus(const struct http *hp)
+ {
+
+ return (hp->status);
+ }
+
+ const char *
+ http_GetReq(const struct http *hp)
+ {
+
+ Tcheck(hp->hd[HTTP_HDR_REQ]);
+ return (hp->hd[HTTP_HDR_REQ].b);
+ }
+
+ /*--------------------------------------------------------------------
+ * Dissect the headers of the HTTP protocol message.
+ * Detect conditionals (headers which start with '^[Ii][Ff]-')
+ */
+
+ static uint16_t
+ http_dissect_hdrs(struct worker *w, struct http *hp, unsigned vsl_id, char *p,
+ const struct http_conn *htc)
+ {
+ char *q, *r;
+ txt t = htc->rxbuf;
+
+ if (*p == '\r')
+ p++;
+
+ hp->nhd = HTTP_HDR_FIRST;
+ hp->conds = 0;
+ r = NULL; /* For FlexeLint */
+ for (; p < t.e; p = r) {
+
+ /* Find end of next header */
+ q = r = p;
+ while (r < t.e) {
+ if (!vct_iscrlf(*r)) {
+ r++;
+ continue;
+ }
+ q = r;
+ assert(r < t.e);
+ r += vct_skipcrlf(r);
+ if (r >= t.e)
+ break;
+ /* If line does not continue: got it. */
+ if (!vct_issp(*r))
+ break;
+
+ /* Clear line continuation LWS to spaces */
+ while (vct_islws(*q))
+ *q++ = ' ';
+ }
+
+ if (q - p > htc->maxhdr) {
+ VSC_C_main->losthdr++;
+ WSL(w, SLT_LostHeader, vsl_id, "%.*s",
+ q - p > 20 ? 20 : q - p, p);
+ return (413);
+ }
+
+ /* Empty header = end of headers */
+ if (p == q)
+ break;
+
+ if ((p[0] == 'i' || p[0] == 'I') &&
+ (p[1] == 'f' || p[1] == 'F') &&
+ p[2] == '-')
+ hp->conds = 1;
+
+ while (q > p && vct_issp(q[-1]))
+ q--;
+ *q = '\0';
+
+ if (hp->nhd < hp->shd) {
+ hp->hdf[hp->nhd] = 0;
+ hp->hd[hp->nhd].b = p;
+ hp->hd[hp->nhd].e = q;
+ WSLH(w, vsl_id, hp, hp->nhd);
+ hp->nhd++;
+ } else {
+ VSC_C_main->losthdr++;
+ WSL(w, SLT_LostHeader, vsl_id, "%.*s",
+ q - p > 20 ? 20 : q - p, p);
+ return (413);
+ }
+ }
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * Deal with first line of HTTP protocol message.
+ */
+
+ static uint16_t
+ http_splitline(struct worker *w, unsigned vsl_id, struct http *hp,
+ const struct http_conn *htc, int h1, int h2, int h3)
+ {
+ char *p, *q;
+
+ CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
+ CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
+
+ /* XXX: Assert a NUL at rx.e ? */
+ Tcheck(htc->rxbuf);
+
+ /* Skip leading LWS */
+ for (p = htc->rxbuf.b ; vct_islws(*p); p++)
+ continue;
+
+ /* First field cannot contain SP, CRLF or CTL */
+ q = p;
+ for (; !vct_issp(*p); p++) {
+ if (vct_isctl(*p))
+ return (400);
+ }
+ hp->hd[h1].b = q;
+ hp->hd[h1].e = p;
+
+ /* Skip SP */
+ for (; vct_issp(*p); p++) {
+ if (vct_isctl(*p))
+ return (400);
+ }
+
+ /* Second field cannot contain LWS or CTL */
+ q = p;
+ for (; !vct_islws(*p); p++) {
+ if (vct_isctl(*p))
+ return (400);
+ }
+ hp->hd[h2].b = q;
+ hp->hd[h2].e = p;
+
+ if (!Tlen(hp->hd[h2]))
+ return (413);
+
+ /* Skip SP */
+ for (; vct_issp(*p); p++) {
+ if (vct_isctl(*p))
+ return (400);
+ }
+
+ /* Third field is optional and cannot contain CTL */
+ q = p;
+ if (!vct_iscrlf(*p)) {
+ for (; !vct_iscrlf(*p); p++)
+ if (!vct_issep(*p) && vct_isctl(*p))
+ return (400);
+ }
+ hp->hd[h3].b = q;
+ hp->hd[h3].e = p;
+
+ /* Skip CRLF */
+ p += vct_skipcrlf(p);
+
+ *hp->hd[h1].e = '\0';
+ WSLH(w, vsl_id, hp, h1);
+
+ *hp->hd[h2].e = '\0';
+ WSLH(w, vsl_id, hp, h2);
+
+ if (hp->hd[h3].e != NULL) {
+ *hp->hd[h3].e = '\0';
+ WSLH(w, vsl_id, hp, h3);
+ }
+
+ return (http_dissect_hdrs(w, hp, vsl_id, p, htc));
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ http_ProtoVer(struct http *hp)
+ {
+
+ if (!strcasecmp(hp->hd[HTTP_HDR_PROTO].b, "HTTP/1.0"))
+ hp->protover = 10;
+ else if (!strcasecmp(hp->hd[HTTP_HDR_PROTO].b, "HTTP/1.1"))
+ hp->protover = 11;
+ else
+ hp->protover = 9;
+ }
+
+
+ /*--------------------------------------------------------------------*/
+
+ uint16_t
+ http_DissectRequest(struct sess *sp)
+ {
+ struct http_conn *htc;
+ struct http *hp;
+ uint16_t retval;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ htc = sp->htc;
+ CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
+ hp = sp->http;
+ CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
+
+ hp->logtag = HTTP_Rx;
+
+ retval = http_splitline(sp->wrk, sp->vsl_id, hp, htc,
+ HTTP_HDR_REQ, HTTP_HDR_URL, HTTP_HDR_PROTO);
+ if (retval != 0) {
+ WSPR(sp, SLT_HttpGarbage, htc->rxbuf);
+ return (retval);
+ }
+ http_ProtoVer(hp);
+ return (retval);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ uint16_t
+ http_DissectResponse(struct worker *w, const struct http_conn *htc,
+ struct http *hp)
+ {
+ int j;
+ uint16_t retval = 0;
+ char *p;
+
+
+ CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
+ CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
+ hp->logtag = HTTP_Rx;
+
+ if (http_splitline(w, htc->vsl_id, hp, htc,
+ HTTP_HDR_PROTO, HTTP_HDR_STATUS, HTTP_HDR_RESPONSE))
+ retval = 503;
+
+ if (retval == 0 && memcmp(hp->hd[HTTP_HDR_PROTO].b, "HTTP/1.", 7))
+ retval = 503;
+
+ if (retval == 0 && Tlen(hp->hd[HTTP_HDR_STATUS]) != 3)
+ retval = 503;
+
+ if (retval == 0) {
+ hp->status = 0;
+ p = hp->hd[HTTP_HDR_STATUS].b;
+ for (j = 100; j != 0; j /= 10) {
+ if (!vct_isdigit(*p)) {
+ retval = 503;
+ break;
+ }
+ hp->status += (uint16_t)(j * (*p - '0'));
+ p++;
+ }
+ if (*p != '\0')
+ retval = 503;
+ }
+
+ if (retval != 0) {
+ WSLR(w, SLT_HttpGarbage, htc->vsl_id, htc->rxbuf);
+ assert(retval >= 100 && retval <= 999);
+ hp->status = retval;
+ } else {
+ http_ProtoVer(hp);
+ }
+
+ if (hp->hd[HTTP_HDR_RESPONSE].b == NULL ||
+ !Tlen(hp->hd[HTTP_HDR_RESPONSE])) {
+ /* Backend didn't send a response string, use the standard */
+ hp->hd[HTTP_HDR_RESPONSE].b =
+ TRUST_ME(http_StatusMessage(hp->status));
+ hp->hd[HTTP_HDR_RESPONSE].e =
+ strchr(hp->hd[HTTP_HDR_RESPONSE].b, '\0');
+ }
+ return (retval);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ http_SetH(const struct http *to, unsigned n, const char *fm)
+ {
+
+ assert(n < to->shd);
+ AN(fm);
+ to->hd[n].b = TRUST_ME(fm);
+ to->hd[n].e = strchr(to->hd[n].b, '\0');
+ to->hdf[n] = 0;
+ }
+
+ static void
+ http_copyh(const struct http *to, const struct http *fm, unsigned n)
+ {
+
+ assert(n < HTTP_HDR_FIRST);
+ Tcheck(fm->hd[n]);
+ to->hd[n] = fm->hd[n];
+ to->hdf[n] = fm->hdf[n];
+ }
+
+ void
+ http_ForceGet(const struct http *to)
+ {
+ if (strcmp(http_GetReq(to), "GET"))
+ http_SetH(to, HTTP_HDR_REQ, "GET");
+ }
+
+ void
+ http_CopyResp(struct http *to, const struct http *fm)
+ {
+
+ CHECK_OBJ_NOTNULL(fm, HTTP_MAGIC);
+ CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ http_SetH(to, HTTP_HDR_PROTO, "HTTP/1.1");
+ to->status = fm->status;
+ http_copyh(to, fm, HTTP_HDR_RESPONSE);
+ }
+
+ void
+ http_SetResp(struct http *to, const char *proto, uint16_t status,
+ const char *response)
+ {
+
+ CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ http_SetH(to, HTTP_HDR_PROTO, proto);
+ assert(status >= 100 && status <= 999);
+ to->status = status;
+ http_SetH(to, HTTP_HDR_RESPONSE, response);
+ }
+
+ static void
+ http_copyheader(struct worker *w, unsigned vsl_id, struct http *to,
+ const struct http *fm, unsigned n)
+ {
+
+ CHECK_OBJ_NOTNULL(fm, HTTP_MAGIC);
+ CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ assert(n < fm->shd);
+ Tcheck(fm->hd[n]);
+ if (to->nhd < to->shd) {
+ to->hd[to->nhd] = fm->hd[n];
+ to->hdf[to->nhd] = 0;
+ to->nhd++;
+ } else {
+ VSC_C_main->losthdr++;
+ WSLR(w, SLT_LostHeader, vsl_id, fm->hd[n]);
+ }
+ }
+
+ /*--------------------------------------------------------------------
+ * Estimate how much workspace we need to Filter this header according
+ * to 'how'.
+ */
+
+ unsigned
+ http_EstimateWS(const struct http *fm, unsigned how, uint16_t *nhd)
+ {
+ unsigned u, l;
+
+ l = 0;
+ *nhd = HTTP_HDR_FIRST;
+ CHECK_OBJ_NOTNULL(fm, HTTP_MAGIC);
+ for (u = 0; u < fm->nhd; u++) {
+ if (fm->hd[u].b == NULL)
+ continue;
+ if (fm->hdf[u] & HDF_FILTER)
+ continue;
+ #define HTTPH(a, b, c, d, e, f, g) \
+ if (((e) & how) && http_IsHdr(&fm->hd[u], (b))) \
+ continue;
+ #include "tbl/http_headers.h"
+ #undef HTTPH
+ l += PRNDUP(Tlen(fm->hd[u]) + 1);
+ (*nhd)++;
+ // fm->hdf[u] |= HDF_COPY;
+ }
+ return (l);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ http_FilterFields(struct worker *w, unsigned vsl_id, struct http *to,
+ const struct http *fm, unsigned how)
+ {
+ unsigned u;
+
+ CHECK_OBJ_NOTNULL(fm, HTTP_MAGIC);
+ CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ to->nhd = HTTP_HDR_FIRST;
+ to->status = fm->status;
+ for (u = HTTP_HDR_FIRST; u < fm->nhd; u++) {
+ if (fm->hd[u].b == NULL)
+ continue;
+ if (fm->hdf[u] & HDF_FILTER)
+ continue;
+ #define HTTPH(a, b, c, d, e, f, g) \
+ if (((e) & how) && http_IsHdr(&fm->hd[u], (b))) \
+ continue;
+ #include "tbl/http_headers.h"
+ #undef HTTPH
+ http_copyheader(w, vsl_id, to, fm, u);
+ }
+ }
+
++/*---------------------------------------------------------------------
++ * Same as http_FilterFields but keep any existing hdrs in fm.
++ * Furthermore, before copy, check if fm already has that hdr, and if so
++ * do not copy. Used for 304 refresh processing.
++ */
++
++/* XXX: uplex/GS: Also, don't filter according to the "how" bitmap in
++ * http_headers.h. We only use this to copy from one cached object to
++ * another, so if a header made into the first object, we want it.
++ */
++
++void
++http_FilterMissingFields(struct worker *w, int fd, struct http *to,
++ const struct http *fm)
++{
++ unsigned u;
++ unsigned hdrlen;
++
++ CHECK_OBJ_NOTNULL(fm, HTTP_MAGIC);
++ CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
++ for (u = HTTP_HDR_FIRST; u < fm->nhd; u++) {
++ if (fm->hd[u].b == NULL)
++ continue;
++ hdrlen = strchr(fm->hd[u].b, ':') - fm->hd[u].b;
++ if (http_findhdr(to, hdrlen, fm->hd[u].b))
++ continue;
++ http_copyheader(w, fd, to, fm, u);
++ }
++}
++
+ /*--------------------------------------------------------------------*/
+
+ void
+ http_FilterHeader(const struct sess *sp, unsigned how)
+ {
+ struct http *hp;
+
+ hp = sp->wrk->bereq;
+ CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
+ hp->logtag = HTTP_Tx;
+
+ http_copyh(hp, sp->http, HTTP_HDR_REQ);
+ http_copyh(hp, sp->http, HTTP_HDR_URL);
+ if (how == HTTPH_R_FETCH)
+ http_SetH(hp, HTTP_HDR_PROTO, "HTTP/1.1");
+ else
+ http_copyh(hp, sp->http, HTTP_HDR_PROTO);
+ http_FilterFields(sp->wrk, sp->vsl_id, hp, sp->http, how);
+ http_PrintfHeader(sp->wrk, sp->vsl_id, hp, "X-Varnish: %u", sp->xid);
+ }
+
++/*-------------------------------------------------------------------
++ * This function checks for sp->freshen_obj. If present, HSH_Lookup()
++ * found an expired object that qualifies for a refresh check,
++ * so add the appropriate headers.
++ */
++
++void
++http_CheckRefresh(struct sess *sp)
++{
++ struct object *freshen_obj;
++ struct http *obj_hp, *bereq_hp;
++ char *p;
++
++ freshen_obj = sp->stale_obj;
++ CHECK_OBJ_NOTNULL(freshen_obj, OBJECT_MAGIC);
++ bereq_hp = sp->wrk->bereq;
++ CHECK_OBJ_NOTNULL(bereq_hp, HTTP_MAGIC);
++ obj_hp = freshen_obj->http;
++ CHECK_OBJ_NOTNULL(obj_hp, HTTP_MAGIC);
++
++ if(http_GetHdr(obj_hp, H_ETag, &p))
++ http_PrintfHeader(sp->wrk, sp->fd, bereq_hp, "If-None-Match: %s", p);
++
++ if(http_GetHdr(obj_hp, H_Last_Modified, &p))
++ http_PrintfHeader(sp->wrk, sp->fd, bereq_hp, "If-Modified-Since: %s",p);
++}
++
++/*-------------------------------------------------------------------
++ * Called after fetch and sp->freshen_obj present. Check
++ * response and handle as needed.
++ */
++
++void
++http_Check304(struct sess *sp)
++{
++ struct object *o, *o_stale;
++ char *p;
++
++ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
++ o_stale = sp->stale_obj;
++ CHECK_OBJ_NOTNULL(o_stale, OBJECT_MAGIC);
++ o = sp->obj;
++ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
++
++ if (sp->wrk->beresp->status != 304) {
++ /*
++ * IMS/INM headers may have been removed in VCL, so only count a
++ * non-validating response if they were present in the request.
++ */
++ if (http_GetHdr(sp->wrk->bereq, H_If_Modified_Since, &p)
++ || http_GetHdr(sp->wrk->bereq, H_If_None_Match, &p))
++ sp->wrk->stats.cond_not_validated++;
++ return;
++ }
++
++ /*
++ * Copy headers we need from the stale object into the 304 response
++ */
++ http_FilterMissingFields(sp->wrk, sp->fd, sp->obj->http,
++ sp->stale_obj->http);
++
++ /*
++ * Dup the stale object's storage in to the new object
++ * and reset Content-Length from the size of the storage.
++ */
++ STV_dup(sp, o_stale, o);
++ http_Unset(o->http, H_Content_Length);
++ http_PrintfHeader(sp->wrk, sp->fd, o->http, "Content-Length: %u", o->len);
++
++ http_SetResp(o->http, "HTTP/1.1", 200, "Ok Not Modified");
++ http_SetH(o->http, HTTP_HDR_REQ, "GET");
++ http_copyh(o->http, sp->wrk->bereq, HTTP_HDR_URL);
++
++ /*
++ * XXX: Are we copying all the necessary fields from stale_obj?
++ * Should we copy o_stale->hits into o->hits?
++ */
++ o->response = 200;
++ o->gziped = o_stale->gziped;
++
++ AZ(o_stale->objcore->flags & OC_F_BUSY);
++}
++
+ /*--------------------------------------------------------------------
+ * This function copies any header fields which reference foreign
+ * storage into our own WS.
+ */
+
+ void
+ http_CopyHome(struct worker *w, unsigned vsl_id, const struct http *hp)
+ {
+ unsigned u, l;
+ char *p;
+
+ for (u = 0; u < hp->nhd; u++) {
+ if (hp->hd[u].b == NULL)
+ continue;
+ if (hp->hd[u].b >= hp->ws->s && hp->hd[u].e <= hp->ws->e) {
+ WSLH(w, vsl_id, hp, u);
+ continue;
+ }
+ l = Tlen(hp->hd[u]);
+ p = WS_Alloc(hp->ws, l + 1);
+ if (p != NULL) {
+ WSLH(w, vsl_id, hp, u);
+ memcpy(p, hp->hd[u].b, l + 1L);
+ hp->hd[u].b = p;
+ hp->hd[u].e = p + l;
+ } else {
+ /* XXX This leaves a slot empty */
+ VSC_C_main->losthdr++;
+ WSLR(w, SLT_LostHeader, vsl_id, hp->hd[u]);
+ hp->hd[u].b = NULL;
+ hp->hd[u].e = NULL;
+ }
+ }
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ http_ClrHeader(struct http *to)
+ {
+
+ CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ to->nhd = HTTP_HDR_FIRST;
+ to->status = 0;
+ to->protover = 0;
+ to->conds = 0;
+ memset(to->hd, 0, sizeof *to->hd * to->shd);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ http_SetHeader(struct worker *w, unsigned vsl_id, struct http *to,
+ const char *hdr)
+ {
+
+ CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ if (to->nhd >= to->shd) {
+ VSC_C_main->losthdr++;
+ WSL(w, SLT_LostHeader, vsl_id, "%s", hdr);
+ return;
+ }
+ http_SetH(to, to->nhd++, hdr);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ http_PutField(struct worker *w, unsigned vsl_id, const struct http *to,
+ int field, const char *string)
+ {
+ char *p;
+ unsigned l;
+
+ CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ l = strlen(string);
+ p = WS_Alloc(to->ws, l + 1);
+ if (p == NULL) {
+ WSL(w, SLT_LostHeader, vsl_id, "%s", string);
+ to->hd[field].b = NULL;
+ to->hd[field].e = NULL;
+ to->hdf[field] = 0;
+ } else {
+ memcpy(p, string, l + 1L);
+ to->hd[field].b = p;
+ to->hd[field].e = p + l;
+ to->hdf[field] = 0;
+ }
+ }
+
+ void
+ http_PutProtocol(struct worker *w, unsigned vsl_id, const struct http *to,
+ const char *protocol)
+ {
+
+ http_PutField(w, vsl_id, to, HTTP_HDR_PROTO, protocol);
+ if (to->hd[HTTP_HDR_PROTO].b == NULL)
+ http_SetH(to, HTTP_HDR_PROTO, "HTTP/1.1");
+ Tcheck(to->hd[HTTP_HDR_PROTO]);
+ }
+
+ void
+ http_PutStatus(struct http *to, uint16_t status)
+ {
+
+ assert(status >= 100 && status <= 999);
+ to->status = status;
+ }
+
+ void
+ http_PutResponse(struct worker *w, unsigned vsl_id, const struct http *to,
+ const char *response)
+ {
+
+ http_PutField(w, vsl_id, to, HTTP_HDR_RESPONSE, response);
+ if (to->hd[HTTP_HDR_RESPONSE].b == NULL)
+ http_SetH(to, HTTP_HDR_RESPONSE, "Lost Response");
+ Tcheck(to->hd[HTTP_HDR_RESPONSE]);
+ }
+
+ void
+ http_PrintfHeader(struct worker *w, unsigned vsl_id, struct http *to,
+ const char *fmt, ...)
+ {
+ va_list ap;
+ unsigned l, n;
+
+ CHECK_OBJ_NOTNULL(to, HTTP_MAGIC);
+ l = WS_Reserve(to->ws, 0);
+ va_start(ap, fmt);
+ n = vsnprintf(to->ws->f, l, fmt, ap);
+ va_end(ap);
+ if (n + 1 >= l || to->nhd >= to->shd) {
+ VSC_C_main->losthdr++;
+ WSL(w, SLT_LostHeader, vsl_id, "%s", to->ws->f);
+ WS_Release(to->ws, 0);
+ } else {
+ to->hd[to->nhd].b = to->ws->f;
+ to->hd[to->nhd].e = to->ws->f + n;
+ to->hdf[to->nhd] = 0;
+ WS_Release(to->ws, n + 1);
+ to->nhd++;
+ }
+ }
+ /*--------------------------------------------------------------------*/
+
+ void
+ http_Unset(struct http *hp, const char *hdr)
+ {
+ uint16_t u, v;
+
+ for (v = u = HTTP_HDR_FIRST; u < hp->nhd; u++) {
+ if (hp->hd[u].b == NULL)
+ continue;
+ if (http_IsHdr(&hp->hd[u], hdr))
+ continue;
+ if (v != u) {
+ memcpy(&hp->hd[v], &hp->hd[u], sizeof *hp->hd);
+ memcpy(&hp->hdf[v], &hp->hdf[u], sizeof *hp->hdf);
+ }
+ v++;
+ }
+ hp->nhd = v;
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ HTTP_Copy(struct http *to, const struct http * const fm)
+ {
+
+ to->conds = fm->conds;
+ to->logtag = fm->logtag;
+ to->status = fm->status;
+ to->protover = fm->protover;
+ to->nhd = fm->nhd;
+ assert(fm->nhd <= to->shd);
+ memcpy(to->hd, fm->hd, fm->nhd * sizeof *to->hd);
+ memcpy(to->hdf, fm->hdf, fm->nhd * sizeof *to->hdf);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ unsigned
+ http_Write(struct worker *w, unsigned vsl_id, const struct http *hp, int resp)
+ {
+ unsigned u, l;
+
+ if (resp) {
+ l = WRW_WriteH(w, &hp->hd[HTTP_HDR_PROTO], " ");
+ WSLH(w, vsl_id, hp, HTTP_HDR_PROTO);
+
+ hp->hd[HTTP_HDR_STATUS].b = WS_Alloc(w->ws, 4);
+ AN(hp->hd[HTTP_HDR_STATUS].b);
+
+ sprintf(hp->hd[HTTP_HDR_STATUS].b, "%3d", hp->status);
+ hp->hd[HTTP_HDR_STATUS].e = hp->hd[HTTP_HDR_STATUS].b + 3;
+
+ l += WRW_WriteH(w, &hp->hd[HTTP_HDR_STATUS], " ");
+ WSLH(w, vsl_id, hp, HTTP_HDR_STATUS);
+
+ l += WRW_WriteH(w, &hp->hd[HTTP_HDR_RESPONSE], "\r\n");
+ WSLH(w, vsl_id, hp, HTTP_HDR_RESPONSE);
+ } else {
+ AN(hp->hd[HTTP_HDR_URL].b);
+ l = WRW_WriteH(w, &hp->hd[HTTP_HDR_REQ], " ");
+ WSLH(w, vsl_id, hp, HTTP_HDR_REQ);
+ l += WRW_WriteH(w, &hp->hd[HTTP_HDR_URL], " ");
+ WSLH(w, vsl_id, hp, HTTP_HDR_URL);
+ l += WRW_WriteH(w, &hp->hd[HTTP_HDR_PROTO], "\r\n");
+ WSLH(w, vsl_id, hp, HTTP_HDR_PROTO);
+ }
+ for (u = HTTP_HDR_FIRST; u < hp->nhd; u++) {
+ if (hp->hd[u].b == NULL)
+ continue;
+ AN(hp->hd[u].b);
+ AN(hp->hd[u].e);
+ l += WRW_WriteH(w, &hp->hd[u], "\r\n");
+ WSLH(w, vsl_id, hp, u);
+ }
+ l += WRW_Write(w, "\r\n", -1);
+ return (l);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ HTTP_Init(void)
+ {
+
+ #define HTTPH(a, b, c, d, e, f, g) b[0] = (char)strlen(b + 1);
+ #include "tbl/http_headers.h"
+ #undef HTTPH
+ }
diff --cc bin/varnishd/cache/cache_vrt.c
index 0000000,5e19ccc..27964dc
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_vrt.c
+++ b/bin/varnishd/cache/cache_vrt.c
@@@ -1,0 -1,535 +1,544 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Runtime support for compiled VCL programs
+ */
+
+ #include "config.h"
+
+ #include <netinet/in.h>
+ #include <arpa/inet.h>
+
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ #include "cache.h"
+
+ #include "cache_backend.h"
+ #include "hash/hash_slinger.h"
+ #include "vav.h"
+ #include "vcl.h"
+ #include "vrt.h"
+ #include "vrt_obj.h"
+ #include "vtim.h"
+
+ const void * const vrt_magic_string_end = &vrt_magic_string_end;
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ VRT_error(struct sess *sp, unsigned code, const char *reason)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ WSL(sp->wrk, SLT_Debug, 0, "VCL_error(%u, %s)", code, reason ?
+ reason : "(null)");
+ if (code < 100 || code > 999)
+ code = 503;
+ sp->err_code = (uint16_t)code;
+ sp->err_reason = reason ? reason : http_StatusMessage(sp->err_code);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ VRT_count(const struct sess *sp, unsigned u)
+ {
+
+ if (sp == NULL)
+ return;
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ if (cache_param->vcl_trace)
+ WSP(sp, SLT_VCL_trace, "%u %d.%d", u,
+ sp->vcl->ref[u].line, sp->vcl->ref[u].pos);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ VRT_acl_log(const struct sess *sp, const char *msg)
+ {
+ WSP(sp, SLT_VCL_acl, msg);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static struct http *
+ vrt_selecthttp(const struct sess *sp, enum gethdr_e where)
+ {
+ struct http *hp;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ switch (where) {
+ case HDR_REQ:
+ hp = sp->http;
+ break;
+ case HDR_BEREQ:
+ hp = sp->wrk->bereq;
+ break;
+ case HDR_BERESP:
+ hp = sp->wrk->beresp;
+ break;
+ case HDR_RESP:
+ hp = sp->wrk->resp;
+ break;
+ case HDR_OBJ:
+ CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
+ hp = sp->obj->http;
+ break;
++ case HDR_STALE_OBJ:
++ CHECK_OBJ_NOTNULL(sp->stale_obj, OBJECT_MAGIC);
++ hp = sp->stale_obj->http;
++ break;
+ default:
+ INCOMPL();
+ }
+ CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
+ return (hp);
+ }
+
+ char *
+ VRT_GetHdr(const struct sess *sp, enum gethdr_e where, const char *n)
+ {
+ char *p;
+ struct http *hp;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
++ if (where == HDR_STALE_OBJ && sp->stale_obj == NULL) {
++ WSP(sp, SLT_VCL_error,
++ "stale_obj does not exist (reading header %s)", n);
++ return NULL;
++ }
+ hp = vrt_selecthttp(sp, where);
+ if (!http_GetHdr(hp, n, &p))
+ return (NULL);
+ return (p);
+ }
+
+ /*--------------------------------------------------------------------
+ * XXX: Optimize the single element case ?
+ */
+
+ char *
+ VRT_StringList(char *d, unsigned dl, const char *p, va_list ap)
+ {
+ char *b, *e;
+ unsigned x;
+
+ b = d;
+ e = b + dl;
+ while (p != vrt_magic_string_end && b < e) {
+ if (p != NULL) {
+ x = strlen(p);
+ if (b + x < e)
+ memcpy(b, p, x);
+ b += x;
+ }
+ p = va_arg(ap, const char *);
+ }
+ if (b >= e)
+ return (NULL);
+ *b++ = '\0';
+ return (b);
+ }
+
+ /*--------------------------------------------------------------------
+ * XXX: Optimize the single element case ?
+ */
+
+ char *
+ VRT_String(struct ws *ws, const char *h, const char *p, va_list ap)
+ {
+ char *b, *e;
+ unsigned u, x;
+
+ u = WS_Reserve(ws, 0);
+ e = b = ws->f;
+ e += u;
+ if (h != NULL) {
+ x = strlen(h);
+ if (b + x < e)
+ memcpy(b, h, x);
+ b += x;
+ if (b < e)
+ *b = ' ';
+ b++;
+ }
+ b = VRT_StringList(b, e > b ? e - b : 0, p, ap);
+ if (b == NULL || b == e) {
+ WS_Release(ws, 0);
+ return (NULL);
+ }
+ e = b;
+ b = ws->f;
+ WS_Release(ws, e - b);
+ return (b);
+ }
+
+ /*--------------------------------------------------------------------
+ * Build a string on the worker threads workspace
+ */
+
+ const char *
+ VRT_WrkString(const struct sess *sp, const char *p, ...)
+ {
+ va_list ap;
+ char *b;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ va_start(ap, p);
+ b = VRT_String(sp->wrk->ws, NULL, p, ap);
+ va_end(ap);
+ return (b);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ VRT_SetHdr(const struct sess *sp , enum gethdr_e where, const char *hdr,
+ const char *p, ...)
+ {
+ struct http *hp;
+ va_list ap;
+ char *b;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ hp = vrt_selecthttp(sp, where);
+ va_start(ap, p);
+ if (p == NULL) {
+ http_Unset(hp, hdr);
+ } else {
+ b = VRT_String(hp->ws, hdr + 1, p, ap);
+ if (b == NULL) {
+ WSP(sp, SLT_LostHeader, "%s", hdr + 1);
+ } else {
+ http_Unset(hp, hdr);
+ http_SetHeader(sp->wrk, sp->vsl_id, hp, b);
+ }
+ }
+ va_end(ap);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ VRT_handling(struct sess *sp, unsigned hand)
+ {
+
+ if (sp == NULL) {
+ assert(hand == VCL_RET_OK);
+ return;
+ }
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ assert(hand < VCL_RET_MAX);
+ sp->handling = hand;
+ }
+
+ /*--------------------------------------------------------------------
+ * Add an element to the array/list of hash bits.
+ */
+
+ void
+ VRT_hashdata(const struct sess *sp, const char *str, ...)
+ {
+ va_list ap;
+ const char *p;
+
+ HSH_AddString(sp, str);
+ va_start(ap, str);
+ while (1) {
+ p = va_arg(ap, const char *);
+ if (p == vrt_magic_string_end)
+ break;
+ HSH_AddString(sp, p);
+ }
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ double
+ VRT_r_now(const struct sess *sp)
+ {
+
+ (void)sp;
+ return (VTIM_real());
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ char *
+ VRT_IP_string(const struct sess *sp, const struct sockaddr_storage *sa)
+ {
+ char *p;
+ const struct sockaddr_in *si4;
+ const struct sockaddr_in6 *si6;
+ const void *addr;
+ int len;
+
+ switch (sa->ss_family) {
+ case AF_INET:
+ len = INET_ADDRSTRLEN;
+ si4 = (const void *)sa;
+ addr = &(si4->sin_addr);
+ break;
+ case AF_INET6:
+ len = INET6_ADDRSTRLEN;
+ si6 = (const void *)sa;
+ addr = &(si6->sin6_addr);
+ break;
+ default:
+ INCOMPL();
+ }
+ XXXAN(len);
+ AN(p = WS_Alloc(sp->http->ws, len));
+ AN(inet_ntop(sa->ss_family, addr, p, len));
+ return (p);
+ }
+
+ char *
+ VRT_int_string(const struct sess *sp, int num)
+ {
+ char *p;
+ int size;
+
+ size = snprintf(NULL, 0, "%d", num) + 1;
+ AN(p = WS_Alloc(sp->http->ws, size));
+ assert(snprintf(p, size, "%d", num) < size);
+ return (p);
+ }
+
+ char *
+ VRT_double_string(const struct sess *sp, double num)
+ {
+ char *p;
+ int size;
+
+ size = snprintf(NULL, 0, "%.3f", num) + 1;
+ AN(p = WS_Alloc(sp->http->ws, size));
+ assert(snprintf(p, size, "%.3f", num) < size);
+ return (p);
+ }
+
+ char *
+ VRT_time_string(const struct sess *sp, double t)
+ {
+ char *p;
+
+ AN(p = WS_Alloc(sp->http->ws, VTIM_FORMAT_SIZE));
+ VTIM_format(t, p);
+ return (p);
+ }
+
+ const char *
+ VRT_backend_string(const struct sess *sp, const struct director *d)
+ {
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ if (d == NULL)
+ d = sp->director;
+ if (d == NULL)
+ return (NULL);
+ return (d->vcl_name);
+ }
+
+ const char *
+ VRT_bool_string(const struct sess *sp, unsigned val)
+ {
+
+ (void)sp;
+ return (val ? "true" : "false");
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ VRT_Rollback(struct sess *sp)
+ {
+
+ HTTP_Copy(sp->http, sp->http0);
+ WS_Reset(sp->ws, sp->ws_req);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ VRT_panic(const struct sess *sp, const char *str, ...)
+ {
+ va_list ap;
+ char *b;
+
+ va_start(ap, str);
+ b = VRT_String(sp->http->ws, "PANIC: ", str, ap);
+ va_end(ap);
+ VAS_Fail("VCL", "", 0, b, 0, 2);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ VRT_synth_page(const struct sess *sp, unsigned flags, const char *str, ...)
+ {
+ va_list ap;
+ const char *p;
+ struct vsb *vsb;
+
+ (void)flags;
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC);
+ vsb = SMS_Makesynth(sp->obj);
+ AN(vsb);
+
+ VSB_cat(vsb, str);
+ va_start(ap, str);
+ p = va_arg(ap, const char *);
+ while (p != vrt_magic_string_end) {
+ if (p == NULL)
+ p = "(null)";
+ VSB_cat(vsb, p);
+ p = va_arg(ap, const char *);
+ }
+ va_end(ap);
+ SMS_Finish(sp->obj);
+ http_Unset(sp->obj->http, H_Content_Length);
+ http_PrintfHeader(sp->wrk, sp->vsl_id, sp->obj->http,
+ "Content-Length: %d", sp->obj->len);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ VRT_ban(struct sess *sp, char *cmds, ...)
+ {
+ char *a1, *a2, *a3;
+ va_list ap;
+ struct ban *b;
+ int good;
+
+ (void)sp;
+ b = BAN_New();
+ va_start(ap, cmds);
+ a1 = cmds;
+ good = 0;
+ while (a1 != NULL) {
+ good = 0;
+ a2 = va_arg(ap, char *);
+ if (a2 == NULL)
+ break;
+ a3 = va_arg(ap, char *);
+ if (a3 == NULL)
+ break;
+ if (BAN_AddTest(NULL, b, a1, a2, a3))
+ break;
+ a1 = va_arg(ap, char *);
+ good = 1;
+ }
+ if (!good)
+ /* XXX: report error how ? */
+ BAN_Free(b);
+ else
+ BAN_Insert(b);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ VRT_ban_string(struct sess *sp, const char *str)
+ {
+ char *a1, *a2, *a3;
+ char **av;
+ struct ban *b;
+ int good;
+ int i;
+
+ (void)sp;
+ av = VAV_Parse(str, NULL, ARGV_NOESC);
+ if (av[0] != NULL) {
+ /* XXX: report error how ? */
+ VAV_Free(av);
+ return;
+ }
+ b = BAN_New();
+ good = 0;
+ for (i = 1; ;) {
+ a1 = av[i++];
+ if (a1 == NULL)
+ break;
+ good = 0;
+ a2 = av[i++];
+ if (a2 == NULL)
+ break;
+ a3 = av[i++];
+ if (a3 == NULL)
+ break;
+ if (BAN_AddTest(NULL, b, a1, a2, a3))
+ break;
+ good = 1;
+ if (av[i] == NULL)
+ break;
+ good = 0;
+ if (strcmp(av[i++], "&&"))
+ break;
+ }
+ if (!good)
+ /* XXX: report error how ? */
+ BAN_Free(b);
+ else
+ BAN_Insert(b);
+ VAV_Free(av);
+ }
+
+ /*--------------------------------------------------------------------
+ * "real" purges
+ */
+
+ void
+ VRT_purge(const struct sess *sp, double ttl, double grace)
+ {
+ if (sp->cur_method == VCL_MET_HIT)
+ HSH_Purge(sp, sp->obj->objcore->objhead, ttl, grace);
+ else if (sp->cur_method == VCL_MET_MISS)
+ HSH_Purge(sp, sp->objcore->objhead, ttl, grace);
+ }
+
+ /*--------------------------------------------------------------------
+ * Simple stuff
+ */
+
+ int
+ VRT_strcmp(const char *s1, const char *s2)
+ {
+ if (s1 == NULL || s2 == NULL)
+ return(1);
+ return (strcmp(s1, s2));
+ }
+
+ void
+ VRT_memmove(void *dst, const void *src, unsigned len)
+ {
+
+ (void)memmove(dst, src, len);
+ }
diff --cc bin/varnishd/cache/cache_vrt_var.c
index 0000000,860c7aa..407de1d
mode 000000,100644..100644
--- a/bin/varnishd/cache/cache_vrt_var.c
+++ b/bin/varnishd/cache/cache_vrt_var.c
@@@ -1,0 -1,550 +1,620 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Runtime support for compiled VCL programs
+ */
+ #include "config.h"
+
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ #include "cache.h"
+ #include "common/heritage.h"
+
+ #include "cache_backend.h"
+ #include "vrt_obj.h"
+ #include "vtcp.h"
+ #include "vtim.h"
+
++#define ILLEGAL_R(sess, obj, field) \
++WSP(sess, SLT_VCL_error, "%s does not exist (reading field %s)", obj, field)
++
+ static char vrt_hostname[255] = "";
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ vrt_do_string(struct worker *w, int fd, const struct http *hp, int fld,
+ const char *err, const char *p, va_list ap)
+ {
+ char *b;
+
+ // AN(p);
+ AN(hp);
+ b = VRT_String(hp->ws, NULL, p, ap);
+ if (b == NULL || *b == '\0') {
+ WSL(w, SLT_LostHeader, fd, err);
+ } else {
+ http_SetH(hp, fld, b);
+ }
+ va_end(ap);
+ }
+
-#define VRT_DO_HDR(obj, hdr, http, fld) \
++#define VRT_DO_HDR_l(obj, hdr, cont, http, fld) \
+ void \
+ VRT_l_##obj##_##hdr(const struct sess *sp, const char *p, ...) \
+ { \
+ va_list ap; \
+ \
+ va_start(ap, p); \
+ vrt_do_string(sp->wrk, sp->fd, \
- http, fld, #obj "." #hdr, p, ap); \
++ cont->http, fld, #obj "." #hdr, p, ap); \
+ va_end(ap); \
-} \
- \
++}
++
++#define VRT_DO_HDR_r(obj, hdr, cont, http, fld, nullable) \
+ const char * \
+ VRT_r_##obj##_##hdr(const struct sess *sp) \
+ { \
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); \
- CHECK_OBJ_NOTNULL(http, HTTP_MAGIC); \
- return (http->hd[fld].b); \
-}
-
-VRT_DO_HDR(req, request, sp->http, HTTP_HDR_REQ)
-VRT_DO_HDR(req, url, sp->http, HTTP_HDR_URL)
-VRT_DO_HDR(req, proto, sp->http, HTTP_HDR_PROTO)
-VRT_DO_HDR(bereq, request, sp->wrk->bereq, HTTP_HDR_REQ)
-VRT_DO_HDR(bereq, url, sp->wrk->bereq, HTTP_HDR_URL)
-VRT_DO_HDR(bereq, proto, sp->wrk->bereq, HTTP_HDR_PROTO)
-VRT_DO_HDR(obj, proto, sp->obj->http, HTTP_HDR_PROTO)
-VRT_DO_HDR(obj, response, sp->obj->http, HTTP_HDR_RESPONSE)
-VRT_DO_HDR(resp, proto, sp->wrk->resp, HTTP_HDR_PROTO)
-VRT_DO_HDR(resp, response, sp->wrk->resp, HTTP_HDR_RESPONSE)
-VRT_DO_HDR(beresp, proto, sp->wrk->beresp, HTTP_HDR_PROTO)
-VRT_DO_HDR(beresp, response, sp->wrk->beresp, HTTP_HDR_RESPONSE)
++ if (!nullable || cont != NULL) { \
++ CHECK_OBJ_NOTNULL(cont->http, HTTP_MAGIC); \
++ return (cont->http->hd[fld].b); \
++ } \
++ ILLEGAL_R(sp, #obj, #hdr); \
++ return(NULL); \
++} \
++
++#define VRT_DO_HDR(obj, hdr, cont, http, fld, nullable) \
++VRT_DO_HDR_l(obj, hdr, cont, http, fld) \
++VRT_DO_HDR_r(obj, hdr, cont, http, fld, nullable) \
++
++VRT_DO_HDR(req, request, sp, http, HTTP_HDR_REQ, 0)
++VRT_DO_HDR(req, url, sp, http, HTTP_HDR_URL, 0)
++VRT_DO_HDR(req, proto, sp, http, HTTP_HDR_PROTO, 0)
++VRT_DO_HDR(bereq, request, sp->wrk, bereq, HTTP_HDR_REQ, 0)
++VRT_DO_HDR(bereq, url, sp->wrk, bereq, HTTP_HDR_URL, 0)
++VRT_DO_HDR(bereq, proto, sp->wrk, bereq, HTTP_HDR_PROTO, 0)
++VRT_DO_HDR(obj, proto, sp->obj, http, HTTP_HDR_PROTO, 0)
++VRT_DO_HDR(obj, response, sp->obj, http, HTTP_HDR_RESPONSE, 0)
++VRT_DO_HDR(resp, proto, sp->wrk, resp, HTTP_HDR_PROTO, 0)
++VRT_DO_HDR(resp, response, sp->wrk, resp, HTTP_HDR_RESPONSE, 0)
++VRT_DO_HDR(beresp, proto, sp->wrk, beresp, HTTP_HDR_PROTO, 0)
++VRT_DO_HDR(beresp, response, sp->wrk, beresp, HTTP_HDR_RESPONSE, 0)
++VRT_DO_HDR_r(stale_obj, proto, sp->stale_obj, http, HTTP_HDR_PROTO, 1)
++VRT_DO_HDR_r(stale_obj, response, sp->stale_obj, http, HTTP_HDR_RESPONSE, 1)
+
+ /*--------------------------------------------------------------------*/
+
-#define VRT_DO_STATUS(obj, http) \
++#define VRT_DO_STATUS_l(obj, cont, http) \
+ void \
+ VRT_l_##obj##_status(const struct sess *sp, int num) \
+ { \
+ \
+ assert(num >= 100 && num <= 999); \
- http->status = (uint16_t)num; \
-} \
- \
++ cont->http->status = (uint16_t) num; \
++}
++
++#define VRT_DO_STATUS_r(obj, cont, http, nullable) \
+ int \
+ VRT_r_##obj##_status(const struct sess *sp) \
+ { \
+ \
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); \
- return(http->status); \
++ if (nullable && cont == NULL) { \
++ ILLEGAL_R(sp, #obj, "status"); \
++ return (503); \
++ } \
++ return(cont->http->status); \
+ }
+
-VRT_DO_STATUS(obj, sp->obj->http)
-VRT_DO_STATUS(beresp, sp->wrk->beresp)
-VRT_DO_STATUS(resp, sp->wrk->resp)
++#define VRT_DO_STATUS(obj, cont, http, nullable) \
++VRT_DO_STATUS_l(obj, cont, http) \
++VRT_DO_STATUS_r(obj, cont, http, nullable) \
++
++VRT_DO_STATUS(obj, sp->obj, http, 0)
++VRT_DO_STATUS(beresp, sp->wrk, beresp, 0)
++VRT_DO_STATUS(resp, sp->wrk, resp, 0)
++VRT_DO_STATUS_r(stale_obj, sp->stale_obj, http, 1)
+
+ /*--------------------------------------------------------------------*/
+
+ /* XXX: review this */
+ /* Add an objecthead to the saintmode list for the (hopefully) relevant
+ * backend. Some double-up asserting here to avoid assert-errors when there
+ * is no object.
+ */
+ void
+ VRT_l_beresp_saintmode(const struct sess *sp, double a)
+ {
+ struct trouble *new;
+ struct trouble *tr;
+ struct trouble *tr2;
+ struct worker *wrk;
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ wrk = sp->wrk;
+ if (!wrk->vbc)
+ return;
+ CHECK_OBJ_NOTNULL(wrk->vbc, VBC_MAGIC);
+ if (!wrk->vbc->backend)
+ return;
+ CHECK_OBJ_NOTNULL(wrk->vbc->backend, BACKEND_MAGIC);
+ if (!sp->objcore)
+ return;
+ CHECK_OBJ_NOTNULL(sp->objcore, OBJCORE_MAGIC);
+
+ /* Setting a negative holdoff period is a mistake. Detecting this
+ * when compiling the VCL would be better.
+ */
+ assert(a > 0);
+
+ ALLOC_OBJ(new, TROUBLE_MAGIC);
+ AN(new);
+ new->target = (uintptr_t)(sp->objcore->objhead);
+ new->timeout = sp->t_req + a;
+
+ /* Insert the new item on the list before the first item with a
+ * timeout at a later date (ie: sort by which entry will time out
+ * from the list).
+ */
+ Lck_Lock(&wrk->vbc->backend->mtx);
+ VTAILQ_FOREACH_SAFE(tr, &wrk->vbc->backend->troublelist, list, tr2) {
+ if (tr->timeout < new->timeout) {
+ VTAILQ_INSERT_BEFORE(tr, new, list);
+ new = NULL;
+ break;
+ }
+ }
+
+ /* Insert the item at the end if the list is empty or all other
+ * items have a longer timeout.
+ */
+ if (new)
+ VTAILQ_INSERT_TAIL(&wrk->vbc->backend->troublelist, new, list);
+
+ Lck_Unlock(&wrk->vbc->backend->mtx);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ #define VBERESP(dir, type, onm, field) \
+ void \
+ VRT_l_##dir##_##onm(const struct sess *sp, type a) \
+ { \
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); \
+ sp->wrk->field = a; \
+ } \
+ \
+ type \
+ VRT_r_##dir##_##onm(const struct sess *sp) \
+ { \
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); \
+ return (sp->wrk->field); \
+ }
+
+ VBERESP(beresp, unsigned, do_esi, do_esi)
+ VBERESP(beresp, unsigned, do_gzip, do_gzip)
+ VBERESP(beresp, unsigned, do_gunzip, do_gunzip)
+ VBERESP(beresp, unsigned, do_stream, do_stream)
+
+ /*--------------------------------------------------------------------*/
+
+ const char * __match_proto__()
+ VRT_r_client_identity(struct sess *sp)
+ {
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ if (sp->client_identity != NULL)
+ return (sp->client_identity);
+ else
+ return (sp->addr);
+ }
+
+ void
+ VRT_l_client_identity(struct sess *sp, const char *str, ...)
+ {
+ va_list ap;
+ char *b;
+
+ va_start(ap, str);
+ b = VRT_String(sp->http->ws, NULL, str, ap);
+ va_end(ap);
+ sp->client_identity = b;
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ #define BEREQ_TIMEOUT(which) \
+ void __match_proto__() \
+ VRT_l_bereq_##which(struct sess *sp, double num) \
+ { \
+ \
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); \
+ sp->wrk->which = (num > 0.0 ? num : 0.0); \
+ } \
+ \
+ double __match_proto__() \
+ VRT_r_bereq_##which(struct sess *sp) \
+ { \
+ \
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); \
+ return(sp->wrk->which); \
+ }
+
+ BEREQ_TIMEOUT(connect_timeout)
+ BEREQ_TIMEOUT(first_byte_timeout)
+ BEREQ_TIMEOUT(between_bytes_timeout)
+
+ /*--------------------------------------------------------------------*/
+
+ const char *
+ VRT_r_beresp_backend_name(const struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->wrk->vbc, VBC_MAGIC);
+ return(sp->wrk->vbc->backend->vcl_name);
+ }
+
+ struct sockaddr_storage *
+ VRT_r_beresp_backend_ip(const struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->wrk->vbc, VBC_MAGIC);
+ return(sp->wrk->vbc->addr);
+ }
+
+ int
+ VRT_r_beresp_backend_port(const struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->wrk->vbc, VBC_MAGIC);
+ return (VTCP_port(sp->wrk->vbc->addr));
+ }
+
+ const char * __match_proto__()
+ VRT_r_beresp_storage(struct sess *sp)
+ {
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ if (sp->wrk->storage_hint != NULL)
+ return (sp->wrk->storage_hint);
+ else
+ return (NULL);
+ }
+
+ void __match_proto__()
+ VRT_l_beresp_storage(struct sess *sp, const char *str, ...)
+ {
+ va_list ap;
+ char *b;
+
+ va_start(ap, str);
+ b = VRT_String(sp->wrk->ws, NULL, str, ap);
+ va_end(ap);
+ sp->wrk->storage_hint = b;
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ VRT_l_req_backend(struct sess *sp, struct director *be)
+ {
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ sp->director = be;
+ }
+
+ struct director * __match_proto__()
+ VRT_r_req_backend(struct sess *sp)
+ {
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ return (sp->director);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ VRT_l_req_esi(struct sess *sp, unsigned process_esi)
+ {
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ /*
+ * Only allow you to turn off ESI in the main request,
+ * else everything gets confused
+ */
+ if(sp->esi_level == 0)
+ sp->disable_esi = !process_esi;
+ }
+
+ unsigned __match_proto__()
+ VRT_r_req_esi(struct sess *sp)
+ {
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ return (!sp->disable_esi);
+ }
+
+ int
+ VRT_r_req_esi_level(const struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ return(sp->esi_level);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ unsigned __match_proto__()
+ VRT_r_req_can_gzip(struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ return (RFC2616_Req_Gzip(sp));
+ }
+
+
+ /*--------------------------------------------------------------------*/
+
+ int
+ VRT_r_req_restarts(const struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ return (sp->restarts);
+ }
+
+ /*--------------------------------------------------------------------
+ * NB: TTL is relative to when object was created, whereas grace and
+ * keep are relative to ttl.
+ */
+
-#define VRT_DO_EXP(which, exp, fld, offset, extra) \
- \
-void __match_proto__() \
-VRT_l_##which##_##fld(struct sess *sp, double a) \
-{ \
- \
- CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); \
- if (a > 0.) \
- a += offset; \
- EXP_Set_##fld(&exp, a); \
- extra; \
-} \
- \
-double __match_proto__() \
-VRT_r_##which##_##fld(struct sess *sp) \
-{ \
- \
- CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); \
- return(EXP_Get_##fld(&exp) - offset); \
-}
++#define VRT_DO_EXP_l(which, cont, fld, offset, extra) \
++void __match_proto__() \
++VRT_l_##which##_##fld(struct sess *sp, double a) \
++{ \
++ \
++ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); \
++ if (a > 0.) \
++ a += offset; \
++ EXP_Set_##fld(&cont->exp, a); \
++ extra; \
++}
++
++#define VRT_DO_EXP_r(which, cont, fld, offset, nullable) \
++double __match_proto__() \
++VRT_r_##which##_##fld(struct sess *sp) \
++{ \
++ \
++ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); \
++ if (nullable && cont == NULL) { \
++ ILLEGAL_R(sp, #which, #fld); \
++ return (-1); \
++ } \
++ return(EXP_Get_##fld(&cont->exp) - offset); \
++}
++
++#define VRT_DO_EXP(which, cont, fld, offset, nullable, extra) \
++VRT_DO_EXP_l(which, cont, fld, offset, extra) \
++VRT_DO_EXP_r(which, cont, fld, offset, nullable)
+
+ static void
+ vrt_wsp_exp(const struct sess *sp, unsigned xid, const struct exp *e)
+ {
+ WSP(sp, SLT_TTL, "%u VCL %.0f %.0f %.0f %.0f %.0f",
+ xid, e->ttl - (sp->t_req - e->entered), e->grace, e->keep,
+ sp->t_req, e->age + (sp->t_req - e->entered));
+ }
+
-VRT_DO_EXP(req, sp->exp, ttl, 0, )
-VRT_DO_EXP(req, sp->exp, grace, 0, )
-VRT_DO_EXP(req, sp->exp, keep, 0, )
++VRT_DO_EXP(req, sp, ttl, 0, 0, )
++VRT_DO_EXP(req, sp, grace, 0, 0, )
++VRT_DO_EXP(req, sp, keep, 0, 0, )
+
-VRT_DO_EXP(obj, sp->obj->exp, grace, 0,
++VRT_DO_EXP(obj, sp->obj, grace, 0, 0,
+ EXP_Rearm(sp->obj);
+ vrt_wsp_exp(sp, sp->obj->xid, &sp->obj->exp);)
-VRT_DO_EXP(obj, sp->obj->exp, ttl, (sp->t_req - sp->obj->exp.entered),
++VRT_DO_EXP(obj, sp->obj, ttl, (sp->t_req - sp->obj->exp.entered), 0,
+ EXP_Rearm(sp->obj);
+ vrt_wsp_exp(sp, sp->obj->xid, &sp->obj->exp);)
-VRT_DO_EXP(obj, sp->obj->exp, keep, 0,
++VRT_DO_EXP(obj, sp->obj, keep, 0, 0,
+ EXP_Rearm(sp->obj);
+ vrt_wsp_exp(sp, sp->obj->xid, &sp->obj->exp);)
+
-VRT_DO_EXP(beresp, sp->wrk->exp, grace, 0,
++VRT_DO_EXP(beresp, sp->wrk, grace, 0, 0,
+ vrt_wsp_exp(sp, sp->xid, &sp->wrk->exp);)
-VRT_DO_EXP(beresp, sp->wrk->exp, ttl, 0,
++VRT_DO_EXP(beresp, sp->wrk, ttl, 0, 0,
+ vrt_wsp_exp(sp, sp->xid, &sp->wrk->exp);)
-VRT_DO_EXP(beresp, sp->wrk->exp, keep, 0,
++VRT_DO_EXP(beresp, sp->wrk, keep, 0, 0,
+ vrt_wsp_exp(sp, sp->xid, &sp->wrk->exp);)
++
++VRT_DO_EXP_r(stale_obj, sp->stale_obj, grace, 0, 1)
++VRT_DO_EXP_r(stale_obj, sp->stale_obj, ttl, 0, 1)
++VRT_DO_EXP_r(stale_obj, sp->stale_obj, keep, 0, 1)
+
+ /*--------------------------------------------------------------------
+ * req.xid
+ */
+
+ const char * __match_proto__()
+ VRT_r_req_xid(struct sess *sp)
+ {
+ char *p;
+ int size;
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+
+ size = snprintf(NULL, 0, "%u", sp->xid) + 1;
+ AN(p = WS_Alloc(sp->http->ws, size));
+ assert(snprintf(p, size, "%u", sp->xid) < size);
+ return (p);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ #define REQ_BOOL(which) \
+ void __match_proto__() \
+ VRT_l_req_##which(struct sess *sp, unsigned val) \
+ { \
+ \
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); \
+ sp->which = val ? 1 : 0; \
+ } \
+ \
+ unsigned __match_proto__() \
+ VRT_r_req_##which(struct sess *sp) \
+ { \
+ \
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); \
+ return(sp->which); \
+ }
+
+ REQ_BOOL(hash_ignore_busy)
+ REQ_BOOL(hash_always_miss)
+
+ /*--------------------------------------------------------------------*/
+
+ struct sockaddr_storage *
+ VRT_r_client_ip(struct sess *sp)
+ {
+
+ return (&sp->sockaddr);
+ }
+
+ struct sockaddr_storage *
+ VRT_r_server_ip(struct sess *sp)
+ {
+ int i;
+
+ if (sp->mysockaddr.ss_family == AF_UNSPEC) {
+ i = getsockname(sp->fd,
+ (void*)&sp->mysockaddr, &sp->mysockaddrlen);
+ assert(VTCP_Check(i));
+ }
+
+ return (&sp->mysockaddr);
+ }
+
+ const char*
+ VRT_r_server_identity(struct sess *sp)
+ {
+ (void)sp;
+
+ if (heritage.identity[0] != '\0')
+ return (heritage.identity);
+ else
+ return (heritage.name);
+ }
+
+
+ const char*
+ VRT_r_server_hostname(struct sess *sp)
+ {
+ (void)sp;
+
+ if (vrt_hostname[0] == '\0')
+ AZ(gethostname(vrt_hostname, sizeof(vrt_hostname)));
+
+ return (vrt_hostname);
+ }
+
+ /*--------------------------------------------------------------------
+ * XXX: This is pessimistically silly
+ */
+
+ int
+ VRT_r_server_port(struct sess *sp)
+ {
+ int i;
+
+ if (sp->mysockaddr.ss_family == AF_UNSPEC) {
+ i = getsockname(sp->fd,
+ (void*)&sp->mysockaddr, &sp->mysockaddrlen);
+ assert(VTCP_Check(i));
+ }
+ return (VTCP_port(&sp->mysockaddr));
+ }
+
+ /*--------------------------------------------------------------------*/
+
++/* XXX: uplex/GS: a nice macro would eliminate the repetition here ... */
++
+ int
+ VRT_r_obj_hits(const struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC); /* XXX */
+ return (sp->obj->hits);
+ }
+
++int
++VRT_r_stale_obj_hits(const struct sess *sp)
++{
++
++ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
++ if (sp->stale_obj == NULL) {
++ ILLEGAL_R(sp, "stale_obj", "hits");
++ return (0);
++ }
++ CHECK_OBJ(sp->stale_obj, OBJECT_MAGIC); /* XXX */
++ return (sp->stale_obj->hits);
++}
++
+ double
+ VRT_r_obj_lastuse(const struct sess *sp)
+ {
+
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->obj, OBJECT_MAGIC); /* XXX */
+ return (VTIM_real() - sp->obj->last_use);
+ }
+
++double
++VRT_r_stale_obj_lastuse(const struct sess *sp)
++{
++
++ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
++ if (sp->stale_obj == NULL) {
++ ILLEGAL_R(sp, "stale_obj", "lastuse");
++ return (0);
++ }
++ CHECK_OBJ(sp->stale_obj, OBJECT_MAGIC); /* XXX */
++ return (VTIM_real() - sp->stale_obj->last_use);
++}
++
+ unsigned
+ VRT_r_req_backend_healthy(const struct sess *sp)
+ {
+ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
+ CHECK_OBJ_NOTNULL(sp->director, DIRECTOR_MAGIC);
+ return (VDI_Healthy(sp->director, sp));
+ }
+
++unsigned
++VRT_r_stale_obj(const struct sess *sp)
++{
++ CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
++ return (sp->stale_obj != NULL);
++}
diff --cc bin/varnishd/mgt/mgt_param.c
index 0000000,e4be5d8..991793a
mode 000000,100644..100644
--- a/bin/varnishd/mgt/mgt_param.c
+++ b/bin/varnishd/mgt/mgt_param.c
@@@ -1,0 -1,1368 +1,1368 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+ #include "config.h"
+
+ #include <grp.h>
+ #include <limits.h>
+ #include <math.h>
+ #include <pwd.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <unistd.h>
+
+ #include "mgt/mgt.h"
+ #include "common/heritage.h"
+ #include "common/params.h"
+
+ #include "mgt/mgt_param.h"
+ #include "waiter/cache_waiter.h"
+ #include "vav.h"
+ #include "vcli.h"
+ #include "vcli_common.h"
+ #include "vcli_priv.h"
+ #include "vnum.h"
+ #include "vss.h"
+
+ #include "mgt_cli.h"
+
+ #define MAGIC_INIT_STRING "\001"
+ struct params mgt_param;
+ static int nparspec;
+ static struct parspec const ** parspec;
+ static int margin;
+
+ /*--------------------------------------------------------------------*/
+
+ static const struct parspec *
+ mcf_findpar(const char *name)
+ {
+ int i;
+
+ for (i = 0; i < nparspec; i++)
+ if (!strcmp(parspec[i]->name, name))
+ return (parspec[i]);
+ return (NULL);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ tweak_generic_timeout(struct cli *cli, volatile unsigned *dst, const char *arg)
+ {
+ unsigned u;
+
+ if (arg != NULL) {
+ u = strtoul(arg, NULL, 0);
+ if (u == 0) {
+ VCLI_Out(cli, "Timeout must be greater than zero\n");
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ *dst = u;
+ } else
+ VCLI_Out(cli, "%u", *dst);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ tweak_timeout(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ volatile unsigned *dest;
+
+ dest = par->priv;
+ tweak_generic_timeout(cli, dest, arg);
+ }
+
+ static void
+ tweak_timeout_double(struct cli *cli, const struct parspec *par,
+ const char *arg)
+ {
+ volatile double *dest;
+ double u;
+
+ dest = par->priv;
+ if (arg != NULL) {
+ u = strtod(arg, NULL);
+ if (u < par->min) {
+ VCLI_Out(cli,
+ "Timeout must be greater or equal to %.g\n",
+ par->min);
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ if (u > par->max) {
+ VCLI_Out(cli,
+ "Timeout must be less than or equal to %.g\n",
+ par->max);
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ *dest = u;
+ } else
+ VCLI_Out(cli, "%.6f", *dest);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ tweak_generic_double(struct cli *cli, const struct parspec *par,
+ const char *arg)
+ {
+ volatile double *dest;
+ double u;
+
+ dest = par->priv;
+ if (arg != NULL) {
+ u = strtod(arg, NULL);
+ if (u < par->min) {
+ VCLI_Out(cli,
+ "Must be greater or equal to %.g\n",
+ par->min);
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ if (u > par->max) {
+ VCLI_Out(cli,
+ "Must be less than or equal to %.g\n",
+ par->max);
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ *dest = u;
+ } else
+ VCLI_Out(cli, "%f", *dest);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ tweak_generic_bool(struct cli *cli, volatile unsigned *dest, const char *arg)
+ {
+ if (arg != NULL) {
+ if (!strcasecmp(arg, "off"))
+ *dest = 0;
+ else if (!strcasecmp(arg, "disable"))
+ *dest = 0;
+ else if (!strcasecmp(arg, "no"))
+ *dest = 0;
+ else if (!strcasecmp(arg, "false"))
+ *dest = 0;
+ else if (!strcasecmp(arg, "on"))
+ *dest = 1;
+ else if (!strcasecmp(arg, "enable"))
+ *dest = 1;
+ else if (!strcasecmp(arg, "yes"))
+ *dest = 1;
+ else if (!strcasecmp(arg, "true"))
+ *dest = 1;
+ else {
+ VCLI_Out(cli, "use \"on\" or \"off\"\n");
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ } else
+ VCLI_Out(cli, *dest ? "on" : "off");
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ tweak_bool(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ volatile unsigned *dest;
+
+ dest = par->priv;
+ tweak_generic_bool(cli, dest, arg);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ tweak_generic_uint(struct cli *cli, volatile unsigned *dest, const char *arg,
+ unsigned min, unsigned max)
+ {
+ unsigned u;
+
+ if (arg != NULL) {
+ if (!strcasecmp(arg, "unlimited"))
+ u = UINT_MAX;
+ else
+ u = strtoul(arg, NULL, 0);
+ if (u < min) {
+ VCLI_Out(cli, "Must be at least %u\n", min);
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ if (u > max) {
+ VCLI_Out(cli, "Must be no more than %u\n", max);
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ *dest = u;
+ } else if (*dest == UINT_MAX) {
+ VCLI_Out(cli, "unlimited", *dest);
+ } else {
+ VCLI_Out(cli, "%u", *dest);
+ }
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ tweak_uint(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ volatile unsigned *dest;
+
+ dest = par->priv;
+ tweak_generic_uint(cli, dest, arg, (uint)par->min, (uint)par->max);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ fmt_bytes(struct cli *cli, ssize_t t)
+ {
+ const char *p;
+
+ if (t & 0xff) {
+ VCLI_Out(cli, "%zub", t);
+ return;
+ }
+ for (p = "kMGTPEZY"; *p; p++) {
+ if (t & 0x300) {
+ VCLI_Out(cli, "%.2f%c", t / 1024.0, *p);
+ return;
+ }
+ t /= 1024;
+ if (t & 0x0ff) {
+ VCLI_Out(cli, "%zu%c", t, *p);
+ return;
+ }
+ }
+ VCLI_Out(cli, "(bogus number)");
+ }
+
+ static void
+ tweak_generic_bytes(struct cli *cli, volatile ssize_t *dest, const char *arg,
+ double min, double max)
+ {
+ uintmax_t r;
+ const char *p;
+
+ if (arg != NULL) {
+ p = VNUM_2bytes(arg, &r, 0);
+ if (p != NULL) {
+ VCLI_Out(cli, "Could not convert to bytes.\n");
+ VCLI_Out(cli, "%s\n", p);
+ VCLI_Out(cli,
+ " Try something like '80k' or '120M'\n");
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ if ((uintmax_t)((ssize_t)r) != r || r > max) {
+ VCLI_Out(cli, "Must be no more than ");
+ fmt_bytes(cli, (ssize_t)max);
+ VCLI_Out(cli, "\n");
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ if (r < min) {
+ VCLI_Out(cli, "Must be at least ");
+ fmt_bytes(cli, (ssize_t)min);
+ VCLI_Out(cli, "\n");
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ *dest = r;
+ } else {
+ fmt_bytes(cli, *dest);
+ }
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ tweak_bytes(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ volatile ssize_t *dest;
+
+ assert(par->min >= 0);
+ dest = par->priv;
+ tweak_generic_bytes(cli, dest, arg, par->min, par->max);
+ }
+
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ tweak_bytes_u(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ volatile unsigned *d1;
+ volatile ssize_t dest;
+
+ assert(par->max <= UINT_MAX);
+ assert(par->min >= 0);
+ d1 = par->priv;
+ dest = *d1;
+ tweak_generic_bytes(cli, &dest, arg, par->min, par->max);
+ *d1 = dest;
+ }
+
+ /*--------------------------------------------------------------------
+ * XXX: slightly magic. We want to initialize to "nobody" (XXX: shouldn't
+ * XXX: that be something autocrap found for us ?) but we don't want to
+ * XXX: fail initialization if that user doesn't exists, even though we
+ * XXX: do want to fail it, in subsequent sets.
+ * XXX: The magic init string is a hack for this.
+ */
+
+ static void
+ tweak_user(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ struct passwd *pw;
+ struct group *gr;
+
+ (void)par;
+ if (arg != NULL) {
+ if (!strcmp(arg, MAGIC_INIT_STRING)) {
+ pw = getpwnam("nobody");
+ if (pw == NULL) {
+ mgt_param.uid = getuid();
+ return;
+ }
+ } else
+ pw = getpwnam(arg);
+ if (pw == NULL) {
+ VCLI_Out(cli, "Unknown user");
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ REPLACE(mgt_param.user, pw->pw_name);
+ mgt_param.uid = pw->pw_uid;
+ mgt_param.gid = pw->pw_gid;
+
+ /* set group to user's primary group */
+ if ((gr = getgrgid(pw->pw_gid)) != NULL &&
+ (gr = getgrnam(gr->gr_name)) != NULL &&
+ gr->gr_gid == pw->pw_gid)
+ REPLACE(mgt_param.group, gr->gr_name);
+ } else if (mgt_param.user) {
+ VCLI_Out(cli, "%s (%d)", mgt_param.user, (int)mgt_param.uid);
+ } else {
+ VCLI_Out(cli, "%d", (int)mgt_param.uid);
+ }
+ }
+
+ /*--------------------------------------------------------------------
+ * XXX: see comment for tweak_user, same thing here.
+ */
+
+ static void
+ tweak_group(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ struct group *gr;
+
+ (void)par;
+ if (arg != NULL) {
+ if (!strcmp(arg, MAGIC_INIT_STRING)) {
+ gr = getgrnam("nogroup");
+ if (gr == NULL) {
+ /* Only replace if tweak_user didn't */
+ if (mgt_param.gid == 0)
+ mgt_param.gid = getgid();
+ return;
+ }
+ } else
+ gr = getgrnam(arg);
+ if (gr == NULL) {
+ VCLI_Out(cli, "Unknown group");
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ REPLACE(mgt_param.group, gr->gr_name);
+ mgt_param.gid = gr->gr_gid;
+ } else if (mgt_param.group) {
+ VCLI_Out(cli, "%s (%d)", mgt_param.group, (int)mgt_param.gid);
+ } else {
+ VCLI_Out(cli, "%d", (int)mgt_param.gid);
+ }
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ clean_listen_sock_head(struct listen_sock_head *lsh)
+ {
+ struct listen_sock *ls, *ls2;
+
+ VTAILQ_FOREACH_SAFE(ls, lsh, list, ls2) {
+ CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
+ VTAILQ_REMOVE(lsh, ls, list);
+ free(ls->name);
+ free(ls->addr);
+ FREE_OBJ(ls);
+ }
+ }
+
+ static void
+ tweak_listen_address(struct cli *cli, const struct parspec *par,
+ const char *arg)
+ {
+ char **av;
+ int i;
+ struct listen_sock *ls;
+ struct listen_sock_head lsh;
+
+ (void)par;
+ if (arg == NULL) {
+ VCLI_Quote(cli, mgt_param.listen_address);
+ return;
+ }
+
+ av = VAV_Parse(arg, NULL, ARGV_COMMA);
+ if (av == NULL) {
+ VCLI_Out(cli, "Parse error: out of memory");
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ if (av[0] != NULL) {
+ VCLI_Out(cli, "Parse error: %s", av[0]);
+ VCLI_SetResult(cli, CLIS_PARAM);
+ VAV_Free(av);
+ return;
+ }
+ if (av[1] == NULL) {
+ VCLI_Out(cli, "Empty listen address");
+ VCLI_SetResult(cli, CLIS_PARAM);
+ VAV_Free(av);
+ return;
+ }
+ VTAILQ_INIT(&lsh);
+ for (i = 1; av[i] != NULL; i++) {
+ struct vss_addr **ta;
+ int j, n;
+
+ n = VSS_resolve(av[i], "http", &ta);
+ if (n == 0) {
+ VCLI_Out(cli, "Invalid listen address ");
+ VCLI_Quote(cli, av[i]);
+ VCLI_SetResult(cli, CLIS_PARAM);
+ break;
+ }
+ for (j = 0; j < n; ++j) {
+ ALLOC_OBJ(ls, LISTEN_SOCK_MAGIC);
+ AN(ls);
+ ls->sock = -1;
+ ls->addr = ta[j];
+ ls->name = strdup(av[i]);
+ AN(ls->name);
+ VTAILQ_INSERT_TAIL(&lsh, ls, list);
+ }
+ free(ta);
+ }
+ VAV_Free(av);
+ if (cli != NULL && cli->result != CLIS_OK) {
+ clean_listen_sock_head(&lsh);
+ return;
+ }
+
+ REPLACE(mgt_param.listen_address, arg);
+
+ clean_listen_sock_head(&heritage.socks);
+ heritage.nsocks = 0;
+
+ while (!VTAILQ_EMPTY(&lsh)) {
+ ls = VTAILQ_FIRST(&lsh);
+ VTAILQ_REMOVE(&lsh, ls, list);
+ CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC);
+ VTAILQ_INSERT_TAIL(&heritage.socks, ls, list);
+ heritage.nsocks++;
+ }
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ tweak_string(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ char **p = TRUST_ME(par->priv);
+
+ AN(p);
+ /* XXX should have tweak_generic_string */
+ if (arg == NULL) {
+ VCLI_Quote(cli, *p);
+ } else {
+ REPLACE(*p, arg);
+ }
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ tweak_waiter(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+
+ /* XXX should have tweak_generic_string */
+ (void)par;
+ WAIT_tweak_waiter(cli, arg);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ static void
+ tweak_diag_bitmap(struct cli *cli, const struct parspec *par, const char *arg)
+ {
+ unsigned u;
+
+ (void)par;
+ if (arg != NULL) {
+ u = strtoul(arg, NULL, 0);
+ mgt_param.diag_bitmap = u;
+ } else {
+ VCLI_Out(cli, "0x%x", mgt_param.diag_bitmap);
+ }
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ /*
+ * Make sure to end all lines with either a space or newline of the
+ * formatting will go haywire.
+ */
+
+ #define DELAYED_EFFECT_TEXT \
+ "\nNB: This parameter may take quite some time to take (full) effect."
+
+ #define MUST_RESTART_TEXT \
+ "\nNB: This parameter will not take any effect until the " \
+ "child process has been restarted."
+
+ #define MUST_RELOAD_TEXT \
+ "\nNB: This parameter will not take any effect until the " \
+ "VCL programs have been reloaded."
+
+ #define EXPERIMENTAL_TEXT \
+ "\nNB: We do not know yet if it is a good idea to change " \
+ "this parameter, or if the default value is even sensible. " \
+ "Caution is advised, and feedback is most welcome."
+
+ #define WIZARD_TEXT \
+ "\nNB: Do not change this parameter, unless a developer tell " \
+ "you to do so."
+
+ /*
+ * Remember to update varnishd.1 whenever you add / remove a parameter or
+ * change its default value.
+ * XXX: we should generate the relevant section of varnishd.1 from here.
+ */
+ static const struct parspec input_parspec[] = {
+ { "user", tweak_user, NULL, 0, 0,
+ "The unprivileged user to run as. Setting this will "
+ "also set \"group\" to the specified user's primary group.",
+ MUST_RESTART,
+ MAGIC_INIT_STRING },
+ { "group", tweak_group, NULL, 0, 0,
+ "The unprivileged group to run as.",
+ MUST_RESTART,
+ MAGIC_INIT_STRING },
+ { "default_ttl", tweak_timeout_double, &mgt_param.default_ttl,
+ 0, UINT_MAX,
+ "The TTL assigned to objects if neither the backend nor "
+ "the VCL code assigns one.\n"
+ "Objects already cached will not be affected by changes "
+ "made until they are fetched from the backend again.\n"
+ "To force an immediate effect at the expense of a total "
+ "flush of the cache use \"ban.url .\"",
+ 0,
+ "120", "seconds" },
+ { "sess_workspace",
+ tweak_bytes_u, &mgt_param.sess_workspace, 1024, UINT_MAX,
+ "Bytes of HTTP protocol workspace allocated for sessions. "
+ "This space must be big enough for the entire HTTP protocol "
+ "header and any edits done to it in the VCL code.\n"
+ "Minimum is 1024 bytes.",
+ DELAYED_EFFECT,
+ "64k", "bytes" },
+ { "http_req_hdr_len",
+ tweak_bytes_u, &mgt_param.http_req_hdr_len,
+ 40, UINT_MAX,
+ "Maximum length of any HTTP client request header we will "
+ "allow. The limit is inclusive its continuation lines.\n",
+ 0,
+ "8k", "bytes" },
+ { "http_req_size",
+ tweak_bytes_u, &mgt_param.http_req_size,
+ 256, UINT_MAX,
+ "Maximum number of bytes of HTTP client request we will deal "
+ "with. This is a limit on all bytes up to the double blank "
+ "line which ends the HTTP request.\n"
+ "The memory for the request is allocated from the session "
+ "workspace (param: sess_workspace) and this parameter limits "
+ "how much of that the request is allowed to take up.",
+ 0,
+ "32k", "bytes" },
+ { "http_resp_hdr_len",
+ tweak_bytes_u, &mgt_param.http_resp_hdr_len,
+ 40, UINT_MAX,
+ "Maximum length of any HTTP backend response header we will "
+ "allow. The limit is inclusive its continuation lines.\n",
+ 0,
+ "8k", "bytes" },
+ { "http_resp_size",
+ tweak_bytes_u, &mgt_param.http_resp_size,
+ 256, UINT_MAX,
+ "Maximum number of bytes of HTTP backend resonse we will deal "
+ "with. This is a limit on all bytes up to the double blank "
+ "line which ends the HTTP request.\n"
+ "The memory for the request is allocated from the worker "
+ "workspace (param: sess_workspace) and this parameter limits "
+ "how much of that the request is allowed to take up.",
+ 0,
+ "32k", "bytes" },
+ { "http_max_hdr", tweak_uint, &mgt_param.http_max_hdr, 32, 65535,
+ "Maximum number of HTTP headers we will deal with in "
+ "client request or backend reponses. "
+ "Note that the first line occupies five header fields.\n"
+ "This paramter does not influence storage consumption, "
+ "objects allocate exact space for the headers they store.\n",
+ 0,
+ "64", "header lines" },
+ { "shm_workspace",
+ tweak_bytes_u, &mgt_param.shm_workspace, 4096, UINT_MAX,
+ "Bytes of shmlog workspace allocated for worker threads. "
+ "If too big, it wastes some ram, if too small it causes "
+ "needless flushes of the SHM workspace.\n"
+ "These flushes show up in stats as "
+ "\"SHM flushes due to overflow\".\n"
+ "Minimum is 4096 bytes.",
+ DELAYED_EFFECT,
+ "8k", "bytes" },
+ { "shm_reclen",
+ tweak_bytes_u, &mgt_param.shm_reclen, 16, 65535,
+ "Maximum number of bytes in SHM log record.\n"
+ "Maximum is 65535 bytes.",
+ 0,
+ "255", "bytes" },
+ { "default_grace", tweak_timeout_double, &mgt_param.default_grace,
+ 0, UINT_MAX,
+ "Default grace period. We will deliver an object "
+ "this long after it has expired, provided another thread "
+ "is attempting to get a new copy.\n"
+ "Objects already cached will not be affected by changes "
+ "made until they are fetched from the backend again.\n",
+ DELAYED_EFFECT,
+ "10", "seconds" },
+ { "default_keep", tweak_timeout_double, &mgt_param.default_keep,
+ 0, UINT_MAX,
- "Default keep period. We will keep a useless object "
++ "Default keep period. We will keep a stale object "
+ "around this long, making it available for conditional "
+ "backend fetches. "
+ "That means that the object will be removed from the "
- "cache at the end of ttl+grace+keep.",
++ "cache at the end of ttl+max(grace,keep).",
+ DELAYED_EFFECT,
- "0", "seconds" },
++ "10", "seconds" },
+ { "sess_timeout", tweak_timeout, &mgt_param.sess_timeout, 0, 0,
+ "Idle timeout for persistent sessions. "
+ "If a HTTP request has not been received in this many "
+ "seconds, the session is closed.",
+ 0,
+ "5", "seconds" },
+ { "expiry_sleep", tweak_timeout_double, &mgt_param.expiry_sleep, 0, 60,
+ "How long the expiry thread sleeps when there is nothing "
+ "for it to do.\n",
+ 0,
+ "1", "seconds" },
+ { "pipe_timeout", tweak_timeout, &mgt_param.pipe_timeout, 0, 0,
+ "Idle timeout for PIPE sessions. "
+ "If nothing have been received in either direction for "
+ "this many seconds, the session is closed.\n",
+ 0,
+ "60", "seconds" },
+ { "send_timeout", tweak_timeout, &mgt_param.send_timeout, 0, 0,
+ "Send timeout for client connections. "
+ "If the HTTP response hasn't been transmitted in this many\n"
+ "seconds the session is closed. \n"
+ "See setsockopt(2) under SO_SNDTIMEO for more information.",
+ DELAYED_EFFECT,
+ "600", "seconds" },
+ { "idle_send_timeout", tweak_timeout, &mgt_param.idle_send_timeout, 0, 0,
+ "Time to wait with no data sent. "
+ "If no data has been transmitted in this many\n"
+ "seconds the session is closed. \n"
+ "See setsockopt(2) under SO_SNDTIMEO for more information.",
+ DELAYED_EFFECT,
+ "60", "seconds" },
+ { "auto_restart", tweak_bool, &mgt_param.auto_restart, 0, 0,
+ "Restart child process automatically if it dies.\n",
+ 0,
+ "on", "bool" },
+ { "nuke_limit",
+ tweak_uint, &mgt_param.nuke_limit, 0, UINT_MAX,
+ "Maximum number of objects we attempt to nuke in order"
+ "to make space for a object body.",
+ EXPERIMENTAL,
+ "50", "allocations" },
+ { "fetch_chunksize",
+ tweak_bytes_u,
+ &mgt_param.fetch_chunksize, 4 * 1024, UINT_MAX,
+ "The default chunksize used by fetcher. "
+ "This should be bigger than the majority of objects with "
+ "short TTLs.\n"
+ "Internal limits in the storage_file module makes increases "
+ "above 128kb a dubious idea.",
+ EXPERIMENTAL,
+ "128k", "bytes" },
+ { "fetch_maxchunksize",
+ tweak_bytes_u,
+ &mgt_param.fetch_maxchunksize, 64 * 1024, UINT_MAX,
+ "The maximum chunksize we attempt to allocate from storage. "
+ "Making this too large may cause delays and storage "
+ "fragmentation.\n",
+ EXPERIMENTAL,
+ "256m", "bytes" },
+ #ifdef SENDFILE_WORKS
+ { "sendfile_threshold",
+ tweak_bytes, &mgt_param.sendfile_threshold, 0, HUGE_VAL,
+ "The minimum size of objects transmitted with sendfile.",
+ EXPERIMENTAL,
+ "1E", "bytes" },
+ #endif /* SENDFILE_WORKS */
+ { "vcl_trace", tweak_bool, &mgt_param.vcl_trace, 0, 0,
+ "Trace VCL execution in the shmlog.\n"
+ "Enabling this will allow you to see the path each "
+ "request has taken through the VCL program.\n"
+ "This generates a lot of logrecords so it is off by "
+ "default.",
+ 0,
+ "off", "bool" },
+ { "listen_address", tweak_listen_address, NULL, 0, 0,
+ "Whitespace separated list of network endpoints where "
+ "Varnish will accept requests.\n"
+ "Possible formats: host, host:port, :port",
+ MUST_RESTART,
+ ":80" },
+ { "listen_depth", tweak_uint, &mgt_param.listen_depth, 0, UINT_MAX,
+ "Listen queue depth.",
+ MUST_RESTART,
+ "1024", "connections" },
+ { "cli_buffer",
+ tweak_bytes_u, &mgt_param.cli_buffer, 4096, UINT_MAX,
+ "Size of buffer for CLI command input."
+ "\nYou may need to increase this if you have big VCL files "
+ "and use the vcl.inline CLI command.\n"
+ "NB: Must be specified with -p to have effect.\n",
+ 0,
+ "8k", "bytes" },
+ { "cli_limit",
+ tweak_bytes_u, &mgt_param.cli_limit, 128, 99999999,
+ "Maximum size of CLI response. If the response exceeds"
+ " this limit, the reponse code will be 201 instead of"
+ " 200 and the last line will indicate the truncation.",
+ 0,
+ "4k", "bytes" },
+ { "cli_timeout", tweak_timeout, &mgt_param.cli_timeout, 0, 0,
+ "Timeout for the childs replies to CLI requests from "
+ "the mgt_param.",
+ 0,
+ "10", "seconds" },
+ { "ping_interval", tweak_uint, &mgt_param.ping_interval, 0, UINT_MAX,
+ "Interval between pings from parent to child.\n"
+ "Zero will disable pinging entirely, which makes "
+ "it possible to attach a debugger to the child.",
+ MUST_RESTART,
+ "3", "seconds" },
+ { "lru_interval", tweak_timeout, &mgt_param.lru_timeout, 0, 0,
+ "Grace period before object moves on LRU list.\n"
+ "Objects are only moved to the front of the LRU "
+ "list if they have not been moved there already inside "
+ "this timeout period. This reduces the amount of lock "
+ "operations necessary for LRU list access.",
+ EXPERIMENTAL,
+ "2", "seconds" },
+ { "cc_command", tweak_string, &mgt_cc_cmd, 0, 0,
+ "Command used for compiling the C source code to a "
+ "dlopen(3) loadable object. Any occurrence of %s in "
+ "the string will be replaced with the source file name, "
+ "and %o will be replaced with the output file name.",
+ MUST_RELOAD,
+ VCC_CC , NULL },
+ { "max_restarts", tweak_uint, &mgt_param.max_restarts, 0, UINT_MAX,
+ "Upper limit on how many times a request can restart."
+ "\nBe aware that restarts are likely to cause a hit against "
+ "the backend, so don't increase thoughtlessly.\n",
+ 0,
+ "4", "restarts" },
+ { "esi_syntax",
+ tweak_uint, &mgt_param.esi_syntax, 0, UINT_MAX,
+ "Bitmap controlling ESI parsing code:\n"
+ " 0x00000001 - Don't check if it looks like XML\n"
+ " 0x00000002 - Ignore non-esi elements\n"
+ " 0x00000004 - Emit parsing debug records\n"
+ " 0x00000008 - Force-split parser input (debugging)\n"
+ "\n"
+ "Use 0x notation and do the bitor in your head :-)\n",
+ 0,
+ "0", "bitmap" },
+ { "max_esi_depth",
+ tweak_uint, &mgt_param.max_esi_depth, 0, UINT_MAX,
+ "Maximum depth of esi:include processing.\n",
+ 0,
+ "5", "levels" },
+ { "connect_timeout", tweak_timeout_double,
+ &mgt_param.connect_timeout,0, UINT_MAX,
+ "Default connection timeout for backend connections. "
+ "We only try to connect to the backend for this many "
+ "seconds before giving up. "
+ "VCL can override this default value for each backend and "
+ "backend request.",
+ 0,
+ "0.7", "s" },
+ { "first_byte_timeout", tweak_timeout_double,
+ &mgt_param.first_byte_timeout,0, UINT_MAX,
+ "Default timeout for receiving first byte from backend. "
+ "We only wait for this many seconds for the first "
+ "byte before giving up. A value of 0 means it will never time "
+ "out. "
+ "VCL can override this default value for each backend and "
+ "backend request. This parameter does not apply to pipe.",
+ 0,
+ "60", "s" },
+ { "between_bytes_timeout", tweak_timeout_double,
+ &mgt_param.between_bytes_timeout,0, UINT_MAX,
+ "Default timeout between bytes when receiving data from "
+ "backend. "
+ "We only wait for this many seconds between bytes "
+ "before giving up. A value of 0 means it will never time out. "
+ "VCL can override this default value for each backend request "
+ "and backend request. This parameter does not apply to pipe.",
+ 0,
+ "60", "s" },
+ { "acceptor_sleep_max", tweak_timeout_double,
+ &mgt_param.acceptor_sleep_max, 0, 10,
+ "If we run out of resources, such as file descriptors or "
+ "worker threads, the acceptor will sleep between accepts.\n"
+ "This parameter limits how long it can sleep between "
+ "attempts to accept new connections.",
+ EXPERIMENTAL,
+ "0.050", "s" },
+ { "acceptor_sleep_incr", tweak_timeout_double,
+ &mgt_param.acceptor_sleep_incr, 0, 1,
+ "If we run out of resources, such as file descriptors or "
+ "worker threads, the acceptor will sleep between accepts.\n"
+ "This parameter control how much longer we sleep, each time "
+ "we fail to accept a new connection.",
+ EXPERIMENTAL,
+ "0.001", "s" },
+ { "acceptor_sleep_decay", tweak_generic_double,
+ &mgt_param.acceptor_sleep_decay, 0, 1,
+ "If we run out of resources, such as file descriptors or "
+ "worker threads, the acceptor will sleep between accepts.\n"
+ "This parameter (multiplicatively) reduce the sleep duration "
+ "for each succesfull accept. (ie: 0.9 = reduce by 10%)",
+ EXPERIMENTAL,
+ "0.900", "" },
+ { "clock_skew", tweak_uint, &mgt_param.clock_skew, 0, UINT_MAX,
+ "How much clockskew we are willing to accept between the "
+ "backend and our own clock.",
+ 0,
+ "10", "s" },
+ { "prefer_ipv6", tweak_bool, &mgt_param.prefer_ipv6, 0, 0,
+ "Prefer IPv6 address when connecting to backends which "
+ "have both IPv4 and IPv6 addresses.",
+ 0,
+ "off", "bool" },
+ { "session_max", tweak_uint,
+ &mgt_param.max_sess, 1000, UINT_MAX,
+ "Maximum number of sessions we will allocate from one pool "
+ "before just dropping connections.\n"
+ "This is mostly an anti-DoS measure, and setting it plenty "
+ "high should not hurt, as long as you have the memory for "
+ "it.\n",
+ 0,
+ "100000", "sessions" },
+ { "session_linger", tweak_uint,
+ &mgt_param.session_linger,0, UINT_MAX,
+ "How long time the workerthread lingers on the session "
+ "to see if a new request appears right away.\n"
+ "If sessions are reused, as much as half of all reuses "
+ "happen within the first 100 msec of the previous request "
+ "completing.\n"
+ "Setting this too high results in worker threads not doing "
+ "anything for their keep, setting it too low just means that "
+ "more sessions take a detour around the waiter.",
+ EXPERIMENTAL,
+ "50", "ms" },
+ { "log_hashstring", tweak_bool, &mgt_param.log_hash, 0, 0,
+ "Log the hash string components to shared memory log.\n",
+ 0,
+ "on", "bool" },
+ { "log_local_address", tweak_bool, &mgt_param.log_local_addr, 0, 0,
+ "Log the local address on the TCP connection in the "
+ "SessionOpen shared memory record.\n",
+ 0,
+ "off", "bool" },
+ { "waiter", tweak_waiter, NULL, 0, 0,
+ "Select the waiter kernel interface.\n",
+ EXPERIMENTAL | MUST_RESTART,
+ "default", NULL },
+ { "diag_bitmap", tweak_diag_bitmap, 0, 0, 0,
+ "Bitmap controlling diagnostics code:\n"
+ " 0x00000001 - CNT_Session states.\n"
+ " 0x00000002 - workspace debugging.\n"
+ " 0x00000004 - kqueue debugging.\n"
+ " 0x00000008 - mutex logging.\n"
+ " 0x00000010 - mutex contests.\n"
+ " 0x00000020 - waiting list.\n"
+ " 0x00000040 - object workspace.\n"
+ " 0x00001000 - do not core-dump child process.\n"
+ " 0x00002000 - only short panic message.\n"
+ " 0x00004000 - panic to stderr.\n"
+ " 0x00010000 - synchronize shmlog.\n"
+ " 0x00020000 - synchronous start of persistence.\n"
+ " 0x00040000 - release VCL early.\n"
+ " 0x00080000 - ban-lurker debugging.\n"
+ " 0x80000000 - do edge-detection on digest.\n"
+ "\n"
+ "Use 0x notation and do the bitor in your head :-)\n",
+ 0,
+ "0", "bitmap" },
+ { "ban_dups", tweak_bool, &mgt_param.ban_dups, 0, 0,
+ "Detect and eliminate duplicate bans.\n",
+ 0,
+ "on", "bool" },
+ { "syslog_cli_traffic", tweak_bool, &mgt_param.syslog_cli_traffic, 0, 0,
+ "Log all CLI traffic to syslog(LOG_INFO).\n",
+ 0,
+ "on", "bool" },
+ { "ban_lurker_sleep", tweak_timeout_double,
+ &mgt_param.ban_lurker_sleep, 0, UINT_MAX,
+ "How long time does the ban lurker thread sleeps between "
+ "successful attempts to push the last item up the ban "
+ " list. It always sleeps a second when nothing can be done.\n"
+ "A value of zero disables the ban lurker.",
+ 0,
+ "0.01", "s" },
+ { "saintmode_threshold", tweak_uint,
+ &mgt_param.saintmode_threshold, 0, UINT_MAX,
+ "The maximum number of objects held off by saint mode before "
+ "no further will be made to the backend until one times out. "
+ "A value of 0 disables saintmode.",
+ EXPERIMENTAL,
+ "10", "objects" },
+ { "http_range_support", tweak_bool, &mgt_param.http_range_support, 0, 0,
+ "Enable support for HTTP Range headers.\n",
+ EXPERIMENTAL,
+ "on", "bool" },
+ { "http_gzip_support", tweak_bool, &mgt_param.http_gzip_support, 0, 0,
+ "Enable gzip support. When enabled Varnish will compress "
+ "uncompressed objects before they are stored in the cache. "
+ "If a client does not support gzip encoding Varnish will "
+ "uncompress compressed objects on demand. Varnish will also "
+ "rewrite the Accept-Encoding header of clients indicating "
+ "support for gzip to:\n"
+ " Accept-Encoding: gzip\n\n"
+ "Clients that do not support gzip will have their "
+ "Accept-Encoding header removed. For more information on how "
+ "gzip is implemented please see the chapter on gzip in the "
+ "Varnish reference.",
+ EXPERIMENTAL,
+ "on", "bool" },
+ { "gzip_tmp_space", tweak_uint, &mgt_param.gzip_tmp_space, 0, 2,
+ "Where temporary space for gzip/gunzip is allocated:\n"
+ " 0 - malloc\n"
+ " 2 - thread workspace\n"
+ "\n"
+ "If you have much gzip/gunzip activity, it may be an"
+ " advantage to use workspace for these allocations to reduce"
+ " malloc activity. Be aware that gzip needs 256+KB and gunzip"
+ " needs 32+KB of workspace (64+KB if ESI processing).",
+ EXPERIMENTAL,
+ "0", "" },
+ { "gzip_level", tweak_uint, &mgt_param.gzip_level, 0, 9,
+ "Gzip compression level: 0=debug, 1=fast, 9=best",
+ 0,
+ "6", ""},
+ { "gzip_window", tweak_uint, &mgt_param.gzip_window, 8, 15,
+ "Gzip window size 8=least, 15=most compression.\n"
+ "Memory impact is 8=1k, 9=2k, ... 15=128k.",
+ 0,
+ "15", ""},
+ { "gzip_memlevel", tweak_uint, &mgt_param.gzip_memlevel, 1, 9,
+ "Gzip memory level 1=slow/least, 9=fast/most compression.\n"
+ "Memory impact is 1=1k, 2=2k, ... 9=256k.",
+ 0,
+ "8", ""},
+ { "gzip_stack_buffer",
+ tweak_bytes_u, &mgt_param.gzip_stack_buffer,
+ 2048, UINT_MAX,
+ "Size of stack buffer used for gzip processing.\n"
+ "The stack buffers are used for in-transit data,"
+ " for instance gunzip'ed data being sent to a client."
+ "Making this space to small results in more overhead,"
+ " writes to sockets etc, making it too big is probably"
+ " just a waste of memory.",
+ EXPERIMENTAL,
+ "32k", "bytes" },
+ { "shortlived", tweak_timeout_double,
+ &mgt_param.shortlived, 0, UINT_MAX,
+ "Objects created with TTL shorter than this are always "
+ "put in transient storage.\n",
+ 0,
+ "10.0", "s" },
+ { "critbit_cooloff", tweak_timeout_double,
+ &mgt_param.critbit_cooloff, 60, 254,
+ "How long time the critbit hasher keeps deleted objheads "
+ "on the cooloff list.\n",
+ WIZARD,
+ "180.0", "s" },
+ { "vcl_dir", tweak_string, &mgt_vcl_dir, 0, 0,
+ "Directory from which relative VCL filenames (vcl.load and "
+ "include) are opened.",
+ 0,
+ #ifdef VARNISH_VCL_DIR
+ VARNISH_VCL_DIR,
+ #else
+ ".",
+ #endif
+ NULL },
+ { "vmod_dir", tweak_string, &mgt_vmod_dir, 0, 0,
+ "Directory where VCL modules are to be found.",
+ 0,
+ #ifdef VARNISH_VMOD_DIR
+ VARNISH_VMOD_DIR,
+ #else
+ ".",
+ #endif
+ NULL },
+ { "vcc_err_unref", tweak_bool, &mgt_vcc_err_unref, 0, 0,
+ "Unreferenced VCL objects result in error.\n",
+ 0,
+ "on", "bool" },
+
+
+ { "pcre_match_limit", tweak_uint,
+ &mgt_param.vre_limits.match,
+ 1, UINT_MAX,
+ "The limit for the number of internal matching function"
+ " calls in a pcre_exec() execution.",
+ 0,
+ "10000", ""},
+
+ { "pcre_match_limit_recursion", tweak_uint,
+ &mgt_param.vre_limits.match_recursion,
+ 1, UINT_MAX,
+ "The limit for the number of internal matching function"
+ " recursions in a pcre_exec() execution.",
+ 0,
+ "10000", ""},
+
+ { "vsl_space", tweak_bytes,
+ &mgt_param.vsl_space, 1024*1024, HUGE_VAL,
+ "The amount of space to allocate for the VSL fifo buffer"
+ " in the VSM memory segment."
+ " If you make this too small, varnish{ncsa|log} etc will"
+ " not be able to keep up."
+ " Making it too large just costs memory resources.",
+ MUST_RESTART,
+ "80M", "bytes"},
+
+ { "vsm_space", tweak_bytes,
+ &mgt_param.vsm_space, 1024*1024, HUGE_VAL,
+ "The amount of space to allocate for stats counters"
+ " in the VSM memory segment."
+ " If you make this too small, some counters will be"
+ " invisible."
+ " Making it too large just costs memory resources.",
+ MUST_RESTART,
+ "1M", "bytes"},
+
+ { NULL, NULL, NULL }
+ };
+
+ /*--------------------------------------------------------------------*/
+
+ #define WIDTH 76
+
+ static void
+ mcf_wrap(struct cli *cli, const char *text)
+ {
+ const char *p, *q;
+
+ /* Format text to COLUMNS width */
+ for (p = text; *p != '\0'; ) {
+ q = strchr(p, '\n');
+ if (q == NULL)
+ q = strchr(p, '\0');
+ if (q > p + WIDTH - margin) {
+ q = p + WIDTH - margin;
+ while (q > p && *q != ' ')
+ q--;
+ AN(q);
+ }
+ VCLI_Out(cli, "%*s %.*s\n", margin, "", (int)(q - p), p);
+ p = q;
+ if (*p == ' ' || *p == '\n')
+ p++;
+ }
+ }
+
+ void
+ mcf_param_show(struct cli *cli, const char * const *av, void *priv)
+ {
+ int i;
+ const struct parspec *pp;
+ int lfmt;
+
+ (void)priv;
+ if (av[2] == NULL || strcmp(av[2], "-l"))
+ lfmt = 0;
+ else
+ lfmt = 1;
+ for (i = 0; i < nparspec; i++) {
+ pp = parspec[i];
+ if (av[2] != NULL && !lfmt && strcmp(pp->name, av[2]))
+ continue;
+ VCLI_Out(cli, "%-*s ", margin, pp->name);
+ if (pp->func == NULL) {
+ VCLI_Out(cli, "Not implemented.\n");
+ if (av[2] != NULL && !lfmt)
+ return;
+ else
+ continue;
+ }
+ pp->func(cli, pp, NULL);
+ if (pp->units != NULL)
+ VCLI_Out(cli, " [%s]\n", pp->units);
+ else
+ VCLI_Out(cli, "\n");
+ if (av[2] != NULL) {
+ VCLI_Out(cli, "%-*s Default is %s\n",
+ margin, "", pp->def);
+ mcf_wrap(cli, pp->descr);
+ if (pp->flags & DELAYED_EFFECT)
+ mcf_wrap(cli, DELAYED_EFFECT_TEXT);
+ if (pp->flags & EXPERIMENTAL)
+ mcf_wrap(cli, EXPERIMENTAL_TEXT);
+ if (pp->flags & MUST_RELOAD)
+ mcf_wrap(cli, MUST_RELOAD_TEXT);
+ if (pp->flags & MUST_RESTART)
+ mcf_wrap(cli, MUST_RESTART_TEXT);
+ if (pp->flags & WIZARD)
+ mcf_wrap(cli, WIZARD_TEXT);
+ if (!lfmt)
+ return;
+ else
+ VCLI_Out(cli, "\n");
+ }
+ }
+ if (av[2] != NULL && !lfmt) {
+ VCLI_SetResult(cli, CLIS_PARAM);
+ VCLI_Out(cli, "Unknown parameter \"%s\".", av[2]);
+ }
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ MCF_ParamSet(struct cli *cli, const char *param, const char *val)
+ {
+ const struct parspec *pp;
+
+ pp = mcf_findpar(param);
+ if (pp == NULL) {
+ VCLI_SetResult(cli, CLIS_PARAM);
+ VCLI_Out(cli, "Unknown parameter \"%s\".", param);
+ return;
+ }
+ pp->func(cli, pp, val);
+
+ if (cli->result == CLIS_OK && heritage.param != NULL)
+ *heritage.param = mgt_param;
+
+ if (cli->result != CLIS_OK) {
+ VCLI_Out(cli, "(attempting to set param %s to %s)\n",
+ pp->name, val);
+ } else if (child_pid >= 0 && pp->flags & MUST_RESTART) {
+ VCLI_Out(cli, "Change will take effect"
+ " when child is restarted");
+ } else if (pp->flags & MUST_RELOAD) {
+ VCLI_Out(cli, "Change will take effect"
+ " when VCL script is reloaded");
+ }
+ }
+
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ mcf_param_set(struct cli *cli, const char * const *av, void *priv)
+ {
+
+ (void)priv;
+ MCF_ParamSet(cli, av[2], av[3]);
+ }
+
+ /*--------------------------------------------------------------------
+ * Add a group of parameters to the global set and sort by name.
+ */
+
+ static int
+ parspec_cmp(const void *a, const void *b)
+ {
+ struct parspec * const * pa = a;
+ struct parspec * const * pb = b;
+ return (strcmp((*pa)->name, (*pb)->name));
+ }
+
+ static void
+ MCF_AddParams(const struct parspec *ps)
+ {
+ const struct parspec *pp;
+ int n;
+
+ n = 0;
+ for (pp = ps; pp->name != NULL; pp++) {
+ if (mcf_findpar(pp->name) != NULL)
+ fprintf(stderr, "Duplicate param: %s\n", pp->name);
+ if (strlen(pp->name) + 1 > margin)
+ margin = strlen(pp->name) + 1;
+ n++;
+ }
+ parspec = realloc(parspec, (1L + nparspec + n) * sizeof *parspec);
+ XXXAN(parspec);
+ for (pp = ps; pp->name != NULL; pp++)
+ parspec[nparspec++] = pp;
+ parspec[nparspec] = NULL;
+ qsort (parspec, nparspec, sizeof parspec[0], parspec_cmp);
+ }
+
+ /*--------------------------------------------------------------------
+ * Set defaults for all parameters
+ */
+
+ static void
+ MCF_SetDefaults(struct cli *cli)
+ {
+ const struct parspec *pp;
+ int i;
+
+ for (i = 0; i < nparspec; i++) {
+ pp = parspec[i];
+ if (cli != NULL)
+ VCLI_Out(cli,
+ "Set Default for %s = %s\n", pp->name, pp->def);
+ pp->func(cli, pp, pp->def);
+ if (cli != NULL && cli->result != CLIS_OK)
+ return;
+ }
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ MCF_ParamInit(struct cli *cli)
+ {
+
+ MCF_AddParams(input_parspec);
+ MCF_AddParams(WRK_parspec);
+
+ /* XXX: We do this twice, to get past any interdependencies */
+ MCF_SetDefaults(NULL);
+ MCF_SetDefaults(cli);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ MCF_DumpRst(void)
+ {
+ const struct parspec *pp;
+ const char *p, *q;
+ int i;
+
+ printf("\n.. The following is the autogenerated output from varnishd -x dumprst\n\n");
+ for (i = 0; i < nparspec; i++) {
+ pp = parspec[i];
+ printf("%s\n", pp->name);
+ if (pp->units != NULL && *pp->units != '\0')
+ printf("\t- Units: %s\n", pp->units);
+ printf("\t- Default: %s\n",
+ strcmp(pp->def,MAGIC_INIT_STRING) == 0 ? "magic" : pp->def);
+ /*
+ * XXX: we should mark the params with one/two flags
+ * XXX: that say if ->min/->max are valid, so we
+ * XXX: can emit those also in help texts.
+ */
+ if (pp->flags) {
+ printf("\t- Flags: ");
+ q = "";
+ if (pp->flags & DELAYED_EFFECT) {
+ printf("%sdelayed", q);
+ q = ", ";
+ }
+ if (pp->flags & MUST_RESTART) {
+ printf("%smust_restart", q);
+ q = ", ";
+ }
+ if (pp->flags & MUST_RELOAD) {
+ printf("%smust_reload", q);
+ q = ", ";
+ }
+ if (pp->flags & EXPERIMENTAL) {
+ printf("%sexperimental", q);
+ q = ", ";
+ }
+ printf("\n");
+ }
+ printf("\n\t");
+ for (p = pp->descr; *p; p++) {
+ if (*p == '\n' && p[1] =='\0')
+ break;
+ if (*p == '\n' && p[1] =='\n') {
+ printf("\n\n\t");
+ p++;
+ } else if (*p == '\n') {
+ printf("\n\t");
+ } else if (*p == ':' && p[1] == '\n') {
+ /*
+ * Start of definition list,
+ * use RSTs code mode for this
+ */
+ printf("::\n");
+ } else {
+ printf("%c", *p);
+ }
+ }
+ printf("\n\n");
+ }
+ printf("\n");
+ }
diff --cc bin/varnishd/mgt/mgt_sandbox_solaris.c
index 0000000,79f6650..114d6a4
mode 000000,100644..100644
--- a/bin/varnishd/mgt/mgt_sandbox_solaris.c
+++ b/bin/varnishd/mgt/mgt_sandbox_solaris.c
@@@ -1,0 -1,233 +1,234 @@@
+ /*-
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ * Nils Goroll <nils.goroll at uplex.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Sandboxing child processes on Solaris
+ *
+ */
+
+ #include "config.h"
+
+ #ifdef HAVE_SETPPRIV
+
+ #ifdef HAVE_PRIV_H
+ #include <priv.h>
+ #endif
+ #include <stdio.h>
+ #include <string.h>
+ #include <syslog.h>
+ #include <unistd.h>
+
+ #include "mgt/mgt.h"
+
+ #include "common/heritage.h"
++#include "common/params.h"
+
+ /*--------------------------------------------------------------------
+ * SOLARIS PRIVILEGES: Note on use of symbolic PRIV_* constants
+ *
+ * For privileges which existed in Solaris 10 FCS, we may use the constants from
+ * sys/priv_names.h
+ *
+ * For privileges which have been added later, we need to use strings in order
+ * not to break builds of varnish on these platforms. To remain binary
+ * compatible, we need to silently ignore errors from priv_addset when using
+ * these strings.
+ *
+ * For optimal build and binary forward compatibility, we could use subtractive
+ * set specs like
+ *
+ * basic,!file_link_any,!proc_exec,!proc_fork,!proc_info,!proc_session
+ *
+ * but I (Nils) have a preference for making an informed decision about which
+ * privileges the varnish child should have and which it shouldn't.
+ *
+ * Newly introduced privileges should be annotated with their PSARC / commit ID
+ * (as long as Oracle reveals these :/ )
+ *
+ * SOLARIS PRIVILEGES: Note on accidentally setting the SNOCD flag
+ *
+ * When setting privileges, we need to take care not to accidentally set the
+ * SNOCD flag which will disable core dumps unnecessarily. (see
+ * https://www.varnish-cache.org/trac/ticket/671 )
+ *
+ * When changing the logic herein, always check with mdb -k. Replace _PID_ with
+ * the pid of your varnish child, the result should be 0, otherwise a regression
+ * has been introduced.
+ *
+ * > 0t_PID_::pid2proc | ::print proc_t p_flag | >a
+ * > (<a & 0x10000000)=X
+ * 0
+ *
+ * (a value of 0x10000000 indicates that SNOCD is set)
+ *
+ * NOTE that on Solaris changing the uid will _always_ set SNOCD, so make sure
+ * you run this test with appropriate privileges, but without proc_setid, so
+ * varnish won't setuid(), e.g.
+ *
+ * pfexec ppriv -e -s A=basic,net_privaddr,sys_resource varnish ...
+ *
+ * SOLARIS COREDUMPS with setuid(): See coreadm(1M) - global-setid / proc-setid
+ *
+ */
+
+ /* effective during runtime of the child */
+ static inline void
+ mgt_sandbox_solaris_add_effective(priv_set_t *pset)
+ {
+ /* PSARC/2009/685 - 8eca52188202 - onnv_132 */
+ priv_addset(pset, "net_access");
+
+ /* PSARC/2009/378 - 63678502e95e - onnv_140 */
+ priv_addset(pset, "file_read");
+ priv_addset(pset, "file_write");
+ }
+
+ /* permitted during runtime of the child - for privilege bracketing */
+ static inline void
+ mgt_sandbox_solaris_add_permitted(priv_set_t *pset)
+ {
+ /* for raising limits in cache_waiter_ports.c */
+ priv_addset(pset, PRIV_SYS_RESOURCE);
+ }
+
+ /* effective during mgt_sandbox */
+ static inline void
+ mgt_sandbox_solaris_add_initial(priv_set_t *pset)
+ {
+ /* for setgid/setuid */
+ priv_addset(pset, PRIV_PROC_SETID);
+ }
+
+ /*
+ * if we are not yet privilege-aware (i.e. we have been started
+ * not privilege-aware with euid 0), we need to grab any additional privileges
+ * needed during mgt_sandbox, until we reduce to least privileges in
+ * mgt_sandbox_waive, otherwise we would lose them with setuid()
+ */
+
+ void
+ mgt_sandbox_solaris_init(void)
+ {
+ priv_set_t *priv_all;
+
+ if (! (priv_all = priv_allocset())) {
+ REPORT(LOG_ERR,
+ "Child start warning: mgt_sandbox_init - priv_allocset failed: errno=%d (%s)",
+ errno, strerror(errno));
+ return;
+ }
+
+ priv_emptyset(priv_all);
+
+ mgt_sandbox_solaris_add_effective(priv_all);
+ mgt_sandbox_solaris_add_permitted(priv_all);
+ mgt_sandbox_solaris_add_initial(priv_all);
+
+ setppriv(PRIV_ON, PRIV_PERMITTED, priv_all);
+ setppriv(PRIV_ON, PRIV_EFFECTIVE, priv_all);
+ setppriv(PRIV_ON, PRIV_INHERITABLE, priv_all);
+
+ priv_freeset(priv_all);
+ }
+
+ void
+ mgt_sandbox_solaris_privsep(void)
+ {
+ if (priv_ineffect(PRIV_PROC_SETID)) {
+ if (getgid() != mgt_param.gid)
+ XXXAZ(setgid(mgt_param.gid));
+ if (getuid() != mgt_param.uid)
+ XXXAZ(setuid(mgt_param.uid));
+ } else {
+ REPORT(LOG_INFO, "Privilege %s missing, will not change uid/gid",
+ PRIV_PROC_SETID);
+ }
+ }
+
+ /*
+ * Waive most privileges in the child
+ *
+ * as of onnv_151a, we should end up with:
+ *
+ * > ppriv -v #pid of varnish child
+ * PID: .../varnishd ...
+ * flags = PRIV_AWARE
+ * E: file_read,file_write,net_access
+ * I: none
+ * P: file_read,file_write,net_access,sys_resource
+ * L: file_read,file_write,net_access,sys_resource
+ *
+ * We should keep sys_resource in P in order to adjust our limits if we need to
+ */
+
+ void
+ mgt_sandbox_solaris_fini(void)
+ {
+ priv_set_t *effective, *inheritable, *permitted;
+
+ if (!(effective = priv_allocset()) ||
+ !(inheritable = priv_allocset()) ||
+ !(permitted = priv_allocset())) {
+ REPORT(LOG_ERR,
+ "Child start warning: mgt_sandbox_waive - priv_allocset failed: errno=%d (%s)",
+ errno, strerror(errno));
+ return;
+ }
+
+ priv_emptyset(inheritable);
+
+ priv_emptyset(effective);
+ mgt_sandbox_solaris_add_effective(effective);
+
+ priv_copyset(effective, permitted);
+ mgt_sandbox_solaris_add_permitted(permitted);
+
+ /*
+ * invert the sets and clear privileges such that setppriv will always
+ * succeed
+ */
+ priv_inverse(inheritable);
+ priv_inverse(effective);
+ priv_inverse(permitted);
+
+ #define SETPPRIV(which, set) \
+ if (setppriv(PRIV_OFF, which, set)) \
+ REPORT(LOG_ERR, \
+ "Child start warning: Waiving privileges failed on %s: errno=%d (%s)", \
+ #which, errno, strerror(errno));
+
+ SETPPRIV(PRIV_LIMIT, permitted);
+ SETPPRIV(PRIV_PERMITTED, permitted);
+ SETPPRIV(PRIV_EFFECTIVE, effective);
+ SETPPRIV(PRIV_INHERITABLE, inheritable);
+ #undef SETPPRIV
+
+ priv_freeset(inheritable);
+ priv_freeset(effective);
+ }
+
+ #endif /* HAVE_SETPPRIV */
diff --cc bin/varnishd/storage/stevedore.c
index 0000000,860604e..71241f5
mode 000000,100644..100644
--- a/bin/varnishd/storage/stevedore.c
+++ b/bin/varnishd/storage/stevedore.c
@@@ -1,0 -1,466 +1,514 @@@
+ /*-
+ * Copyright (c) 2007-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Dag-Erling Smørgrav <des at des.no>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * STEVEDORE: one who works at or is responsible for loading and
+ * unloading ships in port. Example: "on the wharves, stevedores were
+ * unloading cargo from the far corners of the world." Origin: Spanish
+ * estibador, from estibar to pack. First Known Use: 1788
+ */
+
+ #include "config.h"
+
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ #include "cache/cache.h"
+
+ #include "storage/storage.h"
+ #include "vrt.h"
+ #include "vrt_obj.h"
+
+ static const struct stevedore * volatile stv_next;
+
+ /*---------------------------------------------------------------------
+ * Default objcore methods
+ */
+
+ static struct object * __match_proto__(getobj_f)
+ default_oc_getobj(struct worker *wrk, struct objcore *oc)
+ {
+ struct object *o;
+
+ (void)wrk;
+ if (oc->priv == NULL)
+ return (NULL);
+ CAST_OBJ_NOTNULL(o, oc->priv, OBJECT_MAGIC);
+ return (o);
+ }
+
+ static void
+ default_oc_freeobj(struct objcore *oc)
+ {
+ struct object *o;
+
+ CAST_OBJ_NOTNULL(o, oc->priv, OBJECT_MAGIC);
+ oc->priv = NULL;
+ oc->methods = NULL;
+
+ STV_Freestore(o);
+ STV_free(o->objstore);
+ }
+
+ static struct lru *
+ default_oc_getlru(const struct objcore *oc)
+ {
+ struct object *o;
+
+ CAST_OBJ_NOTNULL(o, oc->priv, OBJECT_MAGIC);
+ return (o->objstore->stevedore->lru);
+ }
+
+ static struct objcore_methods default_oc_methods = {
+ .getobj = default_oc_getobj,
+ .freeobj = default_oc_freeobj,
+ .getlru = default_oc_getlru,
+ };
+
+
+ /*--------------------------------------------------------------------
+ */
+
+ struct lru *
+ LRU_Alloc(void)
+ {
+ struct lru *l;
+
+ ALLOC_OBJ(l, LRU_MAGIC);
+ AN(l);
+ VTAILQ_INIT(&l->lru_head);
+ Lck_New(&l->mtx, lck_lru);
+ return (l);
+ }
+
+ void
+ LRU_Free(struct lru *lru)
+ {
+ CHECK_OBJ_NOTNULL(lru, LRU_MAGIC);
+ Lck_Delete(&lru->mtx);
+ FREE_OBJ(lru);
+ }
+
+ /*--------------------------------------------------------------------
+ * XXX: trust pointer writes to be atomic
+ */
+
+ static struct stevedore *
+ stv_pick_stevedore(const struct sess *sp, const char **hint)
+ {
+ struct stevedore *stv;
+
+ AN(hint);
+ if (*hint != NULL && **hint != '\0') {
+ VTAILQ_FOREACH(stv, &stv_stevedores, list) {
+ if (!strcmp(stv->ident, *hint))
+ return (stv);
+ }
+ if (!strcmp(TRANSIENT_STORAGE, *hint))
+ return (stv_transient);
+
+ /* Hint was not valid, nuke it */
+ WSP(sp, SLT_Debug, "Storage hint not usable");
+ *hint = NULL;
+ }
+ /* pick a stevedore and bump the head along */
+ stv = VTAILQ_NEXT(stv_next, list);
+ if (stv == NULL)
+ stv = VTAILQ_FIRST(&stv_stevedores);
+ AN(stv);
+ AN(stv->name);
+ stv_next = stv;
+ return (stv);
+ }
+
+ /*-------------------------------------------------------------------*/
+
+ static struct storage *
+ stv_alloc(struct worker *w, const struct object *obj, size_t size)
+ {
+ struct storage *st;
+ struct stevedore *stv;
+ unsigned fail = 0;
+
+ /*
+ * Always use the stevedore which allocated the object in order to
+ * keep an object inside the same stevedore.
+ */
+ CHECK_OBJ_NOTNULL(obj, OBJECT_MAGIC);
+ CHECK_OBJ_NOTNULL(w, WORKER_MAGIC);
+ stv = obj->objstore->stevedore;
+ CHECK_OBJ_NOTNULL(stv, STEVEDORE_MAGIC);
+
+ if (size > cache_param->fetch_maxchunksize)
+ size = cache_param->fetch_maxchunksize;
+
+ for (;;) {
+ /* try to allocate from it */
+ AN(stv->alloc);
+ st = stv->alloc(stv, size);
+ if (st != NULL)
+ break;
+
+ if (size > cache_param->fetch_chunksize) {
+ size >>= 1;
+ continue;
+ }
+
+ /* no luck; try to free some space and keep trying */
+ if (EXP_NukeOne(w, stv->lru) == -1)
+ break;
+
+ /* Enough is enough: try another if we have one */
+ if (++fail >= cache_param->nuke_limit)
+ break;
+ }
+ if (st != NULL)
+ CHECK_OBJ_NOTNULL(st, STORAGE_MAGIC);
+ return (st);
+ }
+
+
+ /*-------------------------------------------------------------------*
+ * Structure used to transport internal knowledge from STV_NewObject()
+ * to STV_MkObject(). Nobody else should mess with this struct.
+ */
+
+ struct stv_objsecrets {
+ unsigned magic;
+ #define STV_OBJ_SECRETES_MAGIC 0x78c87247
+ uint16_t nhttp;
+ unsigned lhttp;
+ unsigned wsl;
+ struct exp *exp;
+ };
+
+ /*--------------------------------------------------------------------
+ * This function is called by stevedores ->allocobj() method, which
+ * very often will be stv_default_allocobj() below, to convert a slab
+ * of storage into object which the stevedore can then register in its
+ * internal state, before returning it to STV_NewObject().
+ * As you probably guessed: All this for persistence.
+ */
+
+ struct object *
+ STV_MkObject(struct sess *sp, void *ptr, unsigned ltot,
+ const struct stv_objsecrets *soc)
+ {
+ struct object *o;
+ unsigned l;
+
+ CHECK_OBJ_NOTNULL(soc, STV_OBJ_SECRETES_MAGIC);
+
+ assert(PAOK(ptr));
+ assert(PAOK(soc->wsl));
+ assert(PAOK(soc->lhttp));
+
+ assert(ltot >= sizeof *o + soc->lhttp + soc->wsl);
+
+ o = ptr;
+ memset(o, 0, sizeof *o);
+ o->magic = OBJECT_MAGIC;
+
+ l = PRNDDN(ltot - (sizeof *o + soc->lhttp));
+ assert(l >= soc->wsl);
+
+ o->http = HTTP_create(o + 1, soc->nhttp);
+ WS_Init(o->ws_o, "obj", (char *)(o + 1) + soc->lhttp, soc->wsl);
+ WS_Assert(o->ws_o);
+ assert(o->ws_o->e <= (char*)ptr + ltot);
+
+ http_Setup(o->http, o->ws_o);
+ o->http->magic = HTTP_MAGIC;
+ o->exp = *soc->exp;
+ VTAILQ_INIT(&o->store);
+ sp->wrk->stats.n_object++;
+
+ if (sp->objcore != NULL) {
+ CHECK_OBJ_NOTNULL(sp->objcore, OBJCORE_MAGIC);
+
+ o->objcore = sp->objcore;
+ sp->objcore = NULL; /* refcnt follows pointer. */
+ BAN_NewObjCore(o->objcore);
+
+ o->objcore->methods = &default_oc_methods;
+ o->objcore->priv = o;
+ }
+ return (o);
+ }
+
+ /*--------------------------------------------------------------------
+ * This is the default ->allocobj() which all stevedores who do not
+ * implement persistent storage can rely on.
+ */
+
+ struct object *
+ stv_default_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot,
+ const struct stv_objsecrets *soc)
+ {
+ struct object *o;
+ struct storage *st;
+
+ CHECK_OBJ_NOTNULL(soc, STV_OBJ_SECRETES_MAGIC);
+ st = stv->alloc(stv, ltot);
+ if (st == NULL)
+ return (NULL);
+ if (st->space < ltot) {
+ stv->free(st);
+ return (NULL);
+ }
+ ltot = st->len = st->space;
+ o = STV_MkObject(sp, st->ptr, ltot, soc);
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ o->objstore = st;
+ return (o);
+ }
+
+ /*-------------------------------------------------------------------
+ * Allocate storage for an object, based on the header information.
+ * XXX: If we know (a hint of) the length, we could allocate space
+ * XXX: for the body in the same allocation while we are at it.
+ */
+
+ struct object *
+ STV_NewObject(struct sess *sp, const char *hint, unsigned wsl, struct exp *ep,
+ uint16_t nhttp)
+ {
+ struct object *o;
+ struct stevedore *stv, *stv0;
+ unsigned lhttp, ltot;
+ struct stv_objsecrets soc;
+ int i;
+
+ assert(wsl > 0);
+ wsl = PRNDUP(wsl);
+
+ lhttp = HTTP_estimate(nhttp);
+ lhttp = PRNDUP(lhttp);
+
+ memset(&soc, 0, sizeof soc);
+ soc.magic = STV_OBJ_SECRETES_MAGIC;
+ soc.nhttp = nhttp;
+ soc.lhttp = lhttp;
+ soc.wsl = wsl;
+ soc.exp = ep;
+
+ ltot = sizeof *o + wsl + lhttp;
+
+ stv = stv0 = stv_pick_stevedore(sp, &hint);
+ AN(stv->allocobj);
+ o = stv->allocobj(stv, sp, ltot, &soc);
+ if (o == NULL && hint == NULL) {
+ do {
+ stv = stv_pick_stevedore(sp, &hint);
+ AN(stv->allocobj);
+ o = stv->allocobj(stv, sp, ltot, &soc);
+ } while (o == NULL && stv != stv0);
+ }
+ if (o == NULL) {
+ /* no luck; try to free some space and keep trying */
+ for (i = 0; o == NULL && i < cache_param->nuke_limit; i++) {
+ if (EXP_NukeOne(sp->wrk, stv->lru) == -1)
+ break;
+ o = stv->allocobj(stv, sp, ltot, &soc);
+ }
+ }
+
+ if (o == NULL)
+ return (NULL);
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ CHECK_OBJ_NOTNULL(o->objstore, STORAGE_MAGIC);
+ return (o);
+ }
+
+ /*-------------------------------------------------------------------*/
+
+ void
+ STV_Freestore(struct object *o)
+ {
+ struct storage *st, *stn;
+
+ if (o->esidata != NULL) {
+ STV_free(o->esidata);
+ o->esidata = NULL;
+ }
+ VTAILQ_FOREACH_SAFE(st, &o->store, list, stn) {
+ CHECK_OBJ_NOTNULL(st, STORAGE_MAGIC);
+ VTAILQ_REMOVE(&o->store, st, list);
+ STV_free(st);
+ }
+ }
+
+ /*-------------------------------------------------------------------*/
+
+ struct storage *
+ STV_alloc(struct worker *w, size_t size)
+ {
++ struct object *obj = w->fetch_obj;
++ if (obj == NULL)
++ obj = w->sp->obj;
+
- return (stv_alloc(w, w->fetch_obj, size));
++ return (stv_alloc(w, obj, size));
+ }
+
+ void
+ STV_trim(struct storage *st, size_t size)
+ {
+
+ CHECK_OBJ_NOTNULL(st, STORAGE_MAGIC);
+ AN(st->stevedore);
+ if (st->stevedore->trim)
+ st->stevedore->trim(st, size);
+ }
+
++/*
++ * Duplicate the object storage (HTML body) from src into target, using a
++ * stevedore-specific dup method for src's stevedore.
++ *
++ * Currently, every method just copies storage from one object to the other,
++ * but this method of encapsulation opens the path to future techniques of
++ * sharing storage together with reference counting.
++ */
++void
++STV_dup(const struct sess *sp, struct object *src, struct object *target)
++{
++ struct stevedore *stv;
++
++ CHECK_OBJ_NOTNULL(src, OBJECT_MAGIC);
++ CHECK_OBJ_NOTNULL(target, OBJECT_MAGIC);
++ CHECK_OBJ_NOTNULL(src->objstore, STORAGE_MAGIC);
++ CHECK_OBJ_NOTNULL(src->objstore->stevedore, STEVEDORE_MAGIC);
++
++ stv = src->objstore->stevedore;
++ AN(stv->dup);
++
++ stv->dup(sp, src, target);
++}
++
+ void
+ STV_free(struct storage *st)
+ {
+
+ CHECK_OBJ_NOTNULL(st, STORAGE_MAGIC);
+ AN(st->stevedore);
+ AN(st->stevedore->free);
+ st->stevedore->free(st);
+ }
+
+ void
+ STV_open(void)
+ {
+ struct stevedore *stv;
+
+ VTAILQ_FOREACH(stv, &stv_stevedores, list) {
+ stv->lru = LRU_Alloc();
+ if (stv->open != NULL)
+ stv->open(stv);
+ }
+ stv = stv_transient;
+ if (stv->open != NULL) {
+ stv->lru = LRU_Alloc();
+ stv->open(stv);
+ }
+ stv_next = VTAILQ_FIRST(&stv_stevedores);
+ }
+
+ void
+ STV_close(void)
+ {
+ struct stevedore *stv;
+
+ VTAILQ_FOREACH(stv, &stv_stevedores, list)
+ if (stv->close != NULL)
+ stv->close(stv);
+ stv = stv_transient;
+ if (stv->close != NULL)
+ stv->close(stv);
+ }
+
+
+ /*--------------------------------------------------------------------
+ * VRT functions for stevedores
+ */
+
+ static const struct stevedore *
+ stv_find(const char *nm)
+ {
+ const struct stevedore *stv;
+
+ VTAILQ_FOREACH(stv, &stv_stevedores, list)
+ if (!strcmp(stv->ident, nm))
+ return (stv);
+ if (!strcmp(TRANSIENT_STORAGE, nm))
+ return (stv_transient);
+ return (NULL);
+ }
+
+ int
+ VRT_Stv(const char *nm)
+ {
+
+ if (stv_find(nm) != NULL)
+ return (1);
+ return (0);
+ }
+
+ #define VRTSTVVAR(nm, vtype, ctype, dval) \
+ ctype \
+ VRT_Stv_##nm(const char *nm) \
+ { \
+ const struct stevedore *stv; \
+ \
+ stv = stv_find(nm); \
+ if (stv == NULL) \
+ return (dval); \
+ if (stv->var_##nm == NULL) \
+ return (dval); \
+ return (stv->var_##nm(stv)); \
+ }
+
+ #include "tbl/vrt_stv_var.h"
+ #undef VRTSTVVAR
++
++/*
++ * Default object store dup just copies the storage.
++ */
++void
++default_dup(const struct sess *sp, struct object *src, struct object *target)
++{
++ struct storage *st, *st2;
++ unsigned cl;
++
++ VTAILQ_FOREACH(st2, &src->store, list) {
++ cl = st2->len;
++ st = STV_alloc(sp->wrk, cl);
++ XXXAN(st);
++ assert(st->space >= cl);
++ VTAILQ_INSERT_TAIL(&target->store, st, list);
++ st->len = cl;
++ target->len += cl;
++ memcpy(st->ptr, st2->ptr, cl);
++ }
++}
diff --cc bin/varnishd/storage/storage.h
index 0000000,a813a36..879a6fb
mode 000000,100644..100644
--- a/bin/varnishd/storage/storage.h
+++ b/bin/varnishd/storage/storage.h
@@@ -1,0 -1,104 +1,110 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This defines the backend interface between the stevedore and the
+ * pluggable storage implementations.
+ *
+ */
+
+ struct stv_objsecrets;
+ struct stevedore;
+ struct sess;
+ struct lru;
+
+ typedef void storage_init_f(struct stevedore *, int ac, char * const *av);
+ typedef void storage_open_f(const struct stevedore *);
+ typedef struct storage *storage_alloc_f(struct stevedore *, size_t size);
++typedef void storage_dup_f(const struct sess *sp, struct object *src, struct object *target);
+ typedef void storage_trim_f(struct storage *, size_t size);
+ typedef void storage_free_f(struct storage *);
+ typedef struct object *storage_allocobj_f(struct stevedore *, struct sess *sp,
+ unsigned ltot, const struct stv_objsecrets *);
+ typedef void storage_close_f(const struct stevedore *);
+
+ /* Prototypes for VCL variable responders */
+ #define VRTSTVTYPE(ct) typedef ct storage_var_##ct(const struct stevedore *);
+ #include "tbl/vrt_stv_var.h"
+ #undef VRTSTVTYPE
+
+ extern storage_allocobj_f stv_default_allocobj;
+
+ /*--------------------------------------------------------------------*/
+
+ struct stevedore {
+ unsigned magic;
+ #define STEVEDORE_MAGIC 0x4baf43db
+ const char *name;
+ unsigned transient;
+ storage_init_f *init; /* called by mgt process */
+ storage_open_f *open; /* called by cache process */
+ storage_alloc_f *alloc; /* --//-- */
+ storage_trim_f *trim; /* --//-- */
++ storage_dup_f *dup; /* --//-- */
+ storage_free_f *free; /* --//-- */
+ storage_close_f *close; /* --//-- */
+ storage_allocobj_f *allocobj; /* --//-- */
+
+ struct lru *lru;
+
+ #define VRTSTVVAR(nm, vtype, ctype, dval) storage_var_##ctype *var_##nm;
+ #include "tbl/vrt_stv_var.h"
+ #undef VRTSTVVAR
+
+ /* private fields */
+ void *priv;
+
+ VTAILQ_ENTRY(stevedore) list;
+ char ident[16]; /* XXX: match VSM_chunk.ident */
+ };
+
+ VTAILQ_HEAD(stevedore_head, stevedore);
+
+ extern struct stevedore_head stv_stevedores;
+ extern struct stevedore *stv_transient;
+
+ /*--------------------------------------------------------------------*/
+ int STV_GetFile(const char *fn, int *fdp, const char **fnp, const char *ctx);
+ uintmax_t STV_FileSize(int fd, const char *size, unsigned *granularity,
+ const char *ctx);
+ struct object *STV_MkObject(struct sess *sp, void *ptr, unsigned ltot,
+ const struct stv_objsecrets *soc);
+
+ struct lru *LRU_Alloc(void);
+ void LRU_Free(struct lru *lru);
+
+ /*--------------------------------------------------------------------*/
+ extern const struct stevedore sma_stevedore;
+ extern const struct stevedore smf_stevedore;
+ extern const struct stevedore smp_stevedore;
+ #ifdef HAVE_LIBUMEM
+ extern const struct stevedore smu_stevedore;
+ #endif
++
++/* Default dup method */
++void STV_dup(const struct sess *sp, struct object *src, struct object *target);
++void default_dup(const struct sess *sp, struct object *src, struct object *target);
diff --cc bin/varnishd/storage/storage_file.c
index 0000000,9eb44d9..028563d
mode 000000,100644..100644
--- a/bin/varnishd/storage/storage_file.c
+++ b/bin/varnishd/storage/storage_file.c
@@@ -1,0 -1,616 +1,617 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Storage method based on mmap'ed file
+ */
+
+ #include "config.h"
+
+ #include <sys/mman.h>
+
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ #include "cache/cache.h"
+ #include "storage/storage.h"
+
+ #include "vnum.h"
+
+ #ifndef MAP_NOCORE
+ #define MAP_NOCORE 0 /* XXX Linux */
+ #endif
+
+ #ifndef MAP_NOSYNC
+ #define MAP_NOSYNC 0 /* XXX Linux */
+ #endif
+
+ #define MINPAGES 128
+
+ /*
+ * Number of buckets on free-list.
+ *
+ * Last bucket is "larger than" so choose number so that the second
+ * to last bucket matches the 128k CHUNKSIZE in cache_fetch.c when
+ * using a 4K minimal page size
+ */
+ #define NBUCKET (128 / 4 + 1)
+
+ /*--------------------------------------------------------------------*/
+
+ VTAILQ_HEAD(smfhead, smf);
+
+ struct smf {
+ unsigned magic;
+ #define SMF_MAGIC 0x0927a8a0
+ struct storage s;
+ struct smf_sc *sc;
+
+ int alloc;
+
+ off_t size;
+ off_t offset;
+ unsigned char *ptr;
+
+ VTAILQ_ENTRY(smf) order;
+ VTAILQ_ENTRY(smf) status;
+ struct smfhead *flist;
+ };
+
+ struct smf_sc {
+ unsigned magic;
+ #define SMF_SC_MAGIC 0x52962ee7
+ struct lock mtx;
+ struct VSC_C_smf *stats;
+
+ const char *filename;
+ int fd;
+ unsigned pagesize;
+ uintmax_t filesize;
+ struct smfhead order;
+ struct smfhead free[NBUCKET];
+ struct smfhead used;
+ };
+
+ /*--------------------------------------------------------------------*/
+
+ /* Size the backing file: parse the user-supplied size string against the
+  * open fd and truncate the file to exactly that many bytes. */
+ static void
+ smf_initfile(struct smf_sc *sc, const char *size)
+ {
+ sc->filesize = STV_FileSize(sc->fd, size, &sc->pagesize, "-sfile");
+
+ /* Grow or shrink the file to the requested size; AZ() aborts on error. */
+ AZ(ftruncate(sc->fd, (off_t)sc->filesize));
+
+ /* XXX: force block allocation here or in open ? */
+ }
+
+ static const char default_size[] = "100M";
+ static const char default_filename[] = ".";
+
+ /* Management-process init for -sfile.  Arguments (all optional):
+  * av[0] = filename/dir, av[1] = size, av[2] = granularity (page size). */
+ static void
+ smf_init(struct stevedore *parent, int ac, char * const *av)
+ {
+ const char *size, *fn, *r;
+ struct smf_sc *sc;
+ unsigned u;
+ uintmax_t page_size;
+
+ AZ(av[ac]);
+
+ fn = default_filename;
+ size = default_size;
+ page_size = getpagesize();
+
+ if (ac > 3)
+ ARGV_ERR("(-sfile) too many arguments\n");
+ if (ac > 0 && *av[0] != '\0')
+ fn = av[0];
+ if (ac > 1 && *av[1] != '\0')
+ size = av[1];
+ if (ac > 2 && *av[2] != '\0') {
+
+ r = VNUM_2bytes(av[2], &page_size, 0);
+ if (r != NULL)
+ ARGV_ERR("(-sfile) granularity \"%s\": %s\n", av[2], r);
+ }
+
+ AN(fn);
+ AN(size);
+
+ ALLOC_OBJ(sc, SMF_SC_MAGIC);
+ XXXAN(sc);
+ VTAILQ_INIT(&sc->order);
+ for (u = 0; u < NBUCKET; u++)
+ VTAILQ_INIT(&sc->free[u]);
+ VTAILQ_INIT(&sc->used);
+ /* NOTE(review): narrows uintmax_t to unsigned; a huge user-supplied
+  * granularity would be silently truncated here -- confirm range check. */
+ sc->pagesize = page_size;
+
+ parent->priv = sc;
+
+ (void)STV_GetFile(fn, &sc->fd, &sc->filename, "-sfile");
+
+ /* Keep the fd open across fork so the cache child can mmap it. */
+ mgt_child_inherit(sc->fd, "storage_file");
+ smf_initfile(sc, size);
+ }
+
+ /*--------------------------------------------------------------------
+ * Insert/Remove from correct freelist
+ */
+
+ /* Insert a free range on the size-bucketed free list, keeping each
+  * bucket sorted by file offset (so adjacent ranges coalesce easily). */
+ static void
+ insfree(struct smf_sc *sc, struct smf *sp)
+ {
+ size_t b;
+ struct smf *sp2;
+ size_t ns;
+
+ assert(sp->alloc == 0);
+ assert(sp->flist == NULL);
+ Lck_AssertHeld(&sc->mtx);
+ /* Bucket index = whole pages; last bucket holds "larger than" ranges. */
+ b = sp->size / sc->pagesize;
+ if (b >= NBUCKET) {
+ b = NBUCKET - 1;
+ sc->stats->g_smf_large++;
+ } else {
+ sc->stats->g_smf_frag++;
+ }
+ sp->flist = &sc->free[b];
+ ns = b * sc->pagesize;
+ /* Find insertion point: list is ordered by ascending offset. */
+ VTAILQ_FOREACH(sp2, sp->flist, status) {
+ assert(sp2->size >= ns);
+ assert(sp2->alloc == 0);
+ assert(sp2->flist == sp->flist);
+ if (sp->offset < sp2->offset)
+ break;
+ }
+ if (sp2 == NULL)
+ VTAILQ_INSERT_TAIL(sp->flist, sp, status);
+ else
+ VTAILQ_INSERT_BEFORE(sp2, sp, status);
+ }
+
+ /* Remove a free range from its bucketed free list, reversing the
+  * statistics accounting done by insfree(). */
+ static void
+ remfree(const struct smf_sc *sc, struct smf *sp)
+ {
+ size_t b;
+
+ assert(sp->alloc == 0);
+ assert(sp->flist != NULL);
+ Lck_AssertHeld(&sc->mtx);
+ b = sp->size / sc->pagesize;
+ if (b >= NBUCKET) {
+ b = NBUCKET - 1;
+ sc->stats->g_smf_large--;
+ } else {
+ sc->stats->g_smf_frag--;
+ }
+ /* The bucket recomputed from size must match where insfree() put it. */
+ assert(sp->flist == &sc->free[b]);
+ VTAILQ_REMOVE(sp->flist, sp, status);
+ sp->flist = NULL;
+ }
+
+ /*--------------------------------------------------------------------
+ * Allocate a range from the first free range that is large enough.
+ */
+
+ /* Allocate 'bytes' (page-multiple) from the free lists.  First-fit over
+  * the exact-size and larger buckets; an oversized hit is split from the
+  * front and the remainder is returned to the free lists.  Returns NULL
+  * if no range is large enough.  Caller holds sc->mtx. */
+ static struct smf *
+ alloc_smf(struct smf_sc *sc, size_t bytes)
+ {
+ struct smf *sp, *sp2;
+ size_t b;
+
+ assert(!(bytes % sc->pagesize));
+ b = bytes / sc->pagesize;
+ if (b >= NBUCKET)
+ b = NBUCKET - 1;
+ sp = NULL;
+ /* Any entry in buckets [b, NBUCKET-2] is guaranteed big enough. */
+ for (; b < NBUCKET - 1; b++) {
+ sp = VTAILQ_FIRST(&sc->free[b]);
+ if (sp != NULL)
+ break;
+ }
+ /* The "larger than" bucket must be searched for an actual fit. */
+ if (sp == NULL) {
+ VTAILQ_FOREACH(sp, &sc->free[NBUCKET -1], status)
+ if (sp->size >= bytes)
+ break;
+ }
+ if (sp == NULL)
+ return (sp);
+
+ assert(sp->size >= bytes);
+ remfree(sc, sp);
+
+ if (sp->size == bytes) {
+ sp->alloc = 1;
+ VTAILQ_INSERT_TAIL(&sc->used, sp, status);
+ return (sp);
+ }
+
+ /* Split from front */
+ sp2 = malloc(sizeof *sp2);
+ XXXAN(sp2);
+ sc->stats->g_smf++;
+ *sp2 = *sp;
+
+ sp->offset += bytes;
+ sp->ptr += bytes;
+ sp->size -= bytes;
+
+ sp2->size = bytes;
+ sp2->alloc = 1;
+ VTAILQ_INSERT_BEFORE(sp, sp2, order);
+ VTAILQ_INSERT_TAIL(&sc->used, sp2, status);
+ insfree(sc, sp);
+ return (sp2);
+ }
+
+ /*--------------------------------------------------------------------
+ * Free a range. Attempt merge forward and backward, then sort into
+ * free list according to age.
+ */
+
+ /* Free a range: merge with the physically-adjacent successor and/or
+  * predecessor on the 'order' list when both the mapped address and the
+  * file offset are contiguous, then reinsert on the free lists.
+  * Caller holds sc->mtx. */
+ static void
+ free_smf(struct smf *sp)
+ {
+ struct smf *sp2;
+ struct smf_sc *sc = sp->sc;
+
+ CHECK_OBJ_NOTNULL(sp, SMF_MAGIC);
+ assert(sp->alloc != 0);
+ assert(sp->size > 0);
+ assert(!(sp->size % sc->pagesize));
+ VTAILQ_REMOVE(&sc->used, sp, status);
+ sp->alloc = 0;
+
+ /* Merge forward: absorb the next range into sp. */
+ sp2 = VTAILQ_NEXT(sp, order);
+ if (sp2 != NULL &&
+ sp2->alloc == 0 &&
+ (sp2->ptr == sp->ptr + sp->size) &&
+ (sp2->offset == sp->offset + sp->size)) {
+ sp->size += sp2->size;
+ VTAILQ_REMOVE(&sc->order, sp2, order);
+ remfree(sc, sp2);
+ free(sp2);
+ sc->stats->g_smf--;
+ }
+
+ /* Merge backward: absorb sp into the previous range. */
+ sp2 = VTAILQ_PREV(sp, smfhead, order);
+ if (sp2 != NULL &&
+ sp2->alloc == 0 &&
+ (sp->ptr == sp2->ptr + sp2->size) &&
+ (sp->offset == sp2->offset + sp2->size)) {
+ remfree(sc, sp2);
+ sp2->size += sp->size;
+ VTAILQ_REMOVE(&sc->order, sp, order);
+ free(sp);
+ sc->stats->g_smf--;
+ sp = sp2;
+ }
+
+ insfree(sc, sp);
+ }
+
+ /*--------------------------------------------------------------------
+ * Trim the tail of a range.
+ */
+
+ /* Shrink an allocated range to 'bytes' (page-multiple, 0 < bytes < size)
+  * by splitting off the tail as a new allocated range and immediately
+  * freeing it (which also triggers any coalescing).  Caller holds sc->mtx. */
+ static void
+ trim_smf(struct smf *sp, size_t bytes)
+ {
+ struct smf *sp2;
+ struct smf_sc *sc = sp->sc;
+
+ assert(sp->alloc != 0);
+ assert(bytes > 0);
+ assert(bytes < sp->size);
+ assert(!(bytes % sc->pagesize));
+ assert(!(sp->size % sc->pagesize));
+ CHECK_OBJ_NOTNULL(sp, SMF_MAGIC);
+ sp2 = malloc(sizeof *sp2);
+ XXXAN(sp2);
+ sc->stats->g_smf++;
+ *sp2 = *sp;
+
+ /* sp keeps the first 'bytes'; sp2 becomes the tail remainder. */
+ sp2->size -= bytes;
+ sp->size = bytes;
+ sp2->ptr += bytes;
+ sp2->offset += bytes;
+ VTAILQ_INSERT_AFTER(&sc->order, sp, sp2, order);
+ /* Mark sp2 busy on 'used' so free_smf()'s invariants hold, then free it. */
+ VTAILQ_INSERT_TAIL(&sc->used, sp2, status);
+ free_smf(sp2);
+ }
+
+ /*--------------------------------------------------------------------
+ * Insert a newly created range as busy, then free it to do any collapses
+ */
+
+ /* Register a freshly mmap'ed chunk: create an smf for it, insert it on
+  * the address-ordered list, then free it so it coalesces with any
+  * adjacent chunks already known. */
+ static void
+ new_smf(struct smf_sc *sc, unsigned char *ptr, off_t off, size_t len)
+ {
+ struct smf *sp, *sp2;
+
+ assert(!(len % sc->pagesize));
+ sp = calloc(sizeof *sp, 1);
+ XXXAN(sp);
+ sp->magic = SMF_MAGIC;
+ sp->s.magic = STORAGE_MAGIC;
+ sc->stats->g_smf++;
+
+ sp->sc = sc;
+ sp->size = len;
+ sp->ptr = ptr;
+ sp->offset = off;
+ /* Temporarily "allocated" so free_smf() below does the insertion work. */
+ sp->alloc = 1;
+
+ /* Keep the 'order' list sorted by mapped address. */
+ VTAILQ_FOREACH(sp2, &sc->order, order) {
+ if (sp->ptr < sp2->ptr) {
+ VTAILQ_INSERT_BEFORE(sp2, sp, order);
+ break;
+ }
+ }
+ if (sp2 == NULL)
+ VTAILQ_INSERT_TAIL(&sc->order, sp, order);
+
+ VTAILQ_INSERT_HEAD(&sc->used, sp, status);
+
+ free_smf(sp);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ /*
+ * XXX: This may be too aggressive and soak up too much address room.
+ * XXX: On the other hand, the user, directly or implicitly asked us to
+ * XXX: use this much storage, so we should make a decent effort.
+ * XXX: worst case (I think), malloc will fail.
+ */
+
+ /* Recursively mmap the file in the largest pieces the OS will give us.
+  * 'fail' tracks the smallest size seen to fail; once it drops below
+  * MINPAGES pages we give up on that subtree.  'sum' accumulates the
+  * total bytes successfully mapped. */
+ static void
+ smf_open_chunk(struct smf_sc *sc, off_t sz, off_t off, off_t *fail, off_t *sum)
+ {
+ void *p;
+ off_t h;
+
+ assert(sz != 0);
+ assert(!(sz % sc->pagesize));
+
+ /* NOTE(review): *fail is off_t (signed) compared against a uintmax_t
+  * product; the usual-arithmetic conversion makes this an unsigned
+  * compare -- confirm *fail can never be negative here. */
+ if (*fail < (uintmax_t)sc->pagesize * MINPAGES)
+ return;
+
+ if (sz > 0 && sz < *fail && sz < SSIZE_MAX) {
+ p = mmap(NULL, sz, PROT_READ|PROT_WRITE,
+ MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, off);
+ if (p != MAP_FAILED) {
+ (void) madvise(p, sz, MADV_RANDOM);
+ (*sum) += sz;
+ new_smf(sc, p, off, sz);
+ return;
+ }
+ }
+
+ if (sz < *fail)
+ *fail = sz;
+
+ /* Split in half (page-aligned) and try each part separately. */
+ h = sz / 2;
+ if (h > SSIZE_MAX)
+ h = SSIZE_MAX;
+ h -= (h % sc->pagesize);
+
+ smf_open_chunk(sc, h, off, fail, sum);
+ smf_open_chunk(sc, sz - h, off + h, fail, sum);
+ }
+
+ /* Cache-process open: allocate stats in shared memory, mmap the whole
+  * file, and bail out (exit 2) if less than MINPAGES pages got mapped. */
+ static void
+ smf_open(const struct stevedore *st)
+ {
+ struct smf_sc *sc;
+ off_t fail = 1 << 30; /* XXX: where is OFF_T_MAX ? */
+ off_t sum = 0;
+
+ CAST_OBJ_NOTNULL(sc, st->priv, SMF_SC_MAGIC);
+ sc->stats = VSM_Alloc(sizeof *sc->stats,
+ VSC_CLASS, VSC_TYPE_SMF, st->ident);
+ Lck_New(&sc->mtx, lck_smf);
+ Lck_Lock(&sc->mtx);
+ smf_open_chunk(sc, sc->filesize, 0, &fail, &sum);
+ Lck_Unlock(&sc->mtx);
+ printf("SMF.%s mmap'ed %ju bytes of %ju\n",
+ st->ident, (uintmax_t)sum, sc->filesize);
+
+ /* XXX */
+ if (sum < MINPAGES * (off_t)getpagesize())
+ exit (2);
+
+ sc->stats->g_space += sc->filesize;
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ /* storage_alloc_f: round the request up to a whole number of pages,
+  * grab a range under the lock, and fill in the struct storage that is
+  * embedded in the smf.  Returns NULL when no space is available. */
+ static struct storage *
+ smf_alloc(struct stevedore *st, size_t size)
+ {
+ struct smf *smf;
+ struct smf_sc *sc;
+
+ CAST_OBJ_NOTNULL(sc, st->priv, SMF_SC_MAGIC);
+ assert(size > 0);
+ /* Round up to pagesize; the mask trick assumes pagesize is a power
+  * of two (true for mmap page sizes). */
+ size += (sc->pagesize - 1);
+ size &= ~(sc->pagesize - 1);
+ Lck_Lock(&sc->mtx);
+ sc->stats->c_req++;
+ smf = alloc_smf(sc, size);
+ if (smf == NULL) {
+ sc->stats->c_fail++;
+ Lck_Unlock(&sc->mtx);
+ return (NULL);
+ }
+ CHECK_OBJ_NOTNULL(smf, SMF_MAGIC);
+ sc->stats->g_alloc++;
+ sc->stats->c_bytes += smf->size;
+ sc->stats->g_bytes += smf->size;
+ sc->stats->g_space -= smf->size;
+ Lck_Unlock(&sc->mtx);
+ CHECK_OBJ_NOTNULL(&smf->s, STORAGE_MAGIC); /*lint !e774 */
+ XXXAN(smf);
+ assert(smf->size == size);
+ smf->s.space = size;
+ smf->s.priv = smf;
+ smf->s.ptr = smf->ptr;
+ smf->s.len = 0;
+ smf->s.stevedore = st;
+ #ifdef SENDFILE_WORKS
+ /* Expose fd/offset so delivery can sendfile() straight from the file. */
+ smf->s.fd = smf->sc->fd;
+ smf->s.where = smf->offset;
+ #endif
+ return (&smf->s);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ /* storage_trim_f: give back the unused tail of an allocation, rounded
+  * up to whole pages.  No-op when rounding leaves nothing to return. */
+ static void
+ smf_trim(struct storage *s, size_t size)
+ {
+ struct smf *smf;
+ struct smf_sc *sc;
+
+ CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
+ assert(size > 0);
+ assert(size <= s->space);
+ /* NOTE(review): duplicates the assert(size > 0) two lines up. */
+ xxxassert(size > 0); /* XXX: seen */
+ CAST_OBJ_NOTNULL(smf, s->priv, SMF_MAGIC);
+ assert(size <= smf->size);
+ sc = smf->sc;
+ size += (sc->pagesize - 1);
+ size &= ~(sc->pagesize - 1);
+ if (smf->size > size) {
+ Lck_Lock(&sc->mtx);
+ sc->stats->c_freed += (smf->size - size);
+ sc->stats->g_bytes -= (smf->size - size);
+ sc->stats->g_space += (smf->size - size);
+ trim_smf(smf, size);
+ assert(smf->size == size);
+ Lck_Unlock(&sc->mtx);
+ s->space = size;
+ }
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ /* storage_free_f: return the whole range to the free lists and update
+  * the shared-memory counters under the lock. */
+ static void __match_proto__(storage_free_f)
+ smf_free(struct storage *s)
+ {
+ struct smf *smf;
+ struct smf_sc *sc;
+
+ CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
+ CAST_OBJ_NOTNULL(smf, s->priv, SMF_MAGIC);
+ sc = smf->sc;
+ Lck_Lock(&sc->mtx);
+ sc->stats->g_alloc--;
+ sc->stats->c_freed += smf->size;
+ sc->stats->g_bytes -= smf->size;
+ sc->stats->g_space += smf->size;
+ free_smf(smf);
+ Lck_Unlock(&sc->mtx);
+ }
+
+ /*--------------------------------------------------------------------*/
+
+ const struct stevedore smf_stevedore = {
+ .magic = STEVEDORE_MAGIC,
+ .name = "file",
+ .init = smf_init,
+ .open = smf_open,
+ .alloc = smf_alloc,
+ .trim = smf_trim,
+ .free = smf_free,
++ .dup = default_dup,
+ };
+
+ #ifdef INCLUDE_TEST_DRIVER
+
+ void vca_flush(struct sess *sp) {}
+
+ #define N 100
+ #define M (128*1024)
+
+ struct storage *s[N];
+
+ /* Debug dump of the allocator lists (test driver only; disabled by the
+  * early return).
+  * NOTE(review): 'return (0);' in a void function will not compile, and
+  * 'VTAILQ_FOREACH(s, &sc->free, ...)' predates the free[] bucket array
+  * -- this test-driver code appears to have bitrotted; confirm upstream. */
+ static void
+ dumpit(void)
+ {
+ struct smf_sc *sc = smf_stevedore.priv;
+ struct smf *s;
+
+ return (0);
+ printf("----------------\n");
+ printf("Order:\n");
+ VTAILQ_FOREACH(s, &sc->order, order) {
+ printf("%10p %12ju %12ju %12ju\n",
+ s, s->offset, s->size, s->offset + s->size);
+ }
+ printf("Used:\n");
+ VTAILQ_FOREACH(s, &sc->used, status) {
+ printf("%10p %12ju %12ju %12ju\n",
+ s, s->offset, s->size, s->offset + s->size);
+ }
+ printf("Free:\n");
+ VTAILQ_FOREACH(s, &sc->free, status) {
+ printf("%10p %12ju %12ju %12ju\n",
+ s, s->offset, s->size, s->offset + s->size);
+ }
+ printf("================\n");
+ }
+
+ int
+ main(int argc, char **argv)
+ {
+ int i, j;
+
+ setbuf(stdout, NULL);
+ smf_init(&smf_stevedore, "");
+ smf_open(&smf_stevedore);
+ while (1) {
+ dumpit();
+ i = random() % N;
+ do
+ j = random() % M;
+ while (j == 0);
+ if (s[i] == NULL) {
+ s[i] = smf_alloc(&smf_stevedore, j);
+ printf("A %10p %12d\n", s[i], j);
+ } else if (j < s[i]->space) {
+ smf_trim(s[i], j);
+ printf("T %10p %12d\n", s[i], j);
+ } else {
+ smf_free(s[i]);
+ printf("D %10p\n", s[i]);
+ s[i] = NULL;
+ }
+ }
+ }
+
+ #endif /* INCLUDE_TEST_DRIVER */
diff --cc bin/varnishd/storage/storage_malloc.c
index 0000000,156c832..79fefb6
mode 000000,100644..100644
--- a/bin/varnishd/storage/storage_malloc.c
+++ b/bin/varnishd/storage/storage_malloc.c
@@@ -1,0 -1,256 +1,257 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Storage method based on malloc(3)
+ */
+
+ #include "config.h"
+
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ #include "cache/cache.h"
+ #include "storage/storage.h"
+
+ #include "vnum.h"
+
+ struct sma_sc {
+ unsigned magic;
+ #define SMA_SC_MAGIC 0x1ac8a345
+ struct lock sma_mtx;
+ size_t sma_max;
+ size_t sma_alloc;
+ struct VSC_C_sma *stats;
+ };
+
+ struct sma {
+ unsigned magic;
+ #define SMA_MAGIC 0x69ae9bb9
+ struct storage s;
+ size_t sz;
+ struct sma_sc *sc;
+ };
+
+ /* storage_alloc_f for -smalloc: reserve the bytes against sma_max under
+  * the lock first, then malloc outside the lock; on malloc failure the
+  * counters are rolled back.  Returns NULL if over quota or OOM. */
+ static struct storage *
+ sma_alloc(struct stevedore *st, size_t size)
+ {
+ struct sma_sc *sma_sc;
+ struct sma *sma = NULL;
+ void *p;
+
+ CAST_OBJ_NOTNULL(sma_sc, st->priv, SMA_SC_MAGIC);
+ Lck_Lock(&sma_sc->sma_mtx);
+ sma_sc->stats->c_req++;
+ if (sma_sc->sma_alloc + size > sma_sc->sma_max) {
+ /* NOTE(review): counts failed *bytes* here but failed *requests*
+  * (c_fail++) in the OOM rollback below -- confirm intended units. */
+ sma_sc->stats->c_fail += size;
+ size = 0;
+ } else {
+ sma_sc->sma_alloc += size;
+ sma_sc->stats->c_bytes += size;
+ sma_sc->stats->g_alloc++;
+ sma_sc->stats->g_bytes += size;
+ /* g_space is only meaningful when a quota was configured. */
+ if (sma_sc->sma_max != SIZE_MAX)
+ sma_sc->stats->g_space -= size;
+ }
+ Lck_Unlock(&sma_sc->sma_mtx);
+
+ if (size == 0)
+ return (NULL);
+
+ /*
+ * Do not collapse the sma allocation with sma->s.ptr: it is not
+ * a good idea. Not only would it make ->trim impossible,
+ * performance-wise it would be a catastrophe with chunksized
+ * allocations growing another full page, just to accommodate the sma.
+ */
+
+ p = malloc(size);
+ if (p != NULL) {
+ ALLOC_OBJ(sma, SMA_MAGIC);
+ if (sma != NULL)
+ sma->s.ptr = p;
+ else
+ free(p);
+ }
+ if (sma == NULL) {
+ Lck_Lock(&sma_sc->sma_mtx);
+ /*
+ * XXX: Not nice to have counters go backwards, but we do
+ * XXX: Not want to pick up the lock twice just for stats.
+ */
+ sma_sc->stats->c_fail++;
+ sma_sc->stats->c_bytes -= size;
+ sma_sc->stats->g_alloc--;
+ sma_sc->stats->g_bytes -= size;
+ if (sma_sc->sma_max != SIZE_MAX)
+ sma_sc->stats->g_space += size;
+ Lck_Unlock(&sma_sc->sma_mtx);
+ return (NULL);
+ }
+ sma->sc = sma_sc;
+ sma->sz = size;
+ sma->s.priv = sma;
+ sma->s.len = 0;
+ sma->s.space = size;
+ #ifdef SENDFILE_WORKS
+ /* Heap storage has no backing fd, so sendfile cannot be used. */
+ sma->s.fd = -1;
+ #endif
+ sma->s.stevedore = st;
+ sma->s.magic = STORAGE_MAGIC;
+ return (&sma->s);
+ }
+
+ /* storage_free_f: release the payload and bookkeeping struct, updating
+  * the quota and counters under the lock. */
+ static void __match_proto__(storage_free_f)
+ sma_free(struct storage *s)
+ {
+ struct sma_sc *sma_sc;
+ struct sma *sma;
+
+ CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
+ CAST_OBJ_NOTNULL(sma, s->priv, SMA_MAGIC);
+ sma_sc = sma->sc;
+ assert(sma->sz == sma->s.space);
+ Lck_Lock(&sma_sc->sma_mtx);
+ sma_sc->sma_alloc -= sma->sz;
+ sma_sc->stats->g_alloc--;
+ sma_sc->stats->g_bytes -= sma->sz;
+ sma_sc->stats->c_freed += sma->sz;
+ if (sma_sc->sma_max != SIZE_MAX)
+ sma_sc->stats->g_space += sma->sz;
+ Lck_Unlock(&sma_sc->sma_mtx);
+ free(sma->s.ptr);
+ free(sma);
+ }
+
+ /* storage_trim_f: shrink the payload with realloc().  Skipped when the
+  * saving is under 256 bytes (not worth a realloc); silently kept at the
+  * old size if realloc fails. */
+ static void
+ sma_trim(struct storage *s, size_t size)
+ {
+ struct sma_sc *sma_sc;
+ struct sma *sma;
+ void *p;
+ size_t delta;
+
+ CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
+ CAST_OBJ_NOTNULL(sma, s->priv, SMA_MAGIC);
+ sma_sc = sma->sc;
+
+ assert(sma->sz == sma->s.space);
+ assert(size < sma->sz);
+ delta = sma->sz - size;
+ if (delta < 256)
+ return;
+ if ((p = realloc(sma->s.ptr, size)) != NULL) {
+ Lck_Lock(&sma_sc->sma_mtx);
+ sma_sc->sma_alloc -= delta;
+ sma_sc->stats->g_bytes -= delta;
+ sma_sc->stats->c_freed += delta;
+ if (sma_sc->sma_max != SIZE_MAX)
+ sma_sc->stats->g_space += delta;
+ sma->sz = size;
+ Lck_Unlock(&sma_sc->sma_mtx);
+ sma->s.ptr = p;
+ s->space = size;
+ }
+ }
+
+ static double
+ sma_used_space(const struct stevedore *st)
+ {
+ struct sma_sc *sma_sc;
+
+ CAST_OBJ_NOTNULL(sma_sc, st->priv, SMA_SC_MAGIC);
+ return (sma_sc->sma_alloc);
+ }
+
+ static double
+ sma_free_space(const struct stevedore *st)
+ {
+ struct sma_sc *sma_sc;
+
+ CAST_OBJ_NOTNULL(sma_sc, st->priv, SMA_SC_MAGIC);
+ return (sma_sc->sma_max - sma_sc->sma_alloc);
+ }
+
+ /* Management-process init for -smalloc.  Optional av[0] is the size
+  * limit (default: unlimited, i.e. SIZE_MAX); rejects sizes that do not
+  * fit in size_t or are below 1MB (likely a forgotten M/G suffix). */
+ static void
+ sma_init(struct stevedore *parent, int ac, char * const *av)
+ {
+ const char *e;
+ uintmax_t u;
+ struct sma_sc *sc;
+
+ ASSERT_MGT();
+ ALLOC_OBJ(sc, SMA_SC_MAGIC);
+ AN(sc);
+ sc->sma_max = SIZE_MAX;
+ assert(sc->sma_max == SIZE_MAX);
+ parent->priv = sc;
+
+ AZ(av[ac]);
+ if (ac > 1)
+ ARGV_ERR("(-smalloc) too many arguments\n");
+
+ if (ac == 0 || *av[0] == '\0')
+ return;
+
+ e = VNUM_2bytes(av[0], &u, 0);
+ if (e != NULL)
+ ARGV_ERR("(-smalloc) size \"%s\": %s\n", av[0], e);
+ /* Round-trip through size_t to detect values too big for this arch. */
+ if ((u != (uintmax_t)(size_t)u))
+ ARGV_ERR("(-smalloc) size \"%s\": too big\n", av[0]);
+ if (u < 1024*1024)
+ ARGV_ERR("(-smalloc) size \"%s\": too small, "
+ "did you forget to specify M or G?\n", av[0]);
+
+ sc->sma_max = u;
+ }
+
+ /* Cache-process open: create the lock and the shared-memory stats
+  * segment; seed g_space with the quota when one was configured. */
+ static void
+ sma_open(const struct stevedore *st)
+ {
+ struct sma_sc *sma_sc;
+
+ CAST_OBJ_NOTNULL(sma_sc, st->priv, SMA_SC_MAGIC);
+ Lck_New(&sma_sc->sma_mtx, lck_sma);
+ sma_sc->stats = VSM_Alloc(sizeof *sma_sc->stats,
+ VSC_CLASS, VSC_TYPE_SMA, st->ident);
+ memset(sma_sc->stats, 0, sizeof *sma_sc->stats);
+ if (sma_sc->sma_max != SIZE_MAX)
+ sma_sc->stats->g_space = sma_sc->sma_max;
+ }
+
+ const struct stevedore sma_stevedore = {
+ .magic = STEVEDORE_MAGIC,
+ .name = "malloc",
+ .init = sma_init,
+ .open = sma_open,
+ .alloc = sma_alloc,
+ .free = sma_free,
+ .trim = sma_trim,
+ .var_free_space = sma_free_space,
+ .var_used_space = sma_used_space,
++ .dup = default_dup,
+ };
diff --cc bin/varnishd/storage/storage_persistent.c
index 0000000,ded638b..095fcf2
mode 000000,100644..100644
--- a/bin/varnishd/storage/storage_persistent.c
+++ b/bin/varnishd/storage/storage_persistent.c
@@@ -1,0 -1,678 +1,679 @@@
+ /*-
+ * Copyright (c) 2008-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Persistent storage method
+ *
+ * XXX: Before we start the client or maybe after it stops, we should give the
+ * XXX: stevedores a chance to examine their storage for consistency.
+ *
+ * XXX: Do we ever free the LRU-lists ?
+ */
+
+ #include "config.h"
+
+ #include <sys/param.h>
+ #include <sys/mman.h>
+
+ #include <stdint.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+
+ #include "cache/cache.h"
+ #include "storage/storage.h"
+
+ #include "hash/hash_slinger.h"
+ #include "vcli.h"
+ #include "vcli_priv.h"
+ #include "vend.h"
+ #include "vsha256.h"
+
+ #include "persistent.h"
+ #include "storage/storage_persistent.h"
+
+ /*--------------------------------------------------------------------*/
+
+ /*
+ * silos is unlocked, it only changes during startup when we are
+ * single-threaded
+ */
+ static VTAILQ_HEAD(,smp_sc) silos = VTAILQ_HEAD_INITIALIZER(silos);
+
+ /*--------------------------------------------------------------------
+ * Add bans to silos
+ */
+
+ /* Append one ban record to a silo's ban log: a "BAN\0" marker, a 32-bit
+  * big-endian length, then the serialized ban itself, and re-sign the
+  * region so it validates on reload. */
+ static void
+ smp_appendban(struct smp_sc *sc, struct smp_signctx *ctx,
+ uint32_t len, const uint8_t *ban)
+ {
+ uint8_t *ptr, *ptr2;
+
+ (void)sc;
+ ptr = ptr2 = SIGN_END(ctx);
+
+ /* memcpy of 4 bytes includes the NUL of "BAN". */
+ memcpy(ptr, "BAN", 4);
+ ptr += 4;
+
+ vbe32enc(ptr, len);
+ ptr += 4;
+
+ memcpy(ptr, ban, len);
+ ptr += len;
+
+ smp_append_sign(ctx, ptr2, ptr - ptr2);
+ }
+
+ /* Trust that cache_ban.c takes care of locking */
+
+ /* Record a new ban in both redundant ban logs of every silo. */
+ void
+ SMP_NewBan(const uint8_t *ban, unsigned ln)
+ {
+ struct smp_sc *sc;
+
+ VTAILQ_FOREACH(sc, &silos, list) {
+ smp_appendban(sc, &sc->ban1, ln, ban);
+ smp_appendban(sc, &sc->ban2, ln, ban);
+ }
+ }
+
+ /*--------------------------------------------------------------------
+ * Attempt to open and read in a ban list
+ */
+
+ /* Validate a silo's ban log signature, then walk its records (the
+  * "BAN" + length + payload format written by smp_appendban) and replay
+  * each ban via BAN_Reload().  Returns 0 on success, non-zero on a
+  * signature failure or a malformed record (1001 = bad marker,
+  * 1003 = record overruns the signed region). */
+ static int
+ smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx)
+ {
+ uint8_t *ptr, *pe;
+ uint32_t length;
+ int i, retval = 0;
+
+ ASSERT_CLI();
+ (void)sc;
+ i = smp_chk_sign(ctx);
+ if (i)
+ return (i);
+ ptr = SIGN_DATA(ctx);
+ pe = ptr + ctx->ss->length;
+
+ while (ptr < pe) {
+ if (memcmp(ptr, "BAN", 4)) {
+ retval = 1001;
+ break;
+ }
+ ptr += 4;
+
+ length = vbe32dec(ptr);
+ ptr += 4;
+
+ if (ptr + length > pe) {
+ retval = 1003;
+ break;
+ }
+
+ BAN_Reload(ptr, length);
+
+ ptr += length;
+ }
+ assert(ptr <= pe);
+ return (retval);
+ }
+
+ /*--------------------------------------------------------------------
+ * Attempt to open and read in a segment list
+ */
+
+ static int
+ smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx)
+ {
+ uint64_t length, l;
+ struct smp_segptr *ss, *se;
+ struct smp_seg *sg, *sg1, *sg2;
+ int i, n = 0;
+
+ ASSERT_CLI();
+ i = smp_chk_sign(ctx);
+ if (i)
+ return (i);
+
+ ss = SIGN_DATA(ctx);
+ length = ctx->ss->length;
+
+ if (length == 0) {
+ /* No segments */
+ sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF];
+ return (0);
+ }
+ se = ss + length / sizeof *ss;
+ se--;
+ assert(ss <= se);
+
+ /*
+ * Locate the free reserve, there are only two basic cases,
+ * but once we start dropping segments, things gets more complicated.
+ */
+
+ sc->free_offset = se->offset + se->length;
+ l = sc->mediasize - sc->free_offset;
+ if (se->offset > ss->offset && l >= sc->free_reserve) {
+ /*
+ * [__xxxxyyyyzzzz___]
+ * Plenty of space at tail, do nothing.
+ */
+ } else if (ss->offset > se->offset) {
+ /*
+ * [zzzz____xxxxyyyy_]
+ * (make) space between ends
+ * We might nuke the entire tail end without getting
+ * enough space, in which case we fall through to the
+ * last check.
+ */
+ while (ss < se && ss->offset > se->offset) {
+ l = ss->offset - (se->offset + se->length);
+ if (l > sc->free_reserve)
+ break;
+ ss++;
+ n++;
+ }
+ }
+
+ if (l < sc->free_reserve) {
+ /*
+ * [__xxxxyyyyzzzz___]
+ * (make) space at front
+ */
+ sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF];
+ while (ss < se) {
+ l = ss->offset - sc->free_offset;
+ if (l > sc->free_reserve)
+ break;
+ ss++;
+ n++;
+ }
+ }
+
+ assert (l >= sc->free_reserve);
+
+
+ sg1 = NULL;
+ sg2 = NULL;
+ for(; ss <= se; ss++) {
+ ALLOC_OBJ(sg, SMP_SEG_MAGIC);
+ AN(sg);
+ sg->lru = LRU_Alloc();
+ CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC);
+ sg->p = *ss;
+
+ sg->flags |= SMP_SEG_MUSTLOAD;
+
+ /*
+ * HACK: prevent save_segs from nuking segment until we have
+ * HACK: loaded it.
+ */
+ sg->nobj = 1;
+ if (sg1 != NULL) {
+ assert(sg1->p.offset != sg->p.offset);
+ if (sg1->p.offset < sg->p.offset)
+ assert(smp_segend(sg1) <= sg->p.offset);
+ else
+ assert(smp_segend(sg) <= sg1->p.offset);
+ }
+ if (sg2 != NULL) {
+ assert(sg2->p.offset != sg->p.offset);
+ if (sg2->p.offset < sg->p.offset)
+ assert(smp_segend(sg2) <= sg->p.offset);
+ else
+ assert(smp_segend(sg) <= sg2->p.offset);
+ }
+
+ /* XXX: check that they are inside silo */
+ /* XXX: check that they don't overlap */
+ /* XXX: check that they are serial */
+ sg->sc = sc;
+ VTAILQ_INSERT_TAIL(&sc->segments, sg, list);
+ sg2 = sg;
+ if (sg1 == NULL)
+ sg1 = sg;
+ }
+ printf("Dropped %d segments to make free_reserve\n", n);
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------
+ * Silo worker thread
+ */
+
+ /* Background silo worker: load all segments flagged MUSTLOAD, release
+  * the ban-list tail reference taken at open, then poll once a second to
+  * persist the segment list when the oldest segment has emptied out. */
+ static void *
+ smp_thread(struct sess *sp, void *priv)
+ {
+ struct smp_sc *sc;
+ struct smp_seg *sg;
+
+ (void)sp;
+ CAST_OBJ_NOTNULL(sc, priv, SMP_SC_MAGIC);
+
+ /* First, load all the objects from all segments */
+ VTAILQ_FOREACH(sg, &sc->segments, list)
+ if (sg->flags & SMP_SEG_MUSTLOAD)
+ smp_load_seg(sp, sc, sg);
+
+ sc->flags |= SMP_SC_LOADED;
+ BAN_TailDeref(&sc->tailban);
+ AZ(sc->tailban);
+ printf("Silo completely loaded\n");
+ while (1) {
+ (void)sleep (1);
+ sg = VTAILQ_FIRST(&sc->segments);
+ /* NOTE(review): 'sg -> sc->cur_seg' parses as sg->sc->cur_seg
+  * (a pointer truthiness test); upstream uses 'sg != sc->cur_seg'
+  * here -- likely archive mangling, confirm against trunk. */
+ if (sg != NULL && sg -> sc->cur_seg &&
+ sg->nobj == 0) {
+ Lck_Lock(&sc->mtx);
+ smp_save_segs(sc);
+ Lck_Unlock(&sc->mtx);
+ }
+ }
+ NEEDLESS_RETURN(NULL);
+ }
+
+ /*--------------------------------------------------------------------
+ * Open a silo in the worker process
+ */
+
+ static void
+ smp_open(const struct stevedore *st)
+ {
+ struct smp_sc *sc;
+
+ ASSERT_CLI();
+
+ CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC);
+
+ Lck_New(&sc->mtx, lck_smp);
+ Lck_Lock(&sc->mtx);
+
+ sc->stevedore = st;
+
+ /* We trust the parent to give us a valid silo, for good measure: */
+ AZ(smp_valid_silo(sc));
+
+ AZ(mprotect(sc->base, 4096, PROT_READ));
+
+ sc->ident = SIGN_DATA(&sc->idn);
+
+ /* We attempt ban1 first, and if that fails, try ban2 */
+ if (smp_open_bans(sc, &sc->ban1))
+ AZ(smp_open_bans(sc, &sc->ban2));
+
+ /* We attempt seg1 first, and if that fails, try seg2 */
+ if (smp_open_segs(sc, &sc->seg1))
+ AZ(smp_open_segs(sc, &sc->seg2));
+
+ /*
+ * Grab a reference to the tail of the ban list, until the thread
+ * has loaded all objects, so we can be sure that all of our
+ * proto-bans survive until then.
+ */
+ sc->tailban = BAN_TailRef();
+ AN(sc->tailban);
+
+ /* XXX: save segments to ensure consistency between seg1 & seg2 ? */
+
+ /* XXX: abandon early segments to make sure we have free space ? */
+
+ /* Open a new segment, so we are ready to write */
+ smp_new_seg(sc);
+
+ /* Start the worker silo worker thread, it will load the objects */
+ WRK_BgThread(&sc->thread, "persistence", smp_thread, sc);
+
+ VTAILQ_INSERT_TAIL(&silos, sc, list);
+ Lck_Unlock(&sc->mtx);
+ }
+
+ /*--------------------------------------------------------------------
+ * Close a silo
+ */
+
+ /* Close a silo: seal the currently-open segment under the lock.
+  * The worker thread is not reaped yet (see XXX). */
+ static void
+ smp_close(const struct stevedore *st)
+ {
+ struct smp_sc *sc;
+
+ ASSERT_CLI();
+
+ CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC);
+ Lck_Lock(&sc->mtx);
+ smp_close_seg(sc, sc->cur_seg);
+ Lck_Unlock(&sc->mtx);
+
+ /* XXX: reap thread */
+ }
+
+ /*--------------------------------------------------------------------
+ * Allocate a bite.
+ *
+ * Allocate [min_size...max_size] space from the bottom of the segment,
+ * as is convenient.
+ *
+ * If 'so' + 'idx' is given, also allocate a smp_object from the top
+ * of the segment.
+ *
+ * Return the segment in 'ssg' if given.
+ */
+
+ static struct storage *
+ smp_allocx(struct stevedore *st, size_t min_size, size_t max_size,
+ struct smp_object **so, unsigned *idx, struct smp_seg **ssg)
+ {
+ struct smp_sc *sc;
+ struct storage *ss;
+ struct smp_seg *sg;
+ unsigned tries;
+ uint64_t left, extra;
+
+ CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC);
+ assert(min_size <= max_size);
+
+ max_size = IRNUP(sc, max_size);
+ min_size = IRNUP(sc, min_size);
+
+ extra = IRNUP(sc, sizeof(*ss));
+ if (so != NULL) {
+ extra += sizeof(**so);
+ AN(idx);
+ }
+
+ Lck_Lock(&sc->mtx);
+ sg = NULL;
+ ss = NULL;
+ for (tries = 0; tries < 3; tries++) {
+ left = smp_spaceleft(sc, sc->cur_seg);
+ if (left >= extra + min_size)
+ break;
+ smp_close_seg(sc, sc->cur_seg);
+ smp_new_seg(sc);
+ }
+ if (left >= extra + min_size) {
+ if (left < extra + max_size)
+ max_size = IRNDN(sc, left - extra);
+
+ sg = sc->cur_seg;
+ ss = (void*)(sc->base + sc->next_bot);
+ sc->next_bot += max_size + IRNUP(sc, sizeof(*ss));
+ sg->nalloc++;
+ if (so != NULL) {
+ sc->next_top -= sizeof(**so);
+ *so = (void*)(sc->base + sc->next_top);
+ /* Render this smp_object mostly harmless */
+ (*so)->ttl = 0.;
+ (*so)->ban = 0.;
+ (*so)->ptr = 0;;
+ sg->objs = *so;
+ *idx = ++sg->p.lobjlist;
+ }
+ (void)smp_spaceleft(sc, sg); /* for the assert */
+ }
+ Lck_Unlock(&sc->mtx);
+
+ if (ss == NULL)
+ return (ss);
+ AN(sg);
+ assert(max_size >= min_size);
+
+ /* Fill the storage structure */
+ memset(ss, 0, sizeof *ss);
+ ss->magic = STORAGE_MAGIC;
+ ss->ptr = PRNUP(sc, ss + 1);
+ ss->space = max_size;
+ ss->priv = sc;
+ ss->stevedore = st;
+ #ifdef SENDFILE_WORKS
+ ss->fd = sc->fd;
+ #endif
+ if (ssg != NULL)
+ *ssg = sg;
+ return (ss);
+ }
+
+ /*--------------------------------------------------------------------
+ * Allocate an object
+ */
+
+ static struct object *
+ smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot,
+ const struct stv_objsecrets *soc)
+ {
+ struct object *o;
+ struct storage *st;
+ struct smp_sc *sc;
+ struct smp_seg *sg;
+ struct smp_object *so;
+ struct objcore *oc;
+ unsigned objidx;
+
+ if (sp->objcore == NULL)
+ return (NULL); /* from cnt_error */
+ CAST_OBJ_NOTNULL(sc, stv->priv, SMP_SC_MAGIC);
+ AN(sp->objcore);
+ AN(sp->wrk->exp.ttl > 0.);
+
+ ltot = IRNUP(sc, ltot);
+
+ st = smp_allocx(stv, ltot, ltot, &so, &objidx, &sg);
+ if (st == NULL)
+ return (NULL);
+
+ assert(st->space >= ltot);
+ ltot = st->len = st->space;
+
+ o = STV_MkObject(sp, st->ptr, ltot, soc);
+ CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
+ o->objstore = st;
+
+ oc = o->objcore;
+ CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+ oc->flags |= OC_F_LRUDONTMOVE;
+
+ Lck_Lock(&sc->mtx);
+ sg->nfixed++;
+ sg->nobj++;
+
+ /* We have to do this somewhere, might as well be here... */
+ assert(sizeof so->hash == DIGEST_LEN);
+ memcpy(so->hash, oc->objhead->digest, DIGEST_LEN);
+ so->ttl = EXP_Grace(NULL, o);
+ so->ptr = (uint8_t*)o - sc->base;
+ so->ban = BAN_Time(oc->ban);
+
+ smp_init_oc(oc, sg, objidx);
+
+ Lck_Unlock(&sc->mtx);
+ return (o);
+ }
+
+ /*--------------------------------------------------------------------
+ * Allocate a bite
+ */
+
+ static struct storage *
+ smp_alloc(struct stevedore *st, size_t size)
+ {
+
+ return (smp_allocx(st,
+ size > 4096 ? 4096 : size, size, NULL, NULL, NULL));
+ }
+
+ /*--------------------------------------------------------------------
+ * Trim a bite
+ * XXX: We could trim the last allocation.
+ */
+
+ static void
+ smp_trim(struct storage *ss, size_t size)
+ {
+
+ (void)ss;
+ (void)size;
+ }
+
+ /*--------------------------------------------------------------------
+ * We don't track frees of storage, we track the objects which own the
+ * storage and when there are no more objects in the first segment,
+ * it can be reclaimed.
+ * XXX: We could free the last allocation, but does that happen ?
+ */
+
+ static void __match_proto__(storage_free_f)
+ smp_free(struct storage *st)
+ {
+
+ /* XXX */
+ (void)st;
+ }
+
+
+ /*--------------------------------------------------------------------*/
+
+ const struct stevedore smp_stevedore = {
+ .magic = STEVEDORE_MAGIC,
+ .name = "persistent",
+ .init = smp_mgt_init,
+ .open = smp_open,
+ .close = smp_close,
+ .alloc = smp_alloc,
+ .allocobj = smp_allocobj,
+ .free = smp_free,
+ .trim = smp_trim,
++ .dup = default_dup,
+ };
+
+ /*--------------------------------------------------------------------
+ * Persistence is a bear to test unadultered, so we cheat by adding
+ * a cli command we can use to make it do tricks for us.
+ */
+
+ static void
+ debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs)
+ {
+ struct smp_seg *sg;
+ struct objcore *oc;
+
+ VCLI_Out(cli, "Silo: %s (%s)\n",
+ sc->stevedore->ident, sc->filename);
+ VTAILQ_FOREACH(sg, &sc->segments, list) {
+ VCLI_Out(cli, " Seg: [0x%jx ... +0x%jx]\n",
+ (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length);
+ if (sg == sc->cur_seg)
+ VCLI_Out(cli,
+ " Alloc: [0x%jx ... 0x%jx] = 0x%jx free\n",
+ (uintmax_t)(sc->next_bot),
+ (uintmax_t)(sc->next_top),
+ (uintmax_t)(sc->next_top - sc->next_bot));
+ VCLI_Out(cli, " %u nobj, %u alloc, %u lobjlist, %u fixed\n",
+ sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed);
+ if (objs) {
+ VTAILQ_FOREACH(oc, &sg->lru->lru_head, lru_list)
+ VCLI_Out(cli, " OC %p\n", oc);
+ }
+ }
+ }
+
+ static void
+ debug_persistent(struct cli *cli, const char * const * av, void *priv)
+ {
+ struct smp_sc *sc;
+
+ (void)priv;
+
+ if (av[2] == NULL) {
+ VTAILQ_FOREACH(sc, &silos, list)
+ debug_report_silo(cli, sc, 0);
+ return;
+ }
+ VTAILQ_FOREACH(sc, &silos, list)
+ if (!strcmp(av[2], sc->stevedore->ident))
+ break;
+ if (sc == NULL) {
+ VCLI_Out(cli, "Silo <%s> not found\n", av[2]);
+ VCLI_SetResult(cli, CLIS_PARAM);
+ return;
+ }
+ if (av[3] == NULL) {
+ debug_report_silo(cli, sc, 0);
+ return;
+ }
+ Lck_Lock(&sc->mtx);
+ if (!strcmp(av[3], "sync")) {
+ smp_close_seg(sc, sc->cur_seg);
+ smp_new_seg(sc);
+ } else if (!strcmp(av[3], "dump")) {
+ debug_report_silo(cli, sc, 1);
+ } else {
+ VCLI_Out(cli, "Unknown operation\n");
+ VCLI_SetResult(cli, CLIS_PARAM);
+ }
+ Lck_Unlock(&sc->mtx);
+ }
+
+ static struct cli_proto debug_cmds[] = {
+ { "debug.persistent", "debug.persistent",
+ "Persistent debugging magic:\n"
+ "\tdebug.persistent [stevedore [cmd]]\n"
+ "With no cmd arg, a summary of the silo is returned.\n"
+ "Possible commands:\n"
+ "\tsync\tClose current segment, open a new one\n"
+ "\tdump\tinclude objcores in silo summary\n"
+ "",
+ 0, 2, "d", debug_persistent },
+ { NULL }
+ };
+
+ /*--------------------------------------------------------------------*/
+
+ void
+ SMP_Init(void)
+ {
+ CLI_AddFuncs(debug_cmds);
+ }
+
+ /*--------------------------------------------------------------------
+ * Pause until all silos have loaded.
+ */
+
+ void
+ SMP_Ready(void)
+ {
+ struct smp_sc *sc;
+
+ ASSERT_CLI();
+ do {
+ VTAILQ_FOREACH(sc, &silos, list)
+ if (!(sc->flags & SMP_SC_LOADED))
+ break;
+ if (sc != NULL)
+ (void)sleep(1);
+ } while (sc != NULL);
+ }
diff --cc bin/varnishd/storage/storage_synth.c
index 0000000,e9e9b2f..062cfba
mode 000000,100644..100644
--- a/bin/varnishd/storage/storage_synth.c
+++ b/bin/varnishd/storage/storage_synth.c
@@@ -1,0 -1,120 +1,121 @@@
+ /*-
+ * Copyright (c) 2008-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Storage method for synthetic content, based on vsb.
+ */
+
+ #include "config.h"
+
+ #include <stdlib.h>
+
+ #include "cache/cache.h"
+ #include "storage/storage.h"
+
+
+ static struct lock sms_mtx;
+
+ static void
+ sms_free(struct storage *sto)
+ {
+
+ CHECK_OBJ_NOTNULL(sto, STORAGE_MAGIC);
+ Lck_Lock(&sms_mtx);
+ VSC_C_main->sms_nobj--;
+ VSC_C_main->sms_nbytes -= sto->len;
+ VSC_C_main->sms_bfree += sto->len;
+ Lck_Unlock(&sms_mtx);
+ VSB_delete(sto->priv);
+ free(sto);
+ }
+
+ void
+ SMS_Init(void)
+ {
+
+ Lck_New(&sms_mtx, lck_sms);
+ }
+
+ static struct stevedore sms_stevedore = {
+ .magic = STEVEDORE_MAGIC,
+ .name = "synth",
+ .free = sms_free,
++ .dup = default_dup,
+ };
+
+ struct vsb *
+ SMS_Makesynth(struct object *obj)
+ {
+ struct storage *sto;
+ struct vsb *vsb;
+
+ CHECK_OBJ_NOTNULL(obj, OBJECT_MAGIC);
+ STV_Freestore(obj);
+ obj->len = 0;
+
+ Lck_Lock(&sms_mtx);
+ VSC_C_main->sms_nreq++;
+ VSC_C_main->sms_nobj++;
+ Lck_Unlock(&sms_mtx);
+
+ sto = calloc(sizeof *sto, 1);
+ XXXAN(sto);
+ vsb = VSB_new_auto();
+ XXXAN(vsb);
+ sto->priv = vsb;
+ sto->len = 0;
+ sto->space = 0;
+ #ifdef SENDFILE_WORKS
+ sto->fd = -1;
+ #endif
+ sto->stevedore = &sms_stevedore;
+ sto->magic = STORAGE_MAGIC;
+
+ VTAILQ_INSERT_TAIL(&obj->store, sto, list);
+ return (vsb);
+ }
+
+ void
+ SMS_Finish(struct object *obj)
+ {
+ struct storage *sto;
+ struct vsb *vsb;
+
+ CHECK_OBJ_NOTNULL(obj, OBJECT_MAGIC);
+ sto = VTAILQ_FIRST(&obj->store);
+ assert(sto->stevedore == &sms_stevedore);
+ vsb = sto->priv;
+ AZ(VSB_finish(vsb));
+
+ sto->ptr = (void*)VSB_data(vsb);
+ sto->len = VSB_len(vsb);
+ sto->space = VSB_len(vsb);
+ obj->len = sto->len;
+ Lck_Lock(&sms_mtx);
+ VSC_C_main->sms_nbytes += sto->len;
+ VSC_C_main->sms_balloc += sto->len;
+ Lck_Unlock(&sms_mtx);
+ }
diff --cc bin/varnishd/storage/storage_umem.c
index 0000000,52d238d..add6bd7
mode 000000,100644..100644
--- a/bin/varnishd/storage/storage_umem.c
+++ b/bin/varnishd/storage/storage_umem.c
@@@ -1,0 -1,166 +1,167 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Storage method based on umem_alloc(3MALLOC)
+ */
+
+ #include "config.h"
+
+ #ifdef HAVE_LIBUMEM
+
+ #include <sys/types.h>
+
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <umem.h>
+
+ #include "cache/cache.h"
+ #include "storage/storage.h"
+
+ static size_t smu_max = SIZE_MAX;
+ static MTX smu_mtx;
+
+ struct smu {
+ struct storage s;
+ size_t sz;
+ };
+
+ static struct storage *
+ smu_alloc(struct stevedore *st, size_t size)
+ {
+ struct smu *smu;
+
+ Lck_Lock(&smu_mtx);
+ VSC_C_main->sma_nreq++;
+ if (VSC_C_main->sma_nbytes + size > smu_max)
+ size = 0;
+ else {
+ VSC_C_main->sma_nobj++;
+ VSC_C_main->sma_nbytes += size;
+ VSC_C_main->sma_balloc += size;
+ }
+ Lck_Unlock(&smu_mtx);
+
+ if (size == 0)
+ return (NULL);
+
+ smu = umem_zalloc(sizeof *smu, UMEM_DEFAULT);
+ if (smu == NULL)
+ return (NULL);
+ smu->sz = size;
+ smu->s.priv = smu;
+ smu->s.ptr = umem_alloc(size, UMEM_DEFAULT);
+ XXXAN(smu->s.ptr);
+ smu->s.len = 0;
+ smu->s.space = size;
+ smu->s.fd = -1;
+ smu->s.stevedore = st;
+ smu->s.magic = STORAGE_MAGIC;
+ return (&smu->s);
+ }
+
+ static void
+ smu_free(struct storage *s)
+ {
+ struct smu *smu;
+
+ CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
+ smu = s->priv;
+ assert(smu->sz == smu->s.space);
+ Lck_Lock(&smu_mtx);
+ VSC_C_main->sma_nobj--;
+ VSC_C_main->sma_nbytes -= smu->sz;
+ VSC_C_main->sma_bfree += smu->sz;
+ Lck_Unlock(&smu_mtx);
+ umem_free(smu->s.ptr, smu->s.space);
+ umem_free(smu, sizeof *smu);
+ }
+
+ static void
+ smu_trim(const struct storage *s, size_t size)
+ {
+ struct smu *smu;
+ void *p;
+
+ CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
+ smu = s->priv;
+ assert(smu->sz == smu->s.space);
+ if ((p = umem_alloc(size, UMEM_DEFAULT)) != NULL) {
+ memcpy(p, smu->s.ptr, size);
+ umem_free(smu->s.ptr, smu->s.space);
+ Lck_Lock(&smu_mtx);
+ VSC_C_main->sma_nbytes -= (smu->sz - size);
+ VSC_C_main->sma_bfree += smu->sz - size;
+ smu->sz = size;
+ Lck_Unlock(&smu_mtx);
+ smu->s.ptr = p;
+ smu->s.space = size;
+ }
+ }
+
+ static void
+ smu_init(struct stevedore *parent, int ac, char * const *av)
+ {
+ const char *e;
+ uintmax_t u;
+
+ (void)parent;
+
+ AZ(av[ac]);
+ if (ac > 1)
+ ARGV_ERR("(-sumem) too many arguments\n");
+
+ if (ac == 0 || *av[0] == '\0')
+ return;
+
+ e = VNUM_2bytes(av[0], &u, 0);
+ if (e != NULL)
+ ARGV_ERR("(-sumem) size \"%s\": %s\n", av[0], e);
+ if ((u != (uintmax_t)(size_t)u))
+ ARGV_ERR("(-sumem) size \"%s\": too big\n", av[0]);
+ smu_max = u;
+ }
+
+ static void
+ smu_open(const struct stevedore *st)
+ {
+ (void)st;
+ AZ(pthread_mutex_init(&smu_mtx, NULL));
+ }
+
+ const struct stevedore smu_stevedore = {
+ .magic = STEVEDORE_MAGIC,
+ .name = "umem",
+ .init = smu_init,
+ .open = smu_open,
+ .alloc = smu_alloc,
+ .free = smu_free,
+ .trim = smu_trim,
++ .dup = default_dup,
+ };
+
+ #endif /* HAVE_LIBUMEM */
diff --cc include/tbl/vsc_fields.h
index 0000000,738703c..824ba6d
mode 000000,100644..100644
--- a/include/tbl/vsc_fields.h
+++ b/include/tbl/vsc_fields.h
@@@ -1,0 -1,417 +1,419 @@@
+ /*-
+ * Copyright (c) 2006 Verdens Gang AS
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Definition of all shared memory statistics below.
+ *
+ * Fields (n, t, l, f, e, d):
+ * n - Name: Field name, in C-source and stats programs
+ * t - Type: C-type, uint64_t, unless marked in 'f'
+ * l - Local: Local counter in worker thread.
+ * f - Format: Semantics of the value in this field
+ * 'a' - Accumulator (deprecated, use 'c')
+ * 'b' - Bitmap
+ * 'c' - Counter, never decreases.
+ * 'g' - Gauge, goes up and down
+ * 'i' - Integer (deprecated, use 'g')
+ * e - Explanation: Short explanation of field (for screen use)
+ * d - Description: Long explanation of field (for doc use)
+ *
+ * Please describe Gauge variables as "Number of..." to indicate that
+ * this is a snapshot, and Counter variables as "Count of" to indicate
+ * accumulative count.
+ *
+ * -----------------------
+ * NB: Cleanup in progress
+ * -----------------------
+ *
+ * Insufficient attention has caused this to become a swamp of conflicting
+ * conventions, shorthands and general mumbo-jumbo. I'm trying to clean
+ * it up as I go over the code in other business.
+ *
+ * Please see the sessmem section for how it should look.
+ *
+ */
+
+ /**********************************************************************/
+
+ #ifdef VSC_DO_MAIN
+
+ /*---------------------------------------------------------------------
+ * Sessions
+ * see: cache_acceptor.c and cache_pool.c
+ */
+
+ VSC_F(sess_conn, uint64_t, 1, 'c',
+ "Sessions accepted",
+ "Count of sessions succesfully accepted"
+ )
+ VSC_F(sess_drop, uint64_t, 1, 'c',
+ "Sessions dropped",
+ "Count of sessions silently dropped due to lack of session memory."
+ " See parameter 'max_sess'."
+ )
+
+ VSC_F(sess_fail, uint64_t, 1, 'c',
+ "Session accept failures",
+ "Count of failures to accept TCP connection."
+ " Either the client changed its mind, or the kernel ran out of"
+ " some resource like filedescriptors."
+ )
+
+ /*---------------------------------------------------------------------*/
+
+ VSC_F(client_req, uint64_t, 1, 'a',
+ "Client requests received",
+ "")
+
+ VSC_F(cache_hit, uint64_t, 1, 'a',
+ "Cache hits",
+ "Count of cache hits. "
+ " A cache hit indicates that an object has been delivered to a"
+ " client without fetching it from a backend server."
+ )
+
+ VSC_F(cache_hitpass, uint64_t, 1, 'a',
+ "Cache hits for pass",
+ "Count of hits for pass"
+ " A cache hit for pass indicates that Varnish is going to"
+ " pass the request to the backend and this decision has been "
+ " cached in it self. This counts how many times the cached "
+ " decision is being used."
+ )
+ VSC_F(cache_miss, uint64_t, 1, 'a',
+ "Cache misses",
+ "Count of misses"
+ " A cache miss indicates the object was fetched from the"
+ " backend before delivering it to the backend.")
+
+ VSC_F(backend_conn, uint64_t, 0, 'a',
+ "Backend conn. success",
+ "")
+
+ VSC_F(backend_unhealthy, uint64_t, 0, 'a',
+ "Backend conn. not attempted",
+ ""
+ )
+ VSC_F(backend_busy, uint64_t, 0, 'a', "Backend conn. too many", "")
+ VSC_F(backend_fail, uint64_t, 0, 'a', "Backend conn. failures", "")
+ VSC_F(backend_reuse, uint64_t, 0, 'a',
+ "Backend conn. reuses",
+ "Count of backend connection reuses"
+ " This counter is increased whenever we reuse a recycled connection.")
+ VSC_F(backend_toolate, uint64_t, 0, 'a', "Backend conn. was closed", "")
+ VSC_F(backend_recycle, uint64_t, 0, 'a',
+ "Backend conn. recycles",
+ "Count of backend connection recycles"
+ " This counter is increased whenever we have a keep-alive"
+ " connection that is put back into the pool of connections."
+ " It has not yet been used, but it might be, unless the backend"
+ " closes it.")
+ VSC_F(backend_retry, uint64_t, 0, 'a', "Backend conn. retry", "")
+
+ VSC_F(fetch_head, uint64_t, 1, 'a', "Fetch head", "")
+ VSC_F(fetch_length, uint64_t, 1, 'a', "Fetch with Length", "")
+ VSC_F(fetch_chunked, uint64_t, 1, 'a', "Fetch chunked", "")
+ VSC_F(fetch_eof, uint64_t, 1, 'a', "Fetch EOF", "")
+ VSC_F(fetch_bad, uint64_t, 1, 'a', "Fetch had bad headers", "")
+ VSC_F(fetch_close, uint64_t, 1, 'a', "Fetch wanted close", "")
+ VSC_F(fetch_oldhttp, uint64_t, 1, 'a', "Fetch pre HTTP/1.1 closed", "")
+ VSC_F(fetch_zero, uint64_t, 1, 'a', "Fetch zero len", "")
+ VSC_F(fetch_failed, uint64_t, 1, 'a', "Fetch failed", "")
+ VSC_F(fetch_1xx, uint64_t, 1, 'a', "Fetch no body (1xx)", "")
+ VSC_F(fetch_204, uint64_t, 1, 'a', "Fetch no body (204)", "")
+ VSC_F(fetch_304, uint64_t, 1, 'a', "Fetch no body (304)", "")
+
+ /*---------------------------------------------------------------------
+ * Session Memory
+ * see: cache_session.c
+ */
+
+ VSC_F(sessmem_size, uint64_t, 1, 'g',
+ "Session mem size",
+ "Bytes of memory allocated for last allocated session."
+ )
+
+ VSC_F(sessmem_alloc, uint64_t, 1, 'c',
+ "Session mem allocated",
+ "Count of all allocations of session memory."
+ )
+
+ VSC_F(sessmem_free, uint64_t, 1, 'c',
+ "Session mem freed",
+ "Count of all frees of session memory."
+ )
+
+ VSC_F(sessmem_fail, uint64_t, 1, 'c',
+ "Session mem alloc failed",
+ "Count of session memory allocation failures."
+ )
+
+ VSC_F(sessmem_limit, uint64_t, 1, 'c',
+ "Session mem alloc limited",
+ "Count of session memory allocations blocked by limit (max_sess)."
+ )
+
+ /*---------------------------------------------------------------------
+ * Pools, threads, and sessions
+ * see: cache_pool.c
+ *
+ */
+
+ VSC_F(pools, uint64_t, 1, 'g',
+ "Number of thread pools",
+ "Number of thread pools. See also param wthread_pools."
+ " NB: Presently pools cannot be removed once created."
+ )
+
+ VSC_F(threads, uint64_t, 1, 'g',
+ "Total number of threads",
+ "Number of threads in all pools."
+ " See also params thread_pools, thread_pool_min & thread_pool_max."
+ )
+
+ VSC_F(threads_limited, uint64_t, 1, 'c',
+ "Threads hit max",
+ "Number of times more threads were needed, but limit was reached"
+ " in a thread pool."
+ " See also param thread_pool_max."
+ )
+
+ VSC_F(threads_created, uint64_t, 1, 'c',
+ "Threads created",
+ "Total number of threads created in all pools."
+ )
+
+ VSC_F(threads_destroyed, uint64_t, 1, 'c',
+ "Threads destoryed",
+ "Total number of threads destroyed in all pools."
+ )
+
+ VSC_F(threads_failed, uint64_t, 1, 'c',
+ "Thread creation failed",
+ "Number of times creating a thread failed."
+ " See VSL::Debug for diagnostics."
+ " See also param thread_fail_delay."
+ )
+
+ VSC_F(thread_queue_len, uint64_t, 1, 'g',
+ "Length of session queue",
+ "Length of session queue waiting for threads."
+ " NB: Only updates once per second."
+ " See also param queue_max."
+ )
+
+ VSC_F(sess_queued, uint64_t, 1, 'c',
+ "Sessions queued for thread",
+ "Number of times session was queued waiting for a thread."
+ " See also param queue_max."
+ )
+
+ VSC_F(sess_dropped, uint64_t, 1, 'c',
+ "Sessions dropped for thread",
+ "Number of times session was dropped because the queue were too"
+ " long already."
+ " See also param queue_max."
+ )
+
+ /*---------------------------------------------------------------------*/
+
+ VSC_F(n_sess_mem, uint64_t, 0, 'i', "N struct sess_mem", "")
+ VSC_F(n_sess, uint64_t, 0, 'i', "N struct sess", "")
+ VSC_F(n_object, uint64_t, 1, 'i', "N struct object", "")
+ VSC_F(n_vampireobject, uint64_t, 1, 'i', "N unresurrected objects", "")
+ VSC_F(n_objectcore, uint64_t, 1, 'i', "N struct objectcore", "")
+ VSC_F(n_objecthead, uint64_t, 1, 'i', "N struct objecthead", "")
+ VSC_F(n_waitinglist, uint64_t, 1, 'i', "N struct waitinglist", "")
+
+ VSC_F(n_vbc, uint64_t, 0, 'i', "N struct vbc", "")
+
+ VSC_F(n_backend, uint64_t, 0, 'i', "N backends", "")
+
+ VSC_F(n_expired, uint64_t, 0, 'i', "N expired objects", "")
+ VSC_F(n_lru_nuked, uint64_t, 0, 'i', "N LRU nuked objects", "")
+ VSC_F(n_lru_moved, uint64_t, 0, 'i', "N LRU moved objects", "")
+
+ VSC_F(losthdr, uint64_t, 0, 'a', "HTTP header overflows", "")
+
+ VSC_F(n_objsendfile, uint64_t, 0, 'a', "Objects sent with sendfile",
+ "The number of objects sent with the sendfile system call. If enabled "
+ "sendfile will be used on object larger than a certain size.")
+ VSC_F(n_objwrite, uint64_t, 0, 'a', "Objects sent with write",
+ "The number of objects sent with regular write calls."
+ "Writes are used when the objects are too small for sendfile "
+ "or if the sendfile call has been disabled")
+ VSC_F(n_objoverflow, uint64_t, 1, 'a',
+ "Objects overflowing workspace", "")
+
+ VSC_F(s_sess, uint64_t, 1, 'a', "Total Sessions", "")
+ VSC_F(s_req, uint64_t, 1, 'a', "Total Requests", "")
+ VSC_F(s_pipe, uint64_t, 1, 'a', "Total pipe", "")
+ VSC_F(s_pass, uint64_t, 1, 'a', "Total pass", "")
+ VSC_F(s_fetch, uint64_t, 1, 'a', "Total fetch", "")
+ VSC_F(s_hdrbytes, uint64_t, 1, 'a', "Total header bytes", "")
+ VSC_F(s_bodybytes, uint64_t, 1, 'a', "Total body bytes", "")
+
+ VSC_F(sess_closed, uint64_t, 1, 'a', "Session Closed", "")
+ VSC_F(sess_pipeline, uint64_t, 1, 'a', "Session Pipeline", "")
+ VSC_F(sess_readahead, uint64_t, 1, 'a', "Session Read Ahead", "")
+ VSC_F(sess_linger, uint64_t, 1, 'a', "Session Linger", "")
+ VSC_F(sess_herd, uint64_t, 1, 'a', "Session herd", "")
+
+ VSC_F(shm_records, uint64_t, 0, 'a', "SHM records", "")
+ VSC_F(shm_writes, uint64_t, 0, 'a', "SHM writes", "")
+ VSC_F(shm_flushes, uint64_t, 0, 'a', "SHM flushes due to overflow", "")
+ VSC_F(shm_cont, uint64_t, 0, 'a', "SHM MTX contention", "")
+ VSC_F(shm_cycles, uint64_t, 0, 'a', "SHM cycles through buffer", "")
+
+ VSC_F(sms_nreq, uint64_t, 0, 'a', "SMS allocator requests", "")
+ VSC_F(sms_nobj, uint64_t, 0, 'i', "SMS outstanding allocations", "")
+ VSC_F(sms_nbytes, uint64_t, 0, 'i', "SMS outstanding bytes", "")
+ VSC_F(sms_balloc, uint64_t, 0, 'i', "SMS bytes allocated", "")
+ VSC_F(sms_bfree, uint64_t, 0, 'i', "SMS bytes freed", "")
+
+ VSC_F(backend_req, uint64_t, 0, 'a', "Backend requests made", "")
+
+ VSC_F(n_vcl, uint64_t, 0, 'a', "N vcl total", "")
+ VSC_F(n_vcl_avail, uint64_t, 0, 'a', "N vcl available", "")
+ VSC_F(n_vcl_discard, uint64_t, 0, 'a', "N vcl discarded", "")
+
+ /**********************************************************************/
+
+ VSC_F(bans, uint64_t, 0, 'g',
+ "Count of bans",
+ "Number of all bans in system, including bans superseded"
+ " by newer bans and bans already checked by the ban-lurker."
+ )
+ VSC_F(bans_gone, uint64_t, 0, 'g',
+ "Number of bans marked 'gone'",
+ "Number of bans which are no longer active, either because they"
+ " got checked by the ban-lurker or superseded by newer identical bans."
+ )
+ VSC_F(bans_req, uint64_t, 0, 'g',
+ "Number of bans using req.*",
+ "Number of bans which use req.* variables. These bans can not"
+ " be washed by the ban-lurker."
+ )
+ VSC_F(bans_added, uint64_t, 0, 'c',
+ "Bans added",
+ "Counter of bans added to ban list."
+ )
+ VSC_F(bans_deleted, uint64_t, 0, 'c',
+ "Bans deleted",
+ "Counter of bans deleted from ban list."
+ )
+
+ VSC_F(bans_tested, uint64_t, 0, 'c',
+ "Bans tested against objects",
+ "Count of how many bans and objects have been tested against"
+ " each other."
+ )
+ VSC_F(bans_tests_tested, uint64_t, 0, 'c',
+ "Ban tests tested against objects",
+ "Count of how many tests and objects have been tested against"
+ " each other. 'ban req.url == foo && req.http.host == bar'"
+ " counts as one in 'bans_tested' and as two in 'bans_tests_tested'"
+ )
+ VSC_F(bans_dups, uint64_t, 0, 'c',
+ "Bans superseded by other bans",
+ "Count of bans replaced by later identical bans."
+ )
+
+ /**********************************************************************/
+
+ VSC_F(hcb_nolock, uint64_t, 0, 'a', "HCB Lookups without lock", "")
+ VSC_F(hcb_lock, uint64_t, 0, 'a', "HCB Lookups with lock", "")
+ VSC_F(hcb_insert, uint64_t, 0, 'a', "HCB Inserts", "")
+
+ VSC_F(esi_errors, uint64_t, 0, 'a', "ESI parse errors (unlock)", "")
+ VSC_F(esi_warnings, uint64_t, 0, 'a', "ESI parse warnings (unlock)", "")
+ VSC_F(client_drop_late, uint64_t, 0, 'a', "Connection dropped late", "")
+ VSC_F(uptime, uint64_t, 0, 'a', "Client uptime", "")
+
+ VSC_F(dir_dns_lookups, uint64_t, 0, 'a', "DNS director lookups", "")
+ VSC_F(dir_dns_failed, uint64_t, 0, 'a', "DNS director failed lookups", "")
+ VSC_F(dir_dns_hit, uint64_t, 0, 'a', "DNS director cached lookups hit", "")
+ VSC_F(dir_dns_cache_full, uint64_t, 0, 'a', "DNS director full dnscache", "")
+
+ VSC_F(vmods, uint64_t, 0, 'i', "Loaded VMODs", "")
+
+ VSC_F(n_gzip, uint64_t, 0, 'a', "Gzip operations", "")
+ VSC_F(n_gunzip, uint64_t, 0, 'a', "Gunzip operations", "")
+
++VSC_F(cond_not_validated, uint64_t, 1, 'c', "Non-validating responses",
++ "Count of backend responses to conditional requests with status != 304")
+ #endif
+
+ /**********************************************************************/
+
+ #ifdef VSC_DO_LCK
+
+ VSC_F(creat, uint64_t, 0, 'a', "Created locks", "")
+ VSC_F(destroy, uint64_t, 0, 'a', "Destroyed locks", "")
+ VSC_F(locks, uint64_t, 0, 'a', "Lock Operations", "")
+ VSC_F(colls, uint64_t, 0, 'a', "Collisions", "")
+
+ #endif
+
+ /**********************************************************************
+ * All Stevedores support these counters
+ */
+
+ #if defined(VSC_DO_SMA) || defined (VSC_DO_SMF)
+ VSC_F(c_req, uint64_t, 0, 'a', "Allocator requests", "")
+ VSC_F(c_fail, uint64_t, 0, 'a', "Allocator failures", "")
+ VSC_F(c_bytes, uint64_t, 0, 'a', "Bytes allocated", "")
+ VSC_F(c_freed, uint64_t, 0, 'a', "Bytes freed", "")
+ VSC_F(g_alloc, uint64_t, 0, 'i', "Allocations outstanding", "")
+ VSC_F(g_bytes, uint64_t, 0, 'i', "Bytes outstanding", "")
+ VSC_F(g_space, uint64_t, 0, 'i', "Bytes available", "")
+ #endif
+
+
+ /**********************************************************************/
+
+ #ifdef VSC_DO_SMA
+ /* No SMA specific counters */
+ #endif
+
+ /**********************************************************************/
+
+ #ifdef VSC_DO_SMF
+ VSC_F(g_smf, uint64_t, 0, 'i', "N struct smf", "")
+ VSC_F(g_smf_frag, uint64_t, 0, 'i', "N small free smf", "")
+ VSC_F(g_smf_large, uint64_t, 0, 'i', "N large free smf", "")
+ #endif
+
+ /**********************************************************************/
+
+ #ifdef VSC_DO_VBE
+
+ VSC_F(vcls, uint64_t, 0, 'i', "VCL references", "")
+ VSC_F(happy, uint64_t, 0, 'b', "Happy health probes", "")
+
+ #endif
+
More information about the varnish-commit
mailing list