[master] e4d1d36 Polish round over varnishd part of VSM

Poul-Henning Kamp phk at varnish-cache.org
Mon Nov 21 00:39:22 CET 2011


commit e4d1d360beca6917fe2ee1f866ca1ed71302b639
Author: Poul-Henning Kamp <phk at FreeBSD.org>
Date:   Sun Nov 20 12:44:19 2011 +0000

    Polish round over varnishd part of VSM

diff --git a/bin/varnishd/cache/cache_shmlog.c b/bin/varnishd/cache/cache_shmlog.c
index 63157c7..950f57f 100644
--- a/bin/varnishd/cache/cache_shmlog.c
+++ b/bin/varnishd/cache/cache_shmlog.c
@@ -37,9 +37,7 @@
 
 #include "cache_backend.h"	// For w->vbc
 
-#include "vapi/vsm_int.h"
 #include "vmb.h"
-#include "vtim.h"
 
 /* These cannot be struct lock, which depends on vsm/vsl working */
 static pthread_mutex_t vsl_mtx;
diff --git a/bin/varnishd/common/common.h b/bin/varnishd/common/common.h
index a35e649..a5d7633 100644
--- a/bin/varnishd/common/common.h
+++ b/bin/varnishd/common/common.h
@@ -75,30 +75,11 @@ void mgt_child_inherit(int fd, const char *what);
 
 /* vsm.c */
 struct vsm_sc;
-struct vsm_sc *VSM_common_new(void *ptr, unsigned len);
-void *VSM_common_alloc(struct vsm_sc *sc, unsigned size,
+struct vsm_sc *VSM_common_new(void *ptr, ssize_t len);
+void *VSM_common_alloc(struct vsm_sc *sc, ssize_t size,
     const char *class, const char *type, const char *ident);
 void VSM_common_free(struct vsm_sc *sc, void *ptr);
-void VSM_common_delete(struct vsm_sc *sc);
-
-// extern struct VSM_head		*VSM_head;
-// extern const struct VSM_chunk	*vsm_end;
-
-/*
- * These three should not be called directly, but only through
- * proper vectors in mgt.h/cache.h, hence the __
- */
-void *VSM__Alloc(unsigned size, const char *class, const char *type,
-    const char *ident);
-void VSM__Free(const void *ptr);
-void VSM__Clean(void);
-
-/* These classes are opaque to other programs, so we define the here */
-#define VSM_CLASS_FREE	"Free"
-#define VSM_CLASS_COOL	"Cool"
-#define VSM_CLASS_PARAM	"Params"
-#define VSM_CLASS_MARK	"MgrCld"
-#define VSM_COOL_TIME	5
+void VSM_common_delete(struct vsm_sc **sc);
 
 /*---------------------------------------------------------------------
  * Generic power-2 rounding macros
diff --git a/bin/varnishd/common/common_vsm.c b/bin/varnishd/common/common_vsm.c
index 682f542..f525a1e 100644
--- a/bin/varnishd/common/common_vsm.c
+++ b/bin/varnishd/common/common_vsm.c
@@ -27,27 +27,8 @@
  *
  * VSM stuff common to manager and child.
  *
- * We have three potential conflicts we need to deal with:
- *
- * VSM-studying programs (varnishstat...) vs. everybody else
- *	The VSM studying programs only have read-only access to the VSM
- *	so everybody else must use memory barriers, stable storage and
- *	similar tricks to keep the VSM image in sync (long enough) for
- *	the studying programs.
- *	It can not be prevented, and may indeed in some cases be
- *	desirable for such programs to write to VSM, for instance to
- *	zero counters.
- *	Varnishd should never trust the integrity of VSM content.
- *
- * Manager process vs child process.
- *	The manager will create a fresh VSM for each child process launch
- *	and not muck about with VSM while the child runs.  If the child
- *	crashes, the panicstring will be evacuated and the VSM possibly
- *	saved for debugging, and a new VSM created before the child is
- *	started again.
- *
- * Child process threads
- *	Pthread locking necessary.
+ * Please see comments in <vapi/vsm_int.h> for details of protocols and
+ * data consistency.
  *
  */
 
@@ -71,8 +52,8 @@ struct vsm_range {
 	unsigned 			magic;
 #define VSM_RANGE_MAGIC			0x8d30f14
 	VTAILQ_ENTRY(vsm_range)		list;
-	unsigned			off; 
-	unsigned			len;
+	ssize_t				off; 
+	ssize_t				len;
 	double				cool;
 	struct VSM_chunk		*chunk;
 	void				*ptr;
@@ -82,7 +63,7 @@ struct vsm_sc {
 	unsigned 			magic;
 #define VSM_SC_MAGIC			0x8b83270d
 	char				*b;
-	unsigned			len;
+	ssize_t				len;
 	struct VSM_head			*head;
 	VTAILQ_HEAD(,vsm_range)		r_used;
 	VTAILQ_HEAD(,vsm_range)		r_cooling;
@@ -137,7 +118,7 @@ vsm_common_insert_free(struct vsm_sc *sc, struct vsm_range *vr)
  */
 
 struct vsm_sc *
-VSM_common_new(void *p, unsigned l)
+VSM_common_new(void *p, ssize_t l)
 {
 	struct vsm_sc *sc;
 	struct vsm_range *vr;
@@ -154,10 +135,13 @@ VSM_common_new(void *p, unsigned l)
 	sc->len = l;
 
 	sc->head = (void *)sc->b;
-	memset(TRUST_ME(sc->head), 0, sizeof *sc->head);
+	/* This should not be necessary, but just in case...*/
+	memset(sc->head, 0, sizeof *sc->head);
 	sc->head->magic = VSM_HEAD_MAGIC;
 	sc->head->hdrsize = sizeof *sc->head;
 	sc->head->shm_size = l;
+	sc->head->alloc_seq = random() | 1;
+	VWMB();
 
 	ALLOC_OBJ(vr, VSM_RANGE_MAGIC);
 	AN(vr);
@@ -172,7 +156,7 @@ VSM_common_new(void *p, unsigned l)
  */
 
 void *
-VSM_common_alloc(struct vsm_sc *sc, unsigned size,
+VSM_common_alloc(struct vsm_sc *sc, ssize_t size,
     const char *class, const char *type, const char *ident)
 {
 	struct vsm_range *vr, *vr2, *vr3;
@@ -238,15 +222,15 @@ VSM_common_alloc(struct vsm_sc *sc, unsigned size,
 	/* XXX: stats ? */
 
 	/* Zero the entire allocation, to avoid garbage confusing readers */
-	memset(TRUST_ME(sc->b + vr->off), 0, vr->len);
+	memset(sc->b + vr->off, 0, vr->len);
 
 	vr->chunk = (void *)(sc->b + vr->off);
 	vr->ptr = (vr->chunk + 1);
 
 	vr->chunk->magic = VSM_CHUNK_MAGIC;
-	strcpy(TRUST_ME(vr->chunk->class), class);
-	strcpy(TRUST_ME(vr->chunk->type), type);
-	strcpy(TRUST_ME(vr->chunk->ident), ident);
+	strcpy(vr->chunk->class, class);
+	strcpy(vr->chunk->type, type);
+	strcpy(vr->chunk->ident, ident);
 	VWMB();
 
 	vr3 = VTAILQ_FIRST(&sc->r_used);
@@ -258,6 +242,7 @@ VSM_common_alloc(struct vsm_sc *sc, unsigned size,
 	} else {
 		sc->head->first = vr->off;
 	}
+	sc->head->alloc_seq += 2;
 	VWMB();
 	return (vr->ptr);
 }
@@ -289,6 +274,7 @@ VSM_common_free(struct vsm_sc *sc, void *ptr)
 			sc->head->first = vr->chunk->next;
 		VWMB();
 		vr->chunk->len = 0;
+		sc->head->alloc_seq += 2;
 		VWMB();
 		return;
 	}
@@ -298,7 +284,7 @@ VSM_common_free(struct vsm_sc *sc, void *ptr)
 			VTAILQ_REMOVE(&sc->r_bogus, vr, list);
 			FREE_OBJ(vr);
 			/* XXX: stats ? */
-			free(TRUST_ME(ptr));
+			free(ptr);
 			return;
 		}
 	}
@@ -311,9 +297,14 @@ VSM_common_free(struct vsm_sc *sc, void *ptr)
  */
 
 void
-VSM_common_delete(struct vsm_sc *sc)
+VSM_common_delete(struct vsm_sc **scp)
 {
 	struct vsm_range *vr, *vr2;
+	struct vsm_sc *sc;
+
+	AN(scp);
+	sc =*scp;
+	*scp = NULL;
 
 	CHECK_OBJ_NOTNULL(sc, VSM_SC_MAGIC);
 	VTAILQ_FOREACH_SAFE(vr, &sc->r_free, list, vr2)
@@ -323,9 +314,10 @@ VSM_common_delete(struct vsm_sc *sc)
 	VTAILQ_FOREACH_SAFE(vr, &sc->r_cooling, list, vr2)
 		FREE_OBJ(vr);
 	VTAILQ_FOREACH_SAFE(vr, &sc->r_bogus, list, vr2) {
-		free(TRUST_ME(vr->ptr));
+		free(vr->ptr);
 		FREE_OBJ(vr);
 	}
-	sc->head->magic = 0;
+	sc->head->alloc_seq = 0;
+	VWMB();
 	FREE_OBJ(sc);
 }
diff --git a/bin/varnishd/common/params.h b/bin/varnishd/common/params.h
index 95258fb..cab2f49 100644
--- a/bin/varnishd/common/params.h
+++ b/bin/varnishd/common/params.h
@@ -31,6 +31,8 @@
 
 #include "vre.h"
 
+#define VSM_CLASS_PARAM		"Params"
+
 struct params {
 
 	/* Unprivileged user / group */
diff --git a/bin/varnishd/mgt/mgt.h b/bin/varnishd/mgt/mgt.h
index 571aa23..775fe2c 100644
--- a/bin/varnishd/mgt/mgt.h
+++ b/bin/varnishd/mgt/mgt.h
@@ -82,7 +82,6 @@ void mgt_sandbox_solaris_privsep(void);
 
 /* mgt_shmem.c */
 void mgt_SHM_Init(void);
-void mgt_SHM_Pid(void);
 
 /* stevedore_mgt.c */
 void STV_Config(const char *spec);
diff --git a/bin/varnishd/mgt/mgt_cli.c b/bin/varnishd/mgt/mgt_cli.c
index aa4fffe..d3b7396 100644
--- a/bin/varnishd/mgt/mgt_cli.c
+++ b/bin/varnishd/mgt/mgt_cli.c
@@ -493,13 +493,13 @@ mgt_cli_secret(const char *S_arg)
 {
 	int i, fd;
 	char buf[BUFSIZ];
-	volatile char *p;
+	char *p;
 
 	/* Save in shmem */
 	i = strlen(S_arg);
-	p = VSM_Alloc(i + 1, "Arg", "-S", "");
+	p = VSM_Alloc(i + 1L, "Arg", "-S", "");
 	AN(p);
-	memcpy(TRUST_ME(p), S_arg, i + 1);
+	memcpy(p, S_arg, i + 1L);
 
 	srandomdev();			/* XXX: why here ??? */
 	fd = open(S_arg, O_RDONLY);
@@ -527,7 +527,7 @@ mgt_cli_telnet(const char *T_arg)
 	struct vss_addr **ta;
 	int i, n, sock, good;
 	struct telnet *tn;
-	volatile char *p;
+	char *p;
 	struct vsb *vsb;
 	char abuf[VTCP_ADDRBUFSIZE];
 	char pbuf[VTCP_PORTBUFSIZE];
@@ -566,7 +566,7 @@ mgt_cli_telnet(const char *T_arg)
 	/* Save in shmem */
 	p = VSM_Alloc(VSB_len(vsb) + 1, "Arg", "-T", "");
 	AN(p);
-	memcpy(TRUST_ME(p), VSB_data(vsb), VSB_len(vsb) + 1);
+	memcpy(p, VSB_data(vsb), VSB_len(vsb) + 1);
 	VSB_delete(vsb);
 }
 
diff --git a/bin/varnishd/mgt/mgt_main.c b/bin/varnishd/mgt/mgt_main.c
index 4e63c16..f782d03 100644
--- a/bin/varnishd/mgt/mgt_main.c
+++ b/bin/varnishd/mgt/mgt_main.c
@@ -630,8 +630,6 @@ main(int argc, char * const *argv)
 	if (!d_flag && !F_flag)
 		AZ(varnish_daemon(1, 0));
 
-	mgt_SHM_Pid();
-
 	if (pfh != NULL && VPF_Write(pfh))
 		fprintf(stderr, "NOTE: Could not write PID file\n");
 
diff --git a/bin/varnishd/mgt/mgt_pool.c b/bin/varnishd/mgt/mgt_pool.c
index e8c4f27..badb3f4 100644
--- a/bin/varnishd/mgt/mgt_pool.c
+++ b/bin/varnishd/mgt/mgt_pool.c
@@ -48,7 +48,6 @@
 #include <unistd.h>
 
 #include "mgt/mgt.h"
-#include "common/heritage.h"
 #include "common/params.h"
 
 #include "mgt/mgt_param.h"
diff --git a/bin/varnishd/mgt/mgt_sandbox.c b/bin/varnishd/mgt/mgt_sandbox.c
index cdba825..3cd1d98 100644
--- a/bin/varnishd/mgt/mgt_sandbox.c
+++ b/bin/varnishd/mgt/mgt_sandbox.c
@@ -53,7 +53,6 @@
 #include <unistd.h>
 
 #include "mgt/mgt.h"
-#include "common/heritage.h"
 #include "common/params.h"
 
 /*--------------------------------------------------------------------*/
diff --git a/bin/varnishd/mgt/mgt_shmem.c b/bin/varnishd/mgt/mgt_shmem.c
index 6095d9f..6bb8e13 100644
--- a/bin/varnishd/mgt/mgt_shmem.c
+++ b/bin/varnishd/mgt/mgt_shmem.c
@@ -90,7 +90,6 @@
 #include <sys/stat.h>
 
 #include <fcntl.h>
-#include <signal.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -103,7 +102,6 @@
 
 #include "flopen.h"
 #include "vapi/vsc_int.h"
-#include "vapi/vsl_int.h"
 #include "vapi/vsm_int.h"
 #include "vmb.h"
 
@@ -117,88 +115,96 @@
 
 struct VSC_C_main	*VSC_C_main;
 
-static int vsl_fd = -1;
+static int vsm_fd = -1;
 
 /*--------------------------------------------------------------------
  * Check that we are not started with the same -n argument as an already
- * running varnishd
+ * running varnishd.
+ *
+ * Non-zero return means we should exit and not trample the file.
+ *
  */
 
-static void
-vsl_n_check(int fd)
+static int
+vsm_n_check(void)
 {
-	struct VSM_head slh;
-	int i;
+	int fd, i;
 	struct stat st;
 	pid_t pid;
+	struct VSM_head vsmh;
+	int retval = 2;
 
-	AZ(fstat(fd, &st));
-	if (!S_ISREG(st.st_mode))
-		ARGV_ERR("\tshmlog: Not a file\n");
-
-	/* Test if the SHMFILE is locked by other Varnish */
-	if (fltest(fd, &pid) > 0) {
-		fprintf(stderr,
-			"SHMFILE locked by running varnishd master (pid=%jd)\n",
-			(intmax_t)pid);
-		fprintf(stderr,
-			"(Use unique -n arguments if you want multiple "
-			"instances)\n");
-		exit(2);
-	}
+	fd = open(VSM_FILENAME, O_RDWR, 0644);
+	if (fd < 0)
+		return (0);
 
-	/* Read owning pid from SHMFILE */
-	memset(&slh, 0, sizeof slh);	/* XXX: for flexelint */
-	i = read(fd, &slh, sizeof slh);
-	if (i != sizeof slh)
-		return;
-	if (slh.magic != VSM_HEAD_MAGIC)
-		return;
-	if (slh.hdrsize != sizeof slh)
-		return;
-	if (slh.master_pid != 0 && !kill(slh.master_pid, 0)) {
+	AZ(fstat(fd, &st));
+	if (!S_ISREG(st.st_mode)) {
 		fprintf(stderr,
-			"WARNING: Taking over SHMFILE marked as owned by "
-			"running process (pid=%jd)\n",
-			(intmax_t)slh.master_pid);
+		    "VSM (%s) not a regular file.\n", VSM_FILENAME);
+	} else {
+		i = fltest(fd, &pid);
+		if (i < 0) {
+			fprintf(stderr,
+			    "Cannot determine locking status of VSM (%s)\n.",
+			    VSM_FILENAME);
+		} else if (i == 0) {
+			/*
+			 * File is unlocked, mark it as dead, to help any
+			 * consumers still stuck on it.
+			 */
+			if (pread(fd, &vsmh, sizeof vsmh, 0) == sizeof vsmh) {
+				vsmh.alloc_seq = 0;
+				(void)pwrite(fd, &vsmh, sizeof vsmh, 0);
+			}
+			retval = 0;
+		} else {
+			/* The VSM is locked, we won't touch it. */
+			fprintf(stderr,
+			    "VSM locked by running varnishd master (pid=%jd)\n"
+			    "(Use unique -n arguments if you want"
+			    "  multiple instances)\n", (intmax_t)pid);
+		}
 	}
+	(void)close(fd);
+	return (retval);
 }
 
 /*--------------------------------------------------------------------
- * Build a new shmlog file
+ * Build a zeroed file
  */
 
-static void
-vsl_buildnew(const char *fn, ssize_t size)
+static int
+vsm_zerofile(const char *fn, ssize_t size)
 {
-	int i;
-	unsigned u;
+	int fd;
+	ssize_t i, u;
 	char buf[64*1024];
 	int flags;
 
-	(void)unlink(fn);
-	vsl_fd = flopen(fn, O_RDWR | O_CREAT | O_EXCL | O_NONBLOCK, 0644);
-	if (vsl_fd < 0) {
+	fd = flopen(fn, O_RDWR | O_CREAT | O_EXCL | O_NONBLOCK, 0644);
+	if (fd < 0) {
 		fprintf(stderr, "Could not create %s: %s\n",
 		    fn, strerror(errno));
-		exit (1);
+		return (-1);
 	}
-	flags = fcntl(vsl_fd, F_GETFL);
+	flags = fcntl(fd, F_GETFL);
 	assert(flags != -1);
 	flags &= ~O_NONBLOCK;
-	AZ(fcntl(vsl_fd, F_SETFL, flags));
+	AZ(fcntl(fd, F_SETFL, flags));
 
 	memset(buf, 0, sizeof buf);
 	for (u = 0; u < size; ) {
-		i = write(vsl_fd, buf, sizeof buf);
+		i = write(fd, buf, sizeof buf);
 		if (i <= 0) {
 			fprintf(stderr, "Write error %s: %s\n",
 			    fn, strerror(errno));
-			exit (1);
+			return (-1);
 		}
 		u += i;
 	}
-	AZ(ftruncate(vsl_fd, (off_t)size));
+	AZ(ftruncate(fd, (off_t)size));
+	return (fd);
 }
 
 /*--------------------------------------------------------------------
@@ -209,48 +215,59 @@ static
 void
 mgt_shm_atexit(void)
 {
-#if 0
-	if (getpid() == VSM_head->master_pid)
-		VSM_head->master_pid = 0;
-#endif
+
+	if (heritage.vsm != NULL)
+		VSM_common_delete(&heritage.vsm);
 }
 
 void
 mgt_SHM_Init(void)
 {
-	int i, fill;
+	int i;
 	uintmax_t size, ps;
 	void *p;
-#if 0
-	uint32_t *vsl_log_start;
-#endif
-
-	fill = 1;
+	char fnbuf[64];
 
 	size = mgt_param.vsl_space + mgt_param.vsm_space;
 	ps = getpagesize();
 	size += ps - 1;
 	size &= ~(ps - 1);
 
-	i = open(VSM_FILENAME, O_RDWR, 0644);
-	if (i >= 0) {
-		vsl_n_check(i);
-		(void)close(i);
-	}
-	vsl_buildnew(VSM_FILENAME, size);
+	/* Collision check with already running varnishd */
+	i = vsm_n_check();
+	if (i)
+		exit(i);
+
+	bprintf(fnbuf, "%s.%jd", VSM_FILENAME, (intmax_t)getpid());
+
+	vsm_fd = vsm_zerofile(fnbuf, size);
+	if (vsm_fd < 0) 
+		exit(1);
 
 	p = (void *)mmap(NULL, size,
 	    PROT_READ|PROT_WRITE,
 	    MAP_HASSEMAPHORE | MAP_NOSYNC | MAP_SHARED,
-	    vsl_fd, 0);
-	xxxassert(p != MAP_FAILED);
+	    vsm_fd, 0);
 
-	heritage.vsm = VSM_common_new(p, size);
+	if (p == MAP_FAILED) {
+		fprintf(stderr, "Mmap error %s: %s\n", fnbuf, strerror(errno));
+		exit (-1);
+	}
 
+	/* This may or may not work */
 	(void)mlock(p, size);
+
+	heritage.vsm = VSM_common_new(p, size);
+
+	if (rename(fnbuf, VSM_FILENAME)) {
+		fprintf(stderr, "Rename failed %s -> %s: %s\n",
+		    fnbuf, VSM_FILENAME, strerror(errno));
+		(void)unlink(fnbuf);
+		exit (-1);
+	}
+
 	AZ(atexit(mgt_shm_atexit));
 
-	/* XXX: We need to zero params if we dealloc/clean/wash */
 	cache_param = VSM_Alloc(sizeof *cache_param, VSM_CLASS_PARAM, "", "");
 	AN(cache_param);
 	*cache_param = mgt_param;
@@ -258,36 +275,4 @@ mgt_SHM_Init(void)
 	PAN_panicstr_len = 64 * 1024;
 	PAN_panicstr = VSM_Alloc(PAN_panicstr_len, PAN_CLASS, "", "");
 	AN(PAN_panicstr);
-
-#if 0
-
-	VSM_head->master_pid = getpid();
-
-	memset(&VSM_head->head, 0, sizeof VSM_head->head);
-	VSM_head->head.magic = VSM_CHUNK_MAGIC;
-	VSM_head->head.len =
-	    (uint8_t*)(VSM_head) + size - (uint8_t*)&VSM_head->head;
-	bprintf(VSM_head->head.class, "%s", VSM_CLASS_FREE);
-	VWMB();
-
-	vsm_end = (void*)((uint8_t*)VSM_head + size);
-
-	VSC_C_main = VSM_Alloc(sizeof *VSC_C_main,
-	    VSC_CLASS, VSC_TYPE_MAIN, "");
-	AN(VSC_C_main);
-
-	do
-		VSM_head->alloc_seq = random();
-	while (VSM_head->alloc_seq == 0);
-#endif
-
-}
-
-void
-mgt_SHM_Pid(void)
-{
-
-#if 0
-	VSM_head->master_pid = getpid();
-#endif
 }
diff --git a/include/vapi/vsm.h b/include/vapi/vsm.h
index fd7fbee..275f6d9 100644
--- a/include/vapi/vsm.h
+++ b/include/vapi/vsm.h
@@ -32,8 +32,16 @@
 #define VAPI_VSM_H_INCLUDED
 
 struct VSM_head;
+struct VSM_chunk;
 struct VSM_data;
 
+struct VSM_fantom {
+	struct VSM_chunk	*chunk;
+	void			*b;
+	void			*e;
+	uintptr_t		priv;
+};
+
 /*---------------------------------------------------------------------
  * VSM level access functions
  */
@@ -46,6 +54,7 @@ struct VSM_data *VSM_New(void);
 	 * referencing the same or different shared memory files.
 	 * Returns:
 	 *	Pointer to usable VSL_data handle.
+	 *	NULL: malloc failed.
 	 */
 
 typedef void VSM_diag_f(void *priv, const char *fmt, ...);
@@ -66,7 +75,7 @@ int VSM_n_Arg(struct VSM_data *vd, const char *n_arg);
 	 * and VSC_Arg() functions.
 	 * Returns:
 	 *	 1 on success
-	 *	 -1 on failure, with diagnostic on stderr.
+	 *	 -1 on failure, with diagnostic.
 	 */
 
 const char *VSM_Name(const struct VSM_data *vd);
diff --git a/include/vapi/vsm_int.h b/include/vapi/vsm_int.h
index cad1e36..c33ba35 100644
--- a/include/vapi/vsm_int.h
+++ b/include/vapi/vsm_int.h
@@ -29,6 +29,67 @@
  * Define the layout of the shared memory log segment.
  *
  * NB: THIS IS NOT A PUBLIC API TO VARNISH!
+ *
+ * There is a lot of diplomacy and protocol involved with the VSM segment
+ * since there is no way to (and no desire to!) lock between the readers
+ * and the writer.
+ *
+ * In particular we want the readers to seamlessly jump from one VSM instance
+ * to another when the child restarts.
+ *
+ * The VSM life-cycle there is:
+ *
+ *	Manager creates VSM file under temp name
+ *
+ *	Temp VSM file is initialized such that VSM_head is consistent
+ *	with a non-zero alloc_seq
+ *
+ *	Manager renames Temp VSM file to correct filename as atomic
+ *	operation.
+ *
+ *	When manager abandons VSM file, alloc_seq is set to zero, which
+ *	never happens in any other circumstances.
+ *
+ *	If a manager is started and finds and old abandonned VSM segment
+ *	it will zero the alloc_seq in it, before replacing the file.
+ *
+ * Subscribers will have to monitor two things to make sure they have
+ * the current VSM instance:  The alloc_seq field and the inode number
+ * of the path-name.  The former check is by far the cheaper and the
+ * latter check should only be employed when lack of activity in the
+ * VSM segment raises suspicion that something has happened.
+ *
+ * The allocations ("chunks") in the VSM forms a linked list, starting with
+ * VSM_head->first, with the first/next fields being byte offsets relative
+ * to the start of the VSM segment.
+ *
+ * The last chunk on the list, has next == 0.
+ *
+ * New chunks are appended to the list, no matter where in the VSM
+ * they happen to be allocated.
+ *
+ * Chunk allocation sequence is:
+ *	Find free space
+ *	Zero payload
+ *	Init Chunk header
+ *	Write memory barrier
+ *	update hdr->first or $last->next pointer
+ *	hdr->alloc_seq changes
+ *	Write memory barrier
+ *
+ * Chunk contents should be designed so that zero bytes are not mistaken
+ * for valid contents.
+ *
+ * Chunk deallocation sequence is:
+ *	update hdr->first or $prev->next pointer
+ *	Write memory barrier
+ *	this->len = 0
+ *	hdr->alloc_seq changes
+ *	Write memory barrier
+ *
+ * The space occupied by the chunk is put on a cooling list and is not
+ * recycled for at least a minute.
+ *
  */
 
 #ifndef VSM_INT_H_INCLUDED
@@ -36,80 +97,23 @@
 
 #define VSM_FILENAME		"_.vsm"
 
-/*
- * This structure describes each allocation from the shmlog
- */
-
 struct VSM_chunk {
 #define VSM_CHUNK_MAGIC		0xa15712e5	/* From /dev/random */
 	unsigned		magic;
-	unsigned		len;		/* Incl VSM_chunk */
-	unsigned		next;		/* Offset in shmem */
-	unsigned		state;		/* XXX remove */
+	ssize_t			len;		/* Incl VSM_chunk */
+	ssize_t			next;		/* Offset in shmem */
 	char			class[8];
 	char			type[8];
 	char			ident[64];
 };
 
-#define VSM_NEXT(sha)		((void*)((uintptr_t)(sha) + (sha)->len))
-#define VSM_PTR(sha)		((void*)((uintptr_t)((sha) + 1)))
-
 struct VSM_head {
 #define VSM_HEAD_MAGIC		0xe75f7e91	/* From /dev/random */
 	unsigned		magic;
-
-	unsigned		hdrsize;
-	unsigned		first;		/* Offset, first chunk */
-
-	uint64_t		starttime;
-	int64_t			master_pid;
-	int64_t			child_pid;
-
-	unsigned		shm_size;
-
+	ssize_t			hdrsize;
+	ssize_t			shm_size;
+	ssize_t			first;		/* Offset, first chunk */
 	unsigned		alloc_seq;
-	/* Must be last element */
-	struct VSM_chunk	head;
 };
 
-/*
- * You must include "miniobj.h" and have an assert function to be
- * able to use the VSM_ITER() macro.
- */
-#ifdef CHECK_OBJ_NOTNULL
-
-extern struct VSM_head *VSM_head;
-extern const struct VSM_chunk *vsm_end;
-
-static inline struct VSM_chunk *
-vsm_iter_0(void)
-{
-
-	CHECK_OBJ_NOTNULL(VSM_head, VSM_HEAD_MAGIC);
-	CHECK_OBJ_NOTNULL(&VSM_head->head, VSM_CHUNK_MAGIC);
-	return (&VSM_head->head);
-}
-
-static inline void
-vsm_iter_n(struct VSM_chunk **pp)
-{
-
-	CHECK_OBJ_NOTNULL(VSM_head, VSM_HEAD_MAGIC);
-	CHECK_OBJ_NOTNULL(*pp, VSM_CHUNK_MAGIC);
-	*pp = VSM_NEXT(*pp);
-	if (*pp >= vsm_end) {
-		*pp = NULL;
-		return;
-	}
-	CHECK_OBJ_NOTNULL(*pp, VSM_CHUNK_MAGIC);
-}
-
-#define VSM_ITER(vd) for ((vd) = vsm_iter_0(); (vd) != NULL; vsm_iter_n(&vd))
-
-#else
-
-#define VSM_ITER(vd) while (YOU_NEED_MINIOBJ_TO_USE_VSM_ITER)
-
-#endif /* CHECK_OBJ_NOTNULL */
-
 #endif /* VSM_INT_H_INCLUDED */
diff --git a/lib/libvarnishapi/vsl.c b/lib/libvarnishapi/vsl.c
index 88bd9d6..b17cd36 100644
--- a/lib/libvarnishapi/vsl.c
+++ b/lib/libvarnishapi/vsl.c
@@ -390,7 +390,8 @@ VSL_Open(struct VSM_data *vd, int diag)
 
 /*--------------------------------------------------------------------*/
 
-int VSL_Matched(const struct VSM_data *vd, uint64_t bitmap)
+int
+VSL_Matched(const struct VSM_data *vd, uint64_t bitmap)
 {
 	if (vd->vsl->num_matchers > 0) {
 		uint64_t t;
diff --git a/lib/libvarnishapi/vsm.c b/lib/libvarnishapi/vsm.c
index 8e8b27b..97e3847 100644
--- a/lib/libvarnishapi/vsm.c
+++ b/lib/libvarnishapi/vsm.c
@@ -62,7 +62,8 @@ VSM_New(void)
 	struct VSM_data *vd;
 
 	ALLOC_OBJ(vd, VSM_MAGIC);
-	AN(vd);
+	if (vd == NULL)
+		return (vd);
 
 	vd->diag = (VSM_diag_f*)fprintf;
 	vd->priv = stderr;
@@ -132,15 +133,22 @@ VSM_Delete(struct VSM_data *vd)
 	if (vd->vsl != NULL)
 		VSL_Delete(vd);
 
-	free(vd);
+	FREE_OBJ(vd);
 }
 
-/*--------------------------------------------------------------------*/
+/*--------------------------------------------------------------------
+ * The internal VSM open function
+ *
+ * Return:
+ *	0 = sucess
+ * 	1 = failure
+ *
+ */
 
 static int
 vsm_open(struct VSM_data *vd, int diag)
 {
-	int i, j;
+	int i;
 	struct VSM_head slh;
 
 	CHECK_OBJ_NOTNULL(vd, VSM_MAGIC);
@@ -155,7 +163,7 @@ vsm_open(struct VSM_data *vd, int diag)
 		return (1);
 	}
 
-	assert(fstat(vd->vsm_fd, &vd->fstat) == 0);
+	AZ(fstat(vd->vsm_fd, &vd->fstat));
 	if (!S_ISREG(vd->fstat.st_mode)) {
 		if (diag)
 			vd->diag(vd->priv, "%s is not a regular file\n",
@@ -174,16 +182,16 @@ vsm_open(struct VSM_data *vd, int diag)
 		vd->vsm_fd = -1;
 		return (1);
 	}
-	if (slh.magic != VSM_HEAD_MAGIC) {
+	if (slh.magic != VSM_HEAD_MAGIC || slh.alloc_seq == 0) {
 		if (diag)
-			vd->diag(vd->priv, "Wrong magic number in file %s\n",
+			vd->diag(vd->priv, "Not a ready VSM file %s\n",
 			    vd->fname);
 		AZ(close(vd->vsm_fd));
 		vd->vsm_fd = -1;
 		return (1);
 	}
 
-	vd->VSM_head = (void *)mmap(NULL, slh.shm_size,
+	vd->VSM_head = mmap(NULL, slh.shm_size,
 	    PROT_READ, MAP_SHARED|MAP_HASSEMAPHORE, vd->vsm_fd, 0);
 	if (vd->VSM_head == MAP_FAILED) {
 		if (diag)
@@ -195,20 +203,7 @@ vsm_open(struct VSM_data *vd, int diag)
 		return (1);
 	}
 	vd->vsm_end = (uint8_t *)vd->VSM_head + slh.shm_size;
-
-	for (j = 0; j < 20 && vd->VSM_head->alloc_seq == 0; j++)
-		(void)usleep(50000);
-	if (vd->VSM_head->alloc_seq == 0) {
-		if (diag)
-			vd->diag(vd->priv, "File not initialized %s\n",
-			    vd->fname);
-		assert(0 == munmap((void*)vd->VSM_head, slh.shm_size));
-		AZ(close(vd->vsm_fd));
-		vd->vsm_fd = -1;
-		vd->VSM_head = NULL;
-		return (1);
-	}
-	vd->alloc_seq = vd->VSM_head->alloc_seq;
+	vd->my_alloc_seq = vd->VSM_head->alloc_seq;
 
 	if (vd->vsl != NULL)
 		VSL_Open_CallBack(vd);
@@ -238,10 +233,10 @@ VSM_Close(struct VSM_data *vd)
 	CHECK_OBJ_NOTNULL(vd, VSM_MAGIC);
 	if (vd->VSM_head == NULL)
 		return;
-	assert(0 == munmap((void*)vd->VSM_head, vd->VSM_head->shm_size));
+	AZ(munmap((void*)vd->VSM_head, vd->VSM_head->shm_size));
 	vd->VSM_head = NULL;
 	assert(vd->vsm_fd >= 0);
-	assert(0 == close(vd->vsm_fd));
+	AZ(close(vd->vsm_fd));
 	vd->vsm_fd = -1;
 }
 
@@ -330,10 +325,10 @@ VSM_iter0(struct VSM_data *vd)
 {
 
 	CHECK_OBJ_NOTNULL(vd, VSM_MAGIC);
-	vd->alloc_seq = vd->VSM_head->alloc_seq;
-	while (vd->alloc_seq == 0) {
+	vd->my_alloc_seq = vd->VSM_head->alloc_seq;
+	while (vd->my_alloc_seq == 0) {
 		(void)usleep(50000);
-		vd->alloc_seq = vd->VSM_head->alloc_seq;
+		vd->my_alloc_seq = vd->VSM_head->alloc_seq;
 	}
 	CHECK_OBJ_NOTNULL(&vd->VSM_head->head, VSM_CHUNK_MAGIC);
 	return (&vd->VSM_head->head);
@@ -344,7 +339,7 @@ VSM_itern(const struct VSM_data *vd, struct VSM_chunk **pp)
 {
 
 	CHECK_OBJ_NOTNULL(vd, VSM_MAGIC);
-	if (vd->alloc_seq != vd->VSM_head->alloc_seq) {
+	if (vd->my_alloc_seq != vd->VSM_head->alloc_seq) {
 		*pp = NULL;
 		return;
 	}
diff --git a/lib/libvarnishapi/vsm_api.h b/lib/libvarnishapi/vsm_api.h
index 519e46d..da5222e 100644
--- a/lib/libvarnishapi/vsm_api.h
+++ b/lib/libvarnishapi/vsm_api.h
@@ -44,13 +44,12 @@ struct VSM_data {
 	char			*n_opt;
 	char			*fname;
 
-
 	struct stat		fstat;
 
 	int			vsm_fd;
 	struct VSM_head		*VSM_head;
 	void			*vsm_end;
-	unsigned		alloc_seq;
+	unsigned		my_alloc_seq;
 
 	/* Stuff relating the stats fields start here */
 



More information about the varnish-commit mailing list