r981 - trunk/varnish-cache/bin/varnishncsa

phk at projects.linpro.no phk at projects.linpro.no
Fri Sep 15 09:14:37 CEST 2006


Author: phk
Date: 2006-09-15 09:14:37 +0200 (Fri, 15 Sep 2006)
New Revision: 981

Added:
   trunk/varnish-cache/bin/varnishncsa/flint.lnt
   trunk/varnish-cache/bin/varnishncsa/flint.sh
Modified:
   trunk/varnish-cache/bin/varnishncsa/varnishncsa.c
Log:
Overhaul NCSA logging.

Reduce memory footprint by only allocating space for a fd first time
we see it.  Few if any servers will ever see 64k fd's in use.

Move the sbuf into the logline structure for simplicity.

Access the logline structure through a pointer instead of indexing
the array all the time.

Reduce amount of malloc/free traffic for performance.

Use the "cooked" SHMlog api where the length, fd, etc are broken out
as arguments for us.  Pass the FILE* where we want the stuff to
go as private data (stdout for now).

Add FlexeLint files.

Now over to Anders again...



Added: trunk/varnish-cache/bin/varnishncsa/flint.lnt
===================================================================
--- trunk/varnish-cache/bin/varnishncsa/flint.lnt	2006-09-13 21:26:01 UTC (rev 980)
+++ trunk/varnish-cache/bin/varnishncsa/flint.lnt	2006-09-15 07:14:37 UTC (rev 981)
@@ -0,0 +1,60 @@
+-passes=3
+
+-header(../../config.h)
+
++d__attribute__()=
+
+// Fix strchr() semtics, it can only return NULL if arg2 != 0
+-sem(strchr, 1p, type(1), 2n == 0 ? (@p < 1p) : (@p < 1p || @p == 0 ))
+
+-ffc	// No automatic custody
+
+-e763	// Redundant declaration for symbol '...' previously declared
+-e726	// Extraneous comma ignored
+-e728	// Symbol ... not explicitly initialized
+-e716	// while(1) ... 
+-e785	// Too few initializers for aggregate 
+-e786	// String concatenation within initializer
+
+-emacro(740, TAILQ_PREV) // Unusual pointer cast (incompatible indirect types)
+-emacro((826), TAILQ_PREV) // Suspicious pointer-to-pointer conversion (area too small)
+
+
+-esym(534, printf)	// Ignoring return value of function
+-esym(534, fprintf)	// Ignoring return value of function
+-esym(534, memset)	// Ignoring return value of function
+-esym(534, memcpy)	// Ignoring return value of function
+-esym(534, memmove)	// Ignoring return value of function
+-esym(534, strcpy)	// Ignoring return value of function
+-esym(534, vsb_printf)	// Ignoring return value of function
+-esym(534, vsb_cat)	// Ignoring return value of function
+
+// cache.h
+-emacro(506, INCOMPL) // Constant value Boolean
+
+// cache_center.c
+-efunc(525, CNT_Session)	// Negative indentation from line
+-efunc(525, http_FilterHeader)	// Negative indentation from line
+-efunc(539, http_FilterHeader)	// Positive indentation from line
+
+// cache_vcl.c
+-efunc(525, vcl_handlingname)	// Negative indentation from line
+-esym(528, vcl_handlingname)	// Not referenced
+-e641	// Converting enum 'cli_status_e' to int
+
+// Review all below this line ///////////////////////////////////////////////
+
+-e767	// Macro redef (system queue.h vs ours )
+
+-e574	// Signed-unsigned mix with relational
+-e712	// Loss of precision (assignment) (long long to
+-e747	// Significant prototype coercion (arg. no. 2) long
+-e713	// Loss of precision (assignment) (unsigned long long to long long)
+-e506	// Constant value boolean
+-e818	// Pointer parameter '...' could be declared as pointing to const
+-e774	// Boolean within 'if' always evaluates to False
+-e534	// Ignoring return value of function
+-e557	// unrecog format
+-e732   // Loss of sign (arg. no. 2) (int to unsigned
+-e737	// [45]  Loss of sign in promotion from int to unsigned
+

Added: trunk/varnish-cache/bin/varnishncsa/flint.sh
===================================================================
--- trunk/varnish-cache/bin/varnishncsa/flint.sh	2006-09-13 21:26:01 UTC (rev 980)
+++ trunk/varnish-cache/bin/varnishncsa/flint.sh	2006-09-15 07:14:37 UTC (rev 981)
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+T=/tmp/_$$
+flexelint \
+	-I/usr/include \
+	-I. \
+	-I../../include \
+	flint.lnt \
+	*.c > $T 2>&1
+
+for t in Error Warning Info
+do
+	sed -n "/$t [0-9][0-9][0-9]:/s/.*\($t [0-9][0-9][0-9]\).*/\1/p" $T
+done | awk '
+$2 == 830	{ next }
+$2 == 831	{ next }
+	{
+	i=$2"_"$1
+	h[i]++
+	n++
+	}
+END	{
+	printf "%5d %s\n", n, "Total"
+	for (i in h)
+		printf "%5d %s\n", h[i], i
+	}
+' | sort -rn
+
+cat $T

Modified: trunk/varnish-cache/bin/varnishncsa/varnishncsa.c
===================================================================
--- trunk/varnish-cache/bin/varnishncsa/varnishncsa.c	2006-09-13 21:26:01 UTC (rev 980)
+++ trunk/varnish-cache/bin/varnishncsa/varnishncsa.c	2006-09-15 07:14:37 UTC (rev 981)
@@ -27,385 +27,133 @@
 #include "shmlog.h"
 #include "varnishapi.h"
 
+static struct logline {
+	char df_h[4 * (3 + 1)];		/* Datafield for %h (IP adress)	*/
+	char df_s[4]; 			/* Datafield for %s, Status	*/
+	char df_b[12];			/* Datafield for %b, Bytes	*/
+	char *df_R; 			/* Datafield for %{Referer}	*/
+	char *df_U; 			/* Datafield for %{User-agent}	*/
+	int bogus_req; 			/* bogus request		*/
+	struct vsb *sb;
+} *ll[65536];
 
-/* Ordering-----------------------------------------------------------*/
-
-
-/* Adding a struct to hold the data for the logline
- *
- */
-
-struct logline {
-	char df_h[4 * (3 + 1)]; // Datafield for %h (IP adress)
-	int df_hfini;	// Set to 1 when a SessionClose is seen.
-	unsigned char *df_r; // Datafield for %r (Request)
-	int df_rfini;	// Set to 1 when a ReqServTime has come.
-	unsigned char *df_s; // Datafield for %s, Status
-	int df_sfini;
-	unsigned char *df_b; // Datafield for %b, Bytes
-	int df_bfini;
-	struct tm *logline_time; // Datafield for %t
-	unsigned char *df_R; // Datafield for %{Referer}i
-	int df_Rfini;
-	unsigned char *df_U; // Datafield for %{User-agent}i
-	int df_Ufini;
-	int bogus_req; // Set to 1 if we discover a bogus request
-};
-
-/* We make a array of pointers to vsb's. Sbuf is a string buffer.
- * * The buffer can be made/extended/cleared etc. through a API.
- * * The array is 65536 long because we will use sessionid as key.
- * *
- * */
-
-static struct vsb      *ob[65536];
-static struct logline	ll[65536];
-
-
-/*
-* Clean order is called once in a while. It clears all the sessions that 
-* where never finished (SLT_SessionClose). Because the data is not complete
-* we disregard the data.
-*
-*/
-
-static void
-clean_order(void)
+/* Check if string starts with pfx */
+static int
+ispfx(const char *ptr, unsigned len, const char *pfx)
 {
-	unsigned u;
+	unsigned l;
 
-	for (u = 0; u < 65536; u++) {
-		if (ob[u] == NULL)
-			continue;
-		vsb_finish(ob[u]);
-		vsb_clear(ob[u]);
-	}
+	l = strlen(pfx);
+	if (l > len)
+		return (0);
+	if (strncasecmp(ptr, pfx, l))
+		return (0);
+	return (1);
 }
 
-static struct tm *make_timestruct(char *tmpPtra){
-	char *tmpPtrb;
-	int timesec = 0;
-	time_t req_time; // Timeobject used for making the requesttime.
-	int i = 0;
-	char temp_time[27];
-	struct tm *timestring;
-
-	temp_time[0] = '\0';
-	for ( tmpPtrb = strtok(tmpPtra," "); tmpPtrb != NULL; tmpPtrb = strtok(NULL, " ")){
-		if (i == 2){
-			// We have the right time
-			//printf("Time: %s\n", tmpPtrb);
-			tmpPtra = tmpPtrb;
-			strncpy(temp_time, tmpPtra, 10);
-			temp_time[11] = '\0';
-		}
-		//printf("ReqServTime number %d: %s\n", i, tmpPtrb);
-	
-                i++;
-         }
-
-	//strncpy(temp_time, tmpPtra, 20);
-	//printf("inten: %s\n",temp_time);
-	timesec = atoi(temp_time);
-	req_time = timesec;
-        timestring = localtime(&req_time);
-	return timestring;
-
-}
-
-static void 
-extended_log_format(unsigned char *p)
+static int
+extended_log_format(void *priv, unsigned tag, unsigned fd, unsigned len, unsigned spec, const char *ptr)
 {
-	unsigned u,v,w;
+	const char *p;
+	char *q;
+	FILE *fo;
+	time_t t;
+	long l;
+	struct tm tm;
+	char tbuf[40];
+	struct logline *lp;
 
-	// Used for getting IP.
-	unsigned char *tmpPtr;
-	int j;
+	if (!(spec &VSL_S_CLIENT))
+		return (0);
 
-	// Used for requesttime.
-	char *tmpPtra;
-
-	u = (p[2] << 8) | p[3];
-	if (ob[u] == NULL) {
-		ob[u] = vsb_new(NULL, NULL, 0, VSB_AUTOEXTEND);
-		assert(ob[u] != NULL);
+	if (ll[fd] == NULL) {
+		ll[fd] = calloc(sizeof *ll[fd], 1);
+		assert(ll[fd] != NULL);
+		ll[fd]->sb = vsb_new(NULL, NULL, 0, VSB_AUTOEXTEND);
+		assert(ll[fd]->sb != NULL);
 	}
-	
-	v = 0;
-	w = 0;
-	j = 0;
-	//ll[u].df_rfini = 0;
-	//ll[u].df_hfini = 0;
+	lp = ll[fd];
 
-	switch (p[0]) {
+	switch (tag) {
 
 	case SLT_SessionOpen:
-
-		// We catch the IP adress of the session.
-		// We also catch IP in SessionReuse because we can not always
-		// be sure we see a SessionOpen when we start logging.
-		
-		//printf("IP %s\n", p+4);
-		ll[u].df_h[0] = '\0';
-		tmpPtr = strchr(p + 4, ' ');
-		if (tmpPtr != NULL){
-
-			j = strlen(p + 4) - strlen(tmpPtr);                // length of IP
-			strncpy(ll[u].df_h, p + 4, j);
-			ll[u].df_h[j] = '\0'; // put on a NULL at end of buffer.
-		}
-		//printf("New session [%d]: %s \n",u, ll[u].df_h);
-
+		for (p = ptr, q = lp->df_h; *p && *p != ' ';)
+			*q++ = *p++;
+		*q = '\0';
 		break;
 
 	case SLT_ReqStart:
-
-		// We use XID to catch that a new request is comming inn.
-
+		vsb_clear(lp->sb);
 		break;
 
 	case SLT_RxRequest:
-
-		vsb_clear(ob[u]);
-
-		if (p[1] >= 4 && !strncasecmp((void *)&p[4], "HEAD",4)){
-			vsb_bcat(ob[u], p + 4, strlen(p + 4));
-			//printf("Got a HEAD\n");
+		if (ispfx(ptr, len, "HEAD")) {
+			vsb_bcat(lp->sb, ptr, len);
+		} else if (ispfx(ptr, len, "POST")) {
+			vsb_bcat(lp->sb, ptr, len);
+		} else if (ispfx(ptr, len, "GET")) {
+			vsb_bcat(lp->sb, ptr, len);
+		} else {
+			lp->bogus_req = 1;
 		}
+		break;
 	
-		else if (p[1] >= 4 && !strncasecmp((void *)&p[4], "POST",4)){
-			vsb_bcat(ob[u], p + 4, strlen(p + 4));
-			//printf("Got a POST\n");
-		}
-		
-		else if (p[1] >= 3 && !strncasecmp((void *)&p[4], "GET",3)){
-			vsb_bcat(ob[u], p + 4, strlen(p + 4));
-			//printf("Got a GET\n");
-		}
-		
-		else {
-			//vsb_bcat(ob[u], p + 4, strlen(p + 4));
-			//printf("Got something other than HEAD, POST, GET\n");
-			ll[u].bogus_req = 1;
-		}
-	break;
-	
 	case SLT_RxURL:
-
-		vsb_cat(ob[u], " ");
-		vsb_bcat(ob[u], p + 4, strlen(p + 4));
-
+		vsb_cat(lp->sb, " ");
+		vsb_bcat(lp->sb, ptr, len);
 		break;
 
 	case SLT_RxProtocol:
-		
-		vsb_cat(ob[u], " ");
-                vsb_bcat(ob[u], p + 4, strlen(p + 4));
-
+		vsb_cat(lp->sb, " ");
+		vsb_bcat(lp->sb, ptr, len);
 		break;
 
 	case SLT_TxStatus:
-		
-		ll[u].df_s = strdup(p + 4);
-		ll[u].df_sfini = 1;
-
+		strcpy(lp->df_s, ptr);
 		break;
 
 	case SLT_RxHeader:
-		if (p[1] >= 11 && !strncasecmp((void *)&p[4], "user-agent:",11)){
-                        ll[u].df_U = strdup(p + 4);
-			// Jump ahead past "User-Agent: "
-			ll[u].df_U = ll[u].df_U + 12;
-			ll[u].df_Ufini = 1;
-                }
-                if (p[1] >= 8 && !strncasecmp((void *)&p[4], "referer:",8)){
-                        ll[u].df_R = strdup(p + 4);
-			ll[u].df_R = ll[u].df_R + 9;
-			ll[u].df_Rfini = 1;
-                }
-
+		if (ispfx(ptr, len, "user-agent:"))
+                        lp->df_U = strdup(ptr + 12);
+                else if (ispfx(ptr, len, "referer:"))
+                        lp->df_R = strdup(ptr + 9);
 		break;
 
-	case SLT_ReqEnd:
-
-		// We use ReqServTime to find how the time the request was delivered
-		// also to define that a request is finished.
-		tmpPtra =  strdup(p + 4);
-		ll[u].logline_time = make_timestruct(tmpPtra);
-		free(tmpPtra);
-		ll[u].df_rfini = 1;
-		//printf("ReqServTime %s\n", temp_time);
-
-		break;
-
 	case SLT_Length:
-
-		// XXX ask DES or PHK about this one. Am I overflowing?
-
-		ll[u].df_b = strdup(p + 4);
-		ll[u].df_bfini = 1;
-                if (!atoi(ll[u].df_b)){
-			ll[u].df_b = malloc(2);
-	                ll[u].df_b[0] = '-';
-        	        ll[u].df_b[1] = '\0';
-                }
-
+		if (strcmp(ptr, "0"))
+			strcpy(lp->df_b, ptr);
+		else
+			strcpy(lp->df_b, "-");
 		break;
 
-	case SLT_SessionClose:
-
-		// Session is closed, we clean up things. But do not write.
-
-		//printf("Session close [%d]\n", u);
-		
-		ll[u].df_hfini = 1;
-
-		break;
-
-	case SLT_SessionReuse:
-
-		// We use SessionReuse to catch the IP adress of a session that has already
-		// started with a SessionOpen that we did not catch.
-		// Other than that it is not used.
-
-		// Catch IP if not already done.
-		
-		if (ll[u].df_h[0] == '\0'){
-			// We don't have IP, fetch it.
-			//
-			
-			tmpPtr = strchr(p + 4, ' ');
-			if (tmpPtr != NULL){
-				j = strlen(p + 4) - strlen(tmpPtr);                // length of IP
-				strncpy(ll[u].df_h, p + 4, j);
-				ll[u].df_h[j] = '\0'; // put on a NULL at end of buffer.
-			}
-			//printf("Got IP from Reuse [%d] : %s\n", u, ll[u].df_h);
-		}
-
-		//printf("Session reuse [%d]\n", u);
-
-		break;
-
 	default:
-
-		// printf("DEBUG: %s\n", p+4);
-
 		break;
 	}
+	if (tag != SLT_ReqEnd)
+		return (0);
 
-
-	if (ll[u].df_rfini) {
-		// We have a ReqServTime. Lets print the logline
-		// and clear variables that are different for each request.
-		//
-
-		char temp_time[27]; // Where we store the string we take from the log
-
-		// make temp_time
-		strftime (temp_time, 29, "[%d/%b/%Y:%X %z] ", ll[u].logline_time);
-
-		if (ll[u].df_h[0] == '\0' || ll[u].bogus_req){
-			ll[u].bogus_req = 0;
-			//printf("Tom IP \n");		
-		}
-		else{	
-			printf("%s - - %s ", ll[u].df_h, temp_time );
-			vsb_finish(ob[u]);
-			printf("\"%s\"", vsb_data(ob[u]));
-			printf(" %s %s ", ll[u].df_s, ll[u].df_b);
-			if (ll[u].df_Rfini){
-				printf(" \"%s\" ", ll[u].df_R);
-			}
-			else {
-				printf(" \"-\" ");
-			}
-
-			if (ll[u].df_Ufini){
-				printf(" \"%s\" ", ll[u].df_U);
-			}
-			else {
-				printf(" \"-\" ");
-			}
-			printf("\n");
-		}
-		fflush(stdout);
-
-		//vsb_finish(ob[u]);
-               	vsb_clear(ob[u]);
-		temp_time[0] = '\0';
-
-
-		ll[u].df_rfini = 0;
-
-
-		// Clear the TxStaus
-
-		if (ll[u].df_sfini){
-			free(ll[u].df_s);
-			ll[u].df_sfini = 0;
-			//printf("Freed df_s [%d]\n", u);
-		}
-
-		if (ll[u].df_bfini){
-			free(ll[u].df_b);
-			ll[u].df_bfini = 0;
-			//printf("Freed df_b [%d]\n", u);
-		}
-
-		// Clean User-Agent and Referer
-		if (ll[u].df_Ufini){
-			ll[u].df_U = ll[u].df_U - 12;
-			free(ll[u].df_U);
-			ll[u].df_Ufini = 0;
-			ll[u].df_U[0] = '\0';
-			//printf("Freed df_U [%d]\n", u);
-		}
-		
-		if (ll[u].df_Rfini){
-			ll[u].df_R = ll[u].df_R - 9;
-			free(ll[u].df_R);
-			ll[u].df_R[0] = '\0';
-			ll[u].df_Rfini = 0;
-			//printf("Freed df_R [%d]\n", u);
-		}
-
-		// Clean up ReqEnd/Time variables
-		
-		//if (tmpPtra != NULL){
-		//	free(tmpPtra);
-			//printf("Freed tmpPtra [%d]\n", u);
-		//}
-
-			/*
-		if (tmpPtrb != NULL){
-			free(tmpPtrb);
-			//printf("Freed tmpPtrb [%d]\n", u);
-		}
-		if (tmpPtrc != NULL){
-			free(tmpPtrc);
-			//printf("Freed tmpPtrc [%d]\n", u);
-		}
-		*/
-		temp_time[0] = '\0';	
-		
-
-		if (ll[u].df_hfini) {
-			// We have a SessionClose. Lets clean data.
-			//
-			// Clean IP adress
-			ll[u].df_h[0] = '\0';
-			//printf("Clearer [%d]\n", u);
-			ll[u].df_hfini = 0;
-										
-		}
-
-
-
-
+	fo = priv;
+	assert(1 == sscanf(ptr, "%*u %*u.%*u %ld.", &l));
+	t = l;
+	localtime_r(&t, &tm);
+	strftime(tbuf, sizeof tbuf, "%d/%b/%Y:%T %z", &tm);
+	fprintf(fo, "%s - - %s", lp->df_h, tbuf);
+	vsb_finish(lp->sb);
+	fprintf(fo, " \"%s\"", vsb_data(lp->sb));
+	fprintf(fo, " %s", lp->df_b);
+	if (lp->df_R != NULL) {
+		fprintf(fo, " \"%s\"", lp->df_R);
+		free(lp->df_R);
+		lp->df_R = NULL;
 	}
-	
-	
+	if (lp->df_U != NULL) {
+		fprintf(fo, " \"%s\"", lp->df_U);
+		free(lp->df_U);
+		lp->df_U = NULL;
+	}
+	fprintf(fo, "\n");
+
+	return (0);
 }
 
 /*--------------------------------------------------------------------*/
@@ -421,15 +169,11 @@
 main(int argc, char **argv)
 {
 	int i, c;
-	unsigned u, v;
-	unsigned char *p;
-	char *w_opt = NULL;
-	FILE *wfile = NULL;
 	struct VSL_data *vd;
 
 	vd = VSL_New();
 	
-	while ((c = getopt(argc, argv, VSL_ARGS "w:")) != -1) {
+	while ((c = getopt(argc, argv, VSL_ARGS "")) != -1) {
 		i = VSL_Arg(vd, c, optarg);
 		if (i < 0)
 			exit (1);
@@ -439,9 +183,6 @@
 		case 'V':
 			varnish_version("varnishncsa");
 			exit(0);
-		case 'w':
-			w_opt = optarg;
-			break;
 		default:
 			usage();
 		}
@@ -450,36 +191,11 @@
 	if (VSL_OpenLog(vd))
 		exit (1);
 
-	if (w_opt != NULL) {
-		wfile = fopen(w_opt, "w");
-		if (wfile == NULL) {
-			perror(w_opt);
-			exit (1);
-		}
-	}
-	u = 0;
-	v = 0;
-
 	while (1) {
-		i = VSL_NextLog(vd, &p);
+		i = VSL_Dispatch(vd, extended_log_format, stdout);
 		if (i < 0)
 			break;
-		if (i == 0) {
-			if (w_opt == NULL) {
-				if (++v == 100){
-					clean_order();
-					fflush(stdout);
-				}
-			} 
-			usleep(50000);
-			continue;
-		}
-		v = 0;
-		
-		extended_log_format(p);
 	}
-	
-	clean_order();
 	return (0);
 }
 




More information about the varnish-commit mailing list