Debugging restarts/crashes?

Cloude Porteus cloude at instructables.com
Thu May 14 18:31:45 CEST 2009


Sure.
~~~default.vcl~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

NFILES=131072
MEMLOCK=90000
DAEMON_COREFILE_LIMIT="unlimited"
VARNISH_VCL_CONF=/etc/varnish/instructables.vcl
VARNISH_LISTEN_ADDRESS=
VARNISH_LISTEN_PORT=80
VARNISH_ADMIN_LISTEN_ADDRESS=127.0.0.1
VARNISH_ADMIN_LISTEN_PORT=82
VARNISH_MIN_THREADS=400
VARNISH_MAX_THREADS=4000
VARNISH_THREAD_TIMEOUT=60
VARNISH_STORAGE_FILE=/var/lib/varnish/mmap
VARNISH_STORAGE_SIZE=50G
VARNISH_STORAGE="file,${VARNISH_STORAGE_FILE},${VARNISH_STORAGE_SIZE}"
VARNISH_TTL=1800

DAEMON_OPTS="-a ${VARNISH_LISTEN_ADDRESS}:${VARNISH_LISTEN_PORT} \
             -f ${VARNISH_VCL_CONF} \
             -T ${VARNISH_ADMIN_LISTEN_ADDRESS}:${VARNISH_ADMIN_LISTEN_PORT}
\
             -t ${VARNISH_TTL} \
             -w
${VARNISH_MIN_THREADS},${VARNISH_MAX_THREADS},${VARNISH_THREAD_TIMEOUT} \
             -u varnish -g varnish \
             -s ${VARNISH_STORAGE} \
             -p obj_workspace=4096 \
             -p sess_workspace=262144 \
             -p lru_interval=3600 \
             -p listen_depth=8192 \
             -p log_hashstring=off \
             -p sess_timeout=10 \
             -p shm_workspace=32768 \
             -p ping_interval=1 \
             -p thread_pools=4 \
             -p thread_pool_min=100 \
             -p thread_pool_max=4000 \
             -p srcaddr_ttl=0 \
             -p esi_syntax=1 "

~~~instructables.vcl~~~~~~~~~~~~~~~~~~~
C{
#include <stdio.h>
#include <unistd.h>

char myhostname[255] = "";
}C


acl purge {
        "localhost";         /* myself */
        "backup";            /* purge requests will always come from backup
*/
}

backend default {
        /*
         * Default backend definition.  Set this to point to your content
         * server.
         */
         .host = "10.0.0.1"; /* put your back end server here! */
        .port = "8080";
}

sub vcl_recv {
    C{
    if (myhostname[0] == '\0') {
      /* only get hostname once - restart required if hostname changes */
      gethostname(myhostname, 255);
    }
    }C
        /* Do not cache if request is not GET or HEAD */
        if (req.request != "GET" && req.request != "HEAD") {
                /* Forward to 'lookup' if request is an authorized PURGE
request */
                if (req.request == "PURGE") {
                        if (!client.ip ~ purge) {
                                error 405 "Not allowed.";
                        }
                        lookup;
                }
                set req.http.connection = "close";
                pipe;
        }

        /* Do not cache if request contains an Expect header */
        if (req.http.Expect) {
                set req.http.connection = "close";
                pipe;
        }

        # normalize Accept-Encoding to reduce vary
        if (req.http.Accept-Encoding) {
         if (req.http.Accept-Encoding ~ "gzip") {
           set req.http.Accept-Encoding = "gzip";
         } elsif (req.http.Accept-Encoding ~ "deflate") {
           set req.http.Accept-Encoding = "deflate";
         } else {
         unset req.http.Accept-Encoding;
         }
        }


        if (req.url ~ "^/(you|edit|account)") {
                pass;
        }

        /* Enable grace period (varnish serves stale (but cacheable) objects
while retriving
         * object from backend)
         */
        set req.grace = 30s;

        if (req.request == "GET" && req.http.cookie) {
                lookup;
        }
}

sub vcl_fetch {
        set obj.http.X-Cache = "MISS";

        /*if (!obj.valid) {
                error;
        }*/
        if (!obj.cacheable) {
                pass;
        }
        if (obj.http.Set-Cookie) {
                pass;
        }

        if(obj.status >= 500 && obj.status < 600) {
                pass;
        }
        /* Do not cache if response contains any 'no cache' tokens
        */
        if (obj.http.Cache-Control ~ "(private|no-cache|no-store)") {
                pass;
        }

         /*
         * object from backend)
         */
        set obj.grace = 30s;

        /* you might want to change this, I have a default TTL of 10 min
here. */
        if (obj.ttl < 1s) {
                set obj.ttl = 1711s;
        }

        deliver;
}

sub vcl_pipe {
        pipe;
}

sub vcl_pass {
        pass;
}

sub vcl_hash {
        set req.hash += req.url;
        set req.hash += req.http.host;
        hash;
}

sub vcl_hit {

        if (req.request == "PURGE") {
                purge_url(req.url);
                error 200 "Purged";
        }
        if (!obj.cacheable) {
                pass;
        }


        deliver;
}

sub vcl_miss {

        /* Varnish doesn't do IMS to backend, so if not in cache just pass
it through */
        if (req.http.If-Modified-Since) {
                pass;
        }

        if (req.request == "PURGE") {
                error 404 "Not in cache";
        }

        fetch;
}

sub vcl_hit {
        if (req.request == "PURGE") {
                purge_url(req.url);
                error 200 "Purged";
        }
        if (!obj.cacheable) {
                pass;
        }
        deliver;
}

sub vcl_timeout {
        discard;
}

sub vcl_discard {
        discard;
}

sub vcl_deliver {

     C{
     VRT_SetHdr(sp, HDR_RESP, "\014X-Cache-Svr:", myhostname,
vrt_magic_string_end);
     }C

     /* mark hit/miss on the request */
     if (obj.hits > 0) {
       set resp.http.X-Cache = "HIT";
       set resp.http.X-Cache-Hits = obj.hits;
     } else {
       set resp.http.X-Cache = "MISS";
     }
        deliver;
}

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

On Thu, May 14, 2009 at 9:28 AM, Artur Bergman <sky at crucially.net> wrote:

> Can you share your VCL?
> Artur
>
> On May 14, 2009, at 9:26 AM, Cloude Porteus wrote:
>
> I was able to find this entry in /var/log/messages, but it's hard to tell
> if it's going to be helpful or not for debugging. I can't seem to find a
> daemon.log in /var/log.
> May 13 13:06:48 squid03 varnishd[14993]: Child (14994) not responding to
> ping, killing it.
> May 13 13:06:48 squid03 varnishd[14993]: Child (14994) died signal=6
> May 13 13:06:48 squid03 varnishd[14993]: Child (14994) Panic message:
> Assert error in Tcheck(), cache.h line 648:   Condition((t.e) != 0) not
> true.  thread = (cache-worker)sp = 0x2ab733984008 {   fd = 19, id = 19, xid
> = 2173533104,   client = 10.0.0.1:58322,   step = STP_FETCH,   handling =
> fetch,   ws = 0x2ab733984078 {      id = "sess",     {s,f,r,e} =
> {0x2ab733984808,,+1162,(nil),+262144},   },     worker = 0x2ab7b4a46bd0 {
>   },     vcl = {       srcname = {         "input",         "Default",
> },     },   obj = 0x2aaddddaf000 {     refcnt = 1, xid = 2173533104,     ws
> = 0x2aaddddaf028 {        id = "obj",       {s,f,r,e} =
> {0x2aaddddaf358,,0x2aaddddaf358,(nil),+3240},     },     http = {       ws =
> 0x2aaddddaf028 {          id = "obj",         {s,f,r,e} =
> {0x2aaddddaf358,,0x2aaddddaf358,(nil),+3240},       },     },     len = 0,
>   store = {     },   }, },
> May 13 13:06:48 squid03 varnishd[14993]: child (32738) Started
> May 13 13:06:48 squid03 varnishd[14993]: Child (32738) said Closed fds: 4 5
> 6 10 11 13 14
> May 13 13:06:48 squid03 varnishd[14993]: Child (32738) said Child starts
> May 13 13:06:48 squid03 varnishd[14993]: Child (32738) said managed to mmap
> 53687091200 bytes of 53687091200
> May 13 13:06:48 squid03 varnishd[14993]: Child (32738) said Ready
> May 13 13:06:50 squid03 varnishd[14993]: Child (32738) died signal=6
> May 13 13:06:50 squid03 varnishd[14993]: Child (32738) Panic message:
> Assert error in Tcheck(), cache.h line 648:   Condition((t.e) != 0) not
> true.  thread = (cache-worker)sp = 0x2ab736402008 {   fd = 267, id = 267,
> xid = 1961344622,   client = 10.0.0.1:59424,   step = STP_FETCH,
> handling = fetch,   ws = 0x2ab736402078 {      id = "sess",     {s,f,r,e} =
> {0x2ab736402808,,+1162,(nil),+262144},   },     worker = 0x2ab7432ffbd0 {
>   },     vcl = {       srcname = {         "input",         "Default",
> },     },   obj = 0x2aaab0ce0000 {     refcnt = 1, xid = 1961344622,     ws
> = 0x2aaab0ce0028 {        id = "obj",       {s,f,r,e} =
> {0x2aaab0ce0358,,0x2aaab0ce0358,(nil),+3240},     },     http = {       ws =
> 0x2aaab0ce0028 {          id = "obj",         {s,f,r,e} =
> {0x2aaab0ce0358,,0x2aaab0ce0358,(nil),+3240},       },     },     len = 0,
>   store = {     },   }, },
> May 13 13:06:50 squid03 varnishd[14993]: child (390) Started
> May 13 13:06:50 squid03 varnishd[14993]: Child (390) said Closed fds: 4 5 6
> 10 11 13 14
> May 13 13:06:50 squid03 varnishd[14993]: Child (390) said Child starts
> May 13 13:06:50 squid03 varnishd[14993]: Child (390) said managed to mmap
> 53687091200 bytes of 53687091200
> May 13 13:06:50 squid03 varnishd[14993]: Child (390) said Ready
>
>
> On Wed, May 13, 2009 at 5:57 PM, John Adams <jna at twitter.com> wrote:
>
>> Go look in /var/log/daemon.log, and find out why the children are dying.
>> If you're getting SEGVs, that's going to be listed in your syslog.
>> A big reason for crashing is running out of session workspace
>> (sess_workspace). Find out if that's happening, and if it is, increase it.
>> (say, to 512k)
>>
>>
>> -j
>>
>> On May 13, 2009, at 5:17 PM, Cloude Porteus wrote:
>>
>> Varnish seems to restart itself every so often, usually after a few days.
>> We finally installed some SSD's, which made all the difference and our
>> system load came down to the .02 - .2 range.
>>
>> I don't currently have any logging turned on. If I did, would it have any
>> useful information for me?
>>
>> We typically don't have more that 700k objects cached. I also never see
>> the worker threads go above 400. We are running on CentOS 5.
>>
>> Our config:
>>
>> NFILES=131072
>> MEMLOCK=90000
>> DAEMON_COREFILE_LIMIT="unlimited"
>> VARNISH_VCL_CONF=/etc/varnish/instructables.vcl
>> VARNISH_LISTEN_ADDRESS=
>> VARNISH_LISTEN_PORT=80
>> VARNISH_ADMIN_LISTEN_ADDRESS=127.0.0.1
>> VARNISH_ADMIN_LISTEN_PORT=82
>> VARNISH_MIN_THREADS=400
>> VARNISH_MAX_THREADS=4000
>> VARNISH_THREAD_TIMEOUT=60
>> VARNISH_STORAGE_FILE=/var/lib/varnish/mmap
>> VARNISH_STORAGE_SIZE=50G
>> VARNISH_STORAGE="file,${VARNISH_STORAGE_FILE},${VARNISH_STORAGE_SIZE}"
>> VARNISH_TTL=1800
>>
>> DAEMON_OPTS="-a ${VARNISH_LISTEN_ADDRESS}:${VARNISH_LISTEN_PORT} \
>>              -f ${VARNISH_VCL_CONF} \
>>              -T
>> ${VARNISH_ADMIN_LISTEN_ADDRESS}:${VARNISH_ADMIN_LISTEN_PORT} \
>>              -t ${VARNISH_TTL} \
>>              -w
>> ${VARNISH_MIN_THREADS},${VARNISH_MAX_THREADS},${VARNISH_THREAD_TIMEOUT} \
>>              -u varnish -g varnish \
>>              -s ${VARNISH_STORAGE} \
>>              -p obj_workspace=4096 \
>>              -p sess_workspace=262144 \
>>              -p lru_interval=3600 \
>>              -p listen_depth=8192 \
>>              -p log_hashstring=off \
>>              -p sess_timeout=10 \
>>              -p shm_workspace=32768 \
>>              -p ping_interval=1 \
>>              -p thread_pools=4 \
>>              -p thread_pool_min=100 \
>>              -p thread_pool_max=4000 \
>>              -p srcaddr_ttl=0 \
>>              -p esi_syntax=1 "
>>
>> I don't think out load is all that heavy and we have a high hitrate ratio.
>> Our varnishstat output.
>>
>> 0+11:02:14
>>
>> Hitrate ratio:       10      100      133
>> Hitrate avg:     0.9875   0.9833   0.9832
>>
>>     15370527       325.94       386.84 Client connections accepted
>>     15370517       325.94       386.84 Client requests received
>>     14604988       320.94       367.57 Cache hits
>>       182795         4.00         4.60 Cache hits for pass
>>       581426         1.00        14.63 Cache misses
>>       765513         5.00        19.27 Backend connections success
>>            1         0.00         0.00 Backend connections failures
>>          119          .            .   N struct sess_mem
>>           10          .            .   N struct sess
>>       447866          .            .   N struct object
>>       440545          .            .   N struct objecthead
>>       913238          .            .   N struct smf
>>         4490          .            .   N small free smf
>>            2          .            .   N large free smf
>>            0          .            .   N struct vbe_conn
>>          112          .            .   N struct bereq
>>          400          .            .   N worker threads
>>          400         0.00         0.01 N worker threads created
>>       313048         0.00         7.88 N worker threads limited
>>            1          .            .   N backends
>>       126326          .            .   N expired objects
>>      1250255          .            .   N LRU moved objects
>>           18         0.00         0.00 HTTP header overflows
>>     13645614       292.95       343.42 Objects sent with write
>>     15370518       327.94       386.84 Total Sessions
>>     15370510       327.94       386.84 Total Requests
>>            7         0.00         0.00 Total pipe
>>       191394         5.00         4.82 Total pass
>>       765481         9.00        19.27 Total fetch
>>   7252376430    156332.93    182523.19 Total header bytes
>>  74103505138    772233.39   1864989.81 Total body bytes
>>     15370160       327.94       386.83 Session Closed
>>          365         0.00         0.01 Session herd
>>    758091326     15625.09     19079.16 SHM records
>>     62365137      1317.75      1569.57 SHM writes
>>          468         0.00         0.01 SHM flushes due to overflow
>>          589         0.00         0.01 SHM MTX contention
>>          307         0.00         0.01 SHM cycles through buffer
>>      1573886        15.00        39.61 allocator requests
>>       908746          .            .   outstanding allocations
>>   7766196224          .            .   bytes allocated
>>  45920894976          .            .   bytes free
>>
>>
>> --
>> VP of Product Development
>> Instructables.com
>>
>> http://www.instructables.com/member/lebowski
>> _______________________________________________
>> varnish-dev mailing list
>> varnish-dev at projects.linpro.no
>> http://projects.linpro.no/mailman/listinfo/varnish-dev
>>
>>
>>  ---
>> John Adams
>> Twitter Operations
>> jna at twitter.com
>> http://twitter.com/netik
>>
>>
>>
>>
>>
>
>
> --
> VP of Product Development
> Instructables.com
>
> http://www.instructables.com/member/lebowski
>  _______________________________________________
> varnish-dev mailing list
> varnish-dev at projects.linpro.no
> http://projects.linpro.no/mailman/listinfo/varnish-dev
>
>
>


-- 
VP of Product Development
Instructables.com

http://www.instructables.com/member/lebowski
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://www.varnish-cache.org/lists/pipermail/varnish-dev/attachments/20090514/a8cbe230/attachment-0001.html>


More information about the varnish-dev mailing list