Bug : Assert error in exp_timer() | Child not responding to ping, killing it.

Benjamin Sonntag benjamin at octopuce.fr
Fri Dec 12 03:25:43 CET 2008


Hi all,

(first of all, I'll be glad to obtain a login/pass on the trac so that I
may create a ticket for this one if it became a real bug ;) and help
varnish community)

I guess I found a bug :) So please find below the informations I was
able to gather to start working on this issue :

We are using varnish 2.0.2 (debian package version from lenny) on this
machine (from dell) :
Linux cache1b 2.6.25-2-amd64 #1 SMP Fri Jun 27 14:47:16 UTC 2008 x86_64
GNU/Linux
- 2 physical processors (8 pipelines total) : Intel(R) Xeon(R)
CPU           X5460  @ 3.16GHz
- 8* 4GB FB-DIMM (total 32GB RAM) (yes, I like this machine ;) )

I found a previous similar error last July in the mailing list, but
didn't know how to solve it :
we put those parameters high enough (I guess) :
cli_timeout                40 [seconds]
ping_interval              12 [seconds]

The main question is : how can I create a backtrace, how can I obtain a
core dump to create this backtrace ?

Thanks for your help, I will, of course, do my best to find a solution
for this issue.

Regards,

Benjamin Sonntag





Here is what Syslog said (the most important I guess) :

Dec 11 20:17:01 cache1b /USR/SBIN/CRON[30655]: (root) CMD (   cd / &&
run-parts --report /etc/cron.hourly)
Dec 11 20:17:09 cache1b varnishd[63966]: Child (42470) not responding to
ping, killing it.
 Child (42470) died signal=6
 Child (42470) Panic message: Assert error in exp_timer(),
cache_expire.c line 303:   Condition(oe2->timer_when >= oe->timer_when)
not true.  thread = (cache-timeout)
 Child cleanup complete
 child (30657) Started
 Child (30657) said Closed fds: 4 5 6 10 11 13 14
 Child (30657) said Child starts
 Child (30657) said managed to mmap 68719476736 bytes of 68719476736
 Child (30657) said Ready
 Child (30657) said Probe("GET /search/C=?definition=homepage HTTP/1.1^M
 Child (30657) said Host: 192.168.131.101^M
 Child (30657) said Connection: close^M
 Child (30657) said ^M
 Child (30657) said ", 4, 1)
 Child (30657) said Probe("GET /search/C=?definition=homepage HTTP/1.1^M
 Child (30657) said Host: 192.168.131.102^M
 Child (30657) said Connection: close^M
 Child (30657) said ^M
 Child (30657) said ", 4, 1)
 Child (30657) said Probe("GET /search/C=?definition=homepage HTTP/1.1^M
 Child (30657) said Host: 192.168.131.107^M
 Child (30657) said Connection: close^M
 Child (30657) said ^M
 Child (30657) said ", 4, 1)
Dec 11 20:20:01 cache1b /USR/SBIN/CRON[31317]: (root) CMD (if [ -x
/etc/munin...



Here is the varnishlog extract (my server is quite busy, I had a hard
time finding this place, so awk|sort|uniq|grep was my friends ;) )


 1137 StatSess     c 192.168.131.8 43747 0 1 1 0 0 0 233 5
    0 StatAddr     - 192.168.131.8 0 285468 2281231 2281230 0 0 1873552
489132873 83107957357
 1139 ReqStart     c 192.168.131.7 51390 2047115919
 1139 RxRequest    c GET
 1139 RxURL        c /confidential/url/blabla&purge=1
 1139 RxProtocol   c HTTP/1.1
 1139 RxHeader     c Host: varnish:30000
 1139 RxHeader     c Accept: */*
 1139 VCL_call     c recv
 1139 VCL_return   c lookup
 1139 VCL_call     c hash
 1139 VCL_return   c hash
 1139 VCL_call     c miss
 1139 VCL_return   c fetch
 1137 BackendOpen  b be1b 192.168.131.30 35113 192.168.131.101 30000
 1139 Backend      c 1137 lb3 be1b
 1137 TxRequest    b GET
 1137 TxURL        b /confidential/url/blabla&purge=1
 1137 TxProtocol   b HTTP/1.1
 1137 TxHeader     b Host: varnish:30000
 1137 TxHeader     b Accept: */*
 1137 TxHeader     b X-Varnish: 2047115919
 1137 TxHeader     b X-Forwarded-For: 192.168.131.7
 1157 SessionClose c remote closed
    0 WorkThread   - 0x43033cb0 start
    0 CLI          - Rd vcl.load boot ./vcl.1P9zoqAU.so
    0 CLI          - Wr 0 200 Loaded "./vcl.1P9zoqAU.so" as "boot"
    0 CLI          - Rd vcl.load test ./vcl.FANefPfn.so
    0 WorkThread   - 0x45037cb0 start
    0 Backend_health - be2b Still sick 4--X-S-RH 1 3 8 0.021917 0.021917
HTTP/1.1 200 OK
    0 Backend_health - be1b Still sick 4--X-S--- 0 3 8 0.000000 0.000000
    0 Backend_health - be3b Still sick 4--X-S--- 0 3 8 0.000000 0.000000
    0 WorkThread   - 0x40c85cb0 start
    0 CLI          - Wr 0 200 Loaded "./vcl.FANefPfn.so" as "test"
    0 CLI          - Rd vcl.use test
    0 CLI          - Wr 0 200
    0 CLI          - Rd start
    0 Debug        - "Acceptor is epoll"
    0 CLI          - Wr 0 200
    0 WorkThread   - 0x4683acb0 start
    0 WorkThread   - 0x4703bcb0 start
    0 WorkThread   - 0x4783ccb0 start
    0 WorkThread   - 0x4803dcb0 start
   11 SessionOpen  c 192.168.131.9 49159 :30000
   11 ReqStart     c 192.168.131.9 49159 705356244
   11 RxRequest    c GET
   11 RxURL        c /confidential/url/blabla&ttl=120
   11 RxProtocol   c HTTP/1.1
   11 RxHeader     c Host: varnish:30000
   11 RxHeader     c Accept: */*
   11 VCL_call     c recv
   11 VCL_return   c lookup
   11 VCL_call     c hash
   11 VCL_return   c hash
   11 VCL_call     c miss
   11 VCL_return   c fetch
   11 VCL_call     c error
   11 VCL_return   c deliver
   11 Length       c 5
   11 VCL_call     c deliver
   11 VCL_return   c deliver
   11 TxProtocol   c HTTP/1.1
   11 TxStatus     c 503
   11 TxResponse   c Service Unavailable
   11 TxHeader     c Server: Varnish
   11 TxHeader     c Retry-After: 0
   11 TxHeader     c Content-Type: text/html; charset=utf-8
   11 TxHeader     c Content-Length: 5
   11 TxHeader     c Date: Thu, 11 Dec 2008 19:17:10 GMT
   11 TxHeader     c X-Varnish: 705356244
   11 TxHeader     c Age: 0
   11 TxHeader     c Via: 1.1 varnish
   11 TxHeader     c Connection: close
 

Then our VCL code (... don't know if it's good or bad syntax, but it
used to work)


backend be1b {
    .host = "192.168.131.101";
    .port = "30000";
    .probe = {
                 .url = "/search/C=?definition=homepage";
                 .timeout = 4s;
                 .interval = 1s;
                 .window = 8;
                 .threshold = 3;
             }
}
backend be2b {
    .host = "192.168.131.102";
    .port = "30000";
    .probe = {
                 .url = "/search/C=?definition=homepage";
                 .timeout = 4s;
                 .interval = 1s;
                 .window = 8;
                 .threshold = 3;
             }
}
backend be3b {
    .host = "192.168.131.107";
    .port = "30000";
    .probe = {
                 .url = "/search/C=?definition=homepage";
                 .timeout = 4s;
                 .interval = 1s;
                 .window = 8;
                 .threshold = 3;
             }
}
director lb3 round-robin {
             {
                 .backend        = be1b;
             }
             {
                 .backend        = be2b;
             }
             {
                 .backend        = be3b;
             }
}

sub vcl_recv {
     set req.backend = lb3;
     if (req.url ~ "&nocache=1") {
        set req.url = regsub(req.url,"&nocache=1","");
    pass;
     }
     lookup;
}

sub vcl_hash {
    set req.hash += req.url;
    hash;
}

sub vcl_fetch {
     if (obj.status != 200 && obj.status != 403 && obj.status != 404) {
            restart;
     }
     if (obj.ttl < 3600s) {
         set obj.ttl = 3600s;
     }
     # Si on a &ttl=nombre, on fixe la ttl a <nombre> heures
     if (req.url ~ "&ttl=") {
    if (req.url ~ "&ttl=001") { set obj.ttl=3600s; }
    if (req.url ~ "&ttl=002") { set obj.ttl=7200s; }
    if (req.url ~ "&ttl=003") { set obj.ttl=10800s; }
    if (req.url ~ "&ttl=006") { set obj.ttl=21600s; }
    if (req.url ~ "&ttl=009") { set obj.ttl=32400s; }
    if (req.url ~ "&ttl=012") { set obj.ttl=43200s; }
    if (req.url ~ "&ttl=015") { set obj.ttl=54000s; }
    if (req.url ~ "&ttl=018") { set obj.ttl=64800s; }
    if (req.url ~ "&ttl=021") { set obj.ttl=75600s; }
    if (req.url ~ "&ttl=024") { set obj.ttl=86400s; }
    if (req.url ~ "&ttl=096") { set obj.ttl=345600s; }
    if (req.url ~ "&ttl=168") { set obj.ttl=604800s; }
    if (req.url ~ "&ttl=672") { set obj.ttl=2419200s; }
        set req.url = regsub(req.url,"&ttl=([0-9]+)","");
     }
     # Si on a &purge=1, on purge l'entree demandee du cache.
     if (req.url ~ "&purge=1") {
        set req.url = regsub(req.url,"&purge=1","");
        purge_url(req.url);
     }
}

sub vcl_error {
    set obj.http.Content-Type = "text/html; charset=utf-8";
    synthetic {"error"};
    deliver;
}





More information about the varnish-dev mailing list