[Varnish] #1235: Frequent Varnish crashes - Size of Varnish cache never grows

Mon Dec 3 18:39:53 CET 2012

#1235: Frequent Varnish crashes - Size of Varnish cache never grows
------------------------+----------------------
 Reporter:  msallen333  |       Type:  defect
   Status:  new         |   Priority:  normal
Milestone:              |  Component:  varnishd
  Version:  3.0.2       |   Severity:  normal
 Keywords:  crash       |
------------------------+----------------------
 *** PROBLEM DESCRIPTION ***

 Varnish 3.0.2-1daemon crashes and restarts at least once per week with
 below output in /var/log/messages. In addition, the
 usage of /varnish filesystem never climbs above 404G.

 I have already disabled "Transparent Hugepages".

 Has anyone else experienced this same problem, and possibly have a
 solution?

 =============================================

 # /var/log/messages
 Dec  2 12:45:37 lx11 varnishd[7568]: Child (7569) not responding to CLI,
 killing it.
 Dec  2 12:45:47 lx11 varnishd[7568]: Child (7569) not responding to CLI,
 killing it.
 Dec  2 12:45:57 lx11 varnishd[7568]: Child (7569) not responding to CLI,
 killing it.
 Dec  2 12:46:06 lx11 varnishd[7568]: Child (7569) not responding to CLI,
 killing it.
 Dec  2 12:46:06 lx11 varnishd[7568]: Child (7569) not responding to CLI,
 killing it.
 Dec  2 12:46:06 lx11 varnishd[7568]: Child (7569) died signal=3 (core
 dumped)
 Dec  2 12:46:06 lx11 varnishd[7568]: child (9172) Started
 Dec  2 12:46:06 lx11 varnishd[7568]: Child (9172) said Child starts
 Dec  2 12:46:06 lx11 varnishd[7568]: Child (9172) said SMF.s0 mmap'ed
 1589334294528 bytes of 1589334294528

 # df -h | grep varnish
 /dev/mapper/emcvg1-varnish 2.0T  404G  1.5T  22% /varnish

 # ps -ef | grep arnish
 root      4130     1  0 Nov30 ?        00:00:10 /usr/bin/varnishlog -a -w
 /var/log/varnish/varnish.log -D -P /var/run/varnishlog.pid
 root      4137     1  0 Nov30 ?        00:07:08 /usr/bin/varnishncsa -a -w
 /var/log/varnish/varnishncsa.log -D -P /var/run/varnishncsa.pid
 root      7568     1  0 Nov30 ?        00:00:02 /usr/sbin/varnishd -P
 /var/run/varnish.pid -a :80 -f /etc/varnish/default.vcl -T 127.0.0.1:6082
 -t 120 -w 1,1000,120 -u varnish -g varnish -S /etc/varnish/secret -s
 file,/varnish/varnish_storage.bin,98%
 varnish   9172  7568 13 Dec02 ?        02:48:45 /usr/sbin/varnishd -P
 /var/run/varnish.pid -a :80 -f /etc/varnish/default.vcl -T 127.0.0.1:6082
 -t 120 -w 1,1000,120 -u varnish -g varnish -S /etc/varnish/secret -s
 file,/varnish/varnish_storage.bin,98%

 # cat /sys/kernel/mm/redhat_transparent_hugepage/enabled
 always [never]

 Which varnish version ?

 # rpm -qa | grep varnish
 varnish-libs-3.0.2-1.el5.x86_64
 varnish-3.0.2-1.el5.x86_64
 varnish-release-3.0-1.noarch

 Which type of CPU ?

 # more /proc/cpuinfo
 processor       : 0
 vendor_id       : GenuineIntel
 cpu family      : 6
 model           : 29
 model name      : Intel(R) Xeon(R) CPU           E7440  @ 2.40GHz
 stepping        : 1
 cpu MHz         : 2400.080
 cache size      : 16384 KB
 physical id     : 0
 siblings        : 4
 core id         : 0
 cpu cores       : 4
 apicid          : 0
 initial apicid  : 0
 fpu             : yes
 fpu_exception   : yes
 cpuid level     : 11
 wp              : yes
 flags           : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca
 cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall lm
 constant_tsc arch_perfmon pebs bts rep_good xtopology aperfmperf pni
 dtes64 monitor ds_cpl vm
 x est tm2 ssse3 cx16 xtpr pdcm dca sse4_1 lahf_lm dts tpr_shadow vnmi
 flexpriority
 bogomips        : 4800.16
 clflush size    : 64
 cache_alignment : 64
 address sizes   : 40 bits physical, 48 bits virtual
 power management:

 32 or 64 bit mode ?

 64bit

 how much RAM ?

 128GB

 # more meminfo
 MemTotal:       132153720 kB
 MemFree:          662828 kB
 Buffers:          334308 kB
 Cached:         124553164 kB
 SwapCached:           28 kB
 Active:         15416940 kB
 Inactive:       114045600 kB
 Active(anon):    4406140 kB
 Inactive(anon):   174144 kB
 Active(file):   11010800 kB
 Inactive(file): 113871456 kB
 Unevictable:        5244 kB
 Mlocked:            5244 kB
 SwapTotal:      16777208 kB
 SwapFree:       16777080 kB
 Dirty:             33320 kB
 Writeback:             0 kB
 AnonPages:       4580508 kB
 Mapped:         68785120 kB
 Shmem:              1784 kB
 Slab:             548132 kB
 SReclaimable:     429880 kB
 SUnreclaim:       118252 kB
 KernelStack:        5504 kB
 PageTables:       158976 kB
 NFS_Unstable:          0 kB
 Bounce:                0 kB
 WritebackTmp:          0 kB
 CommitLimit:    82854068 kB
 Committed_AS:   45740984 kB
 VmallocTotal:   34359738367 kB
 VmallocUsed:      493684 kB
 VmallocChunk:   34359215588 kB
 HardwareCorrupted:     0 kB
 AnonHugePages:   1718272 kB
 HugePages_Total:       0
 HugePages_Free:        0
 HugePages_Rsvd:        0
 HugePages_Surp:        0
 Hugepagesize:       2048 kB
 DirectMap4k:        9456 kB
 DirectMap2M:    134205440 kB

 Which OS/kernel version ?

 # cat /etc/redhat-release
 Red Hat Enterprise Linux Server release 6.2 (Santiago)
 # cat /proc/version
 Linux version 2.6.32-220.4.2.el6.x86_64
 (mockbuild at x86-003.build.bos.redhat.com) (gcc version 4.4.6 20110731 (Red
 Hat 4.4.6-3) (GCC) ) #1 SMP Mon Feb 6 16:39:28 EST 2012

 default VCL or do you have your own ?

 # cat /etc/varnish/default.vcl
 # This is a basic VCL configuration file for varnish.  See the vcl(7)
 # man page for details on VCL syntax and semantics.
 #
 # Default backend definition.  Set this to point to your content
 # server.
 #
 backend default {
   .host = "x.y.z";
   .port = "8080";
   .connect_timeout = 15s;
   .first_byte_timeout = 120s;
   .between_bytes_timeout = 120s;
 }
 #
 # Below is a commented-out copy of the default VCL logic.  If you
 # redefine any of these subroutines, the built-in logic will be
 # appended to your code.
 # sub vcl_recv {
 #     if (req.restarts == 0) {
 #       if (req.http.x-forwarded-for) {
 #           set req.http.X-Forwarded-For =
 #               req.http.X-Forwarded-For + ", " + client.ip;
 #       } else {
 #           set req.http.X-Forwarded-For = client.ip;
 #       }
 #     }
 #     if (req.request != "GET" &&
 #       req.request != "HEAD" &&
 #       req.request != "PUT" &&
 #       req.request != "POST" &&
 #       req.request != "TRACE" &&
 #       req.request != "OPTIONS" &&
 #       req.request != "DELETE") {
 #         /* Non-RFC2616 or CONNECT which is weird. */
 #         return (pipe);
 #     }
 #     if (req.request != "GET" && req.request != "HEAD") {
 #         /* We only deal with GET and HEAD by default */
 #         return (pass);
 #     }
 #     if (req.http.Authorization || req.http.Cookie) {
 #         /* Not cacheable by default */
 #         return (pass);
 #     }
 #     return (lookup);
 # }
 #

 sub vcl_fetch {
     set beresp.grace = 1h;

     if (beresp.http.content-type ~ "(text|application)") {
         set beresp.do_gzip = true;
     }

 }

 sub vcl_recv {
     # unset cookies since we don't want to bypass caching normally
     if (req.http.cookie) {
         unset req.http.cookie;
     }

     set req.grace = 1h;
 }

 sub vcl_deliver {
     if (!resp.http.Vary) {
         set resp.http.Vary = "Accept-Encoding";
     } else if (resp.http.Vary !~ "(?i)Accept-Encoding") {
         set resp.http.Vary = resp.http.Vary + ",Accept-Encoding";
     }
 }

 # sub vcl_pipe {
 #     # Note that only the first request to the backend will have
 #     # X-Forwarded-For set.  If you use X-Forwarded-For and want to
 #     # have it set for all requests, make sure to have:
 #     # set bereq.http.connection = "close";
 #     # here.  It is not set by default as it might break some broken web
 #     # applications, like IIS with NTLM authentication.
 #     return (pipe);
 # }
 #
 # sub vcl_pass {
 #     return (pass);
 # }
 #
 # sub vcl_hash {
 #     hash_data(req.url);
 #     if (req.http.host) {
 #         hash_data(req.http.host);
 #     } else {
 #         hash_data(server.ip);
 #     }
 #     return (hash);
 # }
 #
 # sub vcl_hit {
 #     return (deliver);
 # }
 #
 # sub vcl_miss {
 #     return (fetch);
 # }
 #
 # sub vcl_fetch {
 #     if (beresp.ttl <= 0s ||
 #         beresp.http.Set-Cookie ||
 #         beresp.http.Vary == "*") {
 #               /*
 #                * Mark as "Hit-For-Pass" for the next 2 minutes
 #                */
 #               set beresp.ttl = 120 s;
 #               return (hit_for_pass);
 #     }
 #     return (deliver);
 # }
 #
 # sub vcl_deliver {
 #     return (deliver);
 # }
 #
 sub vcl_error {
     set obj.http.Content-Type = "text/html; charset=utf-8";
     set obj.http.Retry-After = "5";
     synthetic {"
 <?xml version="1.0" encoding="utf-8"?>
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html>
 <head>
   <title>The page is temporarily unavailable</title>
 </head>
 <body>
   <h1>Chronicling America is currently unavailable</h1>
   <p>The Chronicling America website is currently offline, undergoing
 maintenance.  We regret the inconvenience, and invite you to visit other
 collections available on the Library of Congress website at <a
 href="http://www.loc.gov">www.loc.gov</a> while we are working to restore
 service.</p>
  </body>
 </html>
 "};
     return (deliver);
 }
 #
 # sub vcl_init {
 #       return (ok);
 # }
 #
 # sub vcl_fini {
 #       return (ok);
 # }

-- 
Ticket URL: <https://www.varnish-cache.org/trac/ticket/1235>
Varnish <https://varnish-cache.org/>
The Varnish HTTP Accelerator