503 Error was met frequently

Shaohui Zheng shaohui.zheng at gmail.com
Mon Jan 7 19:15:33 CET 2013


Hello,

    After a few days of effort I still have not fixed my problem. I have
exhausted almost every method I could think of, so I am asking the
community for help.

I use Varnish as a web cache and load balancer in front of 3 web nodes, but
recently I have been getting 503 errors frequently.

My varnish configuration file:
=======================================================
# Backend "nanjing": web node at 10.80.125.66, port 80.
backend nanjing {
    .host = "10.80.125.66";
    .port = "80";

    # Connect / first-byte / between-bytes timeouts, all 1800 seconds.
    .connect_timeout = 1800s;
    .first_byte_timeout = 1800s;
    .between_bytes_timeout = 1800s;

    # Health probe: fetch /live.html every second with a 3s timeout;
    # the backend counts as healthy when at least 2 of the last 10
    # probes succeeded.
    .probe = {
        .url = "/live.html";
        .interval = 1s;
        .timeout = 3s;
        .window = 10;
        .threshold = 2;
    }
}

# Backend "hangzhou": web node at 10.80.125.68, port 80.
backend hangzhou {
    .host = "10.80.125.68";
    # Alternate address, currently disabled:
    #.host = "10.36.146.202";
    .port = "80";

    # Connect / first-byte / between-bytes timeouts, all 1800 seconds.
    .connect_timeout = 1800s;
    .first_byte_timeout = 1800s;
    .between_bytes_timeout = 1800s;

    # Health probe: fetch /live.html every second with a 3s timeout;
    # the backend counts as healthy when at least 2 of the last 10
    # probes succeeded.
    .probe = {
        .url = "/live.html";
        .interval = 1s;
        .timeout = 3s;
        .window = 10;
        .threshold = 2;
    }
}
# Backend "chongqing": web node at 10.80.125.76, port 80.
backend chongqing {
    .host = "10.80.125.76";
    .port = "80";

    # Connect / first-byte / between-bytes timeouts, all 1800 seconds.
    .connect_timeout = 1800s;
    .first_byte_timeout = 1800s;
    .between_bytes_timeout = 1800s;

    # Health probe: fetch /live.html every second with a 3s timeout;
    # the backend counts as healthy when at least 2 of the last 10
    # probes succeeded.
    .probe = {
        .url = "/live.html";
        .interval = 1s;
        .timeout = 3s;
        .window = 10;
        .threshold = 2;
    }
}



# Random director "proxy": spreads requests over the three backends,
# weighted chongqing : nanjing : hangzhou = 2 : 4 : 4.
director proxy random {
    { .backend = chongqing; .weight = 2; }
    { .backend = nanjing;   .weight = 4; }
    { .backend = hangzhou;  .weight = 4; }
}

# Clients allowed to send PURGE requests (checked in vcl_recv):
# the local host and the 10.80.125.0/24 network.
acl purge {
    "localhost";
    "10.80.125.0"/24;
}

# vcl_recv: send every request to the "proxy" director, then decide whether
# to pass it, look it up in the cache, or fall through to the built-in
# vcl_recv logic.
sub vcl_recv {
    set req.backend = proxy;

    # POST - logins and edits; always passed to the backend.
    if (req.request == "POST") {
        return(pass);
    }

    # PURGE - the CacheFu product can invalidate updated URLs.
    # Only clients matching the "purge" ACL are allowed to do so.
    if (req.request == "PURGE") {
        if (!client.ip ~ purge) {
            error 405 "Not allowed.";
        }
        return(lookup);
    }

    # Don't cache authenticated requests (those carrying an __ac* cookie).
    if (req.http.Cookie &&
        req.http.Cookie ~ "__ac(|_(name|password|persistent))=") {

        # Force lookup of static assets, which are unlikely to need
        # protection. The pattern is anchored to the end of the path
        # (optionally followed by a query string) so that URLs which merely
        # contain ".js" or ".css" -- e.g. "/data.json" or "/app.jsp" -- do
        # not lose their cookie and get served from the shared cache.
        if (req.url ~ "\.(js|css)(\?.*)?$") {
            remove req.http.cookie;
            return(lookup);
        }
        return(pass);
    }

    # The default vcl_recv is used from here.
}

# vcl_hit: custom hook for cache hits.
# The PURGE handling below is commented out, so this sub currently adds
# nothing of its own; note that vcl_recv still returns lookup for PURGE.
sub vcl_hit {
       # if (req.request == "PURGE") {
       #         purge('');
       #         error 200 "Purged";
       # }
}
# vcl_miss: custom hook for cache misses.
# The PURGE handling below is commented out, so this sub currently adds
# nothing of its own; note that vcl_recv still returns lookup for PURGE.
sub vcl_miss {
       # if (req.request == "PURGE") {
       #         purge('');
       #         error 200 "Purged";
       # }
}

# Enforce a minimum TTL, since we can PURGE changed objects actively
# from Zope by using the CacheFu product

# vcl_fetch: raise the TTL of every fetched object to at least one hour.
sub vcl_fetch {
    # Anything with a TTL below 3600s is bumped up to 3600s.
    if (beresp.ttl < 3600s) {
        set beresp.ttl = 3600s;
    }
}


Varnish startup script:
==========================================
# Start varnishd for the "vcache-my" instance.
# Expects $ip to hold the listen address: client traffic is served on
# port 80 and the management CLI on port 2048.
#
# Every continuation backslash must be preceded by a space. Without it the
# shell joins the option value with the next line (for example
# "-n vcache-my-p thread_pools=2"), which corrupts the instance name and
# several -p parameters.
varnishd -f /etc/varnish/my.vcl \
  -s malloc,8192M \
  -a "${ip}:80" \
  -T "${ip}:2048" \
  -n vcache-my \
  -p thread_pools=2 \
  -p thread_pool_max=15000 \
  -p thread_pool_min=500 \
  -p listen_depth=2048 \
  -p lru_interval=1800 \
  -h classic,169313 \
  -p connect_timeout=1800 \
  -p http_max_hdr=8192 \
  -p http_resp_hdr_len=18192 \
  -p max_restarts=6

I checked the backend status:
[root at hongkong varnish]# varnishadm -n vcache-my backend.list
==============================================
Backend name                   Refs   Admin      Probe
nanjing(10.80.125.66,,80)      68     probe      Healthy 8/10
hangzhou(10.80.125.68,,80)     66     probe      Healthy 7/10
chongqing(10.80.125.76,,80)    23     probe      Healthy 9/10


I already lowered .threshold from 8 to 2 to make sure that all the nodes
stay in Healthy status; if I set .threshold to 8,
most of the nodes become Sick.

I used a script to wget the probe page every 2 seconds and saw no
failures, but 'backend.list' always reports some failed probes.

I have a script to watch the status of my website:
----------------------------------------------------------------------------------
#!/bin/bash
# Poll the site's health page forever, keeping running totals of successful
# and failed fetches and printing both counters after every attempt.
pass=0
fail=0

while true; do
  # Test wget's exit status directly; it is non-zero on network errors and
  # on HTTP error responses such as 503.
  if wget http://mysite/live.html -O /dev/null; then
    pass=$((pass + 1))
  else
    fail=$((fail + 1))
  fi

  # printf instead of "echo -e": same output, portable escape handling.
  printf 'pass: %d\n fail: %d\n' "$pass" "$fail"
  sleep 5
done

25% of the requests failed. It is very strange, and I have no clue why.

Example result about the varnish log:
=======================================
varnishlog -n vcache-my| tee -a /var/log/varnish.log

  977 RxHeader     c Connection: keep-alive
  977 RxHeader     c User-Agent: Mozilla/5.0 (iPad; CPU OS 6_0_1 like Mac
OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A523
Safari/8536.25
  977 VCL_call     c recv pass
  977 VCL_call     c hash
  977 Hash         c /
  977 Hash         c www.mywebsite.com
  977 VCL_return   c hash
  977 VCL_call     c pass pass
  977 FetchError   c no backend connection
  977 VCL_call     c error deliver
  977 VCL_call     c deliver deliver
  977 TxProtocol   c HTTP/1.1
  977 TxStatus     c 503
  977 TxResponse   c Service Unavailable
  977 TxHeader     c Server: Varnish
  977 TxHeader     c Content-Type: text/html; charset=utf-8
  977 TxHeader     c Retry-After: 5
  977 TxHeader     c Content-Length: 419
  977 TxHeader     c Accept-Ranges: bytes
  977 TxHeader     c Date: Mon, 07 Jan 2013 18:03:02 GMT
  977 TxHeader     c X-Varnish: 2122413499

more varnish log:
shaohui dot org/downloads/varnish.tgz

These 503 errors are causing serious trouble for my website: my customers
cannot access the site and I have no clue why. Can somebody offer some
advice? Thanks so much.

-- 
Best regards
Shaohui
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://www.varnish-cache.org/lists/pipermail/varnish-misc/attachments/20130108/93518587/attachment.html>


More information about the varnish-misc mailing list