503 Error was met frequently

Shaohui Zheng shaohui.zheng at gmail.com
Wed Jan 9 08:53:12 CET 2013


Thanks for Raul Rangel's suggestion; you did me a great favor. After doing
some homework, I drafted a new VCL configuration file:
=======================================
backend nanjing {
     .host = "10.80.125.66";
     #.host = "10.36.146.202";
     .port = "80";
     .connect_timeout = 1800s;
     .first_byte_timeout = 1800s;
     .between_bytes_timeout = 1800s;
     .max_connections = 500;
     .probe = {
         .url = "/live.html";
         .interval = 1s;
         .timeout = 30s;
         .window = 10;
         .threshold = 2;
     }
}

backend hangzhou {
     .host = "10.80.125.68";
     #.host = "10.36.146.202";
     .port = "80";
     .connect_timeout = 1800s;
     .first_byte_timeout = 1800s;
     .between_bytes_timeout = 1800s;
     .max_connections = 500;
     .probe = {
         .url = "/live.html";
         .interval = 1s;
         .timeout = 30s;
         .window = 10;
         .threshold = 2;
     }
}

backend chongqing {
     .host = "10.80.125.76";
     #.host = "10.36.146.202";
     .port = "80";
     .connect_timeout = 1800s;
     .first_byte_timeout = 1800s;
     .between_bytes_timeout = 1800s;
     .max_connections = 500;
     .probe = {
         .url = "/live.html";
         .interval = 1s;
         .timeout = 30s;
         .window = 10;
         .threshold = 2;
     }
}



director proxy random {
        {
                .backend = chongqing;
                .weight = 2;
        }
        {
                .backend = nanjing;
                .weight = 4;
        }
        {
                .backend = hangzhou;
                .weight = 4;
        }
}

#acl purge {
#        "localhost";
#        "10.80.125.0"/24;
#}
#
sub vcl_recv {
set req.backend = proxy;

# Remove cookies (ignore cookies for static files)
if (req.url ~ "^/[^?]+\.(jpeg|jpg|png|gif|ico|js|css|txt|zip)(\?.*|)$") {
unset req.http.cookie;
}

# Ignore cookies for product pages, which may be pseudo-static pages
if (req.url ~ "^/[^?]+-product-?+\.(html)(\?.*|)$") {
unset req.http.cookie;
}

# Remove cookies on the home page
if (req.url ~ "^/$") {
unset req.http.cookie;
}

# Allow a grace period for offering "stale" data in case backend lags
set req.grace = 5m;

remove req.http.X-Forwarded-For;
set req.http.X-Forwarded-For = client.ip;

# Properly handle different encoding types
if (req.http.Accept-Encoding) {
if (req.url ~ "\.(jpg|png|gif|gz|tgz|bz2|tbz|mp3|ogg)$") {
# No point in compressing these
remove req.http.Accept-Encoding;
} elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} elsif (req.http.Accept-Encoding ~ "deflate") {
set req.http.Accept-Encoding = "deflate";
} else {
# Unknown algorithm
remove req.http.Accept-Encoding;
}
}

# Force lookup if the request is a no-cache request from the client
# if (req.http.Cache-Control ~ "no-cache") {
# return (pass);
# }

## Default request checks
if (req.request != "GET" &&
req.request != "HEAD" &&
req.request != "PUT" &&
req.request != "POST" &&
req.request != "TRACE" &&
req.request != "OPTIONS" &&
req.request != "DELETE") {
# Non-RFC2616 or CONNECT which is weird.
return (pipe);
}
if (req.request != "GET" && req.request != "HEAD") {
# We only deal with GET and HEAD by default
return (pass);
}
if (req.request != "GET" && req.request != "HEAD") {
# We only deal with GET and HEAD by default
return (pass);
}

## Modified from the default to allow caching if cookies are set, but not HTTP auth
if (req.http.Authorization) {
/* Not cacheable by default */
return (pass);
}

# ORVSD tweaks
## Remove has_js and Google Analytics cookies.
set req.http.Cookie = regsuball(req.http.Cookie,
"(^|;\s*)(__[a-z]+|has_js)=[^;]*", "");
## Remove a ";" prefix, if present.
set req.http.Cookie = regsub(req.http.Cookie, "^;\s*", "");
## Remove empty cookies.
if (req.http.Cookie ~ "^\s*$") {
unset req.http.Cookie;
}

if (req.url ~ ".*/server-status$") {
return (pass);
}

## Pass PHP pages
if (req.url ~ "\.php") {
return (pass);
}

## Pass admin
if (req.url ~ "(admin|cs_department)") {
return (pass);
}

if (req.url ~ ".*/server-status$") {
return (pass);
}

# The default vcl_recv is used from here.

}

# Do the PURGE thing
sub vcl_hit {
       #if (req.request == "PURGE") {
       #        purge('');
       #        error 200 "Purged";
       #}
}
sub vcl_miss {
       # if (req.request == "PURGE") {
       #         purge('');
       #         error 200 "Purged";
       # }
}

sub vcl_fetch {
        if (beresp.ttl < 3600s) {
                set beresp.ttl = 3600s;
        }
}


The output of varnishstat:
=======================================
0+00:48:47
          vcache-my
Hitrate ratio:       10       81       81
Hitrate avg:     0.9317   0.9485   0.9485

      257260        82.90        87.89 client_conn - Client connections accepted
      367561       147.82       125.58 client_req - Client requests received
       74714        41.95        25.53 cache_hit - Cache hits
          35         0.00         0.01 cache_hitpass - Cache hits for pass
        4438         2.00         1.52 cache_miss - Cache misses
      292546       113.86        99.95 backend_conn - Backend conn. success
        1090         0.00         0.37 backend_fail - Backend conn. failures
           8         0.00         0.00 fetch_head - Fetch head
      262473        89.89        89.67 fetch_length - Fetch with Length
       27652        14.98         9.45 fetch_chunked - Fetch chunked
        2095         1.00         0.72 fetch_close - Fetch wanted close
         243         0.00         0.08 fetch_304 - Fetch no body (304)
         837          .            .   n_sess_mem - N struct sess_mem
         601          .            .   n_sess - N struct sess
        3748          .            .   n_object - N struct object
        4169          .            .   n_objectcore - N struct objectcore
        4169          .            .   n_objecthead - N struct objecthead
         455          .            .   n_waitinglist - N struct waitinglist
         127          .            .   n_vbc - N struct vbc

The backend workload is much better than before. Now I want to know which
pages are not cached, since that can help me improve the cache hit ratio.
Can I get some debug information from varnishstat? Thanks.
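
varnishstat itself only reports aggregate counters (cache_hit, cache_miss and
so on), so for per-URL detail varnishtop or varnishlog is probably the better
tool. A minimal sketch, assuming the same -n vcache-my instance as above;
TxURL is the URL Varnish sends to a backend, so anything that shows up there
was a miss or a pass:
=======================================
# URLs most often requested by clients:
varnishtop -n vcache-my -i RxURL

# URLs most often fetched from the backends, i.e. misses and passes:
varnishtop -n vcache-my -i TxURL
=======================================
Running varnishlog -n vcache-my -i TxURL gives the same information as a
live stream instead of a ranked list.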

Do you have any more advice on my VCL configuration file?
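
One more debugging idea (not from this thread, just a sketch in the same
Varnish 2/3-style VCL as above, and the X-Cache header name is arbitrary):
expose the hit/miss status in a response header from vcl_deliver, then check
a few representative pages with curl -I to see which ones never get a HIT.
=======================================
sub vcl_deliver {
        # obj.hits counts how many times this object has been delivered
        # from cache; 0 means it came from the backend (miss or pass).
        if (obj.hits > 0) {
                set resp.http.X-Cache = "HIT";
        } else {
                set resp.http.X-Cache = "MISS";
        }
}
=======================================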

On Tue, Jan 8, 2013 at 10:38 AM, Shaohui Zheng <shaohui.zheng at gmail.com> wrote:

> Yes, when the 503 errors happen, all my backends are under heavy load; the
> load is more than 40. The very strange thing is that I can wget the probe
> file hundreds of times and there is no failure. I cannot explain it.
>
> We use cookies to save customer information; when guests come back to our
> website, it helps with auto-login. Our Varnish server's workload is very
> low, under 1.0 most of the time, but the web servers are under heavy load.
> Your answer can explain it.
>
> The hit ratio seems terrible. That is the output of varnishstat:
> varnishstat -n vcache-my
> =========================================================================
> 0+09:20:26
>                              vcache-my
> Hitrate ratio:        8        8        8
> Hitrate avg:     0.6145   0.6145   0.6145
>
>      4195411        61.92       124.77 client_conn - Client connections accepted
>      5371966        65.92       159.76 client_req - Client requests received
>       372757         7.99        11.09 cache_hit - Cache hits
>         6820         1.00         0.20 cache_hitpass - Cache hits for pass
>        63206         3.00         1.88 cache_miss - Cache misses
>      4240160        71.91       126.10 backend_conn - Backend conn. success
>      2277628         0.00        67.73 backend_fail - Backend conn. failures
>           84         0.00         0.00 fetch_head - Fetch head
>      3387280        52.93       100.73 fetch_length - Fetch with Length
>       417030         3.99        12.40 fetch_chunked - Fetch chunked
>         8669         1.00         0.26 fetch_close - Fetch wanted close
>            4         0.00         0.00 fetch_failed - Fetch failed
>       428006         0.00        12.73 fetch_304 - Fetch no body (304)
>          919          .            .   n_sess_mem - N struct sess_mem
>          339          .            .   n_sess - N struct sess
>         7159          .            .   n_object - N struct object
>         7633          .            .   n_objectcore - N struct objectcore
>         7635          .            .   n_objecthead - N struct objecthead
>          875          .            .   n_waitinglist - N struct waitinglist
>           62          .            .   n_vbc - N struct vbc
>         1000          .            .   n_wrk - N worker threads
>         1000         0.00         0.03 n_wrk_create - N worker threads created
>          219         0.00         0.01 n_wrk_queued - N queued work requests
>            3          .            .   n_backend - N backends
>        48739          .            .   n_expired - N expired objects
>         2567          .            .   n_lru_moved - N LRU moved objects
>          143         0.00         0.00 losthdr - HTTP header overflows
>
> That is great. Thanks, Raul, your information is important to me; I
> will rewrite the vcl_recv function.
>
>
> On Tue, Jan 8, 2013 at 2:52 AM, Rangel, Raul <Raul.Rangel at disney.com> wrote:
>
>> What is the load on your backend servers? It seems like they are under
>> heavy load. One thing I did notice in your vcl_recv is the cookie code. You
>> are checking the existence of a specific cookie and passing if it exists.
>> If that cookie does not exist you fall back to the default vcl_recv. Well
>> the default vcl_recv also does a check on req.http.Cookie. If it finds one
>> then it also does a pass. My guess would be that most of your incoming
>> requests have some sort of cookie and thus all the requests are getting
>> passed to the backend. Have you done a varnishstat and looked at your cache
>> hit ratios?
>>
>> If I were you I would add a unset req.http.cookie to the end of vcl_recv.
>> This way the default vcl_recv won’t pass the request.
>>
>> Raul
>>
>> From: varnish-misc-bounces at varnish-cache.org [mailto:
>> varnish-misc-bounces at varnish-cache.org] On Behalf Of Shaohui Zheng
>> Sent: Monday, January 07, 2013 11:16 AM
>> To: varnish-misc at varnish-cache.org
>> Subject: 503 Error was met frequently
>>
>> Hello,
>>
>>     After a few days of effort I have not managed to fix my problem. I have
>> almost exhausted every possible method I could think of, so I am trying to
>> get help from the community.
>>
>> I use Varnish as a web cache and load balancer to manage 3 web nodes, but
>> recently I have been getting 503 errors frequently.
>>
>> My varnish configuration file:
>> =======================================================
>> backend nanjing {
>>      .host = "10.80.125.66";
>>      .port = "80";
>>      .connect_timeout = 1800s;
>>      .first_byte_timeout = 1800s;
>>      .between_bytes_timeout = 1800s;
>>             .probe = {
>>                         .url = "/live.html";
>>                         .interval = 1s;
>>                         .timeout = 3s;
>>                         .window = 10;
>>                         .threshold = 2;
>>              }
>>  }
>>
>> backend hangzhou {
>>      .host = "10.80.125.68";
>>      #.host = "10.36.146.202";
>>      .port = "80";
>>      .connect_timeout = 1800s;
>>      .first_byte_timeout = 1800s;
>>      .between_bytes_timeout = 1800s;
>>             .probe = {
>>                         .url = "/live.html";
>>                         .interval = 1s;
>>                         .timeout = 3s;
>>                         .window = 10;
>>                         .threshold = 2;
>>              }
>>  }
>>
>> backend chongqing {
>>      .host = "10.80.125.76";
>>      .port = "80";
>>      .connect_timeout = 1800s;
>>      .first_byte_timeout = 1800s;
>>      .between_bytes_timeout = 1800s;
>>             .probe = {
>>                         .url = "/live.html";
>>                         .interval = 1s;
>>                         .timeout = 3s;
>>                         .window = 10;
>>                         .threshold = 2;
>>              }
>>  }
>>
>> director proxy random {
>>         {
>>                 .backend = chongqing;
>>                         .weight = 2;
>>         }
>>         {
>>                 .backend = nanjing;
>>                         .weight = 4;
>>         }
>>         {
>>                 .backend = hangzhou;
>>                         .weight = 4;
>>         }
>> }
>>
>> acl purge {
>>         "localhost";
>>         "10.80.125.0"/24;
>> }
>>
>> sub vcl_recv {
>>      set req.backend = proxy;
>>
>>         if (req.request != "GET" && req.request != "HEAD") {
>>
>>                 # POST - Logins and edits
>>                 if (req.request == "POST") {
>>                         return(pass);
>>                 }
>>
>>                 # PURGE - The CacheFu product can invalidate updated URLs
>>                 if (req.request == "PURGE") {
>>                         if (!client.ip ~ purge) {
>>                                 error 405 "Not allowed.";
>>                         }
>>                         return(lookup);
>>                 }
>>         }
>>
>>         # Don't cache authenticated requests
>>         if (req.http.Cookie && req.http.Cookie ~ "__ac(|_(name|password|persistent))=") {
>>
>>                         # Force lookup of specific urls unlikely to need protection
>>                         if (req.url ~ "\.(js|css)") {
>>                         remove req.http.cookie;
>>                         return(lookup);
>>                 }
>>                 return(pass);
>>         }
>>
>>         # The default vcl_recv is used from here.
>>  }
>>
>> sub vcl_hit {
>>        # if (req.request == "PURGE") {
>>        #         purge('');
>>        #         error 200 "Purged";
>>        # }
>> }
>>
>> sub vcl_miss {
>>        # if (req.request == "PURGE") {
>>        #         purge('');
>>        #         error 200 "Purged";
>>        # }
>> }
>>
>> # Enforce a minimum TTL, since we can PURGE changed objects actively
>> # from Zope by using the CacheFu product
>>
>> sub vcl_fetch {
>>         if (beresp.ttl < 3600s) {
>>                 set beresp.ttl = 3600s;
>>         }
>> }
>>
>> Varnish boots up script
>> ==========================================
>>                         varnishd -f /etc/varnish/my.vcl -s malloc,8192M -a $ip:80 \
>>                         -T $ip:2048 \
>>                         -n vcache-my\
>>                         -p thread_pools=2 \
>>                         -p thread_pool_max=15000\
>>                         -p thread_pool_min=500\
>>                         -p listen_depth=2048 \
>>                         -p lru_interval=1800 \
>>                         -h classic,169313 \
>>                         -p connect_timeout=1800 \
>>                         -p http_max_hdr=8192\
>>                         -p http_resp_hdr_len=18192\
>>                         -p max_restarts=6
>>
>> I tried to check the backend status:
>> [root at hongkong varnish]# varnishadm -n vcache-my backend.list
>> ==============================================
>> Backend name                   Refs   Admin      Probe
>> nanjing(10.80.125.66,,80)      68     probe      Healthy 8/10
>> hangzhou(10.80.125.68,,80)     66     probe      Healthy 7/10
>> chongqing(10.80.125.76,,80)    23     probe      Healthy 9/10
>>
>> I already lowered the .threshold from 8 to 2 so that all the nodes stay in
>> Healthy status; if I set the .threshold to 8, most of the nodes will be
>> Sick.
>>
>> I tried using a script to wget the probe page every 2 seconds and there is
>> no failure, but there are always failures in the 'backend.list' output.
>>
>> I have a script to watch the status of my website:
>> ----------------------------------------------------------------------------------
>> #!/bin/bash
>> pass=0
>> fail=0
>>
>> while [ 1 ]
>> do
>>             wget http://mysite/live.html -O /dev/null
>>             if [ $? -eq 0 ];then
>>                         pass=$(expr $pass + 1)
>>             else
>>                         fail=$(expr $fail + 1)
>>             fi
>>
>>             echo -e "pass: $pass\n fail: $fail"
>>             sleep 5
>> done
>>
>> About 25% of the requests failed. It is a very strange thing, and I have
>> no clue about it.
>>
>> Example output from the varnish log:
>> =======================================
>> varnishlog -n vcache-my| tee -a /var/log/varnish.log
>>
>>   977 RxHeader     c Connection: keep-alive
>>   977 RxHeader     c User-Agent: Mozilla/5.0 (iPad; CPU OS 6_0_1 like Mac
>> OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A523
>> Safari/8536.25
>>   977 VCL_call     c recv pass
>>   977 VCL_call     c hash
>>   977 Hash         c /
>>   977 Hash         c www.mywebsite.com
>>   977 VCL_return   c hash
>>   977 VCL_call     c pass pass
>>   977 FetchError   c no backend connection
>>   977 VCL_call     c error deliver
>>   977 VCL_call     c deliver deliver
>>   977 TxProtocol   c HTTP/1.1
>>   977 TxStatus     c 503
>>   977 TxResponse   c Service Unavailable
>>   977 TxHeader     c Server: Varnish
>>   977 TxHeader     c Content-Type: text/html; charset=utf-8
>>   977 TxHeader     c Retry-After: 5
>>   977 TxHeader     c Content-Length: 419
>>   977 TxHeader     c Accept-Ranges: bytes
>>   977 TxHeader     c Date: Mon, 07 Jan 2013 18:03:02 GMT
>>   977 TxHeader     c X-Varnish: 2122413499
>>
>> More varnish log:
>> shaohui dot org/downloads/varnish.tgz
>>
>> These 503 errors put my website in trouble; my customers cannot access my
>> site and I do not have any clue. Can somebody provide some advice? Thanks
>> so much.
>>
>> --
>> Best regards
>> Shaohui
>>
>
>
>
> --
> Best regards
> Shaohui
>



-- 
Best regards
Shaohui