503 Error was met frequently
Shaohui Zheng
shaohui.zheng at gmail.com
Wed Jan 9 08:53:12 CET 2013
Thanks to Rangel Raul for the suggestion; you did me a great favor. After
doing some homework, I drafted a new VCL configuration file:
=======================================
# Backend "nanjing": web node at 10.80.125.66:80, health-checked via /live.html.
backend nanjing {
    # Alternative address kept for reference: "10.36.146.202"
    .host = "10.80.125.66";
    .port = "80";

    # Upper bound on concurrent connections to this node.
    .max_connections = 500;

    # All three timeouts are set to 30 minutes.
    # NOTE(review): such long timeouts presumably let a stalled backend hold a
    # worker thread for a very long time -- confirm this is intended.
    .connect_timeout = 1800s;
    .first_byte_timeout = 1800s;
    .between_bytes_timeout = 1800s;

    # Health probe: fetch /live.html every second; the node counts as healthy
    # when at least 2 of the last 10 probes succeeded.
    # NOTE(review): the probe timeout (30s) is larger than the interval (1s) --
    # confirm this is intended.
    .probe = {
        .url = "/live.html";
        .interval = 1s;
        .timeout = 30s;
        .window = 10;
        .threshold = 2;
    }
}
# Backend "hangzhou": web node at 10.80.125.68:80, health-checked via /live.html.
backend hangzhou {
    # Alternative address kept for reference: "10.36.146.202"
    .host = "10.80.125.68";
    .port = "80";

    # Upper bound on concurrent connections to this node.
    .max_connections = 500;

    # All three timeouts are set to 30 minutes.
    # NOTE(review): such long timeouts presumably let a stalled backend hold a
    # worker thread for a very long time -- confirm this is intended.
    .connect_timeout = 1800s;
    .first_byte_timeout = 1800s;
    .between_bytes_timeout = 1800s;

    # Health probe: fetch /live.html every second; the node counts as healthy
    # when at least 2 of the last 10 probes succeeded.
    # NOTE(review): the probe timeout (30s) is larger than the interval (1s) --
    # confirm this is intended.
    .probe = {
        .url = "/live.html";
        .interval = 1s;
        .timeout = 30s;
        .window = 10;
        .threshold = 2;
    }
}
# Backend "chongqing": web node at 10.80.125.76:80, health-checked via /live.html.
backend chongqing {
    # Alternative address kept for reference: "10.36.146.202"
    .host = "10.80.125.76";
    .port = "80";

    # Upper bound on concurrent connections to this node.
    .max_connections = 500;

    # All three timeouts are set to 30 minutes.
    # NOTE(review): such long timeouts presumably let a stalled backend hold a
    # worker thread for a very long time -- confirm this is intended.
    .connect_timeout = 1800s;
    .first_byte_timeout = 1800s;
    .between_bytes_timeout = 1800s;

    # Health probe: fetch /live.html every second; the node counts as healthy
    # when at least 2 of the last 10 probes succeeded.
    # NOTE(review): the probe timeout (30s) is larger than the interval (1s) --
    # confirm this is intended.
    .probe = {
        .url = "/live.html";
        .interval = 1s;
        .timeout = 30s;
        .window = 10;
        .threshold = 2;
    }
}
# Weighted random director over the three web nodes.
# Weights are 2 : 4 : 4, so chongqing receives roughly half the share of
# traffic that nanjing or hangzhou each receive.
director proxy random {
    { .backend = chongqing; .weight = 2; }
    { .backend = nanjing;   .weight = 4; }
    { .backend = hangzhou;  .weight = 4; }
}
#acl purge {
# "localhost";
# "10.80.125.0"/24;
#}
#
# Request entry point.
#
# Order of operations:
#   1. select the backend director and normalise request headers;
#   2. pipe/pass requests that must never be cached (unusual methods,
#      non-GET/HEAD, HTTP auth) BEFORE touching cookies, so those requests
#      reach the backend with their cookies intact;
#   3. strip cookies from URLs that are safe to cache without them;
#   4. drop tracking cookies, then pass URLs that must stay dynamic.
# Anything that does not return here falls through to the default vcl_recv,
# which passes any request that still carries a Cookie header.
sub vcl_recv {
    # Send every request through the weighted random director.
    set req.backend = proxy;

    # Allow a grace period for offering "stale" data in case backend lags.
    set req.grace = 5m;

    # Replace any client-supplied X-Forwarded-For with the connecting address.
    remove req.http.X-Forwarded-For;
    set req.http.X-Forwarded-For = client.ip;

    # Properly handle different encoding types: collapse Accept-Encoding to a
    # single value so the cache does not hold one variant per header spelling.
    if (req.http.Accept-Encoding) {
        if (req.url ~ "\.(jpg|png|gif|gz|tgz|bz2|tbz|mp3|ogg)$") {
            # No point in compressing these.
            remove req.http.Accept-Encoding;
        } elsif (req.http.Accept-Encoding ~ "gzip") {
            set req.http.Accept-Encoding = "gzip";
        } elsif (req.http.Accept-Encoding ~ "deflate") {
            set req.http.Accept-Encoding = "deflate";
        } else {
            # Unknown algorithm.
            remove req.http.Accept-Encoding;
        }
    }

    # Force lookup if the request is a no-cache request from the client
    # (currently disabled).
    # if (req.http.Cache-Control ~ "no-cache") {
    #     return (pass);
    # }

    ## Default request checks
    if (req.request != "GET" &&
        req.request != "HEAD" &&
        req.request != "PUT" &&
        req.request != "POST" &&
        req.request != "TRACE" &&
        req.request != "OPTIONS" &&
        req.request != "DELETE") {
        # Non-RFC2616 or CONNECT which is weird.
        return (pipe);
    }
    if (req.request != "GET" && req.request != "HEAD") {
        # We only deal with GET and HEAD by default.
        return (pass);
    }

    ## Modified from default to allow caching if cookies are set, but not
    ## HTTP auth.
    if (req.http.Authorization) {
        /* Not cacheable by default */
        return (pass);
    }

    # Remove cookies (ignore cookies for static files).
    if (req.url ~ "^/[^?]+\.(jpeg|jpg|png|gif|ico|js|css|txt|zip)(\?.*|)$") {
        unset req.http.cookie;
    }
    # Ignore cookies for product pages; they may be pseudo-static pages.
    # NOTE(review): "-?+" is an optional hyphen (possessive), so this only
    # matches URLs ending in "-product.html" or "-product-.html". If the
    # intent was "-product-<id>.html", the pattern needs e.g. "-product-.+".
    # Left unchanged pending confirmation.
    if (req.url ~ "^/[^?]+-product-?+\.(html)(\?.*|)$") {
        unset req.http.cookie;
    }
    # Remove cookies on the home page.
    if (req.url ~ "^/$") {
        unset req.http.cookie;
    }

    # ORVSD tweaks
    ## Remove has_js and Google Analytics cookies.
    set req.http.Cookie = regsuball(req.http.Cookie,
        "(^|;\s*)(__[a-z]+|has_js)=[^;]*", "");
    ## Remove a ";" prefix, if present.
    set req.http.Cookie = regsub(req.http.Cookie, "^;\s*", "");
    ## Remove empty cookies.
    if (req.http.Cookie ~ "^\s*$") {
        unset req.http.Cookie;
    }

    ## Never cache the server-status page.
    if (req.url ~ ".*/server-status$") {
        return (pass);
    }
    ## Pass PHP pages. The dot is escaped so that only a literal ".php"
    ## matches, not any character followed by "php".
    if (req.url ~ "\.php") {
        return (pass);
    }
    ## Pass admin pages.
    if (req.url ~ "(admin|cs_department)") {
        return (pass);
    }

    # The default vcl_recv is used from here.
}
# Do the PURGE thing.
# PURGE handling on a cache hit is currently disabled: the whole body is
# commented out, so this sub adds nothing and the default vcl_hit applies.
sub vcl_hit {
#if (req.request == "PURGE") {
# purge('');
# error 200 "Purged";
#}
}
# PURGE handling on a cache miss is currently disabled: the whole body is
# commented out, so this sub adds nothing and the default vcl_miss applies.
sub vcl_miss {
# if (req.request == "PURGE") {
# purge('');
# error 200 "Purged";
# }
}
# Post-fetch policy: enforce a minimum TTL of one hour on backend responses.
sub vcl_fetch {
    # Raise short TTLs to one hour, but never for 5xx responses: those
    # normally arrive with a non-positive TTL, and forcing it to 3600s would
    # cache a backend error page for an hour.
    if (beresp.status < 500 && beresp.ttl < 3600s) {
        set beresp.ttl = 3600s;
    }

    # Keep objects for 5 minutes past their TTL so that the req.grace window
    # set in vcl_recv has stale objects available to serve.
    set beresp.grace = 5m;
}
The output of varnishstat:
=======================================
0+00:48:47
vcache-my
Hitrate ratio: 10 81 81
Hitrate avg: 0.9317 0.9485 0.9485
257260 82.90 87.89 client_conn - Client connections
accepted
367561 147.82 125.58 client_req - Client requests received
74714 41.95 25.53 cache_hit - Cache hits
35 0.00 0.01 cache_hitpass - Cache hits for pass
4438 2.00 1.52 cache_miss - Cache misses
292546 113.86 99.95 backend_conn - Backend conn. success
1090 0.00 0.37 backend_fail - Backend conn. failures
8 0.00 0.00 fetch_head - Fetch head
262473 89.89 89.67 fetch_length - Fetch with Length
27652 14.98 9.45 fetch_chunked - Fetch chunked
2095 1.00 0.72 fetch_close - Fetch wanted close
243 0.00 0.08 fetch_304 - Fetch no body (304)
837 . . n_sess_mem - N struct sess_mem
601 . . n_sess - N struct sess
3748 . . n_object - N struct object
4169 . . n_objectcore - N struct objectcore
4169 . . n_objecthead - N struct objecthead
455 . . n_waitinglist - N struct waitinglist
127 . . n_vbc - N struct vbc
The backend workload is much better than before. I want to know which pages
are not cached, since that would help me improve the cache hit ratio. Can I get
that kind of debug information from varnishstat? Thanks.
Do you have any more advice for my VCL configuration file?
On Tue, Jan 8, 2013 at 10:38 AM, Shaohui Zheng <shaohui.zheng at gmail.com>wrote:
> Yes, when the 503 errors happen, all my backends are under heavy load; the
> load is more than 40. The very strange thing is that I can wget the probe
> file hundreds of times without a single failure. I cannot explain it.
>
> We use cookies to save customer information; when a guest comes back to our
> website, they enable auto login. Our Varnish server's load is very
> low, under 1.0 most of the time, but the web servers are under heavy load. Your
> answer can explain it.
>
> The hit ratio seems terrible. This is the output of varnishstat:
> varnishstat -n vcache-my
> =========================================================================
> 0+09:20:26
> vcache-my
> Hitrate ratio: 8 8 8
> Hitrate avg: 0.6145 0.6145 0.6145
>
> 4195411 61.92 124.77 client_conn - Client connections
> accepted
> 5371966 65.92 159.76 client_req - Client requests
> received
> 372757 7.99 11.09 cache_hit - Cache hits
> 6820 1.00 0.20 cache_hitpass - Cache hits for pass
> 63206 3.00 1.88 cache_miss - Cache misses
> 4240160 71.91 126.10 backend_conn - Backend conn. success
> 2277628 0.00 67.73 backend_fail - Backend conn.
> failures
> 84 0.00 0.00 fetch_head - Fetch head
> 3387280 52.93 100.73 fetch_length - Fetch with Length
> 417030 3.99 12.40 fetch_chunked - Fetch chunked
> 8669 1.00 0.26 fetch_close - Fetch wanted close
> 4 0.00 0.00 fetch_failed - Fetch failed
> 428006 0.00 12.73 fetch_304 - Fetch no body (304)
> 919 . . n_sess_mem - N struct sess_mem
> 339 . . n_sess - N struct sess
> 7159 . . n_object - N struct object
> 7633 . . n_objectcore - N struct objectcore
> 7635 . . n_objecthead - N struct objecthead
> 875 . . n_waitinglist - N struct waitinglist
> 62 . . n_vbc - N struct vbc
> 1000 . . n_wrk - N worker threads
> 1000 0.00 0.03 n_wrk_create - N worker threads
> created
> 219 0.00 0.01 n_wrk_queued - N queued work
> requests
> 3 . . n_backend - N backends
> 48739 . . n_expired - N expired objects
> 2567 . . n_lru_moved - N LRU moved objects
> 143 0.00 0.00 losthdr - HTTP header overflows
>
> That is great, thanks Rangel, Raul. Your information is important to me; I
> will rewrite the vcl_recv function.
>
>
> On Tue, Jan 8, 2013 at 2:52 AM, Rangel, Raul <Raul.Rangel at disney.com>wrote:
>
>> What is the load on your backend servers? It seems like they are under
>> heavy load. One thing I did notice in your vcl_recv is the cookie code. You
>> are checking the existence of a specific cookie and passing if it exists.
>> If that cookie does not exist you fall back to the default vcl_recv. Well
>> the default vcl_recv also does a check on req.http.Cookie. If it finds one
>> then it also does a pass. My guess would be that most of your incoming
>> requests have some sort of cookie and thus all the requests are getting
>> passed to the backend. Have you done a varnishstat and looked at your cache
>> hit ratios?****
>>
>> ** **
>>
>> If I were you I would add an unset req.http.cookie to the end of vcl_recv.
>> This way the default vcl_recv won’t pass the request.
>>
>> ** **
>>
>> Raul****
>>
>> ** **
>>
>> *From:* varnish-misc-bounces at varnish-cache.org [mailto:
>> varnish-misc-bounces at varnish-cache.org] *On Behalf Of *Shaohui Zheng
>> *Sent:* Monday, January 07, 2013 11:16 AM
>> *To:* varnish-misc at varnish-cache.org
>> *Subject:* 503 Error was met frequently****
>>
>> ** **
>>
>> Hello,****
>>
>> ** **
>>
>> After a few days of effort, I have not gotten my problem fixed. I have almost
>> exhausted every method I could think of, so I am trying to get help
>> from the community.
>>
>> ** **
>>
>> I use Varnish as a web cache and load balancer to manage 3 web nodes, but
>> recently I have been getting 503 errors frequently.
>>
>> ** **
>>
>> My varnish configuration file:****
>>
>> =======================================================****
>>
>> backend nanjing {****
>>
>> .host = "10.80.125.66";****
>>
>> .port = "80";****
>>
>> .connect_timeout = 1800s;****
>>
>> .first_byte_timeout = 1800s;****
>>
>> .between_bytes_timeout = 1800s;****
>>
>> .probe = {****
>>
>> .url = "/live.html";****
>>
>> .interval = 1s;****
>>
>> .timeout = 3s;****
>>
>> .window = 10;****
>>
>> .threshold = 2;****
>>
>> }****
>>
>> }****
>>
>> ** **
>>
>> backend hangzhou {****
>>
>> .host = "10.80.125.68";****
>>
>> #.host = "10.36.146.202";****
>>
>> .port = "80";****
>>
>> .connect_timeout = 1800s;****
>>
>> .first_byte_timeout = 1800s;****
>>
>> .between_bytes_timeout = 1800s;****
>>
>> ** **
>>
>> .probe = {****
>>
>> .url = "/live.html";****
>>
>> .interval = 1s;****
>>
>> .timeout = 3s;****
>>
>> .window = 10;****
>>
>> .threshold = 2;****
>>
>> }****
>>
>> }****
>>
>> backend chongqing {****
>>
>> .host = "10.80.125.76";****
>>
>> .port = "80";****
>>
>> .connect_timeout = 1800s;****
>>
>> .first_byte_timeout = 1800s;****
>>
>> .between_bytes_timeout = 1800s;****
>>
>> ** **
>>
>> .probe = {****
>>
>> .url = "/live.html";****
>>
>> .interval = 1s;****
>>
>> .timeout = 3s;****
>>
>> .window = 10;****
>>
>> .threshold = 2;****
>>
>> }****
>>
>> }****
>>
>> ** **
>>
>> ** **
>>
>> ** **
>>
>> director proxy random {****
>>
>> {****
>>
>> .backend = chongqing;****
>>
>> .weight = 2;****
>>
>> }****
>>
>> {****
>>
>> .backend = nanjing;****
>>
>> .weight = 4;****
>>
>> }****
>>
>> {****
>>
>> .backend = hangzhou;****
>>
>> .weight = 4;****
>>
>> }****
>>
>> }****
>>
>> ** **
>>
>> acl purge {****
>>
>> "localhost";****
>>
>> "10.80.125.0"/24;****
>>
>> }****
>>
>> ** **
>>
>> sub vcl_recv {****
>>
>> set req.backend = proxy;****
>>
>> ** **
>>
>> if (req.request != "GET" && req.request != "HEAD") {****
>>
>> ** **
>>
>> # POST - Logins and edits****
>>
>> if (req.request == "POST") {****
>>
>> return(pass);****
>>
>> }****
>>
>> ****
>>
>> # PURGE - The CacheFu product can invalidate updated URLs
>> ****
>>
>> if (req.request == "PURGE") {****
>>
>> if (!client.ip ~ purge) {****
>>
>> error 405 "Not allowed.";****
>>
>> }****
>>
>> return(lookup);****
>>
>> }****
>>
>> }****
>>
>> ** **
>>
>> # Don't cache authenticated requests****
>>
>> if (req.http.Cookie && req.http.Cookie ~
>> "__ac(|_(name|password|persistent))=") {****
>>
>> ** **
>>
>> # Force lookup of specific urls unlikely to need
>> protection****
>>
>> if (req.url ~ "\.(js|css)") {****
>>
>> remove req.http.cookie;****
>>
>> return(lookup);****
>>
>> }****
>>
>> return(pass);****
>>
>> }****
>>
>> ** **
>>
>> # The default vcl_recv is used from here.****
>>
>> }****
>>
>> ** **
>>
>> sub vcl_hit {****
>>
>> # if (req.request == "PURGE") {****
>>
>> # purge('');****
>>
>> # error 200 "Purged";****
>>
>> # }****
>>
>> }****
>>
>> sub vcl_miss {****
>>
>> # if (req.request == "PURGE") {****
>>
>> # purge('');****
>>
>> # error 200 "Purged";****
>>
>> # }****
>>
>> }****
>>
>> ** **
>>
>> # Enforce a minimum TTL, since we can PURGE changed objects actively****
>>
>> # from Zope by using the CacheFu product****
>>
>> ** **
>>
>> sub vcl_fetch {****
>>
>> if (beresp.ttl < 3600s) {****
>>
>> set beresp.ttl = 3600s;****
>>
>> }****
>>
>> }****
>>
>> ** **
>>
>> ** **
>>
>> Varnish boots up script****
>>
>> ==========================================****
>>
>> varnishd -f /etc/varnish/my.vcl -s malloc,8192M
>> -a $ip:80 \****
>>
>> -T $ip:2048 \****
>>
>> -n vcache-my\****
>>
>> -p thread_pools=2 \****
>>
>> -p thread_pool_max=15000\****
>>
>> -p thread_pool_min=500\****
>>
>> -p listen_depth=2048 \****
>>
>> -p lru_interval=1800 \****
>>
>> -h classic,169313 \****
>>
>> -p connect_timeout=1800 \****
>>
>> -p http_max_hdr=8192\****
>>
>> -p http_resp_hdr_len=18192\****
>>
>> -p max_restarts=6 ****
>>
>> ** **
>>
>> I try to the backend status:****
>>
>> [root at hongkong varnish]# varnishadm -n vcache-my backend.list****
>>
>> ==============================================****
>>
>> Backend name Refs Admin Probe****
>>
>> nanjing(10.80.125.66,,80) 68 probe Healthy 8/10****
>>
>> hangzhou(10.80.125.68,,80) 66 probe Healthy 7/10****
>>
>> chongqing(10.80.125.76,,80) 23 probe Healthy 9/10****
>>
>> ** **
>>
>> ** **
>>
>> I already lowered the .threshold from 8 to 2 to make sure that
>> all the nodes are in Healthy status; if I set the .threshold to 8,
>>
>> most of the nodes will be Sick.
>>
>> ** **
>>
>> I tried using a script to wget the probe page every 2 seconds and there was no
>> failure, but there are always failures in the output of the 'backend.list'
>> command.
>>
>> ** **
>>
>> I have a script to watch the status of my website:
>>
>>
>> ----------------------------------------------------------------------------------
>> ****
>>
>> #!/bin/bash****
>>
>> pass=0****
>>
>> fail=0****
>>
>> ** **
>>
>> while [ 1 ]****
>>
>> do****
>>
>> wget http://mysite/live.html -O /dev/null****
>>
>> if [ $? -eq 0 ];then****
>>
>> pass=$(expr $pass + 1)****
>>
>> else****
>>
>> fail=$(expr $fail + 1)****
>>
>> fi****
>>
>> ** **
>>
>> echo -e "pass: $pass\n fail: $fail" ****
>>
>> sleep 5****
>>
>> done****
>>
>> ** **
>>
>> 25% failed. It is a very strange thing; I have no clue about it.
>>
>> ** **
>>
>> Example result about the varnish log:****
>>
>> =======================================****
>>
>> varnishlog -n vcache-my| tee -a /var/log/varnish.log****
>>
>> ** **
>>
>> 977 RxHeader c Connection: keep-alive****
>>
>> 977 RxHeader c User-Agent: Mozilla/5.0 (iPad; CPU OS 6_0_1 like Mac
>> OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A523
>> Safari/8536.25****
>>
>> 977 VCL_call c recv pass****
>>
>> 977 VCL_call c hash****
>>
>> 977 Hash c /****
>>
>> 977 Hash c www.mywebsite.com****
>>
>> 977 VCL_return c hash****
>>
>> 977 VCL_call c pass pass****
>>
>> 977 FetchError c no backend connection****
>>
>> 977 VCL_call c error deliver****
>>
>> 977 VCL_call c deliver deliver****
>>
>> 977 TxProtocol c HTTP/1.1****
>>
>> 977 TxStatus c 503****
>>
>> 977 TxResponse c Service Unavailable****
>>
>> 977 TxHeader c Server: Varnish****
>>
>> 977 TxHeader c Content-Type: text/html; charset=utf-8****
>>
>> 977 TxHeader c Retry-After: 5****
>>
>> 977 TxHeader c Content-Length: 419****
>>
>> 977 TxHeader c Accept-Ranges: bytes****
>>
>> 977 TxHeader c Date: Mon, 07 Jan 2013 18:03:02 GMT****
>>
>> 977 TxHeader c X-Varnish: 2122413499****
>>
>> ** **
>>
>> more varnish log:****
>>
>> shaohui dot org/downloads/varnish.tgz****
>>
>> ** **
>>
>> These 503 errors are causing trouble for my website: my customers cannot
>> access my site, and I do not have any clue why. Can somebody provide some
>> advice? Thanks so much.
>>
>> ** **
>>
>> --
>> Best regards
>> Shaohui ****
>>
>
>
>
> --
> Best regards
> Shaohui
>
--
Best regards
Shaohui
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://www.varnish-cache.org/lists/pipermail/varnish-misc/attachments/20130109/f8760411/attachment-0001.html>
More information about the varnish-misc
mailing list