varnish is sticking too much to web nodes

Tim Dunphy bluethundr at gmail.com
Mon Jun 2 18:08:20 CEST 2014


We have two out of six web nodes showing extremely high CPU usage compared
to the others in the farm.

We're using the round-robin load balancing algorithm. Using a tool like
'htop' shows us that all the hungry processes belong to Apache. Yet diffing
the configuration files between the nodes with low CPU usage and the ones
with high CPU usage shows no difference at all.

Our theory is that Varnish is sending a disproportionate share of the traffic
to these two hosts.

So I was just wondering if there could be any way to diagnose this from a
Varnish perspective. What clues could tools like varnishtop and varnishstat
or varnishlog yield in investigating this type of problem?

Here's my Varnish VCL (with any identifying details removed) in case anyone
can offer an opinion as to why this is going on:

# web1: web node, member of the "www" round-robin director.
backend web1 {
    .host = "10.10.1.98";
    .port = "80";
    .max_connections = 70;

    # All three backend timeouts are set to 90 seconds.
    .connect_timeout = 90s;
    .first_byte_timeout = 90s;
    .between_bytes_timeout = 90s;

    # Poll /healthcheck.php every 30s, allowing 5s per poll.
    # NOTE(review): threshold 1 over a window of 10 means one good poll among
    # the last ten keeps the node marked healthy -- confirm this is intended.
    .probe = {
        .url = "/healthcheck.php";
        .interval = 30s;
        .timeout = 5s;
        .window = 10;
        .threshold = 1;
    }
}

# web2: web node, member of the "www" round-robin director.
backend web2 {
    .host = "10.10.1.99";
    .port = "80";
    .max_connections = 70;

    # All three backend timeouts are set to 90 seconds.
    .connect_timeout = 90s;
    .first_byte_timeout = 90s;
    .between_bytes_timeout = 90s;

    # Poll /healthcheck.php every 30s, allowing 5s per poll.
    # NOTE(review): threshold 1 over a window of 10 means one good poll among
    # the last ten keeps the node marked healthy -- confirm this is intended.
    .probe = {
        .url = "/healthcheck.php";
        .interval = 30s;
        .timeout = 5s;
        .window = 10;
        .threshold = 1;
    }
}

# web3: web node, member of the "www" round-robin director.
backend web3 {
    .host = "10.10.1.100";
    .port = "80";
    .max_connections = 70;

    # All three backend timeouts are set to 90 seconds.
    .connect_timeout = 90s;
    .first_byte_timeout = 90s;
    .between_bytes_timeout = 90s;

    # Poll /healthcheck.php every 30s, allowing 5s per poll.
    # NOTE(review): threshold 1 over a window of 10 means one good poll among
    # the last ten keeps the node marked healthy -- confirm this is intended.
    .probe = {
        .url = "/healthcheck.php";
        .interval = 30s;
        .timeout = 5s;
        .window = 10;
        .threshold = 1;
    }
}

# web4: web node, member of the "www" round-robin director.
backend web4 {
    .host = "10.10.1.101";
    .port = "80";
    .max_connections = 70;

    # All three backend timeouts are set to 90 seconds.
    .connect_timeout = 90s;
    .first_byte_timeout = 90s;
    .between_bytes_timeout = 90s;

    # Poll /healthcheck.php every 30s, allowing 5s per poll.
    # NOTE(review): threshold 1 over a window of 10 means one good poll among
    # the last ten keeps the node marked healthy -- confirm this is intended.
    .probe = {
        .url = "/healthcheck.php";
        .interval = 30s;
        .timeout = 5s;
        .window = 10;
        .threshold = 1;
    }
}

# web5: web node, member of the "www" round-robin director.
backend web5 {
    .host = "10.10.1.235";
    .port = "80";
    .max_connections = 70;

    # All three backend timeouts are set to 90 seconds.
    .connect_timeout = 90s;
    .first_byte_timeout = 90s;
    .between_bytes_timeout = 90s;

    # Poll /healthcheck.php every 30s, allowing 5s per poll.
    # NOTE(review): threshold 1 over a window of 10 means one good poll among
    # the last ten keeps the node marked healthy -- confirm this is intended.
    .probe = {
        .url = "/healthcheck.php";
        .interval = 30s;
        .timeout = 5s;
        .window = 10;
        .threshold = 1;
    }
}

# web6: web node, member of the "www" round-robin director.
backend web6 {
    .host = "10.10.1.236";
    .port = "80";
    .max_connections = 70;

    # All three backend timeouts are set to 90 seconds.
    .connect_timeout = 90s;
    .first_byte_timeout = 90s;
    .between_bytes_timeout = 90s;

    # Poll /healthcheck.php every 30s, allowing 5s per poll.
    # NOTE(review): threshold 1 over a window of 10 means one good poll among
    # the last ten keeps the node marked healthy -- confirm this is intended.
    .probe = {
        .url = "/healthcheck.php";
        .interval = 30s;
        .timeout = 5s;
        .window = 10;
        .threshold = 1;
    }
}

# varnish1: first of the two cache peers (10.10.1.96). It has no health
# probe and allows far more connections than the web nodes.
backend varnish1 {
    .host = "10.10.1.96";
    .port = "80";
    .max_connections = 1000;

    # All three backend timeouts are set to 90 seconds.
    .connect_timeout = 90s;
    .first_byte_timeout = 90s;
    .between_bytes_timeout = 90s;
}

# varnish2: second of the two cache peers (10.10.1.97). It has no health
# probe and allows far more connections than the web nodes.
backend varnish2 {
    .host = "10.10.1.97";
    .port = "80";
    .max_connections = 1000;

    # All three backend timeouts are set to 90 seconds.
    .connect_timeout = 90s;
    .first_byte_timeout = 90s;
    .between_bytes_timeout = 90s;
}

# Clients allowed to issue PURGE requests (checked in vcl_recv): the local
# machine and the two cache peers.
acl purge {
    "127.0.0.1";
    "localhost";
    "10.10.1.96";    # varnish1
    "10.10.1.97";    # varnish2
}

# Round-robin pool of the six web nodes; vcl_recv selects it by default.
director www round-robin {
    { .backend = web1; }
    { .backend = web2; }
    { .backend = web3; }
    { .backend = web4; }
    { .backend = web5; }
    { .backend = web6; }
}

# Round-robin pool of the two cache peers; vcl_recv selects it for the
# test.cache-only host.
director cache round-robin {
    { .backend = varnish1; }
    { .backend = varnish2; }
}

# vcl_recv: request entry point. It picks the backend, decides which requests
# bypass the cache (pass/pipe), normalises Host, URL, Cookie and
# Accept-Encoding so equivalent requests share one cache object, and finally
# hands everything else to the cache lookup.
sub vcl_recv {
    # Start every request on the round-robin web farm; the rules further
    # down may re-route it to a cache peer or bypass the cache entirely.
    set req.backend = www;

    # Reject the "typhoeus" HTTP client.
    if (req.http.User-Agent ~ "typhoeus") {
        error 403 "Temporarily disabled";
    }

    # Allow stale objects for 6h, or 24h while the selected backend (the www
    # director at this point) is reported unhealthy.
    set req.grace = 6h;
    if (! req.backend.healthy) {
        set req.grace = 24h;
    }

    # Record the client address in X-Forwarded-For. Do it only on the first
    # pass (vcl_fetch and vcl_error restart requests, which would otherwise
    # append the same address again) and do not emit a leading ", " when the
    # client sent no such header.
    if (req.restarts == 0) {
        if (req.http.X-Forwarded-For) {
            set req.http.X-Forwarded-For = req.http.X-Forwarded-For ", " client.ip;
        } else {
            set req.http.X-Forwarded-For = client.ip;
        }
    }

    /*if (req.http.host ~ "^origin\.test\.(.+\.|)mywebsite.com$") {
      return(pass);
    }*/

    # Origin hosts and the listed test hosts always bypass the cache.
    if (req.http.host ~ "^origin\.(.+\.|)mywebsite\.com$") {
        return (pass);
    }
    # The pattern must stay on one line: a "..." string cannot span lines.
    if (req.http.host ~ "test.cms.mywebsite.com|test.cms-2.mywebsite.com|test.facebook.mywebsite.com") {
        return (pass);
    }

    # NOTE(review): for cacheable first-pass requests this choice is
    # overwritten by the peer selection further down -- confirm intent.
    if (req.http.host ~ "test.cache-only.mywebsite.com") {
        set req.backend = cache;
    }

    /* if (req.http.host ~ "test\.*\.mywebsite.com") {
      if (req.http.host !~ "test\.(.+\.|)m\.mywebsite\.com$") {
        set req.http.host = "test.mywebsite.com";
      }
    } else {
        return (pass);
    }*/

    # Collapse every non-mobile *.mywebsite.com host onto www so they share
    # cache objects; hosts outside the domain are passed straight through.
    if (req.http.host ~ ".*\.mywebsite.com") {
        if (req.http.host !~ "(.+\.|)m\.mywebsite\.com$") {
            set req.http.host = "www.mywebsite.com";
        }
    } else {
        return (pass);
    }

/*
    if (req.url ~ "^/node/75402/tweettracker_brand/config/xml") {
        error 403 "Temporarily disabled";
    }
*/

    # PURGE is honoured only for addresses in the "purge" ACL; the object is
    # looked up here and invalidated in vcl_hit.
    if (req.request == "PURGE") {
        if (!client.ip ~ purge) {
            error 405 "Not allowed.";
        }
        return (lookup);
    }

    # Unknown methods are piped; known methods other than GET/HEAD are passed.
    if (req.request != "GET" &&
        req.request != "HEAD" &&
        req.request != "PUT" &&
        req.request != "POST" &&
        req.request != "TRACE" &&
        req.request != "OPTIONS" &&
        req.request != "DELETE") {
            return (pipe);
    }
    if (req.request != "GET" && req.request != "HEAD") {
        return (pass);
    }

    # Drop the query string from ad helper pages so each is cached once.
    if (req.url ~ "eyeblaster/addineyeV2.html\?.*") {
        set req.url = "/eyeblaster/addineyeV2.html";
    }
    if (req.url ~ "sites/all/modules/custom/bravo_ad/ads.html\?.*") {
        set req.url = "/sites/all/modules/custom/bravo_ad/ads.html";
    }
    if (req.url ~ "sites/m.mywebsite.com/modules/custom/bravo_ad/ads.html\?.*") {
        set req.url = "/sites/m.mywebsite.com/modules/custom/bravo_ad/ads.html";
    }

    # Never cache these scripts or /json endpoints. "(?:" is a non-capturing
    # group; the previous "(:?" spelling matched an optional literal colon.
    if (req.url ~ "ahah_helper\.php|bravo_points\.php|install\.php|update\.php|cron\.php|/json(?:\?.*)?$") {
        return (pass);
    }
    if (req.http.Authorization) {
        return (pass);
    }
    if (req.url ~ "login" || req.url ~ "logout") {
        return (pass);
    }
    if (req.url ~ "^/admin/" || req.url ~ "^/node/add/") {
        return (pass);
    }

    # A client "Cache-Control: no-cache" is deliberately not honoured; the
    # rule below is kept disabled for reference.
    # if (req.http.Cache-Control ~ "no-cache") {
    #     return (pass);
    # }

    # Keep the Cookie header only when it carries one of the listed cookies,
    # after stripping "__*" and has_js cookies from it; otherwise drop the
    # header so the request can share a cache object.
    if (req.http.Cookie ~ "(VARNISH|DRUPAL_UID|LOGGED_IN|SESS|_twitter_sess)") {
        set req.http.Cookie = regsuball(req.http.Cookie, "(^|;\s*)(__[a-z]+|has_js)=[^;]*", "");
        set req.http.Cookie = regsub(req.http.Cookie, "^;\s*", "");
    } else {
        unset req.http.Cookie;
    }

    # First pass: requests arriving from a cache peer go to the web farm,
    # everything else is sent to the other cache peer. After one restart the
    # backend stays on www; from the second restart on the request is passed.
    if (req.restarts == 0) {
        if (client.ip == "10.10.1.96" || client.ip == "10.10.1.97") {
            set req.backend = www;
        } elsif (server.ip == "10.10.1.96") {
            set req.backend = varnish2;
        } else {
            set req.backend = varnish1;
        }
    } elsif (req.restarts >= 2) {
        return (pass);
    }

    # Do not negotiate compression for already-compressed media and archives
    # ("OGG" was previously misspelled "OOG" in the upper-case list).
    if (req.url ~ "\.(ico|jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf|ICO|JPG|JPEG|PNG|GIF|GZ|TGZ|BZ2|TBZ|MP3|OGG|SWF)") {
        unset req.http.Accept-Encoding;
    }

    # Strip the query string from these two ad endpoints.
    if (req.url ~ "^/(sites/all/modules/mywebsite_admanager/includes/ads.php|doubleclick/DARTIframe.html)(\?.*|)$") {
        set req.url = regsub(req.url, "\?.*$", "");
    }

/* keep user-agents
    if (! req.http.User-Agent ~ "MSIE") {
        set req.http.User-Agent = "Mozilla";
    }
*/

    # Reduce Accept-Encoding to a single value: gzip, deflate, or none.
    if (req.http.Accept-Encoding ~ "gzip") {
        set req.http.Accept-Encoding = "gzip";
    } elsif (req.http.Accept-Encoding ~ "deflate") {
        set req.http.Accept-Encoding = "deflate";
    } else {
        unset req.http.Accept-Encoding;
    }
    return (lookup);
}

# vcl_pipe: runs for requests that vcl_recv sends to pipe mode.
sub vcl_pipe {
    # Ask the backend to close the connection once this exchange is done.
    set bereq.http.connection = "close";

    return (pipe);
}

# vcl_pass: nothing to adjust for passed requests; continue with the pass.
sub vcl_pass {
    return (pass);
}

# vcl_hash: build the cache key from the URL and the Host header, adding the
# Cookie header only when it carries one of the listed cookies.
sub vcl_hash {
    # Base key: URL first, then host.
    set req.hash += req.url;
    set req.hash += req.http.host;

    # Requests carrying one of these cookies get their own cache objects.
    if (req.http.Cookie ~ "VARNISH|DRUPAL_UID|LOGGED_IN") {
        set req.hash += req.http.Cookie;
    }

    return (hash);
}

# vcl_hit: runs when the lookup found an object. A PURGE request expires the
# object on the spot and answers with a synthetic 200; other requests fall
# off the end of this sub, since the explicit handling below is disabled.
sub vcl_hit {
    if (req.request == "PURGE") {
        # A TTL of zero expires the cached object immediately.
        set obj.ttl = 0s;
        error 200 "Purged.";
    }

    /*
    if (!obj.cacheable) {
        return (pass);
    }
    return (deliver);
    */
}

# vcl_fetch: runs after a backend response has been received. It retries
# backend 500s, decides which responses must not be cached, and assigns the
# TTL and outgoing Cache-Control for those that are.
sub vcl_fetch {
    # Retry a backend 500. The marker header tells vcl_error that a retry has
    # already been attempted for this request.
    if (beresp.status == 500) {
        set req.http.X-Varnish-Error = "1";
        return (restart);
    }

    set beresp.grace = 6h;

    # Short lifetime for response codes above 302. Cache-Control directives
    # take the form name=value, with no whitespace around "=".
    if (beresp.status > 302) {
        set beresp.ttl = 60s;
        set beresp.http.Cache-Control = "max-age=60";
    }

    # The backend asked for no storage via Edge-control.
    if (beresp.http.Edge-control ~ "no-store") {
        set beresp.http.storage = "1";
        set beresp.cacheable = false;
        return (pass);
    }

    # Redirects, errors and anything flagged as not cacheable are passed.
    if (beresp.status >= 300 || !beresp.cacheable) {
        set beresp.http.Varnish-X-Cacheable = "Not Cacheable";
        set beresp.http.storage = "1";
        return (pass);
    }

    # Responses that set a cookie are passed rather than cached.
    if (beresp.http.Set-Cookie) {
        return (pass);
    }

    # Everything reaching this point has beresp.cacheable set (the check above
    # returned otherwise): cache for 10 minutes by default.
    unset beresp.http.expires;
    set beresp.ttl = 600s;
    set beresp.http.Cache-Control = "max-age=600";

    # Static media and archives are kept much longer ("OGG" was previously
    # misspelled "OOG" in the upper-case list).
    if (req.url ~ "\.(ico|jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf|ICO|JPG|JPEG|PNG|GIF|GZ|TGZ|BZ2|TBZ|MP3|OGG|SWF)") {
        set beresp.ttl = 43829m;
        set beresp.http.Cache-Control = "max-age=1000000";
    }

    /*
    if (req.url ~ "json(:?\??.*)?$") {
        set beresp.ttl = 30s;
        set beresp.http.Cache-Control = "max-age = 30";
    }
    */

    return (deliver);
}

# vcl_deliver: tag the response with debugging headers that show whether it
# was served from cache and, if so, how many hits the object has had.
sub vcl_deliver {
    if (obj.hits == 0) {
        # No hits recorded on the object: report a miss.
        set resp.http.Varnish-X-Cache = "MISS";
    } else {
        set resp.http.Varnish-X-Cache = "HIT";
        set resp.http.Varnish-X-Cache-Hits = obj.hits;
    }

    return (deliver);
}

# vcl_error: builds the synthetic response for errors. Server-side failures
# are retried via restart before the "site unavailable" page is shown.
sub vcl_error {
    # Retry only genuine server-side failures (5xx). Responses raised on
    # purpose with "error" elsewhere in this VCL (403, 405, 200 "Purged.")
    # must be delivered as they are: restarting them re-runs vcl_recv and,
    # for PURGE, replaces the acknowledgement with a fresh lookup.
    if (obj.status >= 500) {
        if (req.restarts == 0) {
            return (restart);
        }
        # Allow one further retry unless a retry is already marked on the
        # request (vcl_fetch sets the same marker when it restarts).
        if (req.http.X-Varnish-Error != "1") {
            set req.http.X-Varnish-Error = "1";
            return (restart);
        }
    }

    set obj.http.Content-Type = "text/html; charset=utf-8";

    # Attribute values are kept on one line each so the URLs contain no
    # embedded newlines; the debug section stays inside an HTML comment.
    synthetic {"
    <?xml version="1.0" encoding="utf-8"?>
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
    <head>
        <title>mywebsite.com "} obj.status " " obj.response {"</title>
        <style type="text/css">
            body {background-color: #fff; margin: auto;}
            #page {width: 1000px; margin: 0 auto; background-color: #fff;}
        </style>
    </head>
    <body>
        <div id="page">
            <img src="http://www.mywebsite.com/media/site-unavailable/site-unavailable.jpg" width="1000" height="700" >
            <!--
            <h4>Debug Info:</h4>
            <pre>
                Status: "} obj.status {"
                Response: "} obj.response {"
                XID: "} req.xid {"
            </pre>
            <address><a href="http://www.varnish-cache.org/">Varnish</a></address>
            -->
        </div>
    </body>
</html>
    "};
    return (deliver);
}


Thanks
Tim
-- 
GPG me!!

gpg --keyserver pool.sks-keyservers.net --recv-keys F186197B
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://www.varnish-cache.org/lists/pipermail/varnish-misc/attachments/20140602/11c2b727/attachment-0001.html>


More information about the varnish-misc mailing list