site goes down if one particular node of two varnish nodes is stopped

Tim Dunphy bluethundr at gmail.com
Mon May 12 00:20:50 CEST 2014


hey all..

I have two varnish nodes being balanced by an F5 load balancer. Both were
installed in exactly the same manner, with yum installing local rpms of
varnish 2.1.5 (the version requested by the client).

Both share the exact same default.vcl file. But if you take node a down
with node b running, the whole site goes down; if you take node b down with
node a running, the site stays up. I need to determine why node b isn't
supporting the site. Each varnish node needs to be balancing 3 web servers,
and it looks like only node a does, since the site goes down when you take
down node a and leave node b running.

I had a look at varnishlog for both and both nodes appear to be getting hit.

Node A:

3 VCL_return   c deliver

    3 TxProtocol   c HTTP/1.1

    3 TxStatus     c 200

    3 TxResponse   c OK

    3 TxHeader     c Server: Apache

    3 TxHeader     c X-Powered-By: PHP/5.2.8

    3 TxHeader     c Content-Type: text/html

    3 TxHeader     c Cache-Control: max-age = 600

    3 TxHeader     c Content-Length: 4

    3 TxHeader     c Date: Sun, 11 May 2014 22:11:02 GMT

    3 TxHeader     c X-Varnish: 1578371599 1578371564

    3 TxHeader     c Age: 86

    3 TxHeader     c Via: 1.1 varnish

    3 TxHeader     c Connection: close

    3 TxHeader     c Varnish-X-Cache: HIT

    3 TxHeader     c Varnish-X-Cache-Hits: 35

    3 Length       c 4

    3 ReqEnd       c 1578371599 1399846262.156239033 1399846262.156332970
0.000054121 0.000056028 0.000037909


Node B:

9 VCL_return   c deliver

    9 TxProtocol   c HTTP/1.1

    9 TxStatus     c 200

    9 TxResponse   c OK

    9 TxHeader     c Server: Apache

    9 TxHeader     c X-Powered-By: PHP/5.2.17

    9 TxHeader     c Content-Type: text/html

    9 TxHeader     c Cache-Control: max-age = 600

    9 TxHeader     c Content-Length: 4

    9 TxHeader     c Date: Sun, 11 May 2014 22:11:33 GMT

    9 TxHeader     c X-Varnish: 1525629213 1525629076

    9 TxHeader     c Age: 341

    9 TxHeader     c Via: 1.1 varnish

    9 TxHeader     c Connection: close

    9 TxHeader     c Varnish-X-Cache: HIT

    9 TxHeader     c Varnish-X-Cache-Hits: 137

    9 Length       c 4

    9 ReqEnd       c 1525629213 1399846293.098695993 1399846293.098922968
0.000057936 0.000181913 0.000045061

So I'm not sure why this is the case.

Here’s the VCL file that I’m using, in case it provides any clues. I
apologize that I’m still too much of a newb to ferret out the most relevant
parts, but I hope that the full context may help.

# Origin web server #1, reached at 10.10.1.104 on port 80.
backend web1 {
    .host = "10.10.1.104";
    .port = "80";

    # Cap on simultaneous connections Varnish opens to this backend.
    .max_connections = 70;

    # Connect, first-byte and between-bytes timeouts are all 45 seconds.
    .connect_timeout = 45s;
    .first_byte_timeout = 45s;
    .between_bytes_timeout = 45s;

    # Health probe: request /healthcheck.php every 30s with a 5s timeout.
    # With window 10 / threshold 1, a single good probe among the last ten
    # is enough for the backend to be treated as healthy.
    .probe = {
        .url = "/healthcheck.php";
        .interval = 30s;
        .timeout = 5s;
        .window = 10;
        .threshold = 1;
    }
}

# Origin web server #2, reached at 10.10.1.105 on port 80.
backend web2 {
    .host = "10.10.1.105";
    .port = "80";

    # Cap on simultaneous connections Varnish opens to this backend.
    .max_connections = 70;

    # Connect, first-byte and between-bytes timeouts are all 45 seconds.
    .connect_timeout = 45s;
    .first_byte_timeout = 45s;
    .between_bytes_timeout = 45s;

    # Health probe: request /healthcheck.php every 30s with a 5s timeout.
    # With window 10 / threshold 1, a single good probe among the last ten
    # is enough for the backend to be treated as healthy.
    .probe = {
        .url = "/healthcheck.php";
        .interval = 30s;
        .timeout = 5s;
        .window = 10;
        .threshold = 1;
    }
}

# Origin web server #3, reached at 10.10.1.106 on port 80.
backend web3 {
    .host = "10.10.1.106";
    .port = "80";

    # Cap on simultaneous connections Varnish opens to this backend.
    .max_connections = 70;

    # Connect, first-byte and between-bytes timeouts are all 45 seconds.
    .connect_timeout = 45s;
    .first_byte_timeout = 45s;
    .between_bytes_timeout = 45s;

    # Health probe: request /healthcheck.php every 30s with a 5s timeout.
    # With window 10 / threshold 1, a single good probe among the last ten
    # is enough for the backend to be treated as healthy.
    .probe = {
        .url = "/healthcheck.php";
        .interval = 30s;
        .timeout = 5s;
        .window = 10;
        .threshold = 1;
    }
}

# Clients allowed to issue PURGE requests (checked in vcl_recv).
acl purge {
    # The local machine, by name and by loopback address.
    "localhost";
    "127.0.0.1";
    # Two hosts on the 10.10.1.x network.
    # NOTE(review): presumably the two varnish nodes themselves - confirm.
    "10.10.1.102";
    "10.10.1.103";
}

# Round-robin director that rotates requests across web1, web2 and web3.
# Member order is significant for the rotation and is kept as declared.
director www round-robin {
    { .backend = web1; }
    { .backend = web2; }
    { .backend = web3; }
}

# Request entry point: selects the backend director, normalises the request
# (host, URL, cookies, Accept-Encoding) and decides between pipe, pass and
# cache lookup.
sub vcl_recv {
    # Every request goes through the round-robin director.
    set req.backend = www;

    # Allow stale objects to be served for 6h, or 24h while the director
    # reports no healthy backend.
    set req.grace = 6h;
    if (!req.backend.healthy) {
        set req.grace = 24h;
    }

    # Record the client address in X-Forwarded-For exactly once per client
    # request: skip restarts (which re-enter vcl_recv) so the IP is not
    # appended repeatedly, and do not emit a leading ", " when the header
    # was absent.
    if (req.restarts == 0) {
        if (req.http.X-Forwarded-For) {
            set req.http.X-Forwarded-For = req.http.X-Forwarded-For ", " client.ip;
        } else {
            set req.http.X-Forwarded-For = client.ip;
        }
    }

    # Origin test hosts bypass the cache entirely.
    if (req.http.host ~ "^origin\.test(.+\.|)mywebsite\.com$") {
        return (pass);
    }

    # Collapse all mywebsite.com host names onto the stage host so they
    # share one set of cache entries; anything else is passed through.
    if (req.http.host ~ ".*\.mywebsite.com|mywebsite.com") {
        /* allow (origin.)stage.m.mywebsite.com to be a separate host */
        if (req.http.host != "stage.m.mywebsite.com") {
            set req.http.host = "stage.mywebsite.com";
        }
    } else {
        return (pass);
    }

    # PURGE is only honoured for clients listed in the purge ACL.
    if (req.request == "PURGE") {
        if (!client.ip ~ purge) {
            error 405 "Not allowed.";
        }
        return (lookup);
    }

    # Methods outside this list are piped straight to the backend.
    if (req.request != "GET" &&
        req.request != "HEAD" &&
        req.request != "PUT" &&
        req.request != "POST" &&
        req.request != "TRACE" &&
        req.request != "OPTIONS" &&
        req.request != "DELETE") {
        return (pipe);
    }

    # Only GET and HEAD are candidates for caching.
    if (req.request != "GET" && req.request != "HEAD") {
        return (pass);
    }

    # Drop the query string from the ad iframe URLs so each is cached as a
    # single object.
    if (req.url ~ "sites/all/modules/custom/bravo_ad/ads.html\?.*") {
        set req.url = "/sites/all/modules/custom/bravo_ad/ads.html";
    }
    if (req.url ~ "eyeblaster/addineyeV2.html\?.*") {
        set req.url = "/eyeblaster/addineyeV2.html";
    }

    # Dynamic/maintenance endpoints are never cached. The trailing group is
    # a non-capturing "(?:...)"; the original "(:?" was a typo that matched
    # an optional literal colon instead.
    if (req.url ~ "ahah_helper\.php|bravo_points\.php|install\.php|update\.php|cron\.php|/json(?:\?.*)?$") {
        return (pass);
    }

    # Authenticated or session-related traffic is never cached.
    if (req.http.Authorization) {
        return (pass);
    }
    if (req.url ~ "login" || req.url ~ "logout") {
        return (pass);
    }
    if (req.url ~ "^/admin/" || req.url ~ "^/node/add/") {
        return (pass);
    }

    # A client "Cache-Control: no-cache" is deliberately NOT honoured; the
    # pass that used to be here was already disabled in the original.

    # Keep cookies only when a session-style cookie is present, and even
    # then strip the "__xxx" and has_js cookies; otherwise drop the Cookie
    # header altogether.
    if (req.http.Cookie ~ "(VARNISH|DRUPAL_UID|LOGGED_IN|SESS|_twitter_sess)") {
        set req.http.Cookie = regsuball(req.http.Cookie, "(^|;\s*)(__[a-z]+|has_js)=[^;]*", "");
        set req.http.Cookie = regsub(req.http.Cookie, "^;\s*", "");
    } else {
        unset req.http.Cookie;
    }

    /* removed varnish cache backend logic */

    # After two restarts stop trying the cache and pass to the backend.
    # (req.backend is already set to www at the top for every request, so
    # it is not assigned again here.)
    if (req.restarts >= 2) {
        return (pass);
    }

    # Already-compressed/static media: do not negotiate encoding.
    # "(?i)" makes the match case-insensitive, replacing the duplicated
    # upper-case list (which misspelled OGG as "OOG").
    if (req.url ~ "(?i)\.(ico|jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf)") {
        unset req.http.Accept-Encoding;
    }

    # Strip the query string from these two ad endpoints.
    if (req.url ~ "^/(sites/all/modules/mywebsite_admanager/includes/ads.php|doubleclick/DARTIframe.html)(\?.*|)$") {
        set req.url = regsub(req.url, "\?.*$", "");
    }

    # Normalise Accept-Encoding to one of: gzip, deflate, or absent.
    if (req.http.Accept-Encoding ~ "gzip") {
        set req.http.Accept-Encoding = "gzip";
    } elsif (req.http.Accept-Encoding ~ "deflate") {
        set req.http.Accept-Encoding = "deflate";
    } else {
        unset req.http.Accept-Encoding;
    }

    return (lookup);
}

# Piped requests: ask the backend to close the connection when done, then
# hand the connection over.
sub vcl_pipe {
    set bereq.http.connection = "close";
    return (pipe);
}

# Passed requests go to the backend unmodified.
sub vcl_pass {
    return (pass);
}

# Builds the cache key: URL first, then Host, then (conditionally) Cookie.
sub vcl_hash {
    set req.hash += req.url;
    set req.hash += req.http.host;

    # The Cookie header becomes part of the key only when it carries one of
    # these three cookie names.
    # NOTE(review): vcl_recv also keeps SESS/_twitter_sess cookies, which
    # are not hashed here - confirm that is intended.
    if (req.http.Cookie ~ "VARNISH|DRUPAL_UID|LOGGED_IN") {
        set req.hash += req.http.Cookie;
    }

    return (hash);
}

# Cache hit: a PURGE expires the matched object and answers 200 "Purged.";
# every other request falls through to the built-in vcl_hit behaviour.
sub vcl_hit {
    if (req.request == "PURGE") {
        # Zero TTL expires the object immediately.
        set obj.ttl = 0s;
        error 200 "Purged.";
    }
}

# Backend response handling: retries on HTTP 500, decides what is cacheable
# and for how long, and rewrites Cache-Control to match the chosen TTL.
sub vcl_fetch {
    # A 500 from the backend: flag the request and restart it.
    if (beresp.status == 500) {
        set req.http.X-Varnish-Error = "1";
        restart;
    }

    # Keep objects up to 6h past their TTL so they can be served as stale.
    set beresp.grace = 6h;

    # Set a short circuit cache lifetime for resp codes above 302
    if (beresp.status > 302) {
        set beresp.ttl = 60s;
        set beresp.http.Cache-Control = "max-age=60";
    }

    # "Edge-control: no-store" from the backend forces a pass.
    if (beresp.http.Edge-control ~ "no-store") {
        set beresp.http.storage = "1";
        set beresp.cacheable = false;
        return (pass);
    }

    # Redirects, errors and anything Varnish deems non-cacheable are passed
    # and labelled as such.
    if (beresp.status >= 300 || !beresp.cacheable) {
        set beresp.http.Varnish-X-Cacheable = "Not Cacheable";
        set beresp.http.storage = "1";
        return (pass);
    }

    # Never cache a response that sets a cookie.
    if (beresp.http.Set-Cookie) {
        return (pass);
    }

    # From here on the response is known to be cacheable (the non-cacheable
    # case returned above), so the former "if (beresp.cacheable)" wrapper
    # was always true and has been dropped.
    # Default lifetime: 600 seconds, ignoring the backend's Expires.
    # Cache-Control is written without spaces around "=" as required by the
    # HTTP caching grammar.
    unset beresp.http.expires;
    set beresp.ttl = 600s;
    set beresp.http.Cache-Control = "max-age=600";

    # Static media gets a much longer lifetime. "(?i)" makes the match
    # case-insensitive, replacing the duplicated upper-case list (which
    # misspelled OGG as "OOG").
    if (req.url ~ "(?i)\.(ico|jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf)") {
        set beresp.ttl = 43829m;
        set beresp.http.Cache-Control = "max-age=1000000";
    }

    return (deliver);
}


# Adds debugging headers that tell the client whether the response came
# from cache and, if so, how many times the object has been hit.
sub vcl_deliver {
    if (obj.hits == 0) {
        # Hit counter is zero: this response was not served from cache.
        set resp.http.Varnish-X-Cache = "MISS";
    } else {
        set resp.http.Varnish-X-Cache = "HIT";
        set resp.http.Varnish-X-Cache-Hits = obj.hits;
    }

    return (deliver);
}

# Error handling: retry server-side failures up to twice before letting the
# error response go out.
sub vcl_error {
    # Restart only for 5xx errors. Deliberate responses raised elsewhere in
    # this VCL - error 405 "Not allowed." and error 200 "Purged." - must
    # reach the client as-is; restarting them re-ran the request (and, for
    # a successful purge, refetched the object that had just been purged).
    if (obj.status >= 500) {
        # First failure: simply try again.
        if (req.restarts == 0) {
            return (restart);
        }

        # Later failure not yet flagged: flag it and try once more.
        if (req.http.X-Varnish-Error != "1") {
            set req.http.X-Varnish-Error = "1";
            return (restart);
        }
    }

    # Otherwise fall through to the remaining error handling.
    # NOTE(review): the poster says the error-page part of this sub was
    # omitted from the listing - re-add it here when merging.
}

 The only part that I omitted was the one pointing to the error page. Can
anyone offer any advice on how to troubleshoot this?

I'm enclosing the full VCL in case that extra info is helpful. I didn't
omit much tho.

Thank you!

Tim

-- 
GPG me!!

gpg --keyserver pool.sks-keyservers.net --recv-keys F186197B
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://www.varnish-cache.org/lists/pipermail/varnish-misc/attachments/20140511/253e3440/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: default.vcl-node-a-stage
Type: application/octet-stream
Size: 7209 bytes
Desc: not available
URL: <https://www.varnish-cache.org/lists/pipermail/varnish-misc/attachments/20140511/253e3440/attachment-0001.obj>


More information about the varnish-misc mailing list