[Varnish] #1791: Varnish send request to wrong servers without any logging

Varnish varnish-bugs at varnish-cache.org
Fri Sep 11 18:24:36 CEST 2015


#1791: Varnish send request to wrong servers without any logging
---------------------+----------------------
 Reporter:  webmons  |       Type:  defect
   Status:  new      |   Priority:  high
Milestone:           |  Component:  varnishd
  Version:  3.0.7    |   Severity:  critical
 Keywords:           |
---------------------+----------------------
 We route requests to different backend based on whether it's mobile and
 its url. We start seeing infrequent but steady stream of requests being
 routed to a wrong backend. We turned on varnishlog but found no trace of
 those requests but our backend log show that they did go through varnish

 Anyone has experience this issue

 Here is detail info

 '''Which varnish version:'''
 3.0.7
 '''
 Which type of CPU ?'''
 2 CPU
 processor       : 0
 vendor_id       : GenuineIntel
 cpu family      : 6
 model           : 62
 model name      : Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 stepping        : 4
 microcode       : 0x415
 cpu MHz         : 2500.106
 cache size      : 25600 KB

 '''32 or 64 bit mode ?'''
 64

 '''how much RAM ?'''
 14G

 '''Which OS/kernel version ?'''
 Ubuntu
 Kernel 3.13.0-48-generic

 '''VCL'''

 {{{

 import std;
 import ipcast;
 import header;
 include "devicedetect.vcl";

 acl block_ips {
 }

 backend zfront1 {
   .host = "10.0.1.70";
   .port = "80";
   .connect_timeout = 600s;
   .first_byte_timeout = 600s;
   .between_bytes_timeout = 600s;
 }
 backend  api1 {
   .host = "10.0.1.251";
   .port = "80";
   .connect_timeout = 600s;
   .first_byte_timeout = 600s;
   .between_bytes_timeout = 600s;
 }
 backend mobile1 {
   .host = "10.0.1.249";
   .port = "80";
   .connect_timeout = 600s;
   .first_byte_timeout = 600s;
   .between_bytes_timeout = 600s;
 }

 backend quoted {
   .host = "10.0.1.250";
   .port = "80";
   .connect_timeout = 600s;
   .first_byte_timeout = 600s;
   .between_bytes_timeout = 600s;
 }
 director zfrontCluster round-robin {
             {
             .backend = zfront1;
         }
 }
 director apiCluster round-robin {
             {
             .backend = api1;
         }
 }
 director mobileCluster round-robin {
             {
             .backend = mobile1;
         }
 }
 director quotedCluster round-robin {
             {
             .backend = quoted;
         }
 }
 sub vcl_recv {
     # Pull the IP address from X-Forwarded-For. If we can't parse the IP,
 default to 127.0.0.1 so that we let it through
     set req.http.xff = regsub(req.http.X-Forwarded-For, "^(^[^,]+),?.*$",
 "\1");
     if (ipcast.ip(req.http.xff, "127.0.0.1") ~ block_ips) {
         error 403 "Forbidden";
     }

     call devicedetect;
     if (req.http.host == "thezebra.com") {
         set req.http.host = "www.thezebra.com";
         error 750 "https://" + req.http.host + req.url;
     }

     if (req.url ~ "^/insurance-news/") {
         set req.url = regsub(req.url, "^/insurance-news/", "/");
         set req.backend = quotedCluster;
     } else if (req.url ~ "^/(api|admin|be|al|el|il)/") {
         set req.backend = apiCluster;
     } else {
         if (req.http.host ~ "[A-Z]" || req.url ~ "[A-Z]") {
             error 750 "https://" + std.tolower(req.http.host + req.url);
         }
         if ((req.url ~ "^/(\?.*)?$" || req.url ~ "^/compare/.*") &&
 req.http.X-UA-Device ~ "^mobile") {
             set req.backend = mobileCluster;
         } else {
             set req.backend = zfrontCluster;
         }
     }
 }

 sub vcl_hash {
     if (req.backend == quotedCluster) {
         hash_data("/insurance-news/");
     }
 }

 sub vcl_fetch {
     # do not cache anything that's not 200.
     if (beresp.status != 200) {
         set beresp.ttl = 0 s;
     }

     # mobile-related: Google requires adding vary: user-agent header to
 the response to make it mobile-friendly.
     # The problem is if varnish see vary: user-agent it'll create
 different cache item per user-agent so the following
     # code will add vary header with X-UA-Device and vcl_deliver will
 replace it with User-agent
     # this is assuming backend doesn't add vary: user-agent. If it does,
 we'd need to remove it here
     if (req.http.X-UA-Device) {
         if (!beresp.http.Vary) { # no Vary at all
             set beresp.http.Vary = "X-UA-Device";
         } elseif (beresp.http.Vary !~ "X-UA-Device") { # add to existing
 Vary
             set beresp.http.Vary = beresp.http.Vary + ", X-UA-Device";
         }
     }
 }

 sub vcl_deliver {
     if (obj.hits > 0) {
             set resp.http.X-Cache = "HIT";
     } else {
             set resp.http.X-Cache = "MISS";
     }

     # to keep any caches in the wild from serving wrong content to client
 #2
     # behind them, we need to transform the Vary on the way out.
     if ((req.http.X-UA-Device) && (resp.http.Vary)) {
         set resp.http.Vary = regsub(resp.http.Vary, "X-UA-Device", "User-
 Agent");
     }

     # click stream stuff
     # extract subid from the cookie or request url
     if (req.http.cookie ~ "subid") {
         std.log("subid:" + regsub(req.http.cookie, ".*subid=([^;]*).*",
 "\1"));
     } elseif (req.url ~ "[&\?]subid=") {
         std.log("subid:" + regsub(req.url, ".*[&\?]subid=([^&]*).*",
 "\1"));
     }

     # now extract the channel id. if the channel id isn't specified, then
 we want to use the organic channel id
     if (req.http.cookie ~ "channel_id") {
         std.log("channelid:" + regsub(req.http.cookie,
 ".*channel\_id=([^;]*).*", "\1"));
     } elseif (req.url ~ "[&\?]channelid=") {
         std.log("channelid:" + regsub(req.url,
 ".*[&\?]channelid=([^&]*).*", "\1"));
     } else {
         std.log("channelid:3acf01");
     }

     # session id should only be in cookie
     if (req.http.cookie ~ "sessionid") {
         std.log("sessionid:" + regsub(req.http.cookie,
 ".*sessionid=([^;]*).*", "\1"));
     } else {
         std.log("sessionid:" + regsub(header.get(resp.http.set-
 cookie,"sessionid="), ".*sessionid=([^;]*).*", "\1"));
     }
     if (req.http.X-UA-Device ~ "^mobile") {
         std.log("mobile:true");
     } else {
         std.log("mobile:false");
     }
 }

 sub vcl_error {
     if (obj.status == 750) {
         set obj.http.Location = obj.response;
         set obj.status = 301;
         return(deliver);
     }
 }
 }}}


 '''Behavior:'''
 We'd see requests not meant for API cluster routed to it. Most of these
 are from bot, like google adsbot or pingdom but quite a few are from
 legitimate users
 Here is an nginx access log entry from an api server in API cluster

 10.0.1.100 0.052 0.052  - [11/Sep/2015:10:56:41 -0500]  "GET
 /compare/start/bid811/ut?utm_source=google&utm_medium=cpc&utm_campaign
 =car-insurance-ut&utm_term=geico-auto-insurance-policy&utm_content=the-
 zebra-car-insurance&channelid=ajf201&subid=ut-big-brand-carriers-exact
 HTTP/1.1" 404 6655 "-" "AdsBot-Google
 (+http://www.google.com/adsbot.html)" "66.249.92.33"

 The url path doesn't match this regex ^/(api|admin|be|al|el|il)/, so I'm
 completely lost as to why it'd show up there

 Neither Varnishlog nor varnishncsa shows anything about this request.

-- 
Ticket URL: <https://www.varnish-cache.org/trac/ticket/1791>
Varnish <https://varnish-cache.org/>
The Varnish HTTP Accelerator



More information about the varnish-bugs mailing list