[Varnish] #1246: Assert error in cnt_hit(), cache_center.c line 1025
Varnish
varnish-bugs at varnish-cache.org
Mon Jan 7 19:43:44 CET 2013
#1246: Assert error in cnt_hit(), cache_center.c line 1025
----------------------+---------------------
Reporter: psa | Owner: martin
Type: defect | Status: new
Priority: normal | Milestone:
Component: varnishd | Version: 3.0.3
Severity: normal | Resolution:
Keywords: |
----------------------+---------------------
Comment (by psa):
It only happens occasionally (every couple of days).
{{{
import std;
# Round-robin director named "default" with a single inline backend.
director default round-robin {
    {
        .backend = {
            # Where the backend lives.
            .host = "mybackend";
            .port = "80";
            # Timeouts: 0.7s to establish the connection, then 25ms for the
            # first byte and 25ms between subsequent bytes.
            .connect_timeout = 0.7s;
            .first_byte_timeout = 25ms;
            .between_bytes_timeout = 25ms;
        }
    }
}
# Clean the requested URL, reject common junk and setup the grace timeout
#
# Common URL encode substitutions that you might need in this section:
# * %23 #
# * %26 &
# * %2F /
# * %3A :
# * %3D =
# * %3F ?
#
# Normalise the incoming request before cache lookup:
#   1. answer "/ready" health checks directly,
#   2. drop cookies and reject anything that is not GET/HEAD,
#   3. repair URLs that were split on a space into req.proto,
#   4. reject ad/CDN/junk URLs,
#   5. canonicalise the embedded "url=" parameter (scheme, trailing slash,
#      spaces, tracking parameters, fragments, trailing separators),
#   6. set the request grace and go to lookup.
sub vcl_recv {
    # If it's a check that we're alive, short circuit
    if ("/ready" == req.url) {
        error 200 "OK";
    }

    # We don't care about cookies here and don't want them interfering with
    # caching
    unset req.http.cookie;

    # We only support GET or HEAD requests
    if (req.request != "GET" && req.request != "HEAD") {
        error 401 "Not a GET request";
    }

    # If a URL has spaces in it, it'll get split on the first space and that
    # will end up in the protocol. Split it in the right place.
    if (req.proto ~ "\s+") {
        # If we just have a blank URL, then we want to strip all leading
        # spaces so that we end up with 'url=http://...' rather than
        # 'url= http://...'
        if (req.url ~ "(&|\?)url=$") {
            set req.url = req.url + regsub(req.proto, "\s*([\w]+.*) HTTP.*",
                "\1");
        } else {
            # Otherwise, keep all the spaces. The split for protocol will
            # suck one which we need to keep so put it back in the
            # substitution (hence the leading space in the replacement).
            set req.url = req.url + regsub(req.proto, "([\s\w]+.*) HTTP.*", " \1");
        }
        # Now fix the protocol up again.
        set req.proto = regsub(req.proto, ".*HTTP", "HTTP");
    }

    # WARNING:
    # All the filters to clean out junk need to happen after the URL repair

    # Block all CDN and obvious ad server traffic
    if (req.url ~
        "((&|\?)url=http(s|)(://|%3A%2F%2F)(ad(s|v|server|)|cdn)[0-9]*\.)|ad.doubleclick.net|banner.php(\?|%3F)|(/|%2F)adFrame\.html|(/|%2F)adiframe|(/|%2F)ads(/|%2F)|(/|%2F)vda(/|%2F)iframe\.html")
    {
        error 400 "CDN or Ad Server";
    }

    # The URL-encoded scheme separator is "%3A%2F%2F" ("://"); the pattern
    # previously ended in "%2F%2", which missed encoded URLs whose host is
    # the bare domain.
    if (req.url ~
        "http(s|)(://|%3A%2F%2F)(.*\.|)xxx.com/middle\?position=")
    {
        error 400 "Ad Server";
    }

    if (req.url ~ "yyy.fr") {
        error 400 "yyy is all ads";
    }

    # Dump crap
    if (req.url ~ "(&|\?)url=(file:|C:|/|\s*$)") {
        error 400 "No URL or request for file object";
    }

    # Prepend missing http (mostly for cache hit)
    if (req.url !~ "(&|\?)url=http") {
        set req.url = regsub(req.url, "(&|\?)url=", "\1url=http://");
    }

    # Collapse every zzz.com results page onto the site root.
    # Same "%3A%2F%2F" separator fix as above.
    # NOTE(review): the second alternative of the "(www.|%3A%2F%2F)" group
    # looks like a leftover; left as-is -- confirm intent. The regsub below
    # only rewrites the unencoded "zzz.com/" form.
    if (req.url ~
        "http(s|)(://|%3A%2F%2F)(www.|%3A%2F%2F)zzz.com(/|%2F)results")
    {
        set req.url = regsub(req.url, "zzz.com/.*", "zzz.com/");
    }

    # Append / if it's a plain domain name so that we get better cache rates
    if (req.url ~ "(&|\?)url=http(s|)://[^/]+$") {
        set req.url = req.url + "/";
    }

    # Make sure spaces will travel through the system without disrupting
    # anything further.
    if (req.url ~ "\s+") {
        set req.url = regsuball(req.url, "\s", "%20");
    }

    # Remove common ad tags and randomizers
    if (req.url ~
        "(\?|&|%3F|%26)((s|)rnd|subid|adnet_track|gclid|(_|__|)utm_[a-z]+)(=|%3D)")
    {
        # It's easier to guard against the trailing '&' by substituting back
        set req.url = regsuball(req.url, "%26", "&");
        set req.url = regsuball(req.url,
            "((s|)rnd|subid|adnet_track|gclid|(_|__|)utm_[a-z]+)(=|%3D)[^&]+&?",
            "");
    }

    # Remove fragments from requests as the fragment messes up the cache
    if (req.url ~ "(#|%23)") {
        set req.url = regsub(req.url, "(#|%23).*", "");
    }

    # Remove trailing & and ?
    if (req.url ~ "(\?|&|%3F|%26)$") {
        set req.url = regsub(req.url, "(\?|&|%26|%3F)$", "");
    }

    # Accept objects up to 15 minutes past their TTL for this request, then
    # always go to cache lookup.
    set req.grace = 15m;
    return (lookup);
}
# Adjust the hashing mechanism to not use the request host header (or
# server.ip) and to make http://... the same as https://... so we don't
# cache separate pages for secure vs non-secure versions of the same page.
sub vcl_hash {
# The request URL is the only hash input. An embedded "url=https://"
# parameter is hashed as "url=http://"; regsub works on a copy, so req.url
# itself is not modified here.
hash_data(regsub(req.url, "(&|\?)url=https://", "\1url=http://"));
# Finish hashing here so that nothing besides the URL above is added by
# this subroutine.
return(hash);
}
# Cache-hit policy for objects that are close to expiry (TTL below 45s):
#   * on hit number 5010 the TTL is reset to one day and the object is
#     delivered;
#   * on hit number 3, 10, or any multiple of 1000 the request is passed
#     to the backend instead of being served from cache;
#   * everything else is delivered from cache.
# The object's TTL is also copied into req.http.X-Local-TTL so that
# vcl_deliver can read it.
sub vcl_hit {
    if (obj.ttl < 45s) {
        if (obj.hits == 5010) {
            set obj.ttl = 1d;
            return (deliver);
        }
    }

    # Stash the TTL on the request so it survives until delivery.
    set req.http.X-Local-TTL = obj.ttl;

    if (obj.ttl < 45s) {
        if (obj.hits == 3 || obj.hits == 10) {
            return (pass);
        }
        if (obj.hits > 3) {
            # VCL has no modulo operator: hits - 1000 * (hits / 1000) is
            # zero exactly when hits is a multiple of 1000.
            if (((obj.hits - (1000 * (obj.hits/1000))) == 0)) {
                return (pass);
            }
        }
    }

    return (deliver);
}
# Post-process a backend response: turn backend 500s into a synthetic
# error, record the response TTL on the request for vcl_deliver, and give
# every object a 15 minute grace period.
sub vcl_fetch {
    # This is a failing backend. Error immediately so
    # that we serve a blank rather than serving a blob of HTML.
    if (500 == beresp.status) {
        error 500 "Error from backend";
    }

    # grab a copy of the TTL that can be passed to deliver
    set req.http.X-Local-TTL = beresp.ttl;

    # Apply a grace time in case we're unable to get an answer from the
    # backend
    set beresp.grace = 15m;

    return (deliver);
}
# Add debugging headers and remove headers we don't want to expose
# Decorate the client response with debugging headers and strip the
# headers that identify the server software.
sub vcl_deliver {
    set resp.http.X-Cache-Hits = obj.hits;
    set resp.http.X-TTL = req.http.X-Local-TTL;

    # X-Local-Type classification:
    #   "C" - not a 200, or a Content-Length of 10 or less (unparsable
    #         lengths fall back to 0)
    #   "B" - 200 with a body, integer part of the recorded TTL above 30
    #   "A" - 200 with a body, integer part of the recorded TTL 30 or less
    #         (a missing/unparsable TTL falls back to 0)
    if (resp.status != 200 || std.integer(resp.http.Content-Length, 0) <= 10) {
        set resp.http.X-Local-Type = "C";
    } else if (std.integer(regsub(req.http.X-Local-TTL, "\.[0-9]+", ""),0) > 30) {
        set resp.http.X-Local-Type = "B";
    } else {
        set resp.http.X-Local-Type = "A";
    }

    # Echo back the "xxx" and "url" query parameters extracted from the
    # (normalised) request URL.
    set resp.http.X-Local-xxx = regsub(req.url, ".*?(&|\?)xxx=([^&\?]+).*", "\2");
    set resp.http.X-Local-URL = regsub(req.url, ".*?(&|\?)url=(.*)", "\2");

    # Headers we do not want to expose.
    unset resp.http.Server;
    unset resp.http.Varnish;
    unset resp.http.Via;
}
# Build the synthetic response for every "error" raised in this VCL.
sub vcl_error {
    set obj.http.Content-Type = "text/html; charset=utf-8";

    # Health check: the 200 raised for "/ready" gets a body of "1".
    if (obj.status == 200 && req.url == "/ready") {
        synthetic {"1"};
        return (deliver);
    }

    # A 400 is permanent for that URL; every other status invites the
    # client to retry after five seconds.
    if (obj.status != 400) {
        set obj.http.Retry-After = "5";
    }

    # All other errors are delivered with an empty body.
    synthetic {""};
    return (deliver);
}
}}}
We build by pulling the 3.0.3 Ubuntu source package and then applying a
patch for the delta between 3.0.3 and the head of the 3.0 branch. I will
attach the last patch we used to build.
--
Ticket URL: <https://www.varnish-cache.org/trac/ticket/1246#comment:4>
Varnish <https://varnish-cache.org/>
The Varnish HTTP Accelerator
More information about the varnish-bugs
mailing list