Varnish and googlebot 403

Christian Cantinelli morphey at morphey.org
Fri Sep 11 13:36:37 CEST 2015


Yes, sure

In attach..


Christian


Il 11/09/2015 11:29, Balder VC ha scritto:
> On 09/11/2015 09:40 AM, Christian Cantinelli wrote:
>> Hello,
>>
>> has anyone had such this problem?
>
>
> I would seem as if no one had this problem.
>
> But prehaps there something in your vcl what causes it? Can you share it
>
>>
>> Christian
>>
>> Il 08/09/2015 09:15, Christian Cantinelli ha scritto:
>>>    Hello,
>>>
>>>    I have a problem with varnish. Basically it works all right. But a few
>>>    months googlebot tells me many errors 403 that is not derived from
>>>    wordpress's underneath.
>>>    Has anyone encountered similar problems?
>>>
>>>
>>
>> _______________________________________________
>> varnish-misc mailing list
>> varnish-misc at varnish-cache.org
>> https://www.varnish-cache.org/lists/mailman/listinfo/varnish-misc
>
>
> _______________________________________________
> varnish-misc mailing list
> varnish-misc at varnish-cache.org
> https://www.varnish-cache.org/lists/mailman/listinfo/varnish-misc
>
-------------- next part --------------
#
# This is an example VCL file for Varnish.
#
# It does not do anything by default, delegating control to the
# builtin VCL. The builtin VCL is called when there is no explicit
# return statement.
#
# See the VCL chapters in the Users Guide at https://www.varnish-cache.org/docs/
# and http://varnish-cache.org/trac/wiki/VCLExamples for more examples.

# Update for work with Varnish 4


# Marker to tell the VCL compiler that this VCL has been adapted to the
# new 4.0 format.
vcl 4.0;

# Default backend definition. Set this to point to your content server.
backend default {
    .host = "MI_IP_SERVER_APACHE";
    .port = "8080";
    .connect_timeout = 600s;
    .first_byte_timeout = 600s;
    .between_bytes_timeout = 600s;
    .max_connections = 5000;
}

# Only allow purging from specific IPs
acl purge {
    "localhost";
    "127.0.0.1";
}

# Traffico sviluppo nocache
acl passem {
	"MY_DEV/27";
}

# This function is used when a request is send by a HTTP client (Browser) 
sub vcl_recv {

	# Normalize the header, remove the port (in case you're testing this on various TCP ports)
	set req.http.Host = regsub(req.http.Host, ":[0-9]+", "");

	# per logs
	if (req.restarts == 0) {
		if (req.http.X-Forwarded-For) {
			set req.http.X-Forwarded-For = req.http.X-Forwarded-For + ", " + client.ip;
		} else {
			set req.http.X-Forwarded-For = client.ip;
		}
	}
	#remove req.http.X-Forwarded-For;
	#set req.http.X-Forwarded-For = client.ip;

        if (req.http.Host ~ "beta.mydomain1.tld") {
                return (pass);
        }
        if (req.http.Host ~ "beta.mydomain2.tld") {
                return (pass);
        }

	# Traffico sviluppo nocache
	if (client.ip ~ passem) {
		return (pass);
	}
        if (req.url ~ "\?gbjson$") {
                return (pass);
        }



	# Allow purging from ACL
	if (req.method == "PURGE") {
		# If not allowed then a error 405 is returned
		if (!client.ip ~ purge) {
			return(synth(405, "This IP is not allowed to send PURGE requests."));
		}	
		# If allowed, do a cache_lookup -> vlc_hit() or vlc_miss()
		return (purge);
	}

	# Post requests will not be cached
	if (req.http.Authorization || req.method == "POST") {
		return (pass);
	}

	if (req.url ~ "\.(css)$") {
		return(pass);
	}
	if (req.url ~ "\.(xml)$") {
		return(pass);
	}
	# --- Wordpress specific configuration
	
	# Did not cache the RSS feed
	if (req.url ~ "/feed" || req.url ~ "/?feed=sitemap-news") {
		return (pass);
	}

	# Blitz hack
        if (req.url ~ "/mu-.*") {
                return (pass);
        }

	
	# Did not cache the admin and login pages
	if (req.url ~ "/wp-(login|admin)") {
		return (pass);
	}
	
	 # Do not cache the WooCommerce pages
	 ### REMOVE IT IF YOU DO NOT USE WOOCOMMERCE ###
	if (req.url ~ "/(cart|my-account|checkout|addons|/?add-to-cart=)") {
        	return (pass);
    	}

	# Remove the "has_js" cookie
	set req.http.Cookie = regsuball(req.http.Cookie, "has_js=[^;]+(; )?", "");

	# Remove any Google Analytics based cookies
	set req.http.Cookie = regsuball(req.http.Cookie, "__utm.=[^;]+(; )?", "");

	# Remove the Quant Capital cookies (added by some plugin, all __qca)
	set req.http.Cookie = regsuball(req.http.Cookie, "__qc.=[^;]+(; )?", "");

	# Remove the wp-settings-1 cookie
	set req.http.Cookie = regsuball(req.http.Cookie, "wp-settings-1=[^;]+(; )?", "");

	# Remove the wp-settings-time-1 cookie
	set req.http.Cookie = regsuball(req.http.Cookie, "wp-settings-time-1=[^;]+(; )?", "");

	# Remove the wp test cookie
	set req.http.Cookie = regsuball(req.http.Cookie, "wordpress_test_cookie=[^;]+(; )?", "");

	# Are there cookies left with only spaces or that are empty?
	if (req.http.cookie ~ "^ *$") {
		    unset req.http.cookie;
	}
	
	# Cache the following files extensions 
	if (req.url ~ "\.(css|js|png|gif|jp(e)?g|swf|ico)") {
		unset req.http.cookie;
	}

	# Normalize Accept-Encoding header and compression
	# https://www.varnish-cache.org/docs/3.0/tutorial/vary.html
	if (req.http.Accept-Encoding) {
		# Do no compress compressed files...
		if (req.url ~ "\.(jpg|png|gif|gz|tgz|bz2|tbz|mp3|ogg)$") {
			   	unset req.http.Accept-Encoding;
		} elsif (req.http.Accept-Encoding ~ "gzip") {
		    	set req.http.Accept-Encoding = "gzip";
		} elsif (req.http.Accept-Encoding ~ "deflate") {
		    	set req.http.Accept-Encoding = "deflate";
		} else {
			unset req.http.Accept-Encoding;
		}
	}

	# Check the cookies for wordpress-specific items
	if (req.http.Cookie ~ "wordpress_" || req.http.Cookie ~ "comment_") {
		return (pass);
	}
	if (!req.http.cookie) {
		unset req.http.cookie;
	}
	
	# --- End of Wordpress specific configuration

	# Did not cache HTTP authentication and HTTP Cookie
	if (req.http.Authorization || req.http.Cookie) {
		# Not cacheable by default
		return (pass);
	}

	# Cache all others requests
	return (hash);
}
 
sub vcl_pipe {
	set bereq.http.connection = "close";
	return (pipe);
}
 
sub vcl_pass {
	return (fetch);
}
 
# The data on which the hashing will take place
sub vcl_hash {
 	hash_data(req.url);
 	if (req.http.host) {
     	hash_data(req.http.host);
 	} else {
     	hash_data(server.ip);
 	}

	# If the client supports compression, keep that in a different cache
    	if (req.http.Accept-Encoding) {
        	hash_data(req.http.Accept-Encoding);
	}
     
	return (lookup);
}
 
# This function is used when a request is sent by our backend (Nginx server)
sub vcl_backend_response {
	# Remove some headers we never want to see
	unset beresp.http.Server;
	unset beresp.http.X-Powered-By;

	# For static content strip all backend cookies
	if (bereq.url ~ "\.(css|js|png|gif|jp(e?)g)|swf|ico") {
		unset beresp.http.cookie;
	}

	# Only allow cookies to be set if we're in admin area
	if (beresp.http.Set-Cookie && bereq.url !~ "^/wp-(login|admin)") {
        	unset beresp.http.Set-Cookie;
    	}

	# don't cache response to posted requests or those with basic auth
	if ( bereq.method == "POST" || bereq.http.Authorization ) {
        	set beresp.uncacheable = true;
		set beresp.ttl = 120s;
		return (deliver);
    	}
 
    	# don't cache search results
	if ( bereq.url ~ "\?s=" ){
		set beresp.uncacheable = true;
                set beresp.ttl = 120s;
                return (deliver);
	}
    
	# only cache status ok
	if ( beresp.status != 200 ) {
		set beresp.uncacheable = true;
                set beresp.ttl = 120s;
                return (deliver);
	}

	# A TTL of 24h
	set beresp.ttl = 1m;
	# Define the default grace period to serve cached content
	set beresp.grace = 30s;
	
	return (deliver);
}
 
# The routine when we deliver the HTTP request to the user
# Last chance to modify headers that are sent to the client
sub vcl_deliver {
	if (obj.hits > 0) { 
		set resp.http.X-Cache = "cached";
	} else {
		set resp.http.x-Cache = "uncached";
	}

	# Remove some headers: PHP version
	unset resp.http.X-Powered-By;

	# Remove some headers: Apache version & OS
	unset resp.http.Server;

	# Remove some heanders: Varnish
	#unset resp.http.Via;
	#unset resp.http.X-Varnish;

	return (deliver);
}
 
sub vcl_init {
 	return (ok);
}
 
sub vcl_fini {
 	return (ok);
}



More information about the varnish-misc mailing list