Default VCL for MediaWiki
Forest
forest at tmswiki.net
Mon Jan 2 20:30:27 CET 2012
Hello.
I recently added Varnish to my MediaWiki wiki and was amazed by its performance. Many thanks to everyone who helps make this
incredible software available to the community.
MediaWiki, the software that powers Wikipedia, has a manual page about using
MediaWiki with Varnish, but it needs to be updated to reflect both the
changes to VCL in version 3 and some updates to MediaWiki itself. I'm
planning on writing this update and am requesting a review of the
example VCL code I'll be including in the MediaWiki manual.
I am in no way a Varnish or HTTP expert (in fact, I'm just a graduate
student in economics who is passionate about supporting his wiki and
free culture in general), so I would be very grateful for any feedback
on the example code. My hope is to spur adoption of Varnish within the
MediaWiki community. Since many MediaWiki installations are done
by people with little or no experience with HTTP, I think that
adoption requires a good manual page.
Some notes:
* When MediaWiki's built-in support for HTTP
accelerators is enabled, if a page or image is changed on the wiki, MediaWiki will send
a notification to every registered Varnish or Squid server, telling it to
discard the outdated stored page. The specific notification
protocol was designed for Squid and involves HTTP requests with a PURGE
method. Also, with support enabled, changes made to the wiki by
anonymous users will be attributed to their IP address, found in
the 'X-Forwarded-For' header, rather than to the requesting cache's IP address.
* The current manual page, designed for Varnish 2.x, is http://www.mediawiki.org/wiki/Manual:Varnish_caching . I've brought the issue with the Vector skin up on the Mediawiki-L list, and MediaWiki's Erik Moeller pointed me to Wikia's VCL script, from which I adapted the vcl_recv code for dealing with cookies.
* Finally, the example code is still a bit rough and I plan on cleaning
it up a bit when writing the documentation. Knowing whether I'm on the
right track would be very helpful, though, as my own wiki will be going
live with the code in less than a week. :)
Thank you.
Forest
# set default backend if no server cluster specified
backend default {
    # The web server that Varnish fetches uncached content from.
    .host = "localhost";

    # Port 8080 leaves the web server (Apache) directly reachable for
    # debugging purposes. The alternative
    #     .port = "80";
    # also works well.
    .port = "8080";
}
# access control list for "purge": open to only localhost and other local nodes
acl purge {
    # Clients allowed to send PURGE requests. Only localhost is listed here;
    # add further entries for any other local nodes that must be able to purge.
    "localhost";
}
#
# The default code for vcl_recv is incorporated into the following
# subroutine to make it easier to specify the proper order of execution.
sub vcl_recv {
    # Entry point for every client request. Decides whether the request is
    # piped, passed straight to the backend, or looked up in the cache.
    set req.backend = default;

    # Serve objects up to 2 minutes past their expiry if the backend is slow
    # to respond. Not relevant to low traffic wikis.
    set req.grace = 120s;

    # Record the client address in X-Forwarded-For. Only done on the first
    # pass (req.restarts == 0) so a restarted request does not append the
    # address a second time.
    if (req.restarts == 0) {
        if (req.http.X-Forwarded-For) {
            set req.http.X-Forwarded-For =
                req.http.X-Forwarded-For + ", " + client.ip;
        } else {
            set req.http.X-Forwarded-For = client.ip;
        }
    }

    # PURGE handling. This uses the ACL called "purge": a PURGE request that
    # comes from anywhere other than the addresses in that ACL is rejected.
    # Allowed PURGE requests go to lookup so that vcl_hit / vcl_miss can act
    # on them.
    if (req.request == "PURGE") {
        if (!client.ip ~ purge) {
            error 405 "Not allowed.";
        }
        return (lookup);
    }

    if (req.request != "GET" &&
        req.request != "HEAD" &&
        req.request != "PUT" &&
        req.request != "POST" &&
        req.request != "TRACE" &&
        req.request != "OPTIONS" &&
        req.request != "DELETE") {
        # Non-RFC2616 or CONNECT which is weird.
        return (pipe);
    }

    if (req.request != "GET" && req.request != "HEAD") {
        # We only deal with GET and HEAD by default.
        return (pass);
    }

    # Replace "/wiki/" with the path to your MediaWiki installation.
    # CONCERN (original author): is the following check robust?
    # Under the wiki path, keep the Cookie header only when it contains one of
    # the cookie names below, which mark a logged-in (or just logged-out)
    # user. Any other cookies are dropped so the request stays cacheable.
    if (req.url ~ "^/wiki/") {
        if (req.http.Cookie !~ "(session|UserID|UserName|Token|LoggedOut)") {
            unset req.http.Cookie;
        }
    }

    if (req.http.Authorization || req.http.Cookie) {
        # Not cacheable by default.
        return (pass);
    }

    # Legacy: (Some commented-out code is marked as Legacy. This is code that
    # was found in the existing manual page, but which is planned to be
    # dropped in the new version. If you think it's worth keeping, please let
    # the author know.)
    # if (req.http.If-None-Match)
    # {return(pass);}

    # Force a cache miss (and therefore a fresh backend fetch) when the client
    # sends a no-cache request.
    # NOTE(review): this lets any client bypass the cache at will; consider
    # restricting it to trusted addresses on a busy site.
    if (req.http.Cache-Control ~ "no-cache") {
        set req.hash_always_miss = true;
        # https://www.varnish-cache.org/trac/wiki/VCLExampleEnableForceRefresh
        # ban_url(req.url);
    }

    return (lookup);
}
sub vcl_pipe
{
    # Ask the backend to close the connection once the piped request is done.
    # This is only needed when requests are rewritten (this configuration
    # rewrites X-Forwarded-For in vcl_recv); otherwise it is not necessary.
    set bereq.http.connection = "close";
}
sub vcl_hit
{
    # A PURGE request that found its object in the cache: drop the object and
    # report success to the sender.
    if (req.request == "PURGE") {
        purge;
        error 200 "Purged";
    }

    # Legacy (kept for reference, disabled):
    # if (!obj.cacheable) {
    #     return(pass);
    # }
}
sub vcl_miss {
    # A PURGE request whose exact object variant was not found in the cache.
    if (req.request == "PURGE") {
        # Still issue the purge: a miss here can mean only this particular
        # variant (e.g. selected by a Vary header) is absent while other
        # variants of the same URL are cached. Purging removes all of them.
        purge;
        error 200 "Not in cache";
    }
}
sub vcl_fetch {
    # Runs after a response has been fetched from the backend. Adds debugging
    # headers and sets the grace period for the stored object.

    # For debugging only: Varnish's internal Time To Live for the cached object.
    set beresp.http.X-orig-ttl = beresp.ttl;

    # For debugging only: copy of the backend's Cache-Control header.
    # (Author's note: probably redundant, because caches aren't allowed to
    # change Cache-Control headers.)
    set beresp.http.X-Orig-Cache-Control = beresp.http.Cache-Control;

    # Set minimum timeouts to auto-discard stored objects.
    # set beresp.prefetch = -30s;
    set beresp.grace = 120s;

    # Legacy:
    # if (beresp.http.Cache-Control ~ "(private|no-cache|no-store)") {
    #     return(hit_for_pass);
    # }

    # Legacy:
    # if (req.http.Authorization && !beresp.http.Cache-Control ~ "public") {
    #     return(hit_for_pass);
    # }
}
sub vcl_deliver
{
    # Debugging aid only: expose obj.hits, the approximate number of times
    # this object has been delivered, in a response header.
    # A value of 0 indicates a cache miss.
    set resp.http.X-obj-hits = obj.hits;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://www.varnish-cache.org/lists/pipermail/varnish-misc/attachments/20120102/c01497bd/attachment.html>
More information about the varnish-misc
mailing list