Is anyone using ESI with a lot of traffic?
sky at crucially.net
sky at crucially.net
Mon Mar 2 23:22:55 CET 2009
Unless you have something in front of Varnish that can gzip, you can't gzip. Varnish doesn't understand it. Unless, I guess, you split it up from the server and not the ESI statement.
Sent via BlackBerry by AT&T
-----Original Message-----
From: Cloude Porteus <cloude at instructables.com>
Date: Mon, 2 Mar 2009 13:48:23
To: Artur Bergman<sky at crucially.net>
Cc: John Adams<jna at twitter.com>; <varnish-dev at projects.linpro.no>
Subject: Re: Is anyone using ESI with a lot of traffic?
Artur,
What is the issue with ESI & gzip?
Does this mean that if we want to use ESI, we can't gzip the pages
that have ESI includes? But we could still gzip the pages that are
included by ESI.
thanks,
cloude
On Mon, Mar 2, 2009 at 1:40 PM, Artur Bergman <sky at crucially.net> wrote:
>
> On Feb 27, 2009, at 2:24 PM, John Adams wrote:
>
>> cc'ing the varnish dev list for comments...
>>
>> On Feb 27, 2009, at 1:33 PM, Cloude Porteus wrote:
>>
>>> John,
>>> Good to hear from you. You must be slammed at Twitter. I'm happy to
>>> hear that ESI is holding up for you. It's been in my backlog since you
>>> mentioned it to me pre-Twitter.
>>>
>>> Any performance info would be great.
>>>
>>
>> Any comments on our setup are welcome. You may also choose to call us
>> crazypants. Many, many thanks to Artur Bergman of Wikia for helping us get
>> this configuration straightened out.
>>
>
> Thanks John :)
>
> I'll describe the settings we use. (We don't use ESI because of gzip)
>
> The first important step is that we put the shmlog on tmpfs
>
> tmpfs /usr/var/varnish/ tmpfs noatime,defaults,size=150M 0 0
> /dev/md0 /var/lib/varnish ext2 noatime,nodiratime,norelatime 0
> 0
>
> Notice also ext2: we don't care about journaling. (Ignore the broken paths.)
>
> This is because Linux will asynchronously write the log to disk, which puts a
> large I/O pressure on the system (interfering with your normal reads if you
> use the same disks). It also scales the I/O load with traffic and not with
> the working set.
>
> # Maximum number of open files (for ulimit -n)
> NFILES=131072
>
> # Locked shared memory (for ulimit -l)
> # Default log size is 82MB + header
> MEMLOCK=90000
>
> DAEMON_COREFILE_LIMIT="unlimited"
>
>
> DAEMON_OPTS="-a :80 \
> -T localhost:6082 \
> -f /etc/varnish/wikia.vcl \
> -p obj_workspace=4096 \
> # We have lots of objects
> -p sess_workspace=32768 \
> # Need lots of session space
> -p listen_depth=8192 \
> -p ping_interval=1 \
> -s file,/var/lib/varnish/mmap,120G \
> # lots of mmap
> -p log_hashstring=off \
> -h classic,250007 \
> # 2.5 million objects
> -p thread_pool_max=4000 \
> -p lru_interval=60 \
> -p esi_syntax=0x00000003 \
> -p sess_timeout=10 \
> -p thread_pools=4 \
> -p thread_pool_min=500 \
> # we force 4000 threads pre-created
> # otherwise we run into overflows
> -p shm_workspace=32768 \
> # avoid shm_mtx
> -p srcaddr_ttl=0"
> # avoid hash lookup
>
> # we link geoip into the vcl
> CC_COMMAND='cc_command=exec cc -fpic -shared -Wl,-x -L/usr/local/lib/
> -lGeoIP -o %o %s'
>
> #### VCL
>
> # declare the function signature
> # so we can use them
> C{
> #include <string.h>
> double TIM_real(void);
> void TIM_format(double t, char *p);
> }C
>
>
>
> # init GeoIP code
> C{
> #include <dlfcn.h>
> #include <stdlib.h>
> #include <stdio.h>
> #include <string.h>
> #include <GeoIPCity.h>
> #include <pthread.h>
>
> pthread_mutex_t geoip_mutex = PTHREAD_MUTEX_INITIALIZER;
>
> GeoIP* gi;
> void geo_init () {
> if(!gi) {
> gi = GeoIP_open_type(GEOIP_CITY_EDITION_REV1,GEOIP_MEMORY_CACHE);
> }
> }
> }C
>
> vcl_recv {
>
> set req.url = regsub(req.url, "http://[^/]*","");
> #will normalize proxied requests, specifically curl -x foo:80
>
> # get out error handler for geoiplookup
> if(req.http.host == "geoiplookup.wikia.com") {
> error 200 "Ok";
> }
>
> # lvs check
> if (req.url == "/lvscheck.html") {
> error 200 "Ok";
> }
>
> # normalize Accept-Encoding to reduce vary
> if (req.http.Accept-Encoding) {
> if (req.http.Accept-Encoding ~ "gzip") {
> set req.http.Accept-Encoding = "gzip";
> } elsif (req.http.Accept-Encoding ~ "deflate") {
> set req.http.Accept-Encoding = "deflate";
> } else {
> unset req.http.Accept-Encoding;
> }
> }
>
>
> # Yahoo uses this to check for 404
> if (req.url ~ "^/SlurpConfirm404") {
> error 404 "Not found";
> }
>
> set req.grace = 360000s; #if the backend is down, just serve
>
>
> # check for specific cookies, otherwise nuke them
> # save them so we can re-inject them later in pipe or miss
> set req.http.X-Orig-Cookie = req.http.Cookie;
> if(req.http.Cookie ~ "(session|UserID|UserName|Token|LoggedOut)") {
> # dont do anything, the user is logged in
> } else {
> # dont care about any other cookies
> unset req.http.Cookie;
> }
>
>
> }
>
> # varnish XFF is broken, it doesn't chain them
> # if you have chained varnishes, or trust AOL, you need to append them
> sub vcl_pipe {
> # do the right XFF processing
> set bereq.http.X-Forwarded-For = req.http.X-Forwarded-For;
> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded-For, "$", ",
> ");
> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded-For, "$",
> client.ip);
> set bereq.http.Cookie = req.http.X-Orig-Cookie;
> }
>
>
> # this implements purging (we purge all 3 versions of the accept-encoding:
> # none, gzip, deflate)
> sub vcl_hit {
> if (req.request == "PURGE") {
> set obj.ttl = 0s;
> error 200 "Purged.";
> }
> }
>
> sub vcl_miss {
>
> if (req.request == "PURGE") {
> error 404 "Not purged";
> }
>
> set bereq.http.X-Forwarded-For = req.http.X-Forwarded-For;
> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded-For, "$", ",
> ");
> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded-For, "$",
> client.ip);
> }
>
>
> # this marks if something is cacheable or not, if it isn't
> # say why
> vcl_fetch {
> # so we have access to this in deliver
> set obj.http.X-Orighost = req.http.host;
> set obj.http.X-Origurl = req.url;
> if (!obj.cacheable) {
> set obj.http.X-Cacheable = "NO:Not-Cacheable";
> pass;
> }
> if (obj.http.Cache-Control ~ "private") {
> if(req.http.Cookie ~"(UserID|_session)") {
> set obj.http.X-Cacheable = "NO:Got Session";
> } else {
> set obj.http.X-Cacheable =
> "NO:Cache-Control=private";
> }
> pass;
> }
> if (obj.http.Set-Cookie ~ "(UserID|_session)") {
> set obj.http.X-Cacheable = "NO:Set-Cookie";
> pass;
> }
>
> set obj.http.X-Cacheable = "YES";
> set obj.grace = 360000s;
>
>
> }
>
>
> #Following sets X-Served-By, if it is already set it appends it
> # it also says if it is a HIT, and how many hits
>
> sub vcl_deliver {
>
> #add or append Served By
> if(!resp.http.X-Served-By) {
> set resp.http.X-Served-By = "varnish8";
> if (obj.hits > 0) {
> set resp.http.X-Cache = "HIT";
> } else {
> set resp.http.X-Cache = "MISS";
> }
> set resp.http.X-Cache-Hits = obj.hits;
> } else {
> # append current data
> set resp.http.X-Served-By = regsub(resp.http.X-Served-By, "$", ",
> varnish8");
> if (obj.hits > 0) {
> set resp.http.X-Cache = regsub(resp.http.X-Cache, "$", ", HIT");
> } else {
> set resp.http.X-Cache = regsub(resp.http.X-Cache, "$" , ", MISS");
> }
> set resp.http.X-Cache-Hits = regsub(resp.http.X-Cache-Hits, "$", ", ");
> set resp.http.X-Cache-Hits = regsub(resp.http.X-Cache-Hits, "$",
> obj.hits);
> }
>
> #
>
> # if the client is another DC, just remove stuff and deliver
> if ( client.ip ~ LON
> || client.ip ~ SJC
> || client.ip ~ IOWA
> ) {
> unset resp.http.X-CPU-Time;
> unset resp.http.X-Real-Time;
> unset resp.http.X-Served-By-Backend;
> unset resp.http.X-User-Id;
> unset resp.http.X-Namespace-Number;
> unset resp.http.X-Orighost;
> unset resp.http.X-Origurl;
> deliver;
> }
> # else do cache-control
> # nuke the headers since they were generally meant for varnish
> # these rules are mostly based on mediawiki rules
> if ( resp.http.X-Pass-Cache-Control ) {
> set resp.http.Cache-Control = resp.http.X-Pass-Cache-Control;
> } elsif ( resp.status == 304 ) {
> # no headers on if-modified since
> } elsif ( resp.http.X-Origurl ~ ".*/index\.php.*(css|js)"
> || resp.http.X-Origurl ~ "raw") {
> # dont touch it let mediawiki decide
> } elsif (resp.http.X-Orighost ~ "images.wikia.com") {
> # lighttpd knows what it is doing
> } elsif (resp.http.X-Orighost ~ "geoiplookup") {
> } else {
> #follow squid content here
> set resp.http.Cache-Control = "private, s-maxage=0, max-age=0,
> must-revalidate";
> }
>
> # this will calculate an Expires header which is based on now+max-age
> # if you cache the Expires header, then it won't match max-age since it is
> # static
> if (!resp.status == 304) {
> C{
> char *cache = VRT_GetHdr(sp, HDR_REQ, "\016cache-control:");
> char date[40];
> int max_age;
> int want_equals = 0;
> if(cache) {
> while(*cache != '\0') {
> if (want_equals && *cache == '=') {
> cache++;
> max_age = strtoul(cache, 0, 0);
> break;
> }
>
> if (*cache == 'm' && !memcmp(cache, "max-age", 7)) {
> cache += 7;
> want_equals = 1;
> continue;
> }
> cache++;
> }
> if (max_age) {
> TIM_format(TIM_real() + max_age, date);
> VRT_SetHdr(sp, HDR_RESP, "\010Expires:", date,
> vrt_magic_string_end);
> }
> }
> }C
> #;
> }
>
> }
>
>
> vcl_error {
> # this implements geoip lookups inside varnish
> # so clients can get the data without hitting the backend
> if(req.http.host == "geoiplookup.wikia.com" || req.url ==
> "/__varnish/geoip") {
> set obj.http.Content-Type = "text/plain";
> set obj.http.cache-control = "private, s-maxage=0, max-age=360";
> set obj.http.X-Orighost = req.http.host;
> C{
> char *ip = VRT_IP_string(sp, VRT_r_client_ip(sp));
> char date[40];
> char json[255];
>
> pthread_mutex_lock(&geoip_mutex);
>
> if(!gi) { geo_init(); }
>
> GeoIPRecord *record = GeoIP_record_by_addr(gi, ip);
> if(record) {
> snprintf(json, 255, "Geo =
> {\"city\":\"%s\",\"country\":\"%s\",\"lat\":\"%f\",\"lon\":\"%f\",\"classC\":\"%s\",\"netmask\":\"%d\"}",
> record->city,
> record->country_code,
> record->latitude,
> record->longitude,
> ip,
> GeoIP_last_netmask(gi)
> );
> pthread_mutex_unlock(&geoip_mutex);
> VRT_synth_page(sp, 0, json, vrt_magic_string_end);
> } else {
> pthread_mutex_unlock(&geoip_mutex);
> VRT_synth_page(sp, 0, "Geo = {}", vrt_magic_string_end);
> }
>
>
> TIM_format(TIM_real(), date);
> VRT_SetHdr(sp, HDR_OBJ, "\016Last-Modified:", date,
> vrt_magic_string_end);
> }C
> # check if site is working
> if(req.url ~ "lvscheck.html") {
> synthetic {"varnish is okay"};
> deliver;
> }
>
> deliver;
>
> }
>
>
> #############
>
> sysctl
>
> net.ipv4.ip_local_port_range = 1024 65536
> net.core.rmem_max=16777216
> net.core.wmem_max=16777216
> net.ipv4.tcp_rmem=4096 87380 16777216
> net.ipv4.tcp_wmem=4096 65536 16777216
> net.ipv4.tcp_fin_timeout = 3
> net.ipv4.tcp_tw_recycle = 1
> net.core.netdev_max_backlog = 30000
> net.ipv4.tcp_no_metrics_save=1
> net.core.somaxconn = 262144
> net.ipv4.tcp_syncookies = 0
> net.ipv4.tcp_max_orphans = 262144
> net.ipv4.tcp_max_syn_backlog = 262144
> net.ipv4.tcp_synack_retries = 2
> net.ipv4.tcp_syn_retries = 2
>
> These are mostly cargo culted from previous emails here.
>
> Cheers
> Artur
>
--
VP of Product Development
Instructables.com
http://www.instructables.com/member/lebowski
More information about the varnish-dev
mailing list