Is anyone using ESI with a lot of traffic?
John Adams
jna at twitter.com
Mon Mar 2 23:39:42 CET 2009
We fix this by front-ending varnish with apache. Not the best solution
but we still get to compress.
-j
On Mar 2, 2009, at 1:48 PM, Cloude Porteus wrote:
> Artur,
> What is the issue with ESI & gzip?
>
> Does this mean that if we want to use ESI, we can't gzip the pages
> that have ESI includes? But we could still gzip the pages that are
> included by ESI.
>
> thanks,
> cloude
>
> On Mon, Mar 2, 2009 at 1:40 PM, Artur Bergman <sky at crucially.net>
> wrote:
>>
>> On Feb 27, 2009, at 2:24 PM, John Adams wrote:
>>
>>> cc'ing the varnish dev list for comments...
>>>
>>> On Feb 27, 2009, at 1:33 PM, Cloude Porteus wrote:
>>>
>>>> John,
>>>> Good to hear from you. You must be slammed at Twitter. I'm happy to
>>>> hear that ESI is holding up for you. It's been in my backlog
>>>> since you
>>>> mentioned it to me pre-Twitter.
>>>>
>>>> Any performance info would be great.
>>>>
>>>
>>> Any comments on our setup are welcome. You may also choose to call
>>> us
>>> crazypants. Many, many thanks to Artur Bergman of Wikia for
>>> helping us get
>>> this configuration straightened out.
>>>
>>
>> Thanks John :)
>>
>> I'll describe the settings we use. (We don't use ESI because of gzip)
>>
>> The first important step is that we put the shmlog on tmpfs
>>
>> tmpfs /usr/var/varnish/ tmpfs noatime,defaults,size=150M
>> 0 0
>> /dev/md0 /var/lib/varnish ext2
>> noatime,nodiratime,norelatime 0
>> 0
>>
>> Notice also ext2 we don't care about journaling. (Ignore the broken
>> paths)
>>
>> This is because Linux will asynchronously write the log to disk, which
>> puts a large I/O pressure on the system (interfering with your normal
>> reads if you use the same disks). It also scales the I/O load with
>> traffic and not working set.
>>
>> # Maximum number of open files (for ulimit -n)
>> NFILES=131072
>>
>> # Locked shared memory (for ulimit -l)
>> # Default log size is 82MB + header
>> MEMLOCK=90000
>>
>> DAEMON_COREFILE_LIMIT="unlimited"
>>
>>
>> DAEMON_OPTS="-a :80 \
>> -T localhost:6082 \
>> -f /etc/varnish/wikia.vcl \
>> -p obj_workspace=4096 \
>> # We have lots of objects
>> -p sess_workspace=32768 \
>> # Need lots of session space
>> -p listen_depth=8192 \
>> -p ping_interval=1 \
>> -s file,/var/lib/varnish/mmap,120G \
>> # lots of mmap
>> -p log_hashstring=off \
>> -h classic,250007 \
>> # 2.5 million objects
>> -p thread_pool_max=4000 \
>> -p lru_interval=60 \
>> -p esi_syntax=0x00000003 \
>> -p sess_timeout=10 \
>> -p thread_pools=4 \
>> -p thread_pool_min=500 \
>> # we force 4000 threads pre-created
>> # otherwise we run into overflows
>> -p shm_workspace=32768 \
>> # avoid shm_mtx
>> -p srcaddr_ttl=0"
>> # avoid hash lookup
>>
>> # we link geoip into the vcl
>> CC_COMMAND='cc_command=exec cc -fpic -shared -Wl,-x -L/usr/local/lib/
>> -lGeoIP -o %o %s'
>>
>> #### VCL
>>
>> # declare the function signature
>> # so we can use them
>> C{
>> #include <string.h>
>> double TIM_real(void);
>> void TIM_format(double t, char *p);
>> }C
>>
>>
>>
>> # init GeoIP code
>> C{
>> #include <dlfcn.h>
>> #include <stdlib.h>
>> #include <stdio.h>
>> #include <string.h>
>> #include <GeoIPCity.h>
>> #include <pthread.h>
>>
>> pthread_mutex_t geoip_mutex = PTHREAD_MUTEX_INITIALIZER;
>>
>> GeoIP* gi;
>> void geo_init () {
>> if(!gi) {
>> gi =
>> GeoIP_open_type(GEOIP_CITY_EDITION_REV1,GEOIP_MEMORY_CACHE);
>> }
>> }
>> }C
>>
>> vcl_recv {
>>
>> set req.url = regsub(req.url, "http://[^/]*","");
>> #will normalize proxied requests, specifically curl -x foo:80
>>
>> # get our error handler for geoiplookup
>> if(req.http.host == "geoiplookup.wikia.com") {
>> error 200 "Ok";
>> }
>>
>> # lvs check
>> if (req.url == "/lvscheck.html") {
>> error 200 "Ok";
>> }
>>
>> # normalize Accept-Encoding to reduce vary
>> if (req.http.Accept-Encoding) {
>> if (req.http.Accept-Encoding ~ "gzip") {
>> set req.http.Accept-Encoding = "gzip";
>> } elsif (req.http.Accept-Encoding ~ "deflate") {
>> set req.http.Accept-Encoding = "deflate";
>> } else {
>> unset req.http.Accept-Encoding;
>> }
>> }
>>
>>
>> # Yahoo uses this to check for 404
>> if (req.url ~ "^/SlurpConfirm404") {
>> error 404 "Not found";
>> }
>>
>> set req.grace = 360000s; #if the backend is down, just serve
>>
>>
>> # check for specific cookies, otherwise nuke them
>> # save them so we can re-inject them later in pipe or miss
>> set req.http.X-Orig-Cookie = req.http.Cookie;
>> if(req.http.Cookie ~ "(session|UserID|UserName|Token|LoggedOut)") {
>> # dont do anything, the user is logged in
>> } else {
>> # dont care about any other cookies
>> unset req.http.Cookie;
>> }
>>
>>
>> }
>>
>> # varnish XFF is broken, it doesn't chain them
>> # if you have chained varnishes, or trust AOL, you need to append
>> them
>> sub vcl_pipe {
>> # do the right XFF processing
>> set bereq.http.X-Forwarded-For = req.http.X-Forwarded-For;
>> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded-
>> For, "$", ",
>> ");
>> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded-
>> For, "$",
>> client.ip);
>> set bereq.http.Cookie = req.http.X-Orig-Cookie;
>> }
>>
>>
>> # this implements purging (we purge all 3 versions of the accept-
>> encoding,
>> none,gzip,deflate)
>> sub vcl_hit {
>> if (req.request == "PURGE") {
>> set obj.ttl = 0s;
>> error 200 "Purged.";
>> }
>> }
>>
>> sub vcl_miss {
>>
>> if (req.request == "PURGE") {
>> error 404 "Not purged";
>> }
>>
>> set bereq.http.X-Forwarded-For = req.http.X-Forwarded-For;
>> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded-
>> For, "$", ",
>> ");
>> set bereq.http.X-Forwarded-For = regsub(bereq.http.X-Forwarded-
>> For, "$",
>> client.ip);
>> }
>>
>>
>> # this marks if something is cacheable or not, if it isn't
>> # say why
>> vcl_fetch {
>> # so we have access to this in deliver
>> set obj.http.X-Orighost = req.http.host;
>> set obj.http.X-Origurl = req.url;
>> if (!obj.cacheable) {
>> set obj.http.X-Cacheable = "NO:Not-Cacheable";
>> pass;
>> }
>> if (obj.http.Cache-Control ~ "private") {
>> if(req.http.Cookie ~"(UserID|_session)") {
>> set obj.http.X-Cacheable = "NO:Got Session";
>> } else {
>> set obj.http.X-Cacheable =
>> "NO:Cache-Control=private";
>> }
>> pass;
>> }
>> if (obj.http.Set-Cookie ~ "(UserID|_session)") {
>> set obj.http.X-Cacheable = "NO:Set-Cookie";
>> pass;
>> }
>>
>> set obj.http.X-Cacheable = "YES";
>> set obj.grace = 360000s;
>>
>>
>> }
>>
>>
>> #Following sets X-Served-By, if it is already set it appends it
>> # it also says if it is a HIT, and how many hits
>>
>> sub vcl_deliver {
>>
>> #add or append Served By
>> if(!resp.http.X-Served-By) {
>> set resp.http.X-Served-By = "varnish8";
>> if (obj.hits > 0) {
>> set resp.http.X-Cache = "HIT";
>> } else {
>> set resp.http.X-Cache = "MISS";
>> }
>> set resp.http.X-Cache-Hits = obj.hits;
>> } else {
>> # append current data
>> set resp.http.X-Served-By = regsub(resp.http.X-Served-By, "$", ",
>> varnish8");
>> if (obj.hits > 0) {
>> set resp.http.X-Cache = regsub(resp.http.X-Cache, "$", ", HIT");
>> } else {
>> set resp.http.X-Cache = regsub(resp.http.X-Cache, "$" , ",
>> MISS");
>> }
>> set resp.http.X-Cache-Hits = regsub(resp.http.X-Cache-Hits, "$",
>> ", ");
>> set resp.http.X-Cache-Hits = regsub(resp.http.X-Cache-Hits, "$",
>> obj.hits);
>> }
>>
>> #
>>
>> # if the client is another DC, just remove stuff and deliver
>> if ( client.ip ~ LON
>> || client.ip ~ SJC
>> || client.ip ~ IOWA
>> ) {
>> unset resp.http.X-CPU-Time;
>> unset resp.http.X-Real-Time;
>> unset resp.http.X-Served-By-Backend;
>> unset resp.http.X-User-Id;
>> unset resp.http.X-Namespace-Number;
>> unset resp.http.X-Orighost;
>> unset resp.http.X-Origurl;
>> deliver;
>> }
>> # else do cache-control
>> # nuke the headers since they were generally meant for varnish
>> # these rules are mostly based on mediawiki rules
>> if ( resp.http.X-Pass-Cache-Control ) {
>> set resp.http.Cache-Control = resp.http.X-Pass-Cache-Control;
>> } elsif ( resp.status == 304 ) {
>> # no headers on if-modified since
>> } elsif ( resp.http.X-Origurl ~ ".*/index\.php.*(css|js)"
>> || resp.http.X-Origurl ~ "raw") {
>> # dont touch it let mediawiki decide
>> } elsif (resp.http.X-Orighost ~ "images.wikia.com") {
>> # lighttpd knows what it is doing
>> } elsif (resp.http.X-Orighost ~ "geoiplookup") {
>> } else {
>> #follow squid content here
>> set resp.http.Cache-Control = "private, s-maxage=0, max-age=0,
>> must-revalidate";
>> }
>>
>> # this will calculate an Expire headers which is based on now+max-age
>> # if you cache the Expire header, then it won't match max-age since
>> it is
>> static
>> if (!resp.status == 304) {
>> C{
>> char *cache = VRT_GetHdr(sp, HDR_REQ, "\016cache-control:");
>> char date[40];
>> int max_age;
>> int want_equals = 0;
>> if(cache) {
>> while(*cache != '\0') {
>> if (want_equals && *cache == '=') {
>> cache++;
>> max_age = strtoul(cache, 0, 0);
>> break;
>> }
>>
>> if (*cache == 'm' && !memcmp(cache, "max-age", 7)) {
>> cache += 7;
>> want_equals = 1;
>> continue;
>> }
>> cache++;
>> }
>> if (max_age) {
>> TIM_format(TIM_real() + max_age, date);
>> VRT_SetHdr(sp, HDR_RESP, "\010Expires:", date,
>> vrt_magic_string_end);
>> }
>> }
>> }C
>> #;
>> }
>>
>> }
>>
>>
>> vcl_error {
>> # this implements geoip lookups inside varnish
>> # so clients can get the data without hitting the backend
>> if(req.http.host == "geoiplookup.wikia.com" || req.url ==
>> "/__varnish/geoip") {
>> set obj.http.Content-Type = "text/plain";
>> set obj.http.cache-control = "private, s-maxage=0, max-age=360";
>> set obj.http.X-Orighost = req.http.host;
>> C{
>> char *ip = VRT_IP_string(sp, VRT_r_client_ip(sp));
>> char date[40];
>> char json[255];
>>
>> pthread_mutex_lock(&geoip_mutex);
>>
>> if(!gi) { geo_init(); }
>>
>> GeoIPRecord *record = GeoIP_record_by_addr(gi, ip);
>> if(record) {
>> snprintf(json, 255, "Geo =
>> {\"city\":\"%s\",\"country\":\"%s\",\"lat\":\"%f\",\"lon\":\"%f\",
>> \"classC\":\"%s\",\"netmask\":\"%d\"}",
>> record->city,
>> record->country_code,
>> record->latitude,
>> record->longitude,
>> ip,
>> GeoIP_last_netmask(gi)
>> );
>> pthread_mutex_unlock(&geoip_mutex);
>> VRT_synth_page(sp, 0, json, vrt_magic_string_end);
>> } else {
>> pthread_mutex_unlock(&geoip_mutex);
>> VRT_synth_page(sp, 0, "Geo = {}", vrt_magic_string_end);
>> }
>>
>>
>> TIM_format(TIM_real(), date);
>> VRT_SetHdr(sp, HDR_OBJ, "\016Last-Modified:", date,
>> vrt_magic_string_end);
>> }C
>> # check if site is working
>> if(req.url ~ "lvscheck.html") {
>> synthetic {"varnish is okay"};
>> deliver;
>> }
>>
>> deliver;
>>
>> }
>>
>>
>> #############
>>
>> sysctl
>>
>> net.ipv4.ip_local_port_range = 1024 65536
>> net.core.rmem_max=16777216
>> net.core.wmem_max=16777216
>> net.ipv4.tcp_rmem=4096 87380 16777216
>> net.ipv4.tcp_wmem=4096 65536 16777216
>> net.ipv4.tcp_fin_timeout = 3
>> net.ipv4.tcp_tw_recycle = 1
>> net.core.netdev_max_backlog = 30000
>> net.ipv4.tcp_no_metrics_save=1
>> net.core.somaxconn = 262144
>> net.ipv4.tcp_syncookies = 0
>> net.ipv4.tcp_max_orphans = 262144
>> net.ipv4.tcp_max_syn_backlog = 262144
>> net.ipv4.tcp_synack_retries = 2
>> net.ipv4.tcp_syn_retries = 2
>>
>> These are mostly cargo culted from previous emails here.
>>
>> Cheers
>> Artur
>>
>
>
>
> --
> VP of Product Development
> Instructables.com
>
> http://www.instructables.com/member/lebowski
---
John Adams
Twitter Operations
jna at twitter.com
http://twitter.com/netik
More information about the varnish-dev
mailing list