[master] 3d0a72c86 varnishstatdiff: New utility to compare metrics
Dridi Boukelmoune
dridi.boukelmoune at gmail.com
Tue Aug 30 15:18:06 UTC 2022
commit 3d0a72c86a8036544d4bde2d846e355377036a9c
Author: Dridi Boukelmoune <dridi.boukelmoune at gmail.com>
Date: Mon Jul 11 12:09:15 2022 +0200
varnishstatdiff: New utility to compare metrics
As I was comparing the output of two varnishstat executions that were
captured after a fresh start of varnishd followed by the workloads to
compare, I realized diff(1) was giving me a hard time, and git-diff(1)
barely improved the situation.
Looking for generic command line utilities to compare metrics, I wasn't
able to find anything. So instead I worked out the output format I
thought would help me spot interesting differences and settled on a
format inspired by the unified diff, with a twist: I wanted metrics to
be vertically aligned to easily see differences in orders of magnitude
and reduce the noise to a minimum.
The result is that taking this detour to script varnishstatdiff sped
my research up ultimately.
This should hopefully be portable to POSIX systems.
diff --git a/configure.ac b/configure.ac
index 57f444a92..a76725e30 100644
--- a/configure.ac
+++ b/configure.ac
@@ -932,6 +932,10 @@ AC_ARG_WITH([contrib],
AM_CONDITIONAL([WITH_CONTRIB], [test "$with_contrib" = yes])
AM_COND_IF([WITH_CONTRIB], [
+ CONTRIB_TESTS="$(cd $srcdir/contrib && echo tests/*.vtc)"
+ AC_SUBST(CONTRIB_TESTS)
+ AM_SUBST_NOTMAKE(CONTRIB_TESTS)
+
AC_DEFINE([WITH_CONTRIB], [1],
[Define to 1 when Varnish is built with contributions.])
diff --git a/contrib/Makefile.am b/contrib/Makefile.am
index 2c2a87cbf..54482b52b 100644
--- a/contrib/Makefile.am
+++ b/contrib/Makefile.am
@@ -1,3 +1,10 @@
#
+dist_bin_SCRIPTS = \
+ varnishstatdiff
+TESTS = @CONTRIB_TESTS@
+
+include $(top_srcdir)/vtc.am
+
+EXTRA_DIST = $(TESTS)
diff --git a/contrib/tests/statdiff_b00000.vtc b/contrib/tests/statdiff_b00000.vtc
new file mode 100644
index 000000000..c99edfece
--- /dev/null
+++ b/contrib/tests/statdiff_b00000.vtc
@@ -0,0 +1,54 @@
+varnishtest "varnishstatdiff coverage"
+
+# NOTE(review): column(1) is required here although the varnishstatdiff
+# script added by this patch does not appear to call it - confirm.
+feature cmd "command -v column"
+feature cmd "command -v diff"
+
+server s1 {
+ rxreq
+ txresp
+} -start
+
+varnish v1 -vcl+backend "" -start
+
+# First capture, taken before the client below sends its request.
+shell {
+ varnishstat -n ${v1_name} -1 \
+ -I MAIN.n_object -I MAIN.cache_* -I MAIN.client_req |
+ tee stat1.txt
+}
+
+# Send a single request through v1 between the two captures.
+client c1 {
+ txreq
+ rxresp
+} -start
+
+varnish v1 -vsl_catchup
+
+# Second capture: same filters except that MAIN.esi_req replaces
+# MAIN.client_req, so each file holds one metric the other lacks.
+shell {
+ varnishstat -n ${v1_name} -1 \
+ -I MAIN.n_object -I MAIN.cache_* -I MAIN.esi_req |
+ tee stat2.txt
+}
+
+# Help output and argument-count errors.
+shell -expect Usage: {varnishstatdiff -h}
+shell -expect "Error: not enough arguments" -err {varnishstatdiff}
+shell -expect "Error: not enough arguments" -err {varnishstatdiff a}
+shell -expect "Error: too many arguments" -err {varnishstatdiff a b c}
+
+# Render the diff of the two captures.
+shell {
+ varnishstatdiff stat1.txt stat2.txt | tee diff.txt
+}
+
+# Compare the rendering with the expected text; sed turns the first @
+# of each expected line into a space before the comparison.
+shell {
+ sed 's/@/ /' >expected.txt <<-EOF
+ --- stat1.txt
+ +++ stat2.txt
+ @MAIN.cache_miss -0 -0.00 Cache misses
+ @ +1 +0.00
+ -MAIN.client_req 0 0.00 Good client requests received
+ +MAIN.esi_req 0 0.00 ESI subrequests
+ @MAIN.n_object -0 . object structs made
+ @ +1 .
+ EOF
+
+ diff -u expected.txt diff.txt
+}
diff --git a/contrib/varnishstatdiff b/contrib/varnishstatdiff
new file mode 100755
index 000000000..8ff28d86d
--- /dev/null
+++ b/contrib/varnishstatdiff
@@ -0,0 +1,175 @@
+#!/bin/sh
+#
+# Copyright (c) 2022 Varnish Software AS
+# All rights reserved.
+#
+# Author: Dridi Boukelmoune <dridi.boukelmoune at gmail.com>
+#
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Abort on the first failing command and on any use of an unset variable.
+set -e
+set -u
+
+readonly SCRIPT=$0
+
+# Assign TMP on its own line: "readonly TMP=$(...)" returns the status of
+# readonly, which would hide a mktemp failure from "set -e" and leave TMP
+# empty for the rest of the script.
+TMP=$(mktemp -d)
+readonly TMP
+
+# Remove the scratch directory on every exit path. TMP is expanded when the
+# trap fires, and quoted so that a TMPDIR containing spaces or glob
+# characters cannot make rm operate on other paths.
+trap 'rm -rf -- "$TMP"' EXIT
+
+# Print the help text on stdout and exit.
+#
+# Arguments: $1 - optional error message, printed as "Error: <message>."
+# ahead of the help text
+# Exit status: the number of arguments received, that is 0 for plain help
+# and 1 when an error message was given.
+usage() {
+ test $# -eq 1 &&
+ printf 'Error: %s.\n\n' "$1"
+
+ # sed turns the first @ of each line into a space, which is how the
+ # sample output below gets the leading space diff_render prints for
+ # metrics present in both files.
+ sed 's:@: :' <<-EOF
+ Usage: $SCRIPT <file1> <file2>
+ $SCRIPT -h
+
+ Show the differences between two sets of varnish metrics extracted
+ with 'varnishstat -1'.
+
+ Available options:
+ -h : show this help and exit
+
+ Considering the following metrics in <file1>:
+
+ FOO.counter 123 12 Only in file 1
+ BAR.counter 456 45 Counter present in both files
+ BAR.gauge 999 . Gauge present in both files
+
+ And the following metrics in <file2>:
+
+ BAR.counter 789 79 Counter present in both files
+ BAR.gauge 555 . Gauge present in both files
+ BAZ.gauge 0 . Only in file 2
+
+ The output is sorted by metric name and looks like this:
+
+ --- <file1>
+ +++ <file2>
+ @BAR.counter -456 -45 Counter present in both files
+ @ +789 +79
+ @BAR.gauge -999 . Gauge present in both files
+ @ +555 .
+ +BAZ.gauge 0 . Only in file 2
+ -FOO.counter 123 12 Only in file 1
+
+ The output looks like a unified diff except that when metrics are
+ present in both files, the diff is rendered as such only in the
+ metrics columns.
+ EOF
+ exit $#
+}
+
+# Sort a 'varnishstat -1' capture by metric name and print it on stdout
+# with @ as the field delimiter.
+#
+# Arguments: $1 - path to the capture to prepare
+join_prepare() {
+ # NB: the metrics need to be sorted to later be joined, and since
+ # the metrics descriptions contain spaces, using a delimiter other
+ # than space solves the problem. Hopefully @ never shows up in the
+ # varnishstat -1 output.
+ #
+ # Only the first three runs of spaces are replaced, which leaves the
+ # description (fourth field) intact. The run is spelled " \{1,\}"
+ # because " *" also matches the empty string at the start of a line
+ # and would prepend @ instead of splitting the columns. The empty
+ # regex in the second and third commands reuses the previous one.
+ sort -k 1b,1 -- "$1" |
+ sed 's: \{1,\}:@: ; s::@: ; s::@:'
+}
+
+# Join two @-delimited files on the metric name and print the merged
+# lines on stdout, still @-delimited. Lines found in only one of the
+# files are kept (-a1 -a2); diff_render tells them apart by testing the
+# value columns against the empty string.
+#
+# Arguments: $1 - prepared file 1
+# $2 - prepared file 2
+join_render() {
+ # The resulting columns are:
+ # 1: metric name
+ # 2: value in file 1
+ # 3: rate in file 1
+ # 4: value in file 2
+ # 5: rate in file 2
+ # 6: description in file 1
+ # 7: description in file 2
+ join -a1 -a2 -t@ -o '0 1.2 1.3 2.2 2.3 1.4 2.4' -- "$1" "$2"
+}
+
+# Print the two-line header naming both inputs on stdout.
+#
+# Arguments: $1 - name printed after "---"
+# $2 - name printed after "+++"
+diff_preamble() {
+ # NOTE(review): "---" is passed as an argument instead of leading the
+ # format, presumably so printf cannot take it for an option - confirm.
+ printf "%s %s\n+++ %s\n" --- "$1" "$2"
+}
+
+# Read @-delimited joined lines on stdin and print, on a single line,
+# three column widths: metric name, value and rate.
+#
+# Only lines whose value or rate differs between the two files are
+# measured. The value width is the larger of columns 2 and 4, the rate
+# width the larger of columns 3 and 5, and each printed width is the
+# longest field plus 2.
+diff_measure() {
+ awk -F@ '
+ BEGIN {
+ max[1] = 0
+ max[2] = 0
+ max[3] = 0
+ max[4] = 0
+ max[5] = 0
+ }
+ $2 != $4 || $3 != $5 {
+ for (i in max) {
+ if (max[i] < length($i))
+ max[i] = length($i)
+ }
+ }
+ END {
+ if (max[2] < max[4])
+ max[2] = max[4]
+ if (max[3] < max[5])
+ max[3] = max[5]
+ printf "%d %d %d\n", max[1] + 2, max[2] + 2, max[3] + 2
+ }
+ '
+}
+
+# Render the joined file as the final report on stdout.
+#
+# Stdin: one line holding three column widths (name, value, rate).
+# Arguments: $1 - path to the @-delimited joined file
+#
+# Metrics present in both files with a different value or rate take two
+# lines: the file 1 numbers prefixed with "-" and the file 2 numbers
+# prefixed with "+", where a rate of "." gets a space instead of a sign.
+# Metrics found in a single file take one line starting with "-" (file 1)
+# or "+" (file 2). Metrics with identical numbers in both files are not
+# printed.
+diff_render() {
+ # The widths arrive on stdin; the joined data is read from "$1".
+ read l1 l2 l3
+ awk -F@ -v l1="$l1" -v l2="$l2" -v l3="$l3" '
+ $2 != "" && $4 != "" && ($2 != $4 || $3 != $5) { # present in both
+ sgn = "-"
+ if ($3 == ".")
+ sgn = " "
+ printf " %-*s-%-*s%s%-*s%s\n", l1, $1, l2, $2, sgn, l3, $3, $6
+
+ sgn = "+"
+ if ($5 == ".")
+ sgn = " "
+ printf " %-*s+%-*s%s%s\n", l1, "", l2, $4, sgn, $5
+ }
+ $2 != "" && $4 == "" { # only in file 1
+ printf "-%-*s %-*s %-*s%s\n", l1, $1, l2, $2, l3, $3, $6
+ }
+ $2 == "" && $4 != "" { # only in file 2
+ printf "+%-*s %-*s %-*s%s\n", l1, $1, l2, $4, l3, $5, $7
+ }
+ ' <"$1"
+}
+
+# Option parsing: -h prints the help and exits 0, any other option
+# prints the help on stderr with an error and exits 1.
+while getopts h OPT
+do
+ case $OPT in
+ h) usage ;;
+ *) usage "wrong usage" >&2 ;;
+ esac
+done
+
+shift $((OPTIND - 1))
+
+# Exactly two file operands are required. usage exits, so nothing below
+# runs after a failed check; when a check passes, the failing test on the
+# left of && does not trip "set -e".
+test $# -lt 2 && usage "not enough arguments" >&2
+test $# -gt 2 && usage "too many arguments" >&2
+
+# NOTE(review): presumably pinned so that sort and join agree on the
+# collation order - confirm that C.utf-8 exists on all target systems.
+export LC_ALL=C.utf-8
+
+# Prepare both captures, join them on the metric name, then print the
+# header followed by the body. diff_measure reads the joined file once
+# to compute the column widths and pipes them to diff_render, which
+# reads the same file again to print it.
+join_prepare "$1" >"$TMP"/1
+join_prepare "$2" >"$TMP"/2
+join_render "$TMP"/1 "$TMP"/2 >"$TMP"/join
+diff_preamble "$1" "$2"
+diff_measure <"$TMP"/join |
+diff_render "$TMP"/join
More information about the varnish-commit
mailing list