[master] bf84027da jail_linux: Add THP control

Nils Goroll nils.goroll at uplex.de
Fri Feb 14 10:27:06 UTC 2025


commit bf84027da5f54e5b85bae47b38752722ad21cb48
Author: Thibaut Artis <thibaut.artis at varnish-software.com>
Date:   Wed Sep 11 18:02:14 2024 +0200

    jail_linux: Add THP control
    
    Disabling Transparent Hugepage has often been the solution to solve
    hard-to-diagnose instability issues and despite improvements in this area
    compared to the RHEL6 era, our recommendation is still to avoid THP to this day.
    
    In addition to refreshing the documentation on this topic, we add thp control to
    the linux jail
    
    Committer edit:
    
    - Updated to master
    - Added "try-disable" option as default
    - Made "enable" and "disable" options fail
    - Ensured default gets called for -jlinux
    - Edited documentation
    - polished

diff --git a/bin/varnishd/mgt/mgt_jail_linux.c b/bin/varnishd/mgt/mgt_jail_linux.c
index b8f136020..2dd7f89a2 100644
--- a/bin/varnishd/mgt/mgt_jail_linux.c
+++ b/bin/varnishd/mgt/mgt_jail_linux.c
@@ -44,12 +44,84 @@
 #include <sys/vfs.h>
 
 #include "mgt/mgt.h"
+#include "common/heritage.h"
+
+static int
+vjl_set_thp(const char *arg, struct vsb *vsb)
+{
+	int r, val, must;
+
+	if (!strcmp(arg, "ignore"))
+		return (0);
+	must = 1;
+	if (!strcmp(arg, "enable"))
+		val = 0;
+	else if (!strcmp(arg, "disable"))
+		val = 1;
+	else if (!strcmp(arg, "try-disable")) {
+		arg = "disable";
+		val = 1;
+		must = 0;
+	}
+	else {
+		VSB_printf(vsb, "linux jail: unknown value '%s' for argument"
+		    " transparent_hugepage.", arg);
+		return (1);
+	}
+	r = prctl(PR_SET_THP_DISABLE, val, 0, 0, 0);
+	if (r) {
+		VSB_printf(vsb, "linux jail: Could not %s "
+		    "Transparent Hugepage: %s (%d)",
+		    arg, VAS_errtxt(errno), errno);
+	}
+	return (r && must);
+}
 
 static int
 vjl_init(char **args)
 {
+	struct vsb *vsb;
+	char **unix_args;
+	const char *val;
+	int seen = 0, ret = 0;
+	size_t i;
+
+	vsb = VSB_new_auto();
+	AN(vsb);
+
+	if (args == NULL) {
+		/* Autoconfig */
+		AZ(vjl_set_thp("try-disable", vsb));
+		MGT_ComplainVSB(C_INFO, vsb);
+		VSB_destroy(&vsb);
+		return (jail_tech_unix.init(NULL));
+	}
+
+	for (i = 0; args[i] != NULL; i++);
+	unix_args = calloc(i + 1, sizeof *unix_args);
+	AN(unix_args);
+
+	for (i = 0; *args != NULL && ret == 0; args++) {
+		val = keyval(*args, "transparent_hugepage=");
+		if (val == NULL) {
+			unix_args[i++] = *args;
+			continue;
+		}
+
+		ret |= vjl_set_thp(val, vsb);
+		seen++;
+	}
+
+	if (seen == 0)
+		AZ(vjl_set_thp("try-disable", vsb));
+
+	MGT_ComplainVSB(ret ? C_ERR : C_INFO, vsb);
+	VSB_destroy(&vsb);
 
-	return jail_tech_unix.init(args);
+	if (ret == 0)
+		ret = jail_tech_unix.init(unix_args);
+	free(unix_args);
+	return (ret);
 }
 
 static void
diff --git a/doc/sphinx/installation/platformnotes.rst b/doc/sphinx/installation/platformnotes.rst
index 371105ffb..3c645befc 100644
--- a/doc/sphinx/installation/platformnotes.rst
+++ b/doc/sphinx/installation/platformnotes.rst
@@ -32,6 +32,10 @@ column, no additional action is necessary.
 Otherwise, consider creating a ``tmpfs`` mountpoint at *workdir*, or configure
 *workdir* on an existing ``tmpfs``.
 
+The ``tmpfs`` for *workdir* should be mounted with Transparent Hugepage
+disabled. Consider mounting the working directory with the ``huge=never`` mount
+option if that is not the default.
+
 Note: Very valid reasons exist for *not* following this recommendation, if you
 know what you are doing.
 
@@ -55,21 +59,24 @@ See :ref:`ref-vsm` for details.
 
 .. _platform-thp:
 
-Transparent hugepages on Redhat Enterprise Linux 6
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Transparent Hugepage on Linux
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+On certain Linux distributions Transparent Hugepage (THP) kernel support is
+enabled by default. This is known to cause instabilities of Varnish.
 
-On RHEL6 Transparent Hugepage kernel support is enabled by default.
-This is known to cause sporadic crashes of Varnish.
+By default, Varnish tries to disable the THP feature, but does not fail if it
+can't. The ``linux`` :ref:`ref-varnishd-opt_j` offers to optionally enable,
+disable or ignore THP.
 
-It is recommended to disable transparent hugepages on affected
-systems. This can be done with
-``echo never > /sys/kernel/mm/redhat_transparent_hugepage/enabled``
-(runtime) or by adding "transparent_hugepage=never" to the kernel boot
-line in the "/etc/grub.conf" file (persistent).
+Alternatively, THP can be disabled system-wide. If Varnish is the only
+significant service running on this system, this can be done during runtime
+with::
 
-On Debian/Ubuntu systems running 3.2 kernels the default value is "madvise" and
-does not need to be changed.
+  echo never > /sys/kernel/mm/transparent_hugepage/enabled
 
+The setting can also be persisted in the bootloader configuration by adding
+``transparent_hugepage=never`` to the kernel command line.
 
 OpenVZ
 ~~~~~~
diff --git a/doc/sphinx/reference/varnishd.rst b/doc/sphinx/reference/varnishd.rst
index a3c445d14..d93721cae 100644
--- a/doc/sphinx/reference/varnishd.rst
+++ b/doc/sphinx/reference/varnishd.rst
@@ -454,13 +454,26 @@ specific options. Available jails are:
 
     -j solaris,worker=basic
 
--j <linux[,user=`user`][,ccgroup=`group`][,workuser=`user`]>
+-j <linux[,transparent_hugepage=`thp_setting`][,`unix jail option`...]>
 
   Default on Linux platforms, it extends the UNIX jail with
   Linux-specific mechanisms:
 
   - It warns when *workdir* is not on a ``tmpfs``.
   - It tries to keep the process dumpable after dropping privileges.
+  - It adds control over the transparent hugepage (THP) setting.
+
+  `thp_setting` can take these values:
+
+  - ``ignore``: Do nothing
+  - ``enable``: Enable THP (see Note below)
+  - ``disable``: Disable THP
+  - ``try-disable`` (default): Try to disable, ignore failure (but emit a
+    warning)
+
+  Note: Technically, ``enable`` is "disable the disable", so it does not
+  necessarily enable THP. The setting names have been chosen to avoid a
+  confusing double negation.
 
 -j <unix[,user=`user`][,ccgroup=`group`][,workuser=`user`]>
 


More information about the varnish-commit mailing list