[experimental-ims] f837fbc Split solaris sandboxing out to a separate source file, and apply patch received from Nils Goroll <nils.goroll at uplex.de>

Geoff Simmons geoff at varnish-cache.org
Mon Jan 9 21:52:01 CET 2012


commit f837fbca893cc09458482c5283456bf8990aeee6
Author: Poul-Henning Kamp <phk at FreeBSD.org>
Date:   Fri Sep 30 13:30:43 2011 +0000

    Split solaris sandboxing out to a separate source file, and apply
    patch received from Nils Goroll <nils.goroll at uplex.de>
    
    - [e0ee2a2e69654a9df74aaf3dcadc9639659cf42b] adds the file_read
      privilege needed for onnv_140 and newer (see #912), but we also need
      the file_write privilege for stevedore access.
    
    - If available, keep sys_resource in the permitted/limited set to
      allow cache_waiter_ports to raise the process.max-port-events
      resource control (feature to be added later).
    
    - When starting varnish with euid 0 on Solaris, privilege separation
      prohibited preserving additional privileges (in excess of the basic
      set) in the child, because, for a non privilege aware process,
      setuid() resets the effective, inheritable and permitted sets to the
      basic set.
    
      To achieve interoperability between solaris privileges and
      setuid()/setgid(), we now make the varnish child privilege aware
      before calling setuid() by trying to add all privileges we will need
      plus proc_setid.
    
    - On solaris, check for proc_setid rather than checking the euid as a
      prerequisite for changing the uid/gid and only change the uid/gid if
      we need to (for a privilege aware process, [ers]uid 0 lose their
      magic powers).
    
      Note that setuid() will always set SNOCD on Solaris, which will
      prevent core dumps from being written, unless setuid core dumps are
      explicitly enabled using coreadm(1M).
    
      To avoid setuid() (and the SNOCD flag, consequently), start varnish
      as the user you intend to run the child as, but with additional
      privileges, e.g. using
    
      ppriv -e -s A=basic,net_privaddr,sys_resource varnishd ...
    
    - setppriv(PRIV_SET, ...) failed when the privileges to be applied
      were not available in the permitted set.
    
      We change the logic to only clear the privileges which are not
      needed by inverting the sets and removing all unneeded privileges
      using setppriv(PRIV_OFF, ...).
    
      So the child might end up with fewer privileges than given initially,

diff --git a/bin/varnishd/Makefile.am b/bin/varnishd/Makefile.am
index c13f4e5..0dbc745 100644
--- a/bin/varnishd/Makefile.am
+++ b/bin/varnishd/Makefile.am
@@ -60,6 +60,7 @@ varnishd_SOURCES = \
 	mgt_param.c \
 	mgt_pool.c \
 	mgt_sandbox.c \
+	mgt_sandbox_solaris.c \
 	mgt_shmem.c \
 	mgt_vcc.c \
 	rfc2616.c \
diff --git a/bin/varnishd/mgt.h b/bin/varnishd/mgt.h
index 485de26..b6dac01 100644
--- a/bin/varnishd/mgt.h
+++ b/bin/varnishd/mgt.h
@@ -71,6 +71,12 @@ void MCF_DumpRst(void);
 /* mgt_sandbox.c */
 void mgt_sandbox(void);
 
+/* mgt_sandbox_solaris.c */
+#ifdef HAVE_SETPPRIV
+void mgt_sandbox_solaris_init(void);	/* run by mgt_sandbox() before setgid/setuid */
+void mgt_sandbox_solaris_fini(void);	/* run last by mgt_sandbox() to waive privileges */
+#endif
+
 /* mgt_shmem.c */
 void mgt_SHM_Init(const char *arg);
 void mgt_SHM_Pid(void);
diff --git a/bin/varnishd/mgt_sandbox.c b/bin/varnishd/mgt_sandbox.c
index a5eee2f..94f9f4d 100644
--- a/bin/varnishd/mgt_sandbox.c
+++ b/bin/varnishd/mgt_sandbox.c
@@ -48,10 +48,6 @@
 #include <syslog.h>
 #include <unistd.h>
 
-#ifdef HAVE_PRIV_H
-#include <priv.h>
-#endif
-
 #ifdef __linux__
 #include <sys/prctl.h>
 #endif
@@ -67,6 +63,10 @@ void
 mgt_sandbox(void)
 {
 
+#ifdef HAVE_SETPPRIV
+	mgt_sandbox_solaris_init();
+#endif
+
 	if (geteuid() == 0) {
 		XXXAZ(setgid(params->gid));
 		XXXAZ(setuid(params->uid));
@@ -84,35 +84,7 @@ mgt_sandbox(void)
 #endif
 
 #ifdef HAVE_SETPPRIV
-	priv_set_t *empty, *minimal;
-
-	if (!(empty = priv_allocset()) ||
-	    !(minimal = priv_allocset())) {
-		REPORT0(LOG_ERR, "priv_allocset_failed");
-	} else {
-		priv_emptyset(empty);
-		priv_emptyset(minimal);
-
-		/*
-		 * new privilege,
-		 * silently ignore any errors if it doesn't exist
-		 */
-		priv_addset(minimal, "net_access");
-		priv_addset(minimal, "file_read");
-
-#define SETPPRIV(which, set)						\
-		if (setppriv(PRIV_SET, which, set))			\
-			REPORT0(LOG_ERR,				\
-			    "Waiving privileges failed on " #which)
-
-		/* need to set I after P to avoid SNOCD being set */
-		SETPPRIV(PRIV_LIMIT, minimal);
-		SETPPRIV(PRIV_PERMITTED, minimal); /* implies PRIV_EFFECTIVE */
-		SETPPRIV(PRIV_INHERITABLE, empty);
-
-		priv_freeset(empty);
-		priv_freeset(minimal);
-	}
+	mgt_sandbox_solaris_fini();
 #endif
 
 }
diff --git a/bin/varnishd/mgt_sandbox_solaris.c b/bin/varnishd/mgt_sandbox_solaris.c
new file mode 100644
index 0000000..15b7c95
--- /dev/null
+++ b/bin/varnishd/mgt_sandbox_solaris.c
@@ -0,0 +1,219 @@
+/*-
+ * Copyright (c) 2006-2011 Varnish Software AS
+ * All rights reserved.
+ *
+ * Author: Poul-Henning Kamp <phk at phk.freebsd.dk>
+ *	   Nils Goroll <nils.goroll at uplex.de>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Sandboxing child processes on Solaris
+ *
+ */
+
+#include "config.h"
+
+#ifdef HAVE_SETPPRIV
+
+#include <stdio.h>
+#include <syslog.h>
+#include <unistd.h>
+#include <string.h>
+
+#ifdef HAVE_PRIV_H
+#include <priv.h>
+#endif
+
+#include "mgt.h"
+#include "heritage.h"
+
+/*--------------------------------------------------------------------
+ * SOLARIS PRIVILEGES: Note on use of symbolic PRIV_* constants
+ * 
+ * For privileges which existed in Solaris 10 FCS, we may use the constants from
+ * sys/priv_names.h
+ *
+ * For privileges which have been added later, we need to use strings in order
+ * not to break builds of varnish on these platforms. To remain binary
+ * compatible, we need to silently ignore errors from priv_addset when using
+ * these strings.
+ *
+ * For optimal build and binary forward compatibility, we could use subtractive
+ * set specs like
+ *
+ *       basic,!file_link_any,!proc_exec,!proc_fork,!proc_info,!proc_session
+ *
+ * but I (Nils) have a preference for making an informed decision about which
+ * privileges the varnish child should have and which it shouldn't.
+ *
+ * Newly introduced privileges should be annotated with their PSARC / commit ID
+ * (as long as Oracle reveals these :/ )
+ *
+ * SOLARIS PRIVILEGES: Note on accidentally setting the SNOCD flag
+ *
+ * When setting privileges, we need to take care not to accidentally set the
+ * SNOCD flag which will disable core dumps unnecessarily. (see
+ * https://www.varnish-cache.org/trac/ticket/671 )
+ *
+ * When changing the logic herein, always check with mdb -k. Replace _PID_ with
+ * the pid of your varnish child, the result should be 0, otherwise a regression
+ * has been introduced.
+ *
+ * > 0t_PID_::pid2proc | ::print proc_t p_flag | >a
+ * > (<a & 0x10000000)=X
+ *                 0
+ *
+ * (a value of 0x10000000 indicates that SNOCD is set)
+ *
+ * NOTE that on Solaris changing the uid will _always_ set SNOCD, so make sure
+ * you run this test with appropriate privileges, but without proc_setid, so
+ * varnish won't setuid(), e.g.
+ *
+ * pfexec ppriv -e -s A=basic,net_privaddr,sys_resource varnish ...
+ *
+ * SOLARIS COREDUMPS with setuid(): See coreadm(1M) - global-setid / proc-setid
+ *
+ */
+
+/* Add to pset what the child needs in its effective set while running. */
+static inline void
+mgt_sandbox_solaris_add_effective(priv_set_t *pset)
+{
+	/* Newer than Solaris 10 FCS: named by string, errors ignored. */
+	/* net_access: PSARC/2009/685 - 8eca52188202 - onnv_132 */
+	/* file_read, file_write: PSARC/2009/378 - 63678502e95e - onnv_140 */
+	(void)priv_addset(pset, "net_access");
+	(void)priv_addset(pset, "file_read");
+	(void)priv_addset(pset, "file_write");
+}
+
+/* Add what the child keeps permitted at runtime, for privilege bracketing. */
+static inline void
+mgt_sandbox_solaris_add_permitted(priv_set_t *pset)
+{
+	/* sys_resource: used for raising limits in cache_waiter_ports.c */
+	(void)priv_addset(pset, PRIV_SYS_RESOURCE);
+}
+
+/* Add what must be effective only while mgt_sandbox() itself runs. */
+static inline void
+mgt_sandbox_solaris_add_initial(priv_set_t *pset)
+{
+	/* proc_setid: needed for the setgid/setuid calls */
+	(void)priv_addset(pset, PRIV_PROC_SETID);
+}
+
+/*
+ * A process started with euid 0 that is not yet privilege aware would lose
+ * any privileges beyond the basic set on setuid().  So, before mgt_sandbox()
+ * changes gid/uid, try to switch on everything that is needed until
+ * mgt_sandbox_solaris_fini() reduces the child to least privileges.
+ */
+
+void
+mgt_sandbox_solaris_init(void)
+{
+	priv_set_t *wanted = priv_allocset();
+
+	if (wanted == NULL) {
+		REPORT(LOG_ERR,
+		    "Child start warning: mgt_sandbox_init - priv_allocset failed: errno=%d (%s)",
+		    errno, strerror(errno));
+		return;
+	}
+
+	/* union of runtime-effective, runtime-permitted and setup-only */
+	priv_emptyset(wanted);
+	mgt_sandbox_solaris_add_effective(wanted);
+	mgt_sandbox_solaris_add_permitted(wanted);
+	mgt_sandbox_solaris_add_initial(wanted);
+
+	/* best effort: the return values are not checked */
+	(void)setppriv(PRIV_ON, PRIV_PERMITTED, wanted);
+	(void)setppriv(PRIV_ON, PRIV_EFFECTIVE, wanted);
+	(void)setppriv(PRIV_ON, PRIV_INHERITABLE, wanted);
+	priv_freeset(wanted);
+}
+
+/*
+ * Waive most privileges in the child, via PRIV_OFF on inverted keep-sets
+ *
+ * as of onnv_151a, we should end up with:
+ *
+ * > ppriv -v #pid of varnish child
+ * PID:  .../varnishd ...
+ * flags = PRIV_AWARE
+ *      E: file_read,file_write,net_access
+ *      I: none
+ *      P: file_read,file_write,net_access,sys_resource
+ *      L: file_read,file_write,net_access,sys_resource
+ *
+ * We should keep sys_resource in P in order to adjust our limits if we need to
+ */
+
+void
+mgt_sandbox_solaris_fini(void)
+{
+	priv_set_t *effective = NULL, *inheritable = NULL, *permitted = NULL;
+
+	if (!(effective = priv_allocset()) ||
+	    !(inheritable = priv_allocset()) ||
+	    !(permitted = priv_allocset())) {
+		REPORT(LOG_ERR,
+		    "Child start warning: mgt_sandbox_waive - priv_allocset failed: errno=%d (%s)",
+		    errno, strerror(errno));
+		goto out;
+	}
+
+	priv_emptyset(inheritable);
+	priv_emptyset(effective);
+	mgt_sandbox_solaris_add_effective(effective);
+	priv_copyset(effective, permitted);	/* P starts out as a copy of E */
+	mgt_sandbox_solaris_add_permitted(permitted);
+
+	/* invert, so that each set names what is to be switched off */
+	priv_inverse(inheritable);
+	priv_inverse(effective);
+	priv_inverse(permitted);
+
+#define SETPPRIV(which, set)						\
+	if (setppriv(PRIV_OFF, which, set))				\
+		REPORT(LOG_ERR,						\
+		    "Child start warning: Waiving privileges failed on %s: errno=%d (%s)", \
+		    #which, errno, strerror(errno));
+
+	SETPPRIV(PRIV_INHERITABLE, inheritable);
+	SETPPRIV(PRIV_EFFECTIVE, effective);
+	SETPPRIV(PRIV_PERMITTED, permitted);
+	SETPPRIV(PRIV_LIMIT, permitted);
+#undef SETPPRIV
+
+out:	/* release every set that got allocated, on success and on failure */
+	if (inheritable != NULL)
+		priv_freeset(inheritable);
+	if (effective != NULL)
+		priv_freeset(effective);
+	if (permitted != NULL)
+		priv_freeset(permitted);
+}
+
+#endif /* HAVE_SETPPRIV */



More information about the varnish-commit mailing list