[master] a5c1d3a3e vai: add support to allocate & return buffers and implement for sml
Nils Goroll
nils.goroll at uplex.de
Fri Jul 4 17:04:04 UTC 2025
commit a5c1d3a3ea5722d962ee99bc8588720ebed8c37d
Author: Nils Goroll <nils.goroll at uplex.de>
Date: Mon Jan 6 22:02:44 2025 +0100
vai: add support to allocate & return buffers and implement for sml
To bring VAI to filters, we are going to need buffer allocations all over the
place, because any new data created by filters needs to survive after the filter
function returns.
So we add ObjVAIbuffer() to fill a VSCARAB with buffers and teach ObjVAIreturn()
to return any kind of lease.
We add an implementation for SML.
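
To make the new contract concrete, here is a minimal caller-side sketch (not part of this commit): it assumes the scarab and scaret arrive pre-initialized via the usual VSCARAB/VSCARET setup macros and that a VSCARET_ADD() helper exists; only ObjVAIbuffer(), ObjVAIreturn(), the viov fields and the iteration macros are taken from this change.

#include <errno.h>
#include <string.h>

#include "cache/cache.h"

/* Sketch only: ask storage for temporary buffers, use them, return them. */
static int
example_buffer_roundtrip(struct worker *wrk, vai_hdl vhdl,
    struct vscarab *scarab, struct vscaret *scaret)
{
        struct viov *vio;
        int r;

        /* on the way in, only iov_len is set (requested size, <= UINT_MAX);
         * everything else in each viov stays zero */
        VSCARAB_FOREACH(vio, scarab)
                vio->iov.iov_len = 4096;

        r = ObjVAIbuffer(wrk, vhdl, scarab);
        if (r == -EAGAIN)
                return (r);     /* storage will notify; retry after the callback */
        if (r < 0)
                return (r);     /* fatal */

        /* r viovs are now filled; iov_len may be larger than requested */
        VSCARAB_FOREACH(vio, scarab)
                memset(vio->iov.iov_base, 0, vio->iov.iov_len);

        /* once the buffers are no longer accessed, hand the leases back */
        VSCARAB_FOREACH(vio, scarab)
                VSCARET_ADD(scaret, vio->lease); /* assumed helper; scaret assumed large enough */
        ObjVAIreturn(wrk, vhdl, scaret);
        return (r);
}
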
diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h
index 118f65a34..55553e707 100644
--- a/bin/varnishd/cache/cache.h
+++ b/bin/varnishd/cache/cache.h
@@ -919,6 +919,7 @@ struct vscaret {
vai_hdl ObjVAIinit(struct worker *, struct objcore *, struct ws *,
vai_notify_cb *, void *);
int ObjVAIlease(struct worker *, vai_hdl, struct vscarab *);
+int ObjVAIbuffer(struct worker *, vai_hdl, struct vscarab *);
void ObjVAIreturn(struct worker *, vai_hdl, struct vscaret *);
void ObjVAIfini(struct worker *, vai_hdl *);
diff --git a/bin/varnishd/cache/cache_obj.c b/bin/varnishd/cache/cache_obj.c
index 6fe72f448..dcbb991b6 100644
--- a/bin/varnishd/cache/cache_obj.c
+++ b/bin/varnishd/cache/cache_obj.c
@@ -218,10 +218,31 @@ ObjIterate(struct worker *wrk, struct objcore *oc,
* used by the caller between lease and return, but must be cleared to
* zero before returning.
*
+ * ObjVAIbuffer() allocates temporary buffers, returns:
+ *
+ * -EAGAIN: allocation can not be fulfilled immediately, storage will notify,
+ * no use to call again until notification
+ * -EINVAL: size larger than UINT_MAX requested
+ * -(errno): other problem, fatal
+ * n: n > 0, number of viovs filled
+ *
+ * The struct vscarab is used on the way in and out: On the way in, the
+ * iov.iov_len members contain the sizes the caller requests; all other
+ * members of the struct viovs are expected to be zero-initialized.
+ *
+ * The maximum size to be requested is UINT_MAX.
+ *
+ * ObjVAIbuffer() may return buffers larger than the requested sizes. The
+ * returned n might be smaller than the number of viovs requested.
+ *
* ObjVAIreturn() returns leases collected in a struct vscaret
*
- * it must be called with a vscaret, which holds an array of lease values from viovs
- * received when the caller can guarantee that they are no longer accessed
+ * it must be called with a vscaret, which holds an array of lease values
+ * received via ObjVAIlease() or ObjVAIbuffer() when the caller can
+ * guarantee that they are no longer accessed.
+ *
+ * ObjVAIreturn() may retain leases in the vscaret if the implementation
+ * still requires them, iow, the vscaret might not be empty upon return.
*
* ObjVAIfini() finalizes iteration
*
@@ -252,6 +273,17 @@ ObjVAIlease(struct worker *wrk, vai_hdl vhdl, struct vscarab *scarab)
return (vaip->vai_lease(wrk, vhdl, scarab));
}
+int
+ObjVAIbuffer(struct worker *wrk, vai_hdl vhdl, struct vscarab *scarab)
+{
+ struct vai_hdl_preamble *vaip = vhdl;
+
+ AN(vaip);
+ assert(vaip->magic2 == VAI_HDL_PREAMBLE_MAGIC2);
+ AN(vaip->vai_buffer);
+ return (vaip->vai_buffer(wrk, vhdl, scarab));
+}
+
void
ObjVAIreturn(struct worker *wrk, vai_hdl vhdl, struct vscaret *scaret)
{
@@ -259,11 +291,8 @@ ObjVAIreturn(struct worker *wrk, vai_hdl vhdl, struct vscaret *scaret)
AN(vaip);
assert(vaip->magic2 == VAI_HDL_PREAMBLE_MAGIC2);
- /* vai_return is optional */
- if (vaip->vai_return != NULL)
- vaip->vai_return(wrk, vhdl, scaret);
- else
- VSCARET_INIT(scaret, scaret->capacity);
+ AN(vaip->vai_return);
+ vaip->vai_return(wrk, vhdl, scaret);
}
void
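
Because vai_return is now mandatory and may retain leases, callers can no longer assume the vscaret comes back empty. A hedged sketch of the resulting pattern follows; the `used` member of struct vscaret is an assumption (the struct itself is not shown in this diff).

#include "cache/cache.h"

/* Sketch: return leases and cope with the implementation keeping some. */
static void
example_return(struct worker *wrk, vai_hdl vhdl, struct vscaret *scaret)
{
        ObjVAIreturn(wrk, vhdl, scaret);
        if (scaret->used > 0) {
                /* the implementation still needs these leases; keep the
                 * vscaret around and include them in a later call */
        }
}
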
diff --git a/bin/varnishd/cache/cache_obj.h b/bin/varnishd/cache/cache_obj.h
index 0aff7c8b2..9de3c383d 100644
--- a/bin/varnishd/cache/cache_obj.h
+++ b/bin/varnishd/cache/cache_obj.h
@@ -50,8 +50,9 @@ typedef void *objsetattr_f(struct worker *, struct objcore *,
enum obj_attr attr, ssize_t len, const void *ptr);
typedef void objtouch_f(struct worker *, struct objcore *, vtim_real now);
-/* called by Obj/storage to notify that the lease function (vai_lease_f) can be
- * called again after a -EAGAIN / -ENOBUFS return value
+/* called by Obj/storage to notify that the lease function (vai_lease_f) or
+ * buffer function (vai_buffer_f) can be called again after return of
+ * -EAGAIN or -ENOBUFS
* NOTE:
* - the callback gets executed by an arbitrary thread
* - WITH the boc mtx held
@@ -96,7 +97,26 @@ typedef vai_hdl vai_init_f(struct worker *, struct objcore *, struct ws *,
typedef int vai_lease_f(struct worker *, vai_hdl, struct vscarab *);
/*
- * return leases
+ * get io vectors with temporary buffers from storage
+ *
+ * vai_hdl is from vai_init_f
+ * the vscarab needs to be initialized with the number of requested elements
+ * and each iov.iov_len containing the requested size. all iov_base members
+ * need to be zero.
+ *
+ * after return, the vscarab can be smaller than requested if only some
+ * allocation requests could be fulfilled
+ *
+ * return:
+ * -EAGAIN: allocation can not be fulfilled immediately, storage will notify,
+ * no use to call again until notification
+ * -(errno): other problem, fatal
+ * n: n > 0, number of viovs filled
+ */
+typedef int vai_buffer_f(struct worker *, vai_hdl, struct vscarab *);
+
+/*
+ * return leases from vai_lease_f or vai_buffer_f
*/
typedef void vai_return_f(struct worker *, vai_hdl, struct vscaret *);
@@ -120,7 +140,8 @@ struct vai_hdl_preamble {
unsigned magic2;
#define VAI_HDL_PREAMBLE_MAGIC2 0x7a15d162
vai_lease_f *vai_lease;
- vai_return_f *vai_return; // optional
+ vai_buffer_f *vai_buffer;
+ vai_return_f *vai_return;
uintptr_t reserve[4]; // abi fwd compat
vai_fini_f *vai_fini;
};
diff --git a/bin/varnishd/storage/storage_simple.c b/bin/varnishd/storage/storage_simple.c
index cf10dd696..5bd92b5c8 100644
--- a/bin/varnishd/storage/storage_simple.c
+++ b/bin/varnishd/storage/storage_simple.c
@@ -315,6 +315,7 @@ struct sml_hdl {
struct vai_hdl_preamble preamble;
#define SML_HDL_MAGIC 0x37dfd996
struct vai_qe qe;
+ struct pool_task task; // unfortunate
struct ws *ws; // NULL is malloc()
struct objcore *oc;
struct object *obj;
@@ -359,6 +360,72 @@ sml_ai_viov_fill(struct viov *viov, struct storage *st)
VAI_ASSERT_LEASE(viov->lease);
}
+// sml has no mechanism to notify "I got free space again now"
+// (we could add that, but because storage.h is used in mgt, a first attempt
+// at least looks like it would cause some include spill for vai_q_head or
+// something similar)
+//
+// So anyway, to get ahead we just implement a pretty stupid "call the notify
+// some time later" on a thread
+static void
+sml_ai_later_task(struct worker *wrk, void *priv)
+{
+ struct sml_hdl *hdl;
+ const vtim_dur dur = 0.0042;
+
+ (void)wrk;
+ VTIM_sleep(dur);
+ CAST_VAI_HDL_NOTNULL(hdl, priv, SML_HDL_MAGIC);
+ memset(&hdl->task, 0, sizeof hdl->task);
+ hdl->qe.cb(hdl, hdl->qe.priv);
+}
+static void
+sml_ai_later(struct worker *wrk, struct sml_hdl *hdl)
+{
+ AZ(hdl->task.func);
+ AZ(hdl->task.priv);
+ hdl->task.func = sml_ai_later_task;
+ hdl->task.priv = hdl;
+ AZ(Pool_Task(wrk->pool, &hdl->task, TASK_QUEUE_BG));
+}
+
+
+static int
+sml_ai_buffer(struct worker *wrk, vai_hdl vhdl, struct vscarab *scarab)
+{
+ const struct stevedore *stv;
+ struct sml_hdl *hdl;
+ struct storage *st;
+ struct viov *vio;
+ int r = 0;
+
+ (void) wrk;
+ CAST_VAI_HDL_NOTNULL(hdl, vhdl, SML_HDL_MAGIC);
+ stv = hdl->stv;
+ CHECK_OBJ_NOTNULL(stv, STEVEDORE_MAGIC);
+
+ VSCARAB_FOREACH(vio, scarab)
+ if (vio->iov.iov_len > UINT_MAX)
+ return (-EINVAL);
+
+ VSCARAB_FOREACH(vio, scarab) {
+ st = objallocwithnuke(wrk, stv, vio->iov.iov_len, 0);
+ if (st == NULL)
+ break;
+ assert(st->space >= vio->iov.iov_len);
+ st->flags = STORAGE_F_BUFFER;
+ st->len = st->space;
+
+ sml_ai_viov_fill(vio, st);
+ r++;
+ }
+ if (r == 0) {
+ sml_ai_later(wrk, hdl);
+ r = -EAGAIN;
+ }
+ return (r);
+}
+
static int
sml_ai_lease_simple(struct worker *wrk, vai_hdl vhdl, struct vscarab *scarab)
{
@@ -497,6 +564,29 @@ sml_ai_lease_boc(struct worker *wrk, vai_hdl vhdl, struct vscarab *scarab)
return (r);
}
+// return only buffers, used if object is not streaming
+static void v_matchproto_(vai_return_f)
+sml_ai_return_buffers(struct worker *wrk, vai_hdl vhdl, struct vscaret *scaret)
+{
+ struct storage *st;
+ struct sml_hdl *hdl;
+ uint64_t *p;
+
+ (void) wrk;
+ CAST_VAI_HDL_NOTNULL(hdl, vhdl, SML_HDL_MAGIC);
+
+ VSCARET_FOREACH(p, scaret) {
+ if (*p == VAI_LEASE_NORET)
+ continue;
+ CAST_OBJ_NOTNULL(st, lease2st(*p), STORAGE_MAGIC);
+ if ((st->flags & STORAGE_F_BUFFER) == 0)
+ continue;
+ sml_stv_free(hdl->stv, st);
+ }
+ VSCARET_INIT(scaret, scaret->capacity);
+}
+
+// generic return for buffers and object leases, used when streaming
static void v_matchproto_(vai_return_f)
sml_ai_return(struct worker *wrk, vai_hdl vhdl, struct vscaret *scaret)
{
@@ -528,6 +618,8 @@ sml_ai_return(struct worker *wrk, vai_hdl vhdl, struct vscaret *scaret)
Lck_Lock(&hdl->boc->mtx);
VSCARET_FOREACH(p, todo) {
CAST_OBJ_NOTNULL(st, lease2st(*p), STORAGE_MAGIC);
+ if ((st->flags & STORAGE_F_BUFFER) != 0)
+ continue;
VTAILQ_REMOVE(&hdl->obj->list, st, list);
if (st == hdl->boc->stevedore_priv)
hdl->boc->stevedore_priv = trim_once;
@@ -578,6 +670,8 @@ sml_ai_init(struct worker *wrk, struct objcore *oc, struct ws *ws,
AN(hdl);
INIT_VAI_HDL(hdl, SML_HDL_MAGIC);
hdl->preamble.vai_lease = sml_ai_lease_simple;
+ hdl->preamble.vai_buffer = sml_ai_buffer;
+ hdl->preamble.vai_return = sml_ai_return_buffers;
hdl->preamble.vai_fini = sml_ai_fini;
hdl->ws = ws;
@@ -590,6 +684,11 @@ sml_ai_init(struct worker *wrk, struct objcore *oc, struct ws *ws,
hdl->st = VTAILQ_LAST(&hdl->obj->list, storagehead);
CHECK_OBJ_ORNULL(hdl->st, STORAGE_MAGIC);
+ hdl->qe.magic = VAI_Q_MAGIC;
+ hdl->qe.cb = notify;
+ hdl->qe.hdl = hdl;
+ hdl->qe.priv = notify_priv;
+
hdl->boc = HSH_RefBoc(oc);
if (hdl->boc == NULL)
return (hdl);
@@ -599,10 +698,6 @@ sml_ai_init(struct worker *wrk, struct objcore *oc, struct ws *ws,
hdl->preamble.vai_lease = sml_ai_lease_boc;
if ((hdl->oc->flags & OC_F_TRANSIENT) != 0)
hdl->preamble.vai_return = sml_ai_return;
- hdl->qe.magic = VAI_Q_MAGIC;
- hdl->qe.cb = notify;
- hdl->qe.hdl = hdl;
- hdl->qe.priv = notify_priv;
return (hdl);
}
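
For completeness, the consumer end of the -EAGAIN/notify round trip that sml_ai_later() fakes looks roughly like this. A sketch only: the callback signature is inferred from hdl->qe.cb(hdl, hdl->qe.priv) above, and struct example_state with its condition variable is a hypothetical stand-in for whatever wake-up mechanism the caller uses.

#include <pthread.h>

#include "cache/cache.h"

struct example_state {
        unsigned        magic;
#define EXAMPLE_STATE_MAGIC     0x45584d50
        pthread_cond_t  cond;           /* hypothetical wake-up mechanism */
};

/* Sketch: notify callback handed to ObjVAIinit(); fired (here via the SML
 * background task) once ObjVAIbuffer()/ObjVAIlease() may be retried.
 * Per the note in cache_obj.h it runs with the boc mtx held, so it must
 * not block; signalling a condition variable is fine. */
static void
example_notify(vai_hdl vhdl, void *priv)
{
        struct example_state *es;

        (void)vhdl;
        CAST_OBJ_NOTNULL(es, priv, EXAMPLE_STATE_MAGIC);
        AZ(pthread_cond_signal(&es->cond));     /* retry ObjVAIbuffer() later */
}
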