[master] b9a204082 vdire: plug the remaining vcl temperature change races
Nils Goroll
nils.goroll at uplex.de
Thu May 22 07:46:05 UTC 2025
commit b9a204082d73f3347b95e2c58e840849888d4ce2
Author: Nils Goroll <nils.goroll at uplex.de>
Date: Tue Feb 4 18:34:25 2025 +0100
vdire: plug the remaining vcl temperature change races
The context for this change is #4142, but while it builds upon it, it is still
causally unrelated.
To motivate this change, let's look at vcl_set_state() from before, and consider
some non-issues and issues:
transition to COLD
------------------
Lck_Lock(&vcl_mtx);
vcl->temp = VTAILQ_EMPTY(&vcl->ref_list) ?
VCL_TEMP_COLD : VCL_TEMP_COOLING;
Lck_Unlock(&vcl_mtx);
// *A*
vcl_BackendEvent(vcl, VCL_EVENT_COLD);
non-issues:
At point *A*, backends could get added. For VCL_TEMP_COOLING, this fails
(see VRT_AddDirector()). For VCL_TEMP_COLD, the state is consistent,
because backends get created cold and that's it.
At point *A*, backends could get removed. vdire_resign() from #4142
ensures that the temperature passed to vcldir_retire() is consistent
with the events the backends have received.
transition to WARM (success)
----------------------------
Lck_Lock(&vcl_mtx);
vcl->temp = VCL_TEMP_WARM;
Lck_Unlock(&vcl_mtx);
// *B* ...
i = vcl_send_event(vcl, VCL_EVENT_WARM, msg);
if (i == 0) {
// ... *B*
vcl_BackendEvent(vcl, VCL_EVENT_WARM);
break;
}
issues:
(1)
In region *B*, backends could get removed. They will not have received a
WARM event, but will be called with a WARM temperature, so they will
receive a bogus COLD event.
(2)
Also in region *B*, backends could get added. They will implicitly
receive a WARM event (see VDI_event() in VRT_AddDirector()), and then
another from vcl_BackendEvent()
transition to WARM (failed)
---------------------------
AZ(vcl_send_event(vcl, VCL_EVENT_COLD, &nomsg));
AZ(nomsg);
vcl->temp = VCL_TEMP_COLD;
issues:
(3)
Backends added in region *B* will have received the implicit WARM event,
and thus need a COLD event for the "cancel".
To solve these issues, we need to do two things:
There needs to be some kind of transaction which begins with the temperature
change and ends when all backends have been notified appropriately. Backends can
not get deleted while the transaction is in progress.
We need a notion of "backends from before the temperature change" and "backends
from after".
The first part is already delivered by #4142: The vdire facility already
provides the transaction during which backends do not actually get deleted and
it ensures that the right temperature gets passed when the deletion is carried
out. So for this part, we only need to use vdire.
But issues (2) and (3) are not yet covered. For these, we add a checkpoint, such
that we know which directors from the list are the "base" from before the
temperature change and which are the "delta" added after it.
That's this patch.
vdire_start_event() atomically (under the vcl_mtx) sets the checkpoint and the
new temperature.
vdire_end_event() just uses the existing vdire_end_iter() and clears the
checkpoint.
vcl_BackendEvent() gets split into two:
backend_event_base() notifies backends from before the checkpoint.
backend_event_delta() atomically sets a new temperature and notifies backends
from after the checkpoint (but not from after its temperature change).
Fixes #4199
diff --git a/bin/varnishd/cache/cache_vcl.c b/bin/varnishd/cache/cache_vcl.c
index 5a82baf21..cc9c109d1 100644
--- a/bin/varnishd/cache/cache_vcl.c
+++ b/bin/varnishd/cache/cache_vcl.c
@@ -436,6 +436,36 @@ vdire_end_iter(struct vdire *vdire)
vcldir_retire(vdir, temp);
}
+/*
+ * like vdire_start_iter, but also prepare for backend_event_*()
+ * by setting the checkpoint
+ */
+static void
+vdire_start_event(struct vdire *vdire, const struct vcltemp *temp)
+{
+
+ CHECK_OBJ_NOTNULL(vdire, VDIRE_MAGIC);
+ AN(temp);
+
+ Lck_AssertHeld(vdire->mtx);
+
+ // https://github.com/varnishcache/varnish-cache/pull/4142#issuecomment-2593091097
+ ASSERT_CLI();
+
+ AZ(vdire->checkpoint);
+ vdire->checkpoint = VTAILQ_LAST(&vdire->directors, vcldir_head);
+ AN(vdire->tempp);
+ *vdire->tempp = temp;
+ vdire->iterating++;
+}
+
+static void
+vdire_end_event(struct vdire *vdire)
+{
+ vdire->checkpoint = NULL;
+ vdire_end_iter(vdire);
+}
+
// if there are no iterators, remove from directors and retire
// otherwise put on resigning list to work when iterators end
void
@@ -557,8 +587,13 @@ VCL_IterDirector(struct cli *cli, const char *pat,
return (found);
}
+/*
+ * vdire_start_event() must have been called before
+ *
+ * send event to directors pre checkpoint
+ */
static void
-vcl_BackendEvent(const struct vcl *vcl, enum vcl_event_e e)
+backend_event_base(const struct vcl *vcl, enum vcl_event_e e)
{
struct vcldir *vdir;
struct vdire *vdire;
@@ -567,11 +602,56 @@ vcl_BackendEvent(const struct vcl *vcl, enum vcl_event_e e)
CHECK_OBJ_NOTNULL(vcl, VCL_MAGIC);
AZ(vcl->busy);
vdire = vcl->vdire;
+ AN(vdire->iterating);
- vdire_start_iter(vdire);
- VTAILQ_FOREACH(vdir, &vdire->directors, directors_list)
+ vdir = vdire->checkpoint;
+ if (vdir == NULL)
+ return;
+
+ CHECK_OBJ(vdir, VCLDIR_MAGIC);
+ VTAILQ_FOREACH_REVERSE_FROM(vdir, &vdire->directors,
+ vcldir_head, directors_list) {
+ CHECK_OBJ(vdir, VCLDIR_MAGIC);
VDI_Event(vdir->dir, e);
- vdire_end_iter(vdire);
+ }
+}
+
+/*
+ * vdire_start_event() must have been called before
+ *
+ * set a new temperature.
+ * send event to directors added post checkpoint, but before
+ * the new temperature
+ */
+static void
+backend_event_delta(const struct vcl *vcl, enum vcl_event_e e, const struct vcltemp *temp)
+{
+ struct vcldir *vdir, *end;
+ struct vdire *vdire;
+
+ ASSERT_CLI();
+ CHECK_OBJ_NOTNULL(vcl, VCL_MAGIC);
+ AZ(vcl->busy);
+ vdire = vcl->vdire;
+ AN(temp);
+ AN(vdire->iterating);
+
+ Lck_Lock(vdire->mtx);
+ if (vdire->checkpoint == NULL)
+ vdir = VTAILQ_FIRST(&vdire->directors);
+ else
+ vdir = VTAILQ_NEXT(vdire->checkpoint, directors_list);
+ AN(vdire->tempp);
+ *vdire->tempp = temp;
+ end = VTAILQ_LAST(&vdire->directors, vcldir_head);
+ Lck_Unlock(vdire->mtx);
+
+ while (vdir != NULL) {
+ VDI_Event(vdir->dir, e);
+ if (vdir == end)
+ break;
+ vdir = VTAILQ_NEXT(vdire->checkpoint, directors_list);
+ }
}
static void
@@ -742,10 +822,12 @@ vcl_set_state(struct vcl *vcl, const char *state, struct vsb **msg)
break;
if (vcl->busy == 0 && vcl->temp->is_warm) {
Lck_Lock(&vcl_mtx);
- vcl->temp = VTAILQ_EMPTY(&vcl->ref_list) ?
- VCL_TEMP_COLD : VCL_TEMP_COOLING;
+ vdire_start_event(vcl->vdire, VTAILQ_EMPTY(&vcl->ref_list) ?
+ VCL_TEMP_COLD : VCL_TEMP_COOLING);
Lck_Unlock(&vcl_mtx);
- vcl_BackendEvent(vcl, VCL_EVENT_COLD);
+ backend_event_base(vcl, VCL_EVENT_COLD);
+ vdire_end_event(vcl->vdire);
+ // delta directors at VCL_TEMP_COLD do not need an event
AZ(vcl_send_event(vcl, VCL_EVENT_COLD, msg));
AZ(*msg);
}
@@ -769,16 +851,19 @@ vcl_set_state(struct vcl *vcl, const char *state, struct vsb **msg)
}
else {
Lck_Lock(&vcl_mtx);
- vcl->temp = VCL_TEMP_WARM;
+ vdire_start_event(vcl->vdire, VCL_TEMP_WARM);
Lck_Unlock(&vcl_mtx);
i = vcl_send_event(vcl, VCL_EVENT_WARM, msg);
if (i == 0) {
- vcl_BackendEvent(vcl, VCL_EVENT_WARM);
+ backend_event_base(vcl, VCL_EVENT_WARM);
+ // delta directors are already warm
+ vdire_end_event(vcl->vdire);
break;
}
AZ(vcl_send_event(vcl, VCL_EVENT_COLD, &nomsg));
AZ(nomsg);
- vcl->temp = VCL_TEMP_COLD;
+ backend_event_delta(vcl, VCL_EVENT_COLD, VCL_TEMP_COLD);
+ vdire_end_event(vcl->vdire);
}
break;
default:
diff --git a/bin/varnishd/cache/cache_vcl.h b/bin/varnishd/cache/cache_vcl.h
index f7fc31b60..3d8dacc66 100644
--- a/bin/varnishd/cache/cache_vcl.h
+++ b/bin/varnishd/cache/cache_vcl.h
@@ -50,6 +50,8 @@ struct vdire {
// to signal when iterators can enter again
pthread_cond_t cond;
const struct vcltemp **tempp;
+ // last director present when vdire_start_event is called
+ struct vcldir *checkpoint;
};
struct vcl {
More information about the varnish-commit
mailing list