[master] ab21f4a2c vbp: Rework probe state management
Nils Goroll
nils.goroll at uplex.de
Mon Jul 15 18:29:03 UTC 2024
commit ab21f4a2c9f580814bd0626371e755ea539c665e
Author: Nils Goroll <nils.goroll at uplex.de>
Date: Mon Jun 3 20:00:04 2024 +0200
vbp: Rework probe state management
Every time I looked at the probe code, my mind ended up twisted and confused. A
probe could change the "enabled" state (tracking the temperature) and be removed
at any time (unless the mtx is held), yet the code did not seem to reflect this.
We un-twist my mind by completing the transition to probe states and adding a
chain of two states for the case that a probe is controlled/deleted while its
task is running:
cooling: running probe disabled
deleted: running probe removed (while cooling only)
With this new scheme, we can now have (I think) a clean state diagram (see dot
file):
- a probe begins in the cold state
- from cold, it can either get removed or scheduled via VBP_Control()
- from scheduled, it can go back to cold (via VBP_Control()) or
be picked up by vbp_thread() to change to running (aka task started)
- once the task finishes, it normally goes back to scheduled,
but in the meantime it could have changed to cooling or deleted,
so vbp_task_complete() handles these cases and either transitions to cold
or deletes the probe
- if the task cannot be scheduled, the same handling happens
We now also remove running probes from the binheap to remove complexity.
Fixes #4108 for good
diff --git a/bin/varnishd/cache/cache_backend_probe.c b/bin/varnishd/cache/cache_backend_probe.c
index 556d318c0..feb9b91a4 100644
--- a/bin/varnishd/cache/cache_backend_probe.c
+++ b/bin/varnishd/cache/cache_backend_probe.c
@@ -64,6 +64,7 @@ struct vbp_state {
VBP_STATE(scheduled);
VBP_STATE(running);
VBP_STATE(cold);
+VBP_STATE(cooling);
VBP_STATE(deleted);
#undef VBP_STATE
@@ -95,7 +96,6 @@ struct vbp_target {
vtim_real due;
const struct vbp_state *state;
- int running;
int heap_idx;
struct pool_task task[1];
};
@@ -444,6 +444,33 @@ vbp_heap_insert(struct vbp_target *vt)
/*--------------------------------------------------------------------
*/
+/*
+ * called when a task was successful or could not get scheduled
+ * returns non-NULL if target is to be deleted (outside mtx)
+ */
+static struct vbp_target *
+vbp_task_complete(struct vbp_target *vt)
+{
+ CHECK_OBJ_NOTNULL(vt, VBP_TARGET_MAGIC);
+
+ Lck_AssertHeld(&vbp_mtx);
+
+ assert(vt->heap_idx == VBH_NOIDX);
+
+ if (vt->state == vbp_state_running) {
+ vt->state = vbp_state_scheduled;
+ vt->due = VTIM_real() + vt->interval;
+ vbp_heap_insert(vt);
+ vt = NULL;
+ } else if (vt->state == vbp_state_cooling) {
+ vt->state = vbp_state_cold;
+ vt = NULL;
+ } else if (vt->state != vbp_state_deleted) {
+ WRONG(vt->state->name);
+ }
+ return (vt);
+}
+
static void v_matchproto_(task_func_t)
vbp_task(struct worker *wrk, void *priv)
{
@@ -452,7 +479,6 @@ vbp_task(struct worker *wrk, void *priv)
CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
CAST_OBJ_NOTNULL(vt, priv, VBP_TARGET_MAGIC);
- AN(vt->running);
AN(vt->req);
assert(vt->req_len > 0);
@@ -462,20 +488,11 @@ vbp_task(struct worker *wrk, void *priv)
VBP_Update_Backend(vt);
Lck_Lock(&vbp_mtx);
- if (vt->running < 0) {
- assert(vt->state == vbp_state_deleted);
- vbp_delete(vt);
- } else {
- assert(vt->state == vbp_state_running);
- vt->running = 0;
- if (vt->heap_idx != VBH_NOIDX) {
- vt->due = VTIM_real() + vt->interval;
- VBH_delete(vbp_heap, vt->heap_idx);
- vbp_heap_insert(vt);
- }
- vt->state = vbp_state_scheduled;
- }
+ vt = vbp_task_complete(vt);
Lck_Unlock(&vbp_mtx);
+ if (vt == NULL)
+ return;
+ vbp_delete(vt);
}
/*--------------------------------------------------------------------
@@ -502,26 +519,26 @@ vbp_thread(struct worker *wrk, void *priv)
vt = NULL;
(void)Lck_CondWaitUntil(&vbp_cond, &vbp_mtx, nxt);
} else {
+ assert(vt->state == vbp_state_scheduled);
VBH_delete(vbp_heap, vt->heap_idx);
- vt->due = now + vt->interval;
- VBH_insert(vbp_heap, vt);
- if (!vt->running) {
- assert(vt->state == vbp_state_scheduled);
- vt->state = vbp_state_running;
- vt->running = 1;
- vt->task->func = vbp_task;
- vt->task->priv = vt;
- Lck_Unlock(&vbp_mtx);
- r = Pool_Task_Any(vt->task, TASK_QUEUE_REQ);
- Lck_Lock(&vbp_mtx);
- if (r) {
- vt->running = 0;
- vt->state = vbp_state_scheduled;
- }
- }
- else
- assert(vt->state == vbp_state_running);
+ vt->state = vbp_state_running;
+ vt->task->func = vbp_task;
+ vt->task->priv = vt;
+ Lck_Unlock(&vbp_mtx);
+ r = Pool_Task_Any(vt->task, TASK_QUEUE_REQ);
+
+ Lck_Lock(&vbp_mtx);
+ if (r == 0)
+ continue;
+ vt = vbp_task_complete(vt);
+ if (vt == NULL)
+ continue;
+ Lck_Unlock(&vbp_mtx);
+
+ vbp_delete(vt);
+
+ Lck_Lock(&vbp_mtx);
}
}
NEEDLESS(Lck_Unlock(&vbp_mtx));
@@ -681,18 +698,24 @@ VBP_Control(const struct backend *be, int enable)
Lck_Lock(&vbp_mtx);
if (enable) {
- // XXX next two assertions are WRONG, see #4108 - WIP
- assert(vt->state == vbp_state_cold);
- assert(vt->heap_idx == VBH_NOIDX);
- vt->due = VTIM_real();
- vbp_heap_insert(vt);
- vt->state = vbp_state_scheduled;
+ if (vt->state == vbp_state_cooling) {
+ vt->state = vbp_state_running;
+ } else if (vt->state == vbp_state_cold) {
+ assert(vt->heap_idx == VBH_NOIDX);
+ vt->due = VTIM_real();
+ vbp_heap_insert(vt);
+ vt->state = vbp_state_scheduled;
+ } else
+ WRONG(vt->state->name);
} else {
- assert(vt->state == vbp_state_scheduled ||
- vt->state == vbp_state_running);
- assert(vt->heap_idx != VBH_NOIDX);
- VBH_delete(vbp_heap, vt->heap_idx);
- vt->state = vbp_state_cold;
+ if (vt->state == vbp_state_running) {
+ vt->state = vbp_state_cooling;
+ } else if (vt->state == vbp_state_scheduled) {
+ assert(vt->heap_idx != VBH_NOIDX);
+ VBH_delete(vbp_heap, vt->heap_idx);
+ vt->state = vbp_state_cold;
+ } else
+ WRONG(vt->state->name);
}
Lck_Unlock(&vbp_mtx);
}
@@ -740,16 +763,9 @@ VBP_Remove(struct backend *be)
be->sick = 1;
be->probe = NULL;
vt->backend = NULL;
- if (vt->running) {
- assert(vt->state == vbp_state_running);
- // task scheduled, it calls vbp_delete()
- vt->running = -1;
- vt = NULL;
+ if (vt->state == vbp_state_cooling) {
vt->state = vbp_state_deleted;
- } else if (vt->heap_idx != VBH_NOIDX) {
- assert(vt->state == vbp_state_scheduled);
- // task done, not yet rescheduled
- VBH_delete(vbp_heap, vt->heap_idx);
+ vt = NULL;
} else
assert(vt->state == vbp_state_cold);
Lck_Unlock(&vbp_mtx);
@@ -763,18 +779,11 @@ static int v_matchproto_(vbh_cmp_t)
vbp_cmp(void *priv, const void *a, const void *b)
{
const struct vbp_target *aa, *bb;
- int ar, br;
AZ(priv);
CAST_OBJ_NOTNULL(aa, a, VBP_TARGET_MAGIC);
CAST_OBJ_NOTNULL(bb, b, VBP_TARGET_MAGIC);
- ar = aa->running == 0;
- br = bb->running == 0;
-
- if (ar != br)
- return (ar);
-
return (aa->due < bb->due);
}
diff --git a/doc/graphviz/cache_backend_probe.dot b/doc/graphviz/cache_backend_probe.dot
index 937eb6c0f..9f289b162 100644
--- a/doc/graphviz/cache_backend_probe.dot
+++ b/doc/graphviz/cache_backend_probe.dot
@@ -5,31 +5,33 @@ digraph cache_backend_probe {
scheduled
running
cold
- deleted
+ cooling # going cold while task runs
+ deleted # from cooling, removed while task runs
FREE
edge [fontname=Courier]
- edge [label="vbp_task()"]
- deleted -> FREE
+ # via vbp_task() or vbp_thread() scheduling error
+ edge [label="vbp_task_complete()"]
running -> scheduled
+ cooling -> cold
+ deleted -> FREE
edge [label="vbp_thread()"]
scheduled -> running
- edge [label="vbp_thread() error"]
- scheduled -> scheduled
-
- edge [label="VBP_Control()"]
+ edge [label="VBP_Control(enable)"]
+ cooling -> running
cold -> scheduled
+
+ edge [label="VBP_Control(disable)"]
+ running -> cooling
scheduled -> cold
- running -> cold
edge [label="VBP_Insert()"]
ALLOC -> cold
edge [label="VBP_Remove()"]
- running -> deleted # should not happen. we should go through some cool first
- scheduled -> FREE # This should not happen. VBP_Control should have set cold
+ cooling -> deleted
cold -> FREE
}
More information about the varnish-commit
mailing list