From d9d4b0200218ea299a8a673126721eafb9a0a1b2 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Sun, 27 Mar 2011 13:23:05 -0400 Subject: [PATCH] Avoid race condition between nap()'s timeout expiring and incoming work calling resume(). --- server/harness.c | 10 +++++++--- server/harness.h | 2 +- server/queue.c | 24 ++++++++++++++++++++++-- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/server/harness.c b/server/harness.c index ea222b7..2cf39e5 100644 --- a/server/harness.c +++ b/server/harness.c @@ -84,9 +84,13 @@ void suspend(void) { schedule(); } -void resume(Process *p) { - assert(p->state == PROCESS_WAITING); - enqueue_runlist(p); +int resume(Process *p) { + if (p->state == PROCESS_WAITING) { + enqueue_runlist(p); + return 0; + } else { + return -1; + } } static void driver(void (*f)(void *), void *arg) { diff --git a/server/harness.h b/server/harness.h index bd54ef0..d4e612b 100644 --- a/server/harness.h +++ b/server/harness.h @@ -40,7 +40,7 @@ extern Process *spawn(process_main_t f, void *arg); extern int nap(long millis); /* 1 for timeout expired; 0 for resumed early */ extern void suspend(void); -extern void resume(Process *p); +extern int resume(Process *p); extern IOHandle *new_iohandle(int fd); extern void delete_iohandle(IOHandle *h); diff --git a/server/queue.c b/server/queue.c index 721ad77..30305ae 100644 --- a/server/queue.c +++ b/server/queue.c @@ -158,12 +158,32 @@ static void shoveller(void *qv) { } static void throck_shovel(queue_extension_t *q) { + //int counter = 0; + retry: + //printf("throck %d %d %p\n", counter++, q->shovel_awake, q->shovel); if (!q->shovel_awake) { - q->shovel_awake = 1; if (!q->shovel) { + q->shovel_awake = 1; q->shovel = spawn(shoveller, q); } else { - resume(q->shovel); + if (resume(q->shovel) == -1) { + /* The nap() in the shoveller returned and scheduled the + shoveller *just* before we got to it, but the shoveller + hasn't had a chance to run yet, so hasn't been able to + clear q->shovel and exit. The resume() attempt failed + because q->shovel's state is PROCESS_RUNNING, now that it + has been scheduled by the return of nap(), so we know that + we should back off and try again from the top. */ + yield(); + goto retry; + } else { + /* The resume() was successful, i.e. the nap() hadn't returned + before we tried to resume(). We know that nap() will return + zero (since the timeout didn't fire before the process was + resumed), and so the existing shoveller will continue + running. */ + q->shovel_awake = 1; + } } } }