diff options
Diffstat (limited to '0001-libmultipath-fix-tur-checker-locking.patch')
| -rw-r--r-- | 0001-libmultipath-fix-tur-checker-locking.patch | 269 |
1 files changed, 269 insertions, 0 deletions
diff --git a/0001-libmultipath-fix-tur-checker-locking.patch b/0001-libmultipath-fix-tur-checker-locking.patch new file mode 100644 index 0000000..f6a255f --- /dev/null +++ b/0001-libmultipath-fix-tur-checker-locking.patch @@ -0,0 +1,269 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Marzinski <bmarzins@redhat.com> +Date: Thu, 8 Feb 2018 16:56:45 -0600 +Subject: [PATCH] libmultipath: fix tur checker locking + +Commit 6e2423fd fixed a bug where the tur checker could cancel a +detached thread after it had exitted. However in fixing this, the new +code grabbed a mutex (to call condlog) while holding a spin_lock. To +deal with this, I've done away with the holder spin_lock completely, and +replaced it with two atomic variables, based on a suggestion by Martin +Wilck. + +The holder variable works exactly like before. When the checker is +initialized, it is set to 1. When a thread is created it is incremented. +When either the thread or the checker are done with the context, they +atomically decrement the holder variable and check its value. If it +is 0, they free the context. If it is 1, they never touch the context +again. + +The other variable has changed. First, ct->running and ct->thread have +switched uses. ct->thread is now only ever accessed by the checker, +never the thread. If it is non-NULL, a thread has started up, but +hasn't been dealt with by the checker yet. It is also obviously used +by the checker to cancel the thread. + +ct->running is now an atomic variable. When the thread is started +it is set to 1. When the checker wants to kill a thread, it atomically +sets the value to 0 and reads the previous value. If it was 1, +the checker cancels the thread. If it was 0, the nothing needs to be +done. After the checker has dealt with the thread, it sets ct->thread +to NULL. + +Right before the thread finishes and pops the cleanup handler, it +atomically sets the value of ct->running to 0 and reads the previous +value. If it was 1, the thread just pops the cleanup handler and exits. +If it was 0, then the checker is trying to cancel the thread, and so the +thread calls pause(), which is a cancellation point. + +Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com> +--- + libmultipath/checkers/tur.c | 107 ++++++++++++++++++-------------------------- + 1 file changed, 43 insertions(+), 64 deletions(-) + +diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c +index b4a5cb2..9155960 100644 +--- a/libmultipath/checkers/tur.c ++++ b/libmultipath/checkers/tur.c +@@ -15,6 +15,7 @@ + #include <errno.h> + #include <sys/time.h> + #include <pthread.h> ++#include <urcu/uatomic.h> + + #include "checkers.h" + +@@ -44,7 +45,6 @@ struct tur_checker_context { + pthread_t thread; + pthread_mutex_t lock; + pthread_cond_t active; +- pthread_spinlock_t hldr_lock; + int holders; + char message[CHECKER_MSG_LEN]; + }; +@@ -74,13 +74,12 @@ int libcheck_init (struct checker * c) + + ct->state = PATH_UNCHECKED; + ct->fd = -1; +- ct->holders = 1; ++ uatomic_set(&ct->holders, 1); + pthread_cond_init_mono(&ct->active); + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&ct->lock, &attr); + pthread_mutexattr_destroy(&attr); +- pthread_spin_init(&ct->hldr_lock, PTHREAD_PROCESS_PRIVATE); + c->context = ct; + + return 0; +@@ -90,7 +89,6 @@ static void cleanup_context(struct tur_checker_context *ct) + { + pthread_mutex_destroy(&ct->lock); + pthread_cond_destroy(&ct->active); +- pthread_spin_destroy(&ct->hldr_lock); + free(ct); + } + +@@ -99,16 +97,14 @@ void libcheck_free (struct checker * c) + if (c->context) { + struct tur_checker_context *ct = c->context; + int holders; +- pthread_t thread; +- +- pthread_spin_lock(&ct->hldr_lock); +- ct->holders--; +- holders = ct->holders; +- thread = ct->thread; +- pthread_spin_unlock(&ct->hldr_lock); +- if (holders) +- pthread_cancel(thread); +- else ++ int running; ++ ++ running = uatomic_xchg(&ct->running, 0); ++ if (running) ++ pthread_cancel(ct->thread); ++ ct->thread = 0; ++ holders = uatomic_sub_return(&ct->holders, 1); ++ if (!holders) + cleanup_context(ct); + c->context = NULL; + } +@@ -220,26 +216,12 @@ static void cleanup_func(void *data) + { + int holders; + struct tur_checker_context *ct = data; +- pthread_spin_lock(&ct->hldr_lock); +- ct->holders--; +- holders = ct->holders; +- ct->thread = 0; +- pthread_spin_unlock(&ct->hldr_lock); ++ ++ holders = uatomic_sub_return(&ct->holders, 1); + if (!holders) + cleanup_context(ct); + } + +-static int tur_running(struct tur_checker_context *ct) +-{ +- pthread_t thread; +- +- pthread_spin_lock(&ct->hldr_lock); +- thread = ct->thread; +- pthread_spin_unlock(&ct->hldr_lock); +- +- return thread != 0; +-} +- + static void copy_msg_to_tcc(void *ct_p, const char *msg) + { + struct tur_checker_context *ct = ct_p; +@@ -252,7 +234,7 @@ static void copy_msg_to_tcc(void *ct_p, const char *msg) + static void *tur_thread(void *ctx) + { + struct tur_checker_context *ct = ctx; +- int state; ++ int state, running; + char devt[32]; + + condlog(3, "%s: tur checker starting up", +@@ -278,6 +260,11 @@ static void *tur_thread(void *ctx) + + condlog(3, "%s: tur checker finished, state %s", + tur_devt(devt, sizeof(devt), ct), checker_state_name(state)); ++ ++ running = uatomic_xchg(&ct->running, 0); ++ if (!running) ++ pause(); ++ + tur_thread_cleanup_pop(ct); + + return ((void *)0); +@@ -325,7 +312,6 @@ int libcheck_check(struct checker * c) + int tur_status, r; + char devt[32]; + +- + if (!ct) + return PATH_UNCHECKED; + +@@ -349,38 +335,29 @@ int libcheck_check(struct checker * c) + return PATH_WILD; + } + +- if (ct->running) { +- /* +- * Check if TUR checker is still running. Hold hldr_lock +- * around the pthread_cancel() call to avoid that +- * pthread_cancel() gets called after the (detached) TUR +- * thread has exited. +- */ +- pthread_spin_lock(&ct->hldr_lock); +- if (ct->thread) { +- if (tur_check_async_timeout(c)) { +- condlog(3, "%s: tur checker timeout", +- tur_devt(devt, sizeof(devt), ct)); ++ if (ct->thread) { ++ if (tur_check_async_timeout(c)) { ++ int running = uatomic_xchg(&ct->running, 0); ++ if (running) + pthread_cancel(ct->thread); +- ct->running = 0; +- MSG(c, MSG_TUR_TIMEOUT); +- tur_status = PATH_TIMEOUT; +- } else { +- condlog(3, "%s: tur checker not finished", ++ condlog(3, "%s: tur checker timeout", ++ tur_devt(devt, sizeof(devt), ct)); ++ ct->thread = 0; ++ MSG(c, MSG_TUR_TIMEOUT); ++ tur_status = PATH_TIMEOUT; ++ } else if (uatomic_read(&ct->running) != 0) { ++ condlog(3, "%s: tur checker not finished", + tur_devt(devt, sizeof(devt), ct)); +- ct->running++; +- tur_status = PATH_PENDING; +- } ++ tur_status = PATH_PENDING; + } else { + /* TUR checker done */ +- ct->running = 0; ++ ct->thread = 0; + tur_status = ct->state; + strlcpy(c->message, ct->message, sizeof(c->message)); + } +- pthread_spin_unlock(&ct->hldr_lock); + pthread_mutex_unlock(&ct->lock); + } else { +- if (tur_running(ct)) { ++ if (uatomic_read(&ct->running) != 0) { + /* pthread cancel failed. continue in sync mode */ + pthread_mutex_unlock(&ct->lock); + condlog(3, "%s: tur thread not responding", +@@ -391,19 +368,17 @@ int libcheck_check(struct checker * c) + ct->state = PATH_UNCHECKED; + ct->fd = c->fd; + ct->timeout = c->timeout; +- pthread_spin_lock(&ct->hldr_lock); +- ct->holders++; +- pthread_spin_unlock(&ct->hldr_lock); ++ uatomic_add(&ct->holders, 1); ++ uatomic_set(&ct->running, 1); + tur_set_async_timeout(c); + setup_thread_attr(&attr, 32 * 1024, 1); + r = pthread_create(&ct->thread, &attr, tur_thread, ct); + pthread_attr_destroy(&attr); + if (r) { +- pthread_spin_lock(&ct->hldr_lock); +- ct->holders--; +- pthread_spin_unlock(&ct->hldr_lock); +- pthread_mutex_unlock(&ct->lock); ++ uatomic_sub(&ct->holders, 1); ++ uatomic_set(&ct->running, 0); + ct->thread = 0; ++ pthread_mutex_unlock(&ct->lock); + condlog(3, "%s: failed to start tur thread, using" + " sync mode", tur_devt(devt, sizeof(devt), ct)); + return tur_check(c->fd, c->timeout, +@@ -414,12 +389,16 @@ int libcheck_check(struct checker * c) + tur_status = ct->state; + strlcpy(c->message, ct->message, sizeof(c->message)); + pthread_mutex_unlock(&ct->lock); +- if (tur_running(ct) && ++ if (uatomic_read(&ct->running) != 0 && + (tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) { + condlog(3, "%s: tur checker still running", + tur_devt(devt, sizeof(devt), ct)); +- ct->running = 1; + tur_status = PATH_PENDING; ++ } else { ++ int running = uatomic_xchg(&ct->running, 0); ++ if (running) ++ pthread_cancel(ct->thread); ++ ct->thread = 0; + } + } + +-- +2.7.4 + |
