summaryrefslogtreecommitdiffstats
path: root/0001-libmultipath-fix-tur-checker-locking.patch
blob: f6a255f8cac74e379ad955a520b1cfcaf0198ae0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Thu, 8 Feb 2018 16:56:45 -0600
Subject: [PATCH] libmultipath: fix tur checker locking

Commit 6e2423fd fixed a bug where the tur checker could cancel a
detached thread after it had exitted. However in fixing this, the new
code grabbed a mutex (to call condlog) while holding a spin_lock.  To
deal with this, I've done away with the holder spin_lock completely, and
replaced it with two atomic variables, based on a suggestion by Martin
Wilck.

The holder variable works exactly like before.  When the checker is
initialized, it is set to 1. When a thread is created it is incremented.
When either the thread or the checker are done with the context, they
atomically decrement the holder variable and check its value. If it
is 0, they free the context. If it is 1, they never touch the context
again.

The other variable has changed. First, ct->running and ct->thread have
switched uses. ct->thread is now only ever accessed by the checker,
never the thread.  If it is non-NULL, a thread has started up, but
hasn't been dealt with by the checker yet. It is also obviously used
by the checker to cancel the thread.

ct->running is now an atomic variable.  When the thread is started
it is set to 1. When the checker wants to kill a thread, it atomically
sets the value to 0 and reads the previous value.  If it was 1,
the checker cancels the thread. If it was 0, the nothing needs to be
done.  After the checker has dealt with the thread, it sets ct->thread
to NULL.

Right before the thread finishes and pops the cleanup handler, it
atomically sets the value of ct->running to 0 and reads the previous
value. If it was 1, the thread just pops the cleanup handler and exits.
If it was 0, then the checker is trying to cancel the thread, and so the
thread calls pause(), which is a cancellation point.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
 libmultipath/checkers/tur.c | 107 ++++++++++++++++++--------------------------
 1 file changed, 43 insertions(+), 64 deletions(-)

diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
index b4a5cb2..9155960 100644
--- a/libmultipath/checkers/tur.c
+++ b/libmultipath/checkers/tur.c
@@ -15,6 +15,7 @@
 #include <errno.h>
 #include <sys/time.h>
 #include <pthread.h>
+#include <urcu/uatomic.h>
 
 #include "checkers.h"
 
@@ -44,7 +45,6 @@ struct tur_checker_context {
 	pthread_t thread;
 	pthread_mutex_t lock;
 	pthread_cond_t active;
-	pthread_spinlock_t hldr_lock;
 	int holders;
 	char message[CHECKER_MSG_LEN];
 };
@@ -74,13 +74,12 @@ int libcheck_init (struct checker * c)
 
 	ct->state = PATH_UNCHECKED;
 	ct->fd = -1;
-	ct->holders = 1;
+	uatomic_set(&ct->holders, 1);
 	pthread_cond_init_mono(&ct->active);
 	pthread_mutexattr_init(&attr);
 	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
 	pthread_mutex_init(&ct->lock, &attr);
 	pthread_mutexattr_destroy(&attr);
-	pthread_spin_init(&ct->hldr_lock, PTHREAD_PROCESS_PRIVATE);
 	c->context = ct;
 
 	return 0;
@@ -90,7 +89,6 @@ static void cleanup_context(struct tur_checker_context *ct)
 {
 	pthread_mutex_destroy(&ct->lock);
 	pthread_cond_destroy(&ct->active);
-	pthread_spin_destroy(&ct->hldr_lock);
 	free(ct);
 }
 
@@ -99,16 +97,14 @@ void libcheck_free (struct checker * c)
 	if (c->context) {
 		struct tur_checker_context *ct = c->context;
 		int holders;
-		pthread_t thread;
-
-		pthread_spin_lock(&ct->hldr_lock);
-		ct->holders--;
-		holders = ct->holders;
-		thread = ct->thread;
-		pthread_spin_unlock(&ct->hldr_lock);
-		if (holders)
-			pthread_cancel(thread);
-		else
+		int running;
+
+		running = uatomic_xchg(&ct->running, 0);
+		if (running)
+			pthread_cancel(ct->thread);
+		ct->thread = 0;
+		holders = uatomic_sub_return(&ct->holders, 1);
+		if (!holders)
 			cleanup_context(ct);
 		c->context = NULL;
 	}
@@ -220,26 +216,12 @@ static void cleanup_func(void *data)
 {
 	int holders;
 	struct tur_checker_context *ct = data;
-	pthread_spin_lock(&ct->hldr_lock);
-	ct->holders--;
-	holders = ct->holders;
-	ct->thread = 0;
-	pthread_spin_unlock(&ct->hldr_lock);
+
+	holders = uatomic_sub_return(&ct->holders, 1);
 	if (!holders)
 		cleanup_context(ct);
 }
 
-static int tur_running(struct tur_checker_context *ct)
-{
-	pthread_t thread;
-
-	pthread_spin_lock(&ct->hldr_lock);
-	thread = ct->thread;
-	pthread_spin_unlock(&ct->hldr_lock);
-
-	return thread != 0;
-}
-
 static void copy_msg_to_tcc(void *ct_p, const char *msg)
 {
 	struct tur_checker_context *ct = ct_p;
@@ -252,7 +234,7 @@ static void copy_msg_to_tcc(void *ct_p, const char *msg)
 static void *tur_thread(void *ctx)
 {
 	struct tur_checker_context *ct = ctx;
-	int state;
+	int state, running;
 	char devt[32];
 
 	condlog(3, "%s: tur checker starting up",
@@ -278,6 +260,11 @@ static void *tur_thread(void *ctx)
 
 	condlog(3, "%s: tur checker finished, state %s",
 		tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
+
+	running = uatomic_xchg(&ct->running, 0);
+	if (!running)
+		pause();
+
 	tur_thread_cleanup_pop(ct);
 
 	return ((void *)0);
@@ -325,7 +312,6 @@ int libcheck_check(struct checker * c)
 	int tur_status, r;
 	char devt[32];
 
-
 	if (!ct)
 		return PATH_UNCHECKED;
 
@@ -349,38 +335,29 @@ int libcheck_check(struct checker * c)
 		return PATH_WILD;
 	}
 
-	if (ct->running) {
-		/*
-		 * Check if TUR checker is still running. Hold hldr_lock
-		 * around the pthread_cancel() call to avoid that
-		 * pthread_cancel() gets called after the (detached) TUR
-		 * thread has exited.
-		 */
-		pthread_spin_lock(&ct->hldr_lock);
-		if (ct->thread) {
-			if (tur_check_async_timeout(c)) {
-				condlog(3, "%s: tur checker timeout",
-					tur_devt(devt, sizeof(devt), ct));
+	if (ct->thread) {
+		if (tur_check_async_timeout(c)) {
+			int running = uatomic_xchg(&ct->running, 0);
+			if (running)
 				pthread_cancel(ct->thread);
-				ct->running = 0;
-				MSG(c, MSG_TUR_TIMEOUT);
-				tur_status = PATH_TIMEOUT;
-			} else {
-				condlog(3, "%s: tur checker not finished",
+			condlog(3, "%s: tur checker timeout",
+				tur_devt(devt, sizeof(devt), ct));
+			ct->thread = 0;
+			MSG(c, MSG_TUR_TIMEOUT);
+			tur_status = PATH_TIMEOUT;
+		} else if (uatomic_read(&ct->running) != 0) {
+			condlog(3, "%s: tur checker not finished",
 					tur_devt(devt, sizeof(devt), ct));
-				ct->running++;
-				tur_status = PATH_PENDING;
-			}
+			tur_status = PATH_PENDING;
 		} else {
 			/* TUR checker done */
-			ct->running = 0;
+			ct->thread = 0;
 			tur_status = ct->state;
 			strlcpy(c->message, ct->message, sizeof(c->message));
 		}
-		pthread_spin_unlock(&ct->hldr_lock);
 		pthread_mutex_unlock(&ct->lock);
 	} else {
-		if (tur_running(ct)) {
+		if (uatomic_read(&ct->running) != 0) {
 			/* pthread cancel failed. continue in sync mode */
 			pthread_mutex_unlock(&ct->lock);
 			condlog(3, "%s: tur thread not responding",
@@ -391,19 +368,17 @@ int libcheck_check(struct checker * c)
 		ct->state = PATH_UNCHECKED;
 		ct->fd = c->fd;
 		ct->timeout = c->timeout;
-		pthread_spin_lock(&ct->hldr_lock);
-		ct->holders++;
-		pthread_spin_unlock(&ct->hldr_lock);
+		uatomic_add(&ct->holders, 1);
+		uatomic_set(&ct->running, 1);
 		tur_set_async_timeout(c);
 		setup_thread_attr(&attr, 32 * 1024, 1);
 		r = pthread_create(&ct->thread, &attr, tur_thread, ct);
 		pthread_attr_destroy(&attr);
 		if (r) {
-			pthread_spin_lock(&ct->hldr_lock);
-			ct->holders--;
-			pthread_spin_unlock(&ct->hldr_lock);
-			pthread_mutex_unlock(&ct->lock);
+			uatomic_sub(&ct->holders, 1);
+			uatomic_set(&ct->running, 0);
 			ct->thread = 0;
+			pthread_mutex_unlock(&ct->lock);
 			condlog(3, "%s: failed to start tur thread, using"
 				" sync mode", tur_devt(devt, sizeof(devt), ct));
 			return tur_check(c->fd, c->timeout,
@@ -414,12 +389,16 @@ int libcheck_check(struct checker * c)
 		tur_status = ct->state;
 		strlcpy(c->message, ct->message, sizeof(c->message));
 		pthread_mutex_unlock(&ct->lock);
-		if (tur_running(ct) &&
+		if (uatomic_read(&ct->running) != 0 &&
 		    (tur_status == PATH_PENDING || tur_status == PATH_UNCHECKED)) {
 			condlog(3, "%s: tur checker still running",
 				tur_devt(devt, sizeof(devt), ct));
-			ct->running = 1;
 			tur_status = PATH_PENDING;
+		} else {
+			int running = uatomic_xchg(&ct->running, 0);
+			if (running)
+				pthread_cancel(ct->thread);
+			ct->thread = 0;
 		}
 	}
 
-- 
2.7.4