summaryrefslogtreecommitdiffstats
path: root/kdbus-reduce-scope-of-handle-locking.patch
blob: 7cca4f787270466ff1db533913b20906f8244ea7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
From: David Herrmann <dh.herrmann@gmail.com>
Date: Sat, 18 Apr 2015 12:39:51 +0200
Subject: [PATCH] kdbus: reduce scope of handle locking

A kdbus handle is used to create objects in the kdbus hierarchy. During
open(), we do not have enough information to know how to setup the object.
Therefore, we provide setup ioctls, which allow user-space to pass in
parameters and options how the to-be-created object should behave. Once
setup is done, we allow user-space to use ioctls to operate on that newly
created object.

It is important to notice:
  1) Only one setup ioctl can ever be called on a handle. You cannot call
     multiple, different setup ioctls on the same handle.
  2) A setup ioctl can only be called once, if it succeeded. If it failed,
     it must not modify the handle in any way. If it succeeded, no further
     setup ioctl can be issued.
  3) After a setup ioctl is done, the handle is constant and must not be
     modified in any way.

So far, we used a write-lock around all setup ioctls, and a read-lock
around everything else. The handle setup-indicator (the type field) can
only be set under the write-lock. Whenever you access the handle under a
read-lock, you must verify it was set before, otherwise, you must bail out
as the handle was not initialized, yet.

This has the downside that we need a read-lock on all operations on the
handle. For performance reasons, we should avoid that. This patch turns
the rwlock into a mutex and removes the read-side lock from all paths. It
relies on the 3 behaviors described above.

With this patch, the mutex is only taken around setup ioctls. Furthermore,
the setup-indicator (the type field) is only ever set if the mutex is
held. The mutex guarantees that multiple setup ioctls cannot race, and
also, that only one setup ioctl will ever succeed. If a setup ioctl is
called after setup was already finished, we do not touch the handle at all
and immediately fail.

Furthermore, all other operations (non-setup operations) can only be
called once setup is done. Therefore, we must synchronize them with any
racing setup, otherwise, they might access the handle which is currently
modified by setup.
We protect from this race by setting the setup-indicator (the type field)
_last_, and issue a write-barrier before setting it. Once it is set, we
never modify the handle ever again; it is constant from now on until
file-release.
Hence, on the read-side we simply read the type field and issue a
read-barrier afterwards. _Iff_ the type field was not set, yet, we must
not access the handle in any way, but bail out immediately. Setup was not
done, yet. But if the type field was set, the read-barrier pairs with the
write-barrier during setup. All member fields of the handle object are
guaranteed to be accessible by us, as the type-field is always the last
field that is written.

With this in place, we reduce the locking-overhead of all non-setup ioctls
to a read-barrier, instead of a read-side lock. And in combination with
the follow-up that removes the active-refs from kdbus_handle_poll(), we're
now lock-free in ->poll and ->mmap callbacks.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Daniel Mack <daniel@zonque.org>
---
 ipc/kdbus/handle.c | 110 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 83 insertions(+), 27 deletions(-)

diff --git a/ipc/kdbus/handle.c b/ipc/kdbus/handle.c
index 3f5d8085a297..a3e01383a6f6 100644
--- a/ipc/kdbus/handle.c
+++ b/ipc/kdbus/handle.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/kdev_t.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/poll.h>
 #include <linux/rwsem.h>
 #include <linux/sched.h>
@@ -229,7 +230,7 @@ enum kdbus_handle_type {
 
 /**
  * struct kdbus_handle - handle to the kdbus system
- * @rwlock:		handle lock
+ * @lock:		handle lock
  * @type:		type of this handle (KDBUS_HANDLE_*)
  * @bus_owner:		bus this handle owns
  * @ep_owner:		endpoint this handle owns
@@ -237,7 +238,7 @@ enum kdbus_handle_type {
  * @privileged:		Flag to mark a handle as privileged
  */
 struct kdbus_handle {
-	struct rw_semaphore rwlock;
+	struct mutex lock;
 
 	enum kdbus_handle_type type;
 	union {
@@ -265,7 +266,7 @@ static int kdbus_handle_open(struct inode *inode, struct file *file)
 		goto exit;
 	}
 
-	init_rwsem(&handle->rwlock);
+	mutex_init(&handle->lock);
 	handle->type = KDBUS_HANDLE_NONE;
 
 	if (node->type == KDBUS_NODE_ENDPOINT) {
@@ -355,8 +356,8 @@ static long kdbus_handle_ioctl_control(struct file *file, unsigned int cmd,
 			break;
 		}
 
-		handle->type = KDBUS_HANDLE_BUS_OWNER;
 		handle->bus_owner = bus;
+		ret = KDBUS_HANDLE_BUS_OWNER;
 		break;
 	}
 
@@ -396,8 +397,8 @@ static long kdbus_handle_ioctl_ep(struct file *file, unsigned int cmd,
 			break;
 		}
 
-		handle->type = KDBUS_HANDLE_EP_OWNER;
 		handle->ep_owner = ep;
+		ret = KDBUS_HANDLE_EP_OWNER;
 		break;
 
 	case KDBUS_CMD_HELLO:
@@ -407,8 +408,8 @@ static long kdbus_handle_ioctl_ep(struct file *file, unsigned int cmd,
 			break;
 		}
 
-		handle->type = KDBUS_HANDLE_CONNECTED;
 		handle->conn = conn;
+		ret = KDBUS_HANDLE_CONNECTED;
 		break;
 
 	default:
@@ -522,19 +523,41 @@ static long kdbus_handle_ioctl(struct file *file, unsigned int cmd,
 	case KDBUS_CMD_BUS_MAKE:
 	case KDBUS_CMD_ENDPOINT_MAKE:
 	case KDBUS_CMD_HELLO:
-		/* bail out early if already typed */
-		if (handle->type != KDBUS_HANDLE_NONE)
-			break;
-
-		down_write(&handle->rwlock);
+		mutex_lock(&handle->lock);
 		if (handle->type == KDBUS_HANDLE_NONE) {
 			if (node->type == KDBUS_NODE_CONTROL)
 				ret = kdbus_handle_ioctl_control(file, cmd,
 								 argp);
 			else if (node->type == KDBUS_NODE_ENDPOINT)
 				ret = kdbus_handle_ioctl_ep(file, cmd, argp);
+
+			if (ret > 0) {
+				/*
+				 * The data given via open() is not sufficient
+				 * to setup a kdbus handle. Hence, we require
+				 * the user to perform a setup ioctl. This setup
+				 * can only be performed once and defines the
+				 * type of the handle. The different setup
+				 * ioctls are locked against each other so they
+				 * cannot race. Once the handle type is set,
+				 * the type-dependent ioctls are enabled. To
+				 * improve performance, we don't lock those via
+				 * handle->lock. Instead, we issue a
+				 * write-barrier before performing the
+				 * type-change, which pairs with smp_rmb() in
+				 * all handlers that access the type field. This
+				 * guarantees the handle is fully setup, if
+				 * handle->type is set. If handle->type is
+				 * unset, you must not make any assumptions
+				 * without taking handle->lock.
+				 * Note that handle->type is only set once. It
+				 * will never change afterwards.
+				 */
+				smp_wmb();
+				handle->type = ret;
+			}
 		}
-		up_write(&handle->rwlock);
+		mutex_unlock(&handle->lock);
 		break;
 
 	case KDBUS_CMD_ENDPOINT_UPDATE:
@@ -549,14 +572,30 @@ static long kdbus_handle_ioctl(struct file *file, unsigned int cmd,
 	case KDBUS_CMD_MATCH_REMOVE:
 	case KDBUS_CMD_SEND:
 	case KDBUS_CMD_RECV:
-	case KDBUS_CMD_FREE:
-		down_read(&handle->rwlock);
-		if (handle->type == KDBUS_HANDLE_EP_OWNER)
+	case KDBUS_CMD_FREE: {
+		enum kdbus_handle_type type;
+
+		/*
+		 * This read-barrier pairs with smp_wmb() of the handle setup.
+		 * it guarantees the handle is fully written, in case the
+		 * type has been set. It allows us to access the handle without
+		 * taking handle->lock, given the guarantee that the type is
+		 * only ever set once, and stays constant afterwards.
+		 * Furthermore, the handle object itself is not modified in any
+		 * way after the type is set. That is, the type-field is the
+		 * last field that is written on any handle. If it has not been
+		 * set, we must not access the handle here.
+		 */
+		type = handle->type;
+		smp_rmb();
+
+		if (type == KDBUS_HANDLE_EP_OWNER)
 			ret = kdbus_handle_ioctl_ep_owner(file, cmd, argp);
-		else if (handle->type == KDBUS_HANDLE_CONNECTED)
+		else if (type == KDBUS_HANDLE_CONNECTED)
 			ret = kdbus_handle_ioctl_connected(file, cmd, argp);
-		up_read(&handle->rwlock);
+
 		break;
+	}
 	default:
 		ret = -ENOTTY;
 		break;
@@ -569,16 +608,23 @@ static unsigned int kdbus_handle_poll(struct file *file,
 				      struct poll_table_struct *wait)
 {
 	struct kdbus_handle *handle = file->private_data;
+	enum kdbus_handle_type type;
 	unsigned int mask = POLLOUT | POLLWRNORM;
 	int ret;
 
+	/*
+	 * This pairs with smp_wmb() during handle setup. It guarantees that
+	 * _iff_ the handle type is set, handle->conn is valid. Furthermore,
+	 * _iff_ the type is set, the handle object is constant and never
+	 * changed again. If it's not set, we must not access the handle but
+	 * bail out. We also must assume no setup has taken place, yet.
+	 */
+	type = handle->type;
+	smp_rmb();
+
 	/* Only a connected endpoint can read/write data */
-	down_read(&handle->rwlock);
-	if (handle->type != KDBUS_HANDLE_CONNECTED) {
-		up_read(&handle->rwlock);
+	if (type != KDBUS_HANDLE_CONNECTED)
 		return POLLERR | POLLHUP;
-	}
-	up_read(&handle->rwlock);
 
 	ret = kdbus_conn_acquire(handle->conn);
 	if (ret < 0)
@@ -598,13 +644,23 @@ static unsigned int kdbus_handle_poll(struct file *file,
 static int kdbus_handle_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct kdbus_handle *handle = file->private_data;
+	enum kdbus_handle_type type;
 	int ret = -EBADFD;
 
-	if (down_read_trylock(&handle->rwlock)) {
-		if (handle->type == KDBUS_HANDLE_CONNECTED)
-			ret = kdbus_pool_mmap(handle->conn->pool, vma);
-		up_read(&handle->rwlock);
-	}
+	/*
+	 * This pairs with smp_wmb() during handle setup. It guarantees that
+	 * _iff_ the handle type is set, handle->conn is valid. Furthermore,
+	 * _iff_ the type is set, the handle object is constant and never
+	 * changed again. If it's not set, we must not access the handle but
+	 * bail out. We also must assume no setup has taken place, yet.
+	 */
+	type = handle->type;
+	smp_rmb();
+
+	/* Only connected handles have a pool we can map */
+	if (type == KDBUS_HANDLE_CONNECTED)
+		ret = kdbus_pool_mmap(handle->conn->pool, vma);
+
 	return ret;
 }