summaryrefslogtreecommitdiffstats
path: root/sctp-Prevent-soft-lockup-when-sctp_accept-is-called-.patch
blob: e9d709c670391b48201ec36b0860fcbb4ff4ae1c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
From 51d49d5d636dd24c1d729f79037d5d033eda7e65 Mon Sep 17 00:00:00 2001
From: Karl Heiss <kheiss@gmail.com>
Date: Thu, 24 Sep 2015 12:15:07 -0400
Subject: [PATCH] sctp: Prevent soft lockup when sctp_accept() is called during
 a timeout event

A case can occur when sctp_accept() is called by the user during
a heartbeat timeout event after the 4-way handshake.  Since
sctp_assoc_migrate() changes both assoc->base.sk and assoc->ep, the
bh_sock_lock in sctp_generate_heartbeat_event() will be taken with
the listening socket but released with the new association socket.
The result is a deadlock on any future attempts to take the listening
socket lock.

Note that this race can occur with other SCTP timeouts that take
the bh_lock_sock() in the event sctp_accept() is called.

 BUG: soft lockup - CPU#9 stuck for 67s! [swapper:0]
 ...
 RIP: 0010:[<ffffffff8152d48e>]  [<ffffffff8152d48e>] _spin_lock+0x1e/0x30
 RSP: 0018:ffff880028323b20  EFLAGS: 00000206
 RAX: 0000000000000002 RBX: ffff880028323b20 RCX: 0000000000000000
 RDX: 0000000000000000 RSI: ffff880028323be0 RDI: ffff8804632c4b48
 RBP: ffffffff8100bb93 R08: 0000000000000000 R09: 0000000000000000
 R10: ffff880610662280 R11: 0000000000000100 R12: ffff880028323aa0
 R13: ffff8804383c3880 R14: ffff880028323a90 R15: ffffffff81534225
 FS:  0000000000000000(0000) GS:ffff880028320000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
 CR2: 00000000006df528 CR3: 0000000001a85000 CR4: 00000000000006e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
 Process swapper (pid: 0, threadinfo ffff880616b70000, task ffff880616b6cab0)
 Stack:
 ffff880028323c40 ffffffffa01c2582 ffff880614cfb020 0000000000000000
 <d> 0100000000000000 00000014383a6c44 ffff8804383c3880 ffff880614e93c00
 <d> ffff880614e93c00 0000000000000000 ffff8804632c4b00 ffff8804383c38b8
 Call Trace:
 <IRQ>
 [<ffffffffa01c2582>] ? sctp_rcv+0x492/0xa10 [sctp]
 [<ffffffff8148c559>] ? nf_iterate+0x69/0xb0
 [<ffffffff814974a0>] ? ip_local_deliver_finish+0x0/0x2d0
 [<ffffffff8148c716>] ? nf_hook_slow+0x76/0x120
 [<ffffffff814974a0>] ? ip_local_deliver_finish+0x0/0x2d0
 [<ffffffff8149757d>] ? ip_local_deliver_finish+0xdd/0x2d0
 [<ffffffff81497808>] ? ip_local_deliver+0x98/0xa0
 [<ffffffff81496ccd>] ? ip_rcv_finish+0x12d/0x440
 [<ffffffff81497255>] ? ip_rcv+0x275/0x350
 [<ffffffff8145cfeb>] ? __netif_receive_skb+0x4ab/0x750
 ...

With lockdep debugging:

 =====================================
 [ BUG: bad unlock balance detected! ]
 -------------------------------------
 CslRx/12087 is trying to release lock (slock-AF_INET) at:
 [<ffffffffa01bcae0>] sctp_generate_timeout_event+0x40/0xe0 [sctp]
 but there are no more locks to release!

 other info that might help us debug this:
 2 locks held by CslRx/12087:
 #0:  (&asoc->timers[i]){+.-...}, at: [<ffffffff8108ce1f>] run_timer_softirq+0x16f/0x3e0
 #1:  (slock-AF_INET){+.-...}, at: [<ffffffffa01bcac3>] sctp_generate_timeout_event+0x23/0xe0 [sctp]

Ensure the socket taken is also the same one that is released by
saving a copy of the socket before entering the timeout event
critical section.

Signed-off-by: Karl Heiss <kheiss@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_sideeffect.c | 44 ++++++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 85e6f03aeb70..61b7b81d29ab 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -244,12 +244,13 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
 	int error;
 	struct sctp_transport *transport = (struct sctp_transport *) peer;
 	struct sctp_association *asoc = transport->asoc;
-	struct net *net = sock_net(asoc->base.sk);
+	struct sock *sk = asoc->base.sk;
+	struct net *net = sock_net(sk);
 
 	/* Check whether a task is in the sock.  */
 
-	bh_lock_sock(asoc->base.sk);
-	if (sock_owned_by_user(asoc->base.sk)) {
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
 		pr_debug("%s: sock is busy\n", __func__);
 
 		/* Try again later.  */
@@ -272,10 +273,10 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
 			   transport, GFP_ATOMIC);
 
 	if (error)
-		asoc->base.sk->sk_err = -error;
+		sk->sk_err = -error;
 
 out_unlock:
-	bh_unlock_sock(asoc->base.sk);
+	bh_unlock_sock(sk);
 	sctp_transport_put(transport);
 }
 
@@ -285,11 +286,12 @@ out_unlock:
 static void sctp_generate_timeout_event(struct sctp_association *asoc,
 					sctp_event_timeout_t timeout_type)
 {
-	struct net *net = sock_net(asoc->base.sk);
+	struct sock *sk = asoc->base.sk;
+	struct net *net = sock_net(sk);
 	int error = 0;
 
-	bh_lock_sock(asoc->base.sk);
-	if (sock_owned_by_user(asoc->base.sk)) {
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
 		pr_debug("%s: sock is busy: timer %d\n", __func__,
 			 timeout_type);
 
@@ -312,10 +314,10 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc,
 			   (void *)timeout_type, GFP_ATOMIC);
 
 	if (error)
-		asoc->base.sk->sk_err = -error;
+		sk->sk_err = -error;
 
 out_unlock:
-	bh_unlock_sock(asoc->base.sk);
+	bh_unlock_sock(sk);
 	sctp_association_put(asoc);
 }
 
@@ -365,10 +367,11 @@ void sctp_generate_heartbeat_event(unsigned long data)
 	int error = 0;
 	struct sctp_transport *transport = (struct sctp_transport *) data;
 	struct sctp_association *asoc = transport->asoc;
-	struct net *net = sock_net(asoc->base.sk);
+	struct sock *sk = asoc->base.sk;
+	struct net *net = sock_net(sk);
 
-	bh_lock_sock(asoc->base.sk);
-	if (sock_owned_by_user(asoc->base.sk)) {
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
 		pr_debug("%s: sock is busy\n", __func__);
 
 		/* Try again later.  */
@@ -388,11 +391,11 @@ void sctp_generate_heartbeat_event(unsigned long data)
 			   asoc->state, asoc->ep, asoc,
 			   transport, GFP_ATOMIC);
 
-	 if (error)
-		 asoc->base.sk->sk_err = -error;
+	if (error)
+		sk->sk_err = -error;
 
 out_unlock:
-	bh_unlock_sock(asoc->base.sk);
+	bh_unlock_sock(sk);
 	sctp_transport_put(transport);
 }
 
@@ -403,10 +406,11 @@ void sctp_generate_proto_unreach_event(unsigned long data)
 {
 	struct sctp_transport *transport = (struct sctp_transport *) data;
 	struct sctp_association *asoc = transport->asoc;
-	struct net *net = sock_net(asoc->base.sk);
+	struct sock *sk = asoc->base.sk;
+	struct net *net = sock_net(sk);
 
-	bh_lock_sock(asoc->base.sk);
-	if (sock_owned_by_user(asoc->base.sk)) {
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
 		pr_debug("%s: sock is busy\n", __func__);
 
 		/* Try again later.  */
@@ -427,7 +431,7 @@ void sctp_generate_proto_unreach_event(unsigned long data)
 		   asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC);
 
 out_unlock:
-	bh_unlock_sock(asoc->base.sk);
+	bh_unlock_sock(sk);
 	sctp_association_put(asoc);
 }
 
-- 
2.5.0