summaryrefslogtreecommitdiffstats
path: root/cgroup-fixes.patch
blob: d034189078d4be17b4a9c3daab972290568f9092 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
Bugzilla: 1045775
Upstream-status: queued for 3.14-rc3 and stable 3.12+

Path: news.gmane.org!not-for-mail
From: Michal Hocko <mhocko@suse.cz>
Newsgroups: gmane.linux.kernel,gmane.linux.kernel.cgroups
Subject: Re: [PATCH] cgroup: protect modifications to cgroup_idr with
 cgroup_mutex
Date: Wed, 12 Feb 2014 10:12:38 +0100
Lines: 127
Approved: news@gmane.org
Message-ID: <20140212091238.GC28085@dhcp22.suse.cz>
References: <52F9D9DA.7040108@huawei.com>
 <20140211102032.GA11946@dhcp22.suse.cz>
NNTP-Posting-Host: plane.gmane.org
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
X-Trace: ger.gmane.org 1392196376 529 80.91.229.3 (12 Feb 2014 09:12:56 GMT)
X-Complaints-To: usenet@ger.gmane.org
NNTP-Posting-Date: Wed, 12 Feb 2014 09:12:56 +0000 (UTC)
Cc: Tejun Heo <tj@kernel.org>, LKML <linux-kernel@vger.kernel.org>,
	Cgroups <cgroups@vger.kernel.org>
To: Li Zefan <lizefan@huawei.com>
Original-X-From: linux-kernel-owner@vger.kernel.org Wed Feb 12 10:13:03 2014
Return-path: <linux-kernel-owner@vger.kernel.org>
Envelope-to: glk-linux-kernel-3@plane.gmane.org
Original-Received: from vger.kernel.org ([209.132.180.67])
	by plane.gmane.org with esmtp (Exim 4.69)
	(envelope-from <linux-kernel-owner@vger.kernel.org>)
	id 1WDVsM-0003po-4o
	for glk-linux-kernel-3@plane.gmane.org; Wed, 12 Feb 2014 10:13:02 +0100
Original-Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1751758AbaBLJMs (ORCPT <rfc822;glk-linux-kernel-3@m.gmane.org>);
	Wed, 12 Feb 2014 04:12:48 -0500
Original-Received: from cantor2.suse.de ([195.135.220.15]:34766 "EHLO mx2.suse.de"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1750890AbaBLJMk (ORCPT <rfc822;linux-kernel@vger.kernel.org>);
	Wed, 12 Feb 2014 04:12:40 -0500
Original-Received: from relay1.suse.de (charybdis-ext.suse.de [195.135.220.254])
	by mx2.suse.de (Postfix) with ESMTP id 216F1ABB2;
	Wed, 12 Feb 2014 09:12:39 +0000 (UTC)
Content-Disposition: inline
In-Reply-To: <20140211102032.GA11946@dhcp22.suse.cz>
User-Agent: Mutt/1.5.21 (2010-09-15)
Original-Sender: linux-kernel-owner@vger.kernel.org
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Xref: news.gmane.org gmane.linux.kernel:1646465 gmane.linux.kernel.cgroups:10357
Archived-At: <http://permalink.gmane.org/gmane.linux.kernel/1646465>

Li has pointed out that my previous backport was not correct because
err_unlock label releases a reference to supperblock which was not taken
before idr_alloc. I've also removed cgroup_mutex from free_css_id as per
Li.
Fixed in this version.

--- 
Date: Tue, 11 Feb 2014 16:05:46 +0800
From: Li Zefan <lizefan@huawei.com>
Subject: [PATCH - for 3.12] cgroup: protect modifications to cgroup_idr with cgroup_mutex

Setup cgroupfs like this:
  # mount -t cgroup -o cpuacct xxx /cgroup
  # mkdir /cgroup/sub1
  # mkdir /cgroup/sub2

Then run these two commands:
  # for ((; ;)) { mkdir /cgroup/sub1/tmp && rmdir /mnt/sub1/tmp; } &
  # for ((; ;)) { mkdir /cgroup/sub2/tmp && rmdir /mnt/sub2/tmp; } &

After seconds you may see this warning:

------------[ cut here ]------------
WARNING: CPU: 1 PID: 25243 at lib/idr.c:527 sub_remove+0x87/0x1b0()
idr_remove called for id=6 which is not allocated.
...
Call Trace:
 [<ffffffff8156063c>] dump_stack+0x7a/0x96
 [<ffffffff810591ac>] warn_slowpath_common+0x8c/0xc0
 [<ffffffff81059296>] warn_slowpath_fmt+0x46/0x50
 [<ffffffff81300aa7>] sub_remove+0x87/0x1b0
 [<ffffffff810f3f02>] ? css_killed_work_fn+0x32/0x1b0
 [<ffffffff81300bf5>] idr_remove+0x25/0xd0
 [<ffffffff810f2bab>] cgroup_destroy_css_killed+0x5b/0xc0
 [<ffffffff810f4000>] css_killed_work_fn+0x130/0x1b0
 [<ffffffff8107cdbc>] process_one_work+0x26c/0x550
 [<ffffffff8107eefe>] worker_thread+0x12e/0x3b0
 [<ffffffff81085f96>] kthread+0xe6/0xf0
 [<ffffffff81570bac>] ret_from_fork+0x7c/0xb0
---[ end trace 2d1577ec10cf80d0 ]---

It's because allocating/removing cgroup ID is not properly synchronized.

The bug was introduced when we converted cgroup_ida to cgroup_idr.
While synchronization is already done inside ida_simple_{get,remove}(),
users are responsible for concurrent calls to idr_{alloc,remove}().

[mhocko@suse.cz: ported to 3.12]
Fixes: 4e96ee8e981b ("cgroup: convert cgroup_ida to cgroup_idr")
Cc: <stable@vger.kernel.org> #3.12+
Reported-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
---
 include/linux/cgroup.h |    2 ++
 kernel/cgroup.c        |   23 ++++++++++++-----------
 2 files changed, 14 insertions(+), 11 deletions(-)

--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -169,6 +169,8 @@ struct cgroup {
 	 *
 	 * The ID of the root cgroup is always 0, and a new cgroup
 	 * will be assigned with a smallest available ID.
+	 *
+	 * Allocating/Removing ID must be protected by cgroup_mutex.
 	 */
 	int id;
 
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4410,16 +4410,6 @@ static long cgroup_create(struct cgroup
 	rcu_assign_pointer(cgrp->name, name);
 
 	/*
-	 * Temporarily set the pointer to NULL, so idr_find() won't return
-	 * a half-baked cgroup.
-	 */
-	cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
-	if (cgrp->id < 0) {
-		err = -ENOMEM;
-		goto err_free_name;
-	}
-
-	/*
 	 * Only live parents can have children.  Note that the liveliness
 	 * check isn't strictly necessary because cgroup_mkdir() and
 	 * cgroup_rmdir() are fully synchronized by i_mutex; however, do it
@@ -4426,7 +4418,7 @@ static long cgroup_create(struct cgroup
 	 */
 	if (!cgroup_lock_live_group(parent)) {
 		err = -ENODEV;
-		goto err_free_id;
+		goto err_free_name;
 	}
 
 	/* Grab a reference on the superblock so the hierarchy doesn't
@@ -4436,6 +4428,14 @@ static long cgroup_create(struct cgroup
 	 * fs */
 	atomic_inc(&sb->s_active);
 
+	/*
+	 * Temporarily set the pointer to NULL, so idr_find() won't return
+	 * a half-baked cgroup.
+	 */
+	cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
+	if (cgrp->id < 0)
+		goto err_unlock;
+
 	init_cgroup_housekeeping(cgrp);
 
 	dentry->d_fsdata = cgrp;
@@ -4542,11 +4542,11 @@ err_free_all:
 			ss->css_free(css);
 		}
 	}
+	idr_remove(&root->cgroup_idr, cgrp->id);
+err_unlock:
 	mutex_unlock(&cgroup_mutex);
 	/* Release the reference count that we took on the superblock */
 	deactivate_super(sb);
-err_free_id:
-	idr_remove(&root->cgroup_idr, cgrp->id);
 err_free_name:
 	kfree(rcu_dereference_raw(cgrp->name));
 err_free_cgrp:
-- 
Michal Hocko
SUSE Labs