summaryrefslogtreecommitdiffstats
path: root/scsi-fix-sd_revalidate_disk-oops.patch
blob: efc657c7f4474319fcd33c033113a808eaa58434 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
Hi,

Thank you for review and comments.

On 02/16/12 02:26, Tejun Heo wrote:
> On Wed, Feb 15, 2012 at 11:56:19AM +0900, Jun'ichi Nomura wrote:
>> +int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
>> +{
>> +	int res;
>> +
>> +	res = drop_partitions(disk, bdev);
>> +	if (res)
>> +		return res;
>> +
> 
> Hmmm... shouldn't we have set_capacity(disk, 0) here?

Added.
I wasn't sure whether I should leave it to drivers.
But it seems capacity 0 for ENOMEDIUM device is reasonable.

>> +	check_disk_size_change(disk, bdev);
>> +	bdev->bd_invalidated = 0;
>> +	/* tell userspace that the media / partition table may have changed */
>> +	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
> 
> Also, we really shouldn't be generating KOBJ_CHANGE after every
> -ENOMEDIUM open.  This can easily lead to infinite loop.  We should
> generate this iff we actually dropped partitions && modified the size.

invalidate_partitions() is called only when bd_invalidated is set.
So KOBJ_CHANGE is not raised for every ENOMEDIUM open.

I put it explicit in the function to make it safer for
possible misuse.

How about this?

---------------------------------------------------------
Do not call drivers when invalidating partitions for -ENOMEDIUM

When a scsi driver returns -ENOMEDIUM for open(),
__blkdev_get() calls rescan_partitions(), which ends up calling
sd_revalidate_disk() without getting a refcount of scsi_device.

That could lead to oops like this:

  process A                  process B
  ----------------------------------------------
  sys_open
    __blkdev_get
      sd_open
        returns -ENOMEDIUM
                             scsi_remove_device
                               <scsi_device torn down>
      rescan_partitions
        sd_revalidate_disk
          <oops>

Oopses are reported here:
http://marc.info/?l=linux-scsi&m=132388619710052

This patch separates the partition invalidation from rescan_partitions()
and use it for -ENOMEDIUM case. 

Index: linux-3.3/block/partition-generic.c
===================================================================
--- linux-3.3.orig/block/partition-generic.c	2012-02-15 09:00:25.147293790 +0900
+++ linux-3.3/block/partition-generic.c	2012-02-16 10:48:22.257680685 +0900
@@ -389,17 +389,11 @@ static bool disk_unlock_native_capacity(
 	}
 }
 
-int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
 {
-	struct parsed_partitions *state = NULL;
 	struct disk_part_iter piter;
 	struct hd_struct *part;
-	int p, highest, res;
-rescan:
-	if (state && !IS_ERR(state)) {
-		kfree(state);
-		state = NULL;
-	}
+	int res;
 
 	if (bdev->bd_part_count)
 		return -EBUSY;
@@ -412,6 +406,24 @@ rescan:
 		delete_partition(disk, part->partno);
 	disk_part_iter_exit(&piter);
 
+	return 0;
+}
+
+int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+{
+	struct parsed_partitions *state = NULL;
+	struct hd_struct *part;
+	int p, highest, res;
+rescan:
+	if (state && !IS_ERR(state)) {
+		kfree(state);
+		state = NULL;
+	}
+
+	res = drop_partitions(disk, bdev);
+	if (res)
+		return res;
+
 	if (disk->fops->revalidate_disk)
 		disk->fops->revalidate_disk(disk);
 	check_disk_size_change(disk, bdev);
@@ -515,6 +527,26 @@ rescan:
 	return 0;
 }
 
+int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
+{
+	int res;
+
+	if (!bdev->bd_invalidated)
+		return 0;
+
+	res = drop_partitions(disk, bdev);
+	if (res)
+		return res;
+
+	set_capacity(disk, 0);
+	check_disk_size_change(disk, bdev);
+	bdev->bd_invalidated = 0;
+	/* tell userspace that the media / partition table may have changed */
+	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
+
+	return 0;
+}
+
 unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
 {
 	struct address_space *mapping = bdev->bd_inode->i_mapping;
Index: linux-3.3/include/linux/genhd.h
===================================================================
--- linux-3.3.orig/include/linux/genhd.h	2012-02-09 12:21:53.000000000 +0900
+++ linux-3.3/include/linux/genhd.h	2012-02-16 10:47:43.783681813 +0900
@@ -596,6 +596,7 @@ extern char *disk_name (struct gendisk *
 
 extern int disk_expand_part_tbl(struct gendisk *disk, int target);
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
+extern int invalidate_partitions(struct gendisk *disk, struct block_device *bdev);
 extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
 						     int partno, sector_t start,
 						     sector_t len, int flags,
Index: linux-3.3/fs/block_dev.c
===================================================================
--- linux-3.3.orig/fs/block_dev.c	2012-02-09 12:21:53.000000000 +0900
+++ linux-3.3/fs/block_dev.c	2012-02-16 10:47:52.602681441 +0900
@@ -1183,8 +1183,12 @@ static int __blkdev_get(struct block_dev
 			 * The latter is necessary to prevent ghost
 			 * partitions on a removed medium.
 			 */
-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
-				rescan_partitions(disk, bdev);
+			if (bdev->bd_invalidated) {
+				if (!ret)
+					rescan_partitions(disk, bdev);
+				else if (ret == -ENOMEDIUM)
+					invalidate_partitions(disk, bdev);
+			}
 			if (ret)
 				goto out_clear;
 		} else {
@@ -1214,8 +1218,12 @@ static int __blkdev_get(struct block_dev
 			if (bdev->bd_disk->fops->open)
 				ret = bdev->bd_disk->fops->open(bdev, mode);
 			/* the same as first opener case, read comment there */
-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
-				rescan_partitions(bdev->bd_disk, bdev);
+			if (bdev->bd_invalidated) {
+				if (!ret)
+					rescan_partitions(bdev->bd_disk, bdev);
+				else if (ret == -ENOMEDIUM)
+					invalidate_partitions(bdev->bd_disk, bdev);
+			}
 			if (ret)
 				goto out_unlock_bdev;
 		}