summaryrefslogtreecommitdiffstats
path: root/block/blk-integrity.c
blob: 3f1a8478cc384b041c8ef8c66f633eb85e10b36d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
/*
 * blk-integrity.c - Block layer data integrity extensions
 *
 * Copyright (C) 2007, 2008 Oracle Corporation
 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 * USA.
 *
 */

#include <linux/blkdev.h>
#include <linux/mempool.h>
#include <linux/bio.h>
#include <linux/scatterlist.h>

#include "blk.h"

static struct kmem_cache *integrity_cachep;

/**
 * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
 * @rq:		request with integrity metadata attached
 *
 * Description: Walks every integrity vector of @rq and returns how many
 * scatterlist elements are needed to describe them.  A vector that
 * BIOVEC_PHYS_MERGEABLE() reports as mergeable with the vector before
 * it does not need an element of its own.
 */
int blk_rq_count_integrity_sg(struct request *rq)
{
	struct req_iterator iter;
	struct bio_vec *prev = NULL;
	struct bio_vec *cur;
	unsigned int nr_segs = 0;

	rq_for_each_integrity_segment(cur, rq, iter) {
		/* The very first vector always opens a new element. */
		int merges = prev && BIOVEC_PHYS_MERGEABLE(prev, cur);

		if (!merges)
			nr_segs++;

		prev = cur;
	}

	return nr_segs;
}
EXPORT_SYMBOL(blk_rq_count_integrity_sg);

/**
 * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
 * @rq:		request with integrity metadata attached
 * @sglist:	target scatterlist
 *
 * Description: Fills @sglist from the integrity vectors of @rq and
 * returns the number of entries used.  The caller must supply a
 * scatterlist with enough entries, i.e. one sized with
 * blk_rq_count_integrity_sg().
 */
int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
{
	struct req_iterator iter;
	struct scatterlist *cur_sg = NULL;
	struct bio_vec *prev = NULL;
	struct bio_vec *cur;
	unsigned int nr_segs = 0;

	rq_for_each_integrity_segment(cur, rq, iter) {
		if (prev && BIOVEC_PHYS_MERGEABLE(prev, cur)) {
			/* Mergeable with the previous vector: grow the
			 * entry we are already filling. */
			cur_sg->length += cur->bv_len;
		} else {
			if (cur_sg) {
				/* Clear bit 1 of page_link before stepping
				 * on (presumably a stale end-of-list marker;
				 * confirm against scatterlist.h). */
				cur_sg->page_link &= ~0x02;
				cur_sg = sg_next(cur_sg);
			} else {
				/* First entry of the list. */
				cur_sg = sglist;
			}

			sg_set_page(cur_sg, cur->bv_page, cur->bv_len,
				    cur->bv_offset);
			nr_segs++;
		}

		prev = cur;
	}

	/* Terminate the list at the last entry written, if any. */
	if (cur_sg)
		sg_mark_end(cur_sg);

	return nr_segs;
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);

/**
 * blk_integrity_compare - Compare integrity profile of two block devices
 * @bd1:	Device to compare
 * @bd2:	Device to compare
 *
 * Description: Meta-devices like DM and MD need to verify that all
 * sub-devices use the same integrity format before advertising to
 * upper layers that they can send/receive integrity metadata.  This
 * function can be used to check whether two block devices have
 * compatible integrity formats.
 *
 * Returns 0 when the profiles are compatible or when at least one of
 * the devices has no integrity profile at all, and -1 (after logging
 * the mismatching property) otherwise.
 */
int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
{
	struct blk_integrity *b1, *b2;

	/*
	 * Validate bd_disk before it is dereferenced; loading the
	 * profiles in the declarations would make these checks useless.
	 */
	BUG_ON(bd1->bd_disk == NULL);
	BUG_ON(bd2->bd_disk == NULL);

	b1 = bd1->bd_disk->integrity;
	b2 = bd2->bd_disk->integrity;

	if (!b1 || !b2)
		return 0;

	if (b1->sector_size != b2->sector_size) {
		printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
		       b1->sector_size, b2->sector_size);
		return -1;
	}

	if (b1->tuple_size != b2->tuple_size) {
		printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
		       b1->tuple_size, b2->tuple_size);
		return -1;
	}

	/* A tag size of zero on either side is not treated as a mismatch. */
	if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
		printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
		       b1->tag_size, b2->tag_size);
		return -1;
	}

	if (strcmp(b1->name, b2->name)) {
		printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
		       b1->name, b2->name);
		return -1;
	}

	return 0;
}
EXPORT_SYMBOL(blk_integrity_compare);

/*
 * One sysfs attribute of an integrity profile, together with the
 * handlers that the generic show/store dispatchers forward to.
 */
struct integrity_sysfs_entry {
	/* embedded attribute; handlers recover the entry via container_of() */
	struct attribute attr;
	/* formats the attribute value into @page, returns bytes written */
	ssize_t (*show)(struct blk_integrity *bi, char *page);
	/* parses @count bytes from @page; may be NULL for read-only entries */
	ssize_t (*store)(struct blk_integrity *bi, const char *page,
			 size_t count);
};

/*
 * sysfs ->show dispatcher: recover the integrity profile and the
 * attribute entry from their embedded members and call the entry's
 * show handler.
 */
static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr,
				   char *page)
{
	struct integrity_sysfs_entry *ent;
	struct blk_integrity *profile;

	profile = container_of(kobj, struct blk_integrity, kobj);
	ent = container_of(attr, struct integrity_sysfs_entry, attr);

	return ent->show(profile, page);
}

/*
 * sysfs ->store dispatcher: forward the write to the entry's store
 * handler.  Entries without a store handler yield 0.
 */
static ssize_t integrity_attr_store(struct kobject *kobj,
				    struct attribute *attr, const char *page,
				    size_t count)
{
	struct integrity_sysfs_entry *ent;
	struct blk_integrity *profile;

	profile = container_of(kobj, struct blk_integrity, kobj);
	ent = container_of(attr, struct integrity_sysfs_entry, attr);

	if (!ent->store)
		return 0;

	return ent->store(profile, page, count);
}

/* Show the profile name, or "none" when there is no profile or name. */
static ssize_t integrity_format_show(struct blk_integrity *bi, char *page)
{
	if (bi == NULL || bi->name == NULL)
		return sprintf(page, "none\n");

	return sprintf(page, "%s\n", bi->name);
}

/* Show the profile's tag size, or 0 when there is no profile. */
static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
{
	if (bi == NULL)
		return sprintf(page, "0\n");

	return sprintf(page, "%u\n", bi->tag_size);
}

/*
 * Parse a decimal number from @page and set INTEGRITY_FLAG_READ when
 * it is non-zero, clear it otherwise.  Always consumes the whole write.
 */
static ssize_t integrity_read_store(struct blk_integrity *bi,
				    const char *page, size_t count)
{
	char *cursor = (char *) page;
	unsigned long enable;

	enable = simple_strtoul(cursor, &cursor, 10);

	if (enable == 0)
		bi->flags &= ~INTEGRITY_FLAG_READ;
	else
		bi->flags |= INTEGRITY_FLAG_READ;

	return count;
}

/* Show 1 when INTEGRITY_FLAG_READ is set in the profile flags, else 0. */
static ssize_t integrity_read_show(struct blk_integrity *bi, char *page)
{
	int enabled = (bi->flags & INTEGRITY_FLAG_READ) != 0;

	return sprintf(page, "%d\n", enabled);
}

/*
 * Parse a decimal number from @page and set INTEGRITY_FLAG_WRITE when
 * it is non-zero, clear it otherwise.  Always consumes the whole write.
 */
static ssize_t integrity_write_store(struct blk_integrity *bi,
				     const char *page, size_t count)
{
	char *cursor = (char *) page;
	unsigned long enable;

	enable = simple_strtoul(cursor, &cursor, 10);

	if (enable == 0)
		bi->flags &= ~INTEGRITY_FLAG_WRITE;
	else
		bi->flags |= INTEGRITY_FLAG_WRITE;

	return count;
}

/* Show 1 when INTEGRITY_FLAG_WRITE is set in the profile flags, else 0. */
static ssize_t integrity_write_show(struct blk_integrity *bi, char *page)
{
	int enabled = (bi->flags & INTEGRITY_FLAG_WRITE) != 0;

	return sprintf(page, "%d\n", enabled);
}

/*
 * Attribute table entries for the integrity kobject.  Entries without a
 * .store handler are read-only; integrity_attr_store() returns 0 for them.
 */

/* "format": name of the integrity profile, or "none" */
static struct integrity_sysfs_entry integrity_format_entry = {
	.attr = { .name = "format", .mode = S_IRUGO },
	.show = integrity_format_show,
};

/* "tag_size": tag size taken from the profile */
static struct integrity_sysfs_entry integrity_tag_size_entry = {
	.attr = { .name = "tag_size", .mode = S_IRUGO },
	.show = integrity_tag_size_show,
};

/* "read_verify": reads and toggles INTEGRITY_FLAG_READ */
static struct integrity_sysfs_entry integrity_read_entry = {
	.attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
	.show = integrity_read_show,
	.store = integrity_read_store,
};

/* "write_generate": reads and toggles INTEGRITY_FLAG_WRITE */
static struct integrity_sysfs_entry integrity_write_entry = {
	.attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
	.show = integrity_write_show,
	.store = integrity_write_store,
};

/* NULL-terminated list of default attributes used by integrity_ktype. */
static struct attribute *integrity_attrs[] = {
	&integrity_format_entry.attr,
	&integrity_tag_size_entry.attr,
	&integrity_read_entry.attr,
	&integrity_write_entry.attr,
	NULL,
};

/* sysfs operations that route reads and writes to the per-entry handlers. */
static struct sysfs_ops integrity_ops = {
	.show	= integrity_attr_show,
	.store	= integrity_attr_store,
};

/*
 * Create the slab cache that struct blk_integrity objects are allocated
 * from.  SLAB_PANIC is passed, so the result is not checked here.
 */
static int __init blk_dev_integrity_init(void)
{
	struct kmem_cache *cache;

	cache = kmem_cache_create("blkdev_integrity",
				  sizeof(struct blk_integrity),
				  0, SLAB_PANIC, NULL);
	integrity_cachep = cache;

	return 0;
}
subsys_initcall(blk_dev_integrity_init);

/*
 * ktype release callback: hand the blk_integrity that embeds @kobj back
 * to the slab cache it was allocated from.
 */
static void blk_integrity_release(struct kobject *kobj)
{
	struct blk_integrity *profile;

	profile = container_of(kobj, struct blk_integrity, kobj);
	kmem_cache_free(integrity_cachep, profile);
}

/*
 * kobject type of an integrity profile: default attributes, the sysfs
 * show/store dispatchers, and the release hook that frees the profile.
 */
static struct kobj_type integrity_ktype = {
	.default_attrs	= integrity_attrs,
	.sysfs_ops	= &integrity_ops,
	.release	= blk_integrity_release,
};

/**
 * blk_integrity_register - Register a gendisk as being integrity-capable
 * @disk:	struct gendisk pointer to make integrity-aware
 * @template:	integrity profile
 *
 * Description: When a device needs to advertise itself as being able
 * to send/receive integrity metadata it must use this function to
 * register the capability with the block layer.  The template is a
 * blk_integrity struct with values appropriate for the underlying
 * hardware.  See Documentation/block/data-integrity.txt.
 *
 * If @disk already has a profile, it is reused and only the fields
 * copied from @template are refreshed.
 *
 * Returns 0 on success, or -1 if the profile could not be allocated or
 * its kobject could not be added.
 */
int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
{
	struct blk_integrity *bi;

	BUG_ON(disk == NULL);
	BUG_ON(template == NULL);

	if (disk->integrity == NULL) {
		bi = kmem_cache_alloc(integrity_cachep,
						GFP_KERNEL | __GFP_ZERO);
		if (!bi)
			return -1;

		if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
					 &disk->dev.kobj, "%s", "integrity")) {
			/*
			 * The kobject has been initialised, so it must be
			 * torn down with kobject_put() rather than freed
			 * directly; blk_integrity_release() then returns
			 * bi to the cache.
			 */
			kobject_put(&bi->kobj);
			return -1;
		}

		kobject_uevent(&bi->kobj, KOBJ_ADD);

		/* New profiles start with both directions enabled. */
		bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
		bi->sector_size = disk->queue->hardsect_size;
		disk->integrity = bi;
	} else
		bi = disk->integrity;

	/* Use the provided profile as template */
	bi->name = template->name;
	bi->generate_fn = template->generate_fn;
	bi->verify_fn = template->verify_fn;
	bi->tuple_size = template->tuple_size;
	bi->set_tag_fn = template->set_tag_fn;
	bi->get_tag_fn = template->get_tag_fn;
	bi->tag_size = template->tag_size;

	return 0;
}
EXPORT_SYMBOL(blk_integrity_register);

/**
 * blk_integrity_unregister - Remove block integrity profile
 * @disk:	disk whose integrity profile to deallocate
 *
 * Description: Removes the integrity kobject of @disk from sysfs and
 * drops the reference that keeps the profile alive.  The profile
 * memory is released through blk_integrity_release() once the last
 * reference is gone.  To be called at device teardown.  Does nothing
 * if @disk is NULL or has no profile.
 */
void blk_integrity_unregister(struct gendisk *disk)
{
	struct blk_integrity *bi;

	if (!disk || !disk->integrity)
		return;

	bi = disk->integrity;

	/* Detach first so the disk never points at a released profile. */
	disk->integrity = NULL;

	kobject_uevent(&bi->kobj, KOBJ_REMOVE);
	kobject_del(&bi->kobj);

	/*
	 * Drop the reference on the profile's own kobject, not on the
	 * parent disk's.  bi must not be freed by hand here: the ktype
	 * release callback does that when the refcount reaches zero.
	 */
	kobject_put(&bi->kobj);
}
EXPORT_SYMBOL(blk_integrity_unregister);
% anaconda.id.storage.doAutoPart) log.debug("clearPartType: %s" % anaconda.id.storage.clearPartType) log.debug("clearPartDisks: %s" % anaconda.id.storage.clearPartDisks) log.debug("autoPartitionRequests: %s" % anaconda.id.storage.autoPartitionRequests) log.debug("storage.disks: %s" % anaconda.id.storage.disks) log.debug("all names: %s" % [d.name for d in anaconda.id.storage.devicetree.devices.values()]) if anaconda.dir == DISPATCH_BACK: anaconda.id.storage.reset() return disks = [] devs = [] if anaconda.id.storage.doAutoPart and not anaconda.isKickstart: # kickstart uses clearPartitions even without autopart clearPartitions(anaconda.id.storage) if anaconda.id.storage.doAutoPart: (disks, devs) = _createFreeSpacePartitions(anaconda) if disks == []: anaconda.intf.messageWindow(_("Error Partitioning"), _("Could not find enough free space " "for automatic partitioning, please " "use another partitioning method."), custom_icon='error') return DISPATCH_BACK _schedulePartitions(anaconda, disks) # sanity check the individual devices log.warning("not sanity checking devices because I don't know how yet") # run the autopart function to allocate and grow partitions try: doPartitioning(anaconda.id.storage, exclusiveDisks=anaconda.id.storage.clearPartDisks) except PartitioningWarning as msg: if not anaconda.isKickstart: anaconda.intf.messageWindow(_("Warnings During Automatic " "Partitioning"), _("Following warnings occurred during automatic " "partitioning:\n\n%s") % (msg,), custom_icon='warning') else: log.warning(msg) except PartitioningError as msg: # restore drives to original state anaconda.id.storage.reset() if not anaconda.isKickstart: extra = "" anaconda.dispatch.skipStep("partition", skip = 0) else: extra = _("\n\nPress 'OK' to exit the installer.") anaconda.intf.messageWindow(_("Error Partitioning"), _("Could not allocate requested partitions: \n\n" "%s.%s") % (msg, extra), custom_icon='error') if anaconda.isKickstart: sys.exit(0) if anaconda.id.storage.doAutoPart: 
_scheduleLVs(anaconda, devs) # sanity check the collection of devices log.warning("not sanity checking storage config because I don't know how yet") # now do a full check of the requests (errors, warnings) = anaconda.id.storage.sanityCheck() if warnings: for warning in warnings: log.warning(warning) if errors: errortxt = "\n".join(errors) if anaconda.isKickstart: extra = _("\n\nPress 'OK' to exit the installer.") else: extra = _("\n\nPress 'OK' to choose a different partitioning option.") anaconda.intf.messageWindow(_("Automatic Partitioning Errors"), _("The following errors occurred with your " "partitioning:\n\n%s\n\n" "This can happen if there is not enough " "space on your hard drive(s) for the " "installation. %s") % (errortxt, extra), custom_icon='error') # # XXX if in kickstart we reboot # if anaconda.isKickstart: anaconda.intf.messageWindow(_("Unrecoverable Error"), _("Your system will now be rebooted.")) sys.exit(0) return DISPATCH_BACK def clearPartitions(storage): """ Clear partitions and dependent devices from disks. Arguments: storage -- a storage.Storage instance Keyword arguments: None NOTES: - Needs some error handling, especially for the parted bits. 
""" if storage.clearPartType == CLEARPART_TYPE_NONE: # not much to do return # we are only interested in partitions that physically exist partitions = [p for p in storage.partitions if p.exists] disks = [] # a list of disks from which we've removed partitions clearparts = [] # list of partitions we'll remove for part in partitions: log.debug("clearpart: looking at %s" % part.name) clear = False # whether or not we will clear this partition # if we got a list of disks to clear, make sure this one's on it if storage.clearPartDisks and \ part.disk.name not in storage.clearPartDisks: continue # don't clear partitions holding install media if part.name in storage.protectedPartitions: continue # we don't want to fool with extended partitions, freespace, &c if part.partType not in (parted.PARTITION_NORMAL, parted.PARTITION_LOGICAL): continue if storage.clearPartType == CLEARPART_TYPE_ALL: clear = True else: if part.format and part.format.linuxNative: clear = True elif part.partedPartition.getFlag(parted.PARTITION_LVM) or \ part.partedPartition.getFlag(parted.PARTITION_RAID) or \ part.partedPartition.getFlag(parted.PARTITION_SWAP): clear = True # TODO: do platform-specific checks on ia64, pSeries, iSeries, mac if not clear: continue log.debug("clearing %s" % part.name) # XXX is there any argument for not removing incomplete devices? 
# -- maybe some RAID devices devices = storage.deviceDeps(part) while devices: log.debug("devices to remove: %s" % ([d.name for d in devices],)) leaves = [d for d in devices if d.isleaf] log.debug("leaves to remove: %s" % ([d.name for d in leaves],)) for leaf in leaves: storage.destroyDevice(leaf) devices.remove(leaf) log.debug("partitions: %s" % [p.getDeviceNodeName() for p in part.partedPartition.disk.partitions]) disk_name = os.path.basename(part.partedPartition.disk.device.path) if disk_name not in disks: disks.append(disk_name) clearparts.append(part) for part in clearparts: storage.destroyDevice(part) # now remove any empty extended partitions removeEmptyExtendedPartitions(storage) def removeEmptyExtendedPartitions(storage): for disk in storage.disks: log.debug("checking whether disk %s has an empty extended" % disk.name) extended = disk.partedDisk.getExtendedPartition() logical_parts = disk.partedDisk.getLogicalPartitions() log.debug("extended is %s ; logicals is %s" % (extended, [p.getDeviceNodeName() for p in logical_parts])) if extended and not logical_parts: log.debug("removing empty extended partition from %s" % disk.name) extended_name = extended.getDeviceNodeName() extended = storage.devicetree.getDeviceByName(extended_name) storage.destroyDevice(extended) #disk.partedDisk.removePartition(extended.partedPartition) def partitionCompare(part1, part2): """ More specifically defined partitions come first. 
< 1 => x < y 0 => x == y > 1 => x > y """ ret = 0 # bootable partitions to the front ret -= cmp(part1.req_bootable, part2.req_bootable) * 1000 # more specific disk specs to the front of the list ret += cmp(len(part1.parents), len(part2.parents)) * 500 # primary-only to the front of the list ret -= cmp(part1.req_primary, part2.req_primary) * 200 # larger requests go to the front of the list ret -= cmp(part1.size, part2.size) * 100 # fixed size requests to the front ret += cmp(part1.req_grow, part2.req_grow) * 50 # potentially larger growable requests go to the front if part1.req_grow and part2.req_grow: if not part1.req_max_size and part2.req_max_size: ret -= 25 elif part1.req_max_size and not part2.req_max_size: ret += 25 else: ret -= cmp(part1.req_max_size, part2.req_max_size) * 25 if ret > 0: ret = 1 elif ret < 0: ret = -1 return ret def getNextPartitionType(disk, no_primary=None): """ Find the type of partition to create next on a disk. Return a parted partition type value representing the type of the next partition we will create on this disk. If there is only one free primary partition and we can create an extended partition, we do that. If there are free primary slots and an extended partition we will recommend creating a primary partition. This can be overridden with the keyword argument no_primary. 
Arguments: disk -- a parted.Disk instance representing the disk Keyword arguments: no_primary -- given a choice between primary and logical partitions, prefer logical """ part_type = None extended = disk.getExtendedPartition() supports_extended = disk.supportsFeature(parted.DISK_TYPE_EXTENDED) logical_count = len(disk.getLogicalPartitions()) max_logicals = disk.getMaxLogicalPartitions() primary_count = disk.primaryPartitionCount if primary_count == disk.maxPrimaryPartitionCount and \ extended and logical_count < max_logicals: part_type = parted.PARTITION_LOGICAL elif primary_count == (disk.maxPrimaryPartitionCount - 1) and \ not extended and supports_extended: # last chance to create an extended partition part_type = parted.PARTITION_EXTENDED elif no_primary and extended and logical_count < max_logicals: # create a logical even though we could presumably create a # primary instead part_type = parted.PARTITION_LOGICAL elif not no_primary: # XXX there is a possiblity that the only remaining free space on # the disk lies within the extended partition, but we will # try to create a primary first part_type = parted.PARTITION_NORMAL return part_type def getBestFreeSpaceRegion(disk, part_type, req_size, boot=None, best_free=None): """ Return the "best" free region on the specified disk. For non-boot partitions, we return the largest free region on the disk. For boot partitions, we return the first region that is large enough to hold the partition. Partition type (parted's PARTITION_NORMAL, PARTITION_LOGICAL) is taken into account when locating a suitable free region. For locating the best region from among several disks, the keyword argument best_free allows the specification of a current "best" free region with which to compare the best from this disk. The overall best region is returned. 
Arguments: disk -- the disk (a parted.Disk instance) part_type -- the type of partition we want to allocate (one of parted's partition type constants) req_size -- the requested size of the partition (in MB) Keyword arguments: boot -- indicates whether this will be a bootable partition (boolean) best_free -- current best free region for this partition """ log.debug("getBestFreeSpaceRegion: disk=%s part_type=%d req_size=%dMB boot=%s best=%s" % (disk.device.path, part_type, req_size, boot, best_free)) extended = disk.getExtendedPartition() for _range in disk.getFreeSpaceRegions(): if extended: # find out if there is any overlap between this region and the # extended partition log.debug("looking for intersection between extended (%d-%d) and free (%d-%d)" % (extended.geometry.start, extended.geometry.end, _range.start, _range.end)) # parted.Geometry.overlapsWith can handle this try: free_geom = extended.geometry.intersect(_range) except ArithmeticError, e: # this freespace region does not lie within the extended # partition's geometry free_geom = None if (free_geom and part_type == parted.PARTITION_NORMAL) or \ (not free_geom and part_type == parted.PARTITION_LOGICAL): log.debug("free region not suitable for request") continue if part_type == parted.PARTITION_NORMAL: # we're allocating a primary and the region is not within # the extended, so we use the original region free_geom = _range else: free_geom = _range log.debug("current free range is %d-%d (%dMB)" % (free_geom.start, free_geom.end, free_geom.getSize())) free_size = free_geom.getSize() if req_size <= free_size: if not best_free or free_geom.length > best_free.length: best_free = free_geom if boot: # if this is a bootable partition we want to # use the first freespace region large enough # to satisfy the request break return best_free def doPartitioning(storage, exclusiveDisks=None): """ Allocate and grow partitions. 
When this function returns without error, all PartitionDevice instances must have their parents set to the disk they are allocated on, and their partedPartition attribute set to the appropriate parted.Partition instance from their containing disk. All req_xxxx attributes must be unchanged. Arguments: storage - Main anaconda Storage instance Keyword arguments: exclusiveDisks -- list of names of disks to use """ anaconda = storage.anaconda disks = storage.disks if exclusiveDisks: disks = [d for d in disks if d.name in exclusiveDisks] partitions = storage.partitions for part in partitions: part.req_bootable = False if not part.exists: # start over with flexible-size requests part.req_size = part.req_base_size # FIXME: isn't there a better place for this to happen? try: bootDev = anaconda.platform.bootDevice() except DeviceError: bootDev = None if bootDev: bootDev.req_bootable = True # FIXME: make sure non-existent partitions have empty parents list allocatePartitions(disks, partitions) growPartitions(disks, partitions) # XXX hack -- if we created any extended partitions we need to add # them to the tree now for disk in disks: extended = disk.partedDisk.getExtendedPartition() if not extended or \ extended.getDeviceNodeName() in [p.name for p in partitions]: # this extended partition is preexisting continue # This is a little odd because normally instantiating a partition # that does not exist means leaving self.parents empty and instead # populating self.req_disks. In this case, we need to skip past # that since this partition is already defined. device = PartitionDevice(extended.getDeviceNodeName(), parents=disk) device.parents = [disk] device.partedPartition = extended storage.createDevice(device) def allocatePartitions(disks, partitions): """ Allocate partitions based on requested features. Non-existing partitions are sorted according to their requested attributes, and then allocated. 
The basic approach to sorting is that the more specifically- defined a request is, the earlier it will be allocated. See the function partitionCompare for details on the sorting criteria. The PartitionDevice instances will have their name and parents attributes set once they have been allocated. """ log.debug("allocatePartitions: disks=%s ; partitions=%s" % (disks, partitions)) new_partitions = [p for p in partitions if not p.exists] new_partitions.sort(cmp=partitionCompare) # XXX is this needed anymore? partedDisks = {} for disk in disks: if disk.path not in partedDisks.keys(): partedDisks[disk.path] = disk.partedDisk #.duplicate() # remove all newly added partitions from the disk log.debug("removing all non-preexisting from disk(s)") for _part in new_partitions: if _part.partedPartition: if _part.isExtended: # these get removed last continue #_part.disk.partedDisk.removePartition(_part.partedPartition) partedDisk = partedDisks[_part.disk.partedDisk.device.path] #log.debug("removing part %s (%s) from disk %s (%s)" % (_part.partedPartition.path, [p.path for p in _part.partedPartition.disk.partitions], partedDisk.device.path, [p.path for p in partedDisk.partitions])) if not partedDisk.getPartitionByPath(_part.path): continue partedDisk.removePartition(_part.partedPartition) # remove empty extended so it doesn't interfere extended = partedDisk.getExtendedPartition() if extended and not partedDisk.getLogicalPartitions(): log.debug("removing empty extended partition") #partedDisk.minimizeExtendedPartition() partedDisk.removePartition(extended) for _part in new_partitions: if _part.partedPartition and _part.isExtended: # ignore new extendeds as they are implicit requests continue # obtain the set of candidate disks req_disks = [] if _part.disk: # we have a already selected a disk for this request req_disks = [_part.disk] elif _part.req_disks: # use the requested disk set req_disks = _part.req_disks else: # no disks specified means any disk will do req_disks = disks 
log.debug("allocating partition: %s ; disks: %s ; boot: %s ; primary: %s ; size: %dMB ; grow: %s ; max_size: %s" % (_part.name, req_disks, _part.req_bootable, _part.req_primary, _part.req_size, _part.req_grow, _part.req_max_size)) free = None use_disk = None part_type = None # loop through disks for _disk in req_disks: disk = partedDisks[_disk.path] #for p in disk.partitions: # log.debug("disk %s: part %s" % (disk.device.path, p.path)) sectorSize = disk.device.physicalSectorSize best = None # TODO: On alpha we are supposed to reserve either one or two # MB at the beginning of each disk. Awesome. # -- maybe we do not care about alpha... log.debug("checking freespace on %s" % _disk.name) new_part_type = getNextPartitionType(disk) if new_part_type is None: # can't allocate any more partitions on this disk log.debug("no free partition slots on %s" % _disk.name) continue if _part.req_primary and new_part_type != parted.PARTITION_NORMAL: # we need a primary slot and none are free on this disk log.debug("no primary slots available on %s" % _disk.name) continue best = getBestFreeSpaceRegion(disk, new_part_type, _part.req_size, best_free=free, boot=_part.req_bootable) if best == free and not _part.req_primary and \ new_part_type == parted.PARTITION_NORMAL: # see if we can do better with a logical partition log.debug("not enough free space for primary -- trying logical") new_part_type = getNextPartitionType(disk, no_primary=True) if new_part_type: best = getBestFreeSpaceRegion(disk, new_part_type, _part.req_size, best_free=free, boot=_part.req_bootable) if best and free != best: # now we know we are choosing a new free space, # so update the disk and part type log.debug("updating use_disk to %s (%s), type: %s" % (_disk, _disk.name, new_part_type)) part_type = new_part_type use_disk = _disk log.debug("new free: %s (%d-%d / %dMB)" % (best, best.start, best.end, best.getSize())) free = best if free and _part.req_bootable: # if this is a bootable partition we want to # use the 
first freespace region large enough # to satisfy the request log.debug("found free space for bootable request") break if free is None: raise PartitioningError("not enough free space on disks") _disk = use_disk disk = _disk.partedDisk # create the extended partition if needed # TODO: move to a function (disk, free) if part_type == parted.PARTITION_EXTENDED: log.debug("creating extended partition") geometry = parted.Geometry(device=disk.device, start=free.start, length=free.length, end=free.end) extended = parted.Partition(disk=disk, type=parted.PARTITION_EXTENDED, geometry=geometry) constraint = parted.Constraint(device=disk.device) # FIXME: we should add this to the tree as well disk.addPartition(extended, constraint) # end proposed function # now the extended partition exists, so set type to logical part_type = parted.PARTITION_LOGICAL # recalculate freespace log.debug("recalculating free space") free = getBestFreeSpaceRegion(disk, part_type, _part.req_size, boot=_part.req_bootable) if not free: raise PartitioningError("not enough free space after " "creating extended partition") # create minimum geometry for this request # req_size is in MB sectors_per_track = disk.device.biosGeometry[2] length = (_part.req_size * (1024 * 1024)) / sectorSize new_geom = parted.Geometry(device=disk.device, start=max(sectors_per_track, free.start), length=length) # create the partition and add it to the disk partition = parted.Partition(disk=disk, type=part_type, geometry=new_geom) constraint = parted.Constraint(exactGeom=new_geom) disk.addPartition(partition=partition, constraint=constraint) log.debug("created partition %s of %dMB and added it to %s" % (partition.getDeviceNodeName(), partition.getSize(), disk)) # this one sets the name _part.partedPartition = partition _part.disk = _disk # parted modifies the partition in the process of adding it to # the disk, so we need to grab the latest version... 
_part.partedPartition = disk.getPartitionByPath(_part.path) def growPartitions(disks, partitions): """ Grow all growable partition requests. All requests should know what disk they will be on by the time this function is called. This is reflected in the PartitionDevice's disk attribute. Note that the req_disks attribute remains unchanged. The total available free space is summed up for each disk and partition requests are allocated a maximum percentage of the available free space on their disk based on their own base size. Each attempted size means calling allocatePartitions again with one request's size having changed. After taking into account several factors that may limit the maximum size of a requested partition, we arrive at a firm maximum number of sectors by which a request can potentially grow. An initial attempt is made to allocate the full maximum size. If this fails, we begin a rough binary search with a maximum of three iterations to settle on a new size. Arguments: disks -- a list of all usable disks (DiskDevice instances) partitions -- a list of all partitions (PartitionDevice instances) """ log.debug("growPartitions: disks=%s, partitions=%s" % ([d.name for d in disks], [p.name for p in partitions])) all_growable = [p for p in partitions if p.req_grow] if not all_growable: return # sort requests by base size in decreasing order all_growable.sort(key=lambda p: p.req_size, reverse=True) log.debug("growable requests are %s" % [p.name for p in all_growable]) for disk in disks: log.debug("growing requests on %s" % disk.name) for p in disk.partedDisk.partitions: log.debug(" %s: %s (%dMB)" % (disk.name, p.getDeviceNodeName(), p.getSize())) sectorSize = disk.partedDisk.device.physicalSectorSize # get a list of free space regions on the disk free = disk.partedDisk.getFreeSpaceRegions() if not free: log.debug("no free space on %s" % disk.name) continue # sort the free regions in decreasing order of size free.sort(key=lambda r: r.length, reverse=True) disk_free 
= reduce(lambda x,y: x + y, [f.length for f in free]) log.debug("total free: %d sectors ; largest: %d sectors (%dMB)" % (disk_free, free[0].length, free[0].getSize())) # make a list of partitions currently allocated on this disk # -- they're already sorted growable = [] disk_total = 0 for part in all_growable: #log.debug("checking if part %s (%s) is on this disk" % (part.name, # part.disk.name)) if part.disk == disk: growable.append(part) disk_total += part.partedPartition.geometry.length log.debug("add %s (%dMB/%d sectors) to growable total" % (part.name, part.partedPartition.getSize(), part.partedPartition.geometry.length)) log.debug("growable total is now %d sectors" % disk_total) # now we loop through the partitions... for part in growable: # calculate max number of sectors this request can grow req_sectors = part.partedPartition.geometry.length share = float(req_sectors) / float(disk_total) max_grow = (share * disk_free) max_sectors = req_sectors + max_grow max_mb = (max_sectors * sectorSize) / (1024 * 1024) log.debug("%s: base_size=%dMB, max_size=%sMB" % (part.name, part.req_base_size, part.req_max_size)) log.debug("%s: current_size=%dMB (%d sectors)" % (part.name, part.partedPartition.getSize(), part.partedPartition.geometry.length)) log.debug("%s: %dMB (%d sectors, or %d%% of %d)" % (part.name, max_mb, max_sectors, share * 100, disk_free)) log.debug("checking constraints on max size...") # don't grow beyond the request's maximum size if part.req_max_size: log.debug("max_size: %dMB" % part.req_max_size) # FIXME: round down to nearest cylinder boundary req_max_sect = (part.req_max_size * (1024 * 1024)) / sectorSize if req_max_sect < max_sectors: max_grow -= (max_sectors - req_max_sect) max_sectors = req_sectors + max_grow # don't grow beyond the resident filesystem's max size if part.format.maxSize > 0: log.debug("format maxsize: %dMB" % part.format.maxSize) # FIXME: round down to nearest cylinder boundary fs_max_sect = (part.format.maxSize * (1024 * 1024)) / 
sectorSize if fs_max_sect < max_sectors: max_grow -= (max_sectors - fs_max_sect) max_sectors = req_sectors + max_grow # we can only grow as much as the largest free region on the disk if free[0].length < max_grow: log.debug("largest free region: %d sectors (%dMB)" % (free[0].length, free[0].getSize())) # FIXME: round down to nearest cylinder boundary max_grow = free[0].length max_sectors = req_sectors + max_grow # Now, we try to grow this partition as close to max_grow # sectors as we can. # # We could call allocatePartitions after modifying this # request and saving the original value of part.req_size, # or we could try to use disk.maximizePartition(). max_size = (max_sectors * sectorSize) / (1024 * 1024) orig_size = part.req_size # try the max size to begin with log.debug("attempting to allocate maximum size: %dMB" % max_size) part.req_size = max_size try: allocatePartitions(disks, partitions) except PartitioningError, e: log.debug("max size attempt failed: %s (%dMB)" % (part.name, max_size)) part.req_size = orig_size else: continue log.debug("starting binary search: size=%d max_size=%d" % (part.req_size, max_size)) count = 0 op_func = add increment = max_sectors last_good_size = part.req_size last_outcome = None while (part.partedPartition.geometry.length < max_sectors and count < 3): last_size = part.req_size increment /= 2 req_sectors = op_func(req_sectors, increment) part.req_size = (req_sectors * sectorSize) / (1024 * 1024) log.debug("attempting size=%dMB" % part.req_size) count += 1 try: allocatePartitions(disks, partitions) except PartitioningError, e: log.debug("attempt at %dMB failed" % part.req_size) op_func = sub last_outcome = False else: last_good_size = part.req_size last_outcome = True if not last_outcome: part.req_size = last_good_size log.debug("backing up to size=%dMB" % part.req_size) try: allocatePartitions(disks, partitions) except PartitioningError, e: raise PartitioningError("failed to grow partitions") # reset all requests to their 
original requested size for part in partitions: if part.exists: continue part.req_size = part.req_base_size def lvCompare(lv1, lv2): """ More specifically defined lvs come first. < 1 => x < y 0 => x == y > 1 => x > y """ ret = 0 # larger requests go to the front of the list ret -= cmp(lv1.size, lv2.size) * 100 # fixed size requests to the front ret += cmp(lv1.req_grow, lv2.req_grow) * 50 # potentially larger growable requests go to the front if lv1.req_grow and lv2.req_grow: if not lv1.req_max_size and lv2.req_max_size: ret -= 25 elif lv1.req_max_size and not lv2.req_max_size: ret += 25 else: ret -= cmp(lv1.req_max_size, lv2.req_max_size) * 25 if ret > 0: ret = 1 elif ret < 0: ret = -1 return ret def growLVM(storage): """ Grow LVs according to the sizes of the PVs. """ for vg in storage.vgs: total_free = vg.freeSpace if not total_free: log.debug("vg %s has no free space" % vg.name) continue log.debug("vg %s: %dMB free ; lvs: %s" % (vg.name, vg.freeSpace, [l.lvname for l in vg.lvs])) # figure out how much to grow each LV grow_amounts = {} lv_total = vg.size - total_free log.debug("used: %dMB ; vg.size: %dMB" % (lv_total, vg.size)) # This first loop is to calculate percentage-based growth # amounts. These are based on total free space. lvs = vg.lvs lvs.sort(cmp=lvCompare) for lv in lvs: if not lv.req_grow or not lv.req_percent: continue portion = (lv.req_percent * 0.01) grow = portion * vg.vgFree new_size = lv.req_size + grow if lv.req_max_size and new_size > lv.req_max_size: grow -= (new_size - lv.req_max_size) if lv.format.maxSize and lv.format.maxSize < new_size: grow -= (new_size - lv.format.maxSize) # clamp growth amount to a multiple of vg extent size grow_amounts[lv.name] = vg.align(grow) total_free -= grow lv_total += grow # This second loop is to calculate non-percentage-based growth # amounts. These are based on free space remaining after # calculating percentage-based growth amounts. 
# keep a tab on space not allocated due to format or requested # maximums -- we'll dole it out to subsequent requests leftover = 0 for lv in lvs: log.debug("checking lv %s: req_grow: %s ; req_percent: %s" % (lv.name, lv.req_grow, lv.req_percent)) if not lv.req_grow or lv.req_percent: continue portion = float(lv.req_size) / float(lv_total) grow = portion * total_free log.debug("grow is %dMB" % grow) todo = lvs[lvs.index(lv):] unallocated = reduce(lambda x,y: x+y, [l.req_size for l in todo if l.req_grow and not l.req_percent]) extra_portion = float(lv.req_size) / float(unallocated) extra = extra_portion * leftover log.debug("%s getting %dMB (%d%%) of %dMB leftover space" % (lv.name, extra, extra_portion * 100, leftover)) leftover -= extra grow += extra log.debug("grow is now %dMB" % grow) max_size = lv.req_size + grow if lv.req_max_size and max_size > lv.req_max_size: max_size = lv.req_max_size if lv.format.maxSize and max_size > lv.format.maxSize: max_size = lv.format.maxSize log.debug("max size is %dMB" % max_size) max_size = max_size leftover += (lv.req_size + grow) - max_size grow = max_size - lv.req_size log.debug("lv %s gets %dMB" % (lv.name, vg.align(grow))) grow_amounts[lv.name] = vg.align(grow) if not grow_amounts: log.debug("no growable lvs in vg %s" % vg.name) continue # now grow the lvs by the amounts we've calculated above for lv in lvs: if lv.name not in grow_amounts.keys(): continue lv.size += grow_amounts[lv.name] # now there shouldn't be any free space left, but if there is we # should allocate it to one of the LVs vg_free = vg.freeSpace log.debug("vg %s has %dMB free" % (vg.name, vg_free)) if vg_free: for lv in lvs: if not lv.req_grow: continue if lv.req_max_size and lv.size == lv.req_max_size: continue if lv.format.maxSize and lv.size == lv.format.maxSize: continue # first come, first served projected = lv.size + vg.freeSpace if lv.req_max_size and projected > lv.req_max_size: projected = lv.req_max_size if lv.format.maxSize and projected > 
lv.format.maxSize: projected = lv.format.maxSize log.debug("giving leftover %dMB to %s" % (projected - lv.size, lv.name)) lv.size = projected