author    Dan Williams <dan.j.williams@intel.com>  2009-12-21 17:54:32 -0700
committer Dan Williams <dan.j.williams@intel.com>  2009-12-21 17:54:32 -0700
commit 1e5c69836d4d0b6dcaef8fc187e6bf2841eb57f6 (patch)
tree   8fbd3075e85acccba73a9845ef3cdae9bf6d8e89 /super-intel.c
parent 2904b26f059c5d82d9d631c9987e92e3f9af498c (diff)
imsm: add support for checkpointing via 'curr_migr_unit'
Unlike native md checkpointing, some data about the geometry and type of the migration process is encoded in curr_migr_unit. Provide logic to convert between md/{resync_start|recovery_start} and imsm/curr_migr_unit.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
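For orientation before the diff: md expresses progress in per-member blocks (resync_start/recovery_start) while imsm expresses it in migration units, so converting between the two is a multiply or divide by a per-volume factor. A minimal sketch of that conversion, using the file's __u32/__u64 types, with hypothetical helper names (the real factor is computed by blocks_per_migr_unit() in the diff below, and the real write-back path additionally checks that the unit count fits in 32 bits):

    /* hypothetical helpers sketching the conversion this patch adds;
     * blocks_per_unit is what blocks_per_migr_unit() computes per volume
     */
    static __u64 checkpoint_to_blocks(__u32 curr_migr_unit,
                                      __u64 blocks_per_unit)
    {
            /* md resync_start/recovery_start from an imsm checkpoint */
            return (__u64) curr_migr_unit * blocks_per_unit;
    }

    static __u32 blocks_to_checkpoint(__u64 resync_start,
                                      __u64 blocks_per_unit)
    {
            /* imsm checkpoint from md progress; integer division rounds
             * down so the checkpoint never overstates progress, and 0
             * means 'not migrating' or 'unsupported migration'
             */
            return blocks_per_unit ? resync_start / blocks_per_unit : 0;
    }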
Diffstat (limited to 'super-intel.c')
-rw-r--r--  super-intel.c | 320
1 file changed, 288 insertions(+), 32 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 9f879c5..609aaf5 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -635,6 +635,8 @@ static int is_failed(struct imsm_disk *disk)
}
#ifndef MDASSEMBLE
+static __u64 blocks_per_migr_unit(struct imsm_dev *dev);
+
static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
{
__u64 sz;
@@ -690,7 +692,11 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
printf(" Map State : %s", map_state_str[map->map_state]);
if (dev->vol.migr_state) {
struct imsm_map *map = get_imsm_map(dev, 1);
+
printf(" <-- %s", map_state_str[map->map_state]);
+ printf("\n Checkpoint : %u (%llu)",
+ __le32_to_cpu(dev->vol.curr_migr_unit),
+ blocks_per_migr_unit(dev));
}
printf("\n");
printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
@@ -1216,6 +1222,179 @@ get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
}
#endif
+static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
+{
+ /* migr_strip_size when repairing or initializing parity */
+ struct imsm_map *map = get_imsm_map(dev, 0);
+ __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
+
+ switch (get_imsm_raid_level(map)) {
+ case 5:
+ case 10:
+ return chunk;
+ default:
+ return 128*1024 >> 9;
+ }
+}
+
+static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
+{
+ /* migr_strip_size when rebuilding a degraded disk; no idea why
+ * this is different from migr_strip_blocks_resync(), but it's good
+ * to be compatible
+ */
+ struct imsm_map *map = get_imsm_map(dev, 1);
+ __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
+
+ switch (get_imsm_raid_level(map)) {
+ case 1:
+ case 10:
+ if (map->num_members % map->num_domains == 0)
+ return 128*1024 >> 9;
+ else
+ return chunk;
+ case 5:
+ return max((__u32) 64*1024 >> 9, chunk);
+ default:
+ return 128*1024 >> 9;
+ }
+}
+
+static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
+{
+ struct imsm_map *lo = get_imsm_map(dev, 0);
+ struct imsm_map *hi = get_imsm_map(dev, 1);
+ __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
+ __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
+
+ return max((__u32) 1, hi_chunk / lo_chunk);
+}
+
+static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
+{
+ struct imsm_map *lo = get_imsm_map(dev, 0);
+ int level = get_imsm_raid_level(lo);
+
+ if (level == 1 || level == 10) {
+ struct imsm_map *hi = get_imsm_map(dev, 1);
+
+ return hi->num_domains;
+ } else
+ return num_stripes_per_unit_resync(dev);
+}
+
+static __u8 imsm_num_data_members(struct imsm_dev *dev)
+{
+ /* named 'imsm_' because raid0, raid1 and raid10
+ * counter-intuitively have the same number of data disks
+ */
+ struct imsm_map *map = get_imsm_map(dev, 0);
+
+ switch (get_imsm_raid_level(map)) {
+ case 0:
+ case 1:
+ case 10:
+ return map->num_members;
+ case 5:
+ return map->num_members - 1;
+ default:
+ dprintf("%s: unsupported raid level\n", __func__);
+ return 0;
+ }
+}
+
+static __u32 parity_segment_depth(struct imsm_dev *dev)
+{
+ struct imsm_map *map = get_imsm_map(dev, 0);
+ __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
+
+ switch(get_imsm_raid_level(map)) {
+ case 1:
+ case 10:
+ return chunk * map->num_domains;
+ case 5:
+ return chunk * map->num_members;
+ default:
+ return chunk;
+ }
+}
+
+static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
+{
+ struct imsm_map *map = get_imsm_map(dev, 1);
+ __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
+ __u32 strip = block / chunk;
+
+ switch (get_imsm_raid_level(map)) {
+ case 1:
+ case 10: {
+ __u32 vol_strip = (strip * map->num_domains) + 1;
+ __u32 vol_stripe = vol_strip / map->num_members;
+
+ return vol_stripe * chunk + block % chunk;
+ } case 5: {
+ __u32 stripe = strip / (map->num_members - 1);
+
+ return stripe * chunk + block % chunk;
+ }
+ default:
+ return 0;
+ }
+}
+
+static __u64 blocks_per_migr_unit(struct imsm_dev *dev)
+{
+ /* calculate the conversion factor between per member 'blocks'
+ * (md/{resync,rebuild}_start) and imsm migration units, return
+ * 0 for the 'not migrating' and 'unsupported migration' cases
+ */
+ if (!dev->vol.migr_state)
+ return 0;
+
+ switch (migr_type(dev)) {
+ case MIGR_VERIFY:
+ case MIGR_REPAIR:
+ case MIGR_INIT: {
+ struct imsm_map *map = get_imsm_map(dev, 0);
+ __u32 stripes_per_unit;
+ __u32 blocks_per_unit;
+ __u32 parity_depth;
+ __u32 migr_chunk;
+ __u32 block_map;
+ __u32 block_rel;
+ __u32 segment;
+ __u32 stripe;
+ __u8 disks;
+
+ /* yes, this is really the translation of migr_units to
+ * per-member blocks in the 'resync' case
+ */
+ stripes_per_unit = num_stripes_per_unit_resync(dev);
+ migr_chunk = migr_strip_blocks_resync(dev);
+ disks = imsm_num_data_members(dev);
+ blocks_per_unit = stripes_per_unit * migr_chunk * disks;
+ stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
+ segment = blocks_per_unit / stripe;
+ block_rel = blocks_per_unit - segment * stripe;
+ parity_depth = parity_segment_depth(dev);
+ block_map = map_migr_block(dev, block_rel);
+ return block_map + parity_depth * segment;
+ }
+ case MIGR_REBUILD: {
+ __u32 stripes_per_unit;
+ __u32 migr_chunk;
+
+ stripes_per_unit = num_stripes_per_unit_rebuild(dev);
+ migr_chunk = migr_strip_blocks_rebuild(dev);
+ return migr_chunk * stripes_per_unit;
+ }
+ case MIGR_GEN_MIGR:
+ case MIGR_STATE_CHANGE:
+ default:
+ return 0;
+ }
+}
+
static int imsm_level_to_layout(int level)
{
switch (level) {
@@ -1265,12 +1444,33 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
info->component_size = __le32_to_cpu(map->blocks_per_member);
memset(info->uuid, 0, sizeof(info->uuid));
- if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty)
+ if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty) {
info->resync_start = 0;
- else if (dev->vol.migr_state)
- /* FIXME add curr_migr_unit to resync_start conversion */
- info->resync_start = 0;
- else
+ } else if (dev->vol.migr_state) {
+ switch (migr_type(dev)) {
+ case MIGR_REPAIR:
+ case MIGR_INIT: {
+ __u64 blocks_per_unit = blocks_per_migr_unit(dev);
+ __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
+
+ info->resync_start = blocks_per_unit * units;
+ break;
+ }
+ case MIGR_VERIFY:
+ /* we could emulate the checkpointing of
+ * 'sync_action=check' migrations, but for now
+ * we just immediately complete them
+ */
+ case MIGR_REBUILD:
+ /* this is handled by container_content_imsm() */
+ case MIGR_GEN_MIGR:
+ case MIGR_STATE_CHANGE:
+ /* FIXME handle other migrations */
+ default:
+ /* we are not dirty, so... */
+ info->resync_start = MaxSector;
+ }
+ } else
info->resync_start = MaxSector;
strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
@@ -3782,6 +3982,46 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
}
#endif /* MDASSEMBLE */
+static int is_rebuilding(struct imsm_dev *dev)
+{
+ struct imsm_map *migr_map;
+
+ if (!dev->vol.migr_state)
+ return 0;
+
+ if (migr_type(dev) != MIGR_REBUILD)
+ return 0;
+
+ migr_map = get_imsm_map(dev, 1);
+
+ if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
+ return 1;
+ else
+ return 0;
+}
+
+static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array)
+{
+ struct mdinfo *rebuild = NULL;
+ struct mdinfo *d;
+ __u32 units;
+
+ if (!is_rebuilding(dev))
+ return;
+
+ /* Find the rebuild target, but punt on the dual rebuild case */
+ for (d = array->devs; d; d = d->next)
+ if (d->recovery_start == 0) {
+ if (rebuild)
+ return;
+ rebuild = d;
+ }
+
+ units = __le32_to_cpu(dev->vol.curr_migr_unit);
+ rebuild->recovery_start = units * blocks_per_migr_unit(dev);
+}
+
+
static struct mdinfo *container_content_imsm(struct supertype *st)
{
/* Given a container loaded by load_super_imsm_all,
@@ -3829,6 +4069,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
super->current_vol = i;
getinfo_super_imsm_volume(st, this);
for (slot = 0 ; slot < map->num_members; slot++) {
+ unsigned long long recovery_start;
struct mdinfo *info_d;
struct dl *d;
int idx;
@@ -3842,33 +4083,41 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
if (d->index == idx)
break;
+ recovery_start = MaxSector;
if (d == NULL)
skip = 1;
if (d && is_failed(&d->disk))
skip = 1;
if (ord & IMSM_ORD_REBUILD)
- skip = 1;
+ recovery_start = 0;
/*
* if we skip some disks the array will be assembled degraded;
- * reset resync start to avoid a dirty-degraded situation
+ * reset resync start to avoid a dirty-degraded
+ * situation when performing the initial sync
*
* FIXME handle dirty degraded
*/
- if (skip && !dev->vol.dirty)
+ if ((skip || recovery_start == 0) && !dev->vol.dirty)
this->resync_start = MaxSector;
if (skip)
continue;
- info_d = malloc(sizeof(*info_d));
+ info_d = calloc(1, sizeof(*info_d));
if (!info_d) {
fprintf(stderr, Name ": failed to allocate disk"
" for volume %.16s\n", dev->volume);
+ info_d = this->devs;
+ while (info_d) {
+ struct mdinfo *d = info_d->next;
+
+ free(info_d);
+ info_d = d;
+ }
free(this);
this = rest;
break;
}
- memset(info_d, 0, sizeof(*info_d));
info_d->next = this->devs;
this->devs = info_d;
@@ -3876,9 +4125,10 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
info_d->disk.major = d->major;
info_d->disk.minor = d->minor;
info_d->disk.raid_disk = slot;
- info_d->recovery_start = MaxSector;
+ info_d->recovery_start = recovery_start;
- this->array.working_disks++;
+ if (info_d->recovery_start == MaxSector)
+ this->array.working_disks++;
info_d->events = __le32_to_cpu(mpb->generation_num);
info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
@@ -3886,6 +4136,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
if (d->devname)
strcpy(info_d->name, d->devname);
}
+ /* now that the disk list is up to date, fix up recovery_start */
+ update_recovery_start(dev, this);
rest = this;
}
@@ -4028,24 +4280,6 @@ static int is_resyncing(struct imsm_dev *dev)
return 0;
}
-static int is_rebuilding(struct imsm_dev *dev)
-{
- struct imsm_map *migr_map;
-
- if (!dev->vol.migr_state)
- return 0;
-
- if (migr_type(dev) != MIGR_REBUILD)
- return 0;
-
- migr_map = get_imsm_map(dev, 1);
-
- if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
- return 1;
- else
- return 0;
-}
-
/* return true if we recorded new information */
static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
{
@@ -4096,6 +4330,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
struct imsm_map *map = get_imsm_map(dev, 0);
int failed = imsm_count_failed(super, dev);
__u8 map_state = imsm_check_degraded(super, dev, failed);
+ __u32 blocks_per_unit;
/* before we activate this array handle any missing disks */
if (consistent == 2 && super->missing) {
@@ -4107,7 +4342,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
mark_missing(dev, &dl->disk, dl->index);
super->updates_pending++;
}
-
+
if (consistent == 2 &&
(!is_resync_complete(&a->info) ||
map_state != IMSM_T_STATE_NORMAL ||
@@ -4134,7 +4369,28 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
super->updates_pending++;
}
- /* FIXME check if we can update curr_migr_unit from resync_start */
+ /* check if we can update curr_migr_unit from resync_start, recovery_start */
+ blocks_per_unit = blocks_per_migr_unit(dev);
+ if (blocks_per_unit && failed <= 1) {
+ __u32 units32;
+ __u64 units;
+
+ if (migr_type(dev) == MIGR_REBUILD)
+ units = min_recovery_start(&a->info) / blocks_per_unit;
+ else
+ units = a->info.resync_start / blocks_per_unit;
+ units32 = units;
+
+ /* check that we did not overflow 32-bits, and that
+ * curr_migr_unit needs updating
+ */
+ if (units32 == units &&
+ __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
+ dprintf("imsm: mark checkpoint (%u)\n", units32);
+ dev->vol.curr_migr_unit = __cpu_to_le32(units32);
+ super->updates_pending++;
+ }
+ }
/* mark dirty / clean */
if (dev->vol.dirty != !consistent) {