summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2009-12-30 13:42:37 +1100
committerNeilBrown <neilb@suse.de>2009-12-30 13:42:37 +1100
commitc1e3ab8c1e76f4ae71ab23bcf5e8c2bf8bd3774e (patch)
tree29f137783fd18fa14d1f96758bba3d15212a7904
parent076515ba5042459c17eb5718483ed09c9e567f5d (diff)
parent1e5c69836d4d0b6dcaef8fc187e6bf2841eb57f6 (diff)
downloadmdadm-c1e3ab8c1e76f4ae71ab23bcf5e8c2bf8bd3774e.tar.gz
mdadm-c1e3ab8c1e76f4ae71ab23bcf5e8c2bf8bd3774e.tar.xz
mdadm-c1e3ab8c1e76f4ae71ab23bcf5e8c2bf8bd3774e.zip
Merge branch 'master' of git://github.com/djbw/mdadm
-rw-r--r--Create.c2
-rw-r--r--Manage.c1
-rw-r--r--bitmap.c2
-rw-r--r--managemon.c93
-rw-r--r--mdadm.h27
-rw-r--r--mdmon.h7
-rw-r--r--monitor.c23
-rw-r--r--super-ddf.c14
-rw-r--r--super-intel.c432
-rw-r--r--super1.c6
-rw-r--r--sysfs.c15
-rw-r--r--util.c24
12 files changed, 493 insertions, 153 deletions
diff --git a/Create.c b/Create.c
index d104c05..7714828 100644
--- a/Create.c
+++ b/Create.c
@@ -538,7 +538,7 @@ int Create(struct supertype *st, char *mddev,
assume_clean
) {
info.array.state = 1; /* clean, but one+ drive will be missing*/
- info.resync_start = ~0ULL;
+ info.resync_start = MaxSector;
} else {
info.array.state = 0; /* not clean, but no errors */
info.resync_start = 0;
diff --git a/Manage.c b/Manage.c
index 1ca371d..749fa7c 100644
--- a/Manage.c
+++ b/Manage.c
@@ -699,6 +699,7 @@ int Manage_subdevs(char *devname, int fd,
tst->ss->getinfo_super(tst, &new_mdi);
new_mdi.disk.major = disc.major;
new_mdi.disk.minor = disc.minor;
+ new_mdi.recovery_start = 0;
if (sysfs_add_disk(sra, &new_mdi, 0) != 0) {
fprintf(stderr, Name ": add new device to external metadata"
" failed for %s\n", dv->devname);
diff --git a/bitmap.c b/bitmap.c
index 850b0ce..088e37d 100644
--- a/bitmap.c
+++ b/bitmap.c
@@ -20,8 +20,6 @@
#include "mdadm.h"
-#define min(a,b) (((a) < (b)) ? (a) : (b))
-
inline void sb_le_to_cpu(bitmap_super_t *sb)
{
sb->magic = __le32_to_cpu(sb->magic);
diff --git a/managemon.c b/managemon.c
index 5958e18..e335077 100644
--- a/managemon.c
+++ b/managemon.c
@@ -112,8 +112,10 @@ static void close_aa(struct active_array *aa)
{
struct mdinfo *d;
- for (d = aa->info.devs; d; d = d->next)
+ for (d = aa->info.devs; d; d = d->next) {
+ close(d->recovery_fd);
close(d->state_fd);
+ }
close(aa->action_fd);
close(aa->info.state_fd);
@@ -209,16 +211,22 @@ struct metadata_update *update_queue = NULL;
struct metadata_update *update_queue_handled = NULL;
struct metadata_update *update_queue_pending = NULL;
-void check_update_queue(struct supertype *container)
+static void free_updates(struct metadata_update **update)
{
- while (update_queue_handled) {
- struct metadata_update *this = update_queue_handled;
- update_queue_handled = this->next;
+ while (*update) {
+ struct metadata_update *this = *update;
+
+ *update = this->next;
free(this->buf);
- if (this->space)
- free(this->space);
+ free(this->space);
free(this);
}
+}
+
+void check_update_queue(struct supertype *container)
+{
+ free_updates(&update_queue_handled);
+
if (update_queue == NULL &&
update_queue_pending) {
update_queue = update_queue_pending;
@@ -376,8 +384,9 @@ static void manage_member(struct mdstat_ent *mdstat,
if (a->check_degraded) {
struct metadata_update *updates = NULL;
- struct mdinfo *newdev;
+ struct mdinfo *newdev = NULL;
struct active_array *newa;
+ struct mdinfo *d;
a->check_degraded = 0;
@@ -385,34 +394,46 @@ static void manage_member(struct mdstat_ent *mdstat,
* to check.
*/
newdev = a->container->ss->activate_spare(a, &updates);
- if (newdev) {
- struct mdinfo *d;
- /* Cool, we can add a device or several. */
- newa = duplicate_aa(a);
- /* suspend recovery - maybe not needed */
-
- /* Add device to array and set offset/size/slot.
- * and open files for each newdev */
- for (d = newdev; d ; d = d->next) {
- struct mdinfo *newd;
- if (sysfs_add_disk(&newa->info, d, 0) < 0)
- continue;
- newd = malloc(sizeof(*newd));
- *newd = *d;
- newd->next = newa->info.devs;
- newa->info.devs = newd;
-
- newd->state_fd = sysfs_open(a->devnum,
- newd->sys_name,
- "state");
- newd->prev_state
- = read_dev_state(newd->state_fd);
- newd->curr_state = newd->prev_state;
+ if (!newdev)
+ return;
+
+ newa = duplicate_aa(a);
+ if (!newa)
+ goto out;
+ /* Cool, we can add a device or several. */
+
+ /* Add device to array and set offset/size/slot.
+ * and open files for each newdev */
+ for (d = newdev; d ; d = d->next) {
+ struct mdinfo *newd;
+
+ newd = malloc(sizeof(*newd));
+ if (!newd)
+ continue;
+ if (sysfs_add_disk(&newa->info, d, 0) < 0) {
+ free(newd);
+ continue;
}
- queue_metadata_update(updates);
- replace_array(a->container, a, newa);
- sysfs_set_str(&a->info, NULL, "sync_action", "recover");
+ *newd = *d;
+ newd->next = newa->info.devs;
+ newa->info.devs = newd;
+
+ newd->state_fd = sysfs_open(a->devnum, newd->sys_name,
+ "state");
+ newd->prev_state = read_dev_state(newd->state_fd);
+ newd->curr_state = newd->prev_state;
+ }
+ queue_metadata_update(updates);
+ updates = NULL;
+ replace_array(a->container, a, newa);
+ sysfs_set_str(&a->info, NULL, "sync_action", "recover");
+ out:
+ while (newdev) {
+ d = newdev->next;
+ free(newdev);
+ newdev = d;
}
+ free_updates(&updates);
}
}
@@ -498,6 +519,9 @@ static void manage_new(struct mdstat_ent *mdstat,
newd->state_fd = sysfs_open(new->devnum,
newd->sys_name,
"state");
+ newd->recovery_fd = sysfs_open(new->devnum,
+ newd->sys_name,
+ "recovery_start");
newd->prev_state = read_dev_state(newd->state_fd);
newd->curr_state = newd->prev_state;
@@ -522,7 +546,6 @@ static void manage_new(struct mdstat_ent *mdstat,
new->info.state_fd = sysfs_open(new->devnum, NULL, "array_state");
new->resync_start_fd = sysfs_open(new->devnum, NULL, "resync_start");
new->metadata_fd = sysfs_open(new->devnum, NULL, "metadata_version");
- get_resync_start(new);
dprintf("%s: inst: %d action: %d state: %d\n", __func__, atoi(inst),
new->action_fd, new->info.state_fd);
diff --git a/mdadm.h b/mdadm.h
index 2bfe840..7ff63cd 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -129,6 +129,22 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#endif /* __KLIBC__ */
+/*
+ * min()/max() macros that also do
+ * strict type-checking. See the
+ * "unnecessary" pointer comparison.
+ */
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+
+#define max(x, y) ({ \
+ typeof(x) _max1 = (x); \
+ typeof(y) _max2 = (y); \
+ (void) (&_max1 == &_max2); \
+ _max1 > _max2 ? _max1 : _max2; })
/* general information that might be extracted from a superblock */
struct mdinfo {
@@ -146,7 +162,11 @@ struct mdinfo {
*/
int reshape_active;
unsigned long long reshape_progress;
- unsigned long long resync_start;
+ union {
+ unsigned long long resync_start; /* per-array resync position */
+ unsigned long long recovery_start; /* per-device rebuild position */
+ #define MaxSector (~0ULL) /* resync/recovery complete position */
+ };
unsigned long safe_mode_delay; /* ms delay to mark clean */
int new_level, delta_disks, new_layout, new_chunk;
int errors;
@@ -168,6 +188,7 @@ struct mdinfo {
struct mdinfo *next;
/* Device info for mdmon: */
+ int recovery_fd;
int state_fd;
#define DS_FAULTY 1
#define DS_INSYNC 2
@@ -380,8 +401,7 @@ extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev,
char *name, char *val, int size);
extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms);
extern int sysfs_set_array(struct mdinfo *info, int vers);
-extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd,
- int in_sync);
+extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume);
extern int sysfs_disk_to_scsi_id(int fd, __u32 *id);
extern int sysfs_unique_holder(int devnum, long rdev);
extern int load_sys(char *path, char *buf);
@@ -839,6 +859,7 @@ extern int assemble_container_content(struct supertype *st, int mdfd,
extern int add_disk(int mdfd, struct supertype *st,
struct mdinfo *sra, struct mdinfo *info);
extern int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info);
+unsigned long long min_recovery_start(struct mdinfo *array);
extern char *human_size(long long bytes);
extern char *human_size_brief(long long bytes);
diff --git a/mdmon.h b/mdmon.h
index 7cfee35..4494085 100644
--- a/mdmon.h
+++ b/mdmon.h
@@ -39,8 +39,6 @@ struct active_array {
int check_degraded; /* flag set by mon, read by manage */
int devnum;
-
- unsigned long long resync_start;
};
/*
@@ -73,7 +71,6 @@ extern int socket_hup_requested;
extern int sigterm;
int read_dev_state(int fd);
-int get_resync_start(struct active_array *a);
int is_container_member(struct mdstat_ent *mdstat, char *container);
struct mdstat_ent *mdstat_read(int hold, int start);
@@ -85,9 +82,9 @@ extern int monitor_loop_cnt;
/* helper routine to determine resync completion since MaxSector is a
* moving target
*/
-static inline int is_resync_complete(struct active_array *a)
+static inline int is_resync_complete(struct mdinfo *array)
{
- if (a->resync_start >= a->info.component_size)
+ if (array->resync_start >= array->component_size)
return 1;
return 0;
}
diff --git a/monitor.c b/monitor.c
index 0cafc3a..81fef49 100644
--- a/monitor.c
+++ b/monitor.c
@@ -66,23 +66,20 @@ static int read_attr(char *buf, int len, int fd)
return n;
}
-int get_resync_start(struct active_array *a)
+static unsigned long long read_resync_start(int fd)
{
char buf[30];
int n;
- n = read_attr(buf, 30, a->resync_start_fd);
+ n = read_attr(buf, 30, fd);
if (n <= 0)
- return n;
+ return 0;
if (strncmp(buf, "none", 4) == 0)
- a->resync_start = ~0ULL;
+ return MaxSector;
else
- a->resync_start = strtoull(buf, NULL, 10);
-
- return 1;
+ return strtoull(buf, NULL, 10);
}
-
static enum array_state read_state(int fd)
{
char buf[20];
@@ -208,22 +205,23 @@ static int read_and_act(struct active_array *a)
a->curr_state = read_state(a->info.state_fd);
a->curr_action = read_action(a->action_fd);
+ a->info.resync_start = read_resync_start(a->resync_start_fd);
for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
mdi->next_state = 0;
- if (mdi->state_fd >= 0)
+ if (mdi->state_fd >= 0) {
+ mdi->recovery_start = read_resync_start(mdi->recovery_fd);
mdi->curr_state = read_dev_state(mdi->state_fd);
+ }
}
if (a->curr_state <= inactive &&
a->prev_state > inactive) {
/* array has been stopped */
- get_resync_start(a);
a->container->ss->set_array_state(a, 1);
a->next_state = clear;
deactivate = 1;
}
if (a->curr_state == write_pending) {
- get_resync_start(a);
a->container->ss->set_array_state(a, 0);
a->next_state = active;
dirty = 1;
@@ -236,7 +234,6 @@ static int read_and_act(struct active_array *a)
dirty = 1;
}
if (a->curr_state == clean) {
- get_resync_start(a);
a->container->ss->set_array_state(a, 1);
}
if (a->curr_state == active ||
@@ -253,7 +250,6 @@ static int read_and_act(struct active_array *a)
/* explicit request for readonly array. Leave it alone */
;
} else {
- get_resync_start(a);
if (a->container->ss->set_array_state(a, 2))
a->next_state = read_auto; /* array is clean */
else {
@@ -271,7 +267,6 @@ static int read_and_act(struct active_array *a)
* until the array goes inactive or readonly though.
* Just check if we need to fiddle spares.
*/
- get_resync_start(a);
a->container->ss->set_array_state(a, a->curr_state <= clean);
check_degraded = 1;
}
diff --git a/super-ddf.c b/super-ddf.c
index 1014d81..3e30229 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -1433,7 +1433,7 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info)
(ddf->virt->entries[info->container_member].init_state
& DDF_initstate_mask)
== DDF_init_full)
- info->resync_start = ~0ULL;
+ info->resync_start = MaxSector;
uuid_from_super_ddf(st, info->uuid);
@@ -2921,7 +2921,7 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
this->resync_start = 0;
} else {
this->array.state = 1;
- this->resync_start = ~0ULL;
+ this->resync_start = MaxSector;
}
memcpy(this->name, ddf->virt->entries[i].name, 16);
this->name[16]=0;
@@ -2968,6 +2968,7 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
dev->disk.minor = d->minor;
dev->disk.raid_disk = i;
dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
+ dev->recovery_start = MaxSector;
dev->events = __be32_to_cpu(ddf->primary.seq);
dev->data_offset = __be64_to_cpu(vc->lba_offset[i]);
@@ -3066,7 +3067,7 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
if (consistent == 2) {
/* Should check if a recovery should be started FIXME */
consistent = 1;
- if (!is_resync_complete(a))
+ if (!is_resync_complete(&a->info))
consistent = 0;
}
if (consistent)
@@ -3078,9 +3079,9 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
old = ddf->virt->entries[inst].init_state;
ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
- if (is_resync_complete(a))
+ if (is_resync_complete(&a->info))
ddf->virt->entries[inst].init_state |= DDF_init_full;
- else if (a->resync_start == 0)
+ else if (a->info.resync_start == 0)
ddf->virt->entries[inst].init_state |= DDF_init_not;
else
ddf->virt->entries[inst].init_state |= DDF_init_quick;
@@ -3088,7 +3089,7 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
ddf->updates_pending = 1;
dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
- a->resync_start);
+ a->info.resync_start);
return consistent;
}
@@ -3547,6 +3548,7 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
di->disk.major = dl->major;
di->disk.minor = dl->minor;
di->disk.state = 0;
+ di->recovery_start = 0;
di->data_offset = pos;
di->component_size = a->info.component_size;
di->container_member = dl->pdnum;
diff --git a/super-intel.c b/super-intel.c
index 2e119f8..609aaf5 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -635,6 +635,8 @@ static int is_failed(struct imsm_disk *disk)
}
#ifndef MDASSEMBLE
+static __u64 blocks_per_migr_unit(struct imsm_dev *dev);
+
static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
{
__u64 sz;
@@ -669,27 +671,32 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
printf(" Chunk Size : %u KiB\n",
__le16_to_cpu(map->blocks_per_strip) / 2);
printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
- printf(" Migrate State : %s", dev->vol.migr_state ? "migrating" : "idle\n");
+ printf(" Migrate State : ");
if (dev->vol.migr_state) {
if (migr_type(dev) == MIGR_INIT)
- printf(": initializing\n");
+ printf("initialize\n");
else if (migr_type(dev) == MIGR_REBUILD)
- printf(": rebuilding\n");
+ printf("rebuild\n");
else if (migr_type(dev) == MIGR_VERIFY)
- printf(": check\n");
+ printf("check\n");
else if (migr_type(dev) == MIGR_GEN_MIGR)
- printf(": general migration\n");
+ printf("general migration\n");
else if (migr_type(dev) == MIGR_STATE_CHANGE)
- printf(": state change\n");
+ printf("state change\n");
else if (migr_type(dev) == MIGR_REPAIR)
- printf(": repair\n");
+ printf("repair\n");
else
- printf(": <unknown:%d>\n", migr_type(dev));
- }
+ printf("<unknown:%d>\n", migr_type(dev));
+ } else
+ printf("idle\n");
printf(" Map State : %s", map_state_str[map->map_state]);
if (dev->vol.migr_state) {
struct imsm_map *map = get_imsm_map(dev, 1);
+
printf(" <-- %s", map_state_str[map->map_state]);
+ printf("\n Checkpoint : %u (%llu)",
+ __le32_to_cpu(dev->vol.curr_migr_unit),
+ blocks_per_migr_unit(dev));
}
printf("\n");
printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
@@ -1215,6 +1222,179 @@ get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
}
#endif
+static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
+{
+	/* migr_strip_size when repairing or initializing parity.
+	 *
+	 * Returns a strip size in 512-byte blocks: the map-0 chunk for
+	 * raid5 and raid10, 128KiB worth of blocks for everything else.
+	 */
+	struct imsm_map *map = get_imsm_map(dev, 0);
+	/* blocks_per_strip is read as a 16-bit little-endian field
+	 * elsewhere in this file (see print_imsm_dev); a 32-bit swap
+	 * yields the wrong value on big-endian hosts
+	 */
+	__u32 chunk = __le16_to_cpu(map->blocks_per_strip);
+
+	switch (get_imsm_raid_level(map)) {
+	case 5:
+	case 10:
+		return chunk;
+	default:
+		return 128*1024 >> 9;
+	}
+}
+
+static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
+{
+	/* migr_strip_size when rebuilding a degraded disk, no idea why
+	 * this is different than migr_strip_blocks_resync(), but it's good
+	 * to be compatible
+	 *
+	 * Returns a strip size in 512-byte blocks, derived from map 1.
+	 */
+	struct imsm_map *map = get_imsm_map(dev, 1);
+	/* blocks_per_strip is read as a 16-bit little-endian field
+	 * elsewhere in this file (see print_imsm_dev); a 32-bit swap
+	 * yields the wrong value on big-endian hosts
+	 */
+	__u32 chunk = __le16_to_cpu(map->blocks_per_strip);
+
+	switch (get_imsm_raid_level(map)) {
+	case 1:
+	case 10:
+		if (map->num_members % map->num_domains == 0)
+			return 128*1024 >> 9;
+		else
+			return chunk;
+	case 5:
+		/* never less than 64KiB worth of blocks */
+		return max((__u32) 64*1024 >> 9, chunk);
+	default:
+		return 128*1024 >> 9;
+	}
+}
+
+static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
+{
+	/* Ratio of the map-1 strip size to the map-0 strip size,
+	 * never less than 1.
+	 */
+	struct imsm_map *lo = get_imsm_map(dev, 0);
+	struct imsm_map *hi = get_imsm_map(dev, 1);
+	/* blocks_per_strip is read as a 16-bit little-endian field
+	 * elsewhere in this file (see print_imsm_dev)
+	 */
+	__u32 lo_chunk = __le16_to_cpu(lo->blocks_per_strip);
+	__u32 hi_chunk = __le16_to_cpu(hi->blocks_per_strip);
+
+	/* a zero strip size would make the division below trap; fall
+	 * back to the smallest value this function otherwise returns
+	 */
+	if (lo_chunk == 0)
+		return 1;
+
+	return max((__u32) 1, hi_chunk / lo_chunk);
+}
+
+static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
+{
+	/* raid1 and raid10 use the domain count of map 1; every other
+	 * level shares the resync calculation
+	 */
+	struct imsm_map *map0 = get_imsm_map(dev, 0);
+
+	switch (get_imsm_raid_level(map0)) {
+	case 1:
+	case 10:
+		return get_imsm_map(dev, 1)->num_domains;
+	default:
+		return num_stripes_per_unit_resync(dev);
+	}
+}
+
+static __u8 imsm_num_data_members(struct imsm_dev *dev)
+{
+	/* named 'imsm_' because raid0, raid1 and raid10
+	 * counter-intuitively have the same number of data disks
+	 *
+	 * Returns 0 (after a debug message) for any other level
+	 * except raid5, which reports one member less than the map.
+	 */
+	struct imsm_map *map = get_imsm_map(dev, 0);
+	int level = get_imsm_raid_level(map);
+
+	if (level == 0 || level == 1 || level == 10)
+		return map->num_members;
+	if (level == 5)
+		return map->num_members - 1;
+
+	dprintf("%s: unsupported raid level\n", __func__);
+	return 0;
+}
+
+static __u32 parity_segment_depth(struct imsm_dev *dev)
+{
+	/* Map-0 chunk scaled by the domain count (raid1/raid10) or the
+	 * member count (raid5); the bare chunk for any other level.
+	 */
+	struct imsm_map *map = get_imsm_map(dev, 0);
+	/* blocks_per_strip is read as a 16-bit little-endian field
+	 * elsewhere in this file (see print_imsm_dev); a 32-bit swap
+	 * yields the wrong value on big-endian hosts
+	 */
+	__u32 chunk = __le16_to_cpu(map->blocks_per_strip);
+
+	switch (get_imsm_raid_level(map)) {
+	case 1:
+	case 10:
+		return chunk * map->num_domains;
+	case 5:
+		return chunk * map->num_members;
+	default:
+		return chunk;
+	}
+}
+
+static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
+{
+	/* Translate 'block' using the geometry of map 1.  Only raid1,
+	 * raid10 and raid5 are handled; any other level yields 0.
+	 */
+	struct imsm_map *map = get_imsm_map(dev, 1);
+	/* blocks_per_strip is read as a 16-bit little-endian field
+	 * elsewhere in this file (see print_imsm_dev); a 32-bit swap
+	 * yields the wrong value on big-endian hosts
+	 */
+	__u32 chunk = __le16_to_cpu(map->blocks_per_strip);
+	__u32 strip = block / chunk;
+
+	switch (get_imsm_raid_level(map)) {
+	case 1:
+	case 10: {
+		__u32 vol_strip = (strip * map->num_domains) + 1;
+		__u32 vol_stripe = vol_strip / map->num_members;
+
+		return vol_stripe * chunk + block % chunk;
+	}
+	case 5: {
+		__u32 stripe = strip / (map->num_members - 1);
+
+		return stripe * chunk + block % chunk;
+	}
+	default:
+		return 0;
+	}
+}
+
+static __u64 blocks_per_migr_unit(struct imsm_dev *dev)
+{
+	/* calculate the conversion factor between per member 'blocks'
+	 * (md/{resync,recovery}_start) and imsm migration units, return
+	 * 0 for the 'not migrating' and 'unsupported migration' cases
+	 */
+	if (!dev->vol.migr_state)
+		return 0;
+
+	switch (migr_type(dev)) {
+	case MIGR_VERIFY:
+	case MIGR_REPAIR:
+	case MIGR_INIT: {
+		struct imsm_map *map = get_imsm_map(dev, 0);
+		__u32 stripes_per_unit;
+		__u32 blocks_per_unit;
+		__u32 parity_depth;
+		__u32 migr_chunk;
+		__u32 block_map;
+		__u32 block_rel;
+		__u32 segment;
+		__u32 stripe;
+		__u8 disks;
+
+		/* yes, this is really the translation of migr_units to
+		 * per-member blocks in the 'resync' case
+		 */
+		stripes_per_unit = num_stripes_per_unit_resync(dev);
+		migr_chunk = migr_strip_blocks_resync(dev);
+		disks = imsm_num_data_members(dev);
+		blocks_per_unit = stripes_per_unit * migr_chunk * disks;
+		/* blocks_per_strip is read as a 16-bit little-endian
+		 * field elsewhere in this file (see print_imsm_dev)
+		 */
+		stripe = __le16_to_cpu(map->blocks_per_strip) * disks;
+		/* imsm_num_data_members() reports 0 for a raid level it
+		 * does not know, which would make the division below
+		 * trap; report that as 'unsupported' instead
+		 */
+		if (stripe == 0)
+			return 0;
+		segment = blocks_per_unit / stripe;
+		block_rel = blocks_per_unit - segment * stripe;
+		parity_depth = parity_segment_depth(dev);
+		block_map = map_migr_block(dev, block_rel);
+		return block_map + parity_depth * segment;
+	}
+	case MIGR_REBUILD: {
+		__u32 stripes_per_unit;
+		__u32 migr_chunk;
+
+		stripes_per_unit = num_stripes_per_unit_rebuild(dev);
+		migr_chunk = migr_strip_blocks_rebuild(dev);
+		return migr_chunk * stripes_per_unit;
+	}
+	case MIGR_GEN_MIGR:
+	case MIGR_STATE_CHANGE:
+	default:
+		return 0;
+	}
+}
+
static int imsm_level_to_layout(int level)
{
switch (level) {
@@ -1264,13 +1444,34 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
info->component_size = __le32_to_cpu(map->blocks_per_member);
memset(info->uuid, 0, sizeof(info->uuid));
- if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty)
+ if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty) {
info->resync_start = 0;
- else if (dev->vol.migr_state)
- /* FIXME add curr_migr_unit to resync_start conversion */
- info->resync_start = 0;
- else
- info->resync_start = ~0ULL;
+ } else if (dev->vol.migr_state) {
+ switch (migr_type(dev)) {
+ case MIGR_REPAIR:
+ case MIGR_INIT: {
+ __u64 blocks_per_unit = blocks_per_migr_unit(dev);
+ __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
+
+ info->resync_start = blocks_per_unit * units;
+ break;
+ }
+ case MIGR_VERIFY:
+ /* we could emulate the checkpointing of
+ * 'sync_action=check' migrations, but for now
+ * we just immediately complete them
+ */
+ case MIGR_REBUILD:
+ /* this is handled by container_content_imsm() */
+ case MIGR_GEN_MIGR:
+ case MIGR_STATE_CHANGE:
+ /* FIXME handle other migrations */
+ default:
+ /* we are not dirty, so... */
+ info->resync_start = MaxSector;
+ }
+ } else
+ info->resync_start = MaxSector;
strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
info->name[MAX_RAID_SERIAL_LEN] = 0;
@@ -1566,6 +1767,7 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst)
first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
first->anchor->orig_family_num = sec->anchor->orig_family_num;
first->anchor->family_num = sec->anchor->family_num;
+ memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
for (i = 0; i < sec->anchor->num_raid_devs; i++)
imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
}
@@ -2428,7 +2630,7 @@ imsm_thunderdome(struct intel_super **super_list, int len)
struct intel_disk *idisk;
idisk = disk_list_get(dl->serial, disk_list);
- if (is_spare(&idisk->disk) &&
+ if (idisk && is_spare(&idisk->disk) &&
!is_failed(&idisk->disk) && !is_configured(&idisk->disk))
dl->index = -1;
else {
@@ -3333,7 +3535,7 @@ static unsigned long long merge_extents(struct intel_super *super, int sum_exten
unsigned long reserve;
if (!e)
- return ~0ULL; /* error */
+ return 0;
/* coalesce and sort all extents. also, check to see if we need to
* reserve space between member arrays
@@ -3376,17 +3578,23 @@ static unsigned long long merge_extents(struct intel_super *super, int sum_exten
} while (e[i-1].size);
free(e);
+ if (maxsize == 0)
+ return 0;
+
+ /* FIXME assumes volume at offset 0 is the first volume in a
+ * container
+ */
if (start_extent > 0)
reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
else
reserve = 0;
if (maxsize < reserve)
- return ~0ULL;
+ return 0;
super->create_offset = ~((__u32) 0);
if (start + reserve > super->create_offset)
- return ~0ULL; /* start overflows create_offset */
+ return 0; /* start overflows create_offset */
super->create_offset = start + reserve;
return maxsize - reserve;
@@ -3415,6 +3623,34 @@ static int is_raid_level_supported(const struct imsm_orom *orom, int level, int
}
#define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
+static int
+validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
+			    int raiddisks, int chunk, int verbose)
+{
+	/* Returns 1 when level/raiddisks, chunk and layout all pass the
+	 * checks below, otherwise 0 (with a message when 'verbose').
+	 */
+	if (!is_raid_level_supported(super->orom, level, raiddisks)) {
+		const char *plural = raiddisks > 1 ? "s" : "";
+
+		pr_vrb(": platform does not support raid%d with %d disk%s\n",
+		       level, raiddisks, plural);
+		return 0;
+	}
+
+	/* the chunk size is only checked for levels other than raid1,
+	 * and only when an option-rom description is present
+	 */
+	if (level != 1 && super->orom &&
+	    !imsm_orom_has_chunk(super->orom, chunk)) {
+		pr_vrb(": platform does not support a chunk size of: %d\n", chunk);
+		return 0;
+	}
+
+	if (layout == imsm_level_to_layout(level))
+		return 1;
+
+	switch (level) {
+	case 5:
+		pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
+		break;
+	case 10:
+		pr_vrb(": imsm raid 10 only supports the n2 layout\n");
+		break;
+	default:
+		pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
+		       layout, level);
+		break;
+	}
+	return 0;
+}
+
/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
* FIX ME add ahci details
*/
@@ -3437,26 +3673,8 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
if (!super)
return 0;
- if (!is_raid_level_supported(super->orom, level, raiddisks)) {
- pr_vrb(": platform does not support raid%d with %d disk%s\n",
- level, raiddisks, raiddisks > 1 ? "s" : "");
+ if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose))
return 0;
- }
- if (super->orom && level != 1 &&
- !imsm_orom_has_chunk(super->orom, chunk)) {
- pr_vrb(": platform does not support a chunk size of: %d\n", chunk);
- return 0;
- }
- if (layout != imsm_level_to_layout(level)) {
- if (level == 5)
- pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
- else if (level == 10)
- pr_vrb(": imsm raid 10 only supports the n2 layout\n");
- else
- pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
- layout, level);
- return 0;
- }
if (!dev) {
/* General test: make sure there is space for
@@ -3464,7 +3682,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
* offset
*/
unsigned long long minsize = size;
- unsigned long long start_offset = ~0ULL;
+ unsigned long long start_offset = MaxSector;
int dcnt = 0;
if (minsize == 0)
minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
@@ -3480,7 +3698,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
esize = e[i].start - pos;
if (esize >= minsize)
found = 1;
- if (found && start_offset == ~0ULL) {
+ if (found && start_offset == MaxSector) {
start_offset = pos;
break;
} else if (found && pos != start_offset) {
@@ -3569,15 +3787,11 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
i += dl->extent_cnt;
maxsize = merge_extents(super, i);
- if (maxsize < size) {
+ if (maxsize < size || maxsize == 0) {
if (verbose)
fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
maxsize, size);
return 0;
- } else if (maxsize == ~0ULL) {
- if (verbose)
- fprintf(stderr, Name ": failed to merge %d extents\n", i);
- return 0;
}
*freesize = maxsize;
@@ -3634,7 +3848,8 @@ static int reserve_space(struct supertype *st, int raiddisks,
if (cnt < raiddisks ||
(super->orom && used && used != raiddisks) ||
- maxsize < minsize) {
+ maxsize < minsize ||
+ maxsize == 0) {
fprintf(stderr, Name ": not enough devices with space to create array.\n");
return 0; /* No enough free spaces large enough */
}
@@ -3686,6 +3901,10 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
* created. add_to_super and getinfo_super
* detect when autolayout is in progress.
*/
+ if (!validate_geometry_imsm_orom(st->sb, level, layout,
+ raiddisks, chunk,
+ verbose))
+ return 0;
return reserve_space(st, raiddisks, size, chunk, freesize);
}
return 1;
@@ -3763,6 +3982,46 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
}
#endif /* MDASSEMBLE */
+static int is_rebuilding(struct imsm_dev *dev)
+{
+	/* 1 only when a MIGR_REBUILD migration is active and map 1 is
+	 * in the degraded state, 0 otherwise
+	 */
+	if (!dev->vol.migr_state || migr_type(dev) != MIGR_REBUILD)
+		return 0;
+
+	return get_imsm_map(dev, 1)->map_state == IMSM_T_STATE_DEGRADED;
+}
+
+static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array)
+{
+	/* Seed the rebuild target's recovery_start from the checkpoint
+	 * recorded in the metadata (curr_migr_unit scaled by
+	 * blocks_per_migr_unit()).  Does nothing unless 'dev' is
+	 * rebuilding and exactly one member of 'array' has
+	 * recovery_start == 0.
+	 */
+	struct mdinfo *rebuild = NULL;
+	struct mdinfo *d;
+	__u32 units;
+
+	if (!is_rebuilding(dev))
+		return;
+
+	/* Find the rebuild target, but punt on the dual rebuild case */
+	for (d = array->devs; d; d = d->next)
+		if (d->recovery_start == 0) {
+			if (rebuild)
+				return;
+			rebuild = d;
+		}
+
+	/* the member being rebuilt may be absent from the list (e.g. it
+	 * was skipped as missing or failed), leaving nothing to update
+	 */
+	if (!rebuild) {
+		dprintf("%s: failed to locate out-of-sync disk\n", __func__);
+		return;
+	}
+
+	units = __le32_to_cpu(dev->vol.curr_migr_unit);
+	rebuild->recovery_start = units * blocks_per_migr_unit(dev);
+}
+
+
static struct mdinfo *container_content_imsm(struct supertype *st)
{
/* Given a container loaded by load_super_imsm_all,
@@ -3810,6 +4069,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
super->current_vol = i;
getinfo_super_imsm_volume(st, this);
for (slot = 0 ; slot < map->num_members; slot++) {
+ unsigned long long recovery_start;
struct mdinfo *info_d;
struct dl *d;
int idx;
@@ -3823,33 +4083,41 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
if (d->index == idx)
break;
+ recovery_start = MaxSector;
if (d == NULL)
skip = 1;
if (d && is_failed(&d->disk))
skip = 1;
if (ord & IMSM_ORD_REBUILD)
- skip = 1;
+ recovery_start = 0;
/*
* if we skip some disks the array will be assmebled degraded;
- * reset resync start to avoid a dirty-degraded situation
+ * reset resync start to avoid a dirty-degraded
+ * situation when performing the initial sync
*
* FIXME handle dirty degraded
*/
- if (skip && !dev->vol.dirty)
- this->resync_start = ~0ULL;
+ if ((skip || recovery_start == 0) && !dev->vol.dirty)
+ this->resync_start = MaxSector;
if (skip)
continue;
- info_d = malloc(sizeof(*info_d));
+ info_d = calloc(1, sizeof(*info_d));
if (!info_d) {
fprintf(stderr, Name ": failed to allocate disk"
" for volume %.16s\n", dev->volume);
+ info_d = this->devs;
+ while (info_d) {
+ struct mdinfo *d = info_d->next;
+
+ free(info_d);
+ info_d = d;
+ }
free(this);
this = rest;
break;
}
- memset(info_d, 0, sizeof(*info_d));
info_d->next = this->devs;
this->devs = info_d;
@@ -3857,8 +4125,10 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
info_d->disk.major = d->major;
info_d->disk.minor = d->minor;
info_d->disk.raid_disk = slot;
+ info_d->recovery_start = recovery_start;
- this->array.working_disks++;
+ if (info_d->recovery_start == MaxSector)
+ this->array.working_disks++;
info_d->events = __le32_to_cpu(mpb->generation_num);
info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
@@ -3866,6 +4136,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
if (d->devname)
strcpy(info_d->name, d->devname);
}
+ /* now that the disk list is up-to-date fixup recovery_start */
+ update_recovery_start(dev, this);
rest = this;
}
@@ -4008,24 +4280,6 @@ static int is_resyncing(struct imsm_dev *dev)
return 0;
}
-static int is_rebuilding(struct imsm_dev *dev)
-{
- struct imsm_map *migr_map;
-
- if (!dev->vol.migr_state)
- return 0;
-
- if (migr_type(dev) != MIGR_REBUILD)
- return 0;
-
- migr_map = get_imsm_map(dev, 1);
-
- if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
- return 1;
- else
- return 0;
-}
-
/* return true if we recorded new information */
static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
{
@@ -4076,6 +4330,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
struct imsm_map *map = get_imsm_map(dev, 0);
int failed = imsm_count_failed(super, dev);
__u8 map_state = imsm_check_degraded(super, dev, failed);
+ __u32 blocks_per_unit;
/* before we activate this array handle any missing disks */
if (consistent == 2 && super->missing) {
@@ -4087,14 +4342,14 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
mark_missing(dev, &dl->disk, dl->index);
super->updates_pending++;
}
-
+
if (consistent == 2 &&
- (!is_resync_complete(a) ||
+ (!is_resync_complete(&a->info) ||
map_state != IMSM_T_STATE_NORMAL ||
dev->vol.migr_state))
consistent = 0;
- if (is_resync_complete(a)) {
+ if (is_resync_complete(&a->info)) {
/* complete intialization / resync,
* recovery and interrupted recovery is completed in
* ->set_disk
@@ -4106,7 +4361,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
}
} else if (!is_resyncing(dev) && !failed) {
/* mark the start of the init process if nothing is failed */
- dprintf("imsm: mark resync start (%llu)\n", a->resync_start);
+ dprintf("imsm: mark resync start\n");
if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
migrate(dev, IMSM_T_STATE_NORMAL, MIGR_INIT);
else
@@ -4114,12 +4369,32 @@ static int imsm_set_array_state(struct active_array *a, int consistent)
super->updates_pending++;
}
- /* FIXME check if we can update curr_migr_unit from resync_start */
+ /* check if we can update curr_migr_unit from resync_start, recovery_start */
+ blocks_per_unit = blocks_per_migr_unit(dev);
+ if (blocks_per_unit && failed <= 1) {
+ __u32 units32;
+ __u64 units;
+
+ if (migr_type(dev) == MIGR_REBUILD)
+ units = min_recovery_start(&a->info) / blocks_per_unit;
+ else
+ units = a->info.resync_start / blocks_per_unit;
+ units32 = units;
+
+ /* check that we did not overflow 32-bits, and that
+ * curr_migr_unit needs updating
+ */
+ if (units32 == units &&
+ __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
+ dprintf("imsm: mark checkpoint (%u)\n", units32);
+ dev->vol.curr_migr_unit = __cpu_to_le32(units32);
+ super->updates_pending++;
+ }
+ }
/* mark dirty / clean */
if (dev->vol.dirty != !consistent) {
- dprintf("imsm: mark '%s' (%llu)\n",
- consistent ? "clean" : "dirty", a->resync_start);
+ dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
if (consistent)
dev->vol.dirty = 0;
else
@@ -4436,6 +4711,7 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
di->disk.major = dl->major;
di->disk.minor = dl->minor;
di->disk.state = 0;
+ di->recovery_start = 0;
di->data_offset = __le32_to_cpu(map->pba_of_lba0);
di->component_size = a->info.component_size;
di->container_member = inst;
diff --git a/super1.c b/super1.c
index 2c992a4..85bb598 100644
--- a/super1.c
+++ b/super1.c
@@ -659,9 +659,9 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
switch(__le32_to_cpu(sb->level)) {
case 5: case 4: case 6:
/* need to force clean */
- if (sb->resync_offset != ~0ULL)
+ if (sb->resync_offset != MaxSector)
rv = 1;
- sb->resync_offset = ~0ULL;
+ sb->resync_offset = MaxSector;
}
}
if (strcmp(update, "assemble")==0) {
@@ -855,7 +855,7 @@ static int init_super1(struct supertype *st, mdu_array_info_t *info,
sb->utime = sb->ctime;
sb->events = __cpu_to_le64(1);
if (info->state & (1<<MD_SB_CLEAN))
- sb->resync_offset = ~0ULL;
+ sb->resync_offset = MaxSector;
else
sb->resync_offset = 0;
sb->max_dev = __cpu_to_le32((1024- sizeof(struct mdp_superblock_1))/
diff --git a/sysfs.c b/sysfs.c
index 35dfbd4..c3bbbe3 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -572,7 +572,7 @@ int sysfs_set_array(struct mdinfo *info, int vers)
return rv;
}
-int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int in_sync)
+int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume)
{
char dv[100];
char nm[100];
@@ -595,15 +595,24 @@ int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int in_sync)
strcpy(sd->sys_name, "dev-");
strcpy(sd->sys_name+4, dname);
+ /* test write to see if 'recovery_start' is available */
+ if (resume && sd->recovery_start < MaxSector &&
+ sysfs_set_num(sra, sd, "recovery_start", 0)) {
+ sysfs_set_str(sra, sd, "state", "remove");
+ return -1;
+ }
+
rv = sysfs_set_num(sra, sd, "offset", sd->data_offset);
rv |= sysfs_set_num(sra, sd, "size", (sd->component_size+1) / 2);
if (sra->array.level != LEVEL_CONTAINER) {
- if (in_sync)
+ if (sd->recovery_start == MaxSector)
/* This can correctly fail if array isn't started,
* yet, so just ignore status for now.
*/
- sysfs_set_str(sra, sd, "state", "in_sync");
+ sysfs_set_str(sra, sd, "state", "insync");
rv |= sysfs_set_num(sra, sd, "slot", sd->disk.raid_disk);
+ if (resume)
+ sysfs_set_num(sra, sd, "recovery_start", sd->recovery_start);
}
return rv;
}
diff --git a/util.c b/util.c
index 67c3550..5feec43 100644
--- a/util.c
+++ b/util.c
@@ -1338,8 +1338,11 @@ int add_disk(int mdfd, struct supertype *st,
int rv;
#ifndef MDASSEMBLE
if (st->ss->external) {
- rv = sysfs_add_disk(sra, info,
- info->disk.state & (1<<MD_DISK_SYNC));
+ if (info->disk.state & (1<<MD_DISK_SYNC))
+ info->recovery_start = MaxSector;
+ else
+ info->recovery_start = 0;
+ rv = sysfs_add_disk(sra, info, 0);
if (! rv) {
struct mdinfo *sd2;
for (sd2 = sra->devs; sd2; sd2=sd2->next)
@@ -1383,10 +1386,25 @@ int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
return rv;
}
+unsigned long long min_recovery_start(struct mdinfo *array)
+{
+ /* Return the smallest recovery_start among array->devs (MaxSector
+ * when the list is empty), for metadata formats that only record
+ * per-array recovery progress instead of per-device
+ */
+ unsigned long long recovery_start = MaxSector;
+ struct mdinfo *d;
+
+ for (d = array->devs; d; d = d->next)
+ recovery_start = min(recovery_start, d->recovery_start);
+
+ return recovery_start;
+}
+
char *devnum2devname(int num)
{
char name[100];
- if (num > 0)
+ if (num >= 0)
sprintf(name, "md%d", num);
else
sprintf(name, "md_d%d", -1-num);