summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Neil Brown <neilb@suse.de> 2006-03-20 03:17:31 +0000
committer Neil Brown <neilb@suse.de> 2006-03-20 03:17:31 +0000
commit 353632d927d6e46282ef4f51d4ad17940a30cdf0 (patch)
tree 7257d93e459937f1b5bffa90ea5b2dccd18a2b00
parent e86c9dd6d847ec57ec400b118efaf2c1808f10bc (diff)
downloadmdadm-353632d927d6e46282ef4f51d4ad17940a30cdf0.tar.gz
mdadm-353632d927d6e46282ef4f51d4ad17940a30cdf0.tar.xz
mdadm-353632d927d6e46282ef4f51d4ad17940a30cdf0.zip
Support restarting of a reshape on --assemble
Signed-off-by: Neil Brown <neilb@suse.de>
-rw-r--r-- Assemble.c 33
-rw-r--r-- Grow.c 133
-rw-r--r-- mdadm.h 10
-rw-r--r-- restripe.c 9
-rw-r--r-- super0.c 36
-rw-r--r-- super1.c 17
-rw-r--r-- sysfs.c 4
7 files changed, 220 insertions, 22 deletions
diff --git a/Assemble.c b/Assemble.c
index cc906db..480c512 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -551,7 +551,7 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
int fd;
fd = dev_open(devices[chosen_drive].devname, O_RDWR|O_EXCL);
if (fd < 0) {
- fprintf(stderr, Name ": Could open %s for write - cannot Assemble array.\n",
+ fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
devices[chosen_drive].devname);
return 1;
}
@@ -564,6 +564,37 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
close(fd);
}
+ /* If we are in the middle of a reshape we may need to restore saved data
+ * that was moved aside due to the reshape overwriting live data.
+ * The code for doing this lives in Grow.c
+ */
+ if (info.reshape_active) {
+ int err = 0;
+ int *fdlist = malloc(sizeof(int)* bestcnt);
+ for (i=0; i<bestcnt; i++) {
+ int j = best[i];
+ if (j >= 0) {
+ fdlist[i] = dev_open(devices[j].devname, O_RDWR|O_EXCL);
+ if (fdlist[i] < 0) {
+ fprintf(stderr, Name ": Could not open %s for write - cannot Assemble array.\n",
+ devices[j].devname);
+ err = 1;
+ break;
+ }
+ } else
+ fdlist[i] = -1;
+ }
+ if (!err)
+ err = Grow_restart(st, &info, fdlist, bestcnt);
+ while (i>0) {
+ i--;
+ if (fdlist[i]>=0) close(fdlist[i]);
+ }
+ if (err) {
+ fprintf(stderr, Name ": Failed to restore critical section for reshape, sorry.\n");
+ return err;
+ }
+ }
/* count number of in-sync devices according to the superblock.
* We must have this number to start the array without -s or -R
*/
diff --git a/Grow.c b/Grow.c
index ece2bda..6d09dc6 100644
--- a/Grow.c
+++ b/Grow.c
@@ -219,7 +219,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int
}
if (ioctl(fd, GET_BITMAP_FILE, &bmf) != 0) {
- if (errno == ENOMEM)
+ if (errno == ENOMEM)
fprintf(stderr, Name ": Memory allocation failure.\n");
else
fprintf(stderr, Name ": bitmaps not supported by this kernel.\n");
@@ -605,12 +605,12 @@ int Grow_reshape(char *devname, int fd, int quiet,
* from
*/
nstripe = ostripe = 0;
- while (nstripe+ochunk/512 >= ostripe) {
+ while (nstripe >= ostripe) {
nstripe += nchunk/512;
last_block = nstripe * ndata;
- ostripe = last_block / odata;
+ ostripe = last_block / odata / (ochunk/512) * (ochunk/512);
}
- printf("Need to backup to stripe %llu sectors, %lluK\n", nstripe, last_block/2);
+ printf("mdadm: Need to backup %lluK of critical section..\n", last_block/2);
sra = sysfs_read(fd, 0,
GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE);
@@ -625,6 +625,11 @@ int Grow_reshape(char *devname, int fd, int quiet,
devname);
return 1;
}
+ if (sra->spares == 0) {
+ fprintf(stderr, Name ": %s: Cannot grow - need a spare to backup critical section\n",
+ devname);
+ return 1;
+ }
nrdisks = array.nr_disks + sra->spares;
/* Now we need to open all these devices so we can read/write.
@@ -724,13 +729,13 @@ int Grow_reshape(char *devname, int fd, int quiet,
goto abort_resume;
}
/* FIXME write superblocks */
- memcpy(bsb.magic, "md_backups_data-1", 16);
+ memcpy(bsb.magic, "md_backup_data-1", 16);
st->ss->uuid_from_super((int*)&bsb.set_uuid, super);
- bsb.mtime = time(0);
+ bsb.mtime = __cpu_to_le64(time(0));
bsb.arraystart = 0;
- bsb.length = last_block;
+ bsb.length = __cpu_to_le64(last_block);
for (i=odisks; i<d ; i++) {
- bsb.devstart = offsets[i];
+ bsb.devstart = __cpu_to_le64(offsets[i]);
bsb.sb_csum = bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb));
lseek64(fdlist[i], (offsets[i]+last_block)<<9, 0);
write(fdlist[i], &bsb, sizeof(bsb));
@@ -769,6 +774,7 @@ int Grow_reshape(char *devname, int fd, int quiet,
free(fdlist);
free(offsets);
+ printf("mdadm: ... critical section passed.\n");
break;
}
return 0;
@@ -785,3 +791,114 @@ int Grow_reshape(char *devname, int fd, int quiet,
return 1;
}
+
+/*
+ * Grow_restart - restore the critical section of an interrupted reshape.
+ *
+ * If any spare contains an "md_backup_data-1" block which is recent wrt
+ * the array's utime, write that data back into the array and update the
+ * super blocks with the new reshape_progress.
+ *
+ *   st     - superblock handler used to load, update and store superblocks
+ *   info   - array information, including the reshape fields
+ *            (delta_disks, new_level, new_layout, new_chunk, reshape_progress)
+ *   fdlist - open file descriptors, one per slot; -1 marks a missing device
+ *   cnt    - number of entries in fdlist
+ *
+ * Returns 0 if a backup was restored, or if no usable backup was found.
+ * Returns 1 if this kind of reshape cannot be handled, if memory could
+ * not be allocated, or if restoring the saved stripes failed.
+ */
+int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt)
+{
+	int i, j;
+	int old_disks;
+	unsigned long long *offsets;
+
+	if (info->delta_disks < 0)
+		return 1; /* cannot handle a shrink */
+	if (info->new_level != info->array.level ||
+	    info->new_layout != info->array.layout ||
+	    info->new_chunk != info->array.chunk_size)
+		return 1; /* Can only handle change in disks */
+
+	old_disks = info->array.raid_disks - info->delta_disks;
+
+	for (i=old_disks; i<cnt; i++) {
+		void *super = NULL;
+		struct mdinfo dinfo;
+		struct mddev_ident_s id;
+		struct mdp_backup_super bsb;
+
+		/* This was a spare and may have some saved data on it.
+		 * Load the superblock, find and load the
+		 * backup_super_block.
+		 * If either fail, go on to next device.
+		 * If the backup contains no new info, just return
+		 * Else restore data and update all superblocks
+		 */
+		if (fdlist[i] < 0)
+			continue;
+		if (st->ss->load_super(st, fdlist[i], &super, NULL))
+			continue;
+
+		st->ss->getinfo_super(&dinfo, &id, super);
+		free(super); super = NULL;
+		/* The backup superblock is looked for 8 sectors before the
+		 * end of this device's data area.
+		 */
+		if (lseek64(fdlist[i],
+			    (dinfo.data_offset + dinfo.component_size - 8) <<9,
+			    0) < 0)
+			continue; /* Cannot seek */
+		if (read(fdlist[i], &bsb, sizeof(bsb)) != sizeof(bsb))
+			continue; /* Cannot read */
+		if (memcmp(bsb.magic, "md_backup_data-1", 16) != 0)
+			continue;
+		if (bsb.sb_csum != bsb_csum((char*)&bsb, ((char*)&bsb.sb_csum)-((char*)&bsb)))
+			continue; /* bad checksum */
+		if (memcmp(bsb.set_uuid,info->uuid, 16) != 0)
+			continue; /* Wrong uuid */
+
+		/* Only accept a backup stamped no later than the array's
+		 * utime and at most one hour (3600 seconds) before it.
+		 */
+		if (info->array.utime > __le64_to_cpu(bsb.mtime) + 3600 ||
+		    info->array.utime < __le64_to_cpu(bsb.mtime))
+			continue; /* time stamp is too bad */
+
+		if (__le64_to_cpu(bsb.arraystart) != 0)
+			continue; /* Can only handle backup from start of array */
+		if (__le64_to_cpu(bsb.length) <
+		    info->reshape_progress)
+			continue; /* No new data here */
+
+		if (lseek64(fdlist[i], __le64_to_cpu(bsb.devstart)*512, 0)< 0)
+			continue; /* Cannot seek */
+
+		/* Now need the data offsets for all devices.
+		 * Zero-fill so that slots for missing or unreadable devices
+		 * hold a defined value when the array is handed to
+		 * restore_stripes().
+		 * NOTE(review): fdlist[] is indexed up to raid_disks-1 from
+		 * here on; this assumes cnt >= raid_disks - confirm against
+		 * the caller.
+		 */
+		offsets = calloc(info->array.raid_disks, sizeof(*offsets));
+		if (offsets == NULL)
+			return 1; /* out of memory - cannot restore */
+		for(j=0; j<info->array.raid_disks; j++) {
+			if (fdlist[j] < 0)
+				continue;
+			if (st->ss->load_super(st, fdlist[j], &super, NULL))
+				/* FIXME should this be an error */
+				continue;
+			st->ss->getinfo_super(&dinfo, &id, super);
+			free(super); super = NULL;
+			offsets[j] = dinfo.data_offset;
+		}
+		printf(Name ": restoring critical section\n");
+
+		if (restore_stripes(fdlist, offsets,
+				    info->array.raid_disks,
+				    info->new_chunk,
+				    info->new_level,
+				    info->new_layout,
+				    fdlist[i], __le64_to_cpu(bsb.devstart)*512,
+				    0, __le64_to_cpu(bsb.length)*512)) {
+			/* didn't succeed, so give up - and report the failure
+			 * so the caller does not carry on as if the critical
+			 * section had been restored.
+			 */
+			free(offsets);
+			return 1;
+		}
+
+		/* Ok, so the data is restored.  Let's update those superblocks. */
+
+		for (j=0; j<info->array.raid_disks; j++) {
+			if (fdlist[j] < 0) continue;
+			if (st->ss->load_super(st, fdlist[j], &super, NULL))
+				continue;
+			st->ss->getinfo_super(&dinfo, &id, super);
+			dinfo.reshape_progress = __le64_to_cpu(bsb.length);
+			st->ss->update_super(&dinfo, super, "_reshape_progress",NULL,0);
+			st->ss->store_super(st, fdlist[j], super);
+			free(super); super = NULL;
+		}
+
+		/* And we are done! */
+		free(offsets);
+		return 0;
+	}
+	/* No spare held a usable backup: nothing to restore. */
+	return 0;
+}
diff --git a/mdadm.h b/mdadm.h
index 61d0469..00c280b 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -91,6 +91,11 @@ struct mdinfo {
mdu_disk_info_t disk;
__u64 events;
int uuid[4];
+ unsigned long long data_offset;
+ unsigned long long component_size;
+ int reshape_active;
+ unsigned long long reshape_progress;
+ int new_level, delta_disks, new_layout, new_chunk;
};
#define Name "mdadm"
@@ -225,6 +230,10 @@ extern int save_stripes(int *source, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
int nwrites, int *dest,
unsigned long long start, unsigned long long length);
+extern int restore_stripes(int *dest, unsigned long long *offsets,
+ int raid_disks, int chunk_size, int level, int layout,
+ int source, unsigned long long read_offset,
+ unsigned long long start, unsigned long long length);
#ifndef Sendmail
#define Sendmail "/usr/lib/sendmail -t"
@@ -302,6 +311,7 @@ extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int dela
extern int Grow_reshape(char *devname, int fd, int quiet,
long long size,
int level, int layout, int chunksize, int raid_disks);
+extern int Grow_restart(struct supertype *st, struct mdinfo *info, int *fdlist, int cnt);
extern int Assemble(struct supertype *st, char *mddev, int mdfd,
diff --git a/restripe.c b/restripe.c
index 94a0e3d..b7b3636 100644
--- a/restripe.c
+++ b/restripe.c
@@ -162,7 +162,7 @@ int save_stripes(int *source, unsigned long long *offsets,
/* Restore data:
* We are given:
* A list of 'fds' of the active disks. Some may be '-1' for not-available.
- * A geometry: raid_disks, chunk_sisze, level, layout
+ * A geometry: raid_disks, chunk_size, level, layout
* An 'fd' to read from. It is already seeked to the right (Read) location.
* A start and length.
* The length must be a multiple of the stripe size.
@@ -172,7 +172,7 @@ int save_stripes(int *source, unsigned long long *offsets,
*/
int restore_stripes(int *dest, unsigned long long *offsets,
int raid_disks, int chunk_size, int level, int layout,
- int source,
+ int source, unsigned long long read_offset,
unsigned long long start, unsigned long long length)
{
char *stripe_buf = malloc(raid_disks * chunk_size);
@@ -199,8 +199,11 @@ int restore_stripes(int *dest, unsigned long long *offsets,
int disk = geo_map(i, start/chunk_size/data_disks,
raid_disks, level, layout);
blocks[i] = stripes[disk];
+ if (lseek64(source, read_offset, 0) != read_offset)
+ return -1;
if (read(source, stripes[disk], chunk_size) != chunk_size)
return -1;
+ read_offset += chunk_size;
}
/* We have the data, now do the parity */
offset = (start/chunk_size/data_disks) * chunk_size;
@@ -311,7 +314,7 @@ main(int argc, char *argv[])
} else {
int rv = restore_stripes(fds, offsets,
raid_disks, chunk_size, level, layout,
- storefd,
+ storefd, 0ULL,
start, length);
if (rv != 0) {
fprintf(stderr, "test_stripe: restore_stripes returned %d\n", rv);
diff --git a/super0.c b/super0.c
index 52be23d..99d09a4 100644
--- a/super0.c
+++ b/super0.c
@@ -80,7 +80,7 @@ void super0_swap_endian(struct mdp_superblock_s *sb)
sb->cp_events_hi = sb->cp_events_lo;
sb->cp_events_lo = t32;
-}
+}
#ifndef MDASSEMBLE
@@ -182,7 +182,7 @@ static void examine_super0(void *sbv)
case -1:
printf(" Rounding : %dK\n", sb->chunk_size/1024);
break;
- default: break;
+ default: break;
}
printf("\n");
printf(" Number Major Minor RaidDevice State\n");
@@ -279,6 +279,9 @@ static void getinfo_super0(struct mdinfo *info, mddev_ident_t ident, void *sbv)
info->array.layout = sb->layout;
info->array.md_minor = sb->md_minor;
info->array.ctime = sb->ctime;
+ info->array.utime = sb->utime;
+ info->array.chunk_size = sb->chunk_size;
+ info->component_size = sb->size*2;
info->disk.state = sb->this_disk.state;
info->disk.major = sb->this_disk.major;
@@ -287,9 +290,20 @@ static void getinfo_super0(struct mdinfo *info, mddev_ident_t ident, void *sbv)
info->disk.number = sb->this_disk.number;
info->events = md_event(sb);
+ info->data_offset = 0;
uuid_from_super0(info->uuid, sbv);
+ if (sb->minor_version > 90 && (sb->reshape_position+1) != 0) {
+ info->reshape_active = 1;
+ info->reshape_progress = sb->reshape_position;
+ info->new_level = sb->new_level;
+ info->delta_disks = sb->delta_disks;
+ info->new_layout = sb->new_layout;
+ info->new_chunk = sb->new_chunk;
+ } else
+ info->reshape_active = 0;
+
ident->name[0] = 0;
/* work_disks is calculated rather than read directly */
for (i=0; i < MD_SB_DISKS; i++)
@@ -403,6 +417,8 @@ static int update_super0(struct mdinfo *info, void *sbv, char *update, char *dev
sb->set_uuid2 = info->uuid[2];
sb->set_uuid3 = info->uuid[3];
}
+ if (strcmp(update, "_reshape_progress")==0)
+ sb->reshape_position = info->reshape_progress;
sb->sb_csum = calc_sb0_csum(sb);
return rv;
@@ -481,7 +497,7 @@ static void add_to_super0(void *sbv, mdu_disk_info_t *dinfo)
{
mdp_super_t *sb = sbv;
mdp_disk_t *dk = &sb->disks[dinfo->number];
-
+
dk->number = dinfo->number;
dk->major = dinfo->major;
dk->minor = dinfo->minor;
@@ -508,7 +524,7 @@ static int store_super0(struct supertype *st, int fd, void *sbv)
if (dsize < MD_RESERVED_SECTORS*2*512)
return 2;
-
+
offset = MD_NEW_SIZE_SECTORS(dsize>>9);
offset *= 512;
@@ -622,7 +638,7 @@ static int load_super0(struct supertype *st, int fd, void **sbp, char *devname)
devname, size);
return 1;
}
-
+
offset = MD_NEW_SIZE_SECTORS(dsize>>9);
offset *= 512;
@@ -717,7 +733,7 @@ static int add_internal_bitmap0(struct supertype *st, void *sbv, int chunk, int
mdp_super_t *sb = sbv;
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MD_SB_BYTES);
-
+
min_chunk = 4096; /* sub-page chunks don't work yet.. */
bits = (size * 512)/ min_chunk +1;
while (bits > max_bits) {
@@ -744,7 +760,7 @@ static int add_internal_bitmap0(struct supertype *st, void *sbv, int chunk, int
return 1;
}
-
+
void locate_bitmap0(struct supertype *st, int fd, void *sbv)
{
@@ -763,7 +779,7 @@ void locate_bitmap0(struct supertype *st, int fd, void *sbv)
if (dsize < MD_RESERVED_SECTORS*2)
return;
-
+
offset = MD_NEW_SIZE_SECTORS(dsize>>9);
offset *= 512;
@@ -796,8 +812,8 @@ int write_bitmap0(struct supertype *st, int fd, void *sbv)
}
if (dsize < MD_RESERVED_SECTORS*2)
- return -1;
-
+ return -1;
+
offset = MD_NEW_SIZE_SECTORS(dsize>>9);
offset *= 512;
diff --git a/super1.c b/super1.c
index dd852e5..4df6370 100644
--- a/super1.c
+++ b/super1.c
@@ -368,6 +368,11 @@ static void getinfo_super1(struct mdinfo *info, mddev_ident_t ident, void *sbv)
info->array.layout = __le32_to_cpu(sb->layout);
info->array.md_minor = -1;
info->array.ctime = __le64_to_cpu(sb->ctime);
+ info->array.utime = __le64_to_cpu(sb->utime);
+ info->array.chunk_size = __le32_to_cpu(sb->chunksize)/512;
+
+ info->data_offset = __le64_to_cpu(sb->data_offset);
+ info->component_size = __le64_to_cpu(sb->size);
info->disk.major = 0;
info->disk.minor = 0;
@@ -397,6 +402,16 @@ static void getinfo_super1(struct mdinfo *info, mddev_ident_t ident, void *sbv)
strncpy(ident->name, sb->set_name, 32);
ident->name[32] = 0;
+ if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE)) {
+ info->reshape_active = 1;
+ info->reshape_progress = __le64_to_cpu(sb->reshape_position);
+ info->new_level = __le32_to_cpu(sb->new_level);
+ info->delta_disks = __le32_to_cpu(sb->delta_disks);
+ info->new_layout = __le32_to_cpu(sb->new_layout);
+ info->new_chunk = __le32_to_cpu(sb->new_chunk);
+ } else
+ info->reshape_active = 0;
+
for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
role = __le16_to_cpu(sb->dev_roles[i]);
if (/*role == 0xFFFF || */role < info->array.raid_disks)
@@ -453,6 +468,8 @@ static int update_super1(struct mdinfo *info, void *sbv, char *update, char *dev
}
if (strcmp(update, "uuid") == 0)
memcpy(sb->set_uuid, info->uuid, 16);
+ if (strcmp(update, "_reshape_progress")==0)
+ sb->reshape_position = __cpu_to_le64(info->reshape_progress);
sb->sb_csum = calc_sb_1_csum(sb);
return rv;
diff --git a/sysfs.c b/sysfs.c
index 9894760..1774509 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -98,6 +98,8 @@ struct sysarray *sysfs_read(int fd, int devnum, unsigned long options)
if (load_sys(fname, buf))
goto abort;
sra->component_size = strtoull(buf, NULL, 0);
+ /* sysfs reports "K", but we want sectors */
+ sra->component_size *= 2;
}
if (options & GET_CHUNK) {
strcpy(base, "chunk_size");
@@ -192,6 +194,8 @@ unsigned long long get_component_size(int fd)
* We cannot trust GET_ARRAY_INFO ioctl as it's
* size field is only 32bits.
* So look in /sys/block/mdXXX/md/component_size
+ *
+ * WARNING: this returns in units of Kilobytes.
*/
struct stat stb;
char fname[50];