summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Assemble.c27
-rw-r--r--Detail.c6
-rw-r--r--Manage.c4
-rw-r--r--Monitor.c26
-rw-r--r--ReadMe.c2
-rw-r--r--managemon.c6
-rw-r--r--mdadm.814
-rw-r--r--mdadm.c23
-rw-r--r--mdadm.h10
-rw-r--r--mdmon.c109
-rw-r--r--mdopen.c8
-rw-r--r--msg.c14
-rw-r--r--msg.h1
-rw-r--r--super-ddf.c27
-rw-r--r--super-intel.c805
-rw-r--r--sysfs.c5
-rw-r--r--tests/09imsm-assemble46
-rw-r--r--tests/10ddf-create8
-rw-r--r--tests/env-09imsm-assemble32
-rw-r--r--util.c12
20 files changed, 869 insertions, 316 deletions
diff --git a/Assemble.c b/Assemble.c
index 28de83e..a52dc50 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -315,6 +315,9 @@ int Assemble(struct supertype *st, char *mddev,
}
/* It is worth looking inside this container.
*/
+ if (verbose > 0)
+ fprintf(stderr, Name ": looking in container %s\n",
+ devname);
next_member:
if (tmpdev->content)
content = tmpdev->content;
@@ -405,6 +408,9 @@ int Assemble(struct supertype *st, char *mddev,
fprintf(stderr, Name ": member %s in %s is already assembled\n",
content->text_version,
devname);
+ skip:
+ if (tmpdev->content)
+ goto next_member;
tst->ss->free_super(tst);
tst = NULL;
content = NULL;
@@ -412,6 +418,21 @@ int Assemble(struct supertype *st, char *mddev,
goto loop;
return 1;
}
+ if (ident->member && ident->member[0]) {
+ char *s = strchr(content->text_version+1, '/');
+ if (s == NULL) {
+ fprintf(stderr, Name ": badly formatted version: %s\n",
+ content->text_version);
+ goto skip;
+ }
+ if (strcmp(ident->member, s+1) != 0) {
+ if (report_missmatch)
+ fprintf(stderr,
+ Name ": skipping wrong member %s\n",
+ content->text_version);
+ goto skip;
+ }
+ }
st = tst; tst = NULL;
if (!auto_assem && tmpdev->next != NULL) {
fprintf(stderr, Name ": %s is a container, but is not "
@@ -420,6 +441,9 @@ int Assemble(struct supertype *st, char *mddev,
st->ss->free_super(st);
return 1;
}
+ if (verbose > 0)
+ fprintf(stderr, Name ": found match on member %s in %s\n",
+ content->text_version, devname);
break;
}
if (st == NULL)
@@ -565,6 +589,7 @@ int Assemble(struct supertype *st, char *mddev,
#endif
/* Ok, no bad inconsistancy, we can try updating etc */
bitmap_done = 0;
+ content->update_private = NULL;
for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) if (tmpdev->used == 1) {
char *devname = tmpdev->devname;
struct stat stb;
@@ -717,6 +742,8 @@ int Assemble(struct supertype *st, char *mddev,
}
devcnt++;
}
+ free(content->update_private);
+ content->update_private = NULL;
if (devcnt == 0) {
fprintf(stderr, Name ": no devices found for %s\n",
diff --git a/Detail.c b/Detail.c
index 55d5481..e2cf028 100644
--- a/Detail.c
+++ b/Detail.c
@@ -194,7 +194,12 @@ int Detail(char *dev, int brief, int export, int test, char *homehost)
st->ss->export_detail_super(st);
} else {
struct map_ent *mp, *map = NULL;
+ char nbuf[64];
mp = map_by_devnum(&map, fd2devnum(fd));
+ if (mp) {
+ __fname_from_uuid(mp->uuid, 0, nbuf, ':');
+ printf("MD_UUID=%s\n", nbuf+5);
+ }
if (mp && mp->path &&
strncmp(mp->path, "/dev/md/", 8) == 0)
printf("MD_DEVNAME=%s\n", mp->path+8);
@@ -540,6 +545,7 @@ This is pretty boring
1, avail, avail_disks))
rv = 2;
+ free(disks);
out:
close(fd);
return rv;
diff --git a/Manage.c b/Manage.c
index 9217139..6b8cff4 100644
--- a/Manage.c
+++ b/Manage.c
@@ -140,7 +140,7 @@ static void remove_devices(int devnum, char *path)
strcpy(path2, path);
pe = path2 + strlen(path2);
} else
- path = NULL;
+ path2 = path = NULL;
for (part = 0; part < 16; part++) {
if (part) {
@@ -161,6 +161,7 @@ static void remove_devices(int devnum, char *path)
unlink(path2);
}
}
+ free(path2);
}
@@ -649,6 +650,7 @@ int Manage_subdevs(char *devname, int fd,
disc.state |= (1<<MD_DISK_SYNC);
break;
}
+ free(used);
}
if (dv->writemostly == 1)
disc.state |= (1 << MD_DISK_WRITEMOSTLY);
diff --git a/Monitor.c b/Monitor.c
index af486d7..b0802f8 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -33,14 +33,6 @@
static void alert(char *event, char *dev, char *disc, char *mailaddr, char *mailfrom,
char *cmd, int dosyslog);
-static char *percentalerts[] = {
- "RebuildStarted",
- "Rebuild20",
- "Rebuild40",
- "Rebuild60",
- "Rebuild80",
-};
-
/* The largest number of disks current arrays can manage is 384
* This really should be dynamically, but that will have to wait
* At least it isn't MD_SB_DISKS.
@@ -49,7 +41,7 @@ static char *percentalerts[] = {
int Monitor(mddev_dev_t devlist,
char *mailaddr, char *alert_cmd,
int period, int daemonise, int scan, int oneshot,
- int dosyslog, int test, char* pidfile)
+ int dosyslog, int test, char* pidfile, int increments)
{
/*
* Every few seconds, scan every md device looking for changes
@@ -77,8 +69,8 @@ int Monitor(mddev_dev_t devlist,
* An active device had a reverse transition
* RebuildStarted
* percent went from -1 to +ve
- * Rebuild20 Rebuild40 Rebuild60 Rebuild80
- * percent went from below to not-below that number
+ * RebuildNN
+ * percent went from below to not-below NN%
* DeviceDisappeared
* Couldn't access a device which was previously visible
*
@@ -311,9 +303,17 @@ int Monitor(mddev_dev_t devlist,
if (mse &&
st->percent >= 0 &&
mse->percent >= 0 &&
- (mse->percent / 20) > (st->percent / 20))
- alert(percentalerts[mse->percent/20],
+ (mse->percent / increments) > (st->percent / increments)) {
+ char percentalert[15]; // "RebuildNN" (10 chars) or "RebuildStarted" (15 chars)
+
+ if((mse->percent / increments) == 0)
+ snprintf(percentalert, sizeof(percentalert), "RebuildStarted");
+ else
+ snprintf(percentalert, sizeof(percentalert), "Rebuild%02d", mse->percent);
+
+ alert(percentalert,
dev, NULL, mailaddr, mailfrom, alert_cmd, dosyslog);
+ }
if (mse &&
mse->percent == -1 &&
diff --git a/ReadMe.c b/ReadMe.c
index 0a50acb..f5f15c7 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -176,6 +176,7 @@ struct option long_options[] = {
{"mail", 1, 0, 'm'},
{"program", 1, 0, 'p'},
{"alert", 1, 0, 'p'},
+ {"increment", 1, 0, 'r'},
{"delay", 1, 0, 'd'},
{"daemonise", 0, 0, 'f'},
{"daemonize", 0, 0, 'f'},
@@ -496,6 +497,7 @@ char Help_monitor[] =
" --mail= -m : Address to mail alerts of failure to\n"
" --program= -p : Program to run when an event is detected\n"
" --alert= : same as --program\n"
+" --increment= -r : Report RebuildNN events in the given increment. default=20\n"
" --delay= -d : seconds of delay between polling state. default=60\n"
" --config= -c : specify a different config file\n"
" --scan -s : find mail-address/program in config file\n"
diff --git a/managemon.c b/managemon.c
index f9d545d..5958e18 100644
--- a/managemon.c
+++ b/managemon.c
@@ -680,6 +680,12 @@ void do_manager(struct supertype *container)
read_sock(container);
if (container->sock < 0 || socket_hup_requested) {
+ /* If this fails, we hope it already exists
+ * pid file lives in /var/run/mdadm/mdXX.pid
+ */
+ mkdir("/var", 0600);
+ mkdir("/var/run", 0600);
+ mkdir("/var/run/mdadm", 0600);
close(container->sock);
container->sock = make_control_sock(container->devname);
make_pidfile(container->devname, 0);
diff --git a/mdadm.8 b/mdadm.8
index 8022014..ab02558 100644
--- a/mdadm.8
+++ b/mdadm.8
@@ -309,7 +309,7 @@ says to get a list of array devices from
.BR /proc/mdstat .
.TP
-.B \-e ", " \-\-metadata=
+.BR \-e ", " \-\-metadata=
Declare the style of RAID metadata (superblock) to be used. The
default is 0.90 for
.BR \-\-create ,
@@ -1262,6 +1262,12 @@ reduce this as the kernel alerts
immediately when there is any change.
.TP
+.BR \-r ", " \-\-increment
+Give a percentage increment.
+.I mdadm
+will generate RebuildNN events with the given percentage increment.
+
+.TP
.BR \-f ", " \-\-daemonise
Tell
.I mdadm
@@ -1859,8 +1865,10 @@ An md array started reconstruction. (syslog priority: Warning)
.BI Rebuild NN
Where
.I NN
-is 20, 40, 60, or 80, this indicates that rebuild has passed that many
-percentage of the total. (syslog priority: Warning)
+is a two-digit number (e.g. 05, 48). This indicates that the rebuild
+has passed that many percent of the total. The events are generated
+at fixed increments starting from 0. The increment size may be specified
+with a command-line option (default is 20). (syslog priority: Warning)
.TP
.B RebuildFinished
diff --git a/mdadm.c b/mdadm.c
index a4f2d90..0c51d63 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -91,6 +91,7 @@ int main(int argc, char *argv[])
int require_homehost = 1;
char *mailaddr = NULL;
char *program = NULL;
+ int increments = 20;
int delay = 0;
int daemonise = 0;
char *pidfile = NULL;
@@ -714,6 +715,14 @@ int main(int argc, char *argv[])
program = optarg;
continue;
+ case O(MONITOR,'r'): /* rebuild increments */
+ increments = atoi(optarg);
+ if (increments>99 || increments<1) {
+ fprintf(stderr, Name ": please specify positive integer between 1 and 99 as rebuild increments.\n");
+ exit(2);
+ }
+ continue;
+
case O(MONITOR,'d'): /* delay in seconds */
case O(GROW, 'd'):
case O(BUILD,'d'): /* delay for bitmap updates */
@@ -1270,11 +1279,18 @@ int main(int argc, char *argv[])
struct mdstat_ent *ms = mdstat_read(0, 1);
struct mdstat_ent *e;
struct map_ent *map = NULL;
+ int members;
int v = verbose>1?0:verbose+1;
+ for (members = 0; members <= 1; members++) {
for (e=ms ; e ; e=e->next) {
char *name;
struct map_ent *me;
+ int member = e->metadata_version &&
+ strncmp(e->metadata_version,
+ "external:/", 10) == 0;
+ if (members != member)
+ continue;
me = map_by_devnum(&map, e->devnum);
if (me && me->path
&& strcmp(me->path, "/unknown") != 0)
@@ -1292,9 +1308,10 @@ int main(int argc, char *argv[])
export, test,
homehost);
else
- rv |= WaitClean(name, v);
+ rv |= WaitClean(name, -1, v);
put_md_name(name);
}
+ }
free_mdstat(ms);
} else if (devmode == 'S' && scan) {
/* apply --stop to all devices in /proc/mdstat */
@@ -1353,7 +1370,7 @@ int main(int argc, char *argv[])
case 'W':
rv |= Wait(dv->devname); continue;
case Waitclean:
- rv |= WaitClean(dv->devname, verbose-quiet); continue;
+ rv |= WaitClean(dv->devname, -1, verbose-quiet); continue;
}
mdfd = open_mddev(dv->devname, 1);
if (mdfd>=0) {
@@ -1393,7 +1410,7 @@ int main(int argc, char *argv[])
}
rv= Monitor(devlist, mailaddr, program,
delay?delay:60, daemonise, scan, oneshot,
- dosyslog, test, pidfile);
+ dosyslog, test, pidfile, increments);
break;
case GROW:
diff --git a/mdadm.h b/mdadm.h
index 2b8370c..261cdb7 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -153,6 +153,11 @@ struct mdinfo {
int cache_size; /* size of raid456 stripe cache*/
int mismatch_cnt;
char text_version[50];
+ void *update_private; /* for passing metadata-format
+ * specific update data
+ * between successive calls to
+ * update_super()
+ */
int container_member; /* for assembling external-metatdata arrays
* This is to be used internally by metadata
@@ -749,11 +754,11 @@ extern int Examine(mddev_dev_t devlist, int brief, int export, int scan,
extern int Monitor(mddev_dev_t devlist,
char *mailaddr, char *alert_cmd,
int period, int daemonise, int scan, int oneshot,
- int dosyslog, int test, char *pidfile);
+ int dosyslog, int test, char *pidfile, int increments);
extern int Kill(char *dev, int force, int quiet, int noexcl);
extern int Wait(char *dev);
-extern int WaitClean(char *dev, int verbose);
+extern int WaitClean(char *dev, int sock, int verbose);
extern int Incremental(char *devname, int verbose, int runstop,
struct supertype *st, char *homehost, int require_homehost,
@@ -813,6 +818,7 @@ extern void uuid_from_super(int uuid[4], mdp_super_t *super);
extern const int uuid_match_any[4];
extern int same_uuid(int a[4], int b[4], int swapuuid);
extern void copy_uuid(void *a, int b[4], int swapuuid);
+extern char *__fname_from_uuid(int id[4], int swap, char *buf, char sep);
extern char *fname_from_uuid(struct supertype *st,
struct mdinfo *info, char *buf, char sep);
extern unsigned long calc_csum(void *super, int bytes);
diff --git a/mdmon.c b/mdmon.c
index 31994d8..0ec4259 100644
--- a/mdmon.c
+++ b/mdmon.c
@@ -113,6 +113,14 @@ static struct superswitch *find_metadata_methods(char *vers)
return NULL;
}
+static int test_pidfile(char *devname)
+{
+ char path[100];
+ struct stat st;
+
+ sprintf(path, "/var/run/mdadm/%s.pid", devname);
+ return stat(path, &st);
+}
int make_pidfile(char *devname, int o_excl)
{
@@ -149,26 +157,29 @@ int is_container_member(struct mdstat_ent *mdstat, char *container)
return 1;
}
-void remove_pidfile(char *devname);
-static void try_kill_monitor(char *devname)
+pid_t devname2mdmon(char *devname)
{
char buf[100];
+ pid_t pid = -1;
int fd;
- pid_t pid;
- struct mdstat_ent *mdstat;
sprintf(buf, "/var/run/mdadm/%s.pid", devname);
- fd = open(buf, O_RDONLY);
+ fd = open(buf, O_RDONLY|O_NOATIME);
if (fd < 0)
- return;
-
- if (read(fd, buf, sizeof(buf)) < 0) {
- close(fd);
- return;
- }
+ return -1;
+ if (read(fd, buf, sizeof(buf)) > 0)
+ sscanf(buf, "%d\n", &pid);
close(fd);
- pid = strtoul(buf, NULL, 10);
+
+ return pid;
+}
+
+static void try_kill_monitor(pid_t pid, char *devname, int sock)
+{
+ char buf[100];
+ int fd;
+ struct mdstat_ent *mdstat;
/* first rule of survival... don't off yourself */
if (pid == getpid())
@@ -194,10 +205,9 @@ static void try_kill_monitor(char *devname)
for ( ; mdstat; mdstat = mdstat->next)
if (is_container_member(mdstat, devname)) {
sprintf(buf, "/dev/%s", mdstat->dev);
- WaitClean(buf, 0);
+ WaitClean(buf, sock, 0);
}
free_mdstat(mdstat);
- remove_pidfile(devname);
}
void remove_pidfile(char *devname)
@@ -355,9 +365,34 @@ int mdmon(char *devname, int devnum, int scan, char *switchroot)
int pfd[2];
int status;
int ignore;
+ pid_t victim = -1;
+ int victim_sock = -1;
dprintf("starting mdmon for %s in %s\n",
devname, switchroot ? : "/");
+
+ /* try to spawn mdmon instances from the target file system */
+ if (switchroot && strcmp(switchroot, "/") != 0) {
+ char path[1024];
+ pid_t pid;
+
+ sprintf(path, "%s/sbin/mdmon", switchroot);
+ switch (fork()) {
+ case 0:
+ execl(path, "mdmon", devname, NULL);
+ exit(1);
+ case -1:
+ return 1;
+ default:
+ pid = wait(&status);
+ if (pid > -1 && WIFEXITED(status) &&
+ WEXITSTATUS(status) == 0)
+ return 0;
+ else
+ return 1;
+ }
+ }
+
mdfd = open_dev(devnum);
if (mdfd < 0) {
fprintf(stderr, "mdmon: %s: %s\n", devname,
@@ -400,6 +435,7 @@ int mdmon(char *devname, int devnum, int scan, char *switchroot)
container->devname = devname;
container->arrays = NULL;
container->subarray[0] = 0;
+ container->sock = -1;
if (!container->devname) {
fprintf(stderr, "mdmon: failed to allocate container name string\n");
@@ -464,12 +500,10 @@ int mdmon(char *devname, int devnum, int scan, char *switchroot)
if (switchroot) {
/* we assume we assume that /sys /proc /dev are available in
- * the new root (see nash:setuproot)
- *
- * kill any monitors in the current namespace and change
- * to the new one
+ * the new root
*/
- try_kill_monitor(container->devname);
+ victim = devname2mdmon(container->devname);
+ victim_sock = connect_monitor(container->devname);
if (chroot(switchroot) != 0) {
fprintf(stderr, "mdmon: failed to chroot to '%s': %s\n",
switchroot, strerror(errno));
@@ -477,40 +511,15 @@ int mdmon(char *devname, int devnum, int scan, char *switchroot)
}
}
- /* If this fails, we hope it already exists
- * pid file lives in /var/run/mdadm/mdXX.pid
- */
- mkdir("/var", 0600);
- mkdir("/var/run", 0600);
- mkdir("/var/run/mdadm", 0600);
ignore = chdir("/");
- if (make_pidfile(container->devname, O_EXCL) < 0) {
+ if (victim < 0 && test_pidfile(container->devname) == 0) {
if (ping_monitor(container->devname) == 0) {
fprintf(stderr, "mdmon: %s already managed\n",
container->devname);
exit(3);
- } else {
- int err;
-
- /* cleanup the old monitor, this one is taking over */
- try_kill_monitor(container->devname);
- err = make_pidfile(container->devname, 0);
- if (err < 0) {
- fprintf(stderr, "mdmon: %s Cannot create pidfile\n",
- container->devname);
- if (err == -EROFS) {
- /* FIXME implement a mechanism to
- * prevent duplicate monitor instances
- */
- fprintf(stderr,
- "mdmon: continuing on read-only file system\n");
- } else
- exit(3);
- }
- }
+ } else if (victim < 0)
+ victim = devname2mdmon(container->devname);
}
- container->sock = make_control_sock(container->devname);
-
if (container->ss->load_super(container, mdfd, devname)) {
fprintf(stderr, "mdmon: Cannot load metadata for %s\n",
devname);
@@ -536,7 +545,7 @@ int mdmon(char *devname, int devnum, int scan, char *switchroot)
ignore = dup(0);
#endif
- mlockall(MCL_FUTURE);
+ mlockall(MCL_CURRENT | MCL_FUTURE);
if (clone_monitor(container) < 0) {
fprintf(stderr, "mdmon: failed to start monitor process: %s\n",
@@ -544,6 +553,10 @@ int mdmon(char *devname, int devnum, int scan, char *switchroot)
exit(2);
}
+ if (victim > -1) {
+ try_kill_monitor(victim, container->devname, victim_sock);
+ close(victim_sock);
+ }
do_manager(container);
exit(0);
diff --git a/mdopen.c b/mdopen.c
index d322cf4..21baf5d 100644
--- a/mdopen.c
+++ b/mdopen.c
@@ -43,7 +43,7 @@ void make_parts(char *dev, int cnt)
int odig = odig; /* quiet gcc -Os unitialized warning */
int i;
int nlen = strlen(dev) + 20;
- char *name = malloc(nlen);
+ char *name;
int dig = isdigit(dev[strlen(dev)-1]);
char orig[1024];
char sym[1024];
@@ -52,6 +52,7 @@ void make_parts(char *dev, int cnt)
if (cnt==0) cnt=4;
if (lstat(dev, &stb)!= 0)
return;
+
if (S_ISLNK(stb.st_mode)) {
int len = readlink(dev, orig, sizeof(orig));
if (len < 0 || len > 1000)
@@ -63,6 +64,7 @@ void make_parts(char *dev, int cnt)
minor_num = minor(stb.st_rdev);
} else
return;
+ name = malloc(nlen);
for (i=1; i <= cnt ; i++) {
struct stat stb2;
snprintf(name, nlen, "%s%s%d", dev, dig?"p":"", i);
@@ -92,6 +94,7 @@ void make_parts(char *dev, int cnt)
if (err == 0 && stat(name, &stb2) == 0)
add_dev(name, &stb2, 0, NULL);
}
+ free(name);
}
@@ -156,7 +159,6 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
if (dev) {
-
if (strncmp(dev, "/dev/md/", 8) == 0) {
strcpy(cname, dev+8);
} else if (strncmp(dev, "/dev/", 5) == 0) {
@@ -307,7 +309,7 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy,
}
}
- if (dev)
+ if (dev && dev[0] == '/')
strcpy(chosen, dev);
else if (cname[0] == 0)
strcpy(chosen, devname);
diff --git a/msg.c b/msg.c
index 5a4839f..8d52b94 100644
--- a/msg.c
+++ b/msg.c
@@ -177,10 +177,8 @@ int connect_monitor(char *devname)
return sfd;
}
-/* give the monitor a chance to update the metadata */
-int ping_monitor(char *devname)
+int fping_monitor(int sfd)
{
- int sfd = connect_monitor(devname);
int err = 0;
if (sfd < 0)
@@ -194,6 +192,16 @@ int ping_monitor(char *devname)
if (!err && wait_reply(sfd, 20) != 0)
err = -1;
+ return err;
+}
+
+
+/* give the monitor a chance to update the metadata */
+int ping_monitor(char *devname)
+{
+ int sfd = connect_monitor(devname);
+ int err = fping_monitor(sfd);
+
close(sfd);
return err;
}
diff --git a/msg.h b/msg.h
index b9bd205..f8e89fd 100644
--- a/msg.h
+++ b/msg.h
@@ -27,6 +27,7 @@ extern int ack(int fd, int tmo);
extern int wait_reply(int fd, int tmo);
extern int connect_monitor(char *devname);
extern int ping_monitor(char *devname);
+extern int fping_monitor(int sock);
extern int ping_manager(char *devname);
#define MSG_MAX_LEN (4*1024*1024)
diff --git a/super-ddf.c b/super-ddf.c
index 9bf08c2..06858e2 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -1589,13 +1589,8 @@ static int init_super_ddf(struct supertype *st,
struct phys_disk *pd;
struct virtual_disk *vd;
- if (!info) {
- st->sb = NULL;
- return 0;
- }
if (st->sb)
- return init_super_ddf_bvd(st, info, size, name, homehost,
- uuid);
+ return init_super_ddf_bvd(st, info, size, name, homehost, uuid);
if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
fprintf(stderr, Name ": %s could not allocate superblock\n", __func__);
@@ -1604,6 +1599,12 @@ static int init_super_ddf(struct supertype *st,
memset(ddf, 0, sizeof(*ddf));
ddf->dlist = NULL; /* no physical disks yet */
ddf->conflist = NULL; /* No virtual disks yet */
+ st->sb = ddf;
+
+ if (info == NULL) {
+ /* zeroing superblock */
+ return 0;
+ }
/* At least 32MB *must* be reserved for the ddf. So let's just
* start 32MB from the end, and put the primary header there.
@@ -2971,12 +2972,22 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
return rest;
}
-static int store_zero_ddf(struct supertype *st, int fd)
+static int store_super_ddf(struct supertype *st, int fd)
{
+ struct ddf_super *ddf = st->sb;
unsigned long long dsize;
void *buf;
int rc;
+ if (!ddf)
+ return 1;
+
+ /* ->dlist and ->conflist will be set for updates, currently not
+ * supported
+ */
+ if (ddf->dlist || ddf->conflist)
+ return 1;
+
if (!get_dev_size(fd, NULL, &dsize))
return 1;
@@ -3627,7 +3638,7 @@ struct superswitch super_ddf = {
.load_super = load_super_ddf,
.init_super = init_super_ddf,
- .store_super = store_zero_ddf,
+ .store_super = store_super_ddf,
.free_super = free_super_ddf,
.match_metadata_desc = match_metadata_desc_ddf,
.container_content = container_content_ddf,
diff --git a/super-intel.c b/super-intel.c
index 07b0b90..9a99d60 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -265,6 +265,14 @@ struct intel_super {
struct bbm_log *bbm_log;
const char *hba; /* device path of the raid controller for this metadata */
const struct imsm_orom *orom; /* platform firmware support */
+ struct intel_super *next; /* (temp) list for disambiguating family_num */
+};
+
+struct intel_disk {
+ struct imsm_disk disk;
+ #define IMSM_UNKNOWN_OWNER (-1)
+ int owner;
+ struct intel_disk *next;
};
struct extent {
@@ -611,6 +619,21 @@ static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
return rv;
}
+static int is_spare(struct imsm_disk *disk)
+{
+ return (disk->status & SPARE_DISK) == SPARE_DISK;
+}
+
+static int is_configured(struct imsm_disk *disk)
+{
+ return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
+}
+
+static int is_failed(struct imsm_disk *disk)
+{
+ return (disk->status & FAILED_DISK) == FAILED_DISK;
+}
+
#ifndef MDASSEMBLE
static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
{
@@ -676,7 +699,6 @@ static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
{
struct imsm_disk *disk = __get_imsm_disk(mpb, index);
char str[MAX_RAID_SERIAL_LEN + 1];
- __u32 s;
__u64 sz;
if (index < 0)
@@ -685,10 +707,9 @@ static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
printf("\n");
snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
printf(" Disk%02d Serial : %s\n", index, str);
- s = disk->status;
- printf(" State :%s%s%s\n", s&SPARE_DISK ? " spare" : "",
- s&CONFIGURED_DISK ? " active" : "",
- s&FAILED_DISK ? " failed" : "");
+ printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
+ is_configured(disk) ? " active" : "",
+ is_failed(disk) ? " failed" : "");
printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
sz = __le32_to_cpu(disk->total_blocks) - reserved;
printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
@@ -1298,7 +1319,6 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info)
{
struct intel_super *super = st->sb;
struct imsm_disk *disk;
- __u32 s;
if (super->current_vol >= 0) {
getinfo_super_imsm_volume(st, info);
@@ -1334,14 +1354,13 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info)
disk = &super->disks->disk;
info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
info->component_size = reserved;
- s = disk->status;
- info->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0;
+ info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
/* we don't change info->disk.raid_disk here because
* this state will be finalized in mdmon after we have
* found the 'most fresh' version of the metadata
*/
- info->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0;
- info->disk.state |= s & SPARE_DISK ? 0 : (1 << MD_DISK_SYNC);
+ info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
+ info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
}
/* only call uuid_from_super_imsm when this disk is part of a populated container,
@@ -1359,8 +1378,6 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info,
char *update, char *devname, int verbose,
int uuid_set, char *homehost)
{
- /* FIXME */
-
/* For 'assemble' and 'force' we need to return non-zero if any
* change was made. For others, the return value is ignored.
* Update options are:
@@ -1376,26 +1393,55 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info,
* linear only
* resync: mark as dirty so a resync will happen.
* name: update the name - preserving the homehost
+ * uuid: Change the uuid of the array to match what is given
*
* Following are not relevant for this imsm:
* sparc2.2 : update from old dodgey metadata
* super-minor: change the preferred_minor number
* summaries: update redundant counters.
- * uuid: Change the uuid of the array to match watch is given
* homehost: update the recorded homehost
* _reshape_progress: record new reshape_progress position.
*/
- int rv = 0;
- //struct intel_super *super = st->sb;
- //struct imsm_super *mpb = super->mpb;
+ int rv = 1;
+ struct intel_super *super = st->sb;
+ struct imsm_super *mpb;
- if (strcmp(update, "grow") == 0) {
- }
- if (strcmp(update, "resync") == 0) {
- /* dev->vol.dirty = 1; */
- }
+ /* we can only update container info */
+ if (!super || super->current_vol >= 0 || !super->anchor)
+ return 1;
+
+ mpb = super->anchor;
+
+ if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
+ fprintf(stderr,
+ Name ": '--uuid' not supported for imsm metadata\n");
+ else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) {
+ mpb->orig_family_num = *((__u32 *) info->update_private);
+ rv = 0;
+ } else if (strcmp(update, "uuid") == 0) {
+ __u32 *new_family = malloc(sizeof(*new_family));
+
+ /* update orig_family_number with the incoming random
+ * data, report the new effective uuid, and store the
+ * new orig_family_num for future updates.
+ */
+ if (new_family) {
+ memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32));
+ uuid_from_super_imsm(st, info->uuid);
+ *new_family = mpb->orig_family_num;
+ info->update_private = new_family;
+ rv = 0;
+ }
+ } else if (strcmp(update, "assemble") == 0)
+ rv = 0;
+ else
+ fprintf(stderr,
+ Name ": '--update=%s' not supported for imsm metadata\n",
+ update);
- /* IMSM has no concept of UUID or homehost */
+ /* successful update? recompute checksum */
+ if (rv == 0)
+ mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
return rv;
}
@@ -1458,19 +1504,33 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst)
return 0;
}
- if (memcmp(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH) != 0)
- return 3;
-
/* if an anchor does not have num_raid_devs set then it is a free
* floating spare
*/
if (first->anchor->num_raid_devs > 0 &&
sec->anchor->num_raid_devs > 0) {
- if (first->anchor->orig_family_num != sec->anchor->orig_family_num ||
- first->anchor->family_num != sec->anchor->family_num)
+ /* Determine if these disks might ever have been
+ * related. Further disambiguation can only take place
+ * in load_super_imsm_all
+ */
+ __u32 first_family = first->anchor->orig_family_num;
+ __u32 sec_family = sec->anchor->orig_family_num;
+
+ if (memcmp(first->anchor->sig, sec->anchor->sig,
+ MAX_SIGNATURE_LENGTH) != 0)
+ return 3;
+
+ if (first_family == 0)
+ first_family = first->anchor->family_num;
+ if (sec_family == 0)
+ sec_family = sec->anchor->family_num;
+
+ if (first_family != sec_family)
return 3;
+
}
+
/* if 'first' is a spare promote it to a populated mpb with sec's
* family number
*/
@@ -1537,7 +1597,6 @@ static void fd2devname(int fd, char *name)
snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
}
-
extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
static int imsm_read_serial(int fd, char *devname,
@@ -1620,6 +1679,7 @@ static void serialcpy(__u8 *dest, __u8 *src)
strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
}
+#ifndef MDASSEMBLE
static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
{
struct dl *dl;
@@ -1630,15 +1690,34 @@ static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
return dl;
}
+#endif
+
+static struct imsm_disk *
+__serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
+{
+ int i;
+
+ for (i = 0; i < mpb->num_disks; i++) {
+ struct imsm_disk *disk = __get_imsm_disk(mpb, i);
+
+ if (serialcmp(disk->serial, serial) == 0) {
+ if (idx)
+ *idx = i;
+ return disk;
+ }
+ }
+
+ return NULL;
+}
static int
load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
{
+ struct imsm_disk *disk;
struct dl *dl;
struct stat stb;
int rv;
- int i;
- int alloc = 1;
+ char name[40];
__u8 serial[MAX_RAID_SERIAL_LEN];
rv = imsm_read_serial(fd, devname, serial);
@@ -1646,16 +1725,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
if (rv != 0)
return 2;
- /* check if this is a disk we have seen before. it may be a spare in
- * super->disks while the current anchor believes it is a raid member,
- * check if we need to update dl->index
- */
- dl = serial_to_dl(serial, super);
- if (!dl)
- dl = malloc(sizeof(*dl));
- else
- alloc = 0;
-
+ dl = calloc(1, sizeof(*dl));
if (!dl) {
if (devname)
fprintf(stderr,
@@ -1664,53 +1734,35 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
return 2;
}
- if (alloc) {
- fstat(fd, &stb);
- dl->major = major(stb.st_rdev);
- dl->minor = minor(stb.st_rdev);
- dl->next = super->disks;
- dl->fd = keep_fd ? fd : -1;
- dl->devname = devname ? strdup(devname) : NULL;
- serialcpy(dl->serial, serial);
- dl->index = -2;
- dl->e = NULL;
- } else if (keep_fd) {
- close(dl->fd);
- dl->fd = fd;
- }
+ fstat(fd, &stb);
+ dl->major = major(stb.st_rdev);
+ dl->minor = minor(stb.st_rdev);
+ dl->next = super->disks;
+ dl->fd = keep_fd ? fd : -1;
+ assert(super->disks == NULL);
+ super->disks = dl;
+ serialcpy(dl->serial, serial);
+ dl->index = -2;
+ dl->e = NULL;
+ fd2devname(fd, name);
+ if (devname)
+ dl->devname = strdup(devname);
+ else
+ dl->devname = strdup(name);
/* look up this disk's index in the current anchor */
- for (i = 0; i < super->anchor->num_disks; i++) {
- struct imsm_disk *disk_iter;
-
- disk_iter = __get_imsm_disk(super->anchor, i);
-
- if (serialcmp(disk_iter->serial, dl->serial) == 0) {
- dl->disk = *disk_iter;
- /* only set index on disks that are a member of a
- * populated contianer, i.e. one with raid_devs
- */
- if (dl->disk.status & FAILED_DISK)
- dl->index = -2;
- else if (dl->disk.status & SPARE_DISK)
- dl->index = -1;
- else
- dl->index = i;
-
- break;
- }
- }
-
- /* no match, maybe a stale failed drive */
- if (i == super->anchor->num_disks && dl->index >= 0) {
- dl->disk = *__get_imsm_disk(super->anchor, dl->index);
- if (dl->disk.status & FAILED_DISK)
+ disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
+ if (disk) {
+ dl->disk = *disk;
+ /* only set index on disks that are a member of a
+ * populated container, i.e. one with raid_devs
+ */
+ if (is_failed(&dl->disk))
dl->index = -2;
+ else if (is_spare(&dl->disk))
+ dl->index = -1;
}
- if (alloc)
- super->disks = dl;
-
return 0;
}
@@ -1852,7 +1904,6 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
struct stat;
struct imsm_super *anchor;
__u32 check_sum;
- int rc;
get_dev_size(fd, NULL, &dsize);
@@ -1914,10 +1965,7 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
return 2;
}
- rc = load_imsm_disk(fd, super, devname, 0);
- if (rc == 0)
- rc = parse_raid_devices(super);
- return rc;
+ return 0;
}
/* read the extended mpb */
@@ -1953,11 +2001,23 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
*/
super->bbm_log = __get_imsm_bbm_log(super->anchor);
- rc = load_imsm_disk(fd, super, devname, 0);
- if (rc == 0)
- rc = parse_raid_devices(super);
+ return 0;
+}
+
+static int
+load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
+{
+ int err;
+
+ err = load_imsm_mpb(fd, super, devname);
+ if (err)
+ return err;
+ err = load_imsm_disk(fd, super, devname, keep_fd);
+ if (err)
+ return err;
+ err = parse_raid_devices(super);
- return rc;
+ return err;
}
static void __free_imsm_disk(struct dl *d)
@@ -2087,19 +2147,333 @@ static int find_missing(struct intel_super *super)
return 0;
}
+/* Walk the singly linked 'disk_list' and return the first entry whose
+ * serial compares equal to 'serial' under serialcmp(); NULL when the list
+ * is empty or nothing matches.
+ */
+static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
+{
+	struct intel_disk *cur;
+
+	for (cur = disk_list; cur; cur = cur->next)
+		if (serialcmp(cur->disk.serial, serial) == 0)
+			return cur;
+
+	return NULL;
+}
+
+/* Fold 'super' into the candidate table and the merged disk list.
+ *
+ * table, tbl_size: mpbs collected so far; an mpb whose family_num matches an
+ *	existing entry either replaces that entry or is dropped in its favour
+ * super: the mpb being considered; 'd' below is the imsm_disk record of
+ *	super->disks
+ * disk_list: merged list of imsm_disk records, extended/updated with the
+ *	disks named in super's mpb when super enters the table
+ *
+ * Returns the resulting number of table entries (unchanged when an existing
+ * entry is preferred), or -1 when a disk_list entry cannot be allocated.
+ * A negative tbl_size, i.e. a failure reported by an earlier call, is handed
+ * straight back so it can never be used as a table index.
+ */
+static int __prep_thunderdome(struct intel_super **table, int tbl_size,
+			      struct intel_super *super,
+			      struct intel_disk **disk_list)
+{
+	struct imsm_disk *d = &super->disks->disk;
+	struct imsm_super *mpb = super->anchor;
+	int i, j;
+
+	/* keep an earlier allocation failure sticky instead of writing
+	 * to table[-1] below
+	 */
+	if (tbl_size < 0)
+		return tbl_size;
+
+	for (i = 0; i < tbl_size; i++) {
+		struct imsm_super *tbl_mpb = table[i]->anchor;
+		struct imsm_disk *tbl_d = &table[i]->disks->disk;
+
+		if (tbl_mpb->family_num != mpb->family_num)
+			continue;
+
+		if (tbl_mpb->check_sum == mpb->check_sum) {
+			dprintf("%s: mpb from %d:%d matches %d:%d\n",
+				__func__, super->disks->major,
+				super->disks->minor,
+				table[i]->disks->major,
+				table[i]->disks->minor);
+			break;
+		}
+
+		/* generation_num is kept in little-endian (on-disk) order,
+		 * so convert before doing an ordered comparison
+		 */
+		if (((is_configured(d) && !is_configured(tbl_d)) ||
+		     is_configured(d) == is_configured(tbl_d)) &&
+		    __le32_to_cpu(tbl_mpb->generation_num) <
+		    __le32_to_cpu(mpb->generation_num)) {
+			/* current version of the mpb is a
+			 * better candidate than the one in
+			 * super_table, but copy over "cross
+			 * generational" status
+			 */
+			struct intel_disk *idisk;
+
+			dprintf("%s: mpb from %d:%d replaces %d:%d\n",
+				__func__, super->disks->major,
+				super->disks->minor,
+				table[i]->disks->major,
+				table[i]->disks->minor);
+
+			idisk = disk_list_get(tbl_d->serial, *disk_list);
+			if (idisk && is_failed(&idisk->disk))
+				tbl_d->status |= FAILED_DISK;
+			break;
+		} else {
+			struct intel_disk *idisk;
+			struct imsm_disk *disk;
+
+			/* tbl_mpb is more up to date, but copy
+			 * over cross generational status before
+			 * returning
+			 */
+			disk = __serial_to_disk(d->serial, mpb, NULL);
+			if (disk && is_failed(disk))
+				d->status |= FAILED_DISK;
+
+			idisk = disk_list_get(d->serial, *disk_list);
+			if (idisk) {
+				idisk->owner = i;
+				if (disk && is_configured(disk))
+					idisk->disk.status |= CONFIGURED_DISK;
+			}
+
+			dprintf("%s: mpb from %d:%d prefer %d:%d\n",
+				__func__, super->disks->major,
+				super->disks->minor,
+				table[i]->disks->major,
+				table[i]->disks->minor);
+
+			return tbl_size;
+		}
+	}
+
+	/* no entry for this family yet: append; otherwise take over slot i */
+	if (i >= tbl_size)
+		table[tbl_size++] = super;
+	else
+		table[i] = super;
+
+	/* update/extend the merged list of imsm_disk records */
+	for (j = 0; j < mpb->num_disks; j++) {
+		struct imsm_disk *disk = __get_imsm_disk(mpb, j);
+		struct intel_disk *idisk;
+
+		idisk = disk_list_get(disk->serial, *disk_list);
+		if (idisk) {
+			idisk->disk.status |= disk->status;
+			/* a disk that is configured or failed in any mpb
+			 * is no longer treated as a spare
+			 */
+			if (is_configured(&idisk->disk) ||
+			    is_failed(&idisk->disk))
+				idisk->disk.status &= ~(SPARE_DISK);
+		} else {
+			idisk = calloc(1, sizeof(*idisk));
+			if (!idisk)
+				return -1;
+			idisk->owner = IMSM_UNKNOWN_OWNER;
+			idisk->disk = *disk;
+			idisk->next = *disk_list;
+			*disk_list = idisk;
+		}
+
+		/* the disk this mpb was loaded from is owned by slot i */
+		if (serialcmp(idisk->disk.serial, d->serial) == 0)
+			idisk->owner = i;
+	}
+
+	return tbl_size;
+}
+
+/* Check every disk named in super's mpb against the merged 'disk_list'.
+ * A disk is acceptable when its merged entry is owned by 'owner' or has
+ * IMSM_UNKNOWN_OWNER.  Returns 'super' when all mpb->num_disks disks are
+ * acceptable, NULL otherwise (including when a disk is missing from the
+ * merged list altogether).
+ */
+static struct intel_super *
+validate_members(struct intel_super *super, struct intel_disk *disk_list,
+		 const int owner)
+{
+	struct imsm_super *mpb = super->anchor;
+	int accepted = 0;
+	int idx;
+
+	for (idx = 0; idx < mpb->num_disks; idx++) {
+		struct imsm_disk *member = __get_imsm_disk(mpb, idx);
+		struct intel_disk *known = disk_list_get(member->serial, disk_list);
+
+		if (!known) {
+			/* a disk nobody has seen can never validate */
+			dprintf("%s: unknown disk %x [%d]: %.16s\n",
+				__func__, __le32_to_cpu(mpb->family_num), idx,
+				member->serial);
+			return NULL;
+		}
+
+		if (known->owner != owner &&
+		    known->owner != IMSM_UNKNOWN_OWNER) {
+			dprintf("%s: '%.16s' owner %d != %d\n",
+				__func__, member->serial, known->owner,
+				owner);
+			continue;
+		}
+
+		accepted++;
+	}
+
+	return accepted == mpb->num_disks ? super : NULL;
+}
+
+/* Report on stderr every entry of 'super_list' whose anchor carries
+ * 'family_num' (compared in on-disk byte order); those are the supers
+ * being taken offline because of a family conflict.
+ */
+static void show_conflicts(__u32 family_num, struct intel_super *super_list)
+{
+	struct intel_super *entry = super_list;
+
+	while (entry) {
+		if (entry->anchor->family_num == family_num)
+			fprintf(stderr, "Conflict, offlining family %#x on '%s'\n",
+				__le32_to_cpu(family_num), entry->disks->devname);
+		entry = entry->next;
+	}
+}
+
+/* Pick the one mpb ("champion") that will represent the container.
+ *
+ * super_list: list of loaded supers; the champion is unlinked from it and
+ *	the disks of the other supers that the champion accepts are moved
+ *	onto champion->disks
+ * len: number of entries in *super_list (upper bound for the table)
+ *
+ * Steps:
+ *  1. fold every super into a table and a merged disk list via
+ *     __prep_thunderdome()
+ *  2. drop table entries whose own disk is not owned by that entry in the
+ *     merged list, or whose members fail validate_members()
+ *  3. choose the first surviving entry that has raid devices; further
+ *     such entries are reported as conflicts.  If there is none, fall
+ *     back to a surviving entry without raid devices (a spare record)
+ *  4. re-home the remaining disks onto the champion
+ *
+ * Returns the champion, or NULL when no entry survives or the merged disk
+ * list could not be allocated.  The merged disk list is always freed.
+ */
+static struct intel_super *
+imsm_thunderdome(struct intel_super **super_list, int len)
+{
+	/* never declare a zero-length variable length array */
+	struct intel_super *super_table[len > 0 ? len : 1];
+	struct intel_disk *disk_list = NULL;
+	struct intel_super *champion = NULL, *spare;
+	struct intel_super *s, **del;
+	int tbl_size = 0;
+	int conflict;
+	int i;
+
+	memset(super_table, 0, sizeof(super_table));
+	for (s = *super_list; s; s = s->next) {
+		tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);
+		/* -1 means a merged-list entry could not be allocated;
+		 * feeding it back in would index the table out of bounds
+		 */
+		if (tbl_size < 0)
+			goto out;
+	}
+
+	for (i = 0; i < tbl_size; i++) {
+		struct imsm_disk *d;
+		struct intel_disk *idisk;
+		struct imsm_super *mpb = super_table[i]->anchor;
+
+		s = super_table[i];
+		d = &s->disks->disk;
+
+		/* 'd' must appear in merged disk list for its
+		 * configuration to be valid
+		 */
+		idisk = disk_list_get(d->serial, disk_list);
+		if (idisk && idisk->owner == i)
+			s = validate_members(s, disk_list, i);
+		else
+			s = NULL;
+
+		if (!s)
+			dprintf("%s: marking family: %#x from %d:%d offline\n",
+				__func__, __le32_to_cpu(mpb->family_num),
+				super_table[i]->disks->major,
+				super_table[i]->disks->minor);
+		super_table[i] = s;
+	}
+
+	/* This is where the mdadm implementation differs from the Windows
+	 * driver which has no strict concept of a container.  We can only
+	 * assemble one family from a container, so when returning a prodigal
+	 * array member to this system the code will not be able to disambiguate
+	 * the container contents that should be assembled ("foreign" versus
+	 * "local").  It requires user intervention to set the orig_family_num
+	 * to a new value to establish a new container.  The Windows driver in
+	 * this situation fixes up the volume name in place and manages the
+	 * foreign array as an independent entity.
+	 */
+	s = NULL;
+	spare = NULL;
+	conflict = 0;
+	for (i = 0; i < tbl_size; i++) {
+		struct intel_super *tbl_ent = super_table[i];
+		int spare_only = 0;
+
+		if (!tbl_ent)
+			continue;
+
+		/* an mpb without raid devices only describes spares */
+		if (tbl_ent->anchor->num_raid_devs == 0) {
+			spare = tbl_ent;
+			spare_only = 1;
+		}
+
+		if (s && !spare_only) {
+			show_conflicts(tbl_ent->anchor->family_num, *super_list);
+			conflict++;
+		} else if (!s && !spare_only)
+			s = tbl_ent;
+	}
+
+	if (!s)
+		s = spare;
+	if (!s) {
+		champion = NULL;
+		goto out;
+	}
+	champion = s;
+
+	if (conflict)
+		fprintf(stderr, "Chose family %#x on '%s', "
+			"assemble conflicts to new container with '--update=uuid'\n",
+			__le32_to_cpu(s->anchor->family_num), s->disks->devname);
+
+	/* collect all dl's onto 'champion', and update them to
+	 * champion's version of the status
+	 */
+	for (s = *super_list; s; s = s->next) {
+		struct imsm_super *mpb = champion->anchor;
+		struct dl *dl = s->disks;
+		struct imsm_disk *disk = NULL;
+
+		if (s == champion)
+			continue;
+
+		/* NOTE(review): the lookup does not use 'i', so a single
+		 * call is probably sufficient - confirm against
+		 * __serial_to_disk() before simplifying
+		 */
+		for (i = 0; i < mpb->num_disks; i++) {
+			disk = __serial_to_disk(dl->serial, mpb, &dl->index);
+			if (disk)
+				break;
+		}
+
+		if (disk) {
+			dl->disk = *disk;
+			/* only set index on disks that are a member of
+			 * a populated container, i.e. one with
+			 * raid_devs
+			 */
+			if (is_failed(&dl->disk))
+				dl->index = -2;
+			else if (is_spare(&dl->disk))
+				dl->index = -1;
+		} else {
+			struct intel_disk *idisk;
+
+			/* champion does not know this disk: keep it only
+			 * when the merged list says it is a pure spare.  A
+			 * disk missing from the merged list is treated like
+			 * any other non-spare and left behind.
+			 */
+			idisk = disk_list_get(dl->serial, disk_list);
+			if (idisk && is_spare(&idisk->disk) &&
+			    !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
+				dl->index = -1;
+			else {
+				dl->index = -2;
+				continue;
+			}
+		}
+
+		dl->next = champion->disks;
+		champion->disks = dl;
+		s->disks = NULL;
+	}
+
+	/* delete 'champion' from super_list */
+	for (del = super_list; *del; ) {
+		if (*del == champion) {
+			*del = (*del)->next;
+			break;
+		} else
+			del = &(*del)->next;
+	}
+	champion->next = NULL;
+
+ out:
+	while (disk_list) {
+		struct intel_disk *idisk = disk_list;
+
+		disk_list = disk_list->next;
+		free(idisk);
+	}
+
+	return champion;
+}
+
static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
char *devname, int keep_fd)
{
struct mdinfo *sra;
- struct intel_super *super;
- struct mdinfo *sd, *best = NULL;
- __u32 bestgen = 0;
- __u32 gen;
- char nm[20];
- int dfd;
- int rv;
+ struct intel_super *super_list = NULL;
+ struct intel_super *super = NULL;
int devnum = fd2devnum(fd);
+ struct mdinfo *sd;
int retry;
+ int err = 0;
+ int i;
enum sysfs_read_flags flags;
flags = GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE;
@@ -2116,81 +2490,51 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
strcmp(sra->text_version, "imsm") != 0)
return 1;
- super = alloc_super(0);
- if (!super)
- return 1;
+ /* load all mpbs */
+ for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
+ struct intel_super *s = alloc_super(0);
+ char nm[20];
+ int dfd;
- /* find the most up to date disk in this array, skipping spares */
- for (sd = sra->devs; sd; sd = sd->next) {
+ err = 1;
+ if (!s)
+ goto error;
+ s->next = super_list;
+ super_list = s;
+
+ err = 2;
sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
dfd = dev_open(nm, keep_fd ? O_RDWR : O_RDONLY);
- if (dfd < 0) {
- free_imsm(super);
- return 2;
- }
- rv = load_imsm_mpb(dfd, super, NULL);
+ if (dfd < 0)
+ goto error;
+
+ err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
/* retry the load if we might have raced against mdmon */
- if (rv == 3 && mdmon_running(devnum))
+ if (err == 3 && mdmon_running(devnum))
for (retry = 0; retry < 3; retry++) {
usleep(3000);
- rv = load_imsm_mpb(dfd, super, NULL);
- if (rv != 3)
+ err = load_and_parse_mpb(dfd, s, NULL, keep_fd);
+ if (err != 3)
break;
}
if (!keep_fd)
close(dfd);
- if (rv == 0) {
- if (super->anchor->num_raid_devs == 0)
- gen = 0;
- else
- gen = __le32_to_cpu(super->anchor->generation_num);
- if (!best || gen > bestgen) {
- bestgen = gen;
- best = sd;
- }
- } else {
- free_imsm(super);
- return rv;
- }
+ if (err)
+ goto error;
}
- if (!best) {
- free_imsm(super);
- return 1;
- }
-
- /* load the most up to date anchor */
- sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
- dfd = dev_open(nm, O_RDONLY);
- if (dfd < 0) {
- free_imsm(super);
- return 1;
- }
- rv = load_imsm_mpb(dfd, super, NULL);
- close(dfd);
- if (rv != 0) {
- free_imsm(super);
- return 2;
- }
-
- /* re-parse the disk list with the current anchor */
- for (sd = sra->devs ; sd ; sd = sd->next) {
- sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
- dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
- if (dfd < 0) {
- free_imsm(super);
- return 2;
- }
- load_imsm_disk(dfd, super, NULL, keep_fd);
- if (!keep_fd)
- close(dfd);
+ /* all mpbs enter, maybe one leaves */
+ super = imsm_thunderdome(&super_list, i);
+ if (!super) {
+ err = 1;
+ goto error;
}
-
if (find_missing(super) != 0) {
free_imsm(super);
- return 2;
+ err = 2;
+ goto error;
}
if (st->subarray[0]) {
@@ -2198,13 +2542,26 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
super->current_vol = atoi(st->subarray);
else {
free_imsm(super);
- return 1;
+ err = 1;
+ goto error;
}
}
+ err = 0;
+
+ error:
+ while (super_list) {
+ struct intel_super *s = super_list;
+
+ super_list = super_list->next;
+ free_imsm(s);
+ }
+
+ if (err)
+ return err;
*sbp = super;
st->container_dev = devnum;
- if (st->ss == NULL) {
+ if (err == 0 && st->ss == NULL) {
st->ss = &super_imsm;
st->minor_version = 0;
st->max_devs = IMSM_MAX_DEVICES;
@@ -2235,7 +2592,7 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname)
return 1;
}
- rv = load_imsm_mpb(fd, super, devname);
+ rv = load_and_parse_mpb(fd, super, devname, 0);
if (rv) {
if (devname)
@@ -2491,24 +2848,33 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
size_t mpb_size;
char *version;
- if (!info) {
- st->sb = NULL;
- return 0;
- }
if (st->sb)
- return init_super_imsm_volume(st, info, size, name, homehost,
- uuid);
+ return init_super_imsm_volume(st, info, size, name, homehost, uuid);
+
+ if (info)
+ mpb_size = disks_to_mpb_size(info->nr_disks);
+ else
+ mpb_size = 512;
super = alloc_super(1);
- if (!super)
- return 0;
- mpb_size = disks_to_mpb_size(info->nr_disks);
- if (posix_memalign(&super->buf, 512, mpb_size) != 0) {
+ if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
free(super);
+ super = NULL;
+ }
+ if (!super) {
+ fprintf(stderr, Name
+ ": %s could not allocate superblock\n", __func__);
return 0;
}
+ memset(super->buf, 0, mpb_size);
mpb = super->buf;
- memset(mpb, 0, mpb_size);
+ mpb->mpb_size = __cpu_to_le32(mpb_size);
+ st->sb = super;
+
+ if (info == NULL) {
+ /* zeroing superblock */
+ return 0;
+ }
mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
@@ -2516,9 +2882,7 @@ static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
strcpy(version, MPB_SIGNATURE);
version += strlen(MPB_SIGNATURE);
strcpy(version, MPB_VERSION_RAID0);
- mpb->mpb_size = mpb_size;
- st->sb = super;
return 1;
}
@@ -2651,39 +3015,48 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
return 0;
}
-static int store_imsm_mpb(int fd, struct intel_super *super);
+static int store_imsm_mpb(int fd, struct imsm_super *mpb);
+
+static union {
+ char buf[512];
+ struct imsm_super anchor;
+} spare_record __attribute__ ((aligned(512)));
/* spare records have their own family number and do not have any defined raid
* devices
*/
static int write_super_imsm_spares(struct intel_super *super, int doclose)
{
- struct imsm_super mpb_save;
struct imsm_super *mpb = super->anchor;
+ struct imsm_super *spare = &spare_record.anchor;
__u32 sum;
struct dl *d;
- mpb_save = *mpb;
- mpb->num_raid_devs = 0;
- mpb->num_disks = 1;
- mpb->mpb_size = sizeof(struct imsm_super);
- mpb->generation_num = __cpu_to_le32(1UL);
+ spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super)),
+ spare->generation_num = __cpu_to_le32(1UL),
+ spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
+ spare->num_disks = 1,
+ spare->num_raid_devs = 0,
+ spare->cache_size = mpb->cache_size,
+ spare->pwr_cycle_count = __cpu_to_le32(1),
+
+ snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
+ MPB_SIGNATURE MPB_VERSION_RAID0);
for (d = super->disks; d; d = d->next) {
if (d->index != -1)
continue;
- mpb->disk[0] = d->disk;
- sum = __gen_imsm_checksum(mpb);
- mpb->family_num = __cpu_to_le32(sum);
- mpb->orig_family_num = 0;
- sum = __gen_imsm_checksum(mpb);
- mpb->check_sum = __cpu_to_le32(sum);
+ spare->disk[0] = d->disk;
+ sum = __gen_imsm_checksum(spare);
+ spare->family_num = __cpu_to_le32(sum);
+ spare->orig_family_num = 0;
+ sum = __gen_imsm_checksum(spare);
+ spare->check_sum = __cpu_to_le32(sum);
- if (store_imsm_mpb(d->fd, super)) {
+ if (store_imsm_mpb(d->fd, spare)) {
fprintf(stderr, "%s: failed for device %d:%d %s\n",
__func__, d->major, d->minor, strerror(errno));
- *mpb = mpb_save;
return 1;
}
if (doclose) {
@@ -2692,7 +3065,6 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose)
}
}
- *mpb = mpb_save;
return 0;
}
@@ -2744,7 +3116,7 @@ static int write_super_imsm(struct intel_super *super, int doclose)
for (d = super->disks; d ; d = d->next) {
if (d->index < 0)
continue;
- if (store_imsm_mpb(d->fd, super))
+ if (store_imsm_mpb(d->fd, mpb))
fprintf(stderr, "%s: failed for device %d:%d %s\n",
__func__, d->major, d->minor, strerror(errno));
if (doclose) {
@@ -2852,24 +3224,19 @@ static int write_init_super_imsm(struct supertype *st)
}
#endif
-static int store_zero_imsm(struct supertype *st, int fd)
+static int store_super_imsm(struct supertype *st, int fd)
{
- unsigned long long dsize;
- void *buf;
-
- get_dev_size(fd, NULL, &dsize);
-
- /* first block is stored on second to last sector of the disk */
- if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
- return 1;
+ struct intel_super *super = st->sb;
+ struct imsm_super *mpb = super ? super->anchor : NULL;
- if (posix_memalign(&buf, 512, 512) != 0)
+ if (!mpb)
return 1;
- memset(buf, 0, 512);
- if (write(fd, buf, 512) != 512)
- return 1;
- return 0;
+#ifndef MDASSEMBLE
+ return store_imsm_mpb(fd, mpb);
+#else
+ return 1;
+#endif
}
static int imsm_bbm_log_size(struct imsm_super *mpb)
@@ -3444,7 +3811,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
struct dl *d;
int idx;
int skip;
- __u32 s;
__u32 ord;
skip = 0;
@@ -3456,9 +3822,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
if (d == NULL)
skip = 1;
-
- s = d ? d->disk.status : 0;
- if (s & FAILED_DISK)
+ if (d && is_failed(&d->disk))
skip = 1;
if (ord & IMSM_ORD_REBUILD)
skip = 1;
@@ -3565,8 +3929,7 @@ static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
insync = 2;
disk = get_imsm_disk(super, idx);
- if (!disk || disk->status & FAILED_DISK ||
- ord & IMSM_ORD_REBUILD)
+ if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
insync--;
/* no in-sync disks left in this mirror the
@@ -3616,8 +3979,7 @@ static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
idx = ord_to_idx(ord);
disk = get_imsm_disk(super, idx);
- if (!disk || disk->status & FAILED_DISK ||
- ord & IMSM_ORD_REBUILD)
+ if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
failed++;
}
@@ -3676,7 +4038,7 @@ static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
return 0;
ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
- if ((disk->status & FAILED_DISK) && (ord & IMSM_ORD_REBUILD))
+ if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
return 0;
disk->status |= FAILED_DISK;
@@ -3824,9 +4186,9 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
}
}
-static int store_imsm_mpb(int fd, struct intel_super *super)
+static int store_imsm_mpb(int fd, struct imsm_super *mpb)
{
- struct imsm_super *mpb = super->anchor;
+ void *buf = mpb;
__u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
unsigned long long dsize;
unsigned long long sectors;
@@ -3841,7 +4203,7 @@ static int store_imsm_mpb(int fd, struct intel_super *super)
if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
return 1;
- if (write(fd, super->buf + 512, 512 * sectors) != 512 * sectors)
+ if (write(fd, buf + 512, 512 * sectors) != 512 * sectors)
return 1;
}
@@ -3849,7 +4211,7 @@ static int store_imsm_mpb(int fd, struct intel_super *super)
if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
return 1;
- if (write(fd, super->buf, 512) != 512)
+ if (write(fd, buf, 512) != 512)
return 1;
return 0;
@@ -3877,7 +4239,7 @@ static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_a
if (dl->index == i)
break;
- if (dl && dl->disk.status & FAILED_DISK)
+ if (dl && is_failed(&dl->disk))
dl = NULL;
if (dl)
@@ -3915,11 +4277,10 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot,
continue;
/* skip in use or failed drives */
- if (dl->disk.status & FAILED_DISK || idx == dl->index ||
+ if (is_failed(&dl->disk) || idx == dl->index ||
dl->index == -2) {
dprintf("%x:%x status (failed: %d index: %d)\n",
- dl->major, dl->minor,
- (dl->disk.status & FAILED_DISK) == FAILED_DISK, idx);
+ dl->major, dl->minor, is_failed(&dl->disk), idx);
continue;
}
@@ -4221,7 +4582,7 @@ static void imsm_process_update(struct supertype *st,
if (i == u->slot)
continue;
disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i));
- if (!disk || disk->status & FAILED_DISK)
+ if (!disk || is_failed(disk))
failed++;
}
@@ -4584,7 +4945,7 @@ struct superswitch super_imsm = {
.load_super = load_super_imsm,
.init_super = init_super_imsm,
- .store_super = store_zero_imsm,
+ .store_super = store_super_imsm,
.free_super = free_super_imsm,
.match_metadata_desc = match_metadata_desc_imsm,
.container_content = container_content_imsm,
diff --git a/sysfs.c b/sysfs.c
index 5806fa7..35dfbd4 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -792,7 +792,7 @@ int sysfs_unique_holder(int devnum, long rdev)
static char *clean_states[] = {
"clear", "inactive", "readonly", "read-auto", "clean", NULL };
-int WaitClean(char *dev, int verbose)
+int WaitClean(char *dev, int sock, int verbose)
{
int fd;
struct mdinfo *mdi;
@@ -868,7 +868,8 @@ int WaitClean(char *dev, int verbose)
}
if (rv < 0)
rv = 1;
- else if (ping_monitor(mdi->text_version) == 0) {
+ else if (fping_monitor(sock) == 0 ||
+ ping_monitor(mdi->text_version) == 0) {
/* we need to ping to close the window between array
* state transitioning to clean and the metadata being
* marked clean
diff --git a/tests/09imsm-assemble b/tests/09imsm-assemble
new file mode 100644
index 0000000..7389b0e
--- /dev/null
+++ b/tests/09imsm-assemble
@@ -0,0 +1,46 @@
+# validate the prodigal member disk scenario i.e. a former container
+# member is returned after having been rebuilt on another system
+#
+# $container, $member and the imsm_check_* helpers are provided by
+# tests/env-09imsm-assemble
+num_disks=4
+size=$((10*1024))
+# start from one imsm container on dev0..dev3 holding a two-disk level-1
+# volume on dev0 and dev2
+mdadm -CR $container -e imsm -n $num_disks $dev0 $dev1 $dev2 $dev3
+mdadm -CR $member $dev0 $dev2 -n 2 -l 1 -z $size
+mdadm --wait $member
+mdadm -Ss
+
+# make dev0 and dev1 a new rebuild family
+mdadm -A $container $dev0 $dev1
+mdadm -I $container
+mdadm --wait ${member}_0
+mdadm -Ss
+
+# make dev2 and dev3 a new rebuild family
+mdadm -A $container $dev2 $dev3
+mdadm -I $container
+mdadm --wait ${member}_0
+mdadm -Ss
+
+# reassemble and make sure one of the families falls out
+mdadm -A $container $dev0 $dev1 $dev2 $dev3
+mdadm -I $container
+testdev ${member}_0 1 $size 1
+# whether dev0 can be removed tells us which family was chosen; the disks of
+# the chosen family must be held, the others must be removable
+if mdadm --remove $container $dev0 ; then
+ # the dev[23] family won
+ imsm_check_removal $container $dev1
+ imsm_check_hold $container $dev2
+ imsm_check_hold $container $dev3
+else
+ # the dev[01] family won
+ imsm_check_hold $container $dev1
+ imsm_check_removal $container $dev2
+ imsm_check_removal $container $dev3
+fi
+mdadm -Ss
+
+# reassemble with a new id for the dev[23] family
+mdadm -A $container $dev0 $dev1
+mdadm -I $container
+mdadm -A ${container}2 $dev2 $dev3 --update=uuid
+mdadm -I ${container}2
+
+# with the families in separate containers both volumes are checked
+testdev ${member}_0 1 $size 1
+testdev ${member}_1 1 $size 1
diff --git a/tests/10ddf-create b/tests/10ddf-create
index db22b64..a32dc0f 100644
--- a/tests/10ddf-create
+++ b/tests/10ddf-create
@@ -55,8 +55,8 @@ mdadm -Ss
mdadm -Asc /var/tmp/mdadm.conf
check nosync # This failed once. The raid5 was resyncing.
-mdadm -Dbs > /tmp/mdadm.conf
-diff /tmp/mdadm.conf /var/tmp/mdadm.conf
+mdadm -Dbs | sort > /tmp/mdadm.conf
+sort /var/tmp/mdadm.conf | diff /tmp/mdadm.conf -
mdadm -Ss
# and now assemble fully incrementally.
@@ -70,7 +70,7 @@ do
done
check nosync
-mdadm -Dbs > /tmp/mdadm.conf
-diff /tmp/mdadm.conf /var/tmp/mdadm.conf
+mdadm -Dbs | sort > /tmp/mdadm.conf
+sort /var/tmp/mdadm.conf | diff /tmp/mdadm.conf -
mdadm -Ss
rm /tmp/mdadm.conf /var/tmp/mdadm.conf
diff --git a/tests/env-09imsm-assemble b/tests/env-09imsm-assemble
new file mode 100644
index 0000000..b12954b
--- /dev/null
+++ b/tests/env-09imsm-assemble
@@ -0,0 +1,32 @@
+# imsm_check_hold <container> <device>
+# Passes when 'mdadm --remove' refuses to take <device> out of <container>;
+# otherwise prints diagnostics and aborts the test with exit status 1.
+imsm_check_hold() {
+	mdadm --remove $1 $2 || return 0
+
+	echo "$2 removal from $1 should have been blocked" >&2
+	cat /proc/mdstat >&2
+	mdadm -E $2
+	exit 1
+}
+
+# imsm_check_removal <container> <device>
+# Passes when 'mdadm --remove' takes <device> out of <container>; otherwise
+# prints diagnostics and aborts the test with exit status 1.
+imsm_check_removal() {
+	mdadm --remove $1 $2 && return 0
+
+	echo "$2 removal from $1 should have succeeded" >&2
+	cat /proc/mdstat >&2
+	mdadm -E $2
+	exit 1
+}
+
+# Export the IMSM test switches and name the container / member devices
+# used by the test script.
+setup_env() {
+	container=/dev/md/container
+	member=/dev/md/vol0
+	export IMSM_DEVNAME_AS_SERIAL=1 IMSM_TEST_OROM=1
+}
+
+# Undo setup_env and drop the helper functions defined in this file.
+reset_env() {
+	unset IMSM_DEVNAME_AS_SERIAL
+	unset IMSM_TEST_OROM
+	unset imsm_check
+	# 'imsm_check' is not defined in this file; the helpers actually
+	# defined here are imsm_check_hold and imsm_check_removal
+	unset -f imsm_check_hold imsm_check_removal
+	unset container
+	unset member
+}
diff --git a/util.c b/util.c
index 662061b..048c39f 100644
--- a/util.c
+++ b/util.c
@@ -336,17 +336,15 @@ void copy_uuid(void *a, int b[4], int swapuuid)
memcpy(a, b, 16);
}
-char *fname_from_uuid(struct supertype *st, struct mdinfo *info, char *buf, char sep)
+char *__fname_from_uuid(int id[4], int swap, char *buf, char sep)
{
int i, j;
- int id;
char uuid[16];
char *c = buf;
strcpy(c, "UUID-");
c += strlen(c);
- copy_uuid(uuid, info->uuid, st->ss->swapuuid);
+ copy_uuid(uuid, id, swap);
for (i = 0; i < 4; i++) {
- id = uuid[i];
if (i)
*c++ = sep;
for (j = 3; j >= 0; j--) {
@@ -355,6 +353,12 @@ char *fname_from_uuid(struct supertype *st, struct mdinfo *info, char *buf, char
}
}
return buf;
+
+}
+
+char *fname_from_uuid(struct supertype *st, struct mdinfo *info, char *buf, char sep)
+{
+ return __fname_from_uuid(info->uuid, st->ss->swapuuid, buf, sep);
}
#ifndef MDASSEMBLE