summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeil Brown <neilb@suse.de>2006-12-21 17:10:52 +1100
committerNeil Brown <neilb@suse.de>2006-12-21 17:10:52 +1100
commit8382f19bdcc6d2d1de92154e11129acdcaab10fa (patch)
tree3b3c93a6ad6189d6354de81714fc8959de8378f5
parent350f29f90d1f6bb3ddfafea368327911f9e8b27c (diff)
downloadmdadm-8382f19bdcc6d2d1de92154e11129acdcaab10fa.tar.gz
mdadm-8382f19bdcc6d2d1de92154e11129acdcaab10fa.tar.xz
mdadm-8382f19bdcc6d2d1de92154e11129acdcaab10fa.zip
Add new mode: --incremental
--incremental allows arrays to be assembled one device at a time. This is expected to be used with udev.
-rw-r--r--ANNOUNCE-2.60
-rw-r--r--ChangeLog2
-rw-r--r--Incremental.c721
-rw-r--r--Makefile8
-rw-r--r--Manage.c14
-rw-r--r--Monitor.c2
-rw-r--r--ReadMe.c34
-rw-r--r--config.c18
-rw-r--r--kernel-patch-2.6.1835
-rw-r--r--kernel-patch-2.6.18.635
-rw-r--r--kernel-patch-2.6.1934
-rw-r--r--mapfile.c197
-rw-r--r--mdadm.8210
-rw-r--r--mdadm.c39
-rw-r--r--mdadm.h33
-rw-r--r--mdopen.c48
-rw-r--r--mdstat.c12
-rw-r--r--super0.c3
-rw-r--r--sysfs.c30
-rw-r--r--util.c8
20 files changed, 1468 insertions, 15 deletions
diff --git a/ANNOUNCE-2.6 b/ANNOUNCE-2.6
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/ANNOUNCE-2.6
diff --git a/ChangeLog b/ChangeLog
index 38ae488..3d68406 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -28,6 +28,8 @@ Changes Prior to this release
- Don't hold md device open for so long in --monitor mode - map_dev
can be slow and interferes with trying to stop the array.
- Support --uuid= with --create to choose your own UUID.
+ - New major mode "--incremental" for incremental assembly of arrays,
+ intended for use with udev.
Changes Prior to 2.5.6 release
- Fix bug which meant "bitmap=xxx" in mdadm.conf was not handled
diff --git a/Incremental.c b/Incremental.c
new file mode 100644
index 0000000..ebe501f
--- /dev/null
+++ b/Incremental.c
@@ -0,0 +1,721 @@
+/*
+ * Incremental.c - support --incremental. Part of:
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2006 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ * Paper: Neil Brown
+ * Novell Inc
+ * GPO Box Q1283
+ * QVB Post Office, NSW 1230
+ * Australia
+ */
+
+#include "mdadm.h"
+
+/* Forward declarations of the file-local helpers defined after Incremental(). */
+
+/* Counts the member devices of the array open on mdfd whose metadata marks
+ * them in-sync; defined further down in this file.
+ */
+static int count_active(struct supertype *st, int mdfd, char **availp,
+ struct mdinfo *info);
+/* Removes stale members that claim the same disk number as a newer device;
+ * defined further down in this file.
+ */
+static void find_reject(int mdfd, struct supertype *st, struct sysarray *sra,
+ int number, __u64 events, int verbose,
+ char *array_name);
+
+int Incremental(char *devname, int verbose, int runstop,
+		struct supertype *st, char *homehost, int autof)
+{
+	/* Add this device to an array, creating the array if necessary
+	 * and starting the array if sensible or - if runstop>0 - if possible.
+	 *
+	 * Returns 0 when the device was attached (whether or not the array
+	 * was started), and 1 or 2 when the device was rejected or an
+	 * operation failed.
+	 *
+	 * This has several steps:
+	 *
+	 * 1/ Check if device is permitted by mdadm.conf, reject if not.
+	 * 2/ Find metadata, reject if none appropriate (check
+	 *    version/name from args)
+	 * 3/ Check if there is a match in mdadm.conf
+	 * 3a/ if not, check for homehost match.  If no match, reject.
+	 * 4/ Determine device number.
+	 *    - If in mdadm.conf with std name, use that
+	 *    - UUID in /var/run/mdadm.map  use that
+	 *    - If name is suggestive, use that. unless in use with different uuid.
+	 *    - Choose a free, high number.
+	 *    - Use a partitioned device unless strong suggestion not to.
+	 *      e.g. auto=md
+	 * 5/ Find out if array already exists
+	 * 5a/ if it does not
+	 *    - choose a name, from mdadm.conf or 'name' field in array.
+	 *    - create the array
+	 *    - add the device
+	 * 5b/ if it does
+	 *    - check one drive in array to make sure metadata is a reasonably
+	 *      close match.  Reject if not (e.g. different type)
+	 *    - add the device
+	 * 6/ Make sure /var/run/mdadm.map contains this array.
+	 * 7/ Are there enough devices to possibly start the array?
+	 * 7a/ if not, finish with success.
+	 * 7b/ if yes,
+	 *    - read all metadata and arrange devices like -A does
+	 *    - if number of OK devices match expected, or -R and there are enough,
+	 *      start the array (auto-readonly).
+	 *
+	 * NOTE(review): 'super' (allocated by load_super) is not freed by
+	 * this function on any path.
+	 */
+	struct stat stb;
+	void *super, *super2;
+	struct mdinfo info, info2;
+	struct mddev_ident_s *array_list, *match;
+	char chosen_name[1024];
+	int rv;
+	int devnum;
+	struct map_ent *mp, *map = NULL;
+	int dfd, mdfd;
+	/* Start as NULL so the free() calls below are always safe. */
+	char *avail = NULL;
+	int active_disks;
+
+	struct createinfo *ci = conf_get_create_info();
+
+	if (autof == 0)
+		autof = ci->autof;
+
+	/* 1/ Check if device is permitted by mdadm.conf */
+
+	if (!conf_test_dev(devname)) {
+		if (verbose >= 0)
+			fprintf(stderr, Name
+				": %s not permitted by mdadm.conf.\n",
+				devname);
+		return 1;
+	}
+
+	/* 2/ Find metadata, reject if none appropriate (check
+	 *    version/name from args) */
+
+	dfd = dev_open(devname, O_RDONLY|O_EXCL);
+	if (dfd < 0) {
+		if (verbose >= 0)
+			fprintf(stderr, Name ": cannot open %s: %s.\n",
+				devname, strerror(errno));
+		return 1;
+	}
+	if (fstat(dfd, &stb) < 0) {
+		if (verbose >= 0)
+			fprintf(stderr, Name ": fstat failed for %s: %s.\n",
+				devname, strerror(errno));
+		close(dfd);
+		return 1;
+	}
+	if ((stb.st_mode & S_IFMT) != S_IFBLK) {
+		if (verbose >= 0)
+			fprintf(stderr, Name ": %s is not a block device.\n",
+				devname);
+		close(dfd);
+		return 1;
+	}
+
+	if (st == NULL && (st = guess_super(dfd)) == NULL) {
+		if (verbose >= 0)
+			fprintf(stderr, Name
+				": no recognisable superblock on %s.\n",
+				devname);
+		close(dfd);
+		return 1;
+	}
+	if (st->ss->load_super(st, dfd, &super, NULL)) {
+		if (verbose >= 0)
+			fprintf(stderr, Name ": no RAID superblock on %s.\n",
+				devname);
+		close(dfd);
+		return 1;
+	}
+	st->ss->getinfo_super(&info, super);
+	close (dfd);
+
+	/* 3/ Check if there is a match in mdadm.conf */
+
+	array_list = conf_get_ident(NULL);
+	match = NULL;
+	for (; array_list; array_list = array_list->next) {
+		if (array_list->uuid_set &&
+		    same_uuid(array_list->uuid, info.uuid, st->ss->swapuuid)
+		    == 0) {
+			if (verbose >= 2)
+				fprintf(stderr, Name
+					": UUID differs from %s.\n",
+					array_list->devname);
+			continue;
+		}
+		if (array_list->name[0] &&
+		    strcasecmp(array_list->name, info.name) != 0) {
+			if (verbose >= 2)
+				fprintf(stderr, Name
+					": Name differs from %s.\n",
+					array_list->devname);
+			continue;
+		}
+		if (array_list->devices &&
+		    !match_oneof(array_list->devices, devname)) {
+			if (verbose >= 2)
+				fprintf(stderr, Name
+					": Not a listed device for %s.\n",
+					array_list->devname);
+			continue;
+		}
+		if (array_list->super_minor != UnSet &&
+		    array_list->super_minor != info.array.md_minor) {
+			if (verbose >= 2)
+				fprintf(stderr, Name
+					": Different super-minor to %s.\n",
+					array_list->devname);
+			continue;
+		}
+		if (!array_list->uuid_set &&
+		    !array_list->name[0] &&
+		    !array_list->devices &&
+		    array_list->super_minor == UnSet) {
+			if (verbose >= 2)
+				fprintf(stderr, Name
+					": %s doesn't have any identifying information.\n",
+					array_list->devname);
+			continue;
+		}
+		/* FIXME, should I check raid_disks and level too?? */
+
+		if (match) {
+			if (verbose >= 0)
+				fprintf(stderr, Name
+					": we match both %s and %s - cannot decide which to use.\n",
+					match->devname, array_list->devname);
+			return 2;
+		}
+		match = array_list;
+	}
+
+	/* 3a/ if not, check for homehost match.  If no match, reject. */
+	if (!match) {
+		if (homehost == NULL ||
+		    st->ss->match_home(super, homehost) == 0) {
+			if (verbose >= 0)
+				fprintf(stderr, Name
+					": not found in mdadm.conf and not identified by homehost.\n");
+			return 2;
+		}
+	}
+	/* 4/ Determine device number. */
+	/* - If in mdadm.conf with std name, use that */
+	/* - UUID in /var/run/mdadm.map  use that */
+	/* - If name is suggestive, use that. unless in use with */
+	/*   different uuid. */
+	/* - Choose a free, high number. */
+	/* - Use a partitioned device unless strong suggestion not to. */
+	/*   e.g. auto=md */
+	if (match && is_standard(match->devname, &devnum))
+		/* We have devnum now */;
+	else if ((mp = map_by_uuid(&map, info.uuid)) != NULL)
+		devnum = mp->devnum;
+	else {
+		/* Have to guess a bit. */
+		int use_partitions = 1;
+		char *np, *ep;
+		if ((autof&7) == 3 || (autof&7) == 5)
+			use_partitions = 0;
+		/* Use the part of the name after any ':' as the suggestion. */
+		np = strchr(info.name, ':');
+		if (np)
+			np++;
+		else
+			np = info.name;
+		devnum = strtoul(np, &ep, 10);
+		if (ep > np && *ep == 0) {
+			/* This is a number.  Let's check that it is unused. */
+			if (mddev_busy(use_partitions ? (-1-devnum) : devnum))
+				devnum = -1;
+		} else
+			devnum = -1;
+
+		if (devnum < 0) {
+			/* Haven't found anything yet, choose something free */
+			/* There is similar code in mdopen.c - should unify */
+			/* Walk 127, 126, ... 0, then wrap to (1<<22)-1 and
+			 * continue down; reaching 128 means every number
+			 * was busy.
+			 */
+			for (devnum = 127 ; devnum != 128 ;
+			     devnum = devnum ? devnum-1 : (1<<22)-1) {
+				/* Stop at the first device that is NOT busy */
+				if (!mddev_busy(use_partitions ?
+						(-1-devnum) : devnum))
+					break;
+			}
+			if (devnum == 128) {
+				fprintf(stderr, Name
+					": No spare md devices!!\n");
+				return 2;
+			}
+		}
+		/* Partitioned arrays are encoded as negative numbers. */
+		devnum = use_partitions ? (-1-devnum) : devnum;
+	}
+	mdfd = open_mddev_devnum(match ? match->devname : NULL,
+				 devnum,
+				 info.name,
+				 chosen_name);
+	if (mdfd < 0) {
+		fprintf(stderr, Name ": failed to open %s: %s.\n",
+			chosen_name, strerror(errno));
+		return 2;
+	}
+	/* 5/ Find out if array already exists */
+	if (! mddev_busy(devnum)) {
+		/* 5a/ if it does not */
+		/* - choose a name, from mdadm.conf or 'name' field in array. */
+		/* - create the array */
+		/* - add the device */
+		mdu_array_info_t ainf;
+		mdu_disk_info_t disk;
+		char md[20];
+		struct sysarray *sra;
+
+		memset(&ainf, 0, sizeof(ainf));
+		ainf.major_version = st->ss->major;
+		ainf.minor_version = st->minor_version;
+		if (ioctl(mdfd, SET_ARRAY_INFO, &ainf) != 0) {
+			fprintf(stderr, Name
+				": SET_ARRAY_INFO failed for %s: %s\n",
+				chosen_name, strerror(errno));
+			close(mdfd);
+			return 2;
+		}
+		sprintf(md, "%d.%d\n", st->ss->major, st->minor_version);
+		sra = sysfs_read(mdfd, devnum, GET_VERSION);
+		/* sysfs may be unavailable; the version is then left as
+		 * set by SET_ARRAY_INFO above.
+		 */
+		if (sra)
+			sysfs_set_str(sra, NULL, "metadata_version", md);
+		sysfs_free(sra);
+		memset(&disk, 0, sizeof(disk));
+		disk.major = major(stb.st_rdev);
+		disk.minor = minor(stb.st_rdev);
+		if (ioctl(mdfd, ADD_NEW_DISK, &disk) != 0) {
+			fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
+				devname, chosen_name, strerror(errno));
+			ioctl(mdfd, STOP_ARRAY, 0);
+			close(mdfd);
+			return 2;
+		}
+		sra = sysfs_read(mdfd, devnum, GET_DEVS);
+		if (!sra || !sra->devs || sra->devs->role >= 0) {
+			/* It really should be 'none' - must be old buggy
+			 * kernel, and mdadm -I may not be able to complete.
+			 * So reject it.
+			 */
+			ioctl(mdfd, STOP_ARRAY, NULL);
+			fprintf(stderr, Name
+				": You have an old buggy kernel which cannot support\n"
+				" --incremental reliably. Aborting.\n");
+			close(mdfd);
+			sysfs_free(sra);
+			return 2;
+		}
+		sysfs_free(sra);
+	} else {
+		/* 5b/ if it does */
+		/* - check one drive in array to make sure metadata is a reasonably */
+		/*   close match.  Reject if not (e.g. different type) */
+		/* - add the device */
+		char dn[20];
+		int dfd2;
+		mdu_disk_info_t disk;
+		int err;
+		int differs;
+		struct sysarray *sra;
+		sra = sysfs_read(mdfd, devnum, (GET_VERSION | GET_DEVS |
+						GET_STATE));
+		if (sra == NULL || sra->devs == NULL) {
+			/* Nothing to compare against - refuse rather than
+			 * dereference a missing device list.
+			 */
+			fprintf(stderr, Name
+				": cannot read sysfs details of %s.\n",
+				chosen_name);
+			sysfs_free(sra);
+			close(mdfd);
+			return 2;
+		}
+		if (sra->major_version != st->ss->major ||
+		    sra->minor_version != st->minor_version) {
+			if (verbose >= 0)
+				fprintf(stderr, Name
+					": %s has different metadata to chosen array %s %d.%d %d.%d.\n",
+					devname, chosen_name,
+					sra->major_version, sra->minor_version,
+					st->ss->major, st->minor_version);
+			sysfs_free(sra);
+			close(mdfd);
+			return 1;
+		}
+		sprintf(dn, "%d:%d", sra->devs->major, sra->devs->minor);
+		dfd2 = dev_open(dn, O_RDONLY);
+		if (dfd2 < 0 ||
+		    st->ss->load_super(st, dfd2, &super2, NULL)) {
+			fprintf(stderr, Name
+				": Strange error loading metadata for %s.\n",
+				chosen_name);
+			if (dfd2 >= 0)
+				close(dfd2);
+			sysfs_free(sra);
+			close(mdfd);
+			return 2;
+		}
+		close(dfd2);
+		st->ss->getinfo_super(&info2, super2);
+		/* Compare before releasing super2 so that nothing derived
+		 * from it is read after the free.
+		 */
+		differs = (info.array.level != info2.array.level ||
+			   memcmp(info.uuid, info2.uuid, 16) != 0 ||
+			   info.array.raid_disks != info2.array.raid_disks);
+		free(super2);
+		if (differs) {
+			fprintf(stderr, Name
+				": unexpected difference between %s and %s.\n",
+				chosen_name, devname);
+			sysfs_free(sra);
+			close(mdfd);
+			return 2;
+		}
+		memset(&disk, 0, sizeof(disk));
+		disk.major = major(stb.st_rdev);
+		disk.minor = minor(stb.st_rdev);
+		err = ioctl(mdfd, ADD_NEW_DISK, &disk);
+		if (err < 0 && errno == EBUSY) {
+			/* could be another device present with the same
+			 * disk.number. Find and reject any such
+			 */
+			find_reject(mdfd, st, sra, info.disk.number,
+				    info.events, verbose, chosen_name);
+			err = ioctl(mdfd, ADD_NEW_DISK, &disk);
+		}
+		if (err < 0) {
+			fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
+				devname, chosen_name, strerror(errno));
+			sysfs_free(sra);
+			close(mdfd);
+			return 2;
+		}
+		sysfs_free(sra);
+	}
+	/* 6/ Make sure /var/run/mdadm.map contains this array. */
+	map_update(&map, devnum,
+		   info.array.major_version,
+		   info.array.minor_version,
+		   info.uuid, chosen_name);
+
+	/* 7/ Are there enough devices to possibly start the array? */
+	/* 7a/ if not, finish with success. */
+	active_disks = count_active(st, mdfd, &avail, &info);
+	if (enough(info.array.level, info.array.raid_disks,
+		   info.array.layout, info.array.state & 1,
+		   avail, active_disks) == 0) {
+		free(avail);
+		if (verbose >= 0)
+			fprintf(stderr, Name
+				": %s attached to %s, not enough to start (%d).\n",
+				devname, chosen_name, active_disks);
+		close(mdfd);
+		return 0;
+	}
+	free(avail);
+
+	/* 7b/ if yes, */
+	/* - if number of OK devices match expected, or -R and there */
+	/*   are enough, */
+	/*   + add any bitmap file */
+	/*   + start the array (auto-readonly). */
+{
+	mdu_array_info_t ainf;
+
+	/* GET_ARRAY_INFO succeeding is taken to mean the array is
+	 * already running, so there is nothing left to start.
+	 */
+	if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
+		if (verbose >= 0)
+			fprintf(stderr, Name
+				": %s attached to %s which is already active.\n",
+				devname, chosen_name);
+		close (mdfd);
+		return 0;
+	}
+}
+	if (runstop > 0 || active_disks >= info.array.working_disks) {
+		struct sysarray *sra;
+		/* Let's try to start it */
+		if (match && match->bitmap_file) {
+			int bmfd = open(match->bitmap_file, O_RDWR);
+			if (bmfd < 0) {
+				fprintf(stderr, Name
+					": Could not open bitmap file %s.\n",
+					match->bitmap_file);
+				close(mdfd);
+				return 1;
+			}
+			if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
+				close(bmfd);
+				fprintf(stderr, Name
+					": Failed to set bitmapfile for %s.\n",
+					chosen_name);
+				close(mdfd);
+				return 1;
+			}
+			close(bmfd);
+		}
+		sra = sysfs_read(mdfd, devnum, 0);
+		/* All expected devices present (or no sysfs): start
+		 * normally.  Otherwise (-R with fewer devices) start
+		 * in "read-auto" state via sysfs.
+		 */
+		if (sra == NULL || active_disks >= info.array.working_disks)
+			rv = ioctl(mdfd, RUN_ARRAY, NULL);
+		else
+			rv = sysfs_set_str(sra, NULL,
+					   "array_state", "read-auto");
+		if (rv == 0) {
+			if (verbose >= 0)
+				fprintf(stderr, Name
+					": %s attached to %s, which has been started.\n",
+					devname, chosen_name);
+			rv = 0;
+		} else {
+			fprintf(stderr, Name
+				": %s attached to %s, but failed to start: %s.\n",
+				devname, chosen_name, strerror(errno));
+			rv = 1;
+		}
+		/* Released only after errno has been reported above. */
+		sysfs_free(sra);
+	} else {
+		if (verbose >= 0)
+			fprintf(stderr, Name
+				": %s attached to %s, not enough to start safely.\n",
+				devname, chosen_name);
+		rv = 0;
+	}
+	close(mdfd);
+	return rv;
+}
+
+static void find_reject(int mdfd, struct supertype *st, struct sysarray *sra,
+			int number, __u64 events, int verbose,
+			char *array_name)
+{
+	/* Find a device attached to this array with a disk.number of number
+	 * and events less than the passed events, and remove the device.
+	 *
+	 * mdfd       - open descriptor on the array
+	 * st         - metadata handler used to load each member's superblock
+	 * sra        - sysfs view of the array; its 'devs' list is walked
+	 * number     - disk.number that the newly added device wants
+	 * events     - event count of the newly added device
+	 * verbose    - removals are reported when this is >= 0
+	 * array_name - name used in the report
+	 *
+	 * Does nothing if the array is already active.
+	 */
+	struct sysdev *d;
+	mdu_array_info_t ra;
+
+	if (sra == NULL)
+		return;
+
+	if (ioctl(mdfd, GET_ARRAY_INFO, &ra) == 0)
+		return; /* not safe to remove from active arrays
+			 * without thinking more */
+
+	for (d = sra->devs; d ; d = d->next) {
+		/* Large enough for any "major:minor" pair of ints;
+		 * snprintf guards against overrun regardless.
+		 */
+		char dn[30];
+		int dfd;
+		void *super;
+		struct mdinfo info;
+		snprintf(dn, sizeof(dn), "%d:%d", d->major, d->minor);
+		dfd = dev_open(dn, O_RDONLY);
+		if (dfd < 0)
+			continue;
+		if (st->ss->load_super(st, dfd, &super, NULL)) {
+			close(dfd);
+			continue;
+		}
+		st->ss->getinfo_super(&info, super);
+		free(super);
+		close(dfd);
+
+		/* Only devices claiming the same slot number with an
+		 * older event count are candidates for removal.
+		 */
+		if (info.disk.number != number ||
+		    info.events >= events)
+			continue;
+
+		if (d->role > -1)
+			sysfs_set_str(sra, d, "slot", "none");
+		if (sysfs_set_str(sra, d, "state", "remove") == 0)
+			if (verbose >= 0)
+				/* NOTE(review): name+4 presumably skips a
+				 * 4-character sysfs prefix - confirm.
+				 */
+				fprintf(stderr, Name
+					": removing old device %s from %s\n",
+					d->name+4, array_name);
+	}
+}
+
+static int count_active(struct supertype *st, int mdfd, char **availp,
+			struct mdinfo *bestinfo)
+{
+	/* Count how many devices in the array open on mdfd think they
+	 * are active (MD_DISK_SYNC set in their own metadata).
+	 *
+	 * Devices carrying the highest event count seen, and those exactly
+	 * one event behind it, are counted; older ones are ignored.
+	 *
+	 * *availp is set to a malloc()ed map, indexed by raid_disk, holding
+	 * 2 for a device at the highest event count, 1 for one event behind
+	 * and 0 otherwise.  It is NULL if no in-sync device was found.  The
+	 * caller must free() it.
+	 *
+	 * *bestinfo is overwritten with the info from a device at the
+	 * highest event count, if any device was found.
+	 *
+	 * Returns the number of counted devices.
+	 */
+	struct sysdev *d;
+	int cnt = 0, cnt1 = 0;
+	__u64 max_events = 0;
+	void *best_super = NULL;
+	struct sysarray *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE);
+	char *avail = NULL;
+
+	*availp = NULL;
+	if (sra == NULL)
+		return 0;
+
+	for (d = sra->devs ; d ; d = d->next) {
+		char dn[30];
+		int dfd;
+		void *super;
+		int ok;
+		struct mdinfo info;
+
+		sprintf(dn, "%d:%d", d->major, d->minor);
+		dfd = dev_open(dn, O_RDONLY);
+		if (dfd < 0)
+			continue;
+		ok = st->ss->load_super(st, dfd, &super, NULL);
+		close(dfd);
+		if (ok != 0)
+			continue;
+		st->ss->getinfo_super(&info, super);
+		if (info.disk.state & (1<<MD_DISK_SYNC))
+		{
+			if (avail == NULL) {
+				avail = calloc(info.array.raid_disks, 1);
+				if (avail == NULL) {
+					/* Cannot track this device */
+					free(super);
+					continue;
+				}
+			}
+			if (cnt == 0) {
+				cnt++;
+				max_events = info.events;
+				avail[info.disk.raid_disk] = 2;
+				best_super = super; super = NULL;
+			} else if (info.events == max_events) {
+				cnt++;
+				avail[info.disk.raid_disk] = 2;
+			} else if (info.events + 1 == max_events) {
+				/* exactly one event behind; written this
+				 * way so max_events == 0 cannot wrap
+				 */
+				cnt1++;
+				avail[info.disk.raid_disk] = 1;
+			} else if (info.events < max_events)
+				/* more than one event behind - too old */
+				;
+			else if (info.events == max_events+1) {
+				/* One newer: the previous best devices
+				 * become "one behind".
+				 */
+				int i;
+				cnt1 = cnt;
+				cnt = 1;
+				max_events = info.events;
+				for (i=0; i<info.array.raid_disks; i++)
+					if (avail[i])
+						avail[i]--;
+				avail[info.disk.raid_disk] = 2;
+				free(best_super);
+				best_super = super;
+				super = NULL;
+			} else { /* info.events much bigger */
+				/* Everything seen so far is stale: forget
+				 * it all and start again from this device.
+				 */
+				cnt = 1; cnt1 = 0;
+				memset(avail, 0, info.array.raid_disks);
+				avail[info.disk.raid_disk] = 2;
+				max_events = info.events;
+				free(best_super);
+				best_super = super;
+				super = NULL;
+			}
+		}
+		if (super)
+			free(super);
+	}
+	if (best_super) {
+		st->ss->getinfo_super(bestinfo,best_super);
+		free(best_super);
+	}
+	sysfs_free(sra);
+	*availp = avail;
+	return cnt + cnt1;
+}
+
+void RebuildMap(void)
+{
+	/* Recreate the map file from scratch: for every array reported by
+	 * mdstat_read(), load the metadata from the first member device
+	 * that can be opened and parsed, and record the array's device
+	 * number, metadata version, UUID and device path.
+	 *
+	 * NOTE(review): the list returned by mdstat_read() is not released
+	 * here - confirm whether a matching free routine should be called.
+	 */
+	struct mdstat_ent *mdstat = mdstat_read(0, 0);
+	struct mdstat_ent *md;
+	struct map_ent *map = NULL;
+	int mdp = get_mdp_major();
+
+	for (md = mdstat ; md ; md = md->next) {
+		struct sysarray *sra = sysfs_read(-1, md->devnum, GET_DEVS);
+		struct sysdev *sd;
+
+		if (sra == NULL)
+			continue;
+
+		for (sd = sra->devs ; sd ; sd = sd->next) {
+			char dn[30];
+			int dfd;
+			int ok;
+			struct supertype *st;
+			char *path;
+			void *super;
+			struct mdinfo info;
+
+			sprintf(dn, "%d:%d", sd->major, sd->minor);
+			dfd = dev_open(dn, O_RDONLY);
+			if (dfd < 0)
+				continue;
+			st = guess_super(dfd);
+			if ( st == NULL)
+				ok = -1;
+			else
+				ok = st->ss->load_super(st, dfd, &super, NULL);
+			close(dfd);
+			if (ok != 0)
+				continue;
+			st->ss->getinfo_super(&info, super);
+			/* devnum >= 0 (including md0) is a plain md device;
+			 * negative values encode partitioned arrays as
+			 * -1-N, with 64 minors per array.
+			 */
+			if (md->devnum >= 0)
+				path = map_dev(MD_MAJOR, md->devnum, 0);
+			else
+				path = map_dev(mdp, (-1-md->devnum)<< 6, 0);
+			map_add(&map, md->devnum, st->ss->major,
+				st->minor_version,
+				info.uuid, path ? path : "/unknown");
+			free(super);
+			/* One usable member is enough to describe the array */
+			break;
+		}
+		sysfs_free(sra);
+	}
+	map_write(map);
+	map_free(map);
+}
+
+int IncrementalScan(int verbose)
+{
+	/* look at every device listed in the 'map' file.
+	 * If one is found that is not running then:
+	 *   look in mdadm.conf for bitmap file.
+	 *   if one exists, but array has none, add it.
+	 *   try to start array in auto-readonly mode
+	 *
+	 * Returns 1 if any array could not be started, otherwise 0.
+	 */
+	struct map_ent *mapl = NULL;
+	struct map_ent *me;
+	mddev_ident_t devs, mddev;
+	int rv = 0;
+
+	map_read(&mapl);
+	devs = conf_get_ident(NULL);
+
+	for (me = mapl ; me ; me = me->next) {
+		char path[1024];
+		mdu_array_info_t array;
+		mdu_bitmap_file_t bmf;
+		struct sysarray *sra;
+		int mdfd = open_mddev_devnum(me->path, me->devnum, NULL, path);
+		if (mdfd < 0)
+			continue;
+		/* Only arrays for which GET_ARRAY_INFO fails with ENODEV
+		 * are treated as assembled-but-not-running.
+		 */
+		if (ioctl(mdfd, GET_ARRAY_INFO, &array) == 0 ||
+		    errno != ENODEV) {
+			close(mdfd);
+			continue;
+		}
+		/* Ok, we can try this one.  Maybe it needs a bitmap */
+		for (mddev = devs ; mddev ; mddev = mddev->next)
+			if (strcmp(mddev->devname, me->path) == 0)
+				break;
+		if (mddev && mddev->bitmap_file) {
+			/*
+			 * Note: early kernels will wrongly fail this, so it
+			 * is a hint only
+			 *
+			 * NOTE(review): this passes a mdu_bitmap_file_t to
+			 * GET_ARRAY_INFO; a bitmap-file query was presumably
+			 * intended.  Left unchanged because a successful
+			 * query would also need its result inspected -
+			 * confirm against the kernel md ioctl definitions.
+			 */
+			int added = -1;
+			if (ioctl(mdfd, GET_ARRAY_INFO, &bmf) < 0) {
+				int bmfd = open(mddev->bitmap_file, O_RDWR);
+				if (bmfd >= 0) {
+					added = ioctl(mdfd, SET_BITMAP_FILE,
+						      bmfd);
+					close(bmfd);
+				}
+			}
+			if (verbose >= 0) {
+				if (added == 0)
+					fprintf(stderr, Name
+						": Added bitmap %s to %s\n",
+						mddev->bitmap_file, me->path);
+				else if (errno != EEXIST)
+					fprintf(stderr, Name
+						": Failed to add bitmap to %s: %s\n",
+						me->path, strerror(errno));
+			}
+		}
+		sra = sysfs_read(mdfd, 0, 0);
+		if (sra) {
+			if (sysfs_set_str(sra, NULL,
+					  "array_state", "read-auto") == 0) {
+				if (verbose >= 0)
+					fprintf(stderr, Name
+						": started array %s\n",
+						me->path);
+			} else {
+				fprintf(stderr, Name
+					": failed to start array %s: %s\n",
+					me->path, strerror(errno));
+				rv = 1;
+			}
+			sysfs_free(sra);
+		}
+		/* Do not leak one descriptor per array examined */
+		close(mdfd);
+	}
+	map_free(mapl);
+	return rv;
+}
diff --git a/Makefile b/Makefile
index 79aa88f..112f3da 100644
--- a/Makefile
+++ b/Makefile
@@ -68,10 +68,14 @@ MAN8DIR = $(MANDIR)/man8
OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \
Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
- mdopen.o super0.o super1.o bitmap.o restripe.o sysfs.o sha1.o
+ Incremental.o \
+ mdopen.o super0.o super1.o bitmap.o restripe.o sysfs.o sha1.o \
+ mapfile.o
SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \
Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c \
- mdopen.c super0.c super1.c bitmap.c restripe.c sysfs.c sha1.c
+ Incremental.c \
+ mdopen.c super0.c super1.c bitmap.c restripe.c sysfs.c sha1.c \
+ mapfile.c
STATICSRC = pwgr.c
STATICOBJS = pwgr.o
diff --git a/Manage.c b/Manage.c
index 4b5ec80..9193482 100644
--- a/Manage.c
+++ b/Manage.c
@@ -106,7 +106,11 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
devname, strerror(errno));
return 1;
}
+ if (quiet <= 0)
+ fprintf(stderr, Name ": started %s\n", devname);
} else if (runstop < 0){
+ struct map_ent *map = NULL;
+ struct stat stb;
if (ioctl(fd, STOP_ARRAY, NULL)) {
if (quiet==0)
fprintf(stderr, Name ": fail to stop array %s: %s\n",
@@ -115,6 +119,16 @@ int Manage_runstop(char *devname, int fd, int runstop, int quiet)
}
if (quiet <= 0)
fprintf(stderr, Name ": stopped %s\n", devname);
+ if (fstat(fd, &stb) == 0) {
+ int devnum;
+ if (major(stb.st_rdev) == MD_MAJOR)
+ devnum = minor(stb.st_rdev);
+ else
+ devnum = -1-(minor(stb.st_rdev)>>6);
+ map_delete(&map, devnum);
+ map_write(map);
+ map_free(map);
+ }
}
return 0;
}
diff --git a/Monitor.c b/Monitor.c
index 6a4c8de..213e58d 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -601,7 +601,7 @@ int Wait(char *dev)
if (major(stb.st_rdev) == MD_MAJOR)
devnum = minor(stb.st_rdev);
else
- devnum = -minor(stb.st_rdev)/16;
+ devnum = -1-(minor(stb.st_rdev)/64);
while(1) {
struct mdstat_ent *ms = mdstat_read(1, 0);
diff --git a/ReadMe.c b/ReadMe.c
index 739b366..7e39c85 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -91,8 +91,9 @@ char Version[] = Name " - v2.5.6 - 9 November 2006\n";
* At the time if writing, there is only minimal support.
*/
-char short_options[]="-ABCDEFGQhVXWvqbc:i:l:p:m:n:x:u:c:d:z:U:sarfRSow1tye:";
-char short_bitmap_auto_options[]="-ABCDEFGQhVXWvqb:c:i:l:p:m:n:x:u:c:d:z:U:sa:rfRSow1tye:";
+char short_options[]="-ABCDEFGIQhVXWvqbc:i:l:p:m:n:x:u:c:d:z:U:sarfRSow1tye:";
+char short_bitmap_auto_options[]=
+ "-ABCDEFGIQhVXWvqb:c:i:l:p:m:n:x:u:c:d:z:U:sa:rfRSow1tye:";
struct option long_options[] = {
{"manage", 0, 0, '@'},
@@ -104,6 +105,7 @@ struct option long_options[] = {
{"examine", 0, 0, 'E'},
{"follow", 0, 0, 'F'},
{"grow", 0, 0, 'G'},
+ {"incremental",0,0, 'I'},
{"zero-superblock", 0, 0, 'K'}, /* deliberately no a short_option */
{"query", 0, 0, 'Q'},
{"examine-bitmap", 0, 0, 'X'},
@@ -179,7 +181,9 @@ struct option long_options[] = {
{"syslog", 0, 0, 'y'},
/* For Grow */
{"backup-file", 1,0, BackupFile},
-
+
+ /* For Incremental */
+ {"rebuild-map", 0, 0, 'r'},
{0, 0, 0, 0}
};
@@ -201,6 +205,10 @@ char Help[] =
" make changes to an existing array.\n"
" mdadm --misc options... devices\n"
" report on or modify various md related devices.\n"
+" mdadm --grow options device\n"
+" resize/reshape an active array\n"
+" mdadm --incremental device\n"
+" add a device to an array as appropriate\n"
" mdadm --monitor options...\n"
" Monitor one or more array for significant changes.\n"
" mdadm device options...\n"
@@ -240,6 +248,8 @@ char OptionHelp[] =
" --examine -E : Examine superblock on an array component\n"
" --examine-bitmap -X: Display the detail of a bitmap file\n"
" --monitor -F : monitor (follow) some arrays\n"
+" --grow -G : resize/ reshape and array\n"
+" --incremental -I : add a single device to an array as appropriate\n"
" --query -Q : Display general information about how a\n"
" device relates to the md driver\n"
;
@@ -506,7 +516,22 @@ char Help_grow[] =
" : array.\n"
;
-
+/* Usage text for incremental assembly mode (-I / --incremental). */
+char Help_incr[] =
+"Usage: mdadm --incremental [-Rqrs] device\n"
+"\n"
+"This usage allows for incremental assembly of md arrays. Devices can be\n"
+"added one at a time as they are discovered. Once an array has all expected\n"
+"devices, it will be started.\n"
+"\n"
+"Options that are valid with incremental assembly (-I --incremental) mode are:\n"
+" --run -R : run arrays as soon as a minimal number of devices are\n"
+" : present rather than waiting for all expected.\n"
+" --quiet -q : Don't print any information messages, just errors.\n"
+" --rebuild-map -r : Rebuild the 'map' file that mdadm uses for tracking\n"
+" : partial arrays.\n"
+" --scan -s : Use with -R to start any arrays that have the minimal\n"
+" : required number of devices, but are not yet started.\n"
+;
char Help_config[] =
"The /etc/mdadm.conf config file:\n\n"
@@ -590,6 +615,7 @@ mapping_t modes[] = {
{ "misc", MISC},
{ "monitor", MONITOR},
{ "grow", GROW},
+ { "incremental", INCREMENTAL},
};
mapping_t faultylayout[] = {
diff --git a/config.c b/config.c
index 7101c3b..73031b7 100644
--- a/config.c
+++ b/config.c
@@ -86,7 +86,7 @@ char *keywords[] = {
[Mailaddr] = "mailaddr",
[Mailfrom] = "mailfrom",
[Program] = "program",
- [CreateDev] = "create",
+ [CreateDev]= "create",
[Homehost] = "homehost",
[LTEnd] = NULL
};
@@ -747,6 +747,22 @@ mddev_dev_t conf_get_devs()
return dlist;
}
+int conf_test_dev(char *devname)
+{
+	/* Report whether devname is acceptable according to the configured
+	 * device list: returns 1 if it is, 0 if it is not.
+	 * With no list configured, every device is acceptable.
+	 */
+	struct conf_dev *entry = cdevlist;
+
+	if (!entry)
+		return 1;
+
+	while (entry) {
+		/* The "partitions" keyword admits anything; otherwise the
+		 * entry is a glob pattern matched against the device name.
+		 */
+		if (!strcasecmp(entry->name, "partitions") ||
+		    !fnmatch(entry->name, devname, FNM_PATHNAME))
+			return 1;
+		entry = entry->next;
+	}
+	return 0;
+}
+
+
int match_oneof(char *devices, char *devname)
{
/* check if one of the comma separated patterns in devices
diff --git a/kernel-patch-2.6.18 b/kernel-patch-2.6.18
new file mode 100644
index 0000000..87496ea
--- /dev/null
+++ b/kernel-patch-2.6.18
@@ -0,0 +1,35 @@
+
+### Diffstat output
+ ./drivers/md/md.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff .prev/drivers/md/md.c ./drivers/md/md.c
+--- .prev/drivers/md/md.c 2006-10-23 10:26:37.000000000 +1000
++++ ./drivers/md/md.c 2006-12-21 16:28:29.000000000 +1100
+@@ -1783,7 +1783,8 @@ state_store(mdk_rdev_t *rdev, const char
+ else {
+ mddev_t *mddev = rdev->mddev;
+ kick_rdev_from_array(rdev);
+- md_update_sb(mddev);
++ if (mddev->pers)
++ md_update_sb(mddev);
+ md_new_event(mddev);
+ err = 0;
+ }
+@@ -1994,6 +1995,8 @@ static mdk_rdev_t *md_import_device(dev_
+ kobject_init(&rdev->kobj);
+
+ rdev->desc_nr = -1;
++ rdev->saved_raid_disk = -1;
++ rdev->raid_disk = -1;
+ rdev->flags = 0;
+ rdev->data_offset = 0;
+ rdev->sb_events = 0;
+@@ -3991,6 +3994,7 @@ static int set_array_info(mddev_t * mdde
+ mddev->major_version = info->major_version;
+ mddev->minor_version = info->minor_version;
+ mddev->patch_version = info->patch_version;
++ mddev->persistent = ! info->not_persistent;
+ return 0;
+ }
+ mddev->major_version = MD_MAJOR_VERSION;
diff --git a/kernel-patch-2.6.18.6 b/kernel-patch-2.6.18.6
new file mode 100644
index 0000000..e702e14
--- /dev/null
+++ b/kernel-patch-2.6.18.6
@@ -0,0 +1,35 @@
+Signed-off-by: Neil Brown <neilb@suse.de>
+
+### Diffstat output
+ ./drivers/md/md.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff .prev/drivers/md/md.c ./drivers/md/md.c
+--- .prev/drivers/md/md.c 2006-12-21 17:08:23.000000000 +1100
++++ ./drivers/md/md.c 2006-12-21 17:08:26.000000000 +1100
+@@ -1783,7 +1783,8 @@ state_store(mdk_rdev_t *rdev, const char
+ else {
+ mddev_t *mddev = rdev->mddev;
+ kick_rdev_from_array(rdev);
+- md_update_sb(mddev);
++ if (mddev->pers)
++ md_update_sb(mddev);
+ md_new_event(mddev);
+ err = 0;
+ }
+@@ -1995,6 +1996,7 @@ static mdk_rdev_t *md_import_device(dev_
+
+ rdev->desc_nr = -1;
+ rdev->saved_raid_disk = -1;
++ rdev->raid_disk = -1;
+ rdev->flags = 0;
+ rdev->data_offset = 0;
+ rdev->sb_events = 0;
+@@ -3993,6 +3995,7 @@ static int set_array_info(mddev_t * mdde
+ mddev->major_version = info->major_version;
+ mddev->minor_version = info->minor_version;
+ mddev->patch_version = info->patch_version;
++ mddev->persistent = ! info->not_persistent;
+ return 0;
+ }
+ mddev->major_version = MD_MAJOR_VERSION;
diff --git a/kernel-patch-2.6.19 b/kernel-patch-2.6.19
new file mode 100644
index 0000000..22a67a3
--- /dev/null
+++ b/kernel-patch-2.6.19
@@ -0,0 +1,34 @@
+
+### Diffstat output
+ ./drivers/md/md.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff .prev/drivers/md/md.c ./drivers/md/md.c
+--- .prev/drivers/md/md.c 2006-12-21 15:55:01.000000000 +1100
++++ ./drivers/md/md.c 2006-12-21 16:28:09.000000000 +1100
+@@ -1792,7 +1792,8 @@ state_store(mdk_rdev_t *rdev, const char
+ else {
+ mddev_t *mddev = rdev->mddev;
+ kick_rdev_from_array(rdev);
+- md_update_sb(mddev, 1);
++ if (mddev->pers)
++ md_update_sb(mddev, 1);
+ md_new_event(mddev);
+ err = 0;
+ }
+@@ -2004,6 +2005,7 @@ static mdk_rdev_t *md_import_device(dev_
+
+ rdev->desc_nr = -1;
+ rdev->saved_raid_disk = -1;
++ rdev->raid_disk = -1;
+ rdev->flags = 0;
+ rdev->data_offset = 0;
+ rdev->sb_events = 0;
+@@ -3977,6 +3979,7 @@ static int set_array_info(mddev_t * mdde
+ mddev->major_version = info->major_version;
+ mddev->minor_version = info->minor_version;
+ mddev->patch_version = info->patch_version;
++ mddev->persistent = ! info->not_persistent;
+ return 0;
+ }
+ mddev->major_version = MD_MAJOR_VERSION;
diff --git a/mapfile.c b/mapfile.c
new file mode 100644
index 0000000..746073d
--- /dev/null
+++ b/mapfile.c
@@ -0,0 +1,197 @@
+/*
+ * mapfile - manage /var/run/mdadm.map. Part of:
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2006 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ * Paper: Neil Brown
+ * Novell Inc
+ * GPO Box Q1283
+ * QVB Post Office, NSW 1230
+ * Australia
+ */
+
+/* /var/run/mdadm.map is used to track arrays being created in --incremental
+ * mode. It particularly allows lookup from UUID to array device, but
+ * also allows the array device name to be easily found.
+ *
+ * The map file is line based with space separated fields. The fields are:
+ * Device id - mdX or mdpX where X is a number.
+ * metadata - 0.90 1.0 1.1 1.2
+ * UUID - uuid of the array
+ * path - path where device created: /dev/md/home
+ *
+ */
+
+
+#include "mdadm.h"
+
+
+/* Write the list 'mel' out as the map file.
+ *
+ * Entries are written to a temporary "map.new" file which is then
+ * renamed over the real map, so the existing map is only ever replaced
+ * by a completely written file.  /var/run/mdadm/ is tried first; if the
+ * temporary file cannot be created there, /var/run/mdadm.map is used.
+ *
+ * Returns 1 on success and 0 if the file could not be created,
+ * written or renamed.
+ */
+int map_write(struct map_ent *mel)
+{
+	const char *tmpname = "/var/run/mdadm/map.new";
+	const char *mapname = "/var/run/mdadm/map";
+	FILE *f;
+	int err;
+
+	f = fopen(tmpname, "w");
+	if (!f) {
+		/* No usable /var/run/mdadm directory: switch to the flat
+		 * names so that the unlink/rename below act on the file
+		 * that was really opened.
+		 */
+		tmpname = "/var/run/mdadm.map.new";
+		mapname = "/var/run/mdadm.map";
+		f = fopen(tmpname, "w");
+	}
+	if (!f)
+		return 0;
+
+	for ( ; mel ; mel = mel->next) {
+		/* negative devnum encodes a partitionable (mdp) array */
+		if (mel->devnum < 0)
+			fprintf(f, "mdp%d ", -1-mel->devnum);
+		else
+			fprintf(f, "md%d ", mel->devnum);
+		fprintf(f, "%d.%d ", mel->major, mel->minor);
+		fprintf(f, "%08x:%08x:%08x:%08x ", mel->uuid[0],
+			mel->uuid[1], mel->uuid[2], mel->uuid[3]);
+		fprintf(f, "%s\n", mel->path);
+	}
+	fflush(f);
+	err = ferror(f);
+	if (fclose(f) != 0)
+		err = 1;
+	if (err) {
+		/* never leave a partial temporary file behind */
+		unlink(tmpname);
+		return 0;
+	}
+	return rename(tmpname, mapname) == 0;
+}
+
+/* Prepend a new entry to the list *melp.
+ *
+ * 'uuid' and 'path' are copied, so the caller keeps ownership of its
+ * own buffers.  If memory for the entry or the path copy cannot be
+ * allocated, the list is left unchanged.
+ */
+void map_add(struct map_ent **melp,
+	     int devnum, int major, int minor, int uuid[4], char *path)
+{
+	struct map_ent *me = malloc(sizeof(*me));
+
+	if (!me)
+		return;
+	me->path = strdup(path);
+	if (!me->path) {
+		free(me);
+		return;
+	}
+	me->devnum = devnum;
+	me->major = major;
+	me->minor = minor;
+	memcpy(me->uuid, uuid, sizeof(me->uuid));
+	me->next = *melp;
+	*melp = me;
+}
+
+/* Read the map file into a freshly built list stored in *melp.
+ *
+ * /var/run/mdadm/map is tried first, then /var/run/mdadm.map.  If
+ * neither can be opened *melp is left as NULL.  Lines that do not
+ * parse as "md<N>|mdp<N> <major>.<minor> <uuid> <path>" are skipped.
+ * Entries are added with map_add(), which prepends, so the list is in
+ * the reverse of file order.
+ */
+void map_read(struct map_ent **melp)
+{
+	FILE *f;
+	char buf[8192];
+	char path[200];
+	int devnum, major, minor, uuid[4];
+	char nam[4];
+
+	*melp = NULL;
+
+	f = fopen("/var/run/mdadm/map", "r");
+	if (!f)
+		f = fopen("/var/run/mdadm.map", "r");
+	if (!f)
+		return;
+
+	while (fgets(buf, sizeof(buf), f)) {
+		/* "%3[mdp]" collects the "md" or "mdp" prefix; a scanset
+		 * must match at least one character, so an optional 'p'
+		 * cannot be expressed as "md%1[p]".  "%199s" leaves room
+		 * for the terminating NUL in path[200].
+		 */
+		if (sscanf(buf, " %3[mdp]%d %d.%d %x:%x:%x:%x %199s",
+			   nam, &devnum, &major, &minor, uuid, uuid+1,
+			   uuid+2, uuid+3, path) != 9)
+			continue;
+		if (strcmp(nam, "mdp") == 0)
+			devnum = -1 - devnum;
+		else if (strcmp(nam, "md") != 0)
+			continue;
+		map_add(melp, devnum, major, minor, uuid, path);
+	}
+	fclose(f);
+}
+
+/* Release every entry of the list 'map' together with its path string. */
+void map_free(struct map_ent *map)
+{
+	struct map_ent *following;
+
+	for ( ; map != NULL ; map = following) {
+		/* fetch the link before the entry itself is freed */
+		following = map->next;
+		free(map->path);
+		free(map);
+	}
+}
+
+/* Record (or replace) the entry for 'devnum' and rewrite the map file.
+ *
+ * If 'mpp' is non-NULL and *mpp holds a list, that list is used,
+ * otherwise the map file is read.  The list is written back with
+ * map_write() and then freed, so *mpp (when supplied) is reset to
+ * NULL and must not be used by the caller afterwards.
+ *
+ * Returns the result of map_write(): 1 on success, 0 on failure.
+ */
+int map_update(struct map_ent **mpp, int devnum, int major, int minor,
+	       int *uuid, char *path)
+{
+	struct map_ent *map, *mp;
+	int rv;
+
+	if (mpp && *mpp)
+		map = *mpp;
+	else
+		map_read(&map);
+
+	for (mp = map ; mp ; mp=mp->next)
+		if (mp->devnum == devnum) {
+			mp->major = major;
+			mp->minor = minor;
+			memcpy(mp->uuid, uuid, 16);
+			free(mp->path);
+			mp->path = strdup(path);
+			break;
+		}
+	if (!mp)
+		map_add(&map, devnum, major, minor, uuid, path);
+	/* mpp may legitimately be NULL (see the test above), so only
+	 * clear the caller's pointer when there is one.
+	 */
+	if (mpp)
+		*mpp = NULL;
+	rv = map_write(map);
+	map_free(map);
+	return rv;
+}
+
+/* Remove every entry for 'devnum' from the list *mapp.
+ * If *mapp is empty on entry the map file is read into it first.
+ * Only the in-memory list is changed here.
+ */
+void map_delete(struct map_ent **mapp, int devnum)
+{
+	struct map_ent **link;
+
+	if (*mapp == NULL)
+		map_read(mapp);
+
+	/* 'link' always addresses the pointer that leads to the entry
+	 * under inspection, so unlinking needs no special head case.
+	 */
+	link = mapp;
+	while (*link != NULL) {
+		struct map_ent *cur = *link;
+
+		if (cur->devnum != devnum) {
+			link = &cur->next;
+			continue;
+		}
+		*link = cur->next;
+		free(cur->path);
+		free(cur);
+	}
+}
+
+/* Find the first entry in *map whose uuid equals 'uuid'.
+ * If *map is empty on entry the map file is read into it first.
+ * Returns the matching entry, or NULL if there is none.
+ */
+struct map_ent *map_by_uuid(struct map_ent **map, int uuid[4])
+{
+	struct map_ent *ent;
+
+	if (*map == NULL)
+		map_read(map);
+
+	ent = *map;
+	while (ent != NULL && memcmp(uuid, ent->uuid, 16) != 0)
+		ent = ent->next;
+	return ent;
+}
diff --git a/mdadm.8 b/mdadm.8
index 6160462..c00f5fa 100644
--- a/mdadm.8
+++ b/mdadm.8
@@ -88,7 +88,7 @@ provides a layer over a true device that can be used to inject faults.
'''with a different format and a different purpose.
.SH MODES
-mdadm has 7 major modes of operation:
+mdadm has several major modes of operation:
.TP
.B Assemble
Assemble the parts of a previously created
@@ -132,6 +132,16 @@ of component devices in RAID level 1/4/5/6 and changing the number of
active devices in RAID1.
.TP
+.B "Incremental Assembly"
+Add a single device to an appropriate array. If the addition of the
+device makes the array runnable, the array will be started.
+This provides a convenient interface to a
+.I hot-plug
+system. As each device is detected,
+.I mdadm
+has a chance to include it in some array as appropriate.
+
+.TP
.B Manage
This is for doing things to specific components of an array such as
adding new spares and removing faulty devices.
@@ -169,6 +179,11 @@ mode.
.TP
.BR -G ", " --grow
Change the size or shape of an active array.
+
+.TP
+.BR -I ", " --incremental
+Add a single device into an appropriate array, and possibly start the array.
+
.P
If a device is given before any options, or if the first option is
.BR --add ,
@@ -939,6 +954,32 @@ activity to finish before returning.
will return with success if it actually waited for every device
listed, otherwise it will return failure.
+.SH For Incremental Assembly mode:
+.TP
+.BR --rebuild-map ", " -r
+Rebuild the map file
+.RB ( /var/run/mdadm/map )
+that
+.I mdadm
+uses to help track which arrays are currently being assembled.
+
+.TP
+.BR --run ", " -R
+Run any array assembled as soon as a minimal number of devices are
+available, rather than waiting until all expected devices are present.
+
+.TP
+.BR --scan ", " -s
+Only meaningful with
+.B -R
+this will scan the
+.B map
+file for arrays that are being incrementally assembled and will try to
+start any that are not already started. If any such array is listed
+in
+.B mdadm.conf
+as requiring an external bitmap, that bitmap will be attached first.
+
.SH For Monitor mode:
.TP
.BR -m ", " --mail
@@ -1680,6 +1721,153 @@ can be added. Note that if you add a bitmap stored in a file which is
in a filesystem that is on the raid array being affected, the system
will deadlock. The bitmap must be on a separate filesystem.
+.SH INCREMENTAL MODE
+
+.HP 12
+Usage:
+.B mdadm --incremental
+.RB [ --run ]
+.RB [ --quiet ]
+.I component-device
+.HP 12
+Usage:
+.B mdadm --incremental --rebuild
+.HP 12
+Usage:
+.B mdadm --incremental --run --scan
+
+
+.PP
+This mode is designed to be used in conjunction with a device
+discovery system. As devices are found in a system, they can be
+passed to
+.B "mdadm --incremental"
+to be conditionally added to an appropriate array.
+
+.I mdadm
+performs a number of tests to determine if the device is part of an
+array, and which array it should be part of. If an appropriate array
+is found, or can be created,
+.I mdadm
+adds the device to the array and conditionally starts the array.
+
+Note that
+.I mdadm
+will only add devices to an array which were previously working
+(active or spare) parts of that array. It does not currently support
+automatic inclusion of a new drive as a spare in some array.
+
+.B "mdadm --incremental"
+requires a bug, present in all kernels through 2.6.19, to be fixed.
+Hopefully this will be fixed in 2.6.20. Alternately apply the patch
+which is included with the mdadm source distribution. If
+.I mdadm
+detects that this bug is present, it will abort any attempt to use
+.BR --incremental .
+
+The tests that
+.I mdadm
+makes are as follows:
+.IP +
+Is the device permitted by
+.BR mdadm.conf ?
+That is, is it listed in a
+.B DEVICES
+line in that file. If
+.B DEVICES
+is absent then the default is to allow any device. Similarly, if
+.B DEVICES
+contains the special word
+.B partitions
+then any device is allowed. Otherwise the device name given to
+.I mdadm
+must match one of the names or patterns in a
+.B DEVICES
+line.
+
+.IP +
+Does the device have a valid md superblock? If a specific metadata
+version is requested with
+.B --metadata
+or
+.B -e
+then only that style of metadata is accepted, otherwise
+.I mdadm
+finds any known version of metadata. If no
+.I md
+metadata is found, the device is rejected.
+
+.IP +
+Does the metadata match an expected array?
+The metadata can match in two ways. Either there is an array listed
+in
+.B mdadm.conf
+which identifies the array (either by UUID, by name, by device list,
+or by minor-number), or the array was created with a
+.B homehost
+specified, and that
+.B homehost
+matches that which is given in
+.B mdadm.conf
+or on the command line.
+If
+.I mdadm
+is not able to positively identify the array as belonging to the
+current host, the device will be rejected.
+
+.IP +
+.I mdadm
+keeps a list of arrays that it has partly assembled in
+.B /var/run/mdadm/map
+(or
+.B /var/run/mdadm.map
+if the directory doesn't exist). If no array exists which matches
+the metadata on the new device,
+.I mdadm
+must choose a device name and unit number. It does this based on any
+name given in
+.B mdadm.conf
+or any name information stored in the metadata. If this name
+suggests a unit number, that number will be used, otherwise a free
+unit number will be chosen. Normally
+.I mdadm
+will prefer to create a partitionable array, however if the
+.B CREATE
+line in
+.B mdadm.conf
+suggests that a non-partitionable array is preferred, that will be
+honoured.
+
+.IP +
+Once an appropriate array is found or created and the device is added,
+.I mdadm
+must decide if the array is ready to be started. It will
+normally compare the number of available (non-spare) devices to the
+number of devices that the metadata suggests need to be active. If
+there are at least that many, the array will be started. This means
+that if any devices are missing the array will not be restarted.
+
+As an alternative,
+.B --run
+may be passed to
+.B mdadm
+in which case the array will be run as soon as there are enough
+devices present for the data to be accessible. For a raid1, that
+means one device will start the array. For a clean raid5, the array
+will be started as soon as all but one drive is present.
+
+Note that neither of these approaches is really ideal. If it can
+be known that all device discovery has completed, then
+.br
+.B " mdadm -IRs"
+.br
+can be run which will try to start all arrays that are being
+incrementally assembled. They are started in "read-auto" mode in
+which they are read-only until the first write request. This means
+that no metadata updates are made and no attempt at resync or recovery
+happens. Further devices that are found before the first write can
+still be added safely.
+
.SH EXAMPLES
.B " mdadm --query /dev/name-of-device"
@@ -1755,6 +1943,16 @@ the background in monitor mode monitoring all md devices. Also write
pid of mdadm daemon to
.BR /var/run/mdadm .
+.B " mdadm -Iq /dev/somedevice"
+.br
+Try to incorporate newly discovered device into some array as
+appropriate.
+
+.B " mdadm --incremental --rebuild --run --scan"
+.br
+Rebuild the array map from any current arrays, and then start any that
+can be started.
+
.B " mdadm --create --help"
.br
Provide help about the Create mode.
@@ -1792,6 +1990,16 @@ they contain MD super block, and gives identifying information
.BR mdadm.conf (5)
for more details.
+.SS /var/run/mdadm/map
+When
+.I --incremental
+mode is used, this file gets a list of arrays currently being created.
+If
+.B /var/run/mdadm
+does not exist as a directory, then
+.B /var/run/mdadm.map
+is used instead.
+
.SH DEVICE NAMES
While entries in the /dev directory can have any format you like,
diff --git a/mdadm.c b/mdadm.c
index b5dce73..9bec295 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -101,6 +101,7 @@ int main(int argc, char *argv[])
int re_add = 0;
char *shortopt = short_options;
int dosyslog = 0;
+ int rebuild_map = 0;
int auto_update_home = 0;
int copies;
@@ -191,6 +192,7 @@ int main(int argc, char *argv[])
case 'C': newmode = CREATE; shortopt = short_bitmap_auto_options; break;
case 'F': newmode = MONITOR;break;
case 'G': newmode = GROW; shortopt = short_bitmap_auto_options; break;
+ case 'I': newmode = INCREMENTAL; break;
case '#':
case 'D':
@@ -269,6 +271,7 @@ int main(int argc, char *argv[])
case 'C':
case 'F':
case 'G':
+ case 'I':
continue;
}
if (opt == 1) {
@@ -321,6 +324,7 @@ int main(int argc, char *argv[])
case O(ASSEMBLE,AutoHomeHost):
auto_update_home = 1;
continue;
+ case O(INCREMENTAL, 'e'):
case O(CREATE,'e'):
case O(ASSEMBLE,'e'):
case O(MISC,'e'): /* set metadata (superblock) information */
@@ -628,6 +632,7 @@ int main(int argc, char *argv[])
case O(ASSEMBLE,'s'): /* scan */
case O(MISC,'s'):
case O(MONITOR,'s'):
+ case O(INCREMENTAL,'s'):
scan = 1;
continue;
@@ -702,6 +707,7 @@ int main(int argc, char *argv[])
case O(MANAGE,'f'): /* set faulty */
devmode = 'f';
continue;
+ case O(INCREMENTAL,'R'):
case O(MANAGE,'R'):
case O(ASSEMBLE,'R'):
case O(BUILD,'R'):
@@ -833,6 +839,10 @@ int main(int argc, char *argv[])
}
}
continue;
+
+ case O(INCREMENTAL, 'r'):
+ rebuild_map = 1;
+ continue;
}
/* We have now processed all the valid options. Anything else is
* an error
@@ -861,6 +871,7 @@ int main(int argc, char *argv[])
case MISC : help_text = Help_misc; break;
case MONITOR : help_text = Help_monitor; break;
case GROW : help_text = Help_grow; break;
+ case INCREMENTAL:help_text= Help_incr; break;
}
fputs(help_text,stderr);
exit(0);
@@ -1289,6 +1300,34 @@ int main(int argc, char *argv[])
} else
fprintf(stderr, Name ": no changes to --grow\n");
break;
+ case INCREMENTAL:
+ if (rebuild_map) {
+ RebuildMap();
+ }
+ if (scan) {
+ if (runstop <= 0) {
+ fprintf(stderr, Name
+ ": --incremental --scan meaningless without --run.\n");
+ break;
+ }
+ rv = IncrementalScan(verbose);
+ }
+ if (!devlist) {
+ if (!rebuild_map && !scan) {
+ fprintf(stderr, Name
+ ": --incremental requires a device.\n");
+ rv = 1;
+ }
+ break;
+ }
+ if (devlist->next) {
+ fprintf(stderr, Name
+ ": --incremental can only handle one device.\n");
+ rv = 1;
+ break;
+ }
+ rv = Incremental(devlist->devname, verbose-quiet, runstop,
+ ss, homehost, autof);
}
exit(rv);
}
diff --git a/mdadm.h b/mdadm.h
index d40d187..3831f42 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -146,6 +146,7 @@ enum mode {
MISC,
MONITOR,
GROW,
+ INCREMENTAL,
};
extern char short_options[];
@@ -153,6 +154,7 @@ extern char short_bitmap_auto_options[];
extern struct option long_options[];
extern char Version[], Usage[], Help[], OptionHelp[],
Help_create[], Help_build[], Help_assemble[], Help_grow[],
+ Help_incr[],
Help_manage[], Help_misc[], Help_monitor[], Help_config[];
/* for option that don't have short equivilents, we assign arbitrary
@@ -238,6 +240,24 @@ struct mdstat_ent {
extern struct mdstat_ent *mdstat_read(int hold, int start);
extern void free_mdstat(struct mdstat_ent *ms);
extern void mdstat_wait(int seconds);
+extern int mddev_busy(int devnum);
+
+struct map_ent {	/* one line of the map file, kept as a singly linked list */
+ struct map_ent *next;
+ int devnum;	/* mdN is stored as N; mdpN is stored as -1-N */
+ int major,minor;	/* metadata version, written to the map as "major.minor" */
+ int uuid[4];	/* array uuid, written as four colon-separated hex words */
+ char *path;	/* heap-allocated copy of the device path; freed with the entry */
+};
+extern int map_update(struct map_ent **mpp, int devnum, int major, int minor,
+ int uuid[4], char *path);
+extern struct map_ent *map_by_uuid(struct map_ent **map, int uuid[4]);
+extern void map_read(struct map_ent **melp);
+extern int map_write(struct map_ent *mel);
+extern void map_delete(struct map_ent **mapp, int devnum);
+extern void map_free(struct map_ent *map);
+extern void map_add(struct map_ent **melp,
+ int devnum, int major, int minor, int uuid[4], char *path);
/* Data structure for holding info read from sysfs */
struct sysdev {
@@ -259,6 +279,7 @@ struct sysarray {
int spares;
int cache_size;
int mismatch_cnt;
+ int major_version, minor_version;
};
/* various details can be requested */
#define GET_LEVEL 1
@@ -267,6 +288,7 @@ struct sysarray {
#define GET_CHUNK 8
#define GET_CACHE 16
#define GET_MISMATCH 32
+#define GET_VERSION 64
#define GET_DEVS 1024 /* gets role, major, minor */
#define GET_OFFSET 2048
@@ -277,6 +299,7 @@ struct sysarray {
/* If fd >= 0, get the array it is open on,
* else use devnum. >=0 -> major9. <0.....
*/
+extern void sysfs_free(struct sysarray *sra);
extern struct sysarray *sysfs_read(int fd, int devnum, unsigned long options);
extern int sysfs_set_str(struct sysarray *sra, struct sysdev *dev,
char *name, char *val);
@@ -345,6 +368,8 @@ struct supertype {
extern struct supertype *super_by_version(int vers, int minor);
extern struct supertype *guess_super(int fd);
extern int get_dev_size(int fd, char *dname, unsigned long long *sizep);
+extern void get_one_disk(int mdfd, mdu_array_info_t *ainf,
+ mdu_disk_info_t *disk);
#if __GNUC__ < 3
struct stat64;
@@ -426,6 +451,11 @@ extern int Monitor(mddev_dev_t devlist,
extern int Kill(char *dev, int force, int quiet);
extern int Wait(char *dev);
+extern int Incremental(char *devname, int verbose, int runstop,
+ struct supertype *st, char *homehost, int autof);
+extern void RebuildMap(void);
+extern int IncrementalScan(int verbose);
+
extern int CreateBitmap(char *filename, int force, char uuid[16],
unsigned long chunksize, unsigned long daemon_sleep,
unsigned long write_behind,
@@ -448,6 +478,7 @@ extern int is_standard(char *dev, int *nump);
extern int parse_auto(char *str, char *msg, int config);
extern mddev_ident_t conf_get_ident(char *dev);
extern mddev_dev_t conf_get_devs(void);
+extern int conf_test_dev(char *devname);
extern struct createinfo *conf_get_create_info(void);
extern void set_conffile(char *file);
extern char *conf_get_mailaddr(void);
@@ -479,6 +510,8 @@ extern char *get_md_name(int dev);
extern char DefaultConfFile[];
extern int open_mddev(char *dev, int autof);
+extern int open_mddev_devnum(char *devname, int devnum, char *name,
+ char *chosen_name);
#define LEVEL_MULTIPATH (-4)
diff --git a/mdopen.c b/mdopen.c
index 9f3dfb8..0b6951d 100644
--- a/mdopen.c
+++ b/mdopen.c
@@ -292,3 +292,51 @@ int open_mddev(char *dev, int autof)
return mdfd;
}
+
+/* Open the md device with number 'devnum', creating the device node
+ * if it does not exist yet.
+ *
+ * The node name is 'devname' if given; otherwise it is built from
+ * 'name' (the part after any ':' when 'name' contains no '/'), and
+ * failing that from 'devnum' itself.  Whichever name is used is copied
+ * into 'chosen_name'.
+ * NOTE(review): the copies into 'chosen_name' are unbounded; the
+ * caller must supply a buffer large enough for the longest name -
+ * confirm the size used by callers.
+ *
+ * devnum >= 0 selects a non-partitionable array (MD_MAJOR, minor
+ * devnum); devnum < 0 selects partitionable array -1-devnum on the mdp
+ * major, with 64 minors per array.
+ *
+ * Returns an O_RDWR file descriptor, or -1 on failure.  errno is set
+ * to EEXIST when the name exists but is not the expected block device.
+ */
+int open_mddev_devnum(char *devname, int devnum, char *name, char *chosen_name)
+{
+	int major, minor;
+	struct stat stb;
+
+	if (devname)
+		strcpy(chosen_name, devname);
+	else if (name && strchr(name,'/') == NULL) {
+		char *n = strchr(name, ':');
+		if (n) n++; else n = name;
+		/* <ctype.h> functions need a value representable as
+		 * unsigned char; a plain (possibly negative) char is
+		 * undefined behaviour.
+		 */
+		if (isdigit((unsigned char)*n) && devnum < 0)
+			sprintf(chosen_name, "/dev/md/d%s", n);
+		else
+			sprintf(chosen_name, "/dev/md/%s", n);
+	} else {
+		if (devnum >= 0)
+			sprintf(chosen_name, "/dev/md%d", devnum);
+		else
+			sprintf(chosen_name, "/dev/md/d%d", -1-devnum);
+	}
+	if (devnum >= 0) {
+		major = MD_MAJOR;
+		minor = devnum;
+	} else {
+		major = get_mdp_major();
+		minor = (-1-devnum) << 6;
+	}
+	if (stat(chosen_name, &stb) == 0) {
+		/* It already exists. Check it is right. */
+		if ( ! S_ISBLK(stb.st_mode) ||
+		     stb.st_rdev != makedev(major, minor)) {
+			errno = EEXIST;
+			return -1;
+		}
+	} else {
+		if (mknod(chosen_name, S_IFBLK | 0600,
+			  makedev(major, minor)) != 0) {
+			return -1;
+		}
+		/* FIXME chown/chmod ?? */
+	}
+	return open(chosen_name, O_RDWR);
+}
diff --git a/mdstat.c b/mdstat.c
index 5eeac6c..de31acb 100644
--- a/mdstat.c
+++ b/mdstat.c
@@ -251,3 +251,15 @@ void mdstat_wait(int seconds)
tm.tv_usec = 0;
select(mdstat_fd >2 ? mdstat_fd+1:3, NULL, NULL, &fds, &tm);
}
+
+/* Report whether md device 'devnum' appears in the list returned by
+ * mdstat_read().
+ * Returns 1 if an entry with that devnum was found, otherwise 0.
+ */
+int mddev_busy(int devnum)
+{
+	struct mdstat_ent *mdstat = mdstat_read(0, 0);
+	struct mdstat_ent *me;
+	int busy = 0;
+
+	for (me = mdstat ; me ; me = me->next)
+		if (me->devnum == devnum) {
+			busy = 1;
+			break;
+		}
+	/* The answer is recorded before the list is released so that no
+	 * pointer into the freed list is examined afterwards.
+	 */
+	free_mdstat(mdstat);
+	return busy;
+}
diff --git a/super0.c b/super0.c
index 757d905..8f32843 100644
--- a/super0.c
+++ b/super0.c
@@ -110,6 +110,9 @@ static void examine_super0(void *sbv, char *homehost)
} else
printf(" UUID : %08x\n", sb->set_uuid0);
+ if (sb->not_persistent)
+ printf(" Eedk : not persistent\n");
+
atime = sb->ctime;
printf(" Creation Time : %.24s\n", ctime(&atime));
c=map_num(pers, sb->level);
diff --git a/sysfs.c b/sysfs.c
index 25ede6b..16744f1 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -42,6 +42,18 @@ int load_sys(char *path, char *buf)
return 0;
}
+/* Free 'sra' and every entry on its 'devs' list.
+ * A NULL 'sra' is accepted and ignored.
+ */
+void sysfs_free(struct sysarray *sra)
+{
+	struct sysdev *dev, *following;
+
+	if (sra == NULL)
+		return;
+	for (dev = sra->devs ; dev != NULL ; dev = following) {
+		/* fetch the link before the entry itself is freed */
+		following = dev->next;
+		free(dev);
+	}
+	free(sra);
+}
+
struct sysarray *sysfs_read(int fd, int devnum, unsigned long options)
{
/* Longest possible name in sysfs, mounted at /sys, is
@@ -81,6 +93,16 @@ struct sysarray *sysfs_read(int fd, int devnum, unsigned long options)
base = fname + strlen(fname);
sra->devs = NULL;
+ if (options & GET_VERSION) {
+ strcpy(base, "metadata_version");
+ if (load_sys(fname, buf))
+ goto abort;
+ if (strncmp(buf, "none", 4) == 0)
+ sra->major_version = sra->minor_version = -1;
+ else
+ sscanf(buf, "%d.%d",
+ &sra->major_version, &sra->minor_version);
+ }
if (options & GET_LEVEL) {
strcpy(base, "level");
if (load_sys(fname, buf))
@@ -144,6 +166,7 @@ struct sysarray *sysfs_read(int fd, int devnum, unsigned long options)
goto abort;
dev->next = sra->devs;
sra->devs = dev;
+ strcpy(dev->name, de->d_name);
/* Always get slot, major, minor */
strcpy(dbase, "slot");
@@ -191,12 +214,7 @@ struct sysarray *sysfs_read(int fd, int devnum, unsigned long options)
return sra;
abort:
- while (sra && sra->devs) {
- dev = sra->devs;
- sra->devs = dev->next;
- free(dev);
- }
- if(sra) free(sra);
+ sysfs_free(sra);
return NULL;
}
diff --git a/util.c b/util.c
index c21bf51..58449d5 100644
--- a/util.c
+++ b/util.c
@@ -815,6 +815,14 @@ int get_dev_size(int fd, char *dname, unsigned long long *sizep)
return 1;
}
+void get_one_disk(int mdfd, mdu_array_info_t *ainf, mdu_disk_info_t *disk)
+{	/* Fill *ainf using GET_ARRAY_INFO on mdfd, then issue GET_DISK_INFO
+	 * with *disk up to raid_disks + nr_disks times, returning as soon
+	 * as one call succeeds.
+	 * NOTE(review): 'd' only counts attempts; disk->number is never
+	 * set here, so every attempt queries whatever number the caller
+	 * left in *disk - confirm whether "disk->number = d" was intended.
+	 * NOTE(review): the GET_ARRAY_INFO result is not checked, so the
+	 * loop bound comes from the caller's *ainf if that ioctl fails.
+	 */
+ int d;
+ ioctl(mdfd, GET_ARRAY_INFO, ainf);
+ for (d = 0 ; d < ainf->raid_disks + ainf->nr_disks ; d++)
+ if (ioctl(mdfd, GET_DISK_INFO, disk) == 0)
+ return;
+}
#ifdef __TINYC__
/* tinyc doesn't optimize this check in ioctl.h out ... */
unsigned int __invalid_size_argument_for_IOC = 0;