summaryrefslogtreecommitdiffstats
path: root/managemon.c
diff options
context:
space:
mode:
authorNeil Brown <neilb@suse.de>2008-06-12 10:13:29 +1000
committerNeil Brown <neilb@suse.de>2008-06-12 10:13:29 +1000
commit6c3fb95c44ffc36df4048db86231521146417223 (patch)
treea718630739a8294af5df59d30cd15e36a425232e /managemon.c
parent57632f4afb3588465454187ca6ab66ffb200b677 (diff)
downloadmdadm-6c3fb95c44ffc36df4048db86231521146417223.tar.gz
mdadm-6c3fb95c44ffc36df4048db86231521146417223.tar.xz
mdadm-6c3fb95c44ffc36df4048db86231521146417223.zip
Support adding a spare to a degraded array.
When signalled by the monitor, the manager will find spares and add them to the array and initiate a recovery.
Diffstat (limited to 'managemon.c')
-rw-r--r--managemon.c84
1 files changed, 80 insertions, 4 deletions
diff --git a/managemon.c b/managemon.c
index 167d176..7a96f36 100644
--- a/managemon.c
+++ b/managemon.c
@@ -26,7 +26,18 @@
* - When a device is added to the container, we add it to the metadata
* as a spare.
*
- * - assist with activating spares by opening relevant sysfs file.
+ * - Deal with degraded array
+ * We only do this when first noticing the array is degraded.
+ * This can be when we first see the array, when sync completes or
+ * when recovery completes.
+ *
+ * Check if number of failed devices suggests recovery is needed, and
+ * skip if not.
+ * Ask metadata to allocate a spare device
+ * Add device as not in_sync and give a role
+ * Update metadata.
+ * Open sysfs files and pass to monitor.
+ * Make sure that the monitor starts recovery.
*
* - Pass on metadata updates from external programs such as
* mdadm creating a new array.
@@ -104,6 +115,32 @@ static void free_aa(struct active_array *aa)
free(aa);
}
+/*
+ * Build a private copy of an active_array.
+ *
+ * The array structure itself is copied by value, then the links that tie
+ * it into existing lists (->next, ->replaces, ->info.next) are cleared.
+ * The device list is rebuilt from freshly allocated mdinfo nodes, copying
+ * only those devices whose state_fd is open (>= 0); devices with a
+ * negative state_fd are left out of the copy.
+ *
+ * Returns the new array.  The caller owns it and its device nodes.
+ */
+static struct active_array *duplicate_aa(struct active_array *aa)
+{
+	struct active_array *newa = malloc(sizeof(*newa));
+	struct mdinfo **dp1, **dp2;
+
+	/* NOTE(review): malloc results are not checked here or below; the
+	 * visible caller does not handle a NULL return, so the out-of-memory
+	 * policy is deliberately left unchanged.
+	 */
+	*newa = *aa;
+	newa->next = NULL;
+	newa->replaces = NULL;
+	newa->info.next = NULL;
+
+	/* dp2 always points at the link where the next copied device goes */
+	dp2 = &newa->info.devs;
+
+	for (dp1 = &aa->info.devs; *dp1; dp1 = &(*dp1)->next) {
+		struct mdinfo *d;
+		if ((*dp1)->state_fd < 0)
+			continue;
+
+		d = malloc(sizeof(*d));
+		*d = **dp1;
+		*dp2 = d;
+		dp2 = &d->next;
+	}
+	/* Terminate the new list explicitly.  Each node was copied by value,
+	 * so its ->next still refers to the original list; and if nothing was
+	 * copied, newa->info.devs still aliases aa->info.devs.  Without this
+	 * the copy would share (and later free) nodes owned by 'aa'.
+	 */
+	*dp2 = NULL;
+
+	return newa;
+}
+
static void write_wakeup(struct supertype *c)
{
static struct md_generic_cmd cmd = { .action = md_action_ping_monitor };
@@ -171,7 +208,7 @@ void check_update_queue(struct supertype *container)
}
}
-void queue_metadata_update(struct metadata_update *mu)
+static void queue_metadata_update(struct metadata_update *mu)
{
struct metadata_update **qp;
@@ -198,7 +235,6 @@ static void manage_container(struct mdstat_ent *mdstat,
* array ignoring any metadata on it.
* FIXME should we look for compatible metadata and take hints
* about spare assignment.... probably not.
- *
*/
if (mdstat->devcnt != container->devcnt) {
/* read /sys/block/NAME/md/dev-??/block/dev to find out
@@ -222,12 +258,52 @@ static void manage_member(struct mdstat_ent *mdstat,
* being requested.
* Unfortunately decreases in raid_disks don't show up in
* mdstat until the reshape completes FIXME.
+ *
+ * Actually, we also want to handle degraded arrays here by
+ * trying to find and assign a spare.
+ * We do that whenever the monitor tells us to.
*/
// FIXME
a->info.array.raid_disks = mdstat->raid_disks;
a->info.array.chunk_size = mdstat->chunk_size;
// MORE
+ if (a->check_degraded) {
+ struct metadata_update *updates = NULL;
+ struct mdinfo *newdev;
+ struct active_array *newa;
+ wait_update_handled();
+ a->check_degraded = 0;
+
+ /* The array may not be degraded, this is just a good time
+ * to check.
+ */
+ newdev = a->container->ss->activate_spare(a, &updates);
+ if (newdev) {
+ struct mdinfo *d;
+ /* Cool, we can add a device or several. */
+ newa = duplicate_aa(a);
+ /* suspend recovery - maybe not needed */
+
+ /* Add device to array and set offset/size/slot.
+ * and open files for each newdev */
+ for (d = newdev; d ; d = d->next) {
+ struct mdinfo *newd;
+ if (sysfs_add_disk(&newa->info, d))
+ continue;
+ newd = newa->info.devs;
+ newd->state_fd = sysfs_open(a->devnum,
+ newd->sys_name,
+ "state");
+ newd->prev_state
+ = read_dev_state(newd->state_fd);
+ newd->curr_state = newd->prev_state;
+ }
+ queue_metadata_update(updates);
+ replace_array(a->container, a, newa);
+ sysfs_set_str(&a->info, NULL, "sync_action", "repair");
+ }
+ }
}
static void manage_new(struct mdstat_ent *mdstat,
@@ -289,7 +365,7 @@ static void manage_new(struct mdstat_ent *mdstat,
"state");
newd->prev_state = read_dev_state(newd->state_fd);
- newd->curr_state = newd->curr_state;
+ newd->curr_state = newd->prev_state;
} else {
newd->state_fd = -1;
}