summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2008-05-15 16:48:51 +1000
committerNeil Brown <neilb@suse.de>2008-05-15 16:48:51 +1000
commit0af73f61a25904edc7da24e2da9786b48bb8bec6 (patch)
treedfed2b1b3b6f2f7a7e8eee71a3a10a3448d848f2
parent8d45d1969bc299040201df82c51f7fbbc985c401 (diff)
downloadmdadm-0af73f61a25904edc7da24e2da9786b48bb8bec6.tar.gz
mdadm-0af73f61a25904edc7da24e2da9786b48bb8bec6.tar.xz
mdadm-0af73f61a25904edc7da24e2da9786b48bb8bec6.zip
when failures happen they should be propagated to all member arrays
From: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r--monitor.c46
1 files changed, 43 insertions, 3 deletions
diff --git a/monitor.c b/monitor.c
index 98d0219..e941a1a 100644
--- a/monitor.c
+++ b/monitor.c
@@ -175,7 +175,9 @@ int read_dev_state(int fd)
* detected by rd-N/state reporting "faulty"
* mark device as 'failed' in metadata, let the kernel release the
* device by writing '-blocked' to rd/state, and finally write 'remove' to
- * rd/state
+ * rd/state. Before a disk can be replaced it must be failed and removed
+ * from all container members; doing so is preemptive for the other
+ * arrays... safe?
*
* sync completes
* sync_action was 'resync' and becomes 'idle' and resync_start becomes
@@ -346,19 +348,47 @@ static int read_and_act(struct active_array *a)
return 1;
}
+/*
+ * Look up a device in an array's device list by device number.
+ *
+ * Walks a->info.devs and returns the first entry whose disk.major and
+ * disk.minor both match the arguments, or NULL when no entry matches.
+ */
+static struct mdinfo *
+find_device(struct active_array *a, int major, int minor)
+{
+ struct mdinfo *mdi;
+
+ for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
+ if (mdi->disk.major == major && mdi->disk.minor == minor)
+ return mdi;
+
+ return NULL;
+}
+
+/*
+ * Mark a failed device as faulty in every array on the list that uses it.
+ *
+ * aa:     head of the active_array list, walked via ->next
+ * failed: device whose disk.major/disk.minor identify the disk to look for
+ *
+ * Entries with no container set are skipped.  For each remaining array the
+ * matching device is looked up with find_device(); if it is present and
+ * DS_FAULTY is not yet set in its curr_state, "faulty" is written to its
+ * state_fd.  The result of write_attr() is not checked here.
+ */
+static void reconcile_failed(struct active_array *aa, struct mdinfo *failed)
+{
+ struct active_array *a;
+ struct mdinfo *victim;
+
+ for (a = aa; a; a = a->next) {
+ if (!a->container)
+ continue;
+ victim = find_device(a, failed->disk.major, failed->disk.minor);
+ /* this array has no device with that major/minor */
+ if (!victim)
+ continue;
+
+ /* only write when the device is not already flagged faulty */
+ if (!(victim->curr_state & DS_FAULTY))
+ write_attr("faulty", victim->state_fd);
+ }
+}
+
static int wait_and_act(struct active_array *aa, int pfd, int nowait)
{
fd_set rfds;
int maxfd = 0;
struct active_array *a;
int rv;
+ struct mdinfo *mdi;
FD_ZERO(&rfds);
add_fd(&rfds, &maxfd, pfd);
for (a = aa ; a ; a = a->next) {
- struct mdinfo *mdi;
-
/* once an array has been deactivated only the manager
* thread can make us care about it again
*/
@@ -398,6 +428,16 @@ static int wait_and_act(struct active_array *aa, int pfd, int nowait)
if (a->container)
rv += read_and_act(a);
}
+
+ /* propagate failures across container members */
+ for (a = aa; a ; a = a->next) {
+ if (!a->container)
+ continue;
+ for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
+ if (mdi->curr_state & DS_FAULTY)
+ reconcile_failed(aa, mdi);
+ }
+
return rv;
}