diff options
Diffstat (limited to 'managemon.c')
-rw-r--r-- | managemon.c | 309 |
1 files changed, 309 insertions, 0 deletions
diff --git a/managemon.c b/managemon.c new file mode 100644 index 0000000..ee4ee2b --- /dev/null +++ b/managemon.c @@ -0,0 +1,309 @@ + +/* + * The management thread for monitoring active md arrays. + * This thread does things which might block such as memory + * allocation. + * In particular: + * + * - Find out about new arrays in this container. + * Allocate the data structures and open the files. + * + * For this we watch /proc/mdstat and find new arrays with + * metadata type that confirms sharing. e.g. "md4" + * When we find a new array we slip it into the list of + * arrays and signal 'monitor' by writing to a pipe. + * + * - Respond to reshape requests by allocating new data structures + * and opening new files. + * + * These come as a change to raid_disks. We allocate a new + * version of the data structures and slip it into the list. + * 'monitor' will notice and release the old version. + * Changes to level, chunksize, layout.. do not need re-allocation. + * Reductions in raid_disks don't really either, but we handle + * them the same way for consistency. + * + * - When a device is added to the container, we add it to the metadata + * as a spare. + * + * - assist with activating spares by opening relevant sysfs file. + * + * - Pass on metadata updates from external programs such as + * mdadm creating a new array. + * + * This is most-messy. + * It might involve adding a new array or changing the status of + * a spare, or any reconfig that the kernel doesn't get involved in. + * + * The required updates are received via a named pipe. There will + * be one named pipe for each container. Each message contains a + * sync marker: 0x5a5aa5a5, A byte count, and the message. This is + * passed to the metadata handler which will interpret and process it. + * For 'DDF' messages are internal data blocks with the leading + * 'magic number' signifying what sort of data it is. + * + */ + +/* + * We select on /proc/mdstat and the named pipe. + * We create new arrays or updated version of arrays and slip + * them into the head of the list, then signal 'monitor' via a pipe write. + * 'monitor' will notice and place the old array on a return list. + * Metadata updates are placed on a queue just like they arrive + * from the named pipe. + * + * When new arrays are found based on correct metadata string, we + * need to identify them with an entry in the metadata. Maybe we require + * the metadata to be mdX/NN when NN is the index into an appropriate table. + * + */ + +/* + * List of tasks: + * - Watch for spares to be added to the container, and write updated + * metadata to them. + * - Watch for new arrays using this container, confirm they match metadata + * and if so, start monitoring them + * - Watch for spares being added to monitored arrays. This shouldn't + * happen, as we should do all the adding. Just remove them. + * - Watch for change in raid-disks, chunk-size, etc. Update metadata and + * start a reshape. + */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include "mdadm.h" +#include "mdmon.h" +#include <sys/socket.h> + + +static void free_aa(struct active_array *aa) +{ + /* Note that this doesn't close fds, as they may be in used + * by a clone. Use close_aa for that. + */ + while (aa->info.devs) { + struct mdinfo *d = aa->info.devs; + aa->info.devs = d->next; + free(d); + } + free(aa); +} + +static void replace_array(struct supertype *container, + struct active_array *old, + struct active_array *new) +{ + /* To replace an array, we add it to the top of the list + * marked with ->replaces to point to the original. + * 'monitor' will take the original out of the list + * and put it on 'discard_this'. We take it from there + * and discard it. + */ + + while (pending_discard) { + while (discard_this == NULL) + sleep(1); + if (discard_this != pending_discard) + abort(); + discard_this->next = NULL; + free_aa(discard_this); + discard_this = NULL; + pending_discard = NULL; + } + pending_discard = old; + new->replaces = old; + new->next = container->arrays; + container->arrays = new; +} + + +static void manage_container(struct mdstat_ent *mdstat, + struct supertype *container) +{ + /* The only thing of interest here is if a new device + * has been added to the container. We add it to the + * array ignoring any metadata on it. + * FIXME should we look for compatible metadata and take hints + * about spare assignment.... probably not. + * + */ + if (mdstat->devcnt != container->devcnt) { + /* read /sys/block/NAME/md/dev-??/block/dev to find out + * what is there, and compare with container->info.devs + * To see what is removed and what is added. + * These need to be remove from, or added to, the array + */ + // FIXME + container->devcnt = mdstat->devcnt; + } +} + +static void manage_member(struct mdstat_ent *mdstat, + struct active_array *a) +{ + /* Compare mdstat info with known state of member array. + * We do not need to look for device state changes here, that + * is dealt with by the monitor. + * + * We just look for changes which suggest that a reshape is + * being requested. + * Unfortunately decreases in raid_disks don't show up in + * mdstat until the reshape completes FIXME. + */ + // FIXME + a->info.array.raid_disks = mdstat->raid_disks; + a->info.array.chunk_size = mdstat->chunk_size; + // MORE + +} + +static void write_wakeup(struct supertype *c) +{ + write(c->pipe[1], "PING", 4); +} + +static void manage_new(struct mdstat_ent *mdstat, + struct supertype *container) +{ + /* A new array has appeared in this container. + * Hopefully it is already recorded in the metadata. + * Check, then create the new array to report it to + * the monitor. + */ + + struct active_array *new; + struct mdinfo *mdi, *di; + char *n; + int inst; + int i; + + new = malloc(sizeof(*new)); + + new->devnum = mdstat->devnum; + + new->prev_state = new->curr_state = new->next_state = inactive; + new->prev_action= new->curr_action= new->next_action= idle; + + new->container = container; + + n = &mdstat->metadata_version[10+strlen(container->devname)+1]; + inst = atoi(n); + if (inst < 0) + abort();//FIXME + + mdi = sysfs_read(-1, new->devnum, + GET_LEVEL|GET_CHUNK|GET_DISKS| + GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE); + if (!mdi) { + /* Eeek. Cannot monitor this array. + * Mark it to be ignored by setting container to NULL + */ + new->container = NULL; + replace_array(container, NULL, new); + return; + } + + new->info.array = mdi->array; + + for (i = 0; i < new->info.array.raid_disks; i++) { + struct mdinfo *newd = malloc(sizeof(*newd)); + + for (di = mdi->devs; di; di = di->next) + if (i == di->disk.raid_disk) + break; + + if (di) { + memcpy(newd, di, sizeof(*newd)); + + sprintf(newd->sys_name, "rd%d", i); + + newd->state_fd = sysfs_open(new->devnum, + newd->sys_name, + "state"); + + newd->prev_state = read_dev_state(newd->state_fd); + newd->curr_state = newd->curr_state; + } else { + newd->state_fd = -1; + } + newd->next = new->info.devs; + new->info.devs = newd; + } + new->action_fd = sysfs_open(new->devnum, NULL, "sync_action"); + new->info.state_fd = sysfs_open(new->devnum, NULL, "array_state"); + new->sync_pos_fd = sysfs_open(new->devnum, NULL, "sync_completed"); + new->sync_pos = 0; + + // finds and compares. + if (container->ss->open_new(container, new, inst) < 0) { + // FIXME close all those files + new->container = NULL; + replace_array(container, NULL, new); + return; + } + replace_array(container, NULL, new); + write_wakeup(container); + return; +} + +void manage(struct mdstat_ent *mdstat, struct active_array *aa, + struct supertype *container) +{ + /* We have just read mdstat and need to compare it with + * the known active arrays. + * Arrays with the wrong metadata are ignored. + */ + + for ( ; mdstat ; mdstat = mdstat->next) { + struct active_array *a; + if (mdstat->devnum == container->devnum) { + manage_container(mdstat, container); + continue; + } + if (mdstat->metadata_version == NULL || + strncmp(mdstat->metadata_version, "external:/", 10) != 0 || + strncmp(mdstat->metadata_version+10, container->devname, + strlen(container->devname)) != 0 || + mdstat->metadata_version[10+strlen(container->devname)] + != '/') + /* Not for this array */ + continue; + /* Looks like a member of this container */ + for (a = aa; a; a = a->next) { + if (mdstat->devnum == a->devnum) { + if (a->container) + manage_member(mdstat, a); + break; + } + } + if (a == NULL) + manage_new(mdstat, container); + } +} + +void read_sock(int pfd) +{ + int fd; + + // FIXME set non-blocking + fd = accept(pfd, NULL, NULL); + if (fd < 0) + return; + // FIXME do something useful + close(fd); +} +void do_manager(struct supertype *container) +{ + struct mdstat_ent *mdstat; + + do { + mdstat = mdstat_read(1, 0); + + manage(mdstat, array_list, container); + + read_sock(container->sock); + + mdstat_wait_fd(container->sock); + } while(1); +} |