From 484240d8a3facde992009efd81bfa4cc0c79287d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 14 May 2010 17:42:49 -0700 Subject: mdmon: periodically checkpoint recovery The kernel updates and notifies md/sync_completed when it is time to take a checkpoint. When this occurs (at 1/16 array size intervals) write 'idle' to md/sync_action to have the current recovery position updated in recovery_start and resync_start. Requires the metadata handler to reset ->last_checkpoint when it has determined that recovery has ended. Signed-off-by: Dan Williams --- managemon.c | 1 + mdmon.h | 9 +++++++++ monitor.c | 33 +++++++++++++++++++++++++++++++++ super-intel.c | 4 ++++ 4 files changed, 47 insertions(+) diff --git a/managemon.c b/managemon.c index 454c39d..d5ba6d6 100644 --- a/managemon.c +++ b/managemon.c @@ -540,6 +540,7 @@ static void manage_new(struct mdstat_ent *mdstat, new->info.state_fd = sysfs_open(new->devnum, NULL, "array_state"); new->resync_start_fd = sysfs_open(new->devnum, NULL, "resync_start"); new->metadata_fd = sysfs_open(new->devnum, NULL, "metadata_version"); + new->sync_completed_fd = sysfs_open(new->devnum, NULL, "sync_completed"); dprintf("%s: inst: %d action: %d state: %d\n", __func__, atoi(inst), new->action_fd, new->info.state_fd); diff --git a/mdmon.h b/mdmon.h index 20a0a01..5c51566 100644 --- a/mdmon.h +++ b/mdmon.h @@ -32,6 +32,15 @@ struct active_array { int action_fd; int resync_start_fd; int metadata_fd; /* for monitoring rw/ro status */ + int sync_completed_fd; /* for checkpoint notification events */ + unsigned long long last_checkpoint; /* sync_completed fires for many + * reasons this field makes sure the + * kernel has made progress before + * moving the checkpoint. It is + * cleared by the metadata handler + * when it determines recovery is + * terminated. 
+ */ enum array_state prev_state, curr_state, next_state; enum sync_action prev_action, curr_action, next_action; diff --git a/monitor.c b/monitor.c index e43e545..12f8d3e 100644 --- a/monitor.c +++ b/monitor.c @@ -80,6 +80,24 @@ static unsigned long long read_resync_start(int fd) return strtoull(buf, NULL, 10); } +static unsigned long long read_sync_completed(int fd) +{ + unsigned long long val; + char buf[50]; + int n; + char *ep; + + n = read_attr(buf, 50, fd); + + if (n <= 0) + return 0; + buf[n] = 0; + val = strtoull(buf, &ep, 0); + if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' ')) + return 0; + return val; +} + static enum array_state read_state(int fd) { char buf[20]; @@ -195,6 +213,7 @@ static void signal_manager(void) static int read_and_act(struct active_array *a) { + unsigned long long sync_completed; int check_degraded = 0; int deactivate = 0; struct mdinfo *mdi; @@ -206,6 +225,7 @@ static int read_and_act(struct active_array *a) a->curr_state = read_state(a->info.state_fd); a->curr_action = read_action(a->action_fd); a->info.resync_start = read_resync_start(a->resync_start_fd); + sync_completed = read_sync_completed(a->sync_completed_fd); for (mdi = a->info.devs; mdi ; mdi = mdi->next) { mdi->next_state = 0; if (mdi->state_fd >= 0) { @@ -307,6 +327,18 @@ static int read_and_act(struct active_array *a) } } + /* Check for recovery checkpoint notifications. We need to be a + * minimum distance away from the last checkpoint to prevent + * over checkpointing. Note reshape checkpointing is not + * handled here. 
+ */ + if (sync_completed > a->last_checkpoint && + sync_completed - a->last_checkpoint > a->info.component_size >> 4 && + a->curr_action > reshape && a->next_action == bad_action) { + a->last_checkpoint = sync_completed; + a->next_action = idle; + } + a->container->ss->sync_metadata(a->container); dprintf("%s(%d): state:%s action:%s next(", __func__, a->info.container_member, array_states[a->curr_state], sync_actions[a->curr_action]); @@ -461,6 +493,7 @@ static int wait_and_act(struct supertype *container, int nowait) add_fd(&rfds, &maxfd, a->info.state_fd); add_fd(&rfds, &maxfd, a->action_fd); + add_fd(&rfds, &maxfd, a->sync_completed_fd); for (mdi = a->info.devs ; mdi ; mdi = mdi->next) add_fd(&rfds, &maxfd, mdi->state_fd); diff --git a/super-intel.c b/super-intel.c index 677396c..394ace4 100644 --- a/super-intel.c +++ b/super-intel.c @@ -4384,6 +4384,7 @@ static int imsm_set_array_state(struct active_array *a, int consistent) dprintf("imsm: mark resync done\n"); end_migration(dev, map_state); super->updates_pending++; + a->last_checkpoint = 0; } } else if (!is_resyncing(dev) && !failed) { /* mark the start of the init process if nothing is failed */ @@ -4476,17 +4477,20 @@ static void imsm_set_disk(struct active_array *a, int n, int state) map = get_imsm_map(dev, 0); map->failed_disk_num = ~0; super->updates_pending++; + a->last_checkpoint = 0; } else if (map_state == IMSM_T_STATE_DEGRADED && map->map_state != map_state && !dev->vol.migr_state) { dprintf("imsm: mark degraded\n"); map->map_state = map_state; super->updates_pending++; + a->last_checkpoint = 0; } else if (map_state == IMSM_T_STATE_FAILED && map->map_state != map_state) { dprintf("imsm: mark failed\n"); end_migration(dev, map_state); super->updates_pending++; + a->last_checkpoint = 0; } } -- cgit From 33414a0182ae193150f65f7bca97a7e4d818a49e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 15 Jun 2010 17:55:41 -0700 Subject: Kill subarray v2 Support for deleting a subarray out of a container. 
When all subarrays are deleted the component devices are converted back into spares, a --zero-superblock is still needed to kill the remaining metadata at this point. This operation is blocked when the subarray is active and may also be blocked by the metadata handler when deleting the subarray might change the uuid of other active subarrays. For example, with imsm, deleting subarray 'n' may change the uuid of subarrays with indexes > n. Deleting a subarray needs to be a container wide event to ensure disks that record the modified subarray list perceive other disks that did not receive this change as out of date. Notes: The st->subarray parsing in super-intel.c and super-ddf.c is updated to be more strict now that we are reading user supplied subarray values. Offline container modification shares actions that mdmon typically handles so promote is_container_member() and version_to_superswitch() (formerly find_metadata_methods()) to generic utility functions for the cases where mdadm performs the operation. Signed-off-by: Dan Williams --- Kill.c | 78 +++++++++++++++++++++++++ ReadMe.c | 1 + mdadm.8 | 10 ++++ mdadm.c | 8 +++ mdadm.h | 8 +++ mdmon.c | 25 +------- super-ddf.c | 25 +++++++- super-intel.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- util.c | 137 ++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 431 insertions(+), 40 deletions(-) diff --git a/Kill.c b/Kill.c index e738978..b3344bd 100644 --- a/Kill.c +++ b/Kill.c @@ -79,3 +79,81 @@ int Kill(char *dev, struct supertype *st, int force, int quiet, int noexcl) close(fd); return rv; } + +int Kill_subarray(char *dev, char *subarray, int quiet) +{ + /* Delete a subarray out of a container, the subarry must be + * inactive. The subarray string must be a subarray index + * number. 
+ * + * 0 = successfully deleted subarray from all container members + * 1 = failed to sync metadata to one or more devices + * 2 = failed to find the container, subarray, or other resource + * issue + */ + struct supertype supertype, *st = &supertype; + int fd, rv = 2; + + memset(st, 0, sizeof(*st)); + + if (snprintf(st->subarray, sizeof(st->subarray), "%s", subarray) >= + sizeof(st->subarray)) { + if (!quiet) + fprintf(stderr, + Name ": Input overflow for subarray '%s' > %zu bytes\n", + subarray, sizeof(st->subarray) - 1); + return 2; + } + + fd = open_subarray(dev, st, quiet); + if (fd < 0) + return 2; + + if (!st->ss->kill_subarray) { + if (!quiet) + fprintf(stderr, + Name ": Operation not supported for %s metadata\n", + st->ss->name); + goto free_super; + } + + if (is_subarray_active(subarray, st->devname)) { + if (!quiet) + fprintf(stderr, + Name ": Subarray-%s still active, aborting\n", + subarray); + goto free_super; + } + + if (mdmon_running(st->devnum)) + st->update_tail = &st->updates; + + /* ok we've found our victim, drop the axe */ + rv = st->ss->kill_subarray(st); + if (rv) { + if (!quiet) + fprintf(stderr, + Name ": Failed to delete subarray-%s from %s\n", + subarray, dev); + goto free_super; + } + + /* FIXME these routines do not report success/failure */ + if (st->update_tail) + flush_metadata_updates(st); + else + st->ss->sync_metadata(st); + + if (!quiet) + fprintf(stderr, + Name ": Deleted subarray-%s from %s, UUIDs may have changed\n", + subarray, dev); + + rv = 0; + + free_super: + st->ss->free_super(st); + close(fd); + + return rv; +} diff --git a/ReadMe.c b/ReadMe.c index 9d5a211..387ba6d 100644 --- a/ReadMe.c +++ b/ReadMe.c @@ -108,6 +108,7 @@ struct option long_options[] = { {"examine-bitmap", 0, 0, 'X'}, {"auto-detect", 0, 0, AutoDetect}, {"detail-platform", 0, 0, DetailPlatform}, + {"kill-subarray", 1, 0, KillSubarray}, /* synonyms */ {"monitor", 0, 0, 'F'}, diff --git a/mdadm.8 b/mdadm.8 index 4edfc41..784ba31 100644 --- a/mdadm.8 +++ 
b/mdadm.8 @@ -1172,6 +1172,16 @@ overwritten with zeros. With the block where the superblock would be is overwritten even if it doesn't appear to be valid. +.TP +.B \-\-kill\-subarray= +If the device is a container and the argument to \-\-kill\-subarray +specifies an inactive subarray in the container, then the subarray is +deleted. Deleting all subarrays will leave an 'empty-container' or +spare superblock on the drives. See \-\-zero\-superblock for completely +removing a superblock. Note that some formats depend on the subarray +index for generating a UUID, this command will fail if it would change +the UUID of an active subarray. + .TP .BR \-t ", " \-\-test When used with diff --git a/mdadm.c b/mdadm.c index d5e34c0..446fab8 100644 --- a/mdadm.c +++ b/mdadm.c @@ -103,6 +103,7 @@ int main(int argc, char *argv[]) int dosyslog = 0; int rebuild_map = 0; int auto_update_home = 0; + char *subarray = NULL; int print_help = 0; FILE *outf; @@ -216,6 +217,9 @@ int main(int argc, char *argv[]) case 'W': case Waitclean: case DetailPlatform: + case KillSubarray: + if (opt == KillSubarray) + subarray = optarg; case 'K': if (!mode) newmode = MISC; break; } if (mode && newmode == mode) { @@ -807,6 +811,7 @@ int main(int argc, char *argv[]) case O(MISC,'W'): case O(MISC, Waitclean): case O(MISC, DetailPlatform): + case O(MISC, KillSubarray): if (devmode && devmode != opt && (devmode == 'E' || (opt == 'E' && devmode != 'Q'))) { fprintf(stderr, Name ": --examine/-E cannot be given with -%c\n", @@ -1403,6 +1408,9 @@ int main(int argc, char *argv[]) rv |= Wait(dv->devname); continue; case Waitclean: rv |= WaitClean(dv->devname, -1, verbose-quiet); continue; + case KillSubarray: + rv |= Kill_subarray(dv->devname, subarray, quiet); + continue; } mdfd = open_mddev(dv->devname, 1); if (mdfd>=0) { diff --git a/mdadm.h b/mdadm.h index d9d17b0..f387477 100644 --- a/mdadm.h +++ b/mdadm.h @@ -273,6 +273,7 @@ enum special_options { AutoDetect, Waitclean, DetailPlatform, + KillSubarray, }; /* 
structures read from config file */ @@ -609,6 +610,8 @@ extern struct superswitch { struct mdinfo *(*container_content)(struct supertype *st); /* Allow a metadata handler to override mdadm's default layouts */ int (*default_layout)(int level); /* optional */ + /* Permit subarray's to be deleted from inactive containers */ + int (*kill_subarray)(struct supertype *st); /* optional */ /* for mdmon */ int (*open_new)(struct supertype *c, struct active_array *a, @@ -805,6 +808,7 @@ extern int Monitor(mddev_dev_t devlist, int dosyslog, int test, char *pidfile, int increments); extern int Kill(char *dev, struct supertype *st, int force, int quiet, int noexcl); +extern int Kill_subarray(char *dev, char *subarray, int quiet); extern int Wait(char *dev); extern int WaitClean(char *dev, int sock, int verbose); @@ -911,6 +915,10 @@ extern int create_mddev(char *dev, char *name, int autof, int trustworthy, #define METADATA 3 extern int open_mddev(char *dev, int report_errors); extern int open_container(int fd); +extern int is_container_member(struct mdstat_ent *ent, char *devname); +extern int is_subarray_active(char *subarray, char *devname); +extern int open_subarray(char *dev, struct supertype *st, int quiet); +extern struct superswitch *version_to_superswitch(char *vers); extern char *pid_dir; extern int mdmon_running(int devnum); diff --git a/mdmon.c b/mdmon.c index 69c320e..beb39cf 100644 --- a/mdmon.c +++ b/mdmon.c @@ -104,15 +104,6 @@ int __clone2(int (*fn)(void *), return mon_tid; } -static struct superswitch *find_metadata_methods(char *vers) -{ - if (strcmp(vers, "ddf") == 0) - return &super_ddf; - if (strcmp(vers, "imsm") == 0) - return &super_imsm; - return NULL; -} - static int make_pidfile(char *devname) { char path[100]; @@ -136,18 +127,6 @@ static int make_pidfile(char *devname) return 0; } -int is_container_member(struct mdstat_ent *mdstat, char *container) -{ - if (mdstat->metadata_version == NULL || - strncmp(mdstat->metadata_version, "external:", 9) != 0 || 
- !is_subarray(mdstat->metadata_version+9) || - strncmp(mdstat->metadata_version+10, container, strlen(container)) != 0 || - mdstat->metadata_version[10+strlen(container)] != '/') - return 0; - - return 1; -} - static void try_kill_monitor(pid_t pid, char *devname, int sock) { char buf[100]; @@ -414,9 +393,9 @@ static int mdmon(char *devname, int devnum, int must_fork, int takeover) exit(3); } - container->ss = find_metadata_methods(mdi->text_version); + container->ss = version_to_superswitch(mdi->text_version); if (container->ss == NULL) { - fprintf(stderr, "mdmon: %s uses unknown metadata: %s\n", + fprintf(stderr, "mdmon: %s uses unsupported metadata: %s\n", devname, mdi->text_version); exit(3); } diff --git a/super-ddf.c b/super-ddf.c index 0e6f1e5..736e07f 100644 --- a/super-ddf.c +++ b/super-ddf.c @@ -845,10 +845,18 @@ static int load_super_ddf(struct supertype *st, int fd, } if (st->subarray[0]) { + unsigned long val; struct vcl *v; + char *ep; + + val = strtoul(st->subarray, &ep, 10); + if (*ep != '\0') { + free(super); + return 1; + } for (v = super->conflist; v; v = v->next) - if (v->vcnum == atoi(st->subarray)) + if (v->vcnum == val) super->currentconf = v; if (!super->currentconf) { free(super); @@ -2870,14 +2878,25 @@ static int load_super_ddf_all(struct supertype *st, int fd, return 1; } if (st->subarray[0]) { + unsigned long val; struct vcl *v; + char *ep; + + val = strtoul(st->subarray, &ep, 10); + if (*ep != '\0') { + free(super); + return 1; + } for (v = super->conflist; v; v = v->next) - if (v->vcnum == atoi(st->subarray)) + if (v->vcnum == val) super->currentconf = v; - if (!super->currentconf) + if (!super->currentconf) { + free(super); return 1; + } } + *sbp = super; if (st->ss == NULL) { st->ss = &super_ddf; diff --git a/super-intel.c b/super-intel.c index bdd7a96..d81d620 100644 --- a/super-intel.c +++ b/super-intel.c @@ -282,6 +282,7 @@ struct extent { enum imsm_update_type { update_activate_spare, update_create_array, + update_kill_array, 
update_add_disk, }; @@ -303,6 +304,11 @@ struct imsm_update_create_array { struct imsm_dev dev; }; +struct imsm_update_kill_array { + enum imsm_update_type type; + int dev_idx; +}; + struct imsm_update_add_disk { enum imsm_update_type type; }; @@ -2753,11 +2759,20 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, } if (st->subarray[0]) { - if (atoi(st->subarray) <= super->anchor->num_raid_devs) - super->current_vol = atoi(st->subarray); + unsigned long val; + char *ep; + + err = 1; + val = strtoul(st->subarray, &ep, 10); + if (*ep != '\0') { + free_imsm(super); + goto error; + } + + if (val < super->anchor->num_raid_devs) + super->current_vol = val; else { free_imsm(super); - err = 1; goto error; } } @@ -2824,8 +2839,17 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname) } if (st->subarray[0]) { - if (atoi(st->subarray) <= super->anchor->num_raid_devs) - super->current_vol = atoi(st->subarray); + unsigned long val; + char *ep; + + val = strtoul(st->subarray, &ep, 10); + if (*ep != '\0') { + free_imsm(super); + return 1; + } + + if (val < super->anchor->num_raid_devs) + super->current_vol = val; else { free_imsm(super); return 1; @@ -4007,6 +4031,82 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, close(cfd); return 0; } + +static void handle_missing(struct intel_super *super, struct imsm_dev *dev); + +static int kill_subarray_imsm(struct supertype *st) +{ + /* remove the subarray currently referenced by ->current_vol */ + __u8 i; + struct intel_dev **dp; + struct intel_super *super = st->sb; + __u8 current_vol = super->current_vol; + struct imsm_super *mpb = super->anchor; + + if (super->current_vol < 0) + return 2; + super->current_vol = -1; /* invalidate subarray cursor */ + + /* block deletions that would change the uuid of active subarrays + * + * FIXME when immutable ids are available, but note that we'll + * also need to fixup the invalidated/active subarray indexes in + * 
mdstat + */ + for (i = 0; i < mpb->num_raid_devs; i++) { + char subarray[4]; + + if (i < current_vol) + continue; + sprintf(subarray, "%u", i); + if (is_subarray_active(subarray, st->devname)) { + fprintf(stderr, + Name ": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n", + current_vol, i); + + return 2; + } + } + + if (st->update_tail) { + struct imsm_update_kill_array *u = malloc(sizeof(*u)); + + if (!u) + return 2; + u->type = update_kill_array; + u->dev_idx = current_vol; + append_metadata_update(st, u, sizeof(*u)); + + return 0; + } + + for (dp = &super->devlist; *dp;) + if ((*dp)->index == current_vol) { + *dp = (*dp)->next; + } else { + handle_missing(super, (*dp)->dev); + if ((*dp)->index > current_vol) + (*dp)->index--; + dp = &(*dp)->next; + } + + /* no more raid devices, all active components are now spares, + * but of course failed are still failed + */ + if (--mpb->num_raid_devs == 0) { + struct dl *d; + + for (d = super->disks; d; d = d->next) + if (d->index > -2) { + d->index = -1; + d->disk.status = SPARE_DISK; + } + } + + super->updates_pending++; + + return 0; +} #endif /* MDASSEMBLE */ static int is_rebuilding(struct imsm_dev *dev) @@ -4347,6 +4447,24 @@ static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx) memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1); } +static void handle_missing(struct intel_super *super, struct imsm_dev *dev) +{ + __u8 map_state; + struct dl *dl; + int failed; + + if (!super->missing) + return; + failed = imsm_count_failed(super, dev); + map_state = imsm_check_degraded(super, dev, failed); + + dprintf("imsm: mark missing\n"); + end_migration(dev, map_state); + for (dl = super->missing; dl; dl = dl->next) + mark_missing(dev, &dl->disk, dl->index); + super->updates_pending++; +} + /* Handle dirty -> clean transititions and resync. 
Degraded and rebuild * states are handled in imsm_set_disk() with one exception, when a * resync is stopped due to a new failure this routine will set the @@ -4363,15 +4481,8 @@ static int imsm_set_array_state(struct active_array *a, int consistent) __u32 blocks_per_unit; /* before we activate this array handle any missing disks */ - if (consistent == 2 && super->missing) { - struct dl *dl; - - dprintf("imsm: mark missing\n"); - end_migration(dev, map_state); - for (dl = super->missing; dl; dl = dl->next) - mark_missing(dev, &dl->disk, dl->index); - super->updates_pending++; - } + if (consistent == 2) + handle_missing(super, dev); if (consistent == 2 && (!is_resync_complete(&a->info) || @@ -5067,6 +5178,45 @@ static void imsm_process_update(struct supertype *st, } break; } + case update_kill_array: { + struct imsm_update_kill_array *u = (void *) update->buf; + int victim = u->dev_idx; + struct active_array *a; + struct intel_dev **dp; + struct imsm_dev *dev; + + /* sanity check that we are not affecting the uuid of + * active arrays, or deleting an active array + * + * FIXME when immutable ids are available, but note that + * we'll also need to fixup the invalidated/active + * subarray indexes in mdstat + */ + for (a = st->arrays; a; a = a->next) + if (a->info.container_member >= victim) + break; + /* by definition if mdmon is running at least one array + * is active in the container, so checking + * mpb->num_raid_devs is just extra paranoia + */ + dev = get_imsm_dev(super, victim); + if (a || !dev || mpb->num_raid_devs == 1) { + dprintf("failed to delete subarray-%d\n", victim); + break; + } + + for (dp = &super->devlist; *dp;) + if ((*dp)->index == super->current_vol) { + *dp = (*dp)->next; + } else { + if ((*dp)->index > victim) + (*dp)->index--; + dp = &(*dp)->next; + } + mpb->num_raid_devs--; + super->updates_pending++; + break; + } case update_add_disk: /* we may be able to repair some arrays if disks are @@ -5242,6 +5392,7 @@ struct superswitch super_imsm = 
{ .validate_geometry = validate_geometry_imsm, .add_to_super = add_to_super_imsm, .detail_platform = detail_platform_imsm, + .kill_subarray = kill_subarray_imsm, #endif .match_home = match_home_imsm, .uuid_from_super= uuid_from_super_imsm, diff --git a/util.c b/util.c index 25f1e56..66bf2f9 100644 --- a/util.c +++ b/util.c @@ -1392,6 +1392,143 @@ int open_container(int fd) return -1; } +struct superswitch *version_to_superswitch(char *vers) +{ + int i; + + for (i = 0; superlist[i]; i++) { + struct superswitch *ss = superlist[i]; + + if (strcmp(vers, ss->name) == 0) + return ss; + } + + return NULL; +} + +int is_container_member(struct mdstat_ent *mdstat, char *container) +{ + if (mdstat->metadata_version == NULL || + strncmp(mdstat->metadata_version, "external:", 9) != 0 || + !is_subarray(mdstat->metadata_version+9) || + strncmp(mdstat->metadata_version+10, container, strlen(container)) != 0 || + mdstat->metadata_version[10+strlen(container)] != '/') + return 0; + + return 1; +} + +int is_subarray_active(char *subarray, char *container) +{ + struct mdstat_ent *mdstat = mdstat_read(0, 0); + struct mdstat_ent *ent; + + for (ent = mdstat; ent; ent = ent->next) { + if (is_container_member(ent, container)) { + char *inst = &ent->metadata_version[10+strlen(container)+1]; + + if (strcmp(inst, subarray) == 0) + break; + } + } + + free_mdstat(mdstat); + + return ent != NULL; +} + +/* open_subarray - opens a subarray in a container + * @dev: container device name + * @st: supertype with only ->subarray set + * @quiet: block reporting errors flag + * + * On success returns an fd to a container and fills in *st + */ +int open_subarray(char *dev, struct supertype *st, int quiet) +{ + struct mdinfo *mdi; + int fd, err = 1; + + fd = open(dev, O_RDWR|O_EXCL); + if (fd < 0) { + if (!quiet) + fprintf(stderr, Name ": Couldn't open %s, aborting\n", + dev); + return 2; + } + + st->devnum = fd2devnum(fd); + if (st->devnum == NoMdDev) { + if (!quiet) + fprintf(stderr, + Name ": Failed to 
determine device number for %s\n", + dev); + goto close_fd; + } + + mdi = sysfs_read(fd, st->devnum, GET_VERSION|GET_LEVEL); + if (!mdi) { + if (!quiet) + fprintf(stderr, Name ": Failed to read sysfs for %s\n", + dev); + goto close_fd; + } + + if (mdi->array.level != UnSet) { + if (!quiet) + fprintf(stderr, Name ": %s is not a container\n", dev); + goto free_sysfs; + } + + st->ss = version_to_superswitch(mdi->text_version); + if (!st->ss) { + if (!quiet) + fprintf(stderr, + Name ": Operation not supported for %s metadata\n", + mdi->text_version); + goto free_sysfs; + } + + st->devname = devnum2devname(st->devnum); + if (!st->devname) { + if (!quiet) + fprintf(stderr, Name ": Failed to allocate device name\n"); + goto free_sysfs; + } + + if (st->ss->load_super(st, fd, NULL)) { + if (!quiet) + fprintf(stderr, Name ": Failed to find subarray-%s in %s\n", + st->subarray, dev); + goto free_name; + } + + if (!st->loaded_container) { + if (!quiet) + fprintf(stderr, Name ": %s is not a container\n", dev); + goto free_super; + } + + err = 0; + + free_super: + if (err) + st->ss->free_super(st); + free_name: + if (err) + free(st->devname); + free_sysfs: + sysfs_free(mdi); + close_fd: + if (err) + close(fd); + + if (err) + return -1; + else + return fd; +} + int add_disk(int mdfd, struct supertype *st, struct mdinfo *sra, struct mdinfo *info) { -- cgit From 0bd16cf2173695726f1ed2f9372c613003d80f9a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 15 Jun 2010 18:41:53 -0700 Subject: create: Check with OROM limit before setting default chunk size Make create check with the appropriate meta data handler and see what the largest chunk size is supported. The current 512K default is not supported by existing imsm OROM. 
[dan.j.williams@intel.com: trim the upper limit to 512k for future oroms] Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- Create.c | 8 ++++++-- mdadm.h | 2 ++ platform-intel.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ super-intel.c | 12 ++++++++++++ 4 files changed, 69 insertions(+), 2 deletions(-) diff --git a/Create.c b/Create.c index b04388f..43e5f37 100644 --- a/Create.c +++ b/Create.c @@ -235,9 +235,13 @@ int Create(struct supertype *st, char *mddev, case 6: case 0: if (chunk == 0) { - chunk = 512; + if (st && st->ss->default_chunk) + chunk = st->ss->default_chunk(st); + + chunk = chunk ? : 512; + if (verbose > 0) - fprintf(stderr, Name ": chunk size defaults to 512K\n"); + fprintf(stderr, Name ": chunk size defaults to %dK\n", chunk); } break; case LEVEL_LINEAR: diff --git a/mdadm.h b/mdadm.h index 1bf5ac0..142868a 100644 --- a/mdadm.h +++ b/mdadm.h @@ -609,6 +609,8 @@ extern struct superswitch { struct mdinfo *(*container_content)(struct supertype *st); /* Allow a metadata handler to override mdadm's default layouts */ int (*default_layout)(int level); /* optional */ + /* query the supertype for default chunk size */ + int (*default_chunk)(struct supertype *st); /* optional */ /* for mdmon */ int (*open_new)(struct supertype *c, struct active_array *a, diff --git a/platform-intel.h b/platform-intel.h index bbdc9f9..9088436 100644 --- a/platform-intel.h +++ b/platform-intel.h @@ -115,6 +115,55 @@ static inline int imsm_orom_has_chunk(const struct imsm_orom *orom, int chunk) return !!(orom->sss & (1 << (fs - 1))); } +/** + * fls - find last (most-significant) bit set + * @x: the word to search + * The funciton is borrowed from Linux kernel code + * include/asm-generic/bitops/fls.h + */ +static inline int fls(int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 
0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + +/** + * imsm_orom_default_chunk - return the largest chunk size supported via orom + * @orom: orom pointer from find_imsm_orom + */ +static inline int imsm_orom_default_chunk(const struct imsm_orom *orom) +{ + int fs = fls(orom->sss); + + if (!fs) + return 0; + + return min(512, (1 << fs)); +} + struct sys_dev { char *path; struct sys_dev *next; diff --git a/super-intel.c b/super-intel.c index 394ace4..e212d9a 100644 --- a/super-intel.c +++ b/super-intel.c @@ -4003,6 +4003,17 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout, close(cfd); return 0; } + +static int default_chunk_imsm(struct supertype *st) +{ + struct intel_super *super = st->sb; + + if (!super->orom) + return 0; + + return imsm_orom_default_chunk(super->orom); +} + #endif /* MDASSEMBLE */ static int is_rebuilding(struct imsm_dev *dev) @@ -5240,6 +5251,7 @@ struct superswitch super_imsm = { .brief_detail_super = brief_detail_super_imsm, .write_init_super = write_init_super_imsm, .validate_geometry = validate_geometry_imsm, + .default_chunk = default_chunk_imsm, .add_to_super = add_to_super_imsm, .detail_platform = detail_platform_imsm, #endif -- cgit From 0d80bb2f97e876379fb0ba732e8e97894ebe3de9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 15 Jun 2010 18:41:57 -0700 Subject: imsm: dump each disk's view of the slot state Allow --examine to determine which disk might have a stale view of the per-disk out-of-sync state. 
Signed-off-by: Dan Williams --- super-intel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/super-intel.c b/super-intel.c index e212d9a..b88e6a9 100644 --- a/super-intel.c +++ b/super-intel.c @@ -641,7 +641,7 @@ static __u64 blocks_per_migr_unit(struct imsm_dev *dev); static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx) { __u64 sz; - int slot; + int slot, i; struct imsm_map *map = get_imsm_map(dev, 0); __u32 ord; @@ -650,6 +650,12 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx) printf(" UUID : %s\n", uuid); printf(" RAID Level : %d\n", get_imsm_raid_level(map)); printf(" Members : %d\n", map->num_members); + printf(" Slots : ["); + for (i = 0; i < map->num_members; i++) { + ord = get_imsm_ord_tbl_ent(dev, i); + printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U"); + } + printf("]\n"); slot = get_imsm_disk_slot(map, disk_idx); if (slot >= 0) { ord = get_imsm_ord_tbl_ent(dev, slot); -- cgit From 4f0a7acc9a0a93d39b66b29e374f9a5edd173047 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 15 Jun 2010 18:41:57 -0700 Subject: mdmon: record sync_completed directly to the metadata When sync_action is idle mdmon takes the latest value of md/resync_start or md/<dev>/recovery_start to record the resync/rebuild checkpoint in the metadata. However, now that mdmon is reading sync_completed there is no longer a need to wait for, or force an idle event to take a checkpoint. Simply update the forward progress of ->last_checkpoint at every wakeup event and force it to be recorded at least every 1/16th array-size interval. It may be recorded more frequently if a ->set_array_state() event occurs. This also cleans up some confusion in handling the dual-rebuild case. If more than one spare has been activated the kernel starts the rebuild at the lowest recovery offset, so we do not need to worry about min_recovery_start(). 
Signed-off-by: Dan Williams --- monitor.c | 10 +++++++--- super-intel.c | 7 ++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/monitor.c b/monitor.c index 12f8d3e..59b4181 100644 --- a/monitor.c +++ b/monitor.c @@ -334,10 +334,14 @@ static int read_and_act(struct active_array *a) */ if (sync_completed > a->last_checkpoint && sync_completed - a->last_checkpoint > a->info.component_size >> 4 && - a->curr_action > reshape && a->next_action == bad_action) { + a->curr_action > reshape) { + /* A (non-reshape) sync_action has reached a checkpoint. + * Record the updated position in the metadata + */ + a->last_checkpoint = sync_completed; + a->container->ss->set_array_state(a, a->curr_state <= clean); + } else if (sync_completed > a->last_checkpoint) a->last_checkpoint = sync_completed; - a->next_action = idle; - } a->container->ss->sync_metadata(a->container); dprintf("%s(%d): state:%s action:%s next(", __func__, a->info.container_member, diff --git a/super-intel.c b/super-intel.c index b88e6a9..3bd041a 100644 --- a/super-intel.c +++ b/super-intel.c @@ -4415,14 +4415,11 @@ static int imsm_set_array_state(struct active_array *a, int consistent) /* check if we can update curr_migr_unit from resync_start, recovery_start */ blocks_per_unit = blocks_per_migr_unit(dev); - if (blocks_per_unit && failed <= 1) { + if (blocks_per_unit) { __u32 units32; __u64 units; - if (migr_type(dev) == MIGR_REBUILD) - units = min_recovery_start(&a->info) / blocks_per_unit; - else - units = a->info.resync_start / blocks_per_unit; + units = a->last_checkpoint / blocks_per_unit; units32 = units; /* check that we did not overflow 32-bits, and that -- cgit From 6a0ee6a0770e8b2ae2a2bbe79896d4ecb083e218 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 15 Jun 2010 18:41:57 -0700 Subject: Remove 'checkpointing' side effect of --wait-clean Now that mdmon records periodic checkpoints, and checkpoints every ->set_array_state() event we no longer need to 'idle' sync_action from 
--wait-clean. Signed-off-by: Dan Williams --- mdadm.8 | 7 ++----- sysfs.c | 3 --- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/mdadm.8 b/mdadm.8 index 4edfc41..aa2b12f 100644 --- a/mdadm.8 +++ b/mdadm.8 @@ -1195,14 +1195,11 @@ listed, otherwise it will return failure. For each md device given, or each device in /proc/mdstat if .B \-\-scan is given, arrange for the array to be marked clean as soon as possible. -Also, quiesce resync so that the monitor for external metadata arrays -(mdmon) has an opportunity to checkpoint the resync position. .I mdadm will return with success if the array uses external metadata and we successfully waited. For native arrays this returns immediately as the -kernel handles both dirty-clean transitions and resync checkpointing in -the kernel at shutdown. No action is taken if safe-mode handling is -disabled. +kernel handles dirty-clean transitions at shutdown. No action is taken +if safe-mode handling is disabled. .SH For Incremental Assembly mode: .TP diff --git a/sysfs.c b/sysfs.c index ebf9d8a..72c7c5b 100644 --- a/sysfs.c +++ b/sysfs.c @@ -852,9 +852,6 @@ int WaitClean(char *dev, int sock, int verbose) tm.tv_sec = 5; tm.tv_usec = 0; - /* give mdmon a chance to checkpoint resync */ - sysfs_set_str(mdi, NULL, "sync_action", "idle"); - FD_ZERO(&fds); /* wait for array_state to be clean */ -- cgit From b526e52dc7cbdde98db9c9f8765be28ba6d71d78 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 16 Jun 2010 17:26:04 -0700 Subject: Always assume SKIP_GONE_DEVS behaviour and kill the flag ...i.e. GET_DEVS == (GET_DEVS|SKIP_GONE_DEVS) A null pointer dereference in Incremental.c can be triggered by replugging a disk while the old name is in use. When mdadm -I is called on the new disk we fail the call to sysfs_read(). I audited all the locations that use GET_DEVS and it appears they can tolerate missing a drive. So just make SKIP_GONE_DEVS the default behaviour. 
Also fix up remaining unchecked usages of the sysfs_read() return value. Reported-by: Dave Jiang Signed-off-by: Dan Williams --- Grow.c | 20 +++++++++++++------- Incremental.c | 5 +++++ managemon.c | 2 +- mapfile.c | 5 +++-- mdadm.h | 1 - mdmon.c | 3 +-- super-ddf.c | 8 +------- super-intel.c | 7 +------ sysfs.c | 20 +++++++++----------- 9 files changed, 34 insertions(+), 37 deletions(-) diff --git a/Grow.c b/Grow.c index 28ed8d7..3923a90 100644 --- a/Grow.c +++ b/Grow.c @@ -546,7 +546,13 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, return 1; } sra = sysfs_read(fd, 0, GET_LEVEL); - frozen = freeze_array(sra); + if (sra) + frozen = freeze_array(sra); + else { + fprintf(stderr, Name ": failed to read sysfs parameters for %s\n", + devname); + return 1; + } if (frozen < 0) { fprintf(stderr, Name ": %s is performing resync/recovery and cannot" " be reshaped\n", devname); @@ -1970,6 +1976,12 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, int cache; int done = 0; + sra = sysfs_read(-1, devname2devnum(info->sys_name), + GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| + GET_CACHE); + if (!sra) + return 1; + err = sysfs_set_str(info, NULL, "array_state", "readonly"); if (err) return err; @@ -1990,7 +2002,6 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, ochunk = info->array.chunk_size; nchunk = info->new_chunk; - a = (ochunk/512) * odata; b = (nchunk/512) * ndata; /* Find GCD */ @@ -2003,11 +2014,6 @@ int Grow_continue(int mdfd, struct supertype *st, struct mdinfo *info, /* LCM == product / GCD */ blocks = (ochunk/512) * (nchunk/512) * odata * ndata / a; - sra = sysfs_read(-1, devname2devnum(info->sys_name), - GET_COMPONENT|GET_DEVS|GET_OFFSET|GET_STATE| - GET_CACHE); - - if (ndata == odata) while (blocks * 32 < sra->component_size && blocks < 16*1024*2) diff --git a/Incremental.c b/Incremental.c index d6dd0f4..8062e2b 100644 --- a/Incremental.c +++ b/Incremental.c @@ -369,6 +369,8 @@ int 
Incremental(char *devname, int verbose, int runstop, strcpy(chosen_name, devnum2devname(mp->devnum)); sra = sysfs_read(mdfd, fd2devnum(mdfd), (GET_DEVS | GET_STATE)); + if (!sra) + return 2; if (sra->devs) { sprintf(dn, "%d:%d", sra->devs->disk.major, @@ -586,6 +588,9 @@ static int count_active(struct supertype *st, int mdfd, char **availp, struct mdinfo *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE); char *avail = NULL; + if (!sra) + return 0; + for (d = sra->devs ; d ; d = d->next) { char dn[30]; int dfd; diff --git a/managemon.c b/managemon.c index 454c39d..debca97 100644 --- a/managemon.c +++ b/managemon.c @@ -315,7 +315,7 @@ static void manage_container(struct mdstat_ent *mdstat, * To see what is removed and what is added. * These need to be remove from, or added to, the array */ - mdi = sysfs_read(-1, mdstat->devnum, GET_DEVS|SKIP_GONE_DEVS); + mdi = sysfs_read(-1, mdstat->devnum, GET_DEVS); if (!mdi) { /* invalidate the current count so we can try again */ container->devcnt = -1; diff --git a/mapfile.c b/mapfile.c index 0f12559..ffe8e16 100644 --- a/mapfile.c +++ b/mapfile.c @@ -368,7 +368,7 @@ void RebuildMap(void) } for (md = mdstat ; md ; md = md->next) { - struct mdinfo *sra = sysfs_read(-1, md->devnum, GET_DEVS|SKIP_GONE_DEVS); + struct mdinfo *sra = sysfs_read(-1, md->devnum, GET_DEVS); struct mdinfo *sd; if (!sra) @@ -486,7 +486,8 @@ void RebuildMap(void) for (md = mdstat ; md ; md = md->next) { struct mdinfo *sra = sysfs_read(-1, md->devnum, GET_VERSION); - sysfs_uevent(sra, "change"); + if (sra) + sysfs_uevent(sra, "change"); sysfs_free(sra); } map_free(map); diff --git a/mdadm.h b/mdadm.h index a0797e8..62bfc44 100644 --- a/mdadm.h +++ b/mdadm.h @@ -404,7 +404,6 @@ enum sysfs_read_flags { GET_SIZE = (1 << 12), GET_STATE = (1 << 13), GET_ERROR = (1 << 14), - SKIP_GONE_DEVS = (1 << 15), }; /* If fd >= 0, get the array it is open on, diff --git a/mdmon.c b/mdmon.c index 69c320e..2191814 100644 --- a/mdmon.c +++ b/mdmon.c @@ -394,8 +394,7 @@ static 
int mdmon(char *devname, int devnum, int must_fork, int takeover) exit(3); } - mdi = sysfs_read(mdfd, container->devnum, - GET_VERSION|GET_LEVEL|GET_DEVS|SKIP_GONE_DEVS); + mdi = sysfs_read(mdfd, container->devnum, GET_VERSION|GET_LEVEL|GET_DEVS); if (!mdi) { fprintf(stderr, "mdmon: failed to load sysfs info for %s\n", diff --git a/super-ddf.c b/super-ddf.c index b01c68d..6145e3c 100644 --- a/super-ddf.c +++ b/super-ddf.c @@ -2807,14 +2807,8 @@ static int load_super_ddf_all(struct supertype *st, int fd, int seq; char nm[20]; int dfd; - int devnum = fd2devnum(fd); - enum sysfs_read_flags flags; - flags = GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE; - if (mdmon_running(devnum)) - flags |= SKIP_GONE_DEVS; - - sra = sysfs_read(fd, 0, flags); + sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE); if (!sra) return 1; if (sra->array.major_version != -1 || diff --git a/super-intel.c b/super-intel.c index d6d8b09..e09ce5e 100644 --- a/super-intel.c +++ b/super-intel.c @@ -2747,14 +2747,9 @@ static int load_super_imsm_all(struct supertype *st, int fd, void **sbp, int retry; int err = 0; int i; - enum sysfs_read_flags flags; - - flags = GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE; - if (mdmon_running(devnum)) - flags |= SKIP_GONE_DEVS; /* check if 'fd' an opened container */ - sra = sysfs_read(fd, 0, flags); + sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE); if (!sra) return 1; diff --git a/sysfs.c b/sysfs.c index ebf9d8a..17f2567 100644 --- a/sysfs.c +++ b/sysfs.c @@ -273,22 +273,20 @@ struct mdinfo *sysfs_read(int fd, int devnum, unsigned long options) strcpy(dbase, "block/dev"); if (load_sys(fname, buf)) { + /* assume this is a stale reference to a hot + * removed device + */ free(dev); - if (options & SKIP_GONE_DEVS) - continue; - else - goto abort; + continue; } sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor); /* special case check for block devices that can go 'offline' */ - if (options & SKIP_GONE_DEVS) { - strcpy(dbase, 
"block/device/state"); - if (load_sys(fname, buf) == 0 && - strncmp(buf, "offline", 7) == 0) { - free(dev); - continue; - } + strcpy(dbase, "block/device/state"); + if (load_sys(fname, buf) == 0 && + strncmp(buf, "offline", 7) == 0) { + free(dev); + continue; } /* finally add this disk to the array */ -- cgit From aa534678baad80689a642ba1bd602a00a267ac03 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 22 Jun 2010 16:30:59 -0700 Subject: Rename subarray v2 Allow the name of the array stored in the metadata to be updated. In some cases the metadata format may not be able to support this rename without modifying the UUID. In these cases the request will be blocked. Otherwise we allow the rename to take place, even for active arrays. This assumes that the user understands the difference between the kernel node name, the device node symlink name, and the metadata specific name. Anticipating further need to modify subarrays in-place, introduce the ->update_subarray() superswitch method. A future potential use case is setting storage pool (spare-group) identifiers. 
Signed-off-by: Dan Williams --- Manage.c | 53 ++++++++++++++++++++++++++ ReadMe.c | 1 + mdadm.8 | 30 +++++++++++++++ mdadm.c | 41 ++++++++++++++++++-- mdadm.h | 7 +++- super-intel.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- 6 files changed, 235 insertions(+), 14 deletions(-) diff --git a/Manage.c b/Manage.c index f6fb3ef..cca1503 100644 --- a/Manage.c +++ b/Manage.c @@ -869,4 +869,57 @@ int autodetect(void) } return rv; } + +int Update_subarray(char *dev, char *subarray, char *update, mddev_ident_t ident, int quiet) +{ + struct supertype supertype, *st = &supertype; + int fd, rv = 2; + + memset(st, 0, sizeof(*st)); + if (snprintf(st->subarray, sizeof(st->subarray), "%s", subarray) >= + sizeof(st->subarray)) { + if (!quiet) + fprintf(stderr, + Name ": Input overflow for subarray '%s' > %zu bytes\n", + subarray, sizeof(st->subarray) - 1); + return 2; + } + + fd = open_subarray(dev, st, quiet); + if (fd < 0) + return 2; + + if (!st->ss->update_subarray) { + if (!quiet) + fprintf(stderr, + Name ": Operation not supported for %s metadata\n", + st->ss->name); + goto free_super; + } + + if (mdmon_running(st->devnum)) + st->update_tail = &st->updates; + + rv = st->ss->update_subarray(st, update, ident); + + if (rv) { + if (!quiet) + fprintf(stderr, Name ": Failed to update %s of subarray-%s in %s\n", + update, subarray, dev); + } else if (st->update_tail) + flush_metadata_updates(st); + else + st->ss->sync_metadata(st); + + if (rv == 0 && strcmp(update, "name") == 0 && !quiet) + fprintf(stderr, + Name ": Updated subarray-%s name from %s, UUIDs may have changed\n", + subarray, dev); + + free_super: + st->ss->free_super(st); + close(fd); + + return rv; +} #endif diff --git a/ReadMe.c b/ReadMe.c index 387ba6d..fa33310 100644 --- a/ReadMe.c +++ b/ReadMe.c @@ -109,6 +109,7 @@ struct option long_options[] = { {"auto-detect", 0, 0, AutoDetect}, {"detail-platform", 0, 0, DetailPlatform}, {"kill-subarray", 1, 0, KillSubarray}, + {"update-subarray", 1, 0, 
UpdateSubarray}, /* synonyms */ {"monitor", 0, 0, 'F'}, diff --git a/mdadm.8 b/mdadm.8 index 784ba31..1909819 100644 --- a/mdadm.8 +++ b/mdadm.8 @@ -1182,6 +1182,14 @@ removing a superblock. Note that some formats depend on the subarray index for generating a UUID, this command will fail if it would change the UUID of an active subarray. +.TP +.B \-\-update\-subarray= +If the device is a container and the argument to \-\-update\-subarray +specifies a subarray in the container, then attempt to update the given +superblock field in the subarray. See below in +.B MISC MODE +for details. + .TP .BR \-t ", " \-\-test When used with @@ -1760,6 +1768,28 @@ metadata is platform independent metadata failed to find its platform components on this system .RE +.TP +.B \-\-update\-subarray= +If the device is a container and the argument to \-\-update\-subarray +specifies a subarray in the container, then attempt to update the given +superblock field in the subarray. Similar to updating an array in +"assemble" mode, the field to update is selected by +.B \-U +or +.B \-\-update= +option. Currently only +.B name +is supported. + +The +.B name +option updates the subarray name in the metadata, it may not affect the +device node name or the device node symlink until the subarray is +re\-assembled. If updating +.B name +would change the UUID of an active subarray this operation is blocked, +and the command will end in an error. + .TP .B \-\-examine The device should be a component of an md array. 
diff --git a/mdadm.c b/mdadm.c index 446fab8..e7435fd 100644 --- a/mdadm.c +++ b/mdadm.c @@ -218,8 +218,14 @@ int main(int argc, char *argv[]) case Waitclean: case DetailPlatform: case KillSubarray: - if (opt == KillSubarray) + case UpdateSubarray: + if (opt == KillSubarray || opt == UpdateSubarray) { + if (subarray) { + fprintf(stderr, Name ": subarray can only be specified once\n"); + exit(2); + } subarray = optarg; + } case 'K': if (!mode) newmode = MISC; break; } if (mode && newmode == mode) { @@ -593,11 +599,16 @@ int main(int argc, char *argv[]) case O(CREATE,'N'): case O(ASSEMBLE,'N'): + case O(MISC,'N'): if (ident.name[0]) { fprintf(stderr, Name ": name cannot be set twice. " "Second value %s.\n", optarg); exit(2); } + if (mode == MISC && !subarray) { + fprintf(stderr, Name ": -N/--name only valid with --update-subarray in misc mode\n"); + exit(2); + } if (strlen(optarg) > 32) { fprintf(stderr, Name ": name '%s' is too long, 32 chars max.\n", optarg); @@ -624,11 +635,16 @@ int main(int argc, char *argv[]) continue; case O(ASSEMBLE,'U'): /* update the superblock */ + case O(MISC,'U'): if (update) { fprintf(stderr, Name ": Can only update one aspect of superblock, both %s and %s given.\n", update, optarg); exit(2); } + if (mode == MISC && !subarray) { + fprintf(stderr, Name ": Only subarrays can be updated in misc mode\n"); + exit(2); + } update = optarg; if (strcmp(update, "sparc2.2")==0) continue; @@ -812,10 +828,20 @@ int main(int argc, char *argv[]) case O(MISC, Waitclean): case O(MISC, DetailPlatform): case O(MISC, KillSubarray): + case O(MISC, UpdateSubarray): if (devmode && devmode != opt && (devmode == 'E' || (opt == 'E' && devmode != 'Q'))) { - fprintf(stderr, Name ": --examine/-E cannot be given with -%c\n", - devmode =='E'?opt:devmode); + fprintf(stderr, Name ": --examine/-E cannot be given with "); + if (devmode == 'E') { + if (option_index >= 0) + fprintf(stderr, "--%s\n", + long_options[option_index].name); + else + fprintf(stderr, "-%c\n", 
opt); + } else if (isalpha(devmode)) + fprintf(stderr, "-%c\n", devmode); + else + fprintf(stderr, "previous option\n"); exit(2); } devmode = opt; @@ -1411,6 +1437,15 @@ int main(int argc, char *argv[]) case KillSubarray: rv |= Kill_subarray(dv->devname, subarray, quiet); continue; + case UpdateSubarray: + if (update == NULL) { + fprintf(stderr, + Name ": -U/--update must be specified with --update-subarray\n"); + rv |= 1; + continue; + } + rv |= Update_subarray(dv->devname, subarray, update, &ident, quiet); + continue; } mdfd = open_mddev(dv->devname, 1); if (mdfd>=0) { diff --git a/mdadm.h b/mdadm.h index f387477..68d61a3 100644 --- a/mdadm.h +++ b/mdadm.h @@ -258,6 +258,7 @@ extern char Version[], Usage[], Help[], OptionHelp[], /* for option that don't have short equivilents, we assign arbitrary * small numbers. '1' means an undecorated option, so we start at '2'. + * (note we must stop before we get to 65 i.e. 'A') */ enum special_options { AssumeClean = 2, @@ -266,7 +267,7 @@ enum special_options { ReAdd, NoDegraded, Sparc22, - BackupFile, + BackupFile, /* 8 */ HomeHost, AutoHomeHost, Symlinks, @@ -274,6 +275,7 @@ enum special_options { Waitclean, DetailPlatform, KillSubarray, + UpdateSubarray, /* 16 */ }; /* structures read from config file */ @@ -612,6 +614,8 @@ extern struct superswitch { int (*default_layout)(int level); /* optional */ /* Permit subarray's to be deleted from inactive containers */ int (*kill_subarray)(struct supertype *st); /* optional */ + /* Permit subarray's to be modified */ + int (*update_subarray)(struct supertype *st, char *update, mddev_ident_t ident); /* optional */ /* for mdmon */ int (*open_new)(struct supertype *c, struct active_array *a, @@ -809,6 +813,7 @@ extern int Monitor(mddev_dev_t devlist, extern int Kill(char *dev, struct supertype *st, int force, int quiet, int noexcl); extern int Kill_subarray(char *dev, char *subarray, int quiet); +extern int Update_subarray(char *dev, char *subarray, char *update, mddev_ident_t 
ident, int quiet); extern int Wait(char *dev); extern int WaitClean(char *dev, int sock, int verbose); diff --git a/super-intel.c b/super-intel.c index d81d620..f0377b8 100644 --- a/super-intel.c +++ b/super-intel.c @@ -283,6 +283,7 @@ enum imsm_update_type { update_activate_spare, update_create_array, update_kill_array, + update_rename_array, update_add_disk, }; @@ -309,6 +310,12 @@ struct imsm_update_kill_array { int dev_idx; }; +struct imsm_update_rename_array { + enum imsm_update_type type; + __u8 name[MAX_RAID_SERIAL_LEN]; + int dev_idx; +}; + struct imsm_update_add_disk { enum imsm_update_type type; }; @@ -2939,6 +2946,30 @@ static void imsm_update_version_info(struct intel_super *super) } } +static int check_name(struct intel_super *super, char *name, int quiet) +{ + struct imsm_super *mpb = super->anchor; + char *reason = NULL; + int i; + + if (strlen(name) > MAX_RAID_SERIAL_LEN) + reason = "must be 16 characters or less"; + + for (i = 0; i < mpb->num_raid_devs; i++) { + struct imsm_dev *dev = get_imsm_dev(super, i); + + if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) { + reason = "already exists"; + break; + } + } + + if (reason && !quiet) + fprintf(stderr, Name ": imsm volume name %s\n", reason); + + return !reason; +} + static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, unsigned long long size, char *name, char *homehost, int *uuid) @@ -2990,16 +3021,8 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, if (super->current_vol == 0) mpb->num_disks = 0; - for (i = 0; i < super->current_vol; i++) { - dev = get_imsm_dev(super, i); - if (strncmp((char *) dev->volume, name, - MAX_RAID_SERIAL_LEN) == 0) { - fprintf(stderr, Name": '%s' is already defined for this container\n", - name); - return 0; - } - } - + if (!check_name(super, name, 0)) + return 0; sprintf(st->subarray, "%d", idx); dv = malloc(sizeof(*dv)); if (!dv) { @@ -4107,6 +4130,54 @@ static int kill_subarray_imsm(struct 
supertype *st) return 0; } + +static int update_subarray_imsm(struct supertype *st, char *update, mddev_ident_t ident) +{ + /* update the subarray currently referenced by ->current_vol */ + struct intel_super *super = st->sb; + struct imsm_super *mpb = super->anchor; + + if (super->current_vol < 0) + return 2; + + if (strcmp(update, "name") == 0) { + char *name = ident->name; + + if (is_subarray_active(st->subarray, st->devname)) { + fprintf(stderr, + Name ": Unable to update name of active subarray\n"); + return 2; + } + + if (!check_name(super, name, 0)) + return 2; + + if (st->update_tail) { + struct imsm_update_rename_array *u = malloc(sizeof(*u)); + + if (!u) + return 2; + u->type = update_rename_array; + u->dev_idx = super->current_vol; + snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name); + append_metadata_update(st, u, sizeof(*u)); + } else { + struct imsm_dev *dev; + int i; + + dev = get_imsm_dev(super, super->current_vol); + snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name); + for (i = 0; i < mpb->num_raid_devs; i++) { + dev = get_imsm_dev(super, i); + handle_missing(super, dev); + } + super->updates_pending++; + } + } else + return 2; + + return 0; +} #endif /* MDASSEMBLE */ static int is_rebuilding(struct imsm_dev *dev) @@ -5217,6 +5288,31 @@ static void imsm_process_update(struct supertype *st, super->updates_pending++; break; } + case update_rename_array: { + struct imsm_update_rename_array *u = (void *) update->buf; + char name[MAX_RAID_SERIAL_LEN+1]; + int target = u->dev_idx; + struct active_array *a; + struct imsm_dev *dev; + + /* sanity check that we are not affecting the uuid of + * an active array + */ + snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name); + name[MAX_RAID_SERIAL_LEN] = '\0'; + for (a = st->arrays; a; a = a->next) + if (a->info.container_member == target) + break; + dev = get_imsm_dev(super, u->dev_idx); + if (a || !dev || !check_name(super, name, 1)) { + dprintf("failed to rename subarray-%d\n", 
target); + break; + } + + snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, name); + super->updates_pending++; + break; + } case update_add_disk: /* we may be able to repair some arrays if disks are @@ -5393,6 +5489,7 @@ struct superswitch super_imsm = { .add_to_super = add_to_super_imsm, .detail_platform = detail_platform_imsm, .kill_subarray = kill_subarray_imsm, + .update_subarray = update_subarray_imsm, #endif .match_home = match_home_imsm, .uuid_from_super= uuid_from_super_imsm, -- cgit From 23eb475a96b1b0cf7f8feaeb7b32355b80e8faa7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Jul 2010 17:28:14 -0700 Subject: mdmon: prevent allocations due to late binding Current versions of glibc do not provide a useable interface to clone(2) as it inflicts hidden dependencies on setting up a glibc specific tls descriptor. The dynamic linker trips this dependency and causes mdmon to intermittently fail to load. Resolving all dynamic linking prior to starting the monitor thread appears to mitigate the issue but there is no guarantee that another tls dependency will bite us later. However, while the debate continues with the glibc maintainers it seems prudent to keep this change. It ensures that we do not get into a situation where the monitor thread needs to make a late allocation to resolve a symbol. Signed-off-by: Dan Williams --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3af1665..237f4fc 100644 --- a/Makefile +++ b/Makefile @@ -157,8 +157,9 @@ mdadm.O2 : $(SRCS) mdadm.h mdmon.O2 mdmon.O2 : $(MON_SRCS) mdadm.h mdmon.h $(CC) -o mdmon.O2 $(CFLAGS) -DHAVE_STDINT_H -O2 -D_FORTIFY_SOURCE=2 $(MON_SRCS) +# use '-z now' to guarantee no dynamic linker interactions with the monitor thread mdmon : $(MON_OBJS) - $(CC) $(LDFLAGS) -o mdmon $(MON_OBJS) $(LDLIBS) + $(CC) $(LDFLAGS) -z now -o mdmon $(MON_OBJS) $(LDLIBS) msg.o: msg.c msg.h test_stripe : restripe.c mdadm.h -- cgit