diff options
-rw-r--r-- | ANNOUNCE-1.5.0 | 29 | ||||
-rw-r--r-- | Assemble.c | 35 | ||||
-rw-r--r-- | ChangeLog | 16 | ||||
-rw-r--r-- | Create.c | 41 | ||||
-rw-r--r-- | Detail.c | 15 | ||||
-rw-r--r-- | Examine.c | 2 | ||||
-rw-r--r-- | Makefile | 10 | ||||
-rw-r--r-- | Monitor.c | 53 | ||||
-rw-r--r-- | Query.c | 2 | ||||
-rw-r--r-- | ReadMe.c | 19 | ||||
-rw-r--r-- | config.c | 19 | ||||
-rw-r--r-- | dlink.c | 3 | ||||
-rw-r--r-- | md.4 | 62 | ||||
-rw-r--r-- | mdadm.8 | 47 | ||||
-rw-r--r-- | mdadm.c | 32 | ||||
-rw-r--r-- | mdadm.h | 24 | ||||
-rw-r--r-- | mdadm.spec | 2 | ||||
-rw-r--r-- | mdassemble.c | 97 | ||||
-rw-r--r-- | mdstat.c | 17 | ||||
-rw-r--r-- | util.c | 91 |
20 files changed, 471 insertions, 145 deletions
diff --git a/ANNOUNCE-1.5.0 b/ANNOUNCE-1.5.0 new file mode 100644 index 0000000..05433e6 --- /dev/null +++ b/ANNOUNCE-1.5.0 @@ -0,0 +1,29 @@ +Subject: ANNOUNCE: mdadm 1.5.0 - A tool for managing Soft RAID under Linux + + +I am pleased to announce the availability of + mdadm version 1.5.0 +It is available at + http://www.cse.unsw.edu.au/~neilb/source/mdadm/ +and + http://www.{countrycode}.kernel.org/pub/utils/raid/mdadm/ + +as a source tar-ball and (at the first site) as an SRPM, and as an RPM for i386. + +mdadm is a tool for creating, managing and monitoring +device arrays using the "md" driver in Linux, also +known as Software RAID arrays. + +Release 1.5.0 adds: + - new command "mdassemble" for use in initrd/initramfs. + - raid6 support (for 2.6.2 and later kernels) + - RebuildFinished event in monitor mode. + - include rebuild status in --detail output. + - fixes for assorted compilation problems + +Development of mdadm is sponsored by CSE@UNSW: + The School of Computer Science and Engineering +at + The University of New South Wales + +NeilBrown 22 Jan 2004 @@ -98,8 +98,8 @@ int Assemble(char *mddev, int mdfd, mdp_super_t first_super, super; struct { char *devname; - int major, minor; - int oldmajor, oldminor; + unsigned int major, minor; + unsigned int oldmajor, oldminor; long long events; time_t utime; int uptodate; @@ -107,16 +107,17 @@ int Assemble(char *mddev, int mdfd, int raid_disk; } *devices; int *best = NULL; /* indexed by raid_disk */ - int bestcnt = 0; - int devcnt = 0, okcnt, sparecnt; - int req_cnt; - int i; + unsigned int bestcnt = 0; + int devcnt = 0; + unsigned int okcnt, sparecnt; + unsigned int req_cnt; + unsigned int i; int most_recent = 0; int chosen_drive; int change = 0; int inargv = 0; int start_partial_ok = force || devlist==NULL; - int num_devs; + unsigned int num_devs; mddev_dev_t tmpdev; vers = md_get_version(mdfd); @@ -224,21 +225,21 @@ int Assemble(char *mddev, int mdfd, devname); continue; } - if (ident->super_minor >= 0 && + if 
(ident->super_minor != UnSet && (!havesuper || ident->super_minor != super.md_minor)) { if (inargv || verbose) fprintf(stderr, Name ": %s has wrong super-minor.\n", devname); continue; } - if (ident->level != -10 && - (!havesuper|| ident->level != super.level)) { + if (ident->level != UnSet && + (!havesuper|| ident->level != (int)super.level)) { if (inargv || verbose) fprintf(stderr, Name ": %s has wrong raid level.\n", devname); continue; } - if (ident->raid_disks != -1 && + if (ident->raid_disks != UnSet && (!havesuper || ident->raid_disks!= super.raid_disks)) { if (inargv || verbose) fprintf(stderr, Name ": %s requires wrong number of drives.\n", @@ -349,16 +350,16 @@ int Assemble(char *mddev, int mdfd, > devices[most_recent].events) most_recent = devcnt; } - if (super.level == -4) + if ((int)super.level == -4) /* with multipath, the raid_disk from the superblock is meaningless */ i = devcnt; else i = devices[devcnt].raid_disk; - if (i>=0 && i < 10000) { + if (i < 10000) { if (i >= bestcnt) { - int newbestcnt = i+10; + unsigned int newbestcnt = i+10; int *newbest = malloc(sizeof(int)*newbestcnt); - int c; + unsigned int c; for (c=0; c < newbestcnt; c++) if (c < bestcnt) newbest[c] = best[c]; @@ -392,7 +393,7 @@ int Assemble(char *mddev, int mdfd, /* note: we ignore error flags in multipath arrays * as they don't make sense */ - if (first_super.level != -4) + if ((int)first_super.level != -4) if (!(devices[j].state & (1<<MD_DISK_SYNC))) { if (!(devices[j].state & (1<<MD_DISK_FAULTY))) sparecnt++; @@ -494,7 +495,7 @@ int Assemble(char *mddev, int mdfd, for (i=0; i<bestcnt; i++) { int j = best[i]; - int desired_state; + unsigned int desired_state; if (i < super.raid_disks) desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC); @@ -1,3 +1,19 @@ +Changes Prior to this release + - new commands "mdassemble" which is a stripped-down equivalent of + "mdadm -As", that can be compiled with dietlibc. + Thanks to Luca Berra <bluca@comedia.it>. 
+ It can be used in an initramfs or initrd. + - Fix compiling error with BLKGETSIZE64 and some signed/unsigned + comparison warnings. + - Add Rebuild Status (% complete) to --detail output. + - Support "--monitor --test" which will generate a test alert + for each array once, to test notification paths. + - Generate RebuildFinished event when rebuild finishes. + - Support for raid6 as found in 2.6.2 - thanks to + H. Peter Anvin <hpa@zytor.com> + - Support partitioned md arrays with a different major number and + naming scheme (md_dX in /proc/mdstat, /dev/md/dXpY in /dev). + Changes Prior to 1.4.0 release - Document fact that creating a raid5 array really creates a degraded array with a spare. @@ -71,7 +71,7 @@ int Create(char *mddev, int mdfd, fprintf(stderr, Name ": Create requires md driver verison 0.90.0 or later\n"); return 1; } - if (level == -10) { + if (level == UnSet) { fprintf(stderr, Name ": a RAID level is needed to create an array.\n"); return 1; @@ -81,9 +81,19 @@ int Create(char *mddev, int mdfd, Name ": a number of --raid-devices must be given to create an array\n"); return 1; } + if (raiddisks < 4 && level == 6) { + fprintf(stderr, + Name ": at least 4 raid-devices needed for level 6\n"); + return 1; + } + if (raiddisks > 256 && level == 6) { + fprintf(stderr, + Name ": no more than 256 raid-devices supported for level 6\n"); + return 1; + } if (raiddisks < 2 && level >= 4) { fprintf(stderr, - Name ": atleast 2 raid-devices needed for level 4 or 5\n"); + Name ": at least 2 raid-devices needed for level 4 or 5\n"); return 1; } if (raiddisks+sparedisks > MD_SB_DISKS) { @@ -102,12 +112,13 @@ int Create(char *mddev, int mdfd, } /* now set some defaults */ - if (layout == -1) + if (layout == UnSet) switch(level) { default: /* no layout */ layout = 0; break; case 5: + case 6: layout = map_name(r5layout, "default"); if (verbose) fprintf(stderr, @@ -118,6 +129,7 @@ int Create(char *mddev, int mdfd, switch(level) { case 4: case 5: + case 6: case 0: case -1: 
/* linear */ if (chunk == 0) { @@ -229,12 +241,19 @@ int Create(char *mddev, int mdfd, /* If this is raid5, we want to configure the last active slot * as missing, so that a reconstruct happens (faster than re-parity) + * FIX: Can we do this for raid6 as well? */ - if (force == 0 && level == 5 && first_missing >= raiddisks) { - insert_point = raiddisks-1; - sparedisks++; - array.active_disks--; - missing_disks++; + if (force == 0 && first_missing >= raiddisks) { + switch ( level ) { + case 5: + insert_point = raiddisks-1; + sparedisks++; + array.active_disks--; + missing_disks++; + break; + default: + break; + } } /* Ok, lets try some ioctls */ @@ -249,8 +268,10 @@ int Create(char *mddev, int mdfd, if (fstat(mdfd, &stb)==0) array.md_minor = MINOR(stb.st_rdev); array.not_persistent = 0; - if (level == 5 && (insert_point < raiddisks || first_missing < raiddisks)) - array.state = 1; /* clean, but one drive will be missing */ + /*** FIX: Need to do something about RAID-6 here ***/ + if ( (level == 5 || level == 6) && + (insert_point < raiddisks || first_missing < raiddisks) ) + array.state = 1; /* clean, but one+ drive will be missing */ else array.state = 0; /* not clean, but no errors */ @@ -142,6 +142,17 @@ int Detail(char *dev, int brief, int test) } printf("\n"); + { + struct mdstat_ent *ms = mdstat_read(); + struct mdstat_ent *e; + for (e=ms; e; e=e->next) + if (e->devnum == array.md_minor) { + if (e->percent >= 0) + printf(" Rebuild Status : %d%% complete\n\n", e->percent); + break; + } + free_mdstat(ms); + } printf(" Number Major Minor RaidDevice State\n"); } for (d= 0; d<MD_SB_DISKS; d++) { @@ -189,8 +200,8 @@ int Detail(char *dev, int brief, int test) int fd = open(dv, O_RDONLY); if (fd >=0 && load_super(fd, &super) ==0 && - super.ctime == array.ctime && - super.level == array.level) + (unsigned long)super.ctime == (unsigned long)array.ctime && + (unsigned int)super.level == (unsigned int)array.level) have_super = 1; } } @@ -162,7 +162,7 @@ int 
Examine(mddev_dev_t devlist, int brief, int scan, int SparcAdjust) if (calc_sb_csum(&super) == super.sb_csum) printf(" Checksum : %x - correct\n", super.sb_csum); else - printf(" Checksum : %x - expected %x\n", super.sb_csum, calc_sb_csum(&super)); + printf(" Checksum : %x - expected %lx\n", super.sb_csum, calc_sb_csum(&super)); if (SparcAdjust) { /* 2.2 sparc put the events in the wrong place * So we copy the tail of the superblock @@ -78,6 +78,16 @@ mdadm.klibc : $(SRCS) mdadm.h rm -f $(OBJS) gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) $(SRCS) +mdassemble : mdassemble.c Assemble.c config.c dlink.c util.c mdadm.h + rm -f $(OBJS) + diet gcc -o mdassemble mdassemble.c Assemble.c config.c dlink.c util.c + +# This doesn't work +mdassemble.klibc : mdassemble.c Assemble.c config.c dlink.c util.c mdadm.h + rm -f $(OBJS) + gcc -nostdinc -iwithprefix include -I$(KLIBC)/klibc/include -I$(KLIBC)/linux/include -I$(KLIBC)/klibc/arch/i386/include -I$(KLIBC)/klibc/include/bits32 $(CFLAGS) -o mdassemble mdassemble.c Assemble.c config.c dlink.c util.c + + mdadm.man : mdadm.8 nroff -man mdadm.8 > mdadm.man @@ -32,6 +32,7 @@ #include "md_u.h" #include <sys/wait.h> #include <sys/signal.h> +#include <values.h> static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd); @@ -46,7 +47,7 @@ static char *percentalerts[] = { int Monitor(mddev_dev_t devlist, char *mailaddr, char *alert_cmd, int period, int daemonise, int scan, int oneshot, - char *config) + char *config, int test) { /* * Every few seconds, scan every md device looking for changes @@ -150,7 +151,7 @@ int Monitor(mddev_dev_t devlist, st->utime = 0; st->next = statelist; st->err = 0; - st->devnum = -1; + st->devnum = MAXINT; st->percent = -2; st->expected_spares = mdlist->spare_disks; if (mdlist->spare_group) @@ -169,7 +170,7 @@ int Monitor(mddev_dev_t devlist, st->utime = 0; st->next = 
statelist; st->err = 0; - st->devnum = -1; + st->devnum = MAXINT; st->percent = -2; st->expected_spares = -1; st->spare_group = NULL; @@ -191,8 +192,10 @@ int Monitor(mddev_dev_t devlist, struct mdstat_ent *mse; char *dev = st->devname; int fd; - int i; + unsigned int i; + if (test) + alert("TestMessage", dev, NULL, mailaddr, alert_cmd); fd = open(dev, O_RDONLY); if (fd < 0) { if (!st->err) @@ -221,18 +224,20 @@ int Monitor(mddev_dev_t devlist, close(fd); continue; } - if (st->devnum < 0) { + if (st->devnum == MAXINT) { struct stat stb; if (fstat(fd, &stb) == 0 && - (S_IFMT&stb.st_mode)==S_IFBLK) - st->devnum = MINOR(stb.st_rdev); + (S_IFMT&stb.st_mode)==S_IFBLK) { + if (MINOR(stb.st_rdev) == 9) + st->devnum = MINOR(stb.st_rdev); + else + st->devnum = -1- (MINOR(stb.st_rdev)>>6); + } } for (mse = mdstat ; mse ; mse=mse->next) - if (mse->devnum == st->devnum) { - mse->devnum = -1; /* flag it as "used" */ - break; - } + if (mse->devnum == st->devnum) + mse->devnum = MAXINT; /* flag it as "used" */ if (st->utime == array.utime && st->failed == array.failed_disks && @@ -266,6 +271,11 @@ int Monitor(mddev_dev_t devlist, alert(percentalerts[mse->percent/20], dev, NULL, mailaddr, alert_cmd); + if (mse && + mse->percent == -1 && + st->percent >= 0) + alert("RebuildFinished", dev, NULL, mailaddr, alert_cmd); + if (mse) st->percent = mse->percent; @@ -285,19 +295,19 @@ int Monitor(mddev_dev_t devlist, } change = newstate ^ st->devstate[i]; if (st->utime && change && !st->err) { - if (i < array.raid_disks && + if (i < (unsigned)array.raid_disks && (((newstate&change)&(1<<MD_DISK_FAULTY)) || ((st->devstate[i]&change)&(1<<MD_DISK_ACTIVE)) || ((st->devstate[i]&change)&(1<<MD_DISK_SYNC))) ) alert("Fail", dev, dv, mailaddr, alert_cmd); - else if (i>=array.raid_disks && + else if (i >= (unsigned)array.raid_disks && (disc.major || disc.minor) && st->devid[i] == MKDEV(disc.major, disc.minor) && ((newstate&change)&(1<<MD_DISK_FAULTY)) ) alert("FailSpare", dev, dv, mailaddr, 
alert_cmd); - else if (i < array.raid_disks && + else if (i < (unsigned)array.raid_disks && (((st->devstate[i]&change)&(1<<MD_DISK_FAULTY)) || ((newstate&change)&(1<<MD_DISK_ACTIVE)) || ((newstate&change)&(1<<MD_DISK_SYNC))) @@ -320,21 +330,32 @@ int Monitor(mddev_dev_t devlist, if (scan) { struct mdstat_ent *mse; for (mse=mdstat; mse; mse=mse->next) - if (mse->devnum >= 0 && + if (mse->devnum != MAXINT && (strcmp(mse->level, "raid1")==0 || strcmp(mse->level, "raid5")==0 || strcmp(mse->level, "multipath")==0) ) { struct state *st = malloc(sizeof *st); + mdu_array_info_t array; + int fd; if (st == NULL) continue; st->devname = strdup(get_md_name(mse->devnum)); + if ((fd = open(st->devname, O_RDONLY)) < 0 || + ioctl(fd, GET_ARRAY_INFO, &array)< 0) { + /* no such array */ + if (fd >=0) close(fd); + free(st->devname); + free(st); + continue; + } st->utime = 0; st->next = statelist; st->err = 1; st->devnum = mse->devnum; st->percent = -2; st->spare_group = NULL; + st->expected_spares = -1; statelist = st; alert("NewArray", st->devname, NULL, mailaddr, alert_cmd); new_found = 1; @@ -395,6 +416,7 @@ int Monitor(mddev_dev_t devlist, else sleep(period); } + test = 0; } return 0; } @@ -422,6 +444,7 @@ static void alert(char *event, char *dev, char *disc, char *mailaddr, char *cmd) } if (mailaddr && (strncmp(event, "Fail", 4)==0 || + strncmp(event, "Test", 4)==0 || strncmp(event, "Degrade", 7)==0)) { FILE *mp = popen(Sendmail, "w"); if (mp) { @@ -129,7 +129,7 @@ int Query(char *dev) if (md_get_version(fd) >= 9000 && ioctl(fd, GET_ARRAY_INFO, &array)>= 0) { if (ioctl(fd, GET_DISK_INFO, &disc) >= 0 && - MKDEV(disc.major,disc.minor) == stb.st_rdev) + MKDEV((unsigned)disc.major,(unsigned)disc.minor) == stb.st_rdev) activity = "active"; else activity = "mismatch"; @@ -29,7 +29,7 @@ #include "mdadm.h" -char Version[] = Name " - v1.4.0 - 29 Oct 2003\n"; +char Version[] = Name " - v1.5.0 - 22 Jan 2004\n"; /* * File: ReadMe.c * @@ -112,7 +112,7 @@ struct option long_options[] = { /* 
For create or build: */ {"chunk", 1, 0, 'c'}, {"rounding", 1, 0, 'c'}, /* for linear, chunk is really a rounding number */ - {"level", 1, 0, 'l'}, /* 0,1,4,5,linear */ + {"level", 1, 0, 'l'}, /* 0,1,4,5,6,linear */ {"parity", 1, 0, 'p'}, /* {left,right}-{a,}symmetric */ {"layout", 1, 0, 'p'}, {"raid-disks",1, 0, 'n'}, @@ -205,12 +205,12 @@ char OptionHelp[] = " For create or build:\n" " --chunk= -c : chunk size of kibibytes\n" " --rounding= : rounding factor for linear array (==chunk size)\n" -" --level= -l : raid level: 0,1,4,5,linear,mp. 0 or linear for build\n" -" --parity= -p : raid5 parity algorithm: {left,right}-{,a}symmetric\n" +" --level= -l : raid level: 0,1,4,5,6,linear,mp. 0 or linear for build\n" +" --parity= -p : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n" " --layout= : same as --parity\n" " --raid-devices= -n : number of active devices in array\n" " --spare-devices= -x: number of spares (eXtras) devices in initial array\n" -" --size= -z : Size (in K) of each drive in RAID1/4/5 - optional\n" +" --size= -z : Size (in K) of each drive in RAID1/4/5/6 - optional\n" " --force -f : Honour devices as listed on command line. 
Don't\n" " : insert a missing drive for RAID5.\n" "\n" @@ -270,12 +270,12 @@ char Help_create[] = " Options that are valid with --create (-C) are:\n" " --chunk= -c : chunk size of kibibytes\n" " --rounding= : rounding factor for linear array (==chunk size)\n" -" --level= -l : raid level: 0,1,4,5,linear,multipath and synonyms\n" -" --parity= -p : raid5 parity algorithm: {left,right}-{,a}symmetric\n" +" --level= -l : raid level: 0,1,4,5,6,linear,multipath and synonyms\n" +" --parity= -p : raid5/6 parity algorithm: {left,right}-{,a}symmetric\n" " --layout= : same as --parity\n" " --raid-devices= -n : number of active devices in array\n" " --spare-devices= -x: number of spares (eXtras) devices in initial array\n" -" --size= -z : Size (in K) of each drive in RAID1/4/5 - optional\n" +" --size= -z : Size (in K) of each drive in RAID1/4/5/6 - optional\n" " --force -f : Honour devices as listed on command line. Don't\n" " : insert a missing drive for RAID5.\n" " --run -R : insist of running the array even if not all\n" @@ -410,6 +410,7 @@ char Help_monitor[] = " --scan -s : find mail-address/program in config file\n" " --daemonise -f : Fork and continue in child, parent exits\n" " --oneshot -1 : Check for degraded arrays, then exit\n" +" --test -t : Generate a TestMessage event against each array at startup\n" ; @@ -480,6 +481,8 @@ mapping_t pers[] = { { "5", 5}, { "multipath", -4}, { "mp", -4}, + { "raid6", 6}, + { "6", 6}, { NULL, 0} }; @@ -211,12 +211,15 @@ void load_partitions(void) } while (fgets(buf, 1024, f)) { int major, minor; - char *name; + char *name, *mp; buf[1023] = '\0'; if (buf[0] != ' ') continue; - if (sscanf(buf, " %d %d ", &major, &minor) != 2) + major = strtoul(buf, &mp, 10); + if (mp == buf || *mp != ' ') continue; + minor = strtoul(mp, NULL, 10); + name = map_dev(major, minor); if (name) { struct conf_dev *cd; @@ -262,10 +265,10 @@ void arrayline(char *line) mddev_ident_t mi; mis.uuid_set = 0; - mis.super_minor = -1; - mis.level = -10; - 
mis.raid_disks = -1; - mis.spare_disks = -1; + mis.super_minor = UnSet; + mis.level = UnSet; + mis.raid_disks = UnSet; + mis.spare_disks = UnSet; mis.devices = NULL; mis.devname = NULL; mis.spare_group = NULL; @@ -296,7 +299,7 @@ void arrayline(char *line) if (w[12]==0 || endptr[0]!=0 || mis.super_minor < 0) { fprintf(stderr, Name ": invalid super-minor number: %s\n", w); - mis.super_minor = -1; + mis.super_minor = UnSet; } } } else if (strncasecmp(w, "devices=", 8 ) == 0 ) { @@ -450,7 +453,7 @@ mddev_dev_t conf_get_devs(char *conffile) struct conf_dev *cd; int flags = 0; static mddev_dev_t dlist = NULL; - int i; + unsigned int i; while (dlist) { mddev_dev_t t = dlist; @@ -5,6 +5,9 @@ #include <unistd.h> #include <stdlib.h> #include <string.h> +#ifdef __dietlibc__ +char *strncpy(char *dest, const char *src, size_t n) __THROW; +#endif #include "dlink.h" @@ -15,9 +15,12 @@ Array of Independent Devices. .PP .B md supports RAID levels 1 (mirroring) 4 (striped array with parity -device) and 5 (striped array with distributed parity information). -If a single underlying device fails while using one of these levels, -the array will continue to function. +device), 5 (striped array with distributed parity information) and 6 +(striped array with distributed dual redundancy information.) If +some number of underlying devices fail while using one of these +levels, the array will continue to function; this number is one for +RAID levels 4 and 5, two for RAID level 6, and all but one (N-1) for +RAID level 1. .PP .B md also supports a number of pseudo RAID (non-redundant) configurations @@ -140,6 +143,16 @@ parity blocks on different devices so there is less contention. This also allows more parallelism when reading as read requests are distributed over all the devices in the array instead of all but one. +.SS RAID6 + +RAID6 is similar to RAID5, but can handle the loss of any \fItwo\fP +devices without data loss. 
Accordingly, it requires N+2 drives to +store N drives worth of data. + +The performance for RAID6 is slightly lower but comparable to RAID5 in +normal mode and single disk failure mode. It is very slow in dual +disk failure mode, however. + .SS MUTIPATH MULTIPATH is not really a RAID at all as there is only one real device @@ -156,7 +169,7 @@ another interface. .SS UNCLEAN SHUTDOWN -When changes are made to a RAID1, RAID4, or RAID5 array there is a +When changes are made to a RAID1, RAID4, RAID5 or RAID6 array there is a possibility of inconsistency for short periods of time as each update requires are least two block to be written to different devices, and these writes probably wont happen at exactly the same time. @@ -166,33 +179,32 @@ consistent. To handle this situation, the md driver marks an array as "dirty" before writing any data to it, and marks it as "clean" when the array -is being disabled, e.g. at shutdown. -If the md driver finds an array to be dirty at startup, it proceeds to -correct any possibly inconsistency. For RAID1, this involves copying -the contents of the first drive onto all other drives. -For RAID4 or RAID5 this involves recalculating the parity for each -stripe and making sure that the parity block has the correct data. -This process, known as "resynchronising" or "resync" is performed in -the background. The array can still be used, though possibly with -reduced performance. - -If a RAID4 or RAID5 array is degraded (missing one drive) when it is -restarted after an unclean shutdown, it cannot recalculate parity, and -so it is possible that data might be undetectably corrupted. -The 2.4 md driver +is being disabled, e.g. at shutdown. If the md driver finds an array +to be dirty at startup, it proceeds to correct any possible +inconsistency. For RAID1, this involves copying the contents of the +first drive onto all other drives. 
For RAID4, RAID5 and RAID6 this +involves recalculating the parity for each stripe and making sure that +the parity block has the correct data. This process, known as +"resynchronising" or "resync" is performed in the background. The +array can still be used, though possibly with reduced performance. + +If a RAID4, RAID5 or RAID6 array is degraded (missing at least one +drive) when it is restarted after an unclean shutdown, it cannot +recalculate parity, and so it is possible that data might be +undetectably corrupted. The 2.4 md driver .B does not alert the operator to this condition. The 2.5 md driver will fail to start an array in this condition without manual intervention. .SS RECOVERY -If the md driver detects any error on a device in a RAID1, RAID4, or -RAID5 array, it immediately disables that device (marking it as faulty) -and continues operation on the remaining devices. If there is a spare -drive, the driver will start recreating on one of the spare drives the -data what was on that failed drive, either by copying a working drive -in a RAID1 configuration, or by doing calculations with the parity -block on RAID4 and RAID5. +If the md driver detects any error on a device in a RAID1, RAID4, +RAID5 or RAID6 array, it immediately disables that device (marking it +as faulty) and continues operation on the remaining devices. If there +is a spare drive, the driver will start recreating on one of the spare +drives the data that was on that failed drive, either by copying a +working drive in a RAID1 configuration, or by doing calculations with +the parity block on RAID4, RAID5 or RAID6. While this recovery process is happening, the md driver will monitor accesses to the array and will slow down the rate of recovery if other @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.TH MDADM 8 "" v1.4.0 +.TH MDADM 8 "" v1.5.0 .SH NAME mdadm \- manage MD devices .I aka @@ -29,6 +29,7 @@ md devices, (mirroring), .BR RAID4 , .BR RAID5 , +.BR RAID6 , and .BR MULTIPATH . 
@@ -109,9 +110,9 @@ superblocks, erasing old superblocks and stopping active arrays. .TP .B "Follow or Monitor" Monitor one or more md devices and act on any state changes. This is -only meaningful for raid1, raid5 or multipath arrays as only these have -interesting state. raid0 or linear never have missing, spare, or -failed drives, so there is nothing to monitor. +only meaningful for raid1, 4, 5, 6 or multipath arrays as +only these have interesting state. raid0 or linear never have +missing, spare, or failed drives, so there is nothing to monitor. .SH OPTIONS @@ -234,8 +235,8 @@ Specify rounding factor for linear array (==chunk size) .BR -l ", " --level= Set raid level. When used with .IR --create , -options are: linear, raid0, 0, stripe, raid1, 1, mirror, raid5, 4, -raid5, 5, multipath, mp. Obviously some of these are synonymous. +options are: linear, raid0, 0, stripe, raid1, 1, mirror, raid4, 4, +raid5, 5, raid6, 6, multipath, mp. Obviously some of these are synonymous. When used with .IR --build , @@ -279,7 +280,7 @@ number of spare devices. .TP .BR -z ", " --size= -Amount (in Kibibytes) of space to use from each drive in RAID1/4/5. +Amount (in Kibibytes) of space to use from each drive in RAID1/4/5/6. This must be a multiple of the chunk size, and must leave about 128Kb of space at the end of the drive for the RAID superblock. If this is not specified @@ -465,6 +466,14 @@ events. Running .in -5 from a cron script will ensure regular notification of any degraded arrays. +.TP +.BR -t ", " --test +Generate a +.B TestMessage +alert for every array found at startup. This alert gets mailed and +passed to the alert program. This can be used for testing that alert +message to get through successfully. + .SH ASSEMBLE MODE .HP 12 @@ -532,7 +541,7 @@ Normally the array will be started after it is assembled. However if is not given and insufficient drives were listed to start a complete (non-degraded) array, then the array is not started (to guard against usage errors). 
To insist that the array be started in this case (as -may work for RAID1 or RAID5), give the +may work for RAID1, 4, 5 or 6), give the .B --run flag. @@ -590,7 +599,7 @@ in place of a device name. This will cause .B mdadm to leave the corresponding slot in the array empty. For a RAID4 or RAID5 array at most one slot can be -"\fBmissing\fP". +"\fBmissing\fP"; for a RAID6 array at most two slots. For a RAID1 array, only one real device needs to be given. All of the others can be "\fBmissing\fP". @@ -717,8 +726,8 @@ config file to be examined. .TP --stop -This devices should active md arrays which will be deactivated, if -they are not currently in use. +The devices should be active md arrays which will be deactivated, as +long as they are not currently in use. .TP --run @@ -823,6 +832,11 @@ is 20, 40, 60, or 80, this indicates that rebuild has passed that many percentage of the total. .TP +.B RebuildFinished +An md array that was rebuilding, isn't any more, either because it +finished normally or was aborted. + +.TP .B Fail An active component device of an array has been marked as faulty. @@ -857,12 +871,19 @@ A spare drive has been moved from one array in a .B spare-group to another to allow a failed drive to be replaced. +.TP +.B TestMessage +An array was found at startup, and the +.B --test +flag was given. .RE Only -.B Fail +.B Fail , +.B FailSpare , +.B DegradedArray , and -.B FailSpare +.B TestMessage cause Email to be sent. All events cause the program to be run. The program is run with two or three arguments, they being the event name, the array device and possibly a second device. 
@@ -58,8 +58,8 @@ int main(int argc, char *argv[]) int chunk = 0; int size = 0; - int level = -10; - int layout = -1; + int level = UnSet; + int layout = UnSet; int raiddisks = 0; int sparedisks = 0; struct mddev_ident_s ident; @@ -89,9 +89,9 @@ int main(int argc, char *argv[]) int mdfd = -1; ident.uuid_set=0; - ident.level = -10; - ident.raid_disks = -1; - ident.super_minor= -1; + ident.level = UnSet; + ident.raid_disks = UnSet; + ident.super_minor= UnSet; ident.devices=0; while ((option_index = -1) , @@ -259,13 +259,13 @@ int main(int argc, char *argv[]) case O(CREATE,'l'): case O(BUILD,'l'): /* set raid level*/ - if (level != -10) { + if (level != UnSet) { fprintf(stderr, Name ": raid level may only be set once. " "Second value is %s.\n", optarg); exit(2); } level = map_name(pers, optarg); - if (level == -10) { + if (level == UnSet) { fprintf(stderr, Name ": invalid raid level: %s\n", optarg); exit(2); @@ -294,13 +294,14 @@ int main(int argc, char *argv[]) fprintf(stderr, Name ": layout not meaningful for %s arrays.\n", map_num(pers, level)); exit(2); - case -10: + case UnSet: fprintf(stderr, Name ": raid level must be given before layout.\n"); exit(2); case 5: + case 6: layout = map_name(r5layout, optarg); - if (layout==-10) { + if (layout==UnSet) { fprintf(stderr, Name ": layout %s not understood for raid5.\n", optarg); exit(2); @@ -337,7 +338,7 @@ int main(int argc, char *argv[]) sparedisks, optarg); exit(2); } - if (level > -10 && level <= 0 && level >= -1) { + if (level != UnSet && level <= 0 && level >= -1) { fprintf(stderr, Name ": spare-devices setting is incompatible with raid level %d\n", level); exit(2); @@ -372,7 +373,7 @@ int main(int argc, char *argv[]) continue; case O(ASSEMBLE,'m'): /* super-minor for array */ - if (ident.super_minor != -1) { + if (ident.super_minor != UnSet) { fprintf(stderr, Name ": super-minor cannot be set twice. 
" "Second value: %s.\n", optarg); exit(2); @@ -455,6 +456,9 @@ int main(int argc, char *argv[]) case O(MONITOR,'1'): /* oneshot */ oneshot = 1; continue; + case O(MONITOR,'t'): /* test */ + test = 1; + continue; /* now the general management options. Some are applicable * to other modes. None have arguments. @@ -563,7 +567,7 @@ int main(int argc, char *argv[]) mdfd = open_mddev(devlist->devname); if (mdfd < 0) exit(1); - if (ident.super_minor == -2) { + if ((int)ident.super_minor == -2) { struct stat stb; fstat(mdfd, &stb); ident.super_minor = MINOR(stb.st_rdev); @@ -586,7 +590,7 @@ int main(int argc, char *argv[]) break; case ASSEMBLE: if (devs_found == 1 && ident.uuid_set == 0 && - ident.super_minor == -1 && !scan ) { + ident.super_minor == UnSet && !scan ) { /* Only a device has been given, so get details from config file */ mddev_ident_t array_ident = conf_get_ident(configfile, devlist->devname); mdfd = open_mddev(devlist->devname); @@ -733,7 +737,7 @@ int main(int argc, char *argv[]) break; } rv= Monitor(devlist, mailaddr, program, - delay?delay:60, daemonise, scan, oneshot, configfile); + delay?delay:60, daemonise, scan, oneshot, configfile, test); break; } exit(rv); @@ -29,7 +29,9 @@ #define __USE_LARGEFILE64 #include <unistd.h> +#ifndef __dietlibc__ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence)); +#endif #include <sys/types.h> #include <sys/stat.h> @@ -40,6 +42,12 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence)); #include <stdio.h> #include <errno.h> #include <string.h> +#ifdef __dietlibc__NONO +int strncmp(const char *s1, const char *s2, size_t n) __THROW __pure__; +char *strncpy(char *dest, const char *src, size_t n) __THROW; +#include <strings.h> +#endif + #include <linux/kdev_t.h> /*#include <linux/fs.h> */ @@ -49,7 +57,7 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence)); #define MD_MAJOR 9 #ifndef BLKGETSIZE64 -#define BLKGETSIZE64 _IOR(0x12,114,sizeof(__u64)) /* 
return device size in bytes (u64 *arg) */ +#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ #endif @@ -83,20 +91,21 @@ extern char Version[], Usage[], Help[], OptionHelp[], * If multiple fields are present, the intersection of all matching * devices is considered */ +#define UnSet (0xfffe) typedef struct mddev_ident_s { char *devname; int uuid_set; __u32 uuid[4]; - int super_minor; /* -1 if not set */ + unsigned int super_minor; char *devices; /* comma separated list of device * names with wild cards */ - int level; /* -10 if not set */ - int raid_disks; /* -1 if not set */ - int spare_disks; /* -1 if not set */ + int level; + unsigned int raid_disks; + unsigned int spare_disks; char *spare_group; struct mddev_ident_s *next; } *mddev_ident_t; @@ -170,7 +179,7 @@ extern int Examine(mddev_dev_t devlist, int brief, int scan, int SparcAdjust); extern int Monitor(mddev_dev_t devlist, char *mailaddr, char *alert_cmd, int period, int daemonise, int scan, int oneshot, - char *config); + char *config, int test); extern int Kill(char *dev, int force); @@ -186,13 +195,14 @@ extern mddev_dev_t conf_get_devs(char *conffile); extern char *conf_get_mailaddr(char *conffile); extern char *conf_get_program(char *conffile); extern char *conf_line(FILE *file); +extern char *conf_word(FILE *file, int allow_key); extern void free_line(char *line); extern int match_oneof(char *devices, char *devname); extern int load_super(int fd, mdp_super_t *super); extern void uuid_from_super(int uuid[4], mdp_super_t *super); extern int same_uuid(int a[4], int b[4]); extern int compare_super(mdp_super_t *first, mdp_super_t *second); -extern int calc_sb_csum(mdp_super_t *super); +extern unsigned long calc_sb_csum(mdp_super_t *super); extern int store_super(int fd, mdp_super_t *super); extern int enough(int level, int raid_disks, int avail_disks); extern int ask(char *mesg); @@ -1,6 +1,6 @@ Summary: mdadm is used for controlling Linux md devices (aka RAID arrays) 
Name: mdadm -Version: 1.4.0 +Version: 1.5.0 Release: 1 Source: http://www.cse.unsw.edu.au/~neilb/source/mdadm/mdadm-%{version}.tgz URL: http://www.cse.unsw.edu.au/~neilb/source/mdadm/ diff --git a/mdassemble.c b/mdassemble.c new file mode 100644 index 0000000..55055dd --- /dev/null +++ b/mdassemble.c @@ -0,0 +1,97 @@ +/* + * mdassemble - assemble Linux "md" devices aka RAID arrays. + * + * Copyright (C) 2001-2002 Neil Brown <neilb@cse.unsw.edu.au> + * Copyright (C) 2003 Luca Berra <bluca@vodka.it> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Neil Brown + * Email: <neilb@cse.unsw.edu.au> + * Paper: Neil Brown + * School of Computer Science and Engineering + * The University of New South Wales + * Sydney, 2052 + * Australia + */ + +#include "mdadm.h" +#include "md_p.h" + +/* from readme.c */ +mapping_t pers[] = { + { "linear", -1}, + { "raid0", 0}, + { "0", 0}, + { "stripe", 0}, + { "raid1", 1}, + { "1", 1}, + { "mirror", 1}, + { "raid4", 4}, + { "4", 4}, + { "raid5", 5}, + { "5", 5}, + { "multipath", -4}, + { "mp", -4}, + { NULL, 0} +}; + +/* from mdadm.c */ +int open_mddev(char *dev) +{ + int mdfd = open(dev, O_RDWR, 0); + if (mdfd < 0) + fprintf(stderr, Name ": error opening %s: %s\n", + dev, strerror(errno)); + else if (md_get_version(mdfd) <= 0) { + fprintf(stderr, Name ": %s does not appear to be an md device\n", + dev); + close(mdfd); + mdfd = -1; + } + return mdfd; +} + +char *configfile = NULL; +int rv; +int mdfd = -1; +int runstop = 0; +int readonly = 0; +int verbose = 0; +int force = 0; + +int main() { + mddev_ident_t array_list = conf_get_ident(configfile, NULL); + if (!array_list) { + fprintf(stderr, Name ": No arrays found in config file\n"); + rv = 1; + } else + for (; array_list; array_list = array_list->next) { + mdu_array_info_t array; + mdfd = open_mddev(array_list->devname); + if (mdfd < 0) { + rv |= 1; + continue; + } + if (ioctl(mdfd, GET_ARRAY_INFO, &array)>=0) + /* already assembled, skip */ + continue; + rv |= Assemble(array_list->devname, mdfd, + array_list, configfile, + NULL, + readonly, runstop, NULL, verbose, force); + } +} @@ -114,6 +114,8 @@ struct mdstat_ent *mdstat_read() for (; (line = conf_line(f)) ; free_line(line)) { struct mdstat_ent *ent; char *w; + int devnum; + char *ep; if (strcmp(line, "Personalities")==0) continue; @@ -122,9 +124,16 
@@ struct mdstat_ent *mdstat_read() if (strcmp(line, "unused")==0) continue; /* Better be an md line.. */ - if (strncmp(line, "md", 2)!= 0 - || atoi(line+2)<0) { - fprintf(stderr, Name ": bad /proc/mdstat line starts: %s\n", line); + if (strncmp(line, "md", 2)!= 0) + continue; + if (strncmp(line, "md_d", 4) == 0) + devnum = -1-strtoul(line+4, &ep, 10); + else if (strncmp(line, "md", 2) == 0) + devnum = strtoul(line+2, &ep, 10); + else + continue; + if (ep == NULL || *ep ) { + /* fprintf(stderr, Name ": bad /proc/mdstat line starts: %s\n", line); */ continue; } @@ -141,7 +150,7 @@ struct mdstat_ent *mdstat_read() ent->active = -1; ent->dev = strdup(line); - ent->devnum = atoi(line+2); + ent->devnum = devnum; for (w=dl_next(line); w!= line ; w=dl_next(w)) { int l = strlen(w); @@ -30,6 +30,7 @@ #include "mdadm.h" #include "md_p.h" #include <sys/utsname.h> +#include <ctype.h> /* * Parse a 128 bit uuid in 4 integers @@ -102,12 +103,18 @@ int md_get_version(int fd) int get_linux_version() { struct utsname name; + char *cp; int a,b,c; if (uname(&name) <0) return -1; - if (sscanf(name.release, "%d.%d.%d", &a,&b,&c)!= 3) - return -1; + cp = name.release; + a = strtoul(cp, &cp, 10); + if (*cp != '.') return -1; + b = strtoul(cp+1, &cp, 10); + if (*cp != '.') return -1; + c = strtoul(cp+1, NULL, 10); + return (a*1000000)+(b*1000)+c; } @@ -124,6 +131,8 @@ int enough(int level, int raid_disks, int avail_disks) case 4: case 5: return avail_disks >= raid_disks-1; + case 6: + return avail_disks >= raid_disks-2; default: return 0; } @@ -363,7 +372,7 @@ int map_name(mapping_t *map, char *name) return map->num; map++; } - return -10; + return UnSet; } /* @@ -392,7 +401,11 @@ char *map_dev(int major, int minor) #include <ftw.h> +#ifndef __dietlibc__ int add_dev(const char *name, const struct stat *stb, int flag, struct FTW *s) +#else +int add_dev(const char *name, const struct stat *stb, int flag) +#endif { if ((stb->st_mode&S_IFMT)== S_IFBLK) { char *n = strdup(name); @@ -412,7 
+425,11 @@ char *map_dev(int major, int minor) { struct devmap *p; if (!devlist_ready) { +#ifndef __dietlibc__ nftw("/dev", add_dev, 10, FTW_PHYS); +#else + ftw("/dev", add_dev, 10); +#endif devlist_ready=1; } @@ -425,7 +442,7 @@ char *map_dev(int major, int minor) #endif -int calc_sb_csum(mdp_super_t *super) +unsigned long calc_sb_csum(mdp_super_t *super) { unsigned int oldcsum = super->sb_csum; unsigned long long newcsum = 0; @@ -487,27 +504,63 @@ char *human_size_brief(long long bytes) return buf; } +static int mdp_major = -1; +void get_mdp_major(void) +{ + FILE *fl = fopen("/proc/devices", "r"); + char *w; + int have_block = 0; + int have_devices = 0; + int last_num = -1; + if (!fl) + return; + while ((w = conf_word(fl, 1))) { + if (have_block && strcmp(w, "devices:")==0) + have_devices = 1; + have_block = (strcmp(w, "Block")==0); + if (isdigit(w[0])) + last_num = atoi(w); + if (have_devices && strcmp(w, "mdp")==0) + mdp_major = last_num; + free(w); + } + fclose(fl); +} + + -#define MD_MAJOR 9 char *get_md_name(int dev) { /* find /dev/md%d or /dev/md/%d or make a device /dev/.tmp.md%d */ + /* if dev < 0, want /dev/md/d%d or find mdp in /proc/devices ... 
*/ static char devname[50]; struct stat stb; - dev_t rdev = MKDEV(MD_MAJOR, dev); - - sprintf(devname, "/dev/md%d", dev); - if (stat(devname, &stb) == 0 - && (S_IFMT&stb.st_mode) == S_IFBLK - && (stb.st_rdev == rdev)) - return devname; - - sprintf(devname, "/dev/md/%d", dev); - if (stat(devname, &stb) == 0 - && (S_IFMT&stb.st_mode) == S_IFBLK - && (stb.st_rdev == rdev)) - return devname; - + dev_t rdev; + + if (dev < 0) { + + if (mdp_major < 0) get_mdp_major(); + if (mdp_major < 0) return NULL; + rdev = MKDEV(mdp_major, (-1-dev)<<6); + sprintf(devname, "/dev/md/d%d", -1-dev); + if (stat(devname, &stb) == 0 + && (S_IFMT&stb.st_mode) == S_IFBLK + && (stb.st_rdev == rdev)) + return devname; + } else { + rdev = MKDEV(MD_MAJOR, dev); + sprintf(devname, "/dev/md%d", dev); + if (stat(devname, &stb) == 0 + && (S_IFMT&stb.st_mode) == S_IFBLK + && (stb.st_rdev == rdev)) + return devname; + + sprintf(devname, "/dev/md/%d", dev); + if (stat(devname, &stb) == 0 + && (S_IFMT&stb.st_mode) == S_IFBLK + && (stb.st_rdev == rdev)) + return devname; + } sprintf(devname, "/dev/.tmp.md%d", dev); if (mknod(devname, S_IFBLK | 0600, rdev) == -1) return NULL; |