summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeil Brown <neilb@suse.de>2005-08-09 04:25:47 +0000
committerNeil Brown <neilb@suse.de>2005-08-09 04:25:47 +0000
commitdfd4d8ee426fb71a369f494f95fe95b114a33c7c (patch)
tree616d0864cc8cb08085cb72b6ad5ccc545cd8b6a5
parent586ed40547a380b9f8bd58aa87c12fbaf6eabf65 (diff)
downloadmdadm-dfd4d8ee426fb71a369f494f95fe95b114a33c7c.tar.gz
mdadm-dfd4d8ee426fb71a369f494f95fe95b114a33c7c.tar.xz
mdadm-dfd4d8ee426fb71a369f494f95fe95b114a33c7c.zip
Add write-behind support
Currently this includes --write-behind to set the level of write-behind supported, and --write-mostly to flag devices as write-mostly. Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
-rw-r--r--Build.c8
-rw-r--r--ChangeLog1
-rw-r--r--Create.c15
-rw-r--r--Detail.c3
-rw-r--r--Grow.c4
-rw-r--r--Manage.c2
-rw-r--r--ReadMe.c4
-rw-r--r--bitmap.c8
-rw-r--r--bitmap.h12
-rw-r--r--md_p.h5
-rw-r--r--mdadm.833
-rw-r--r--mdadm.c45
-rw-r--r--mdadm.h11
-rw-r--r--super0.c26
-rw-r--r--super1.c14
-rw-r--r--tests/06wrmostly15
16 files changed, 172 insertions, 34 deletions
diff --git a/Build.c b/Build.c
index 5537b46..6489d84 100644
--- a/Build.c
+++ b/Build.c
@@ -36,7 +36,7 @@
int Build(char *mddev, int mdfd, int chunk, int level, int layout,
int raiddisks,
mddev_dev_t devlist, int assume_clean,
- char *bitmap_file, int bitmap_chunk, int delay)
+ char *bitmap_file, int bitmap_chunk, int write_behind, int delay)
{
/* Build a linear or raid0 arrays without superblocks
* We cannot really do any checks, we just do it.
@@ -164,7 +164,9 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
mdu_disk_info_t disk;
disk.number = i;
disk.raid_disk = i;
- disk.state = 6;
+ disk.state = (1<<MD_DISK_SYNC) | (1<<MD_DISK_ACTIVE);
+ if (dv->writemostly)
+ disk.state |= 1<<MD_DISK_WRITEMOSTLY;
disk.major = major(stb.st_rdev);
disk.minor = minor(stb.st_rdev);
if (ioctl(mdfd, ADD_NEW_DISK, &disk)) {
@@ -192,7 +194,7 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
return 1;
}
if (CreateBitmap(bitmap_file, 1, NULL, bitmap_chunk,
- delay, 0/* FIXME size */)) {
+ delay, write_behind, 0/* FIXME size */)) {
return 1;
}
bitmap_fd = open(bitmap_file, O_RDWR);
diff --git a/ChangeLog b/ChangeLog
index 2cb01c1..56bf07d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
Changes Prior to this release
- Support assembling from byte-swapped superblocks
metadata type "0.swap" and --update=byteorder
+ - write-mostly and write-behind support for raid1.
Changes Prior to 2.0-devel-3 release
- Assorted fixes for multiple bugs...
diff --git a/Create.c b/Create.c
index 735b8d7..87a9a2e 100644
--- a/Create.c
+++ b/Create.c
@@ -35,7 +35,7 @@ int Create(struct supertype *st, char *mddev, int mdfd,
int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks,
int subdevs, mddev_dev_t devlist,
int runstop, int verbose, int force,
- char *bitmap_file, int bitmap_chunk, int delay)
+ char *bitmap_file, int bitmap_chunk, int write_behind, int delay)
{
/*
* Create a new raid array.
@@ -351,7 +351,7 @@ int Create(struct supertype *st, char *mddev, int mdfd,
fprintf(stderr, Name ": internal bitmaps not supported by this kernel.\n");
return 1;
}
- if (!st->ss->add_internal_bitmap(super, bitmap_chunk, delay,
+ if (!st->ss->add_internal_bitmap(super, bitmap_chunk, delay, write_behind,
size ? size : maxsize)) {
fprintf(stderr, Name ": Given bitmap chunk size not supported.\n");
return 1;
@@ -382,7 +382,8 @@ int Create(struct supertype *st, char *mddev, int mdfd,
bitmap_chunk = DEFAULT_BITMAP_CHUNK;
st->ss->uuid_from_super(uuid, super);
- if (CreateBitmap(bitmap_file, force, (char*)uuid, bitmap_chunk, delay,
+ if (CreateBitmap(bitmap_file, force, (char*)uuid, bitmap_chunk,
+ delay, write_behind,
array.size*2ULL /* FIXME wrong for raid10 */)) {
return 1;
}
@@ -416,14 +417,18 @@ int Create(struct supertype *st, char *mddev, int mdfd,
}
disk.raid_disk = disk.number;
if (disk.raid_disk < raiddisks)
- disk.state = 6; /* active and in sync */
+ disk.state = (1<<MD_DISK_ACTIVE) |
+ (1<<MD_DISK_SYNC);
else
disk.state = 0;
+ if (dv->writemostly)
+ disk.state |= (1<<MD_DISK_WRITEMOSTLY);
+
if (dnum == insert_point ||
strcasecmp(dv->devname, "missing")==0) {
disk.major = 0;
disk.minor = 0;
- disk.state = 1; /* faulty */
+ disk.state = (1<<MD_DISK_FAULTY);
} else {
fd = open(dv->devname, O_RDONLY|O_EXCL, 0);
if (fd < 0) {
diff --git a/Detail.c b/Detail.c
index 46b483c..4c93eff 100644
--- a/Detail.c
+++ b/Detail.c
@@ -216,6 +216,8 @@ int Detail(char *dev, int brief, int test)
for (d= 0; d < max_disks; d++) {
mdu_disk_info_t disk;
char *dv;
+ int wonly = disk.state & (1<<MD_DISK_WRITEMOSTLY);
+ disk.state &= ~(1<<MD_DISK_WRITEMOSTLY);
disk.number = d;
if (ioctl(fd, GET_DISK_INFO, &disk) < 0) {
if (d < array.raid_disks)
@@ -244,6 +246,7 @@ int Detail(char *dev, int brief, int test)
if (disk.state & (1<<MD_DISK_ACTIVE)) printf(" active");
if (disk.state & (1<<MD_DISK_SYNC)) printf(" sync");
if (disk.state & (1<<MD_DISK_REMOVED)) printf(" removed");
+ if (wonly) printf(" writeonly");
if (disk.state == 0) printf(" spare");
if (disk.state == 0) {
if (is_26) {
diff --git a/Grow.c b/Grow.c
index d20bc6e..ce536d5 100644
--- a/Grow.c
+++ b/Grow.c
@@ -192,7 +192,7 @@ int Grow_Add_device(char *devname, int fd, char *newdev)
return 0;
}
-int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay)
+int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind)
{
/*
* First check that array doesn't have a bitmap
@@ -255,7 +255,7 @@ int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay)
continue;
if (st->ss->load_super(st, fd2, &super, NULL)==0) {
st->ss->add_internal_bitmap(super,
- chunk, delay,
+ chunk, delay, write_behind,
array.size);
st->ss->write_bitmap(st, fd2, super);
}
diff --git a/Manage.c b/Manage.c
index 53accd7..a91e467 100644
--- a/Manage.c
+++ b/Manage.c
@@ -266,6 +266,8 @@ int Manage_subdevs(char *devname, int fd,
disc.minor = minor(stb.st_rdev);
disc.number =j;
disc.state = 0;
+ if (dv->writemostly)
+ disc.state |= 1 << MD_DISK_WRITEMOSTLY;
st->ss->add_to_super(dsuper, &disc);
if (st->ss->write_init_super(st, dsuper, &disc, dv->devname))
return 1;
diff --git a/ReadMe.c b/ReadMe.c
index 1d28adf..1ba7301 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -131,6 +131,8 @@ struct option long_options[] = {
{"metadata", 1, 0, 'e'}, /* superblock format */
{"bitmap", 1, 0, 'b'},
{"bitmap-chunk", 1, 0, 4},
+ {"write-behind", 2, 0, 5},
+ {"write-mostly",0, 0, 'W'},
/* For assemble */
{"uuid", 1, 0, 'u'},
@@ -139,6 +141,7 @@ struct option long_options[] = {
{"scan", 0, 0, 's'},
{"force", 0, 0, 'f'},
{"update", 1, 0, 'U'},
+
/* Management */
{"add", 0, 0, 'a'},
{"remove", 0, 0, 'r'},
@@ -232,6 +235,7 @@ char OptionHelp[] =
" --assume-clean : Assume the array is already in-sync. This is dangerous.\n"
" --bitmap-chunk= : chunksize of bitmap in bitmap file (Kilobytes)\n"
" --delay= -d : seconds between bitmap updates\n"
+" --write-behind= : number of simultaneous write-behind requests to allow (requires bitmap)\n"
"\n"
" For assemble:\n"
" --bitmap= -b : File to find bitmap information in\n"
diff --git a/bitmap.c b/bitmap.c
index 96a26f9..0a2ed5d 100644
--- a/bitmap.c
+++ b/bitmap.c
@@ -215,6 +215,7 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
bitmap_super_t *sb;
bitmap_info_t *info;
int rv = 1;
+ char buf[64];
info = bitmap_file_read(filename, brief, st);
if (!info)
@@ -243,6 +244,11 @@ int ExamineBitmap(char *filename, int brief, struct supertype *st)
printf(" State : %s\n", bitmap_state(sb->state));
printf(" Chunksize : %s\n", human_chunksize(sb->chunksize));
printf(" Daemon : %ds flush period\n", sb->daemon_sleep);
+ if (sb->write_behind)
+ sprintf(buf, "Allow write behind, max %d", sb->write_behind);
+ else
+ sprintf(buf, "Normal");
+ printf(" Write Mode : %s\n", buf);
printf(" Sync Size : %llu%s\n", sb->sync_size/2,
human_size(sb->sync_size * 512));
if (brief)
@@ -257,6 +263,7 @@ free_info:
int CreateBitmap(char *filename, int force, char uuid[16],
unsigned long chunksize, unsigned long daemon_sleep,
+ unsigned long write_behind,
unsigned long long array_size)
{
/*
@@ -288,6 +295,7 @@ int CreateBitmap(char *filename, int force, char uuid[16],
memcpy(sb.uuid, uuid, 16);
sb.chunksize = chunksize;
sb.daemon_sleep = daemon_sleep;
+ sb.write_behind = write_behind;
sb.sync_size = array_size;
sb_cpu_to_le(&sb); /* convert to on-disk byte ordering */
diff --git a/bitmap.h b/bitmap.h
index 811485d..02a4e97 100644
--- a/bitmap.h
+++ b/bitmap.h
@@ -7,7 +7,7 @@
#define BITMAP_H 1
#define BITMAP_MAJOR 3
-#define BITMAP_MINOR 38
+#define BITMAP_MINOR 39
/*
* in-memory bitmap:
@@ -43,6 +43,13 @@
* When we set a bit, or in the counter (to start a write), if the fields is
* 0, we first set the disk bit and set the counter to 1.
*
+ * If the counter is 0, the on-disk bit is clear and the stripe is clean.
+ * Anything that dirties the stripe pushes the counter to 2 (at least)
+ * and sets the on-disk bit (lazily).
+ * If a periodic sweep finds the counter at 2, it is decremented to 1.
+ * If the sweep finds the counter at 1, the on-disk bit is cleared and the
+ * counter goes to zero.
+ *
* Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
* counters as a fallback when "page" memory cannot be allocated:
*
@@ -140,8 +147,9 @@ typedef struct bitmap_super_s {
__u32 state; /* 48 bitmap state information */
__u32 chunksize; /* 52 the bitmap chunk size in bytes */
__u32 daemon_sleep; /* 56 seconds between disk flushes */
+ __u32 write_behind; /* 60 number of outstanding write-behind writes */
- __u8 pad[256 - 60]; /* set to zero */
+ __u8 pad[256 - 64]; /* set to zero */
} bitmap_super_t;
/* notes:
diff --git a/md_p.h b/md_p.h
index 31eaafd..0a0b381 100644
--- a/md_p.h
+++ b/md_p.h
@@ -79,6 +79,11 @@
#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */
#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */
+#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" in RAID1 config.
+ * read requests will only be sent here in
+ * dire need
+ */
+
typedef struct mdp_device_descriptor_s {
__u32 number; /* 0 Device number in the entire set */
__u32 major; /* 1 Device major number */
diff --git a/mdadm.8 b/mdadm.8
index 1ecdac2..48d6c53 100644
--- a/mdadm.8
+++ b/mdadm.8
@@ -205,6 +205,18 @@ with
gives an intermediate level of verbosity.
.TP
+.BR -W ", " --write-mostly
+subsequent devices listed in a
+.BR --build ,
+.BR --create ,
+or
+.B --add
+command will be flagged as 'write-mostly'. This is valid for RAID1
+only and means that the 'md' driver will avoid reading from these
+devices if at all possible. This can be useful if mirroring over a
+slow link.
+
+.TP
.BR -b ", " --bitmap=
Give the name of a bitmap file to use with this array. Can be used
with --create (file should not exist) or --assemble (file should
@@ -215,6 +227,15 @@ exist).
Set the Chunksize of the bitmap. Each bit corresponds to that many
Kilobytes of storage. Default is 4.
+.TP
+.BR --write-behind=
+Specify that write-behind mode should be enabled (valid for RAID1
+only). If an argument is specified, it will set the maximum number
+of outstanding writes allowed. The default value is 256.
+A write-intent bitmap is required in order to use write-behind
+mode, and write-behind is only attempted on drives marked as
+.IR write-mostly .
+
.TP
.BR -f ", " --force
@@ -1218,9 +1239,15 @@ For this to work, the kernel must support the necessary change.
Various types of growth may be added during 2.6 development, possibly
including restructuring a raid5 array to have more active devices.
-Currently the only support available is to change the "size" attribute
-for arrays with redundancy, and the raid-disks attribute of RAID1
-arrays.
+Currently the only support available is to
+.IP \(bu 4
+change the "size" attribute
+for RAID1, RAID5 and RAID6.
+.IP \(bu 4
+change the "raid-disks" attribute of RAID1.
+.IP \(bu 4
+add a write-intent bitmap to a RAID1 array.
+.PP
Normally when an array is build the "size" it taken from the smallest
of the drives. If all the small drives in an arrays are, one at a
diff --git a/mdadm.c b/mdadm.c
index 2b2b9be..4dd6524 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -26,7 +26,7 @@
* Sydney, 2052
* Australia
*
- * Additions for bitmap and async RAID options, Copyright (C) 2003-2004,
+ * Additions for bitmap and write-behind RAID options, Copyright (C) 2003-2004,
* Paul Clements, SteelEye Technology, Inc.
*/
@@ -60,6 +60,7 @@ int main(int argc, char *argv[])
char devmode = 0;
int runstop = 0;
int readonly = 0;
+ int write_behind = 0;
int bitmap_fd = -1;
char *bitmap_file = NULL;
int bitmap_chunk = UnSet;
@@ -89,6 +90,7 @@ int main(int argc, char *argv[])
char *pidfile = NULL;
int oneshot = 0;
struct supertype *ss = NULL;
+ int writemostly = 0;
int copies;
@@ -214,6 +216,7 @@ int main(int argc, char *argv[])
}
dv->devname = optarg;
dv->disposition = devmode;
+ dv->writemostly = writemostly;
dv->next = NULL;
*devlistend = dv;
devlistend = &dv->next;
@@ -262,6 +265,7 @@ int main(int argc, char *argv[])
}
dv->devname = optarg;
dv->disposition = devmode;
+ dv->writemostly = writemostly;
dv->next = NULL;
*devlistend = dv;
devlistend = &dv->next;
@@ -306,6 +310,13 @@ int main(int argc, char *argv[])
max_disks = ss->max_devs;
continue;
+ case O(MANAGE,'W'):
+ case O(BUILD,'W'):
+ case O(CREATE,'W'):
+ /* set write-mostly for following devices */
+ writemostly = 1;
+ continue;
+
case O(GROW,'z'):
case O(CREATE,'z'): /* size */
if (size >= 0) {
@@ -741,6 +752,19 @@ int main(int argc, char *argv[])
/* convert K to B, chunk of 0K means 512B */
bitmap_chunk = bitmap_chunk ? bitmap_chunk * 1024 : 512;
continue;
+
+ case O(BUILD, 5):
+ case O(CREATE, 5): /* write-behind mode */
+ write_behind = DEFAULT_MAX_WRITE_BEHIND;
+ if (optarg) {
+ write_behind = strtol(optarg, &c, 10);
+ if (write_behind < 0 || *c ||
+ write_behind > 16383) {
+ fprintf(stderr, Name ": Invalid value for maximum outstanding write-behind writes: %s.\n\tMust be between 0 and 16383.\n", optarg);
+ exit(2);
+ }
+ }
+ continue;
}
/* We have now processed all the valid options. Anything else is
* an error
@@ -904,6 +928,12 @@ int main(int argc, char *argv[])
case BUILD:
if (bitmap_chunk == UnSet) bitmap_chunk = DEFAULT_BITMAP_CHUNK;
if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
+ if (write_behind && !bitmap_file) {
+ fprintf(stderr, Name ": write-behind mode requires a bitmap.\n");
+ rv = 1;
+ break;
+ }
+
if (bitmap_file) {
if (strcmp(bitmap_file, "internal")==0) {
fprintf(stderr, Name ": 'internal' bitmaps not supported with --build\n");
@@ -918,15 +948,20 @@ int main(int argc, char *argv[])
}
if (bitmap_fd < 0) {
bitmap_fd = CreateBitmap(bitmap_file, force, NULL,
- bitmap_chunk, delay, size);
+ bitmap_chunk, delay, write_behind, size);
}
}
rv = Build(devlist->devname, mdfd, chunk, level, layout,
raiddisks, devlist->next, assume_clean,
- bitmap_file, bitmap_chunk, delay);
+ bitmap_file, bitmap_chunk, write_behind, delay);
break;
case CREATE:
if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
+ if (write_behind && !bitmap_file) {
+ fprintf(stderr, Name ": write-behind mode requires a bitmap.\n");
+ rv = 1;
+ break;
+ }
if (ss == NULL) {
for(i=0; !ss && superlist[i]; i++)
ss = superlist[i]->match_metadata_desc("default");
@@ -939,7 +974,7 @@ int main(int argc, char *argv[])
rv = Create(ss, devlist->devname, mdfd, chunk, level, layout, size<0 ? 0 : size,
raiddisks, sparedisks,
devs_found-1, devlist->next, runstop, verbose, force,
- bitmap_file, bitmap_chunk, delay);
+ bitmap_file, bitmap_chunk, write_behind, delay);
break;
case MISC:
@@ -1078,7 +1113,7 @@ int main(int argc, char *argv[])
else if (bitmap_file) {
if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
rv = Grow_addbitmap(devlist->devname, mdfd, bitmap_file,
- bitmap_chunk, delay);
+ bitmap_chunk, delay, write_behind);
} else
fprintf(stderr, Name ": no changes to --grow\n");
break;
diff --git a/mdadm.h b/mdadm.h
index fa8ea69..8b58afc 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -64,6 +64,7 @@ char *strncpy(char *dest, const char *src, size_t n) __THROW;
#define DEFAULT_BITMAP_CHUNK 4096
#define DEFAULT_BITMAP_DELAY 5
+#define DEFAULT_MAX_WRITE_BEHIND 256
#include "md_u.h"
#include "md_p.h"
@@ -134,6 +135,7 @@ typedef struct mddev_dev_s {
char disposition; /* 'a' for add, 'r' for remove, 'f' for fail.
* Not set for names read from .config
*/
+ char writemostly;
struct mddev_dev_s *next;
} *mddev_dev_t;
@@ -186,7 +188,7 @@ extern struct superswitch {
int (*load_super)(struct supertype *st, int fd, void **sbp, char *devname);
struct supertype * (*match_metadata_desc)(char *arg);
__u64 (*avail_size)(__u64 size);
- int (*add_internal_bitmap)(void *sbv, int chunk, int delay, unsigned long long size);
+ int (*add_internal_bitmap)(void *sbv, int chunk, int delay, int write_behind, unsigned long long size);
void (*locate_bitmap)(struct supertype *st, int fd);
int (*write_bitmap)(struct supertype *st, int fd, void *sbv);
int major;
@@ -223,7 +225,7 @@ extern int Manage_reconfig(char *devname, int fd, int layout);
extern int Manage_subdevs(char *devname, int fd,
mddev_dev_t devlist);
extern int Grow_Add_device(char *devname, int fd, char *newdev);
-extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay);
+extern int Grow_addbitmap(char *devname, int fd, char *file, int chunk, int delay, int write_behind);
extern int Assemble(struct supertype *st, char *mddev, int mdfd,
@@ -237,14 +239,14 @@ extern int Assemble(struct supertype *st, char *mddev, int mdfd,
extern int Build(char *mddev, int mdfd, int chunk, int level, int layout,
int raiddisks,
mddev_dev_t devlist, int assume_clean,
- char *bitmap_file, int bitmap_chunk, int delay);
+ char *bitmap_file, int bitmap_chunk, int write_behind, int delay);
extern int Create(struct supertype *st, char *mddev, int mdfd,
int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks,
int subdevs, mddev_dev_t devlist,
int runstop, int verbose, int force,
- char *bitmap_file, int bitmap_chunk, int delay);
+ char *bitmap_file, int bitmap_chunk, int write_behind, int delay);
extern int Detail(char *dev, int brief, int test);
extern int Query(char *dev);
@@ -259,6 +261,7 @@ extern int Kill(char *dev, int force);
extern int CreateBitmap(char *filename, int force, char uuid[16],
unsigned long chunksize, unsigned long daemon_sleep,
+ unsigned long write_behind,
unsigned long long array_size);
extern int ExamineBitmap(char *filename, int brief, struct supertype *st);
diff --git a/super0.c b/super0.c
index 7a306b5..e336439 100644
--- a/super0.c
+++ b/super0.c
@@ -148,15 +148,19 @@ static void examine_super0(void *sbv)
mdp_disk_t *dp;
char *dv;
char nb[5];
+ int wonly;
if (d>=0) dp = &sb->disks[d];
else dp = &sb->this_disk;
snprintf(nb, sizeof(nb), "%4d", d);
printf("%4s %5d %5d %5d %5d ", d < 0 ? "this" : nb,
dp->number, dp->major, dp->minor, dp->raid_disk);
+ wonly = dp->state & (1<<MD_DISK_WRITEMOSTLY);
+ dp->state &= ~(1<<MD_DISK_WRITEMOSTLY);
if (dp->state & (1<<MD_DISK_FAULTY)) printf(" faulty");
if (dp->state & (1<<MD_DISK_ACTIVE)) printf(" active");
if (dp->state & (1<<MD_DISK_SYNC)) printf(" sync");
if (dp->state & (1<<MD_DISK_REMOVED)) printf(" removed");
+ if (wonly) printf(" write-mostly");
if (dp->state == 0) printf(" spare");
if ((dv=map_dev(dp->major, dp->minor)))
printf(" %s", dv);
@@ -312,8 +316,10 @@ static int update_super0(struct mdinfo *info, void *sbv, char *update, char *dev
}
if (strcmp(update, "assemble")==0) {
int d = info->disk.number;
+ int wonly = sb->disks[d].state & (1<<MD_DISK_WRITEMOSTLY);
+ sb->disks[d].state &= ~(1<<MD_DISK_WRITEMOSTLY);
if (sb->disks[d].state != info->disk.state) {
- sb->disks[d].state = info->disk.state;
+ sb->disks[d].state = info->disk.state & wonly;
rv = 1;
}
}
@@ -467,7 +473,7 @@ static int store_super0(struct supertype *st, int fd, void *sbv)
static int write_init_super0(struct supertype *st, void *sbv, mdu_disk_info_t *dinfo, char *devname)
{
mdp_super_t *sb = sbv;
- int fd = open(devname, O_RDWR, O_EXCL);
+ int fd = open(devname, O_RDWR|O_EXCL);
int rv;
if (fd < 0) {
@@ -485,6 +491,7 @@ static int write_init_super0(struct supertype *st, void *sbv, mdu_disk_info_t *d
if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) {
int towrite, n;
char buf[4096];
+
write(fd, ((char*)sb)+MD_SB_BYTES, sizeof(bitmap_super_t));
towrite = 64*1024 - MD_SB_BYTES - sizeof(bitmap_super_t);
memset(buf, 0xff, sizeof(buf));
@@ -498,6 +505,7 @@ static int write_init_super0(struct supertype *st, void *sbv, mdu_disk_info_t *d
else
break;
}
+ fsync(fd);
if (towrite)
rv = -2;
}
@@ -661,7 +669,7 @@ static __u64 avail_size0(__u64 devsize)
return MD_NEW_SIZE_SECTORS(devsize);
}
-static int add_internal_bitmap0(void *sbv, int chunk, int delay, unsigned long long size)
+static int add_internal_bitmap0(void *sbv, int chunk, int delay, int write_behind, unsigned long long size)
{
/*
* The bitmap comes immediately after the superblock and must be 60K in size
@@ -690,12 +698,13 @@ static int add_internal_bitmap0(void *sbv, int chunk, int delay, unsigned long l
sb->state |= (1<<MD_SB_BITMAP_PRESENT);
memset(bms, sizeof(*bms), 0);
- bms->magic = __le32_to_cpu(BITMAP_MAGIC);
- bms->version = __le32_to_cpu(BITMAP_MAJOR);
+ bms->magic = __cpu_to_le32(BITMAP_MAGIC);
+ bms->version = __cpu_to_le32(BITMAP_MAJOR);
uuid_from_super0((int*)bms->uuid, sb);
- bms->chunksize = __le32_to_cpu(chunk);
- bms->daemon_sleep = __le32_to_cpu(delay);
- bms->sync_size = __le64_to_cpu(size);
+ bms->chunksize = __cpu_to_le32(chunk);
+ bms->daemon_sleep = __cpu_to_le32(delay);
+ bms->sync_size = __cpu_to_le64(size);
+ bms->write_behind = __cpu_to_le32(write_behind);
@@ -776,6 +785,7 @@ int write_bitmap0(struct supertype *st, int fd, void *sbv)
else
break;
}
+ fsync(fd);
if (towrite)
rv = -2;
diff --git a/super1.c b/super1.c
index 3c24f34..f59eff0 100644
--- a/super1.c
+++ b/super1.c
@@ -64,7 +64,9 @@ struct mdp_superblock_1 {
__u32 dev_number; /* permanent identifier of this device - not role in raid */
__u32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */
__u8 device_uuid[16]; /* user-space setable, ignored by kernel */
- __u8 pad2[64-56]; /* set to 0 when writing */
+ __u8 devflags; /* per-device flags. Only one defined...*/
+#define WriteMostly1 1 /* mask for writemostly flag in above */
+ __u8 pad2[64-57]; /* set to 0 when writing */
/* array state information - 64 bytes */
__u64 utime; /* 40 bits second, 24 btes microseconds */
@@ -153,6 +155,12 @@ static void examine_super1(void *sbv)
if ((i&3)==0 && i != 0) printf(":");
}
printf("\n");
+ if (sb->devflags) {
+ printf(" Flags :");
+ if (sb->devflags & WriteMostly1)
+ printf(" write-mostly");
+ printf("\n");
+ }
atime = __le64_to_cpu(sb->utime) & 0xFFFFFFFFFFULL;
printf(" Update Time : %.24s\n", ctime(&atime));
@@ -429,7 +437,7 @@ static void add_to_super1(void *sbv, mdu_disk_info_t *dk)
{
struct mdp_superblock_1 *sb = sbv;
__u16 *rp = sb->dev_roles + dk->number;
- if (dk->state == 6) /* active, sync */
+ if ((dk->state & 6) == 6) /* active, sync */
*rp = __cpu_to_le16(dk->raid_disk);
else if ((dk->state & ~2) == 0) /* active or idle -> spare */
*rp = 0xffff;
@@ -517,6 +525,8 @@ static int write_init_super1(struct supertype *st, void *sbv, mdu_disk_info_t *d
}
sb->dev_number = __cpu_to_le32(dinfo->number);
+ if (dinfo->state & (1<<MD_DISK_WRITEMOSTLY))
+ sb->devflags |= WriteMostly1;
if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
read(rfd, sb->device_uuid, 16) != 16) {
diff --git a/tests/06wrmostly b/tests/06wrmostly
new file mode 100644
index 0000000..51fff60
--- /dev/null
+++ b/tests/06wrmostly
@@ -0,0 +1,15 @@
+set -e
+
+# create a raid1 array with a wrmostly device
+
+$mdadm -CR $md0 -l1 -n3 $dev0 $dev1 --write-mostly $dev2
+sh tests/testdev $md0 1 $mdsize0 64
+
+# unfortunately, we cannot measure if any read requests are going to $dev2
+
+$mdadm -S $md0
+
+$mdadm -CR $md0 -l1 -n3 --write-behind --bitmap=internal $dev0 $dev1 --write-mostly $dev2
+sh tests/testdev $md0 1 $mdsize0 64
+$mdadm -S $md0
+