summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeil Brown <neilb@suse.de>2005-06-07 23:16:35 +0000
committerNeil Brown <neilb@suse.de>2005-06-07 23:16:35 +0000
commitc82f047cfceb479c9c6b56b44c196018af050e45 (patch)
tree2bce84dc8b9cf6068a863372f0c06530def3a1a4
parenta3fd117c7a2b2449704ee86eb9ec180906142f7a (diff)
downloadmdadm-c82f047cfceb479c9c6b56b44c196018af050e45.tar.gz
mdadm-c82f047cfceb479c9c6b56b44c196018af050e45.tar.xz
mdadm-c82f047cfceb479c9c6b56b44c196018af050e45.zip
Initial bitmap support
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
-rw-r--r--Assemble.c7
-rw-r--r--Build.c34
-rw-r--r--Create.c24
-rw-r--r--Makefile8
-rw-r--r--ReadMe.c23
-rw-r--r--bitmap.c315
-rw-r--r--bitmap.h273
-rw-r--r--md_u.h7
-rw-r--r--mdadm.c72
-rw-r--r--mdadm.h15
10 files changed, 765 insertions, 13 deletions
diff --git a/Assemble.c b/Assemble.c
index 7cab81a..71aaa60 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -531,6 +531,13 @@ int Assemble(struct supertype *st, char *mddev, int mdfd,
mddev, strerror(errno));
return 1;
}
+	/* bitmap_fd is -1 when no bitmap file was supplied, and 0 is a
+	 * valid descriptor, so test for ">= 0" rather than non-zero.
+	 */
+	if (ident->bitmap_fd >= 0) {
+		if (ioctl(mdfd, SET_BITMAP_FILE, ident->bitmap_fd) != 0) {
+			fprintf(stderr, Name ": SET_BITMAP_FILE failed.\n");
+			return 1;
+		}
+	}
+
/* First, add the raid disks, but add the chosen one last */
for (i=0; i<= bestcnt; i++) {
int j;
diff --git a/Build.c b/Build.c
index 6bb3a62..b1fa122 100644
--- a/Build.c
+++ b/Build.c
@@ -35,7 +35,8 @@
int Build(char *mddev, int mdfd, int chunk, int level, int layout,
int raiddisks,
- mddev_dev_t devlist, int assume_clean)
+ mddev_dev_t devlist, int assume_clean,
+ char *bitmap_file, int bitmap_chunk, int delay)
{
/* Build a linear or raid0 arrays without superblocks
* We cannot really do any checks, we just do it.
@@ -56,6 +57,7 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
struct stat stb;
int subdevs = 0;
mddev_dev_t dv;
+ int bitmap_fd;
/* scan all devices, make sure they really are block devices */
for (dv = devlist; dv; dv=dv->next) {
@@ -135,6 +137,9 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
mddev, strerror(errno));
return 1;
}
+ } else if (bitmap_file) {
+ fprintf(stderr, Name ": bitmaps not supported with this kernel\n");
+ return 1;
}
/* now add the devices */
for ((i=0), (dv = devlist) ; dv ; i++, dv=dv->next) {
@@ -171,6 +176,33 @@ int Build(char *mddev, int mdfd, int chunk, int level, int layout,
/* now to start it */
if (vers >= 9000) {
mdu_param_t param; /* not used by syscall */
+ if (bitmap_file) {
+ bitmap_fd = open(bitmap_file, O_RDWR);
+ if (bitmap_fd < 0) {
+ if (bitmap_chunk == UnSet) {
+ fprintf(stderr, Name ": %s cannot be openned.",
+ bitmap_file);
+ return 1;
+ }
+ if (CreateBitmap(bitmap_file, 1, NULL, bitmap_chunk,
+ delay, 0/* FIXME size */)) {
+ return 1;
+ }
+ bitmap_fd = open(bitmap_file, O_RDWR);
+ if (bitmap_fd < 0) {
+ fprintf(stderr, Name ": %s cannot be openned.",
+ bitmap_file);
+ return 1;
+ }
+ }
+ if (bitmap_fd >= 0) {
+ if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
+ fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
+ mddev, strerror(errno));
+ return 1;
+ }
+ }
+ }
if (ioctl(mdfd, RUN_ARRAY, &param)) {
fprintf(stderr, Name ": RUN_ARRAY failed: %s\n",
strerror(errno));
diff --git a/Create.c b/Create.c
index 1717240..e1b1737 100644
--- a/Create.c
+++ b/Create.c
@@ -34,7 +34,8 @@
int Create(struct supertype *st, char *mddev, int mdfd,
int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks,
int subdevs, mddev_dev_t devlist,
- int runstop, int verbose, int force)
+ int runstop, int verbose, int force,
+ char *bitmap_file, int bitmap_chunk, int delay)
{
/*
* Create a new raid array.
@@ -66,6 +67,7 @@ int Create(struct supertype *st, char *mddev, int mdfd,
int pass;
int vers;
int rv;
+ int bitmap_fd;
mdu_array_info_t array;
@@ -358,6 +360,26 @@ int Create(struct supertype *st, char *mddev, int mdfd,
return 1;
}
+ if (bitmap_file) {
+ int uuid[4];
+ st->ss->uuid_from_super(uuid, super);
+ if (CreateBitmap(bitmap_file, force, (char*)uuid, bitmap_chunk, delay,
+ array.size*2ULL /* FIXME wrong for raid10 */)) {
+ return 1;
+ }
+ bitmap_fd = open(bitmap_file, O_RDWR);
+ if (bitmap_fd < 0) {
+ fprintf(stderr, Name ": weird: %s cannot be openned\n",
+ bitmap_file);
+ return 1;
+ }
+ if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) {
+ fprintf(stderr, Name ": Cannot set bitmap file for %s: %s\n",
+ mddev, strerror(errno));
+ return 1;
+ }
+ }
+
for (pass=1; pass <=2 ; pass++) {
diff --git a/Makefile b/Makefile
index dcb9b0f..fd08605 100644
--- a/Makefile
+++ b/Makefile
@@ -58,9 +58,11 @@ MAN5DIR = $(MANDIR)/man5
MAN8DIR = $(MANDIR)/man8
OBJS = mdadm.o config.o mdstat.o ReadMe.o util.o Manage.o Assemble.o Build.o \
- Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o mdopen.o super0.o super1.o
+ Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
+ mdopen.o super0.o super1.o bitmap.o
SRCS = mdadm.c config.c mdstat.c ReadMe.c util.c Manage.c Assemble.c Build.c \
- Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c mdopen.c super0.c super1.c
+ Create.c Detail.c Examine.c Grow.c Monitor.c dlink.c Kill.c Query.c \
+ mdopen.c super0.c super1.c bitmap.c
ASSEMBLE_SRCS := mdassemble.c Assemble.c config.c dlink.c util.c super0.c super1.c
ifdef MDASSEMBLE_AUTO
@@ -114,7 +116,7 @@ md.man : md.4
mdadm.conf.man : mdadm.conf.5
nroff -man mdadm.conf.5 > mdadm.conf.man
-$(OBJS) : mdadm.h
+$(OBJS) : mdadm.h bitmap.h
install : mdadm mdadm.8 md.4 mdadm.conf.5
$(INSTALL) -D $(STRIP) -m 755 mdadm $(DESTDIR)$(BINDIR)/mdadm
diff --git a/ReadMe.c b/ReadMe.c
index 367bc0b..856a8ef 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -91,7 +91,7 @@ char Version[] = Name " - v1.11.0 - 11 April 2005\n";
* At the time if writing, there is only minimal support.
*/
-char short_options[]="-ABCDEFGQhVvbc:i:l:p:m:n:x:u:c:d:z:U:sa::rfRSow1te:";
+char short_options[]="-ABCDEFGQhVXvb::c:i:l:p:m:n:x:u:c:d:z:U:sa::rfRSow1te:";
struct option long_options[] = {
{"manage", 0, 0, '@'},
{"misc", 0, 0, '#'},
@@ -104,6 +104,7 @@ struct option long_options[] = {
{"grow", 0, 0, 'G'},
{"zero-superblock", 0, 0, 'K'}, /* deliberately no a short_option */
{"query", 0, 0, 'Q'},
+ {"examine-bitmap", 0, 0, 'X'},
/* synonyms */
{"monitor", 0, 0, 'F'},
@@ -125,9 +126,11 @@ struct option long_options[] = {
{"spare-disks",1,0, 'x'},
{"spare-devices",1,0, 'x'},
{"size", 1, 0, 'z'},
- {"auto", 2, 0, 'a'}, /* also for --assemble */
+ {"auto", 1, 0, 'a'}, /* also for --assemble */
{"assume-clean",0,0, 3 },
{"metadata", 1, 0, 'e'}, /* superblock format */
+ {"bitmap", 1, 0, 'b'},
+ {"bitmap-chunk", 1, 0, 4},
/* For assemble */
{"uuid", 1, 0, 'u'},
@@ -188,6 +191,7 @@ char Help[] =
char OptionHelp[] =
"Any parameter that does not start with '-' is treated as a device name\n"
+"or, for --examine-bitmap, a file name.\n"
"The first such name is often the name of an md device. Subsequent\n"
"names are often names of component devices."
"\n"
@@ -205,6 +209,7 @@ char OptionHelp[] =
" --create -C : Create a new array\n"
" --detail -D : Display details of an array\n"
" --examine -E : Examine superblock on an array component\n"
+" --examine-bitmap -X: Display the detail of a bitmap file\n"
" --monitor -F : monitor (follow) some arrays\n"
" --query -Q : Display general information about how a\n"
" device relates to the md driver\n"
@@ -212,6 +217,7 @@ char OptionHelp[] =
/*
"\n"
" For create or build:\n"
+" --bitmap= -b : File to store bitmap in - may pre-exist for --build\n"
" --chunk= -c : chunk size of kibibytes\n"
" --rounding= : rounding factor for linear array (==chunk size)\n"
" --level= -l : raid level: 0,1,4,5,6,linear,mp. 0 or linear for build\n"
@@ -224,8 +230,11 @@ char OptionHelp[] =
" : insert a missing drive for RAID5.\n"
" --auto(=p) -a : Automatically allocate new (partitioned) md array if needed.\n"
" --assume-clean : Assume the array is already in-sync. This is dangerous.\n"
+" --bitmap-chunk= : chunksize of bitmap in bitmap file (Kilobytes)\n"
+" --delay= -d : seconds between bitmap updates\n"
"\n"
" For assemble:\n"
+" --bitmap= -b : File to find bitmap information in\n"
" --uuid= -u : uuid of array to assemble. Devices which don't\n"
" have this uuid are excluded\n"
" --super-minor= -m : minor number to look for in super-block when\n"
@@ -280,6 +289,7 @@ char Help_create[] =
" other levels.\n"
"\n"
" Options that are valid with --create (-C) are:\n"
+" --bitmap= : Create a bitmap for the array with the given filename\n"
" --chunk= -c : chunk size of kibibytes\n"
" --rounding= : rounding factor for linear array (==chunk size)\n"
" --level= -l : raid level: 0,1,4,5,6,linear,multipath and synonyms\n"
@@ -293,6 +303,8 @@ char Help_create[] =
" --run -R : insist of running the array even if not all\n"
" : devices are present or some look odd.\n"
" --readonly -o : start the array readonly - not supported yet.\n"
+" --bitmap-chunk= : bitmap chunksize in Kilobytes.\n"
+" --delay= -d : bitmap update delay in seconds.\n"
"\n"
;
@@ -308,10 +320,13 @@ char Help_build[] =
" The level may only be 0, raid0, or linear.\n"
" All devices must be listed and the array will be started once complete.\n"
" Options that are valid with --build (-B) are:\n"
+" --bitmap= : file to store/find bitmap information in.\n"
" --chunk= -c : chunk size of kibibytes\n"
" --rounding= : rounding factor for linear array (==chunk size)\n"
" --level= -l : 0, raid0, or linear\n"
-" --raid-devices= -n : number of active devices in array\n"
+" --raid-devices= -n : number of active devices in array\n"
+" --bitmap-chunk= : bitmap chunksize in Kilobytes.\n"
+" --delay= -d : bitmap update delay in seconds.\n"
;
char Help_assemble[] =
@@ -347,6 +362,7 @@ char Help_assemble[] =
" and components are determined from the config file.\n"
"\n"
"Options that are valid with --assemble (-A) are:\n"
+" --bitmap= : bitmap file to use wit the array\n"
" --uuid= -u : uuid of array to assemble. Devices which don't\n"
" have this uuid are excluded\n"
" --super-minor= -m : minor number to look for in super-block when\n"
@@ -393,6 +409,7 @@ char Help_misc[] =
" device relates to the md driver\n"
" --detail -D : Display details of an array\n"
" --examine -E : Examine superblock on an array component\n"
+" --examine-bitmap -X: Display contents of a bitmap file\n"
" --zero-superblock : erase the MD superblock from a device.\n"
" --run -R : start a partially built array\n"
" --stop -S : deactivate array, releasing all resources\n"
diff --git a/bitmap.c b/bitmap.c
new file mode 100644
index 0000000..57969a6
--- /dev/null
+++ b/bitmap.c
@@ -0,0 +1,315 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2004 Paul Clements, SteelEye Technology, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "mdadm.h"
+#include <asm/byteorder.h>
+
+#define min(a,b) (((a) < (b)) ? (a) : (b))
+
+/*
+ * Convert the multi-byte fields of a bitmap superblock, in place, from
+ * little-endian (the on-disk order) to CPU byte order.
+ * The uuid bytes and the padding are left untouched.
+ */
+inline void sb_le_to_cpu(bitmap_super_t *sb)
+{
+ sb->magic = __le32_to_cpu(sb->magic);
+ sb->version = __le32_to_cpu(sb->version);
+ /* uuid gets no translation */
+ sb->events = __le64_to_cpu(sb->events);
+ sb->events_cleared = __le64_to_cpu(sb->events_cleared);
+ sb->state = __le32_to_cpu(sb->state);
+ sb->chunksize = __le32_to_cpu(sb->chunksize);
+ sb->daemon_sleep = __le32_to_cpu(sb->daemon_sleep);
+ sb->sync_size = __le64_to_cpu(sb->sync_size);
+}
+
+/*
+ * Convert a bitmap superblock, in place, from CPU byte order to the
+ * little-endian on-disk order. Swapping bytes is its own inverse, so this
+ * simply reuses sb_le_to_cpu().
+ */
+inline void sb_cpu_to_le(bitmap_super_t *sb)
+{
+ sb_le_to_cpu(sb); /* these are really the same thing */
+}
+
+/*
+ * Display names for bitmap superblock state values, looked up by
+ * bitmap_state() through map_num(). The value 2 matches BITMAP_STALE in
+ * bitmap.h. The { NULL, -1 } entry is presumably the end-of-table marker
+ * map_num() expects -- confirm against its definition.
+ */
+mapping_t bitmap_states[] = {
+ { "OK", 0 },
+ { "Out of date", 2 },
+ { NULL, -1 }
+};
+
+/*
+ * Translate a superblock state number into its display name.
+ * Returns "Unknown" when the number is not listed in bitmap_states.
+ */
+const char *bitmap_state(int state_num)
+{
+	char *name = map_num(bitmap_states, state_num);
+
+	if (name == NULL)
+		return "Unknown";
+	return name;
+}
+
+/*
+ * Format a byte count as a short string such as "512 B" or "4 KB".
+ *
+ * The value is repeatedly divided by 1024 (rounding down) until it is
+ * below 1024 or the largest known suffix (TB) has been reached, so very
+ * large values are reported as a big number of TB instead of indexing
+ * past the end of the suffix table.
+ *
+ * Returns a pointer to a static buffer which is overwritten by the next
+ * call.
+ */
+const char *human_chunksize(unsigned long bytes)
+{
+	static char buf[16];
+	static const char *const suffixes[] = { "B", "KB", "MB", "GB", "TB" };
+	const int last = (int)(sizeof(suffixes) / sizeof(suffixes[0])) - 1;
+	int i = 0;
+
+	/* never step beyond the last suffix, even for 64-bit values */
+	while ((bytes >> 10) && i < last) {
+		bytes >>= 10;
+		i++;
+	}
+
+	snprintf(buf, sizeof(buf), "%lu %s", bytes, suffixes[i]);
+
+	return buf;
+}
+
+/*
+ * Result of reading a bitmap file: the superblock (converted to CPU byte
+ * order by bitmap_fd_read()) plus the bit counts gathered from the bitmap
+ * that follows it. Both counts are 0 when only the superblock was read.
+ */
+typedef struct bitmap_info_s {
+ bitmap_super_t sb;
+ unsigned long long total_bits; /* bits (chunks) counted in the file */
+ unsigned long long dirty_bits; /* how many of those bits are set */
+} bitmap_info_t;
+
+/*
+ * Count the set bits among the lowest num_bits bits of byte.
+ * num_bits must be in the range 1..8; any other value yields 0.
+ */
+inline int count_dirty_bits_byte(char byte, int num_bits)
+{
+	int dirty = 0;
+	int bit;
+
+	if (num_bits < 1 || num_bits > 8)
+		return 0;
+
+	for (bit = 0; bit < num_bits; bit++) {
+		if (byte & (1 << bit))
+			dirty++;
+	}
+
+	return dirty;
+}
+
+/*
+ * Count the set bits in the first num_bits bits of buf: every whole byte
+ * first, then the low-order bits of the following byte when num_bits is
+ * not a multiple of 8.
+ */
+int count_dirty_bits(char *buf, int num_bits)
+{
+	int whole_bytes = num_bits / 8;
+	int tail_bits = num_bits % 8;
+	int dirty = 0;
+	int idx = 0;
+
+	while (idx < whole_bytes) {
+		dirty += count_dirty_bits_byte(buf[idx], 8);
+		idx++;
+	}
+
+	if (tail_bits) /* not an even byte boundary */
+		dirty += count_dirty_bits_byte(buf[idx], tail_bits);
+
+	return dirty;
+}
+
+/*
+ * Calculate the number of bits in the bitmap: array_size (in 512-byte
+ * sectors) is converted to bytes and divided by chunksize (in bytes),
+ * rounding up.
+ * Returns 0 for a chunksize of 0 (e.g. read from a corrupt superblock)
+ * rather than dividing by zero.
+ */
+unsigned long long bitmap_bits(unsigned long long array_size,
+			       unsigned long chunksize)
+{
+	if (chunksize == 0)
+		return 0;
+	return (array_size * 512 + chunksize - 1) / chunksize;
+}
+
+/*
+ * Read a bitmap superblock and, unless 'brief' is set, count the bits of
+ * the bitmap that follows it.
+ *
+ * fd    - descriptor positioned at the start of the bitmap file
+ * brief - non-zero to read the superblock only
+ *
+ * Returns a malloc()ed bitmap_info_t (the caller frees it) with the
+ * superblock converted to CPU byte order, or NULL if memory cannot be
+ * allocated or the superblock cannot be read in full.
+ */
+bitmap_info_t *bitmap_fd_read(int fd, int brief)
+{
+	unsigned long long total_bits = 0, read_bits = 0, dirty_bits = 0;
+	bitmap_info_t *info;
+	char buf[512];
+	int n;
+
+	info = malloc(sizeof(*info));
+	if (info == NULL) {
+		fprintf(stderr, Name ": failed to allocate %lu bytes\n",
+			(unsigned long)sizeof(*info));
+		return NULL;
+	}
+
+	if (read(fd, &info->sb, sizeof(info->sb)) != sizeof(info->sb)) {
+		fprintf(stderr, Name ": failed to read superblock of bitmap "
+			"file: %s\n", strerror(errno));
+		free(info);
+		return NULL;
+	}
+
+	sb_le_to_cpu(&info->sb); /* convert superblock to CPU byte ordering */
+
+	/* a zero chunksize (corrupt superblock) cannot describe any bits */
+	if (brief || info->sb.sync_size == 0 || info->sb.chunksize == 0)
+		goto out;
+
+	/* read the rest of the file counting total bits and dirty bits --
+	 * we stop when either:
+	 * 1) we hit EOF (or a read error), in which case a warning is
+	 *    printed and only the bits actually read are reported
+	 * 2) we've read the full bitmap, in which case we ignore any trailing
+	 *    data in the file
+	 */
+	total_bits = bitmap_bits(info->sb.sync_size, info->sb.chunksize);
+
+	/* read a whole buffer at a time, not sizeof(*buf) == 1 byte */
+	while ((n = read(fd, buf, sizeof(buf))) > 0) {
+		unsigned long long remaining = total_bits - read_bits;
+
+		if (remaining > sizeof(buf) * 8) /* we want the full buffer */
+			remaining = sizeof(buf) * 8;
+		if (remaining > (unsigned long long)n * 8) /* short read */
+			remaining = (unsigned long long)n * 8;
+		dirty_bits += count_dirty_bits(buf, remaining);
+
+		read_bits += remaining;
+		if (read_bits >= total_bits) /* we've got what we want */
+			break;
+	}
+
+	if (read_bits < total_bits) { /* file truncated... */
+		fprintf(stderr, Name ": WARNING: bitmap file is not large "
+			"enough for array size %llu!\n\n", info->sb.sync_size);
+		total_bits = read_bits;
+	}
+out:
+	info->total_bits = total_bits;
+	info->dirty_bits = dirty_bits;
+	return info;
+}
+
+/*
+ * Open the named bitmap file read-only and hand it to bitmap_fd_read().
+ * Returns what bitmap_fd_read() returns, or NULL (after printing an
+ * error) when the file cannot be opened.
+ */
+bitmap_info_t *bitmap_file_read(char *filename, int brief)
+{
+	bitmap_info_t *result;
+	int bfd = open(filename, O_RDONLY);
+
+	if (bfd >= 0) {
+		result = bitmap_fd_read(bfd, brief);
+		close(bfd);
+		return result;
+	}
+
+	fprintf(stderr, Name ": failed to open bitmap file %s: %s\n",
+		filename, strerror(errno));
+	return NULL;
+}
+
+int ExamineBitmap(char *filename, int brief)
+{
+ /*
+ * Read the bitmap file and display its contents
+ *
+ * filename - bitmap file to examine
+ * brief - non-zero to print the superblock fields only and skip
+ * the bit counts
+ *
+ * Returns 0 on success, 1 if the file cannot be read or its
+ * version is not BITMAP_MAJOR. A bad magic number is reported on
+ * stderr but does not by itself change the return value.
+ */
+
+ bitmap_super_t *sb;
+ bitmap_info_t *info;
+ int rv = 1;
+
+ info = bitmap_file_read(filename, brief);
+ if (!info)
+ return rv;
+
+ sb = &info->sb;
+ printf(" Filename : %s\n", filename);
+ printf(" Magic : %08x\n", sb->magic);
+ if (sb->magic != BITMAP_MAGIC) {
+ fprintf(stderr, Name ": invalid bitmap magic 0x%x, the bitmap file appears to be corrupted\n", sb->magic);
+ }
+ printf(" Version : %d\n", sb->version);
+ if (sb->version != BITMAP_MAJOR) {
+ fprintf(stderr, Name ": unknown bitmap version %d, either the bitmap file is corrupted or you need to upgrade your tools\n", sb->version);
+ goto free_info;
+ }
+
+ rv = 0;
+ /* the 16 uuid bytes are printed as four 32-bit words in host order */
+ printf(" UUID : %08x.%08x.%08x.%08x\n",
+ *(__u32 *)(sb->uuid+0),
+ *(__u32 *)(sb->uuid+4),
+ *(__u32 *)(sb->uuid+8),
+ *(__u32 *)(sb->uuid+12));
+ printf(" Events : %llu\n", sb->events);
+ printf(" Events Cleared : %llu\n", sb->events_cleared);
+ printf(" State : %s\n", bitmap_state(sb->state));
+ printf(" Chunksize : %s\n", human_chunksize(sb->chunksize));
+ printf(" Daemon : %ds flush period\n", sb->daemon_sleep);
+ /* NOTE(review): bitmap.h describes sync_size as a sector count and
+ * bitmap_bits() multiplies it by 512, so the "* 1024" below looks
+ * inconsistent -- confirm the unit human_size() expects.
+ */
+ printf(" Sync Size : %llu%s\n", sb->sync_size,
+ human_size(sb->sync_size * 1024));
+ if (brief)
+ goto free_info;
+ /* the "+ 1" in the divisor presumably guards against total_bits == 0 */
+ printf(" Bitmap : %llu bits (chunks), %llu dirty (%2.1f%%)\n",
+ info->total_bits, info->dirty_bits,
+ 100.0 * info->dirty_bits / (info->total_bits + 1));
+free_info:
+ free(info);
+ return rv;
+}
+
+/*
+ * Create a bitmap file with a superblock and (optionally) a full bitmap.
+ *
+ * filename     - file to create; an existing file is only overwritten
+ *                when 'force' is non-zero
+ * uuid         - 16 bytes copied into the superblock, or NULL to leave
+ *                the uuid zeroed
+ * chunksize    - bytes of array data covered by each bit; must not be 0
+ * daemon_sleep - value stored in the superblock's daemon_sleep field
+ * array_size   - size in 512-byte sectors; 0 writes the superblock only
+ *
+ * Every bit of the bitmap is written as 1.
+ * Returns 0 on success and 1 on failure; once the file has been opened,
+ * any failure also removes it.
+ */
+int CreateBitmap(char *filename, int force, char uuid[16],
+		 unsigned long chunksize, unsigned long daemon_sleep,
+		 unsigned long long array_size)
+{
+	FILE *fp;
+	int rv = 1;
+	char block[512];
+	bitmap_super_t sb;
+	long long bytes, filesize;
+
+	if (!force && access(filename, F_OK) == 0) {
+		fprintf(stderr, Name ": bitmap file %s already exists, use --force to overwrite\n", filename);
+		return rv;
+	}
+
+	/* reject this before touching the file: the size calculation
+	 * below divides by chunksize
+	 */
+	if (chunksize == 0) {
+		fprintf(stderr, Name ": invalid bitmap chunksize 0 for %s\n", filename);
+		return rv;
+	}
+
+	fp = fopen(filename, "w");
+	if (fp == NULL) {
+		fprintf(stderr, Name ": failed to open bitmap file %s: %s\n",
+			filename, strerror(errno));
+		return rv;
+	}
+
+	memset(&sb, 0, sizeof(sb));
+	sb.magic = BITMAP_MAGIC;
+	sb.version = BITMAP_MAJOR;
+	if (uuid != NULL)
+		memcpy(sb.uuid, uuid, 16);
+	sb.chunksize = chunksize;
+	sb.daemon_sleep = daemon_sleep;
+	sb.sync_size = array_size;
+
+	sb_cpu_to_le(&sb); /* convert to on-disk byte ordering */
+
+	if (fwrite(&sb, sizeof(sb), 1, fp) != 1) {
+		fprintf(stderr, Name ": failed to write superblock to bitmap file %s: %s\n", filename, strerror(errno));
+		goto out;
+	}
+
+	/* calculate the size of the bitmap and write it to disk */
+	bytes = (bitmap_bits(array_size, chunksize) + 7) / 8;
+	if (!bytes) {
+		rv = 0;
+		goto out;
+	}
+
+	filesize = bytes + sizeof(sb);
+
+	memset(block, 0xff, sizeof(block));
+
+	/* whole blocks are written; the excess is trimmed off below */
+	while (bytes > 0) {
+		if (fwrite(block, sizeof(block), 1, fp) != 1) {
+			fprintf(stderr, Name ": failed to write bitmap file %s: %s\n", filename, strerror(errno));
+			goto out;
+		}
+		bytes -= (long long)sizeof(block);
+	}
+
+	/* flush before truncating: data still sitting in the stdio buffer
+	 * would otherwise be written by fclose() after the truncate and
+	 * undo it
+	 */
+	if (fflush(fp) != 0) {
+		fprintf(stderr, Name ": failed to write bitmap file %s: %s\n", filename, strerror(errno));
+		goto out;
+	}
+
+	/* make the file be the right size (well, to the nearest byte);
+	 * trailing data is harmless, so a failure here is only a warning
+	 */
+	if (ftruncate(fileno(fp), filesize) != 0)
+		fprintf(stderr, Name ": WARNING: could not trim bitmap file %s: %s\n",
+			filename, strerror(errno));
+
+	rv = 0;
+out:
+	/* fclose() performs the final flush, so its result matters too */
+	if (fclose(fp) != 0 && rv == 0) {
+		fprintf(stderr, Name ": failed to write bitmap file %s: %s\n", filename, strerror(errno));
+		rv = 1;
+	}
+	if (rv)
+		unlink(filename); /* possibly corrupted, better get rid of it */
+	return rv;
+}
diff --git a/bitmap.h b/bitmap.h
new file mode 100644
index 0000000..17027e0
--- /dev/null
+++ b/bitmap.h
@@ -0,0 +1,273 @@
+/*
+ * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
+ *
+ * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
+ */
+#ifndef BITMAP_H
+#define BITMAP_H 1
+
+#define BITMAP_MAJOR 3
+#define BITMAP_MINOR 38
+
+/*
+ * in-memory bitmap:
+ *
+ * Use 16 bit block counters to track pending writes to each "chunk".
+ * The 2 high order bits are special-purpose, the first is a flag indicating
+ * whether a resync is needed. The second is a flag indicating whether a
+ * resync is active.
+ * This means that the counter is actually 14 bits:
+ *
+ * +--------+--------+------------------------------------------------+
+ * | resync | resync | counter |
+ * | needed | active | |
+ * | (0-1) | (0-1) | (0-16383) |
+ * +--------+--------+------------------------------------------------+
+ *
+ * The "resync needed" bit is set when:
+ * a '1' bit is read from storage at startup.
+ * a write request fails on some drives
+ * a resync is aborted on a chunk with 'resync active' set
+ * It is cleared (and resync-active set) when a resync starts across all drives
+ * of the chunk.
+ *
+ *
+ * The "resync active" bit is set when:
+ * a resync is started on all drives, and resync_needed is set.
+ * resync_needed will be cleared (as long as resync_active wasn't already set).
+ * It is cleared when a resync completes.
+ *
+ * The counter counts pending write requests, plus the on-disk bit.
+ * When the counter is '1' and the resync bits are clear, the on-disk
+ * bit can be cleared as well, thus setting the counter to 0.
+ * When we set a bit, or in the counter (to start a write), if the field is
+ * 0, we first set the disk bit and set the counter to 1.
+ *
+ * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
+ * counters as a fallback when "page" memory cannot be allocated:
+ *
+ * Normal case (page memory allocated):
+ *
+ * page pointer (32-bit)
+ *
+ * [ ] ------+
+ * |
+ * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters)
+ * c1 c2 c2048
+ *
+ * Hijacked case (page memory allocation failed):
+ *
+ * hijacked page pointer (32-bit)
+ *
+ * [ ][ ] (no page memory allocated)
+ * counter #1 (16-bit) counter #2 (16-bit)
+ *
+ */
+
+#ifdef __KERNEL__
+
+#define PAGE_BITS (PAGE_SIZE << 3)
+#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
+
+typedef __u16 bitmap_counter_t;
+#define COUNTER_BITS 16
+#define COUNTER_BIT_SHIFT 4
+#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8)
+#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3)
+
+#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
+#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
+#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)
+#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK)
+#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK)
+#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX)
+
+/* how many counters per page? */
+#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
+/* same, except a shift value for more efficient bitops */
+#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
+/* same, except a mask value for more efficient bitops */
+#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1)
+
+#define BITMAP_BLOCK_SIZE 512
+#define BITMAP_BLOCK_SHIFT 9
+
+/* how many blocks per chunk? (this is variable) */
+#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT)
+#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT)
+#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1)
+
+/* when hijacked, the counters and bits represent even larger "chunks" */
+/* there will be 1024 chunks represented by each counter in the page pointers */
+#define PAGEPTR_BLOCK_RATIO(bitmap) \
+ (CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1)
+#define PAGEPTR_BLOCK_SHIFT(bitmap) \
+ (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
+#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
+
+/*
+ * on-disk bitmap:
+ *
+ * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
+ * file a page at a time. There's a superblock at the start of the file.
+ */
+
+/* map chunks (bits) to file pages - offset by the size of the superblock */
+#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
+
+#endif
+
+/*
+ * bitmap structures:
+ */
+
+#define BITMAP_MAGIC 0x6d746962
+
+/* use these for bitmap->flags and bitmap->sb->state bit-fields */
+enum bitmap_state {
+ BITMAP_ACTIVE = 0x001, /* the bitmap is in use */
+ BITMAP_STALE = 0x002 /* the bitmap file is out of date or had -EIO */
+};
+
+/* the superblock at the front of the bitmap file -- little endian */
+/* The named fields occupy bytes 0-59 (offsets are noted on each line);
+ * pad[] then extends the structure to 4096 bytes, assuming the compiler
+ * inserts no padding of its own.
+ */
+typedef struct bitmap_super_s {
+ __u32 magic; /* 0 BITMAP_MAGIC */
+ __u32 version; /* 4 the bitmap major for now, could change... */
+ __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */
+ __u64 events; /* 24 event counter for the bitmap (1)*/
+ __u64 events_cleared;/*32 event counter when last bit cleared (2) */
+ __u64 sync_size; /* 40 the size of the md device's sync range(3) */
+ __u32 state; /* 48 bitmap state information */
+ __u32 chunksize; /* 52 the bitmap chunk size in bytes */
+ __u32 daemon_sleep; /* 56 seconds between disk flushes */
+
+ __u8 pad[4096 - 60]; /* set to zero */
+} bitmap_super_t;
+
+/* notes:
+ * (1) This event counter is updated before the eventcounter in the md superblock
+ * When a bitmap is loaded, it is only accepted if this event counter is equal
+ * to, or one greater than, the event counter in the superblock.
+ * (2) This event counter is updated when the other one is *if*and*only*if* the
+ * array is not degraded. As bits are not cleared when the array is degraded,
+ * this represents the last time that any bits were cleared.
+ * If a device is being added that has an event count with this value or
+ * higher, it is accepted as conforming to the bitmap.
+ * (3) This is the number of sectors represented by the bitmap, and is the range that
+ * resync happens across. For raid1 and raid5/6 it is the size of individual
+ * devices. For raid10 it is the size of the array.
+ */
+
+#ifdef __KERNEL__
+
+/* the in-memory bitmap is represented by bitmap_pages */
+struct bitmap_page {
+ /*
+ * map points to the actual memory page
+ */
+ char *map;
+ /*
+ * in emergencies (when map cannot be alloced), hijack the map
+ * pointer and use it as two counters itself
+ */
+ unsigned int hijacked;
+ /*
+ * count of dirty bits on the page
+ */
+ int count;
+};
+
+/* keep track of bitmap file pages that have pending writes on them */
+struct page_list {
+ struct list_head list;
+ struct page *page;
+};
+
+/* the main bitmap structure - one per mddev */
+struct bitmap {
+ struct bitmap_page *bp;
+ unsigned long pages; /* total number of pages in the bitmap */
+ unsigned long missing_pages; /* number of pages not yet allocated */
+
+ mddev_t *mddev; /* the md device that the bitmap is for */
+
+ int counter_bits; /* how many bits per block counter */
+
+ /* bitmap chunksize -- how much data does each bit represent? */
+ unsigned long chunksize;
+ unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */
+ unsigned long chunks; /* total number of data chunks for the array */
+
+ /* We hold a count on the chunk currently being synced, and drop
+ * it when the last block is started. If the resync is aborted
+ * midway, we need to be able to drop that count, so we remember
+ * the counted chunk..
+ */
+ unsigned long syncchunk;
+
+ __u64 events_cleared;
+
+ /* bitmap spinlock */
+ spinlock_t lock;
+
+ struct file *file; /* backing disk file */
+ struct page *sb_page; /* cached copy of the bitmap file superblock */
+ struct page **filemap; /* list of cache pages for the file */
+ unsigned long *filemap_attr; /* attributes associated w/ filemap pages */
+ unsigned long file_pages; /* number of pages in the file */
+
+ unsigned long flags;
+
+ /*
+ * the bitmap daemon - periodically wakes up and sweeps the bitmap
+ * file, cleaning up bits and flushing out pages to disk as necessary
+ */
+ mdk_thread_t *daemon;
+ unsigned long daemon_sleep; /* how many seconds between updates? */
+
+ /*
+ * bitmap write daemon - this daemon performs writes to the bitmap file
+ * this thread is only needed because of a limitation in ext3 (jbd)
+ * that does not allow a task to have two journal transactions ongoing
+ * simultaneously (even if the transactions are for two different
+ * filesystems) -- in the case of bitmap, that would be the filesystem
+ * that the bitmap file resides on and the filesystem that is mounted
+ * on the md device -- see current->journal_info in jbd/transaction.c
+ */
+ mdk_thread_t *write_daemon;
+ mdk_thread_t *writeback_daemon;
+ spinlock_t write_lock;
+ struct semaphore write_ready;
+ struct semaphore write_done;
+ unsigned long writes_pending;
+ wait_queue_head_t write_wait;
+ struct list_head write_pages;
+ struct list_head complete_pages;
+ mempool_t *write_pool;
+};
+
+/* the bitmap API */
+
+/* these are used only by md/bitmap */
+int bitmap_create(mddev_t *mddev);
+void bitmap_destroy(mddev_t *mddev);
+int bitmap_active(struct bitmap *bitmap);
+
+char *file_path(struct file *file, char *buf, int count);
+void bitmap_print_sb(struct bitmap *bitmap);
+int bitmap_update_sb(struct bitmap *bitmap);
+
+int bitmap_setallbits(struct bitmap *bitmap);
+
+/* these are exported */
+void bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors);
+void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
+ int success);
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks);
+void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted);
+void bitmap_close_sync(struct bitmap *bitmap);
+
+int bitmap_unplug(struct bitmap *bitmap);
+#endif
+
+#endif
diff --git a/md_u.h b/md_u.h
index 22a1543..6b067c6 100644
--- a/md_u.h
+++ b/md_u.h
@@ -23,6 +23,7 @@
#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t)
#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13)
#define RAID_AUTORUN _IO (MD_MAJOR, 0x14)
+#define GET_BITMAP_FILE _IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t)
/* configuration */
#define CLEAR_ARRAY _IO (MD_MAJOR, 0x20)
@@ -35,6 +36,7 @@
#define PROTECT_ARRAY _IO (MD_MAJOR, 0x27)
#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28)
#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29)
+#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int)
/* usage */
#define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t)
@@ -105,6 +107,11 @@ typedef struct mdu_start_info_s {
} mdu_start_info_t;
+/* Argument type of the GET_BITMAP_FILE ioctl: a fixed 4096-byte buffer
+ * holding the bitmap file's path name.
+ */
+typedef struct mdu_bitmap_file_s
+{
+ char pathname[4096];
+} mdu_bitmap_file_t;
+
typedef struct mdu_param_s
{
int personality; /* 1,2,3,4 */
diff --git a/mdadm.c b/mdadm.c
index 47ea2fe..d8d46ee 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -25,6 +25,9 @@
* The University of New South Wales
* Sydney, 2052
* Australia
+ *
+ * Additions for bitmap and async RAID options, Copyright (C) 2003-2004,
+ * Paul Clements, SteelEye Technology, Inc.
*/
#include "mdadm.h"
@@ -56,6 +59,9 @@ int main(int argc, char *argv[])
char devmode = 0;
int runstop = 0;
int readonly = 0;
+ int bitmap_fd = -1;
+ char *bitmap_file = NULL;
+ int bitmap_chunk = UnSet;
int SparcAdjust = 0;
mddev_dev_t devlist = NULL;
mddev_dev_t *devlistend = & devlist;
@@ -95,6 +101,7 @@ int main(int argc, char *argv[])
ident.spare_group = NULL;
ident.autof = 0;
ident.st = NULL;
+ ident.bitmap_fd = -1;
while ((option_index = -1) ,
(opt=getopt_long(argc, argv,
@@ -128,7 +135,10 @@ int main(int argc, char *argv[])
case 'v': verbose = 1;
continue;
- case 'b': brief = 1;
+ case 'b':
+ if (mode == ASSEMBLE || mode == BUILD || mode == CREATE)
+ break; /* b means bitmap */
+ brief = 1;
continue;
case ':':
@@ -159,6 +169,7 @@ int main(int argc, char *argv[])
case '#':
case 'D':
case 'E':
+ case 'X':
case 'Q': newmode = MISC; break;
case 'R':
case 'S':
@@ -574,6 +585,8 @@ int main(int argc, char *argv[])
continue;
case O(MONITOR,'d'): /* delay in seconds */
+ case O(BUILD,'d'): /* delay for bitmap updates */
+ case O(CREATE,'d'):
if (delay)
fprintf(stderr, Name ": only specify delay once. %s ignored.\n",
optarg);
@@ -655,6 +668,7 @@ int main(int argc, char *argv[])
case O(MISC,'K'):
case O(MISC,'R'):
case O(MISC,'S'):
+ case O(MISC,'X'):
case O(MISC,'o'):
case O(MISC,'w'):
if (devmode && devmode != opt &&
@@ -676,6 +690,36 @@ int main(int argc, char *argv[])
}
SparcAdjust = 1;
continue;
+
+ case O(ASSEMBLE,'b'): /* here we simply set the bitmap file */
+ if (!optarg) {
+ fprintf(stderr, Name ": bitmap file needed with -b in --assemble mode\n");
+ exit(2);
+ }
+ bitmap_fd = open(optarg, O_RDWR);
+ if (!*optarg || bitmap_fd < 0) {
+ fprintf(stderr, Name ": cannot open bitmap file %s: %s\n", optarg, strerror(errno));
+ exit(2);
+ }
+ ident.bitmap_fd = bitmap_fd; /* for Assemble */
+ continue;
+ case O(BUILD,'b'):
+ case O(CREATE,'b'): /* here we create the bitmap */
+ bitmap_file = optarg;
+ continue;
+
+ case O(BUILD,4):
+ case O(CREATE,4): /* bitmap chunksize */
+ bitmap_chunk = strtol(optarg, &c, 10);
+ if (!optarg[0] || *c || bitmap_chunk < 0 ||
+ bitmap_chunk & (bitmap_chunk - 1)) {
+ fprintf(stderr, Name ": invalid bitmap chunksize: %s\n",
+ optarg);
+ exit(2);
+ }
+ /* convert K to B, chunk of 0K means 512B */
+ bitmap_chunk = bitmap_chunk ? bitmap_chunk * 1024 : 512;
+ continue;
}
/* We have now processed all the valid options. Anything else is
* an error
@@ -726,6 +770,7 @@ int main(int argc, char *argv[])
}
}
+
rv = 0;
switch(mode) {
case MANAGE:
@@ -813,9 +858,27 @@ int main(int argc, char *argv[])
}
break;
case BUILD:
- rv = Build(devlist->devname, mdfd, chunk, level, layout, raiddisks, devlist->next, assume_clean);
+ if (bitmap_chunk == UnSet) bitmap_chunk = DEFAULT_BITMAP_CHUNK;
+ if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
+ if (bitmap_file) {
+ bitmap_fd = open(bitmap_file, O_RDWR,0);
+ if (bitmap_fd < 0 && errno != ENOENT) {
+ perror(Name ": cannot create bitmap file");
+ rv |= 1;
+ break;
+ }
+ if (bitmap_fd < 0) {
+ bitmap_fd = CreateBitmap(bitmap_file, force, NULL,
+ bitmap_chunk, delay, size);
+ }
+ }
+ rv = Build(devlist->devname, mdfd, chunk, level, layout,
+ raiddisks, devlist->next, assume_clean,
+ bitmap_file, bitmap_chunk, delay);
break;
case CREATE:
+ if (bitmap_chunk == UnSet) bitmap_chunk = DEFAULT_BITMAP_CHUNK;
+ if (delay == 0) delay = DEFAULT_BITMAP_DELAY;
if (ss == NULL) {
for(i=0; !ss && superlist[i]; i++)
ss = superlist[i]->match_metadata_desc("default");
@@ -827,7 +890,8 @@ int main(int argc, char *argv[])
rv = Create(ss, devlist->devname, mdfd, chunk, level, layout, size<0 ? 0 : size,
raiddisks, sparedisks,
- devs_found-1, devlist->next, runstop, verbose, force);
+ devs_found-1, devlist->next, runstop, verbose, force,
+ bitmap_file, bitmap_chunk, delay);
break;
case MISC:
@@ -891,6 +955,8 @@ int main(int argc, char *argv[])
rv |= Kill(dv->devname, force); continue;
case 'Q':
rv |= Query(dv->devname); continue;
+ case 'X':
+ rv |= ExamineBitmap(dv->devname, brief); continue;
}
mdfd = open_mddev(dv->devname, 0);
if (mdfd>=0) {
diff --git a/mdadm.h b/mdadm.h
index d42e853..5733a03 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -61,9 +61,12 @@ char *strncpy(char *dest, const char *src, size_t n) __THROW;
#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */
#endif
+#define DEFAULT_BITMAP_CHUNK 4096
+#define DEFAULT_BITMAP_DELAY 5
#include "md_u.h"
#include "md_p.h"
+#include "bitmap.h"
/* general information that might be extracted from a superblock */
struct mdinfo {
@@ -119,6 +122,7 @@ typedef struct mddev_ident_s {
struct supertype *st;
int autof; /* 1 for normal, 2 for partitioned */
char *spare_group;
+ int bitmap_fd;
struct mddev_ident_s *next;
} *mddev_ident_t;
@@ -212,13 +216,15 @@ extern int Assemble(struct supertype *st, char *mddev, int mdfd,
extern int Build(char *mddev, int mdfd, int chunk, int level, int layout,
int raiddisks,
- mddev_dev_t devlist, int assume_clean);
+ mddev_dev_t devlist, int assume_clean,
+ char *bitmap_file, int bitmap_chunk, int delay);
extern int Create(struct supertype *st, char *mddev, int mdfd,
int chunk, int level, int layout, unsigned long size, int raiddisks, int sparedisks,
int subdevs, mddev_dev_t devlist,
- int runstop, int verbose, int force);
+ int runstop, int verbose, int force,
+ char *bitmap_file, int bitmap_chunk, int delay);
extern int Detail(char *dev, int brief, int test);
extern int Query(char *dev);
@@ -231,6 +237,11 @@ extern int Monitor(mddev_dev_t devlist,
extern int Kill(char *dev, int force);
+extern int CreateBitmap(char *filename, int force, char uuid[16],
+ unsigned long chunksize, unsigned long daemon_sleep,
+ unsigned long long array_size);
+extern int ExamineBitmap(char *filename, int brief);
+
extern int md_get_version(int fd);
extern int get_linux_version(void);
extern int parse_uuid(char *str, int uuid[4]);