From cbd73ba1635c061fa71ff0476cbce087b389d1ad Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Thu, 21 Feb 2013 16:34:32 +0100 Subject: tdb: introduce tdb->hdr_ofs This makes it possible to have some extra headers before the real tdb content starts in the file. This will be used e.g. to implement locking based on robust mutexes. Pair-Programmed-With: Stefan Metzmacher Pair-Programmed-With: Michael Adam Signed-off-by: Volker Lendecke Signed-off-by: Stefan Metzmacher Signed-off-by: Michael Adam Reviewed-by: Jeremy Allison --- lib/tdb/common/io.c | 101 ++++++++++++++++++++++++++++++++++++++----- lib/tdb/common/open.c | 53 ++++++++++++++++------- lib/tdb/common/summary.c | 22 ++++++---- lib/tdb/common/tdb_private.h | 3 ++ lib/tdb/test/run-3G-file.c | 6 +-- 5 files changed, 146 insertions(+), 39 deletions(-) (limited to 'lib') diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c index 11dfefd102..07d22ccdb2 100644 --- a/lib/tdb/common/io.c +++ b/lib/tdb/common/io.c @@ -28,6 +28,70 @@ #include "tdb_private.h" +/* + * tdb->hdr_ofs is 0 for now. + * + * Note: that we only have the 4GB limit of tdb_off_t for + * tdb->map_size. The file size on disk can be 4GB + tdb->hdr_ofs! 
+ */ + +static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off) +{ + off_t tmp = tdb->hdr_ofs + *off; + + if ((tmp < tdb->hdr_ofs) || (tmp < *off)) { + errno = EIO; + return false; + } + + *off = tmp; + return true; +} + +static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf, + size_t count, off_t offset) +{ + if (!tdb_adjust_offset(tdb, &offset)) { + return -1; + } + return pwrite(tdb->fd, buf, count, offset); +} + +static ssize_t tdb_pread(struct tdb_context *tdb, void *buf, + size_t count, off_t offset) +{ + if (!tdb_adjust_offset(tdb, &offset)) { + return -1; + } + return pread(tdb->fd, buf, count, offset); +} + +static int tdb_ftruncate(struct tdb_context *tdb, off_t length) +{ + if (!tdb_adjust_offset(tdb, &length)) { + return -1; + } + return ftruncate(tdb->fd, length); +} + +static int tdb_fstat(struct tdb_context *tdb, struct stat *buf) +{ + int ret; + + ret = fstat(tdb->fd, buf); + if (ret == -1) { + return -1; + } + + if (buf->st_size < tdb->hdr_ofs) { + errno = EIO; + return -1; + } + buf->st_size -= tdb->hdr_ofs; + + return ret; +} + /* check for an out of bounds access - if it is out of bounds then see if the database has been expanded by someone else and expand if necessary @@ -58,7 +122,7 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, return -1; } - if (fstat(tdb->fd, &st) == -1) { + if (tdb_fstat(tdb, &st) == -1) { tdb->ecode = TDB_ERR_IO; return -1; } @@ -122,16 +186,18 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, tdb->ecode = TDB_ERR_IO; return -1; #else - ssize_t written = pwrite(tdb->fd, buf, len, off); + ssize_t written; + + written = tdb_pwrite(tdb, buf, len, off); + if ((written != (ssize_t)len) && (written != -1)) { /* try once more */ tdb->ecode = TDB_ERR_IO; TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only " "%zi of %u bytes at %u, trying once more\n", written, len, off)); - written = pwrite(tdb->fd, (const char *)buf+written, - len-written, - off+written); + 
written = tdb_pwrite(tdb, (const char *)buf+written, + len-written, off+written); } if (written == -1) { /* Ensure ecode is set for log fn. */ @@ -176,7 +242,9 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, tdb->ecode = TDB_ERR_IO; return -1; #else - ssize_t ret = pread(tdb->fd, buf, len, off); + ssize_t ret; + + ret = tdb_pread(tdb, buf, len, off); if (ret != (ssize_t)len) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_IO; @@ -258,7 +326,8 @@ int tdb_mmap(struct tdb_context *tdb) if (should_mmap(tdb)) { tdb->map_ptr = mmap(NULL, tdb->map_size, PROT_READ|(tdb->read_only? 0:PROT_WRITE), - MAP_SHARED|MAP_FILE, tdb->fd, 0); + MAP_SHARED|MAP_FILE, tdb->fd, + tdb->hdr_ofs); /* * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! @@ -303,12 +372,12 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad return -1; } - if (ftruncate(tdb->fd, new_size) == -1) { + if (tdb_ftruncate(tdb, new_size) == -1) { char b = 0; - ssize_t written = pwrite(tdb->fd, &b, 1, new_size - 1); + ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1); if (written == 0) { /* try once more, potentially revealing errno */ - written = pwrite(tdb->fd, &b, 1, new_size - 1); + written = tdb_pwrite(tdb, &b, 1, new_size - 1); } if (written == 0) { /* again - give up, guessing errno */ @@ -328,10 +397,10 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad memset(buf, TDB_PAD_BYTE, sizeof(buf)); while (addition) { size_t n = addition>sizeof(buf)?sizeof(buf):addition; - ssize_t written = pwrite(tdb->fd, buf, n, size); + ssize_t written = tdb_pwrite(tdb, buf, n, size); if (written == 0) { /* prevent infinite loops: try _once_ more */ - written = pwrite(tdb->fd, buf, n, size); + written = tdb_pwrite(tdb, buf, n, size); } if (written == 0) { /* give up, trying to provide a useful errno */ @@ -437,6 +506,14 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size) /* must know about any previous 
expansions by another process */ tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1); + /* + * Note: that we don't care about tdb->hdr_ofs != 0 here + * + * The 4GB limitation is just related to tdb->map_size + * and the offset calculation in the records. + * + * The file on disk can be up to 4GB + tdb->hdr_ofs + */ size = tdb_expand_adjust(tdb->map_size, size, tdb->page_size); if (!tdb_add_off_t(tdb->map_size, size, &new_size)) { diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c index 17ab0b7c28..162f30d404 100644 --- a/lib/tdb/common/open.c +++ b/lib/tdb/common/open.c @@ -194,6 +194,7 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td unsigned v; const char *hash_alg; uint32_t magic1, magic2; + int ret; ZERO_STRUCT(header); @@ -340,7 +341,6 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td if ((tdb_flags & TDB_CLEAR_IF_FIRST) && (!tdb->read_only) && (locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) { - int ret; ret = tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, TDB_LOCK_WAIT); if (ret == -1) { @@ -400,8 +400,18 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td tdb->flags |= TDB_CONVERT; tdb_convert(&header, sizeof(header)); } - if (fstat(tdb->fd, &st) == -1) + + /* + * We only use st.st_dev and st.st_ino from the raw fstat() + * call, everything else needs to use tdb_fstat() in order + * to skip tdb->hdr_ofs! + */ + if (fstat(tdb->fd, &st) == -1) { goto fail; + } + tdb->device = st.st_dev; + tdb->inode = st.st_ino; + ZERO_STRUCT(st); if (header.rwlocks != 0 && header.rwlocks != TDB_FEATURE_FLAG_MAGIC && @@ -446,28 +456,27 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td } /* Is it already in the open list? If so, fail. 
*/ - if (tdb_already_open(st.st_dev, st.st_ino)) { + if (tdb_already_open(tdb->device, tdb->inode)) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " "%s (%d,%d) is already open in this process\n", - name, (int)st.st_dev, (int)st.st_ino)); + name, (int)tdb->device, (int)tdb->inode)); errno = EBUSY; goto fail; } - /* Beware truncation! */ - tdb->map_size = st.st_size; - if (tdb->map_size != st.st_size) { - /* Ensure ecode is set for log fn. */ - tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " - "len %llu too large!\n", (long long)st.st_size)); + /* + * We had tdb_mmap(tdb) here before, + * but we need to use tdb_fstat(), + * which is triggered from tdb_oob() before calling tdb_mmap(). + * As this skips tdb->hdr_ofs. + */ + tdb->map_size = 0; + ret = tdb->methods->tdb_oob(tdb, 0, 1, 0); + if (ret == -1) { errno = EIO; goto fail; } - tdb->device = st.st_dev; - tdb->inode = st.st_ino; - tdb_mmap(tdb); if (locked) { if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " @@ -649,6 +658,11 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: open failed (%s)\n", strerror(errno))); goto fail; } + /* + * We only use st.st_dev and st.st_ino from the raw fstat() + * call, everything else needs to use tdb_fstat() in order + * to skip tdb->hdr_ofs! + */ if (fstat(tdb->fd, &st) != 0) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: fstat failed (%s)\n", strerror(errno))); goto fail; @@ -657,7 +671,16 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: file dev/inode has changed!\n")); goto fail; } - if (tdb_mmap(tdb) != 0) { + ZERO_STRUCT(st); + + /* + * We had tdb_mmap(tdb) here before, + * but we need to use tdb_fstat(), + * which is triggered from tdb_oob() before calling tdb_mmap(). + * As this skips tdb->hdr_ofs. 
+ */ + tdb->map_size = 0; + if (tdb->methods->tdb_oob(tdb, 0, 1, 0) != 0) { goto fail; } #endif /* fake pread or pwrite */ diff --git a/lib/tdb/common/summary.c b/lib/tdb/common/summary.c index 6f2e0a9e80..e9989f676f 100644 --- a/lib/tdb/common/summary.c +++ b/lib/tdb/common/summary.c @@ -18,7 +18,8 @@ #include "tdb_private.h" #define SUMMARY_FORMAT \ - "Size of file/data: %u/%zu\n" \ + "Size of file/data: %llu/%zu\n" \ + "Header offset/logical size: %zu/%zu\n" \ "Number of records: %zu\n" \ "Incompatible hash: %s\n" \ "Active/supported feature flags: 0x%08x/0x%08x\n" \ @@ -88,6 +89,7 @@ static size_t get_hash_length(struct tdb_context *tdb, unsigned int i) _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) { + off_t file_size; tdb_off_t off, rec_off; struct tally freet, keys, data, dead, extra, hashval, uncoal; struct tdb_record rec; @@ -165,9 +167,11 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) for (off = 0; off < tdb->hash_size; off++) tally_add(&hashval, get_hash_length(tdb, off)); + file_size = tdb->hdr_ofs + tdb->map_size; len = asprintf(&ret, SUMMARY_FORMAT, - tdb->map_size, keys.total+data.total, + (unsigned long long)file_size, keys.total+data.total, + (size_t)tdb->hdr_ofs, (size_t)tdb->map_size, keys.num, (tdb->hash_fn == tdb_jenkins_hash)?"yes":"no", (unsigned)tdb->feature_flags, TDB_SUPPORTED_FEATURE_FLAGS, @@ -182,16 +186,16 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) hashval.min, tally_mean(&hashval), hashval.max, uncoal.total, uncoal.min, tally_mean(&uncoal), uncoal.max, - keys.total * 100.0 / tdb->map_size, - data.total * 100.0 / tdb->map_size, - extra.total * 100.0 / tdb->map_size, - freet.total * 100.0 / tdb->map_size, - dead.total * 100.0 / tdb->map_size, + keys.total * 100.0 / file_size, + data.total * 100.0 / file_size, + extra.total * 100.0 / file_size, + freet.total * 100.0 / file_size, + dead.total * 100.0 / file_size, (keys.num + freet.num + dead.num) * (sizeof(struct tdb_record) + sizeof(uint32_t)) - * 100.0 / 
tdb->map_size, + * 100.0 / file_size, tdb->hash_size * sizeof(tdb_off_t) - * 100.0 / tdb->map_size); + * 100.0 / file_size); if (len == -1) { goto unlock; } diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h index aa9dd55ba4..4981e2cd6a 100644 --- a/lib/tdb/common/tdb_private.h +++ b/lib/tdb/common/tdb_private.h @@ -202,6 +202,9 @@ struct tdb_context { int num_lockrecs; struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */ int lockrecs_array_length; + + tdb_off_t hdr_ofs; /* this is 0 for now */ + enum TDB_ERROR ecode; /* error code for last tdb error */ uint32_t hash_size; uint32_t feature_flags; diff --git a/lib/tdb/test/run-3G-file.c b/lib/tdb/test/run-3G-file.c index 67fd54f54f..900b1a667a 100644 --- a/lib/tdb/test/run-3G-file.c +++ b/lib/tdb/test/run-3G-file.c @@ -22,12 +22,12 @@ static int tdb_expand_file_sparse(struct tdb_context *tdb, return -1; } - if (ftruncate(tdb->fd, size+addition) == -1) { + if (tdb_ftruncate(tdb, size+addition) == -1) { char b = 0; - ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); + ssize_t written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1); if (written == 0) { /* try once more, potentially revealing errno */ - written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); + written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1); } if (written == 0) { /* again - give up, guessing errno */ -- cgit