summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorVolker Lendecke <vl@samba.org>2013-02-21 16:34:32 +0100
committerJeremy Allison <jra@samba.org>2014-05-22 21:05:15 +0200
commitcbd73ba1635c061fa71ff0476cbce087b389d1ad (patch)
tree16e67ca6bad58a5c3ba4c58ad77c3683fa74eb4b /lib
parentc29e64d97ee18e9d7946151052d1757084f861c6 (diff)
downloadsamba-cbd73ba1635c061fa71ff0476cbce087b389d1ad.tar.gz
samba-cbd73ba1635c061fa71ff0476cbce087b389d1ad.tar.xz
samba-cbd73ba1635c061fa71ff0476cbce087b389d1ad.zip
tdb: introduce tdb->hdr_ofs
This makes it possible to have some extra headers before the real tdb content starts in the file. This will be used e.g. to implement locking based on robust mutexes. Pair-Programmed-With: Stefan Metzmacher <metze@samba.org> Pair-Programmed-With: Michael Adam <obnox@samba.org> Signed-off-by: Volker Lendecke <vl@samba.org> Signed-off-by: Stefan Metzmacher <metze@samba.org> Signed-off-by: Michael Adam <obnox@samba.org> Reviewed-by: Jeremy Allison <jra@samba.org>
Diffstat (limited to 'lib')
-rw-r--r--lib/tdb/common/io.c101
-rw-r--r--lib/tdb/common/open.c53
-rw-r--r--lib/tdb/common/summary.c22
-rw-r--r--lib/tdb/common/tdb_private.h3
-rw-r--r--lib/tdb/test/run-3G-file.c6
5 files changed, 146 insertions, 39 deletions
diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c
index 11dfefd102b..07d22ccdb21 100644
--- a/lib/tdb/common/io.c
+++ b/lib/tdb/common/io.c
@@ -28,6 +28,70 @@
#include "tdb_private.h"
+/*
+ * tdb->hdr_ofs is 0 for now.
+ *
+ * Note: that we only have the 4GB limit of tdb_off_t for
+ * tdb->map_size. The file size on disk can be 4GB + tdb->hdr_ofs!
+ */
+
+static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off)
+{
+ off_t tmp = tdb->hdr_ofs + *off;
+
+ if ((tmp < tdb->hdr_ofs) || (tmp < *off)) {
+ errno = EIO;
+ return false;
+ }
+
+ *off = tmp;
+ return true;
+}
+
+static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf,
+ size_t count, off_t offset)
+{
+ if (!tdb_adjust_offset(tdb, &offset)) {
+ return -1;
+ }
+ return pwrite(tdb->fd, buf, count, offset);
+}
+
+static ssize_t tdb_pread(struct tdb_context *tdb, void *buf,
+ size_t count, off_t offset)
+{
+ if (!tdb_adjust_offset(tdb, &offset)) {
+ return -1;
+ }
+ return pread(tdb->fd, buf, count, offset);
+}
+
+static int tdb_ftruncate(struct tdb_context *tdb, off_t length)
+{
+ if (!tdb_adjust_offset(tdb, &length)) {
+ return -1;
+ }
+ return ftruncate(tdb->fd, length);
+}
+
+static int tdb_fstat(struct tdb_context *tdb, struct stat *buf)
+{
+ int ret;
+
+ ret = fstat(tdb->fd, buf);
+ if (ret == -1) {
+ return -1;
+ }
+
+ if (buf->st_size < tdb->hdr_ofs) {
+ errno = EIO;
+ return -1;
+ }
+ buf->st_size -= tdb->hdr_ofs;
+
+ return ret;
+}
+
/* check for an out of bounds access - if it is out of bounds then
see if the database has been expanded by someone else and expand
if necessary
@@ -58,7 +122,7 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len,
return -1;
}
- if (fstat(tdb->fd, &st) == -1) {
+ if (tdb_fstat(tdb, &st) == -1) {
tdb->ecode = TDB_ERR_IO;
return -1;
}
@@ -122,16 +186,18 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
tdb->ecode = TDB_ERR_IO;
return -1;
#else
- ssize_t written = pwrite(tdb->fd, buf, len, off);
+ ssize_t written;
+
+ written = tdb_pwrite(tdb, buf, len, off);
+
if ((written != (ssize_t)len) && (written != -1)) {
/* try once more */
tdb->ecode = TDB_ERR_IO;
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
"%zi of %u bytes at %u, trying once more\n",
written, len, off));
- written = pwrite(tdb->fd, (const char *)buf+written,
- len-written,
- off+written);
+ written = tdb_pwrite(tdb, (const char *)buf+written,
+ len-written, off+written);
}
if (written == -1) {
/* Ensure ecode is set for log fn. */
@@ -176,7 +242,9 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
tdb->ecode = TDB_ERR_IO;
return -1;
#else
- ssize_t ret = pread(tdb->fd, buf, len, off);
+ ssize_t ret;
+
+ ret = tdb_pread(tdb, buf, len, off);
if (ret != (ssize_t)len) {
/* Ensure ecode is set for log fn. */
tdb->ecode = TDB_ERR_IO;
@@ -258,7 +326,8 @@ int tdb_mmap(struct tdb_context *tdb)
if (should_mmap(tdb)) {
tdb->map_ptr = mmap(NULL, tdb->map_size,
PROT_READ|(tdb->read_only? 0:PROT_WRITE),
- MAP_SHARED|MAP_FILE, tdb->fd, 0);
+ MAP_SHARED|MAP_FILE, tdb->fd,
+ tdb->hdr_ofs);
/*
* NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
@@ -303,12 +372,12 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
return -1;
}
- if (ftruncate(tdb->fd, new_size) == -1) {
+ if (tdb_ftruncate(tdb, new_size) == -1) {
char b = 0;
- ssize_t written = pwrite(tdb->fd, &b, 1, new_size - 1);
+ ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1);
if (written == 0) {
/* try once more, potentially revealing errno */
- written = pwrite(tdb->fd, &b, 1, new_size - 1);
+ written = tdb_pwrite(tdb, &b, 1, new_size - 1);
}
if (written == 0) {
/* again - give up, guessing errno */
@@ -328,10 +397,10 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
memset(buf, TDB_PAD_BYTE, sizeof(buf));
while (addition) {
size_t n = addition>sizeof(buf)?sizeof(buf):addition;
- ssize_t written = pwrite(tdb->fd, buf, n, size);
+ ssize_t written = tdb_pwrite(tdb, buf, n, size);
if (written == 0) {
/* prevent infinite loops: try _once_ more */
- written = pwrite(tdb->fd, buf, n, size);
+ written = tdb_pwrite(tdb, buf, n, size);
}
if (written == 0) {
/* give up, trying to provide a useful errno */
@@ -437,6 +506,14 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
/* must know about any previous expansions by another process */
tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);
+ /*
+ * Note: that we don't care about tdb->hdr_ofs != 0 here
+ *
+ * The 4GB limitation is just related to tdb->map_size
+ * and the offset calculation in the records.
+ *
+ * The file on disk can be up to 4GB + tdb->hdr_ofs
+ */
size = tdb_expand_adjust(tdb->map_size, size, tdb->page_size);
if (!tdb_add_off_t(tdb->map_size, size, &new_size)) {
diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c
index 17ab0b7c285..162f30d4047 100644
--- a/lib/tdb/common/open.c
+++ b/lib/tdb/common/open.c
@@ -194,6 +194,7 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
unsigned v;
const char *hash_alg;
uint32_t magic1, magic2;
+ int ret;
ZERO_STRUCT(header);
@@ -340,7 +341,6 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
if ((tdb_flags & TDB_CLEAR_IF_FIRST) &&
(!tdb->read_only) &&
(locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) {
- int ret;
ret = tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0,
TDB_LOCK_WAIT);
if (ret == -1) {
@@ -400,8 +400,18 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
tdb->flags |= TDB_CONVERT;
tdb_convert(&header, sizeof(header));
}
- if (fstat(tdb->fd, &st) == -1)
+
+ /*
+ * We only use st.st_dev and st.st_ino from the raw fstat()
+ * call, everything else needs to use tdb_fstat() in order
+ * to skip tdb->hdr_ofs!
+ */
+ if (fstat(tdb->fd, &st) == -1) {
goto fail;
+ }
+ tdb->device = st.st_dev;
+ tdb->inode = st.st_ino;
+ ZERO_STRUCT(st);
if (header.rwlocks != 0 &&
header.rwlocks != TDB_FEATURE_FLAG_MAGIC &&
@@ -446,28 +456,27 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
}
/* Is it already in the open list? If so, fail. */
- if (tdb_already_open(st.st_dev, st.st_ino)) {
+ if (tdb_already_open(tdb->device, tdb->inode)) {
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
"%s (%d,%d) is already open in this process\n",
- name, (int)st.st_dev, (int)st.st_ino));
+ name, (int)tdb->device, (int)tdb->inode));
errno = EBUSY;
goto fail;
}
- /* Beware truncation! */
- tdb->map_size = st.st_size;
- if (tdb->map_size != st.st_size) {
- /* Ensure ecode is set for log fn. */
- tdb->ecode = TDB_ERR_IO;
- TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: "
- "len %llu too large!\n", (long long)st.st_size));
+ /*
+ * We had tdb_mmap(tdb) here before,
+ * but we need to use tdb_fstat(),
+ * which is triggered from tdb_oob() before calling tdb_mmap().
+ * As this skips tdb->hdr_ofs.
+ */
+ tdb->map_size = 0;
+ ret = tdb->methods->tdb_oob(tdb, 0, 1, 0);
+ if (ret == -1) {
errno = EIO;
goto fail;
}
- tdb->device = st.st_dev;
- tdb->inode = st.st_ino;
- tdb_mmap(tdb);
if (locked) {
if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) {
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: "
@@ -649,6 +658,11 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock)
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: open failed (%s)\n", strerror(errno)));
goto fail;
}
+ /*
+ * We only use st.st_dev and st.st_ino from the raw fstat()
+ * call, everything else needs to use tdb_fstat() in order
+ * to skip tdb->hdr_ofs!
+ */
if (fstat(tdb->fd, &st) != 0) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: fstat failed (%s)\n", strerror(errno)));
goto fail;
@@ -657,7 +671,16 @@ static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock)
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: file dev/inode has changed!\n"));
goto fail;
}
- if (tdb_mmap(tdb) != 0) {
+ ZERO_STRUCT(st);
+
+ /*
+ * We had tdb_mmap(tdb) here before,
+ * but we need to use tdb_fstat(),
+ * which is triggered from tdb_oob() before calling tdb_mmap().
+ * As this skips tdb->hdr_ofs.
+ */
+ tdb->map_size = 0;
+ if (tdb->methods->tdb_oob(tdb, 0, 1, 0) != 0) {
goto fail;
}
#endif /* fake pread or pwrite */
diff --git a/lib/tdb/common/summary.c b/lib/tdb/common/summary.c
index 6f2e0a9e80a..e9989f676f7 100644
--- a/lib/tdb/common/summary.c
+++ b/lib/tdb/common/summary.c
@@ -18,7 +18,8 @@
#include "tdb_private.h"
#define SUMMARY_FORMAT \
- "Size of file/data: %u/%zu\n" \
+ "Size of file/data: %llu/%zu\n" \
+ "Header offset/logical size: %zu/%zu\n" \
"Number of records: %zu\n" \
"Incompatible hash: %s\n" \
"Active/supported feature flags: 0x%08x/0x%08x\n" \
@@ -88,6 +89,7 @@ static size_t get_hash_length(struct tdb_context *tdb, unsigned int i)
_PUBLIC_ char *tdb_summary(struct tdb_context *tdb)
{
+ off_t file_size;
tdb_off_t off, rec_off;
struct tally freet, keys, data, dead, extra, hashval, uncoal;
struct tdb_record rec;
@@ -165,9 +167,11 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb)
for (off = 0; off < tdb->hash_size; off++)
tally_add(&hashval, get_hash_length(tdb, off));
+ file_size = tdb->hdr_ofs + tdb->map_size;
len = asprintf(&ret, SUMMARY_FORMAT,
- tdb->map_size, keys.total+data.total,
+ (unsigned long long)file_size, keys.total+data.total,
+ (size_t)tdb->hdr_ofs, (size_t)tdb->map_size,
keys.num,
(tdb->hash_fn == tdb_jenkins_hash)?"yes":"no",
(unsigned)tdb->feature_flags, TDB_SUPPORTED_FEATURE_FLAGS,
@@ -182,16 +186,16 @@ _PUBLIC_ char *tdb_summary(struct tdb_context *tdb)
hashval.min, tally_mean(&hashval), hashval.max,
uncoal.total,
uncoal.min, tally_mean(&uncoal), uncoal.max,
- keys.total * 100.0 / tdb->map_size,
- data.total * 100.0 / tdb->map_size,
- extra.total * 100.0 / tdb->map_size,
- freet.total * 100.0 / tdb->map_size,
- dead.total * 100.0 / tdb->map_size,
+ keys.total * 100.0 / file_size,
+ data.total * 100.0 / file_size,
+ extra.total * 100.0 / file_size,
+ freet.total * 100.0 / file_size,
+ dead.total * 100.0 / file_size,
(keys.num + freet.num + dead.num)
* (sizeof(struct tdb_record) + sizeof(uint32_t))
- * 100.0 / tdb->map_size,
+ * 100.0 / file_size,
tdb->hash_size * sizeof(tdb_off_t)
- * 100.0 / tdb->map_size);
+ * 100.0 / file_size);
if (len == -1) {
goto unlock;
}
diff --git a/lib/tdb/common/tdb_private.h b/lib/tdb/common/tdb_private.h
index aa9dd55ba47..4981e2cd6ac 100644
--- a/lib/tdb/common/tdb_private.h
+++ b/lib/tdb/common/tdb_private.h
@@ -202,6 +202,9 @@ struct tdb_context {
int num_lockrecs;
struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */
int lockrecs_array_length;
+
+ tdb_off_t hdr_ofs; /* this is 0 for now */
+
enum TDB_ERROR ecode; /* error code for last tdb error */
uint32_t hash_size;
uint32_t feature_flags;
diff --git a/lib/tdb/test/run-3G-file.c b/lib/tdb/test/run-3G-file.c
index 67fd54f54fd..900b1a667a2 100644
--- a/lib/tdb/test/run-3G-file.c
+++ b/lib/tdb/test/run-3G-file.c
@@ -22,12 +22,12 @@ static int tdb_expand_file_sparse(struct tdb_context *tdb,
return -1;
}
- if (ftruncate(tdb->fd, size+addition) == -1) {
+ if (tdb_ftruncate(tdb, size+addition) == -1) {
char b = 0;
- ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
+ ssize_t written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1);
if (written == 0) {
/* try once more, potentially revealing errno */
- written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
+ written = tdb_pwrite(tdb, &b, 1, (size+addition) - 1);
}
if (written == 0) {
/* again - give up, guessing errno */