summaryrefslogtreecommitdiffstats
path: root/ctdb/lib/tdb/common
diff options
context:
space:
mode:
authorAndrew Tridgell <tridge@samba.org>2008-01-05 17:41:41 +1100
committerAndrew Tridgell <tridge@samba.org>2008-01-05 17:41:41 +1100
commit841a04924c920ac9d8b4a74187a179240351fddc (patch)
tree36a2da087367023296798f9148d426693c0c398c /ctdb/lib/tdb/common
parent370779a1bb0218f31d02f0976e143d4b5d84b3d4 (diff)
downloadsamba-841a04924c920ac9d8b4a74187a179240351fddc.tar.gz
samba-841a04924c920ac9d8b4a74187a179240351fddc.tar.xz
samba-841a04924c920ac9d8b4a74187a179240351fddc.zip
merge from Samba4
(This used to be ctdb commit 9aed7a1d065272c2e5b54872228a73f37664b526)
Diffstat (limited to 'ctdb/lib/tdb/common')
-rw-r--r--ctdb/lib/tdb/common/io.c65
-rw-r--r--ctdb/lib/tdb/common/lock.c12
-rw-r--r--ctdb/lib/tdb/common/open.c29
-rw-r--r--ctdb/lib/tdb/common/tdb.c6
-rw-r--r--ctdb/lib/tdb/common/tdb_private.h4
-rw-r--r--ctdb/lib/tdb/common/traverse.c15
6 files changed, 108 insertions, 23 deletions
diff --git a/ctdb/lib/tdb/common/io.c b/ctdb/lib/tdb/common/io.c
index 1da9ac37c2f..172ab69d8c9 100644
--- a/ctdb/lib/tdb/common/io.c
+++ b/ctdb/lib/tdb/common/io.c
@@ -88,13 +88,32 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
if (tdb->map_ptr) {
memcpy(off + (char *)tdb->map_ptr, buf, len);
- } else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) {
+ } else {
+ ssize_t written = pwrite(tdb->fd, buf, len, off);
+ if ((written != (ssize_t)len) && (written != -1)) {
+ /* try once more */
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
+ "%d of %d bytes at %d, trying once more\n",
+ (int)written, len, off));
+ errno = ENOSPC;
+ written = pwrite(tdb->fd, (const void *)((const char *)buf+written),
+ len-written,
+ off+written);
+ }
+ if (written == -1) {
/* Ensure ecode is set for log fn. */
tdb->ecode = TDB_ERR_IO;
- TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d len=%d (%s)\n",
- off, len, strerror(errno)));
+ TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
+ "len=%d (%s)\n", off, len, strerror(errno)));
+ return TDB_ERRCODE(TDB_ERR_IO, -1);
+ } else if (written != (ssize_t)len) {
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
+ "write %d bytes at %d in two attempts\n",
+ len, off));
+ errno = ENOSPC;
return TDB_ERRCODE(TDB_ERR_IO, -1);
}
+ }
return 0;
}
@@ -220,7 +239,16 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
if (ftruncate(tdb->fd, size+addition) == -1) {
char b = 0;
- if (pwrite(tdb->fd, &b, 1, (size+addition) - 1) != 1) {
+ ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
+ if (written == 0) {
+ /* try once more, potentially revealing errno */
+ written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
+ }
+ if (written == 0) {
+ /* again - give up, guessing errno */
+ errno = ENOSPC;
+ }
+ if (written != 1) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n",
size+addition, strerror(errno)));
return -1;
@@ -232,15 +260,30 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
disk. This must be done with write, not via mmap */
memset(buf, TDB_PAD_BYTE, sizeof(buf));
while (addition) {
- int n = addition>sizeof(buf)?sizeof(buf):addition;
- int ret = pwrite(tdb->fd, buf, n, size);
- if (ret != n) {
- TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of %d failed (%s)\n",
- n, strerror(errno)));
+ size_t n = addition>sizeof(buf)?sizeof(buf):addition;
+ ssize_t written = pwrite(tdb->fd, buf, n, size);
+ if (written == 0) {
+ /* prevent infinite loops: try _once_ more */
+ written = pwrite(tdb->fd, buf, n, size);
+ }
+ if (written == 0) {
+ /* give up, trying to provide a useful errno */
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
+ "returned 0 twice: giving up!\n"));
+ errno = ENOSPC;
+ return -1;
+ } else if (written == -1) {
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of "
+ "%d bytes failed (%s)\n", (int)n,
+ strerror(errno)));
return -1;
+ } else if (written != n) {
+ TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote "
+ "only %d of %d bytes - retrying\n", (int)written,
+ (int)n));
}
- addition -= n;
- size += n;
+ addition -= written;
+ size += written;
}
return 0;
}
diff --git a/ctdb/lib/tdb/common/lock.c b/ctdb/lib/tdb/common/lock.c
index 33afe74336f..f156c0fa7b2 100644
--- a/ctdb/lib/tdb/common/lock.c
+++ b/ctdb/lib/tdb/common/lock.c
@@ -29,6 +29,11 @@
#define TDB_MARK_LOCK 0x80000000
+void tdb_setalarm_sigptr(struct tdb_context *tdb, volatile sig_atomic_t *ptr)
+{
+ tdb->interrupt_sig_ptr = ptr;
+}
+
/* a byte range locking function - return 0 on success
this functions locks/unlocks 1 byte at the specified offset.
@@ -60,6 +65,13 @@ int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset,
do {
ret = fcntl(tdb->fd,lck_type,&fl);
+
+ /* Check for a sigalarm break. */
+ if (ret == -1 && errno == EINTR &&
+ tdb->interrupt_sig_ptr &&
+ *tdb->interrupt_sig_ptr) {
+ break;
+ }
} while (ret == -1 && errno == EINTR);
if (ret == -1) {
diff --git a/ctdb/lib/tdb/common/open.c b/ctdb/lib/tdb/common/open.c
index e2353d6e600..6bd8fda2bf1 100644
--- a/ctdb/lib/tdb/common/open.c
+++ b/ctdb/lib/tdb/common/open.c
@@ -49,7 +49,9 @@ static unsigned int default_tdb_hash(TDB_DATA *key)
static int tdb_new_database(struct tdb_context *tdb, int hash_size)
{
struct tdb_header *newdb;
- int size, ret = -1;
+ size_t size;
+ int ret = -1;
+ ssize_t written;
/* We make it up in memory, then write it out if not internal */
size = sizeof(struct tdb_header) + (hash_size+1)*sizeof(tdb_off_t);
@@ -78,10 +80,22 @@ static int tdb_new_database(struct tdb_context *tdb, int hash_size)
memcpy(&tdb->header, newdb, sizeof(tdb->header));
/* Don't endian-convert the magic food! */
memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1);
- if (write(tdb->fd, newdb, size) != size) {
- ret = -1;
- } else {
+ /* we still have "ret == -1" here */
+ written = write(tdb->fd, newdb, size);
+ if (written == size) {
+ ret = 0;
+ } else if (written != -1) {
+ /* call write once again, this usually should return -1 and
+ * set errno appropriately */
+ size -= written;
+ written = write(tdb->fd, newdb+written, size);
+ if (written == size) {
ret = 0;
+ } else if (written >= 0) {
+ /* a second incomplete write - we give up.
+ * guessing the errno... */
+ errno = ENOSPC;
+ }
}
fail:
@@ -165,6 +179,10 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
tdb->page_size = 0x2000;
}
+ if (open_flags & TDB_VOLATILE) {
+ tdb->max_dead_records = 5;
+ }
+
if ((open_flags & O_ACCMODE) == O_WRONLY) {
TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: can't open tdb %s write-only\n",
name));
@@ -221,13 +239,16 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags,
}
}
+ errno = 0;
if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header)
|| strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0
|| (tdb->header.version != TDB_VERSION
&& !(rev = (tdb->header.version==TDB_BYTEREV(TDB_VERSION))))) {
/* its not a valid database - possibly initialise it */
if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == -1) {
+ if (errno == 0) {
errno = EIO; /* ie bad format or something */
+ }
goto fail;
}
rev = (tdb->flags & TDB_CONVERT);
diff --git a/ctdb/lib/tdb/common/tdb.c b/ctdb/lib/tdb/common/tdb.c
index 3edd10fff0d..79c15475191 100644
--- a/ctdb/lib/tdb/common/tdb.c
+++ b/ctdb/lib/tdb/common/tdb.c
@@ -579,8 +579,12 @@ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
if (dbuf.dptr == NULL) {
dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize);
} else {
- dbuf.dptr = (unsigned char *)realloc(dbuf.dptr,
+ unsigned char *new_dptr = (unsigned char *)realloc(dbuf.dptr,
dbuf.dsize + new_dbuf.dsize);
+ if (new_dptr == NULL) {
+ free(dbuf.dptr);
+ }
+ dbuf.dptr = new_dptr;
}
if (dbuf.dptr == NULL) {
diff --git a/ctdb/lib/tdb/common/tdb_private.h b/ctdb/lib/tdb/common/tdb_private.h
index b157ec8af53..86cb762fd52 100644
--- a/ctdb/lib/tdb/common/tdb_private.h
+++ b/ctdb/lib/tdb/common/tdb_private.h
@@ -28,6 +28,7 @@
#include "system/time.h"
#include "system/shmem.h"
#include "system/select.h"
+#include "system/wait.h"
#include "tdb.h"
#ifndef HAVE_GETPAGESIZE
@@ -61,7 +62,7 @@ typedef uint32_t tdb_off_t;
#define TDB_RECOVERY_HEAD offsetof(struct tdb_header, recovery_start)
#define TDB_SEQNUM_OFS offsetof(struct tdb_header, sequence_number)
#define TDB_PAD_BYTE 0x42
-#define TDB_PAD_UINT32_T 0x42424242
+#define TDB_PAD_U32 0x42424242
/* NB assumes there is a local variable called "tdb" that is the
* current context, also takes doubly-parenthesized print-style
@@ -167,6 +168,7 @@ struct tdb_context {
int page_size;
int max_dead_records;
bool have_transaction_lock;
+ volatile sig_atomic_t *interrupt_sig_ptr;
};
diff --git a/ctdb/lib/tdb/common/traverse.c b/ctdb/lib/tdb/common/traverse.c
index c244ea848da..2bde1270a07 100644
--- a/ctdb/lib/tdb/common/traverse.c
+++ b/ctdb/lib/tdb/common/traverse.c
@@ -260,12 +260,15 @@ TDB_DATA tdb_firstkey(struct tdb_context *tdb)
tdb->travlocks.off = tdb->travlocks.hash = 0;
tdb->travlocks.lock_rw = F_RDLCK;
+ /* Grab first record: locks chain and returned record. */
if (tdb_next_lock(tdb, &tdb->travlocks, &rec) <= 0)
return tdb_null;
/* now read the key */
key.dsize = rec.key_len;
key.dptr =tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),key.dsize);
- if (tdb_unlock(tdb, BUCKET(tdb->travlocks.hash), F_WRLCK) != 0)
+
+ /* Unlock the hash chain of the record we just read. */
+ if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0)
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_firstkey: error occurred while tdb_unlocking!\n"));
return key;
}
@@ -280,7 +283,7 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
/* Is locked key the old key? If so, traverse will be reliable. */
if (tdb->travlocks.off) {
- if (tdb_lock(tdb,tdb->travlocks.hash,F_WRLCK))
+ if (tdb_lock(tdb,tdb->travlocks.hash,tdb->travlocks.lock_rw))
return tdb_null;
if (tdb_rec_read(tdb, tdb->travlocks.off, &rec) == -1
|| !(k = tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),
@@ -291,7 +294,7 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
SAFE_FREE(k);
return tdb_null;
}
- if (tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK) != 0) {
+ if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0) {
SAFE_FREE(k);
return tdb_null;
}
@@ -303,7 +306,7 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
if (!tdb->travlocks.off) {
/* No previous element: do normal find, and lock record */
- tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb->hash_fn(&oldkey), F_WRLCK, &rec);
+ tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb->hash_fn(&oldkey), tdb->travlocks.lock_rw, &rec);
if (!tdb->travlocks.off)
return tdb_null;
tdb->travlocks.hash = BUCKET(rec.full_hash);
@@ -321,11 +324,11 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey)
key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec),
key.dsize);
/* Unlock the chain of this new record */
- if (tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK) != 0)
+ if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0)
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
}
/* Unlock the chain of old record */
- if (tdb_unlock(tdb, BUCKET(oldhash), F_WRLCK) != 0)
+ if (tdb_unlock(tdb, BUCKET(oldhash), tdb->travlocks.lock_rw) != 0)
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n"));
return key;
}