diff options
author | Andrew Tridgell <tridge@samba.org> | 2008-01-05 17:41:41 +1100 |
---|---|---|
committer | Andrew Tridgell <tridge@samba.org> | 2008-01-05 17:41:41 +1100 |
commit | 841a04924c920ac9d8b4a74187a179240351fddc (patch) | |
tree | 36a2da087367023296798f9148d426693c0c398c /ctdb/lib/tdb/common | |
parent | 370779a1bb0218f31d02f0976e143d4b5d84b3d4 (diff) | |
download | samba-841a04924c920ac9d8b4a74187a179240351fddc.tar.gz samba-841a04924c920ac9d8b4a74187a179240351fddc.tar.xz samba-841a04924c920ac9d8b4a74187a179240351fddc.zip |
merge from Samba4
(This used to be ctdb commit 9aed7a1d065272c2e5b54872228a73f37664b526)
Diffstat (limited to 'ctdb/lib/tdb/common')
-rw-r--r-- | ctdb/lib/tdb/common/io.c | 65 | ||||
-rw-r--r-- | ctdb/lib/tdb/common/lock.c | 12 | ||||
-rw-r--r-- | ctdb/lib/tdb/common/open.c | 29 | ||||
-rw-r--r-- | ctdb/lib/tdb/common/tdb.c | 6 | ||||
-rw-r--r-- | ctdb/lib/tdb/common/tdb_private.h | 4 | ||||
-rw-r--r-- | ctdb/lib/tdb/common/traverse.c | 15 |
6 files changed, 108 insertions, 23 deletions
diff --git a/ctdb/lib/tdb/common/io.c b/ctdb/lib/tdb/common/io.c index 1da9ac37c2f..172ab69d8c9 100644 --- a/ctdb/lib/tdb/common/io.c +++ b/ctdb/lib/tdb/common/io.c @@ -88,13 +88,32 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off, if (tdb->map_ptr) { memcpy(off + (char *)tdb->map_ptr, buf, len); - } else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) { + } else { + ssize_t written = pwrite(tdb->fd, buf, len, off); + if ((written != (ssize_t)len) && (written != -1)) { + /* try once more */ + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only " + "%d of %d bytes at %d, trying once more\n", + (int)written, len, off)); + errno = ENOSPC; + written = pwrite(tdb->fd, (const void *)((const char *)buf+written), + len-written, + off+written); + } + if (written == -1) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_IO; - TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d len=%d (%s)\n", - off, len, strerror(errno))); + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d " + "len=%d (%s)\n", off, len, strerror(errno))); + return TDB_ERRCODE(TDB_ERR_IO, -1); + } else if (written != (ssize_t)len) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to " + "write %d bytes at %d in two attempts\n", + len, off)); + errno = ENOSPC; return TDB_ERRCODE(TDB_ERR_IO, -1); } + } return 0; } @@ -220,7 +239,16 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad if (ftruncate(tdb->fd, size+addition) == -1) { char b = 0; - if (pwrite(tdb->fd, &b, 1, (size+addition) - 1) != 1) { + ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); + if (written == 0) { + /* try once more, potentially revealing errno */ + written = pwrite(tdb->fd, &b, 1, (size+addition) - 1); + } + if (written == 0) { + /* again - give up, guessing errno */ + errno = ENOSPC; + } + if (written != 1) { TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n", size+addition, strerror(errno))); return -1; @@ -232,15 +260,30 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad disk. This must be done with write, not via mmap */ memset(buf, TDB_PAD_BYTE, sizeof(buf)); while (addition) { - int n = addition>sizeof(buf)?sizeof(buf):addition; - int ret = pwrite(tdb->fd, buf, n, size); - if (ret != n) { - TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of %d failed (%s)\n", - n, strerror(errno))); + size_t n = addition>sizeof(buf)?sizeof(buf):addition; + ssize_t written = pwrite(tdb->fd, buf, n, size); + if (written == 0) { + /* prevent infinite loops: try _once_ more */ + written = pwrite(tdb->fd, buf, n, size); + } + if (written == 0) { + /* give up, trying to provide a useful errno */ + TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write " + "returned 0 twice: giving up!\n")); + errno = ENOSPC; + return -1; + } else if (written == -1) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of " + "%d bytes failed (%s)\n", (int)n, + strerror(errno))); return -1; + } else if (written != n) { + TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote " + "only %d of %d bytes - retrying\n", (int)written, + (int)n)); } - addition -= n; - size += n; + addition -= written; + size += written; } return 0; } diff --git a/ctdb/lib/tdb/common/lock.c b/ctdb/lib/tdb/common/lock.c index 33afe74336f..f156c0fa7b2 100644 --- a/ctdb/lib/tdb/common/lock.c +++ b/ctdb/lib/tdb/common/lock.c @@ -29,6 +29,11 @@ #define TDB_MARK_LOCK 0x80000000 +void tdb_setalarm_sigptr(struct tdb_context *tdb, volatile sig_atomic_t *ptr) +{ + tdb->interrupt_sig_ptr = ptr; +} + /* a byte range locking function - return 0 on success this functions locks/unlocks 1 byte at the specified offset. @@ -60,6 +65,13 @@ int tdb_brlock(struct tdb_context *tdb, tdb_off_t offset, do { ret = fcntl(tdb->fd,lck_type,&fl); + + /* Check for a sigalarm break. */ + if (ret == -1 && errno == EINTR && + tdb->interrupt_sig_ptr && + *tdb->interrupt_sig_ptr) { + break; + } } while (ret == -1 && errno == EINTR); if (ret == -1) { diff --git a/ctdb/lib/tdb/common/open.c b/ctdb/lib/tdb/common/open.c index e2353d6e600..6bd8fda2bf1 100644 --- a/ctdb/lib/tdb/common/open.c +++ b/ctdb/lib/tdb/common/open.c @@ -49,7 +49,9 @@ static unsigned int default_tdb_hash(TDB_DATA *key) static int tdb_new_database(struct tdb_context *tdb, int hash_size) { struct tdb_header *newdb; - int size, ret = -1; + size_t size; + int ret = -1; + ssize_t written; /* We make it up in memory, then write it out if not internal */ size = sizeof(struct tdb_header) + (hash_size+1)*sizeof(tdb_off_t); @@ -78,10 +80,22 @@ static int tdb_new_database(struct tdb_context *tdb, int hash_size) memcpy(&tdb->header, newdb, sizeof(tdb->header)); /* Don't endian-convert the magic food! */ memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); - if (write(tdb->fd, newdb, size) != size) { - ret = -1; - } else { + /* we still have "ret == -1" here */ + written = write(tdb->fd, newdb, size); + if (written == size) { + ret = 0; + } else if (written != -1) { + /* call write once again, this usually should return -1 and + * set errno appropriately */ + size -= written; + written = write(tdb->fd, newdb+written, size); + if (written == size) { ret = 0; + } else if (written >= 0) { + /* a second incomplete write - we give up. + * guessing the errno... */ + errno = ENOSPC; + } } fail: @@ -165,6 +179,10 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, tdb->page_size = 0x2000; } + if (open_flags & TDB_VOLATILE) { + tdb->max_dead_records = 5; + } + if ((open_flags & O_ACCMODE) == O_WRONLY) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: can't open tdb %s write-only\n", name)); @@ -221,13 +239,16 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, } } + errno = 0; if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header) || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0 || (tdb->header.version != TDB_VERSION && !(rev = (tdb->header.version==TDB_BYTEREV(TDB_VERSION))))) { /* its not a valid database - possibly initialise it */ if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == -1) { + if (errno == 0) { errno = EIO; /* ie bad format or something */ + } goto fail; } rev = (tdb->flags & TDB_CONVERT); diff --git a/ctdb/lib/tdb/common/tdb.c b/ctdb/lib/tdb/common/tdb.c index 3edd10fff0d..79c15475191 100644 --- a/ctdb/lib/tdb/common/tdb.c +++ b/ctdb/lib/tdb/common/tdb.c @@ -579,8 +579,12 @@ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf) if (dbuf.dptr == NULL) { dbuf.dptr = (unsigned char *)malloc(new_dbuf.dsize); } else { - dbuf.dptr = (unsigned char *)realloc(dbuf.dptr, + unsigned char *new_dptr = (unsigned char *)realloc(dbuf.dptr, dbuf.dsize + new_dbuf.dsize); + if (new_dptr == NULL) { + free(dbuf.dptr); + } + dbuf.dptr = new_dptr; } if (dbuf.dptr == NULL) { diff --git a/ctdb/lib/tdb/common/tdb_private.h b/ctdb/lib/tdb/common/tdb_private.h index b157ec8af53..86cb762fd52 100644 --- a/ctdb/lib/tdb/common/tdb_private.h +++ b/ctdb/lib/tdb/common/tdb_private.h @@ -28,6 +28,7 @@ #include "system/time.h" #include "system/shmem.h" #include "system/select.h" +#include "system/wait.h" #include "tdb.h" #ifndef HAVE_GETPAGESIZE @@ -61,7 +62,7 @@ typedef uint32_t tdb_off_t; #define TDB_RECOVERY_HEAD offsetof(struct tdb_header, recovery_start) #define TDB_SEQNUM_OFS offsetof(struct tdb_header, sequence_number) #define TDB_PAD_BYTE 0x42 -#define TDB_PAD_UINT32_T 0x42424242 +#define TDB_PAD_U32 0x42424242 /* NB assumes there is a local variable called "tdb" that is the * current context, also takes doubly-parenthesized print-style @@ -167,6 +168,7 @@ struct tdb_context { int page_size; int max_dead_records; bool have_transaction_lock; + volatile sig_atomic_t *interrupt_sig_ptr; }; diff --git a/ctdb/lib/tdb/common/traverse.c b/ctdb/lib/tdb/common/traverse.c index c244ea848da..2bde1270a07 100644 --- a/ctdb/lib/tdb/common/traverse.c +++ b/ctdb/lib/tdb/common/traverse.c @@ -260,12 +260,15 @@ TDB_DATA tdb_firstkey(struct tdb_context *tdb) tdb->travlocks.off = tdb->travlocks.hash = 0; tdb->travlocks.lock_rw = F_RDLCK; + /* Grab first record: locks chain and returned record. */ if (tdb_next_lock(tdb, &tdb->travlocks, &rec) <= 0) return tdb_null; /* now read the key */ key.dsize = rec.key_len; key.dptr =tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),key.dsize); - if (tdb_unlock(tdb, BUCKET(tdb->travlocks.hash), F_WRLCK) != 0) + + /* Unlock the hash chain of the record we just read. */ + if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_firstkey: error occurred while tdb_unlocking!\n")); return key; } @@ -280,7 +283,7 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey) /* Is locked key the old key? If so, traverse will be reliable. */ if (tdb->travlocks.off) { - if (tdb_lock(tdb,tdb->travlocks.hash,F_WRLCK)) + if (tdb_lock(tdb,tdb->travlocks.hash,tdb->travlocks.lock_rw)) return tdb_null; if (tdb_rec_read(tdb, tdb->travlocks.off, &rec) == -1 || !(k = tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec), @@ -291,7 +294,7 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey) SAFE_FREE(k); return tdb_null; } - if (tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK) != 0) { + if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0) { SAFE_FREE(k); return tdb_null; } @@ -303,7 +306,7 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey) if (!tdb->travlocks.off) { /* No previous element: do normal find, and lock record */ - tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb->hash_fn(&oldkey), F_WRLCK, &rec); + tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb->hash_fn(&oldkey), tdb->travlocks.lock_rw, &rec); if (!tdb->travlocks.off) return tdb_null; tdb->travlocks.hash = BUCKET(rec.full_hash); @@ -321,11 +324,11 @@ TDB_DATA tdb_nextkey(struct tdb_context *tdb, TDB_DATA oldkey) key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec), key.dsize); /* Unlock the chain of this new record */ - if (tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK) != 0) + if (tdb_unlock(tdb, tdb->travlocks.hash, tdb->travlocks.lock_rw) != 0) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n")); } /* Unlock the chain of old record */ - if (tdb_unlock(tdb, BUCKET(oldhash), F_WRLCK) != 0) + if (tdb_unlock(tdb, BUCKET(oldhash), tdb->travlocks.lock_rw) != 0) TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_nextkey: WARNING tdb_unlock failed!\n")); return key; } |