diff options
author | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2010-10-08 12:49:08 +1100 |
---|---|---|
committer | Ronnie Sahlberg <ronniesahlberg@gmail.com> | 2010-10-08 12:49:08 +1100 |
commit | 8bdaa7d41f4aad5f5f245b364816aba6f52c0304 (patch) | |
tree | 6e7ec54ab2d3e76ae7330f0209ef5d43a08b0b75 | |
parent | 7253342a70b145d0a3689e898acc485859fc5f8b (diff) | |
parent | c333126496954b4a7a829f3d8fac5190b5b8e816 (diff) | |
download | samba-8bdaa7d41f4aad5f5f245b364816aba6f52c0304.tar.gz samba-8bdaa7d41f4aad5f5f245b364816aba6f52c0304.tar.xz samba-8bdaa7d41f4aad5f5f245b364816aba6f52c0304.zip |
Merge commit 'rusty/tdb-update'
(This used to be ctdb commit 23510bf858c06a3710d1cc741d32bad3675fd97e)
-rw-r--r-- | ctdb/lib/tdb/ABI/tdb-1.2.6.sigs | 61 | ||||
-rw-r--r-- | ctdb/lib/tdb/common/check.c | 73 | ||||
-rw-r--r-- | ctdb/lib/tdb/common/hash.c | 380 | ||||
-rw-r--r-- | ctdb/lib/tdb/common/open.c | 95 | ||||
-rw-r--r-- | ctdb/lib/tdb/common/tdb_private.h | 8 | ||||
-rw-r--r-- | ctdb/lib/tdb/config.mk | 9 | ||||
-rw-r--r-- | ctdb/lib/tdb/configure.ac | 2 | ||||
-rw-r--r-- | ctdb/lib/tdb/include/tdb.h | 2 | ||||
-rw-r--r-- | ctdb/lib/tdb/libtdb.m4 | 2 | ||||
-rw-r--r-- | ctdb/lib/tdb/pytdb.c | 103 | ||||
-rw-r--r-- | ctdb/lib/tdb/python/tests/simple.py | 46 | ||||
-rw-r--r-- | ctdb/lib/tdb/tdb.mk | 5 | ||||
-rw-r--r-- | ctdb/lib/tdb/tools/tdbrestore.c | 226 |
13 files changed, 976 insertions, 36 deletions
diff --git a/ctdb/lib/tdb/ABI/tdb-1.2.6.sigs b/ctdb/lib/tdb/ABI/tdb-1.2.6.sigs new file mode 100644 index 0000000000..1e01f3ba24 --- /dev/null +++ b/ctdb/lib/tdb/ABI/tdb-1.2.6.sigs @@ -0,0 +1,61 @@ +tdb_add_flags: void (struct tdb_context *, unsigned int) +tdb_append: int (struct tdb_context *, TDB_DATA, TDB_DATA) +tdb_chainlock: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_mark: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_nonblock: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_read: int (struct tdb_context *, TDB_DATA) +tdb_chainlock_unmark: int (struct tdb_context *, TDB_DATA) +tdb_chainunlock: int (struct tdb_context *, TDB_DATA) +tdb_chainunlock_read: int (struct tdb_context *, TDB_DATA) +tdb_check: int (struct tdb_context *, int (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_close: int (struct tdb_context *) +tdb_delete: int (struct tdb_context *, TDB_DATA) +tdb_dump_all: void (struct tdb_context *) +tdb_enable_seqnum: void (struct tdb_context *) +tdb_error: enum TDB_ERROR (struct tdb_context *) +tdb_errorstr: const char *(struct tdb_context *) +tdb_exists: int (struct tdb_context *, TDB_DATA) +tdb_fd: int (struct tdb_context *) +tdb_fetch: TDB_DATA (struct tdb_context *, TDB_DATA) +tdb_firstkey: TDB_DATA (struct tdb_context *) +tdb_freelist_size: int (struct tdb_context *) +tdb_get_flags: int (struct tdb_context *) +tdb_get_logging_private: void *(struct tdb_context *) +tdb_get_seqnum: int (struct tdb_context *) +tdb_hash_size: int (struct tdb_context *) +tdb_increment_seqnum_nonblock: void (struct tdb_context *) +tdb_jenkins_hash: unsigned int (TDB_DATA *) +tdb_lockall: int (struct tdb_context *) +tdb_lockall_mark: int (struct tdb_context *) +tdb_lockall_nonblock: int (struct tdb_context *) +tdb_lockall_read: int (struct tdb_context *) +tdb_lockall_read_nonblock: int (struct tdb_context *) +tdb_lockall_unmark: int (struct tdb_context *) +tdb_log_fn: tdb_log_func (struct tdb_context *) +tdb_map_size: size_t (struct tdb_context *) +tdb_name: const char *(struct tdb_context *) +tdb_nextkey: TDB_DATA (struct tdb_context *, TDB_DATA) +tdb_null: dptr = 0xXXXX, dsize = 0 +tdb_open: struct tdb_context *(const char *, int, int, int, mode_t) +tdb_open_ex: struct tdb_context *(const char *, int, int, int, mode_t, const struct tdb_logging_context *, tdb_hash_func) +tdb_parse_record: int (struct tdb_context *, TDB_DATA, int (*)(TDB_DATA, TDB_DATA, void *), void *) +tdb_printfreelist: int (struct tdb_context *) +tdb_remove_flags: void (struct tdb_context *, unsigned int) +tdb_reopen: int (struct tdb_context *) +tdb_reopen_all: int (int) +tdb_repack: int (struct tdb_context *) +tdb_set_logging_function: void (struct tdb_context *, const struct tdb_logging_context *) +tdb_set_max_dead: void (struct tdb_context *, int) +tdb_setalarm_sigptr: void (struct tdb_context *, volatile sig_atomic_t *) +tdb_store: int (struct tdb_context *, TDB_DATA, TDB_DATA, int) +tdb_transaction_cancel: int (struct tdb_context *) +tdb_transaction_commit: int (struct tdb_context *) +tdb_transaction_prepare_commit: int (struct tdb_context *) +tdb_transaction_start: int (struct tdb_context *) +tdb_transaction_start_nonblock: int (struct tdb_context *) +tdb_traverse: int (struct tdb_context *, tdb_traverse_func, void *) +tdb_traverse_read: int (struct tdb_context *, tdb_traverse_func, void *) +tdb_unlockall: int (struct tdb_context *) +tdb_unlockall_read: int (struct tdb_context *) +tdb_validate_freelist: int (struct tdb_context *, int *) +tdb_wipe_all: int (struct tdb_context *) diff --git a/ctdb/lib/tdb/common/check.c b/ctdb/lib/tdb/common/check.c index 2c640434ee..58c9c26540 100644 --- a/ctdb/lib/tdb/common/check.c +++ b/ctdb/lib/tdb/common/check.c @@ -28,8 +28,9 @@ static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery) { struct tdb_header hdr; + uint32_t h1, h2; - if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), DOCONV()) == -1) + if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), 0) == -1) return false; if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) goto corrupt; @@ -38,7 +39,12 @@ static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery) if (hdr.version != TDB_VERSION) goto corrupt; - if (hdr.rwlocks != 0) + if (hdr.rwlocks != 0 && hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) + goto corrupt; + + tdb_header_hash(tdb, &h1, &h2); + if (hdr.magic1_hash && hdr.magic2_hash && + (hdr.magic1_hash != h1 || hdr.magic2_hash != h2)) goto corrupt; if (hdr.hash_size == 0) @@ -301,6 +307,21 @@ static bool tdb_check_free_record(struct tdb_context *tdb, return true; } +/* Slow, but should be very rare. */ +static size_t dead_space(struct tdb_context *tdb, tdb_off_t off) +{ + size_t len; + + for (len = 0; off + len < tdb->map_size; len++) { + char c; + if (tdb->methods->tdb_read(tdb, off, &c, 1, 0)) + return 0; + if (c != 0 && c != 0x42) + break; + } + return len; +} + int tdb_check(struct tdb_context *tdb, int (*check)(TDB_DATA key, TDB_DATA data, void *private_data), void *private_data) @@ -310,9 +331,18 @@ int tdb_check(struct tdb_context *tdb, tdb_off_t off, recovery_start; struct tdb_record rec; bool found_recovery = false; - - if (tdb_lockall_read(tdb) == -1) - return -1; + tdb_len_t dead; + bool locked; + + /* Read-only databases use no locking at all: it's best-effort. + * We may have a write lock already, so skip that case too. */ + if (tdb->read_only || tdb->allrecord_lock.count != 0) { + locked = false; + } else { + if (tdb_lockall_read(tdb) == -1) + return -1; + locked = true; + } /* Make sure we know true size of the underlying file. */ tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1); @@ -369,8 +399,23 @@ int tdb_check(struct tdb_context *tdb, if (!tdb_check_free_record(tdb, off, &rec, hashes)) goto free; break; - case TDB_RECOVERY_MAGIC: + /* If we crash after ftruncate, we can get zeroes or fill. */ case TDB_RECOVERY_INVALID_MAGIC: + case 0x42424242: + if (recovery_start == off) { + found_recovery = true; + break; + } + dead = dead_space(tdb, off); + if (dead < sizeof(rec)) + goto corrupt; + + TDB_LOG((tdb, TDB_DEBUG_ERROR, + "Dead space at %d-%d (of %u)\n", + off, off + dead, tdb->map_size)); + rec.rec_len = dead - sizeof(rec); + break; + case TDB_RECOVERY_MAGIC: if (recovery_start != off) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "Unexpected recovery record at offset %d\n", @@ -379,7 +424,8 @@ int tdb_check(struct tdb_context *tdb, } found_recovery = true; break; - default: + default: ; + corrupt: tdb->ecode = TDB_ERR_CORRUPT; TDB_LOG((tdb, TDB_DEBUG_ERROR, "Bad magic 0x%x at offset %d\n", @@ -405,19 +451,22 @@ int tdb_check(struct tdb_context *tdb, /* We must have found recovery area if there was one. */ if (recovery_start != 0 && !found_recovery) { TDB_LOG((tdb, TDB_DEBUG_ERROR, - "Expected %s recovery area, got %s\n", - recovery_start ? "a" : "no", - found_recovery ? "one" : "none")); + "Expected a recovery area at %u\n", + recovery_start)); goto free; } free(hashes); - tdb_unlockall_read(tdb); + if (locked) { + tdb_unlockall_read(tdb); + } return 0; free: free(hashes); unlock: - tdb_unlockall_read(tdb); + if (locked) { + tdb_unlockall_read(tdb); + } return -1; } diff --git a/ctdb/lib/tdb/common/hash.c b/ctdb/lib/tdb/common/hash.c new file mode 100644 index 0000000000..c07297ec19 --- /dev/null +++ b/ctdb/lib/tdb/common/hash.c @@ -0,0 +1,380 @@ + /* + Unix SMB/CIFS implementation. + + trivial database library + + Copyright (C) Rusty Russell 2010 + + ** NOTE! The following LGPL license applies to the tdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ +#include "tdb_private.h" + +/* This is based on the hash algorithm from gdbm */ +unsigned int tdb_old_hash(TDB_DATA *key) +{ + uint32_t value; /* Used to compute the hash value. */ + uint32_t i; /* Used to cycle through random values. */ + + /* Set the initial value from the key size. */ + for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++) + value = (value + (key->dptr[i] << (i*5 % 24))); + + return (1103515243 * value + 12345); +} + +#ifndef WORDS_BIGENDIAN +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 +#else +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 1 +#endif + +/* +------------------------------------------------------------------------------- +lookup3.c, by Bob Jenkins, May 2006, Public Domain. + +These are functions for producing 32-bit hashes for hash table lookup. +hash_word(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() +are externally useful functions. Routines to test the hash are included +if SELF_TEST is defined. You can use this free for any purpose. It's in +the public domain. It has no warranty. + +You probably want to use hashlittle(). hashlittle() and hashbig() +hash byte arrays. hashlittle() is is faster than hashbig() on +little-endian machines. Intel and AMD are little-endian machines. +On second thought, you probably want hashlittle2(), which is identical to +hashlittle() except it returns two 32-bit hashes for the price of one. +You could implement hashbig2() if you wanted but I haven't bothered here. + +If you want to find a hash of, say, exactly 7 integers, do + a = i1; b = i2; c = i3; + mix(a,b,c); + a += i4; b += i5; c += i6; + mix(a,b,c); + a += i7; + final(a,b,c); +then use c as the hash value. If you have a variable length array of +4-byte integers to hash, use hash_word(). If you have a byte array (like +a character string), use hashlittle(). If you have several byte arrays, or +a mix of things, see the comments above hashlittle(). + +Why is this so big? I read 12 bytes at a time into 3 4-byte integers, +then mix those integers. This is fast (you can do a lot more thorough +mixing with 12*3 instructions on 3 integers than you can with 3 instructions +on 1 byte), but shoehorning those bytes into integers efficiently is messy. +*/ + +#define hashsize(n) ((uint32_t)1<<(n)) +#define hashmask(n) (hashsize(n)-1) +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + + +/* +------------------------------------------------------------------------------- +hashlittle() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + length : the length of the key, counting by bytes + val2 : IN: can be any 4-byte value OUT: second 32 bit hash. +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Two keys differing by one or two bits will have +totally different hash values. Note that the return value is better +mixed than val2, so use that first. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h); + +By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this +code any way you wish, private, educational, or commercial. It's free. + +Use for hash table lookup, or anything where one collision in 2^^32 is +acceptable. Do NOT use for cryptographic purposes. +------------------------------------------------------------------------------- +*/ + +static uint32_t hashlittle( const void *key, size_t length ) +{ + uint32_t a,b,c; /* internal state */ + union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length); + + u.ptr = key; + if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ +#ifdef VALGRIND + const uint8_t *k8; +#endif + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; +} + +unsigned int tdb_jenkins_hash(TDB_DATA *key) +{ + return hashlittle(key->dptr, key->dsize); +} diff --git a/ctdb/lib/tdb/common/open.c b/ctdb/lib/tdb/common/open.c index 7687ff6e32..66539c3f6c 100644 --- a/ctdb/lib/tdb/common/open.c +++ b/ctdb/lib/tdb/common/open.c @@ -30,20 +30,25 @@ /* all contexts, to ensure no double-opens (fcntl locks don't nest!) */ static struct tdb_context *tdbs = NULL; - -/* This is based on the hash algorithm from gdbm */ -static unsigned int default_tdb_hash(TDB_DATA *key) +/* We use two hashes to double-check they're using the right hash function. */ +void tdb_header_hash(struct tdb_context *tdb, + uint32_t *magic1_hash, uint32_t *magic2_hash) { - uint32_t value; /* Used to compute the hash value. */ - uint32_t i; /* Used to cycle through random values. */ + TDB_DATA hash_key; + uint32_t tdb_magic = TDB_MAGIC; - /* Set the initial value from the key size. */ - for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++) - value = (value + (key->dptr[i] << (i*5 % 24))); + hash_key.dptr = discard_const_p(unsigned char, TDB_MAGIC_FOOD); + hash_key.dsize = sizeof(TDB_MAGIC_FOOD); + *magic1_hash = tdb->hash_fn(&hash_key); - return (1103515243 * value + 12345); -} + hash_key.dptr = (unsigned char *)CONVERT(tdb_magic); + hash_key.dsize = sizeof(tdb_magic); + *magic2_hash = tdb->hash_fn(&hash_key); + /* Make sure at least one hash is non-zero! */ + if (*magic1_hash == 0 && *magic2_hash == 0) + *magic1_hash = 1; +} /* initialise a new database with a specified hash size */ static int tdb_new_database(struct tdb_context *tdb, int hash_size) @@ -62,6 +67,14 @@ static int tdb_new_database(struct tdb_context *tdb, int hash_size) /* Fill in the header */ newdb->version = TDB_VERSION; newdb->hash_size = hash_size; + + tdb_header_hash(tdb, &newdb->magic1_hash, &newdb->magic2_hash); + + /* Make sure older tdbs (which don't check the magic hash fields) + * will refuse to open this TDB. */ + if (tdb->flags & TDB_INCOMPATIBLE_HASH) + newdb->rwlocks = TDB_HASH_RWLOCK_MAGIC; + if (tdb->flags & TDB_INTERNAL) { tdb->map_size = size; tdb->map_ptr = (char *)newdb; @@ -128,6 +141,26 @@ static void null_log_fn(struct tdb_context *tdb, enum tdb_debug_level level, con { } +static bool check_header_hash(struct tdb_context *tdb, + bool default_hash, uint32_t *m1, uint32_t *m2) +{ + tdb_header_hash(tdb, m1, m2); + if (tdb->header.magic1_hash == *m1 && + tdb->header.magic2_hash == *m2) { + return true; + } + + /* If they explicitly set a hash, always respect it. */ + if (!default_hash) + return false; + + /* Otherwise, try the other inbuilt hash. */ + if (tdb->hash_fn == tdb_old_hash) + tdb->hash_fn = tdb_jenkins_hash; + else + tdb->hash_fn = tdb_old_hash; + return check_header_hash(tdb, false, m1, m2); +} struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, int open_flags, mode_t mode, @@ -140,6 +173,8 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, unsigned char *vp; uint32_t vertest; unsigned v; + const char *hash_alg; + uint32_t magic1, magic2; if (!(tdb = (struct tdb_context *)calloc(1, sizeof *tdb))) { /* Can't log this */ @@ -161,7 +196,19 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, tdb->log.log_fn = null_log_fn; tdb->log.log_private = NULL; } - tdb->hash_fn = hash_fn ? hash_fn : default_tdb_hash; + + if (hash_fn) { + tdb->hash_fn = hash_fn; + hash_alg = "the user defined"; + } else { + /* This controls what we use when creating a tdb. */ + if (tdb->flags & TDB_INCOMPATIBLE_HASH) { + tdb->hash_fn = tdb_jenkins_hash; + } else { + tdb->hash_fn = tdb_old_hash; + } + hash_alg = "either default"; + } /* cache the page size */ tdb->page_size = getpagesize(); @@ -196,6 +243,10 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, goto fail; } + if (getenv("TDB_NO_FSYNC")) { + tdb->flags |= TDB_NOSYNC; + } + /* * TDB_ALLOW_NESTING is the default behavior. * Note: this may change in future versions! @@ -274,11 +325,31 @@ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, if (fstat(tdb->fd, &st) == -1) goto fail; - if (tdb->header.rwlocks != 0) { + if (tdb->header.rwlocks != 0 && + tdb->header.rwlocks != TDB_HASH_RWLOCK_MAGIC) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: spinlocks no longer supported\n")); goto fail; } + if ((tdb->header.magic1_hash == 0) && (tdb->header.magic2_hash == 0)) { + /* older TDB without magic hash references */ + tdb->hash_fn = tdb_old_hash; + } else if (!check_header_hash(tdb, !hash_fn, &magic1, &magic2)) { + TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " + "%s was not created with %s hash function we are using\n" + "magic1_hash[0x%08X %s 0x%08X] " + "magic2_hash[0x%08X %s 0x%08X]\n", + name, hash_alg, + tdb->header.magic1_hash, + (tdb->header.magic1_hash == magic1) ? "==" : "!=", + magic1, + tdb->header.magic2_hash, + (tdb->header.magic2_hash == magic2) ? "==" : "!=", + magic2)); + errno = EINVAL; + goto fail; + } + /* Is it already in the open list? If so, fail. */ if (tdb_already_open(st.st_dev, st.st_ino)) { TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " diff --git a/ctdb/lib/tdb/common/tdb_private.h b/ctdb/lib/tdb/common/tdb_private.h index 9d0f3bcd70..0c621636fa 100644 --- a/ctdb/lib/tdb/common/tdb_private.h +++ b/ctdb/lib/tdb/common/tdb_private.h @@ -50,6 +50,7 @@ typedef uint32_t tdb_off_t; #define TDB_DEAD_MAGIC (0xFEE1DEAD) #define TDB_RECOVERY_MAGIC (0xf53bc0e7U) #define TDB_RECOVERY_INVALID_MAGIC (0x0) +#define TDB_HASH_RWLOCK_MAGIC (0xbad1a51U) #define TDB_ALIGNMENT 4 #define DEFAULT_HASH_SIZE 131 #define FREELIST_TOP (sizeof(struct tdb_header)) @@ -147,7 +148,9 @@ struct tdb_header { tdb_off_t rwlocks; /* obsolete - kept to detect old formats */ tdb_off_t recovery_start; /* offset of transaction recovery region */ tdb_off_t sequence_number; /* used when TDB_SEQNUM is set */ - tdb_off_t reserved[29]; + uint32_t magic1_hash; /* hash of TDB_MAGIC_FOOD. */ + uint32_t magic2_hash; /* hash of TDB_MAGIC. */ + tdb_off_t reserved[27]; }; struct tdb_lock_type { @@ -268,3 +271,6 @@ int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct tdb_record *rec); bool tdb_write_all(int fd, const void *buf, size_t count); int tdb_transaction_recover(struct tdb_context *tdb); +void tdb_header_hash(struct tdb_context *tdb, + uint32_t *magic1_hash, uint32_t *magic2_hash); +unsigned int tdb_old_hash(TDB_DATA *key); diff --git a/ctdb/lib/tdb/config.mk b/ctdb/lib/tdb/config.mk index b2e322cebc..22caadaba5 100644 --- a/ctdb/lib/tdb/config.mk +++ b/ctdb/lib/tdb/config.mk @@ -35,6 +35,15 @@ PRIVATE_DEPENDENCIES = \ tdbtorture_OBJ_FILES = $(tdbsrcdir)/tools/tdbtorture.o ################################################ +# Start BINARY tdbrestore +[BINARY::tdbrestore] +INSTALLDIR = BINDIR +PRIVATE_DEPENDENCIES = \ + LIBTDB +# End BINARY tdbrestore +################################################ + +################################################ # Start BINARY tdbdump [BINARY::tdbdump] INSTALLDIR = BINDIR diff --git a/ctdb/lib/tdb/configure.ac b/ctdb/lib/tdb/configure.ac index 9b87227f35..fc42e00c15 100644 --- a/ctdb/lib/tdb/configure.ac +++ b/ctdb/lib/tdb/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.50) AC_DEFUN([SMB_MODULE_DEFAULT], [echo -n ""]) AC_DEFUN([SMB_LIBRARY_ENABLE], [echo -n ""]) AC_DEFUN([SMB_ENABLE], [echo -n ""]) -AC_INIT(tdb, 1.2.3) +AC_INIT(tdb, 1.2.6) AC_CONFIG_SRCDIR([common/tdb.c]) AC_CONFIG_HEADER(include/config.h) AC_LIBREPLACE_ALL_CHECKS diff --git a/ctdb/lib/tdb/include/tdb.h b/ctdb/lib/tdb/include/tdb.h index cd17132fbe..aac7c9221d 100644 --- a/ctdb/lib/tdb/include/tdb.h +++ b/ctdb/lib/tdb/include/tdb.h @@ -55,6 +55,7 @@ extern "C" { #define TDB_VOLATILE 256 /* Activate the per-hashchain freelist, default 5 */ #define TDB_ALLOW_NESTING 512 /* Allow transactions to nest */ #define TDB_DISALLOW_NESTING 1024 /* Disallow transactions to nest */ +#define TDB_INCOMPATIBLE_HASH 2048 /* Better hashing: can't be opened by tdb < 1.2.6. */ /* error codes */ enum TDB_ERROR {TDB_SUCCESS=0, TDB_ERR_CORRUPT, TDB_ERR_IO, TDB_ERR_LOCK, @@ -147,6 +148,7 @@ _PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flag); _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flag); _PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb); _PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb); +_PUBLIC_ unsigned int tdb_jenkins_hash(TDB_DATA *key); _PUBLIC_ int tdb_check(struct tdb_context *tdb, int (*check)(TDB_DATA key, TDB_DATA data, void *private_data), void *private_data); diff --git a/ctdb/lib/tdb/libtdb.m4 b/ctdb/lib/tdb/libtdb.m4 index feae1c2cc6..3fe700ff1c 100644 --- a/ctdb/lib/tdb/libtdb.m4 +++ b/ctdb/lib/tdb/libtdb.m4 @@ -13,7 +13,7 @@ if test x"$tdbdir" = "x"; then AC_MSG_ERROR([cannot find tdb source in $tdbpaths]) fi TDB_OBJ="common/tdb.o common/dump.o common/transaction.o common/error.o common/traverse.o" -TDB_OBJ="$TDB_OBJ common/freelist.o common/freelistcheck.o common/io.o common/lock.o common/open.o common/check.o" +TDB_OBJ="$TDB_OBJ common/freelist.o common/freelistcheck.o common/io.o common/lock.o common/open.o common/check.o common/hash.o" AC_SUBST(TDB_OBJ) AC_SUBST(LIBREPLACEOBJ) diff --git a/ctdb/lib/tdb/pytdb.c b/ctdb/lib/tdb/pytdb.c index 7a9205b815..b857438e16 100644 --- a/ctdb/lib/tdb/pytdb.c +++ b/ctdb/lib/tdb/pytdb.c @@ -24,10 +24,10 @@ License along with this library; if not, see <http://www.gnu.org/licenses/>. */ +#include <Python.h> #include "replace.h" #include "system/filesys.h" -#include <Python.h> #ifndef Py_RETURN_NONE #define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None #endif @@ -77,15 +77,19 @@ static PyObject *PyString_FromTDB_DATA(TDB_DATA data) static PyObject *py_tdb_open(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - char *name; + char *name = NULL; int hash_size = 0, tdb_flags = TDB_DEFAULT, flags = O_RDWR, mode = 0600; TDB_CONTEXT *ctx; PyTdbObject *ret; const char *kwnames[] = { "name", "hash_size", "tdb_flags", "flags", "mode", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|iiii", (char **)kwnames, &name, &hash_size, &tdb_flags, &flags, &mode)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|siiii", (char **)kwnames, &name, &hash_size, &tdb_flags, &flags, &mode)) return NULL; + if (name == NULL) { + tdb_flags |= TDB_INTERNAL; + } + ctx = tdb_open(name, hash_size, tdb_flags, flags, mode); if (ctx == NULL) { PyErr_SetFromErrno(PyExc_IOError); @@ -93,6 +97,11 @@ static PyObject *py_tdb_open(PyTypeObject *type, PyObject *args, PyObject *kwarg } ret = PyObject_New(PyTdbObject, &PyTdb); + if (!ret) { + tdb_close(ctx); + return NULL; + } + ret->ctx = ctx; ret->closed = false; return (PyObject *)ret; @@ -112,6 +121,13 @@ static PyObject *obj_transaction_commit(PyTdbObject *self) Py_RETURN_NONE; } +static PyObject *obj_transaction_prepare_commit(PyTdbObject *self) +{ + int ret = tdb_transaction_prepare_commit(self->ctx); + PyErr_TDB_ERROR_IS_ERR_RAISE(ret, self->ctx); + Py_RETURN_NONE; +} + static PyObject *obj_transaction_start(PyTdbObject *self) { int ret = tdb_transaction_start(self->ctx); @@ -259,6 +275,27 @@ static PyObject *obj_store(PyTdbObject *self, PyObject *args) Py_RETURN_NONE; } +static PyObject *obj_add_flags(PyTdbObject *self, PyObject *args) +{ + unsigned flags; + + if (!PyArg_ParseTuple(args, "I", &flags)) + return NULL; + + tdb_add_flags(self->ctx, flags); + Py_RETURN_NONE; +} + +static PyObject *obj_remove_flags(PyTdbObject *self, PyObject *args) +{ + unsigned flags; + + if (!PyArg_ParseTuple(args, "I", &flags)) + return NULL; + + tdb_remove_flags(self->ctx, flags); + Py_RETURN_NONE; +} typedef struct { PyObject_HEAD @@ -298,6 +335,8 @@ static PyObject *tdb_object_iter(PyTdbObject *self) PyTdbIteratorObject *ret; ret = PyObject_New(PyTdbIteratorObject, &PyTdbIterator); + if (!ret) + return NULL; ret->current = tdb_firstkey(self->ctx); ret->iteratee = self; Py_INCREF(self); @@ -311,6 +350,25 @@ static PyObject *obj_clear(PyTdbObject *self) Py_RETURN_NONE; } +static PyObject *obj_repack(PyTdbObject *self) +{ + int ret = tdb_repack(self->ctx); + PyErr_TDB_ERROR_IS_ERR_RAISE(ret, self->ctx); + Py_RETURN_NONE; +} + +static PyObject *obj_enable_seqnum(PyTdbObject *self) +{ + tdb_enable_seqnum(self->ctx); + Py_RETURN_NONE; +} + +static PyObject *obj_increment_seqnum_nonblock(PyTdbObject *self) +{ + tdb_increment_seqnum_nonblock(self->ctx); + Py_RETURN_NONE; +} + static PyMethodDef tdb_object_methods[] = { { "transaction_cancel", (PyCFunction)obj_transaction_cancel, METH_NOARGS, "S.transaction_cancel() -> None\n" @@ -318,6 +376,9 @@ static PyMethodDef tdb_object_methods[] = { { "transaction_commit", (PyCFunction)obj_transaction_commit, METH_NOARGS, "S.transaction_commit() -> None\n" "Commit the currently active transaction." }, + { "transaction_prepare_commit", (PyCFunction)obj_transaction_prepare_commit, METH_NOARGS, + "S.transaction_prepare_commit() -> None\n" + "Prepare to commit the currently active transaction" }, { "transaction_start", (PyCFunction)obj_transaction_start, METH_NOARGS, "S.transaction_start() -> None\n" "Start a new transaction." }, @@ -341,9 +402,17 @@ static PyMethodDef tdb_object_methods[] = { "Check whether key exists in this database." }, { "store", (PyCFunction)obj_store, METH_VARARGS, "S.store(key, data, flag=REPLACE) -> None" "Store data." }, + { "add_flags", (PyCFunction)obj_add_flags, METH_VARARGS, "S.add_flags(flags) -> None" }, + { "remove_flags", (PyCFunction)obj_remove_flags, METH_VARARGS, "S.remove_flags(flags) -> None" }, { "iterkeys", (PyCFunction)tdb_object_iter, METH_NOARGS, "S.iterkeys() -> iterator" }, { "clear", (PyCFunction)obj_clear, METH_NOARGS, "S.clear() -> None\n" "Wipe the entire database." }, + { "repack", (PyCFunction)obj_repack, METH_NOARGS, "S.repack() -> None\n" + "Repack the entire database." }, + { "enable_seqnum", (PyCFunction)obj_enable_seqnum, METH_NOARGS, + "S.enable_seqnum() -> None" }, + { "increment_seqnum_nonblock", (PyCFunction)obj_increment_seqnum_nonblock, METH_NOARGS, + "S.increment_seqnum_nonblock() -> None" }, { NULL } }; @@ -365,6 +434,11 @@ static PyObject *obj_get_map_size(PyTdbObject *self, void *closure) return PyInt_FromLong(tdb_map_size(self->ctx)); } +static PyObject *obj_get_freelist_size(PyTdbObject *self, void *closure) +{ + return PyInt_FromLong(tdb_freelist_size(self->ctx)); +} + static PyObject *obj_get_flags(PyTdbObject *self, void *closure) { return PyInt_FromLong(tdb_get_flags(self->ctx)); @@ -375,18 +449,30 @@ static PyObject *obj_get_filename(PyTdbObject *self, void *closure) return PyString_FromString(tdb_name(self->ctx)); } +static PyObject *obj_get_seqnum(PyTdbObject *self, void *closure) +{ + return PyInt_FromLong(tdb_get_seqnum(self->ctx)); +} + + static PyGetSetDef tdb_object_getsetters[] = { { (char *)"hash_size", (getter)obj_get_hash_size, NULL, NULL }, { (char *)"map_size", (getter)obj_get_map_size, NULL, NULL }, + { (char *)"freelist_size", (getter)obj_get_freelist_size, NULL, NULL }, { (char *)"flags", (getter)obj_get_flags, NULL, NULL }, { (char *)"max_dead", NULL, (setter)obj_set_max_dead, NULL }, { (char *)"filename", (getter)obj_get_filename, NULL, (char *)"The filename of this TDB file."}, + { (char *)"seqnum", (getter)obj_get_seqnum, NULL, NULL }, { NULL } }; static PyObject *tdb_object_repr(PyTdbObject *self) { - return PyString_FromFormat("Tdb('%s')", tdb_name(self->ctx)); + if (tdb_get_flags(self->ctx) & TDB_INTERNAL) { + return PyString_FromString("Tdb(<internal>)"); + } else { + return PyString_FromFormat("Tdb('%s')", tdb_name(self->ctx)); + } } static void tdb_object_dealloc(PyTdbObject *self) @@ -497,8 +583,17 @@ void inittdb(void) PyModule_AddObject(m, "NOMMAP", PyInt_FromLong(TDB_NOMMAP)); PyModule_AddObject(m, "CONVERT", PyInt_FromLong(TDB_CONVERT)); PyModule_AddObject(m, "BIGENDIAN", PyInt_FromLong(TDB_BIGENDIAN)); + PyModule_AddObject(m, "NOSYNC", PyInt_FromLong(TDB_NOSYNC)); + PyModule_AddObject(m, "SEQNUM", PyInt_FromLong(TDB_SEQNUM)); + PyModule_AddObject(m, "VOLATILE", PyInt_FromLong(TDB_VOLATILE)); + PyModule_AddObject(m, "ALLOW_NESTING", PyInt_FromLong(TDB_ALLOW_NESTING)); + PyModule_AddObject(m, "DISALLOW_NESTING", PyInt_FromLong(TDB_DISALLOW_NESTING)); + PyModule_AddObject(m, "INCOMPATIBLE_HASH", PyInt_FromLong(TDB_INCOMPATIBLE_HASH)); + PyModule_AddObject(m, "__docformat__", PyString_FromString("restructuredText")); + PyModule_AddObject(m, "__version__", PyString_FromString(PACKAGE_VERSION)); + Py_INCREF(&PyTdb); PyModule_AddObject(m, "Tdb", (PyObject *)&PyTdb); diff --git a/ctdb/lib/tdb/python/tests/simple.py b/ctdb/lib/tdb/python/tests/simple.py index 1c5982b1a4..615de494b5 100644 --- a/ctdb/lib/tdb/python/tests/simple.py +++ b/ctdb/lib/tdb/python/tests/simple.py @@ -16,6 +16,7 @@ class OpenTdbTests(TestCase): self.assertRaises(IOError, tdb.Tdb, "/some/nonexistant/file", 0, tdb.DEFAULT, os.O_RDWR) class CloseTdbTests(TestCase): + def test_double_close(self): self.tdb = tdb.Tdb(tempfile.mkstemp()[1], 0, tdb.DEFAULT, os.O_CREAT|os.O_RDWR) self.assertNotEqual(None, self.tdb) @@ -25,6 +26,15 @@ class CloseTdbTests(TestCase): self.tdb.close() +class InternalTdbTests(TestCase): + + def test_repr(self): + self.tdb = tdb.Tdb() + + # repr used to crash on internal db + self.assertEquals(repr(self.tdb), "Tdb(<internal>)") + + class SimpleTdbTests(TestCase): def setUp(self): super(SimpleTdbTests, self).setUp() @@ -81,6 +91,9 @@ class SimpleTdbTests(TestCase): def test_map_size(self): self.tdb.map_size + def test_freelist_size(self): + self.tdb.freelist_size + def test_name(self): self.tdb.filename @@ -103,11 +116,13 @@ class SimpleTdbTests(TestCase): self.tdb.transaction_commit() self.assertEquals("1", self.tdb["bloe"]) - def test_iterator(self): + def test_transaction_prepare_commit(self): self.tdb["bloe"] = "2" - self.tdb["bla"] = "hoi" - i = iter(self.tdb) - self.assertEquals(set(["bloe", "bla"]), set([i.next(), i.next()])) + self.tdb.transaction_start() + self.tdb["bloe"] = "1" + self.tdb.transaction_prepare_commit() + self.tdb.transaction_commit() + self.assertEquals("1", self.tdb["bloe"]) def test_iterkeys(self): self.tdb["bloe"] = "2" @@ -122,11 +137,34 @@ class SimpleTdbTests(TestCase): self.tdb.clear() self.assertEquals(0, len(list(self.tdb))) + def test_repack(self): + self.tdb["foo"] = "abc" + self.tdb["bar"] = "def" + del self.tdb["foo"] + self.tdb.repack() + + def test_seqnum(self): + self.tdb.enable_seqnum() + seq1 = self.tdb.seqnum + self.tdb.increment_seqnum_nonblock() + seq2 = self.tdb.seqnum + self.assertEquals(seq2-seq1, 1) + def test_len(self): self.assertEquals(0, len(list(self.tdb))) self.tdb["entry"] = "value" self.assertEquals(1, len(list(self.tdb))) + def test_add_flags(self): + self.tdb.add_flags(tdb.NOMMAP) + self.tdb.remove_flags(tdb.NOMMAP) + + +class VersionTests(TestCase): + + def test_present(self): + self.assertTrue(isinstance(tdb.__version__, str)) + if __name__ == '__main__': import unittest diff --git a/ctdb/lib/tdb/tdb.mk b/ctdb/lib/tdb/tdb.mk index 267c2d1c85..3c52b7305b 100644 --- a/ctdb/lib/tdb/tdb.mk +++ b/ctdb/lib/tdb/tdb.mk @@ -1,7 +1,7 @@ dirs:: @mkdir -p bin common tools -PROGS = bin/tdbtool$(EXEEXT) bin/tdbdump$(EXEEXT) bin/tdbbackup$(EXEEXT) +PROGS = bin/tdbtool$(EXEEXT) bin/tdbrestore$(EXEEXT) bin/tdbdump$(EXEEXT) bin/tdbbackup$(EXEEXT) PROGS_NOINSTALL = bin/tdbtest$(EXEEXT) bin/tdbtorture$(EXEEXT) ALL_PROGS = $(PROGS) $(PROGS_NOINSTALL) @@ -23,6 +23,9 @@ bin/tdbtorture$(EXEEXT): tools/tdbtorture.o $(TDB_LIB) bin/tdbdump$(EXEEXT): tools/tdbdump.o $(TDB_LIB) $(CC) $(CFLAGS) $(LDFLAGS) -o bin/tdbdump tools/tdbdump.o -L. -ltdb +bin/tdbrestore$(EXEEXT): tools/tdbrestore.o $(TDB_LIB) + $(CC) $(CFLAGS) $(LDFLAGS) -o bin/tdbrestore tools/tdbrestore.o -L. -ltdb $(TDB_DEPS) + bin/tdbbackup$(EXEEXT): tools/tdbbackup.o $(TDB_LIB) $(CC) $(CFLAGS) $(LDFLAGS) -o bin/tdbbackup tools/tdbbackup.o -L. -ltdb diff --git a/ctdb/lib/tdb/tools/tdbrestore.c b/ctdb/lib/tdb/tools/tdbrestore.c new file mode 100644 index 0000000000..485c440df1 --- /dev/null +++ b/ctdb/lib/tdb/tools/tdbrestore.c @@ -0,0 +1,226 @@ +/* + tdbrestore -- construct a tdb from tdbdump output. + Copyright (C) Volker Lendecke 2010 + Copyright (C) Simon McVittie 2005 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include <assert.h> +#include "replace.h" +#include "system/locale.h" +#include "system/time.h" +#include "system/filesys.h" +#include "system/wait.h" +#include "tdb.h" + +#define debug_fprintf(file, fmt, ...) do {/*nothing*/} while (0) + +static int read_linehead(FILE *f) +{ + int i, c; + int num_bytes; + char prefix[128]; + + while (1) { + c = getc(f); + if (c == EOF) { + return -1; + } + if (c == '\(') { + break; + } + } + for (i=0; i<sizeof(prefix); i++) { + c = getc(f); + if (c == EOF) { + return -1; + } + prefix[i] = c; + if (c == '"') { + break; + } + } + if (i == sizeof(prefix)) { + return -1; + } + prefix[i] = '\0'; + + if (sscanf(prefix, "%d) = ", &num_bytes) != 1) { + return -1; + } + return num_bytes; +} + +static int read_hex(void) { + int c; + c = getchar(); + if (c == EOF) { + fprintf(stderr, "Unexpected EOF in data\n"); + return -1; + } else if (c == '"') { + fprintf(stderr, "Unexpected \\\" sequence\n"); + return -1; + } else if ('0' <= c && c <= '9') { + return c - '0'; + } else if ('A' <= c && c <= 'F') { + return c - 'A' + 10; + } else if ('a' <= c && c <= 'f') { + return c - 'a' + 10; + } else { + fprintf(stderr, "Invalid hex: %c\n", c); + return -1; + } +} + +static int read_data(FILE *f, TDB_DATA *d, size_t size) { + int c, low, high; + int i; + + d->dptr = (unsigned char *)malloc(size); + if (d->dptr == NULL) { + return -1; + } + d->dsize = size; + + for (i=0; i<size; i++) { + c = getc(f); + if (c == EOF) { + fprintf(stderr, "Unexpected EOF in data\n"); + return 1; + } else if (c == '"') { + return 0; + } else if (c == '\\') { + high = read_hex(); + if (high < 0) { + return -1; + } + high = high << 4; + assert(high == (high & 0xf0)); + low = read_hex(); + if (low < 0) { + return -1; + } + assert(low == (low & 0x0f)); + d->dptr[i] = (low|high); + } else { + d->dptr[i] = c; + } + } + return 0; +} + +static int swallow(FILE *f, const char *s, int *eof) +{ + char line[128]; + + if (fgets(line, sizeof(line), f) == NULL) { + if (eof != NULL) { + *eof = 1; + } + return -1; + } + if (strcmp(line, s) != 0) { + return -1; + } + return 0; +} + +static int read_rec(FILE *f, TDB_CONTEXT *tdb, int *eof) +{ + int length; + TDB_DATA key, data; + int ret = -1; + + key.dptr = NULL; + data.dptr = NULL; + + if (swallow(f, "{\n", eof) == -1) { + goto fail; + } + length = read_linehead(f); + if (length == -1) { + goto fail; + } + if (read_data(f, &key, length) == -1) { + goto fail; + } + if (swallow(f, "\"\n", NULL) == -1) { + goto fail; + } + length = read_linehead(f); + if (length == -1) { + goto fail; + } + if (read_data(f, &data, length) == -1) { + goto fail; + } + if ((swallow(f, "\"\n", NULL) == -1) + || (swallow(f, "}\n", NULL) == -1)) { + goto fail; + } + if (tdb_store(tdb, key, data, TDB_INSERT) == -1) { + fprintf(stderr, "TDB error: %s\n", tdb_errorstr(tdb)); + goto fail; + } + + ret = 0; +fail: + free(key.dptr); + free(data.dptr); + return ret; +} + +static int restore_tdb(const char *fname) +{ + TDB_CONTEXT *tdb; + + tdb = tdb_open(fname, 0, 0, O_RDWR|O_CREAT|O_EXCL, 0666); + if (!tdb) { + perror("tdb_open"); + fprintf(stderr, "Failed to open %s\n", fname); + return 1; + } + + while (1) { + int eof = 0; + if (read_rec(stdin, tdb, &eof) == -1) { + if (eof) { + break; + } + return 1; + } + } + if (tdb_close(tdb)) { + fprintf(stderr, "Error closing tdb\n"); + return 1; + } + fprintf(stderr, "EOF\n"); + return 0; +} + +int main(int argc, char *argv[]) +{ + char *fname; + + if (argc < 2) { + printf("Usage: %s dbname < tdbdump_output\n", argv[0]); + exit(1); + } + + fname = argv[1]; + + return restore_tdb(fname); +} |