summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Tridgell <tridge@samba.org>2006-11-28 20:48:34 +1100
committerAndrew Tridgell <tridge@samba.org>2006-11-28 20:48:34 +1100
commitfcae7fb9ca397df7f69d099b5dfa40d1f3d21de8 (patch)
treea476fceca3959170fc1d7f5072590110816a4111
parentfdb317facfdb60c87695b557a4680401c210031a (diff)
downloadsamba-fcae7fb9ca397df7f69d099b5dfa40d1f3d21de8.tar.gz
samba-fcae7fb9ca397df7f69d099b5dfa40d1f3d21de8.tar.xz
samba-fcae7fb9ca397df7f69d099b5dfa40d1f3d21de8.zip
- added in idtree for efficient reqid handling
- started adding ctdb_call() code - added ctdb_call_local() implementation (This used to be ctdb commit 97b1fdf7fa0e230f36add3f1770ecb3a9faee0a1)
-rw-r--r--ctdb/Makefile.in4
-rw-r--r--ctdb/common/ctdb.c1
-rw-r--r--ctdb/common/ctdb_call.c82
-rw-r--r--ctdb/ctdb_test.c1
-rw-r--r--ctdb/include/ctdb_private.h5
-rw-r--r--ctdb/include/idtree.h7
-rw-r--r--ctdb/include/includes.h1
-rw-r--r--ctdb/lib/util/idtree.c374
8 files changed, 472 insertions, 3 deletions
diff --git a/ctdb/Makefile.in b/ctdb/Makefile.in
index 66369ae03c..4bb29c7440 100644
--- a/ctdb/Makefile.in
+++ b/ctdb/Makefile.in
@@ -18,10 +18,10 @@ CFLAGS=-g -I$(srcdir)/include -Iinclude -I$(srcdir) \
LIB_FLAGS=@LDFLAGS@ -Llib @LIBS@ -lpopt
-EVENTS_OBJ = lib/events/events.o lib/events/events_standard.o
+EVENTS_OBJ = lib/events/events.o lib/events/events_standard.o
CTDB_COMMON_OBJ = common/ctdb.o common/util.o common/ctdb_util.o \
- common/ctdb_call.o common/ctdb_ltdb.o
+ common/ctdb_call.o common/ctdb_ltdb.o lib/util/idtree.o
CTDB_TCP_OBJ = tcp/tcp_connect.o tcp/tcp_io.o tcp/tcp_init.o
diff --git a/ctdb/common/ctdb.c b/ctdb/common/ctdb.c
index 0277135f36..6ab4b16702 100644
--- a/ctdb/common/ctdb.c
+++ b/ctdb/common/ctdb.c
@@ -187,6 +187,7 @@ struct ctdb_context *ctdb_init(struct event_context *ev)
ctdb = talloc_zero(ev, struct ctdb_context);
ctdb->ev = ev;
ctdb->upcalls = &ctdb_upcalls;
+ ctdb->idr = idr_init(ctdb);
return ctdb;
}
diff --git a/ctdb/common/ctdb_call.c b/ctdb/common/ctdb_call.c
index 9688ffda73..f16c5705f4 100644
--- a/ctdb/common/ctdb_call.c
+++ b/ctdb/common/ctdb_call.c
@@ -31,6 +31,56 @@
static int ctdb_call_local(struct ctdb_context *ctdb, TDB_DATA key, int call_id,
TDB_DATA *call_data, TDB_DATA *reply_data)
{
+ struct ctdb_call *c;
+ struct ctdb_registered_call *fn;
+ TDB_DATA data;
+
+ c = talloc(ctdb, struct ctdb_call);
+ CTDB_NO_MEMORY(ctdb, c);
+
+ data = tdb_fetch(ctdb->ltdb, key);
+
+ c->key = key;
+ c->call_data = call_data;
+ c->record_data.dptr = talloc_memdup(c, data.dptr, data.dsize);
+ CTDB_NO_MEMORY(ctdb, c->record_data.dptr);
+ if (data.dptr) free(data.dptr);
+ c->new_data = NULL;
+ c->reply_data = NULL;
+
+ for (fn=ctdb->calls;fn;fn=fn->next) {
+ if (fn->id == call_id) break;
+ }
+ if (fn == NULL) {
+ ctdb_set_error(ctdb, "Unknown call id %u\n", call_id);
+ return -1;
+ }
+
+ if (fn->fn(c) != 0) {
+ free(c->record_data.dptr);
+ ctdb_set_error(ctdb, "ctdb_call %u failed\n", call_id);
+ return -1;
+ }
+
+ if (c->new_data) {
+ if (tdb_store(ctdb->ltdb, key, *c->new_data, TDB_REPLACE) != 0) {
+ ctdb_set_error(ctdb, "ctdb_call tdb_store failed\n");
+ return -1;
+ }
+ }
+
+ if (reply_data) {
+ if (c->reply_data) {
+ *reply_data = *c->reply_data;
+ talloc_steal(ctdb, reply_data->dptr);
+ } else {
+ reply_data->dptr = NULL;
+ reply_data->dsize = 0;
+ }
+ }
+
+ talloc_free(c);
+
return -1;
}
@@ -41,13 +91,43 @@ int ctdb_call(struct ctdb_context *ctdb, TDB_DATA key, int call_id,
TDB_DATA *call_data, TDB_DATA *reply_data)
{
uint32_t dest;
+ struct ctdb_req_call *c;
+ uint32_t len;
+ struct ctdb_node *node;
dest = ctdb_hash(&key) % ctdb->num_nodes;
if (dest == ctdb->vnn) {
return ctdb_call_local(ctdb, key, call_id, call_data, reply_data);
}
-
+ len = sizeof(*c) + key.dsize + (call_data?call_data->dsize:0);
+ c = talloc_size(ctdb, len);
+ CTDB_NO_MEMORY(ctdb, c);
+
+ c->hdr.operation = CTDB_OP_CALL;
+ c->hdr.destnode = dest;
+ c->hdr.srcnode = ctdb->vnn;
+ /* this limits us to 16k outstanding messages - not unreasonable */
+ c->hdr.reqid = idr_get_new(ctdb->idr, c, 0xFFFF);
+ c->callid = call_id;
+ c->keylen = key.dsize;
+ c->calldatalen = call_data?call_data->dsize:0;
+ memcpy(&c->data[0], key.dptr, key.dsize);
+ if (call_data) {
+ memcpy(&c->data[key.dsize], call_data->dptr, call_data->dsize);
+ }
+
+ node = ctdb->nodes[dest];
+
+ if (ctdb->methods->queue_pkt(node, (uint8_t *)c, len) != 0) {
+ talloc_free(c);
+ return -1;
+ }
+
+ /*
+ event_add_timed(ctdb->ev, c, timeval_current_ofs(CTDB_REQ_TIMEOUT, 0),
+ ctdb_call_timeout, c);
+ */
return -1;
}
diff --git a/ctdb/ctdb_test.c b/ctdb/ctdb_test.c
index c30caa7ec7..452fa558b1 100644
--- a/ctdb/ctdb_test.c
+++ b/ctdb/ctdb_test.c
@@ -190,6 +190,7 @@ int main(int argc, const char *argv[])
for (i=0;i<data.dsize/sizeof(int);i++) {
printf("%3d\n", ((int *)data.dptr)[i]);
}
+ talloc_free(data.dptr);
/* shut it down */
talloc_free(ctdb);
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h
index d4e7350bd7..582739f617 100644
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -78,6 +78,7 @@ struct ctdb_context {
const char *name;
uint32_t vnn; /* our own vnn */
uint32_t num_nodes;
+ struct idr_context *idr;
struct ctdb_node **nodes; /* array of nodes in the cluster - indexed by vnn */
struct ctdb_registered_call *calls; /* list of registered calls */
char *err_msg;
@@ -91,6 +92,10 @@ struct ctdb_context {
ctdb_set_error(ctdb, "Out of memory at %s:%d", __FILE__, __LINE__); \
return -1; }} while (0)
+/* arbitrary maximum timeout for ctdb operations */
+#define CTDB_REQ_TIMEOUT 10
+
+
/*
operation IDs
*/
diff --git a/ctdb/include/idtree.h b/ctdb/include/idtree.h
new file mode 100644
index 0000000000..259af91005
--- /dev/null
+++ b/ctdb/include/idtree.h
@@ -0,0 +1,7 @@
+struct idr_context *idr_init(TALLOC_CTX *mem_ctx);
+int idr_get_new(struct idr_context *idp, void *ptr, int limit);
+int idr_get_new_above(struct idr_context *idp, void *ptr, int starting_id, int limit);
+int idr_get_new_random(struct idr_context *idp, void *ptr, int limit);
+void *idr_find(struct idr_context *idp, int id);
+int idr_remove(struct idr_context *idp, int id);
+
diff --git a/ctdb/include/includes.h b/ctdb/include/includes.h
index 70632a7e0d..b160e1f739 100644
--- a/ctdb/include/includes.h
+++ b/ctdb/include/includes.h
@@ -3,6 +3,7 @@
#include "replace.h"
#include "talloc.h"
#include "tdb.h"
+#include "idtree.h"
#include "ctdb.h"
#include "lib/util/dlinklist.h"
diff --git a/ctdb/lib/util/idtree.c b/ctdb/lib/util/idtree.c
new file mode 100644
index 0000000000..b275ae4519
--- /dev/null
+++ b/ctdb/lib/util/idtree.c
@@ -0,0 +1,374 @@
+/*
+ Unix SMB/CIFS implementation.
+
+ very efficient functions to manage mapping a id (such as a fnum) to
+ a pointer. This is used for fnum and search id allocation.
+
+ Copyright (C) Andrew Tridgell 2004
+
+ This code is derived from lib/idr.c in the 2.6 Linux kernel, which was
+ written by Jim Houston jim.houston@ccur.com, and is
+ Copyright (C) 2002 by Concurrent Computer Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+/*
+ see the section marked "public interface" below for documentation
+*/
+
+/**
+ * @file
+ */
+
+#include "includes.h"
+
+#define IDR_BITS 5
+#define IDR_FULL 0xfffffffful
+#if 0 /* unused */
+#define TOP_LEVEL_FULL (IDR_FULL >> 30)
+#endif
+#define IDR_SIZE (1 << IDR_BITS)
+#define IDR_MASK ((1 << IDR_BITS)-1)
+#define MAX_ID_SHIFT (sizeof(int)*8 - 1)
+#define MAX_ID_BIT (1U << MAX_ID_SHIFT)
+#define MAX_ID_MASK (MAX_ID_BIT - 1)
+#define MAX_LEVEL (MAX_ID_SHIFT + IDR_BITS - 1) / IDR_BITS
+#define IDR_FREE_MAX MAX_LEVEL + MAX_LEVEL
+
+#define set_bit(bit, v) (v) |= (1<<(bit))
+#define clear_bit(bit, v) (v) &= ~(1<<(bit))
+#define test_bit(bit, v) ((v) & (1<<(bit)))
+
+struct idr_layer {
+ uint32_t bitmap;
+ struct idr_layer *ary[IDR_SIZE];
+ int count;
+};
+
+struct idr_context {
+ struct idr_layer *top;
+ struct idr_layer *id_free;
+ int layers;
+ int id_free_cnt;
+};
+
+static struct idr_layer *alloc_layer(struct idr_context *idp)
+{
+ struct idr_layer *p;
+
+ if (!(p = idp->id_free))
+ return NULL;
+ idp->id_free = p->ary[0];
+ idp->id_free_cnt--;
+ p->ary[0] = NULL;
+ return p;
+}
+
+static int find_next_bit(uint32_t bm, int maxid, int n)
+{
+ while (n<maxid && !test_bit(n, bm)) n++;
+ return n;
+}
+
+static void free_layer(struct idr_context *idp, struct idr_layer *p)
+{
+ p->ary[0] = idp->id_free;
+ idp->id_free = p;
+ idp->id_free_cnt++;
+}
+
+static int idr_pre_get(struct idr_context *idp)
+{
+ while (idp->id_free_cnt < IDR_FREE_MAX) {
+ struct idr_layer *new = talloc_zero(idp, struct idr_layer);
+ if(new == NULL)
+ return (0);
+ free_layer(idp, new);
+ }
+ return 1;
+}
+
+static int sub_alloc(struct idr_context *idp, void *ptr, int *starting_id)
+{
+ int n, m, sh;
+ struct idr_layer *p, *new;
+ struct idr_layer *pa[MAX_LEVEL];
+ int l, id;
+ uint32_t bm;
+
+ memset(pa, 0, sizeof(pa));
+
+ id = *starting_id;
+ p = idp->top;
+ l = idp->layers;
+ pa[l--] = NULL;
+ while (1) {
+ /*
+ * We run around this while until we reach the leaf node...
+ */
+ n = (id >> (IDR_BITS*l)) & IDR_MASK;
+ bm = ~p->bitmap;
+ m = find_next_bit(bm, IDR_SIZE, n);
+ if (m == IDR_SIZE) {
+ /* no space available go back to previous layer. */
+ l++;
+ id = (id | ((1 << (IDR_BITS*l))-1)) + 1;
+ if (!(p = pa[l])) {
+ *starting_id = id;
+ return -2;
+ }
+ continue;
+ }
+ if (m != n) {
+ sh = IDR_BITS*l;
+ id = ((id >> sh) ^ n ^ m) << sh;
+ }
+ if ((id >= MAX_ID_BIT) || (id < 0))
+ return -1;
+ if (l == 0)
+ break;
+ /*
+ * Create the layer below if it is missing.
+ */
+ if (!p->ary[m]) {
+ if (!(new = alloc_layer(idp)))
+ return -1;
+ p->ary[m] = new;
+ p->count++;
+ }
+ pa[l--] = p;
+ p = p->ary[m];
+ }
+ /*
+ * We have reached the leaf node, plant the
+ * users pointer and return the raw id.
+ */
+ p->ary[m] = (struct idr_layer *)ptr;
+ set_bit(m, p->bitmap);
+ p->count++;
+ /*
+ * If this layer is full mark the bit in the layer above
+ * to show that this part of the radix tree is full.
+ * This may complete the layer above and require walking
+ * up the radix tree.
+ */
+ n = id;
+ while (p->bitmap == IDR_FULL) {
+ if (!(p = pa[++l]))
+ break;
+ n = n >> IDR_BITS;
+ set_bit((n & IDR_MASK), p->bitmap);
+ }
+ return(id);
+}
+
+static int idr_get_new_above_int(struct idr_context *idp, void *ptr, int starting_id)
+{
+ struct idr_layer *p, *new;
+ int layers, v, id;
+
+ idr_pre_get(idp);
+
+ id = starting_id;
+build_up:
+ p = idp->top;
+ layers = idp->layers;
+ if (!p) {
+ if (!(p = alloc_layer(idp)))
+ return -1;
+ layers = 1;
+ }
+ /*
+ * Add a new layer to the top of the tree if the requested
+ * id is larger than the currently allocated space.
+ */
+ while ((layers < MAX_LEVEL) && (id >= (1 << (layers*IDR_BITS)))) {
+ layers++;
+ if (!p->count)
+ continue;
+ if (!(new = alloc_layer(idp))) {
+ /*
+ * The allocation failed. If we built part of
+ * the structure tear it down.
+ */
+ for (new = p; p && p != idp->top; new = p) {
+ p = p->ary[0];
+ new->ary[0] = NULL;
+ new->bitmap = new->count = 0;
+ free_layer(idp, new);
+ }
+ return -1;
+ }
+ new->ary[0] = p;
+ new->count = 1;
+ if (p->bitmap == IDR_FULL)
+ set_bit(0, new->bitmap);
+ p = new;
+ }
+ idp->top = p;
+ idp->layers = layers;
+ v = sub_alloc(idp, ptr, &id);
+ if (v == -2)
+ goto build_up;
+ return(v);
+}
+
+static int sub_remove(struct idr_context *idp, int shift, int id)
+{
+ struct idr_layer *p = idp->top;
+ struct idr_layer **pa[MAX_LEVEL];
+ struct idr_layer ***paa = &pa[0];
+ int n;
+
+ *paa = NULL;
+ *++paa = &idp->top;
+
+ while ((shift > 0) && p) {
+ n = (id >> shift) & IDR_MASK;
+ clear_bit(n, p->bitmap);
+ *++paa = &p->ary[n];
+ p = p->ary[n];
+ shift -= IDR_BITS;
+ }
+ n = id & IDR_MASK;
+ if (p != NULL && test_bit(n, p->bitmap)) {
+ clear_bit(n, p->bitmap);
+ p->ary[n] = NULL;
+ while(*paa && ! --((**paa)->count)){
+ free_layer(idp, **paa);
+ **paa-- = NULL;
+ }
+ if ( ! *paa )
+ idp->layers = 0;
+ return 0;
+ }
+ return -1;
+}
+
+static void *_idr_find(struct idr_context *idp, int id)
+{
+ int n;
+ struct idr_layer *p;
+
+ n = idp->layers * IDR_BITS;
+ p = idp->top;
+ /*
+ * This tests to see if bits outside the current tree are
+ * present. If so, tain't one of ours!
+ */
+ if ((id & ~(~0 << MAX_ID_SHIFT)) >> (n + IDR_BITS))
+ return NULL;
+
+ /* Mask off upper bits we don't use for the search. */
+ id &= MAX_ID_MASK;
+
+ while (n >= IDR_BITS && p) {
+ n -= IDR_BITS;
+ p = p->ary[(id >> n) & IDR_MASK];
+ }
+ return((void *)p);
+}
+
+static int _idr_remove(struct idr_context *idp, int id)
+{
+ struct idr_layer *p;
+
+ /* Mask off upper bits we don't use for the search. */
+ id &= MAX_ID_MASK;
+
+ if (sub_remove(idp, (idp->layers - 1) * IDR_BITS, id) == -1) {
+ return -1;
+ }
+
+ if ( idp->top && idp->top->count == 1 &&
+ (idp->layers > 1) &&
+ idp->top->ary[0]) {
+ /* We can drop a layer */
+ p = idp->top->ary[0];
+ idp->top->bitmap = idp->top->count = 0;
+ free_layer(idp, idp->top);
+ idp->top = p;
+ --idp->layers;
+ }
+ while (idp->id_free_cnt >= IDR_FREE_MAX) {
+ p = alloc_layer(idp);
+ talloc_free(p);
+ }
+ return 0;
+}
+
+/************************************************************************
+ this is the public interface
+**************************************************************************/
+
+/**
+ initialise a idr tree. The context return value must be passed to
+ all subsequent idr calls. To destroy the idr tree use talloc_free()
+ on this context
+ */
+_PUBLIC_ struct idr_context *idr_init(TALLOC_CTX *mem_ctx)
+{
+ return talloc_zero(mem_ctx, struct idr_context);
+}
+
+/**
+ allocate the next available id, and assign 'ptr' into its slot.
+ you can retrieve later this pointer using idr_find()
+*/
+_PUBLIC_ int idr_get_new(struct idr_context *idp, void *ptr, int limit)
+{
+ int ret = idr_get_new_above_int(idp, ptr, 0);
+ if (ret > limit) {
+ idr_remove(idp, ret);
+ return -1;
+ }
+ return ret;
+}
+
+/**
+ allocate a new id, giving the first available value greater than or
+ equal to the given starting id
+*/
+_PUBLIC_ int idr_get_new_above(struct idr_context *idp, void *ptr, int starting_id, int limit)
+{
+ int ret = idr_get_new_above_int(idp, ptr, starting_id);
+ if (ret > limit) {
+ idr_remove(idp, ret);
+ return -1;
+ }
+ return ret;
+}
+
+/**
+ find a pointer value previously set with idr_get_new given an id
+*/
+_PUBLIC_ void *idr_find(struct idr_context *idp, int id)
+{
+ return _idr_find(idp, id);
+}
+
+/**
+ remove an id from the idr tree
+*/
+_PUBLIC_ int idr_remove(struct idr_context *idp, int id)
+{
+ int ret;
+ ret = _idr_remove((struct idr_context *)idp, id);
+ if (ret != 0) {
+ DEBUG(0,("WARNING: attempt to remove unset id %d in idtree\n", id));
+ }
+ return ret;
+}