From 199a5befb65ebc6ce0153a4e18993a7da97e7527 Mon Sep 17 00:00:00 2001 From: Peter Somogyi Date: Thu, 15 Feb 2007 17:02:38 +0100 Subject: 1st working ib integrated ctdb TODO: bugfix of ctdb_bench (wants to send via uninitialised connection - see bench_ring/dest) (This used to be ctdb commit 61516461e9d45dc7ba87518d134894fed7d7b7cd) --- ctdb/ib/README.txt | 9 ++++++++ ctdb/ib/ibw_ctdb.c | 55 +++++++++++++++++++++++++++++++++++++++++------- ctdb/ib/ibw_ctdb.h | 5 +++++ ctdb/ib/ibw_ctdb_init.c | 18 ---------------- ctdb/ib/ibwrapper.c | 9 ++++---- ctdb/ib/ibwrapper_test.c | 11 +++++++--- 6 files changed, 74 insertions(+), 33 deletions(-) (limited to 'ctdb/ib') diff --git a/ctdb/ib/README.txt b/ctdb/ib/README.txt index 3e6150afe33..74fc129c352 100644 --- a/ctdb/ib/README.txt +++ b/ctdb/ib/README.txt @@ -9,3 +9,12 @@ After then: ./configure --enable-infiniband +Example for testing +=================== +bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.1:9001 +bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.2:9001 + +where 2nodes_rm.txt: +10.0.0.1:9001 +10.0.0.2:9001 + diff --git a/ctdb/ib/ibw_ctdb.c b/ctdb/ib/ibw_ctdb.c index 5152062d246..53293810ca8 100644 --- a/ctdb/ib/ibw_ctdb.c +++ b/ctdb/ib/ibw_ctdb.c @@ -29,6 +29,36 @@ #include "ibwrapper.h" #include "ibw_ctdb.h" +int ctdb_ibw_node_connect(struct ibw_ctx *ictx, struct ctdb_node *node) +{ + struct sockaddr_in sock_out; + + memset(&sock_out, 0, sizeof(struct sockaddr_in)); + inet_pton(AF_INET, node->address.address, &sock_out.sin_addr); + sock_out.sin_port = htons(node->address.port); + sock_out.sin_family = PF_INET; + + if (ibw_connect(ictx, &sock_out, node)) { + DEBUG(0, ("ctdb_ibw_node_connect: ibw_connect failed - retrying in 1 sec...\n")); + /* try again once a second */ + event_add_timed(node->ctdb->ev, node, timeval_current_ofs(1, 0), + ctdb_ibw_node_connect_event, node); + return -1; + } + + /* continues at ibw_ctdb.c/IBWC_CONNECTED in good case */ + return 0; +} + 
+void ctdb_ibw_node_connect_event(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private) +{ + struct ctdb_node *node = talloc_get_type(private, struct ctdb_node); + struct ibw_ctx *ictx = talloc_get_type(node->ctdb->private, struct ibw_ctx); + + ctdb_ibw_node_connect(ictx, node); +} + int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn) { if (ctx!=NULL) { @@ -76,11 +106,17 @@ int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn) if (node!=NULL) node->ctdb->upcalls->node_dead(node); talloc_free(conn); + /* normal + intended disconnect => not reconnecting in this layer */ } break; case IBWC_ERROR: { -/* struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node); + struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node); if (node!=NULL) - node->ctdb->upcalls->node_dead(node);*/ + node->private = NULL; /* not to use again */ + + DEBUG(10, ("IBWC_ERROR, reconnecting immediately...\n")); + talloc_free(conn); + event_add_timed(node->ctdb->ev, node, timeval_current_ofs(1, 0), + ctdb_ibw_node_connect_event, node); } break; default: assert(0); @@ -94,17 +130,20 @@ int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn) int ctdb_ibw_receive_handler(struct ibw_conn *conn, void *buf, int n) { struct ctdb_context *ctdb = talloc_get_type(conn->ctx->ctx_userdata, struct ctdb_context); + void *buf2; /* future TODO: a solution for removal of this */ assert(ctdb!=NULL); + assert(buf!=NULL); + assert(conn!=NULL); assert(conn->state==IBWC_CONNECTED); - /* TODO: shall I short-circuit this in ibwrapper? */ - /* maybe when everything go fine... */ + /* so far "buf" is an ib-registered memory area + * and being reused for next receive + * noticed that HL requires talloc-ed memory to be stolen */ + buf2 = talloc_zero_size(conn, n); + memcpy(buf2, buf, n); - /* TODO2: !!! here I can provide conn->conn_userdata (with no perf. 
penalty) - - * as struct ctdb_node in case the connection - * has been built up by ibw_connect !!! */ - ctdb->upcalls->recv_pkt(ctdb, (uint8_t *)buf, (uint32_t)n); + ctdb->upcalls->recv_pkt(ctdb, (uint8_t *)buf2, (uint32_t)n); return 0; } diff --git a/ctdb/ib/ibw_ctdb.h b/ctdb/ib/ibw_ctdb.h index c43aca7df15..14308682b21 100644 --- a/ctdb/ib/ibw_ctdb.h +++ b/ctdb/ib/ibw_ctdb.h @@ -23,3 +23,8 @@ int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn); int ctdb_ibw_receive_handler(struct ibw_conn *conn, void *buf, int n); + +int ctdb_ibw_node_connect(struct ibw_ctx *ictx, struct ctdb_node *node); +void ctdb_ibw_node_connect_event(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private); + diff --git a/ctdb/ib/ibw_ctdb_init.c b/ctdb/ib/ibw_ctdb_init.c index a1eaca9202b..78924632292 100644 --- a/ctdb/ib/ibw_ctdb_init.c +++ b/ctdb/ib/ibw_ctdb_init.c @@ -53,24 +53,6 @@ static int ctdb_ibw_listen(struct ctdb_context *ctdb, int backlog) return 0; } -int ctdb_ibw_node_connect(struct ibw_ctx *ictx, struct ctdb_node *node) -{ - struct sockaddr_in sock_out; - - memset(&sock_out, 0, sizeof(struct sockaddr_in)); - inet_pton(AF_INET, node->address.address, &sock_out.sin_addr); - sock_out.sin_port = htons(node->address.port); - sock_out.sin_family = PF_INET; - - if (ibw_connect(ictx, &sock_out, node)) { - DEBUG(0, ("ctdb_ibw_node_connect: ibw_connect failed\n")); - return -1; - } - - /* continues at ibw_ctdb.c/IBWC_CONNECTED in good case */ - return 0; -} - /* * Start infiniband */ diff --git a/ctdb/ib/ibwrapper.c b/ctdb/ib/ibwrapper.c index fef312678a5..f3ef0c4c5cd 100644 --- a/ctdb/ib/ibwrapper.c +++ b/ctdb/ib/ibwrapper.c @@ -775,7 +775,7 @@ static int ibw_wc_recv(struct ibw_conn *conn, struct ibv_wc *wc) if (part->len<=sizeof(uint32_t) && part->to_read==0) { assert(part->len==sizeof(uint32_t)); /* set it again now... 
*/ - part->to_read = ntohl(*((uint32_t *)(part->buf))); + part->to_read = *((uint32_t *)(part->buf)); /* TODO: ntohl */ if (part->to_readto_read); goto error; @@ -791,7 +791,7 @@ static int ibw_wc_recv(struct ibw_conn *conn, struct ibv_wc *wc) } } else { if (remain>=sizeof(uint32_t)) { - uint32_t msglen = ntohl(*(uint32_t *)p); + uint32_t msglen = *(uint32_t *)p; /* TODO: ntohl */ if (msglencm_id)); @@ -1196,7 +1197,7 @@ int ibw_send(struct ibw_conn *conn, void *buf, void *key, uint32_t len) int rc; assert(len>=sizeof(uint32_t)); - *((uint32_t *)buf) = htonl(len); + assert((*((uint32_t *)buf)==len)); /* TODO: htonl */ if (len > pctx->opts.recv_bufsize) { struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv); diff --git a/ctdb/ib/ibwrapper_test.c b/ctdb/ib/ibwrapper_test.c index dc07a1c313b..2fa590588ce 100644 --- a/ctdb/ib/ibwrapper_test.c +++ b/ctdb/ib/ibwrapper_test.c @@ -110,6 +110,7 @@ int ibwtest_send_id(struct ibw_conn *conn) } /* first sizeof(uint32_t) size bytes are for length */ + *((uint32_t *)buf) = len; buf[sizeof(uint32_t)] = (char)TESTOP_SEND_ID; strcpy(buf+sizeof(uint32_t)+1, tcx->id); @@ -137,6 +138,7 @@ int ibwtest_send_test_msg(struct ibwtest_ctx *tcx, struct ibw_conn *conn, const return -1; } + *((uint32_t *)buf) = len; p = buf; p += sizeof(uint32_t); p[0] = (char)TESTOP_SEND_TEXT; @@ -190,6 +192,7 @@ int ibwtest_do_varsize_scenario_conn_size(struct ibwtest_ctx *tcx, struct ibw_co DEBUG(0, ("varsize/ibw_alloc_send_buf failed\n")); return -1; } + *((uint32_t *)buf) = len; buf[sizeof(uint32_t)] = TESTOP_SEND_RND; sum = ibwtest_fill_random(buf + sizeof(uint32_t) + 1, size); buf[sizeof(uint32_t) + 1 + size] = sum; @@ -329,7 +332,7 @@ int ibwtest_receive_handler(struct ibw_conn *conn, void *buf, int n) DEBUG(0, ("ERROR: checksum mismatch %u!=%u\n", (uint32_t)sum, (uint32_t)((unsigned char *)buf)[n-1])); ibw_stop(tcx->ibwctx); - return -3; + goto error; } } else { char *buf2; @@ -338,12 +341,12 @@ int 
ibwtest_receive_handler(struct ibw_conn *conn, void *buf, int n) /* bounce message regardless what it is */ if (ibw_alloc_send_buf(conn, (void **)&buf2, &key2, n)) { fprintf(stderr, "ibw_alloc_send_buf error #2\n"); - return -1; + goto error; } memcpy(buf2, buf, n); if (ibw_send(conn, buf2, key2, n)) { fprintf(stderr, "ibw_send error #2\n"); - return -2; + goto error; } tcx->nsent++; } @@ -368,6 +371,8 @@ int ibwtest_receive_handler(struct ibw_conn *conn, void *buf, int n) tcx->error = rc; return rc; +error: + return -1; } void ibwtest_timeout_handler(struct event_context *ev, struct timed_event *te, -- cgit