diff options
author | Mohammed Rafi KC <rkavunga@redhat.com> | 2014-12-22 15:16:43 +0530 |
---|---|---|
committer | Raghavendra G <rgowdapp@redhat.com> | 2015-02-12 05:03:50 -0800 |
commit | b8ed8da853ac60a19a2099f0c1de21f50fe34fe4 (patch) | |
tree | e76122e5344559a925071ef5d0ab13e2c1e41340 | |
parent | 60361d84a03048b0882935fb92c19ac3b68f9325 (diff) | |
download | glusterfs-b8ed8da853ac60a19a2099f0c1de21f50fe34fe4.tar.gz glusterfs-b8ed8da853ac60a19a2099f0c1de21f50fe34fe4.tar.xz glusterfs-b8ed8da853ac60a19a2099f0c1de21f50fe34fe4.zip |
rdma: post multiple work request in a single call.
ibv_post-send will allow to send multiple work request
in a single call posting as linked list.
So if the payload count > 1, we can perform the data
operation in a single call to ibv_post_send.
Change-Id: Ib2e485cbbe6887919109e73e17d4fab595d5e65e
BUG: 1164079
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Reviewed-on: http://review.gluster.org/9327
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Tested-by: Raghavendra G <rgowdapp@redhat.com>
-rw-r--r-- | libglusterfs/src/mem-types.h | 1 | ||||
-rw-r--r-- | rpc/rpc-transport/rdma/src/rdma.c | 120 |
2 files changed, 66 insertions, 55 deletions
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h index 388b8dedfd..dd24913278 100644 --- a/libglusterfs/src/mem-types.h +++ b/libglusterfs/src/mem-types.h @@ -126,6 +126,7 @@ enum gf_common_mem_types_ { gf_common_mt_strfd_data_t = 110, gf_common_mt_regex_t = 111, gf_common_mt_ereg = 112, + gf_common_mt_wr = 113, gf_common_mt_end }; #endif diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c index 599003ea9a..3b50c1312b 100644 --- a/rpc/rpc-transport/rdma/src/rdma.c +++ b/rpc/rpc-transport/rdma/src/rdma.c @@ -3329,57 +3329,17 @@ gf_rdma_decode_header (gf_rdma_peer_t *peer, gf_rdma_post_t *post, int32_t -__gf_rdma_read (gf_rdma_peer_t *peer, gf_rdma_post_t *post, struct iovec *to, - gf_rdma_read_chunk_t *readch) -{ - int32_t ret = -1; - struct ibv_sge list = {0, }; - struct ibv_send_wr wr = {0, }, *bad_wr = NULL; - - ret = __gf_rdma_register_local_mr_for_rdma (peer, to, 1, &post->ctx); - if (ret == -1) { - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, - "registering local memory for rdma read failed"); - goto out; - } - - list.addr = (unsigned long) to->iov_base; - list.length = to->iov_len; - list.lkey = post->ctx.mr[post->ctx.mr_count - 1]->lkey; - - wr.wr_id = (unsigned long) gf_rdma_post_ref (post); - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IBV_WR_RDMA_READ; - wr.send_flags = IBV_SEND_SIGNALED; - wr.wr.rdma.remote_addr = readch->rc_target.rs_offset; - wr.wr.rdma.rkey = readch->rc_target.rs_handle; - - ret = ibv_post_send (peer->qp, &wr, &bad_wr); - if (ret) { - gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, - "rdma read from client " - "(%s) failed with ret = %d (%s)", - peer->trans->peerinfo.identifier, - ret, (ret > 0) ? strerror (ret) : ""); - ret = -1; - gf_rdma_post_unref (post); - } -out: - return ret; -} - - -int32_t gf_rdma_do_reads (gf_rdma_peer_t *peer, gf_rdma_post_t *post, gf_rdma_read_chunk_t *readch) { - int32_t ret = -1, i = 0, count = 0; - size_t size = 0; - char *ptr = NULL; - struct iobuf *iobuf = NULL; - gf_rdma_private_t *priv = NULL; - + int32_t ret = -1, i = 0, count = 0; + size_t size = 0; + char *ptr = NULL; + struct iobuf *iobuf = NULL; + gf_rdma_private_t *priv = NULL; + struct ibv_sge *list = NULL; + struct ibv_send_wr *wr = NULL, *bad_wr = NULL; + int total_ref = 0; priv = peer->trans->private; for (i = 0; readch[i].rc_discrim != 0; i++) { @@ -3394,7 +3354,7 @@ gf_rdma_do_reads (gf_rdma_peer_t *peer, gf_rdma_post_t *post, } post->ctx.gf_rdma_reads = i; - + i = 0; iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool, size); if (iobuf == NULL) { goto out; @@ -3424,32 +3384,82 @@ gf_rdma_do_reads (gf_rdma_peer_t *peer, gf_rdma_post_t *post, goto unlock; } + list = GF_CALLOC (post->ctx.gf_rdma_reads, + sizeof (struct ibv_sge), gf_common_mt_sge); + wr = GF_CALLOC (post->ctx.gf_rdma_reads, + sizeof (struct ibv_send_wr), gf_common_mt_wr); for (i = 0; readch[i].rc_discrim != 0; i++) { count = post->ctx.count++; post->ctx.vector[count].iov_base = ptr; post->ctx.vector[count].iov_len = readch[i].rc_target.rs_length; - ret = __gf_rdma_read (peer, post, - &post->ctx.vector[count], - &readch[i]); + ret = __gf_rdma_register_local_mr_for_rdma (peer, + &post->ctx.vector[count], 1, &post->ctx); if (ret == -1) { gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, - "rdma read from peer (%s) failed", - peer->trans->peerinfo.identifier); + "registering local memory" + " for rdma read failed"); goto unlock; } + list[i].addr = (unsigned long) + post->ctx.vector[count].iov_base; + list[i].length = post->ctx.vector[count].iov_len; + list[i].lkey = + post->ctx.mr[post->ctx.mr_count - 1]->lkey; + + wr[i].wr_id = + (unsigned long) gf_rdma_post_ref (post); + wr[i].sg_list = &list[i]; + wr[i].next = &wr[i+1]; + wr[i].num_sge = 1; + wr[i].opcode = IBV_WR_RDMA_READ; + wr[i].send_flags = IBV_SEND_SIGNALED; + wr[i].wr.rdma.remote_addr = + readch->rc_target.rs_offset; + wr[i].wr.rdma.rkey = readch->rc_target.rs_handle; + ptr += readch[i].rc_target.rs_length; + total_ref++; + } + wr[i-1].next = NULL; + ret = ibv_post_send (peer->qp, wr, &bad_wr); + if (ret) { + gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, + "rdma read from client " + "(%s) failed with ret = %d (%s)", + peer->trans->peerinfo.identifier, + ret, (ret > 0) ? strerror (ret) : ""); + + if (!bad_wr) { + ret = -1; + goto out; + } + + for (i = 0; i < post->ctx.gf_rdma_reads; i++) { + if (&wr[i] != bad_wr) + total_ref--; + else + break; + } + + ret = -1; } - ret = 0; } unlock: pthread_mutex_unlock (&priv->write_mutex); out: + if (list) + GF_FREE (list); + if (wr) + GF_FREE (wr); if (ret == -1) { + while (total_ref-- > 0) + gf_rdma_post_unref (post); + if (iobuf != NULL) { iobuf_unref (iobuf); } |