From bf31a1a02eb28d9bda0bb74345df7889faeb7335 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 13 May 2009 16:52:40 -0700 Subject: IB/ehca: Replace vmalloc() with kmalloc() for queue allocation To improve performance of driver resource allocation, replace vmalloc() calls with kmalloc(). Signed-off-by: Stefan Roscher Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ipz_pt_fn.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index c3a32846543..a2605593ae7 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -220,7 +220,7 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, queue->small_page = NULL; /* allocate queue page pointers */ - queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL); if (!queue->queue_pages) { ehca_gen_err("Couldn't allocate queue page list"); return 0; @@ -240,7 +240,7 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, ipz_queue_ctor_exit0: ehca_gen_err("Couldn't alloc pages queue=%p " "nr_of_pages=%x", queue, nr_of_pages); - vfree(queue->queue_pages); + kfree(queue->queue_pages); return 0; } @@ -262,7 +262,7 @@ int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) free_page((unsigned long)queue->queue_pages[i]); } - vfree(queue->queue_pages); + kfree(queue->queue_pages); return 1; } -- cgit From c94f156f63c835ffc02b686f9d4238b106f31a5d Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Wed, 13 May 2009 16:52:42 -0700 Subject: IB/ehca: Fall back to vmalloc() for big allocations In case of large queue pairs there is the possibility of allocation failures due to memory fragmentation when using kmalloc(). To ensure the memory is allocated even if kmalloc() cannot find chunks which are big enough, we fall back to allocating the memory with vmalloc().
Signed-off-by: Stefan Roscher Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ipz_pt_fn.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index a2605593ae7..1227c593627 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -222,8 +222,11 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, /* allocate queue page pointers */ queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL); if (!queue->queue_pages) { - ehca_gen_err("Couldn't allocate queue page list"); - return 0; + queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("Couldn't allocate queue page list"); + return 0; + } } memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); @@ -240,7 +243,10 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, ipz_queue_ctor_exit0: ehca_gen_err("Couldn't alloc pages queue=%p " "nr_of_pages=%x", queue, nr_of_pages); - kfree(queue->queue_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); return 0; } @@ -262,7 +268,10 @@ int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) free_page((unsigned long)queue->queue_pages[i]); } - kfree(queue->queue_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); return 1; } -- cgit From 1988d1fa1a9d642c5714a6afc9775fba0627f3ed Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Wed, 13 May 2009 16:52:43 -0700 Subject: IB/ehca: Remove unnecessary memory operations for userspace queue pairs The queue map for flush completion circumvention is only used for kernel space queue pairs. This patch skips the allocation of the queue maps in case the QP is created for userspace. 
In addition, this patch does not iomap the galpas for kernel usage if the queue pair is only used in userspace. These changes will improve the performance of creation of userspace queue pairs. Signed-off-by: Stefan Roscher Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_qp.c | 94 ++++++++++++++++++++--------------- drivers/infiniband/hw/ehca/hcp_if.c | 6 +-- drivers/infiniband/hw/ehca/hcp_if.h | 2 +- drivers/infiniband/hw/ehca/hcp_phyp.c | 11 ++-- drivers/infiniband/hw/ehca/hcp_phyp.h | 2 +- 5 files changed, 65 insertions(+), 50 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 00c10815971..ead4e718c08 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -461,7 +461,7 @@ static struct ehca_qp *internal_create_qp( ib_device); struct ib_ucontext *context = NULL; u64 h_ret; - int is_llqp = 0, has_srq = 0; + int is_llqp = 0, has_srq = 0, is_user = 0; int qp_type, max_send_sge, max_recv_sge, ret; /* h_call's out parameters */ @@ -609,9 +609,6 @@ static struct ehca_qp *internal_create_qp( } } - if (pd->uobject && udata) - context = pd->uobject->context; - my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); if (!my_qp) { ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); @@ -619,6 +616,11 @@ static struct ehca_qp *internal_create_qp( return ERR_PTR(-ENOMEM); } + if (pd->uobject && udata) { + is_user = 1; + context = pd->uobject->context; + } + atomic_set(&my_qp->nr_events, 0); init_waitqueue_head(&my_qp->wait_completion); spin_lock_init(&my_qp->spinlock_s); @@ -707,7 +709,7 @@ static struct ehca_qp *internal_create_qp( (parms.squeue.is_small || parms.rqueue.is_small); } - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms); + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); if (h_ret != H_SUCCESS) { ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli", h_ret); @@ -769,18 +771,20 @@ static struct 
ehca_qp *internal_create_qp( goto create_qp_exit2; } - my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / - my_qp->ipz_squeue.qe_size; - my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->sq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit3; + if (!is_user) { + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / + my_qp->ipz_squeue.qe_size; + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->sq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit3; + } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); } - INIT_LIST_HEAD(&my_qp->sq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->sq_map); } if (HAS_RQ(my_qp)) { @@ -792,20 +796,21 @@ static struct ehca_qp *internal_create_qp( "and pages ret=%i", ret); goto create_qp_exit4; } - - my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / - my_qp->ipz_rqueue.qe_size; - my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->rq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit5; + if (!is_user) { + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->rq_map); } - INIT_LIST_HEAD(&my_qp->rq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->rq_map); - } else if (init_attr->srq) { + } else if 
(init_attr->srq && !is_user) { /* this is a base QP, use the queue map of the SRQ */ my_qp->rq_map = my_srq->rq_map; INIT_LIST_HEAD(&my_qp->rq_err_node); @@ -918,7 +923,7 @@ create_qp_exit7: kfree(my_qp->mod_qp_parm); create_qp_exit6: - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp) && !is_user) vfree(my_qp->rq_map.map); create_qp_exit5: @@ -926,7 +931,7 @@ create_qp_exit5: ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); create_qp_exit4: - if (HAS_SQ(my_qp)) + if (HAS_SQ(my_qp) && !is_user) vfree(my_qp->sq_map.map); create_qp_exit3: @@ -1244,6 +1249,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, u64 update_mask; u64 h_ret; int bad_wqe_cnt = 0; + int is_user = 0; int squeue_locked = 0; unsigned long flags = 0; @@ -1266,6 +1272,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, ret = ehca2ib_return_code(h_ret); goto modify_qp_exit1; } + if (ibqp->uobject) + is_user = 1; qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); @@ -1728,7 +1736,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit2; } } - if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) { + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR) + && !is_user) { ret = check_for_left_cqes(my_qp, shca); if (ret) goto modify_qp_exit2; @@ -1738,16 +1747,17 @@ static int internal_modify_qp(struct ib_qp *ibqp, ipz_qeit_reset(&my_qp->ipz_rqueue); ipz_qeit_reset(&my_qp->ipz_squeue); - if (qp_cur_state == IB_QPS_ERR) { + if (qp_cur_state == IB_QPS_ERR && !is_user) { del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); if (HAS_RQ(my_qp)) del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); } - reset_queue_map(&my_qp->sq_map); + if (!is_user) + reset_queue_map(&my_qp->sq_map); - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp) && !is_user) reset_queue_map(&my_qp->rq_map); } @@ -2138,10 +2148,12 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, int ret; u64 h_ret; u8 port_num; + int is_user = 0; enum ib_qp_type qp_type; unsigned long flags; if 
(uobject) { + is_user = 1; if (my_qp->mm_count_galpa || my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { ehca_err(dev, "Resources still referenced in " @@ -2168,10 +2180,10 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, * SRQs will never get into an error list and do not have a recv_cq, * so we need to skip them here. */ - if (HAS_RQ(my_qp) && !IS_SRQ(my_qp)) + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); - if (HAS_SQ(my_qp)) + if (HAS_SQ(my_qp) && !is_user) del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); /* now wait until all pending events have completed */ @@ -2209,13 +2221,13 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, if (HAS_RQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - - vfree(my_qp->rq_map.map); + if (!is_user) + vfree(my_qp->rq_map.map); } if (HAS_SQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - - vfree(my_qp->sq_map.map); + if (!is_user) + vfree(my_qp->sq_map.map); } kmem_cache_free(qp_cache, my_qp); atomic_dec(&shca->num_qps); diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index d0ab0c0d5e9..4d5dc3304d4 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -284,7 +284,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, param->act_pages = (u32)outs[4]; if (ret == H_SUCCESS) - hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]); + hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. 
ret=%lli", ret); @@ -293,7 +293,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, } u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms) + struct ehca_alloc_qp_parms *parms, int is_user) { u64 ret; u64 allocate_controls, max_r10_reg, r11, r12; @@ -359,7 +359,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); if (ret == H_SUCCESS) - hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]); + hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. ret=%lli", ret); diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 2c3c6e0ea5c..39c1c3618ec 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -78,7 +78,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, * initialize resources, create empty QPPTs (2 rings). 
*/ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms); + struct ehca_alloc_qp_parms *parms, int is_user); u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, const u8 port_id, diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c index 214821095cb..b3e0e72e8a7 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -54,12 +54,15 @@ int hcall_unmap_page(u64 mapaddr) return 0; } -int hcp_galpas_ctor(struct h_galpas *galpas, +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user) { - int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); - if (ret) - return ret; + if (!is_user) { + int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); + if (ret) + return ret; + } else + galpas->kernel.fw_handle = 0; galpas->user.fw_handle = paddr_user; diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h index 5305c2a3ed9..204227d5303 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.h +++ b/drivers/infiniband/hw/ehca/hcp_phyp.h @@ -78,7 +78,7 @@ static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) *(volatile u64 __force *)addr = value; } -int hcp_galpas_ctor(struct h_galpas *galpas, +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user); int hcp_galpas_dtor(struct h_galpas *galpas); -- cgit From bde2cfaf8ff5511b4f434078554f89ff6cb677f2 Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Wed, 13 May 2009 16:52:43 -0700 Subject: IB/ehca: Increment version number Signed-off-by: Stefan Roscher Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 368311ce332..85905ab9391 100644 --- 
a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -52,7 +52,7 @@ #include "ehca_tools.h" #include "hcp_if.h" -#define HCAD_VERSION "0026" +#define HCAD_VERSION "0027" MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch "); -- cgit From 5b891a9332dc4212bf166a4506092fbcd60f2319 Mon Sep 17 00:00:00 2001 From: Jack Stone Date: Wed, 13 May 2009 16:53:39 -0700 Subject: infiniband: Remove void casts Remove unneeded casts of void *. Signed-off-by: Jack Stone Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2_cq.c | 4 ++-- drivers/infiniband/hw/ehca/ehca_irq.c | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c index bb17cce3cb5..f5c45b194f5 100644 --- a/drivers/infiniband/hw/amso1100/c2_cq.c +++ b/drivers/infiniband/hw/amso1100/c2_cq.c @@ -133,7 +133,7 @@ static inline int c2_poll_one(struct c2_dev *c2dev, struct c2_qp *qp; int is_recv = 0; - ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq); + ce = c2_mq_consume(&cq->mq); if (!ce) { return -EAGAIN; } @@ -146,7 +146,7 @@ static inline int c2_poll_one(struct c2_dev *c2dev, while ((qp = (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) { c2_mq_free(&cq->mq); - ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq); + ce = c2_mq_consume(&cq->mq); if (!ce) return -EAGAIN; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 99bcbd7ffb0..4b89b791be6 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -479,13 +479,13 @@ void ehca_tasklet_neq(unsigned long data) struct ehca_eqe *eqe; u64 ret; - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + eqe = ehca_poll_eq(shca, &shca->neq); while (eqe) { if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) parse_ec(shca, eqe->entry); - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + eqe = ehca_poll_eq(shca, 
&shca->neq); } ret = hipz_h_reset_event(shca->ipz_hca_handle, @@ -572,8 +572,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) eqe_cnt = 0; do { u32 token; - eqe_cache[eqe_cnt].eqe = - (struct ehca_eqe *)ehca_poll_eq(shca, eq); + eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); if (!eqe_cache[eqe_cnt].eqe) break; eqe_value = eqe_cache[eqe_cnt].eqe->entry; @@ -637,7 +636,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) goto unlock_irq_spinlock; do { struct ehca_eqe *eqe; - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + eqe = ehca_poll_eq(shca, &shca->eq); if (!eqe) break; process_eqe(shca, eqe); -- cgit From 28e43a519b9edb8277fc6b490ad17aa38c45a02b Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 15 May 2009 10:16:45 -0700 Subject: RDMA/nes: Fix off-by-one bugs in reset_adapter_ne020() and init_serdes() With a postfix increment, i is incremented one past 10K/5K before the loop ends, so the error messages will be displayed too soon if the test succeeds on the last iteration. Fix the comparisons to be > instead of >=. 
Signed-off-by: Roel Kluin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_hw.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index b832a7b814a..4a84d02ece0 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -667,7 +667,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ i = 0; while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) mdelay(1); - if (i >= 10000) { + if (i > 10000) { nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n"); return 0; } @@ -675,7 +675,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ i = 0; while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000) mdelay(1); - if (i >= 10000) { + if (i > 10000) { printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n", nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS)); return 0; @@ -701,7 +701,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ i = 0; while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) mdelay(1); - if (i >= 10000) { + if (i > 10000) { nes_debug(NES_DBG_INIT, "Did not see port soft reset done.\n"); return 0; } @@ -711,7 +711,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0) & 0x0000000f)) != 0x0000000f) && i++ < 5000) mdelay(1); - if (i >= 5000) { + if (i > 5000) { nes_debug(NES_DBG_INIT, "Serdes 0 not ready, status=%x\n", u32temp); return 0; } @@ -722,7 +722,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) & 0x0000000f)) != 0x0000000f) && i++ < 5000) mdelay(1); - if (i >= 5000) { + if (i > 
5000) { nes_debug(NES_DBG_INIT, "Serdes 1 not ready, status=%x\n", u32temp); return 0; } @@ -792,7 +792,7 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0) & 0x0000000f)) != 0x0000000f) && i++ < 5000) mdelay(1); - if (i >= 5000) { + if (i > 5000) { nes_debug(NES_DBG_PHY, "Init: serdes 0 not ready, status=%x\n", u32temp); return 1; } @@ -815,7 +815,7 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) & 0x0000000f)) != 0x0000000f) && (i++ < 5000)) mdelay(1); - if (i >= 5000) { + if (i > 5000) { printk("%s: Init: serdes 1 not ready, status=%x\n", __func__, u32temp); /* return 1; */ } -- cgit From c1f67a88bf62fac0f4151c007b361199c2cd1988 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 27 May 2009 14:36:16 -0700 Subject: IB/mthca: Add module parameter for number of MTTs per segment The current MTT allocator uses kmalloc() to allocate a buffer for its buddy allocator, and thus is limited in the amount of MTT segments that it can control. As a result, the size of memory that can be registered is limited too. This patch uses a module parameter to control the number of MTT entries that each segment represents, allowing more memory to be registered with the same number of segments. 
Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +- drivers/infiniband/hw/mthca/mthca_dev.h | 1 + drivers/infiniband/hw/mthca/mthca_main.c | 17 ++++++++++++++--- drivers/infiniband/hw/mthca/mthca_mr.c | 16 ++++++++-------- drivers/infiniband/hw/mthca/mthca_profile.c | 4 ++-- 5 files changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 6d55f9d748f..8c2ed994d54 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1059,7 +1059,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); if (mthca_is_memfree(dev)) dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64), - MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE; + dev->limits.mtt_seg_size) / dev->limits.mtt_seg_size; else dev_lim->reserved_mtts = 1 << (field >> 4); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 252590116df..9ef611f6dd3 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -159,6 +159,7 @@ struct mthca_limits { int reserved_eqs; int num_mpts; int num_mtt_segs; + int mtt_seg_size; int fmr_reserved_mtts; int reserved_mtts; int reserved_mrws; diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 1d83cf7caf3..13da9f1d24c 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -125,6 +125,10 @@ module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444); MODULE_PARM_DESC(fmr_reserved_mtts, "number of memory translation table segments reserved for FMR"); +static int log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8); +module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); 
+MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)"); + static char mthca_version[] __devinitdata = DRV_NAME ": Mellanox InfiniBand HCA driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -162,6 +166,7 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) int err; u8 status; + mdev->limits.mtt_seg_size = (1 << log_mtts_per_seg) * 8; err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status); if (err) { mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); @@ -460,11 +465,11 @@ static int mthca_init_icm(struct mthca_dev *mdev, } /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ - mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE, - dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE; + mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size, + dma_get_cache_alignment()) / mdev->limits.mtt_seg_size; mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, - MTHCA_MTT_SEG_SIZE, + mdev->limits.mtt_seg_size, mdev->limits.num_mtt_segs, mdev->limits.reserved_mtts, 1, 0); @@ -1315,6 +1320,12 @@ static void __init mthca_validate_profile(void) printk(KERN_WARNING PFX "Corrected fmr_reserved_mtts to %d.\n", hca_profile.fmr_reserved_mtts); } + + if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) { + printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). 
Using default - %d\n", + log_mtts_per_seg, ilog2(MTHCA_MTT_SEG_SIZE / 8)); + log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8); + } } static int __init mthca_init(void) diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 882e6b73591..d606edf1085 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -220,7 +220,7 @@ static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size, mtt->buddy = buddy; mtt->order = 0; - for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1) + for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1) ++mtt->order; mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy); @@ -267,7 +267,7 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, while (list_len > 0) { mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + - mtt->first_seg * MTHCA_MTT_SEG_SIZE + + mtt->first_seg * dev->limits.mtt_seg_size + start_index * 8); mtt_entry[1] = 0; for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i) @@ -326,7 +326,7 @@ static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, u64 __iomem *mtts; int i; - mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * MTHCA_MTT_SEG_SIZE + + mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size + start_index * sizeof (u64); for (i = 0; i < list_len; ++i) mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT), @@ -345,10 +345,10 @@ static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, /* For Arbel, all MTTs must fit in the same page. 
*/ BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE); /* Require full segments */ - BUG_ON(s % MTHCA_MTT_SEG_SIZE); + BUG_ON(s % dev->limits.mtt_seg_size); mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg + - s / MTHCA_MTT_SEG_SIZE, &dma_handle); + s / dev->limits.mtt_seg_size, &dma_handle); BUG_ON(!mtts); @@ -479,7 +479,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, if (mr->mtt) mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + - mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE); + mr->mtt->first_seg * dev->limits.mtt_seg_size); if (0) { mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); @@ -626,7 +626,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, goto err_out_table; } - mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE; + mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size; if (mthca_is_memfree(dev)) { mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, @@ -908,7 +908,7 @@ int mthca_init_mr_table(struct mthca_dev *dev) dev->mr_table.mtt_base); dev->mr_table.tavor_fmr.mtt_base = - ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE); + ioremap(addr, mtts * dev->limits.mtt_seg_size); if (!dev->mr_table.tavor_fmr.mtt_base) { mthca_warn(dev, "MTT ioremap for FMR failed.\n"); err = -ENOMEM; diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index d168c254061..8edb28a9a0e 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -94,7 +94,7 @@ s64 mthca_make_profile(struct mthca_dev *dev, profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE; profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE; profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz; - profile[MTHCA_RES_MTT].size = MTHCA_MTT_SEG_SIZE; + profile[MTHCA_RES_MTT].size = dev->limits.mtt_seg_size; profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz; profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE; 
profile[MTHCA_RES_UARC].size = request->uarc_size; @@ -232,7 +232,7 @@ s64 mthca_make_profile(struct mthca_dev *dev, dev->limits.num_mtt_segs = profile[i].num; dev->mr_table.mtt_base = profile[i].start; init_hca->mtt_base = profile[i].start; - init_hca->mtt_seg_sz = ffs(MTHCA_MTT_SEG_SIZE) - 7; + init_hca->mtt_seg_sz = ffs(dev->limits.mtt_seg_size) - 7; break; case MTHCA_RES_UAR: dev->limits.num_uars = profile[i].num; -- cgit From ab6bf42e2339580b5d87746d0ff4da4b1578b03e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 27 May 2009 14:38:34 -0700 Subject: mlx4_core: Add module parameter for number of MTTs per segment The current MTT allocator uses kmalloc() to allocate a buffer for its buddy allocator, and thus is limited in the amount of MTT segments that it can control. As a result, the size of memory that can be registered is limited too. This patch uses a module parameter to control the number of MTT entries that each segment represents, allowing more memory to be registered with the same number of segments. 
Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/mlx4/main.c | 14 ++++++++++++-- drivers/net/mlx4/mr.c | 6 +++--- drivers/net/mlx4/profile.c | 2 +- include/linux/mlx4/device.h | 1 + 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 30bea968969..018348c0119 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -100,6 +100,10 @@ module_param_named(use_prio, use_prio, bool, 0444); MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports " "(0/1, default 0)"); +static int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG); +module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); +MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)"); + int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type) { @@ -203,12 +207,13 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_cqes = dev_cap->max_cq_sz - 1; dev->caps.reserved_cqs = dev_cap->reserved_cqs; dev->caps.reserved_eqs = dev_cap->reserved_eqs; + dev->caps.mtts_per_seg = 1 << log_mtts_per_seg; dev->caps.reserved_mtts = DIV_ROUND_UP(dev_cap->reserved_mtts, - MLX4_MTT_ENTRY_PER_SEG); + dev->caps.mtts_per_seg); dev->caps.reserved_mrws = dev_cap->reserved_mrws; dev->caps.reserved_uars = dev_cap->reserved_uars; dev->caps.reserved_pds = dev_cap->reserved_pds; - dev->caps.mtt_entry_sz = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz; + dev->caps.mtt_entry_sz = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz; dev->caps.max_msg_sz = dev_cap->max_msg_sz; dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); dev->caps.flags = dev_cap->flags; @@ -1304,6 +1309,11 @@ static int __init mlx4_verify_params(void) return -1; } + if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) { + printk(KERN_WARNING "mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); + return -1; + } + return 0; } diff --git 
a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c index 0caf74cae8b..3b8973d1993 100644 --- a/drivers/net/mlx4/mr.c +++ b/drivers/net/mlx4/mr.c @@ -209,7 +209,7 @@ int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift, } else mtt->page_shift = page_shift; - for (mtt->order = 0, i = MLX4_MTT_ENTRY_PER_SEG; i < npages; i <<= 1) + for (mtt->order = 0, i = dev->caps.mtts_per_seg; i < npages; i <<= 1) ++mtt->order; mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order); @@ -350,7 +350,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG | MLX4_MPT_PD_FLAG_RAE); mpt_entry->mtt_sz = cpu_to_be32((1 << mr->mtt.order) * - MLX4_MTT_ENTRY_PER_SEG); + dev->caps.mtts_per_seg); } else { mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS); } @@ -391,7 +391,7 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64))) return -EINVAL; - if (start_index & (MLX4_MTT_ENTRY_PER_SEG - 1)) + if (start_index & (dev->caps.mtts_per_seg - 1)) return -EINVAL; mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg + diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c index cebdf3243ca..bd22df95adf 100644 --- a/drivers/net/mlx4/profile.c +++ b/drivers/net/mlx4/profile.c @@ -98,7 +98,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, profile[MLX4_RES_EQ].size = dev_cap->eqc_entry_sz; profile[MLX4_RES_DMPT].size = dev_cap->dmpt_entry_sz; profile[MLX4_RES_CMPT].size = dev_cap->cmpt_entry_sz; - profile[MLX4_RES_MTT].size = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz; + profile[MLX4_RES_MTT].size = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz; profile[MLX4_RES_MCG].size = MLX4_MGM_ENTRY_SIZE; profile[MLX4_RES_QP].num = request->num_qp; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 3aff8a6a389..ce7cc6c7bcb 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h 
@@ -210,6 +210,7 @@ struct mlx4_caps { int num_comp_vectors; int num_mpts; int num_mtt_segs; + int mtts_per_seg; int fmr_reserved_mtts; int reserved_mtts; int reserved_mrws; -- cgit From 7ab1a2b31d4a8b4f519ccff5a84c53a5b87fd1be Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 27 May 2009 14:42:36 -0700 Subject: RDMA/cxgb3: Report correct port state and MTU Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 32 +++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 160ef482712..e2a63214008 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -1152,12 +1153,39 @@ static int iwch_query_device(struct ib_device *ibdev, static int iwch_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { + struct iwch_dev *dev; + struct net_device *netdev; + struct in_device *inetdev; + PDBG("%s ibdev %p\n", __func__, ibdev); + dev = to_iwch_dev(ibdev); + netdev = dev->rdev.port_info.lldevs[port-1]; + memset(props, 0, sizeof(struct ib_port_attr)); props->max_mtu = IB_MTU_4096; - props->active_mtu = IB_MTU_2048; - props->state = IB_PORT_ACTIVE; + if (netdev->mtu >= 4096) + props->active_mtu = IB_MTU_4096; + else if (netdev->mtu >= 2048) + props->active_mtu = IB_MTU_2048; + else if (netdev->mtu >= 1024) + props->active_mtu = IB_MTU_1024; + else if (netdev->mtu >= 512) + props->active_mtu = IB_MTU_512; + else + props->active_mtu = IB_MTU_256; + + if (!netif_carrier_ok(netdev)) + props->state = IB_PORT_DOWN; + else { + inetdev = in_dev_get(netdev); + if (inetdev->ifa_list) + props->state = IB_PORT_ACTIVE; + else + props->state = IB_PORT_INIT; + in_dev_put(inetdev); + } + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_SNMP_TUNNEL_SUP | -- cgit From 
3026c19a14ba71ccd4dc4925abab9395ea12839c Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 27 May 2009 14:43:39 -0700 Subject: RDMA/cxgb3: Limit fast register size based on T3 limitations T3 firmware only supports one WR's worth of page list for fast register work requests. The driver currently allows 2 WRs' worth, which doesn't work for T3, so reduce the limit in the driver. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_wr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index ff9be1a1310..32e3b1461d8 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -176,7 +176,7 @@ struct t3_send_wr { struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ }; -#define T3_MAX_FASTREG_DEPTH 24 +#define T3_MAX_FASTREG_DEPTH 10 #define T3_MAX_FASTREG_FRAG 10 struct t3_fastreg_wr { -- cgit From 25a52393270ca48c7d0848672ad4423313033c3d Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Wed, 3 Jun 2009 13:25:42 -0700 Subject: IB/ehca: Remove superfluous bitmasks from QP control block All the fields in the control block are nicely right-aligned, so no masking is necessary.
Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes_pSeries.h | 28 ----------------------- drivers/infiniband/hw/ehca/ehca_qp.c | 18 ++++----------- 2 files changed, 5 insertions(+), 41 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h index 1798e6466bd..689c35786dd 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -165,7 +165,6 @@ struct hcp_modify_qp_control_block { #define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) #define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) #define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) -#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) #define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) #define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) @@ -176,60 +175,33 @@ struct hcp_modify_qp_control_block { #define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) #define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) #define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) -#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) -#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) -#define MQPCB_DLID EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) -#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) -#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31) #define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) -#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) -#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) -#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24, 31) #define 
MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) -#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31) #define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) #define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) -#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31) #define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) -#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31) #define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) -#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) -#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31) #define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) -#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) -#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) -#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) -#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31) #define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) -#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) -#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) -#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) -#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31) #define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) #define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) -#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) -#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) -#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31) -#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31) #define MQPCB_MASK_QP_ENABLE 
EHCA_BMASK_IBM(48, 48) -#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31) #define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) -#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) #define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index ead4e718c08..0338f1fabe8 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1962,19 +1962,13 @@ int ehca_query_qp(struct ib_qp *qp, qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; qp_attr->dest_qp_num = qpcb->dest_qp_nr; - qp_attr->pkey_index = - EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx); - - qp_attr->port_num = - EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port); - + qp_attr->pkey_index = qpcb->prim_p_key_idx; + qp_attr->port_num = qpcb->prim_phys_port; qp_attr->timeout = qpcb->timeout; qp_attr->retry_cnt = qpcb->retry_count; qp_attr->rnr_retry = qpcb->rnr_retry_count; - qp_attr->alt_pkey_index = - EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx); - + qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; qp_attr->alt_port_num = qpcb->alt_phys_port; qp_attr->alt_timeout = qpcb->timeout_al; @@ -2061,8 +2055,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, update_mask |= EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); - mqpcb->curr_srq_limit = - EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit); + mqpcb->curr_srq_limit = attr->srq_limit; mqpcb->qp_aff_asyn_ev_log_reg = EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); } @@ -2125,8 +2118,7 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; srq_attr->max_sge = 3; - srq_attr->srq_limit = EHCA_BMASK_GET( - MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit); + srq_attr->srq_limit = qpcb->curr_srq_limit; 
if (ehca_debug_level >= 2) ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); -- cgit From 2ac6bf4ddc87c3b6b609f8fa82f6ebbffeac12f4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 5 Jun 2009 10:36:24 -0700 Subject: IB/mlx4: Add strong ordering to local inval and fast reg work requests The ConnectX Programmer's Reference Manual states that the "SO" bit must be set when posting Fast Register and Local Invalidate send work requests. When this bit is set, the work request will be executed only after all previous work requests on the send queue have been executed. (If the bit is not set, Fast Register and Local Invalidate WQEs may begin execution too early, which violates the defined semantics for these operations.) This fixes a reported issue with NFS/RDMA (the link to the report is not preserved in this copy). Signed-off-by: Jack Morgenstein Cc: Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 4 ++++ include/linux/mlx4/qp.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 20724aee76f..c4a02648c8a 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1585,12 +1585,16 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_LOCAL_INV: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); set_local_inv_seg(wqe, wr->ex.invalidate_rkey); wqe += sizeof (struct mlx4_wqe_local_inval_seg); size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; break; case IB_WR_FAST_REG_MR: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); set_fmr_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_fmr_seg); size += sizeof (struct mlx4_wqe_fmr_seg) / 16; diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index bf8f11982da..9f29d86e5dc 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -165,6 +165,7 @@ enum { MLX4_WQE_CTRL_IP_CSUM = 1 << 4, MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5, MLX4_WQE_CTRL_INS_VLAN = 1 << 6, +
MLX4_WQE_CTRL_STRONG_ORDER = 1 << 7, }; struct mlx4_wqe_ctrl_seg { -- cgit From 9aa0a489d909af0cc36c41d3061ef956c7442ce2 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sat, 13 Jun 2009 15:14:09 -0700 Subject: IB/mthca: Don't double-free IRQs when falling back from MSI-X to INTx When both MSI-X and legacy INTx fail to generate an interrupt, the driver frees the MSI-X interrupts twice. Fix this by clearing the have_irq flag for the MSI-X interrupts when they are freed the first time. Reported-by: Yinghai Lu Tested-by: Yinghai Lu Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_eq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 28f0e0c40d7..90e4e450a12 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -641,9 +641,11 @@ static void mthca_free_irqs(struct mthca_dev *dev) if (dev->eq_table.have_irq) free_irq(dev->pdev->irq, dev); for (i = 0; i < MTHCA_NUM_EQ; ++i) - if (dev->eq_table.eq[i].have_irq) + if (dev->eq_table.eq[i].have_irq) { free_irq(dev->eq_table.eq[i].msi_x_vector, dev->eq_table.eq + i); + dev->eq_table.eq[i].have_irq = 0; + } } static int mthca_map_reg(struct mthca_dev *dev, -- cgit From d1fdf24b4074a8d962f9a28519c99dcdd66bdee3 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 14 Jun 2009 13:30:45 -0700 Subject: mlx4_core: Don't double-free IRQs when falling back from MSI-X to INTx When both MSI-X and legacy INTx fail to generate an interrupt, the driver frees the MSI-X interrupts twice. Fix this by clearing the have_irq flag for the MSI-X interrupts when they are freed the first time. This is the same bug that was reported in ib_mthca by Yinghai Lu.
Signed-off-by: Roland Dreier --- drivers/net/mlx4/eq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index 8830dcb92ec..ce064e32420 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -497,8 +497,10 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) if (eq_table->have_irq) free_irq(dev->pdev->irq, dev); for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) - if (eq_table->eq[i].have_irq) + if (eq_table->eq[i].have_irq) { free_irq(eq_table->eq[i].irq, eq_table->eq + i); + eq_table->eq[i].have_irq = 0; + } kfree(eq_table->irq_names); } -- cgit