Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/Kconfig                      |    2
-rw-r--r--  drivers/infiniband/Makefile                     |    1
-rw-r--r--  drivers/infiniband/core/agent.c                 |  297
-rw-r--r--  drivers/infiniband/core/agent.h                 |   14
-rw-r--r--  drivers/infiniband/core/agent_priv.h            |   62
-rw-r--r--  drivers/infiniband/core/cache.c                 |    1
-rw-r--r--  drivers/infiniband/core/cm.c                    |  223
-rw-r--r--  drivers/infiniband/core/cm_msgs.h               |    1
-rw-r--r--  drivers/infiniband/core/device.c                |   22
-rw-r--r--  drivers/infiniband/core/mad.c                   |  374
-rw-r--r--  drivers/infiniband/core/mad_priv.h              |    8
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c              |  114
-rw-r--r--  drivers/infiniband/core/mad_rmpp.h              |    2
-rw-r--r--  drivers/infiniband/core/packer.c                |    2
-rw-r--r--  drivers/infiniband/core/sa_query.c              |  277
-rw-r--r--  drivers/infiniband/core/smi.h                   |    2
-rw-r--r--  drivers/infiniband/core/sysfs.c                 |   25
-rw-r--r--  drivers/infiniband/core/ucm.c                   |  272
-rw-r--r--  drivers/infiniband/core/ucm.h                   |   83
-rw-r--r--  drivers/infiniband/core/ud_header.c             |    1
-rw-r--r--  drivers/infiniband/core/user_mad.c              |  615
-rw-r--r--  drivers/infiniband/core/uverbs.h                |   75
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c            | 1021
-rw-r--r--  drivers/infiniband/core/uverbs_main.c           |  516
-rw-r--r--  drivers/infiniband/core/verbs.c                 |   31
-rw-r--r--  drivers/infiniband/hw/mthca/Makefile            |    3
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_av.c          |    2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_catas.c       |  156
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c         |   19
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.h         |    2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c          |   47
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h         |   28
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c          |   48
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c         |   75
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c        |   59
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mcg.c         |   13
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c     |   24
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.h     |    5
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mr.c          |    4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_profile.c     |    6
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c    |   56
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.h    |    1
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c          |  201
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_reset.c       |    1
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_srq.c         |   81
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_uar.c         |    2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_user.h        |    6
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_wqe.h         |    4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h            |   41
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_fs.c         |  177
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c         |  137
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c       |  118
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c  |   44
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c      |   13
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_vlan.c       |    7
-rw-r--r--  drivers/infiniband/ulp/srp/Kbuild               |    1
-rw-r--r--  drivers/infiniband/ulp/srp/Kconfig              |   11
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c             | 1704
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h             |  151
59 files changed, 5225 insertions, 2063 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 325d502e25c..bdf0891a92d 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -33,4 +33,6 @@ source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/ulp/ipoib/Kconfig"
+source "drivers/infiniband/ulp/srp/Kconfig"
+
endmenu
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index d256cf79821..a43fb34cca9 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,3 +1,4 @@
obj-$(CONFIG_INFINIBAND) += core/
obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
+obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 5ac86f566dc..34b724afd28 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -37,58 +37,44 @@
* $Id: agent.c 1389 2004-12-27 22:56:47Z roland $
*/
-#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/string.h>
-#include <asm/bug.h>
+#include "agent.h"
+#include "smi.h"
-#include <rdma/ib_smi.h>
+#define SPFX "ib_agent: "
-#include "smi.h"
-#include "agent_priv.h"
-#include "mad_priv.h"
-#include "agent.h"
+struct ib_agent_port_private {
+ struct list_head port_list;
+ struct ib_mad_agent *agent[2];
+};
-spinlock_t ib_agent_port_list_lock;
+static DEFINE_SPINLOCK(ib_agent_port_list_lock);
static LIST_HEAD(ib_agent_port_list);
-/*
- * Caller must hold ib_agent_port_list_lock
- */
-static inline struct ib_agent_port_private *
-__ib_get_agent_port(struct ib_device *device, int port_num,
- struct ib_mad_agent *mad_agent)
+static struct ib_agent_port_private *
+__ib_get_agent_port(struct ib_device *device, int port_num)
{
struct ib_agent_port_private *entry;
- BUG_ON(!(!!device ^ !!mad_agent)); /* Exactly one MUST be (!NULL) */
-
- if (device) {
- list_for_each_entry(entry, &ib_agent_port_list, port_list) {
- if (entry->smp_agent->device == device &&
- entry->port_num == port_num)
- return entry;
- }
- } else {
- list_for_each_entry(entry, &ib_agent_port_list, port_list) {
- if ((entry->smp_agent == mad_agent) ||
- (entry->perf_mgmt_agent == mad_agent))
- return entry;
- }
+ list_for_each_entry(entry, &ib_agent_port_list, port_list) {
+ if (entry->agent[0]->device == device &&
+ entry->agent[0]->port_num == port_num)
+ return entry;
}
return NULL;
}
-static inline struct ib_agent_port_private *
-ib_get_agent_port(struct ib_device *device, int port_num,
- struct ib_mad_agent *mad_agent)
+static struct ib_agent_port_private *
+ib_get_agent_port(struct ib_device *device, int port_num)
{
struct ib_agent_port_private *entry;
unsigned long flags;
spin_lock_irqsave(&ib_agent_port_list_lock, flags);
- entry = __ib_get_agent_port(device, port_num, mad_agent);
+ entry = __ib_get_agent_port(device, port_num);
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
-
return entry;
}
@@ -100,226 +86,102 @@ int smi_check_local_dr_smp(struct ib_smp *smp,
if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
return 1;
- port_priv = ib_get_agent_port(device, port_num, NULL);
+
+ port_priv = ib_get_agent_port(device, port_num);
if (!port_priv) {
printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d "
- "not open\n",
- device->name, port_num);
+ "not open\n", device->name, port_num);
return 1;
}
- return smi_check_local_smp(port_priv->smp_agent, smp);
+ return smi_check_local_smp(port_priv->agent[0], smp);
}
-static int agent_mad_send(struct ib_mad_agent *mad_agent,
- struct ib_agent_port_private *port_priv,
- struct ib_mad_private *mad_priv,
- struct ib_grh *grh,
- struct ib_wc *wc)
+int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
+ struct ib_wc *wc, struct ib_device *device,
+ int port_num, int qpn)
{
- struct ib_agent_send_wr *agent_send_wr;
- struct ib_sge gather_list;
- struct ib_send_wr send_wr;
- struct ib_send_wr *bad_send_wr;
- struct ib_ah_attr ah_attr;
- unsigned long flags;
- int ret = 1;
-
- agent_send_wr = kmalloc(sizeof(*agent_send_wr), GFP_KERNEL);
- if (!agent_send_wr)
- goto out;
- agent_send_wr->mad = mad_priv;
-
- gather_list.addr = dma_map_single(mad_agent->device->dma_device,
- &mad_priv->mad,
- sizeof(mad_priv->mad),
- DMA_TO_DEVICE);
- gather_list.length = sizeof(mad_priv->mad);
- gather_list.lkey = mad_agent->mr->lkey;
-
- send_wr.next = NULL;
- send_wr.opcode = IB_WR_SEND;
- send_wr.sg_list = &gather_list;
- send_wr.num_sge = 1;
- send_wr.wr.ud.remote_qpn = wc->src_qp; /* DQPN */
- send_wr.wr.ud.timeout_ms = 0;
- send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ struct ib_agent_port_private *port_priv;
+ struct ib_mad_agent *agent;
+ struct ib_mad_send_buf *send_buf;
+ struct ib_ah *ah;
+ int ret;
- ah_attr.dlid = wc->slid;
- ah_attr.port_num = mad_agent->port_num;
- ah_attr.src_path_bits = wc->dlid_path_bits;
- ah_attr.sl = wc->sl;
- ah_attr.static_rate = 0;
- ah_attr.ah_flags = 0; /* No GRH */
- if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
- if (wc->wc_flags & IB_WC_GRH) {
- ah_attr.ah_flags = IB_AH_GRH;
- /* Should sgid be looked up ? */
- ah_attr.grh.sgid_index = 0;
- ah_attr.grh.hop_limit = grh->hop_limit;
- ah_attr.grh.flow_label = be32_to_cpu(
- grh->version_tclass_flow) & 0xfffff;
- ah_attr.grh.traffic_class = (be32_to_cpu(
- grh->version_tclass_flow) >> 20) & 0xff;
- memcpy(ah_attr.grh.dgid.raw,
- grh->sgid.raw,
- sizeof(ah_attr.grh.dgid));
- }
+ port_priv = ib_get_agent_port(device, port_num);
+ if (!port_priv) {
+ printk(KERN_ERR SPFX "Unable to find port agent\n");
+ return -ENODEV;
}
- agent_send_wr->ah = ib_create_ah(mad_agent->qp->pd, &ah_attr);
- if (IS_ERR(agent_send_wr->ah)) {
- printk(KERN_ERR SPFX "No memory for address handle\n");
- kfree(agent_send_wr);
- goto out;
+ agent = port_priv->agent[qpn];
+ ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
+ if (IS_ERR(ah)) {
+ ret = PTR_ERR(ah);
+ printk(KERN_ERR SPFX "ib_create_ah_from_wc error:%d\n", ret);
+ return ret;
}
- send_wr.wr.ud.ah = agent_send_wr->ah;
- if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
- send_wr.wr.ud.pkey_index = wc->pkey_index;
- send_wr.wr.ud.remote_qkey = IB_QP1_QKEY;
- } else { /* for SMPs */
- send_wr.wr.ud.pkey_index = 0;
- send_wr.wr.ud.remote_qkey = 0;
+ send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0,
+ IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+ GFP_KERNEL);
+ if (IS_ERR(send_buf)) {
+ ret = PTR_ERR(send_buf);
+ printk(KERN_ERR SPFX "ib_create_send_mad error:%d\n", ret);
+ goto err1;
}
- send_wr.wr.ud.mad_hdr = &mad_priv->mad.mad.mad_hdr;
- send_wr.wr_id = (unsigned long)agent_send_wr;
-
- pci_unmap_addr_set(agent_send_wr, mapping, gather_list.addr);
- /* Send */
- spin_lock_irqsave(&port_priv->send_list_lock, flags);
- if (ib_post_send_mad(mad_agent, &send_wr, &bad_send_wr)) {
- spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
- dma_unmap_single(mad_agent->device->dma_device,
- pci_unmap_addr(agent_send_wr, mapping),
- sizeof(mad_priv->mad),
- DMA_TO_DEVICE);
- ib_destroy_ah(agent_send_wr->ah);
- kfree(agent_send_wr);
- } else {
- list_add_tail(&agent_send_wr->send_list,
- &port_priv->send_posted_list);
- spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
- ret = 0;
+ memcpy(send_buf->mad, mad, sizeof *mad);
+ send_buf->ah = ah;
+ if ((ret = ib_post_send_mad(send_buf, NULL))) {
+ printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret);
+ goto err2;
}
-
-out:
+ return 0;
+err2:
+ ib_free_send_mad(send_buf);
+err1:
+ ib_destroy_ah(ah);
return ret;
}
-int agent_send(struct ib_mad_private *mad,
- struct ib_grh *grh,
- struct ib_wc *wc,
- struct ib_device *device,
- int port_num)
-{
- struct ib_agent_port_private *port_priv;
- struct ib_mad_agent *mad_agent;
-
- port_priv = ib_get_agent_port(device, port_num, NULL);
- if (!port_priv) {
- printk(KERN_DEBUG SPFX "agent_send %s port %d not open\n",
- device->name, port_num);
- return 1;
- }
-
- /* Get mad agent based on mgmt_class in MAD */
- switch (mad->mad.mad.mad_hdr.mgmt_class) {
- case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
- case IB_MGMT_CLASS_SUBN_LID_ROUTED:
- mad_agent = port_priv->smp_agent;
- break;
- case IB_MGMT_CLASS_PERF_MGMT:
- mad_agent = port_priv->perf_mgmt_agent;
- break;
- default:
- return 1;
- }
-
- return agent_mad_send(mad_agent, port_priv, mad, grh, wc);
-}
-
static void agent_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
- struct ib_agent_port_private *port_priv;
- struct ib_agent_send_wr *agent_send_wr;
- unsigned long flags;
-
- /* Find matching MAD agent */
- port_priv = ib_get_agent_port(NULL, 0, mad_agent);
- if (!port_priv) {
- printk(KERN_ERR SPFX "agent_send_handler: no matching MAD "
- "agent %p\n", mad_agent);
- return;
- }
-
- agent_send_wr = (struct ib_agent_send_wr *)(unsigned long)mad_send_wc->wr_id;
- spin_lock_irqsave(&port_priv->send_list_lock, flags);
- /* Remove completed send from posted send MAD list */
- list_del(&agent_send_wr->send_list);
- spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
-
- dma_unmap_single(mad_agent->device->dma_device,
- pci_unmap_addr(agent_send_wr, mapping),
- sizeof(agent_send_wr->mad->mad),
- DMA_TO_DEVICE);
-
- ib_destroy_ah(agent_send_wr->ah);
-
- /* Release allocated memory */
- kmem_cache_free(ib_mad_cache, agent_send_wr->mad);
- kfree(agent_send_wr);
+ ib_destroy_ah(mad_send_wc->send_buf->ah);
+ ib_free_send_mad(mad_send_wc->send_buf);
}
int ib_agent_port_open(struct ib_device *device, int port_num)
{
- int ret;
struct ib_agent_port_private *port_priv;
unsigned long flags;
-
- /* First, check if port already open for SMI */
- port_priv = ib_get_agent_port(device, port_num, NULL);
- if (port_priv) {
- printk(KERN_DEBUG SPFX "%s port %d already open\n",
- device->name, port_num);
- return 0;
- }
+ int ret;
/* Create new device info */
- port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
+ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n");
ret = -ENOMEM;
goto error1;
}
- memset(port_priv, 0, sizeof *port_priv);
- port_priv->port_num = port_num;
- spin_lock_init(&port_priv->send_list_lock);
- INIT_LIST_HEAD(&port_priv->send_posted_list);
-
- /* Obtain send only MAD agent for SM class (SMI QP) */
- port_priv->smp_agent = ib_register_mad_agent(device, port_num,
- IB_QPT_SMI,
- NULL, 0,
+ /* Obtain send only MAD agent for SMI QP */
+ port_priv->agent[0] = ib_register_mad_agent(device, port_num,
+ IB_QPT_SMI, NULL, 0,
&agent_send_handler,
- NULL, NULL);
-
- if (IS_ERR(port_priv->smp_agent)) {
- ret = PTR_ERR(port_priv->smp_agent);
+ NULL, NULL);
+ if (IS_ERR(port_priv->agent[0])) {
+ ret = PTR_ERR(port_priv->agent[0]);
goto error2;
}
- /* Obtain send only MAD agent for PerfMgmt class (GSI QP) */
- port_priv->perf_mgmt_agent = ib_register_mad_agent(device, port_num,
- IB_QPT_GSI,
- NULL, 0,
- &agent_send_handler,
- NULL, NULL);
- if (IS_ERR(port_priv->perf_mgmt_agent)) {
- ret = PTR_ERR(port_priv->perf_mgmt_agent);
+ /* Obtain send only MAD agent for GSI QP */
+ port_priv->agent[1] = ib_register_mad_agent(device, port_num,
+ IB_QPT_GSI, NULL, 0,
+ &agent_send_handler,
+ NULL, NULL);
+ if (IS_ERR(port_priv->agent[1])) {
+ ret = PTR_ERR(port_priv->agent[1]);
goto error3;
}
@@ -330,7 +192,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
return 0;
error3:
- ib_unregister_mad_agent(port_priv->smp_agent);
+ ib_unregister_mad_agent(port_priv->agent[0]);
error2:
kfree(port_priv);
error1:
@@ -343,7 +205,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
unsigned long flags;
spin_lock_irqsave(&ib_agent_port_list_lock, flags);
- port_priv = __ib_get_agent_port(device, port_num, NULL);
+ port_priv = __ib_get_agent_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
printk(KERN_ERR SPFX "Port %d not found\n", port_num);
@@ -352,9 +214,8 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
list_del(&port_priv->port_list);
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
- ib_unregister_mad_agent(port_priv->perf_mgmt_agent);
- ib_unregister_mad_agent(port_priv->smp_agent);
+ ib_unregister_mad_agent(port_priv->agent[1]);
+ ib_unregister_mad_agent(port_priv->agent[0]);
kfree(port_priv);
-
return 0;
}
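
[Editor's note: the rewritten agent.c collapses the old agent_send()/agent_mad_send() pair into agent_send_response(), which hides AH creation (ib_create_ah_from_wc), send-buffer allocation, posting, and, via agent_send_handler(), all cleanup. A minimal sketch of a hypothetical caller on the receive path follows; forward_response() and its QPN selection are illustrative and not part of this patch.]

/* Hypothetical sketch: bouncing a received MAD back out as a response.
 * qpn 0 selects the SMI agent, qpn 1 the GSI agent, matching the new
 * port_priv->agent[2] layout above.
 */
static void forward_response(struct ib_mad *mad, struct ib_grh *grh,
			     struct ib_wc *wc, struct ib_device *device,
			     int port_num)
{
	u8 mgmt_class = mad->mad_hdr.mgmt_class;
	int qpn = (mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
		   mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) ? 0 : 1;

	if (agent_send_response(mad, grh, wc, device, port_num, qpn))
		printk(KERN_ERR "MAD response send failed\n");
}
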
diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h
index d9426842254..86d72fab37b 100644
--- a/drivers/infiniband/core/agent.h
+++ b/drivers/infiniband/core/agent.h
@@ -39,17 +39,15 @@
#ifndef __AGENT_H_
#define __AGENT_H_
-extern spinlock_t ib_agent_port_list_lock;
+#include <linux/err.h>
+#include <rdma/ib_mad.h>
-extern int ib_agent_port_open(struct ib_device *device,
- int port_num);
+extern int ib_agent_port_open(struct ib_device *device, int port_num);
extern int ib_agent_port_close(struct ib_device *device, int port_num);
-extern int agent_send(struct ib_mad_private *mad,
- struct ib_grh *grh,
- struct ib_wc *wc,
- struct ib_device *device,
- int port_num);
+extern int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
+ struct ib_wc *wc, struct ib_device *device,
+ int port_num, int qpn);
#endif /* __AGENT_H_ */
diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h
deleted file mode 100644
index 2ec6d7f1b7d..00000000000
--- a/drivers/infiniband/core/agent_priv.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved.
- * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved.
- * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
- * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved.
- * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: agent_priv.h 1640 2005-01-24 22:39:02Z halr $
- */
-
-#ifndef __IB_AGENT_PRIV_H__
-#define __IB_AGENT_PRIV_H__
-
-#include <linux/pci.h>
-
-#define SPFX "ib_agent: "
-
-struct ib_agent_send_wr {
- struct list_head send_list;
- struct ib_ah *ah;
- struct ib_mad_private *mad;
- DECLARE_PCI_UNMAP_ADDR(mapping)
-};
-
-struct ib_agent_port_private {
- struct list_head port_list;
- struct list_head send_posted_list;
- spinlock_t send_list_lock;
- int port_num;
- struct ib_mad_agent *smp_agent; /* SM class */
- struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */
-};
-
-#endif /* __IB_AGENT_PRIV_H__ */
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index f014e639088..c57a3871184 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -38,6 +38,7 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <linux/sched.h> /* INIT_WORK, schedule_work(), flush_scheduled_work() */
#include <rdma/ib_cache.h>
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 54db6d4831f..02110e00d14 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -135,6 +135,7 @@ struct cm_id_private {
__be64 tid;
__be32 local_qpn;
__be32 remote_qpn;
+ enum ib_qp_type qp_type;
__be32 sq_psn;
__be32 rq_psn;
int timeout_ms;
@@ -175,8 +176,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
cm_id_priv->av.pkey_index,
- ah, 0, sizeof(struct ib_mad_hdr),
- sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
+ 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC);
if (IS_ERR(m)) {
ib_destroy_ah(ah);
@@ -184,7 +184,8 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
}
/* Timeout set by caller if response is expected. */
- m->send_wr.wr.ud.retries = cm_id_priv->max_cm_retries;
+ m->ah = ah;
+ m->retries = cm_id_priv->max_cm_retries;
atomic_inc(&cm_id_priv->refcount);
m->context[0] = cm_id_priv;
@@ -205,20 +206,20 @@ static int cm_alloc_response_msg(struct cm_port *port,
return PTR_ERR(ah);
m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
- ah, 0, sizeof(struct ib_mad_hdr),
- sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
+ 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC);
if (IS_ERR(m)) {
ib_destroy_ah(ah);
return PTR_ERR(m);
}
+ m->ah = ah;
*msg = m;
return 0;
}
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
- ib_destroy_ah(msg->send_wr.wr.ud.ah);
+ ib_destroy_ah(msg->ah);
if (msg->context[0])
cm_deref_id(msg->context[0]);
ib_free_send_mad(msg);
@@ -366,9 +367,15 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
service_node);
if ((cur_cm_id_priv->id.service_mask & service_id) ==
- (service_mask & cur_cm_id_priv->id.service_id))
- return cm_id_priv;
- if (service_id < cur_cm_id_priv->id.service_id)
+ (service_mask & cur_cm_id_priv->id.service_id) &&
+ (cm_id_priv->id.device == cur_cm_id_priv->id.device))
+ return cur_cm_id_priv;
+
+ if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
+ link = &(*link)->rb_left;
+ else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
+ link = &(*link)->rb_right;
+ else if (service_id < cur_cm_id_priv->id.service_id)
link = &(*link)->rb_left;
else
link = &(*link)->rb_right;
@@ -378,7 +385,8 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
return NULL;
}
-static struct cm_id_private * cm_find_listen(__be64 service_id)
+static struct cm_id_private * cm_find_listen(struct ib_device *device,
+ __be64 service_id)
{
struct rb_node *node = cm.listen_service_table.rb_node;
struct cm_id_private *cm_id_priv;
@@ -386,9 +394,15 @@ static struct cm_id_private * cm_find_listen(__be64 service_id)
while (node) {
cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
if ((cm_id_priv->id.service_mask & service_id) ==
- (cm_id_priv->id.service_mask & cm_id_priv->id.service_id))
+ cm_id_priv->id.service_id &&
+ (cm_id_priv->id.device == device))
return cm_id_priv;
- if (service_id < cm_id_priv->id.service_id)
+
+ if (device < cm_id_priv->id.device)
+ node = node->rb_left;
+ else if (device > cm_id_priv->id.device)
+ node = node->rb_right;
+ else if (service_id < cm_id_priv->id.service_id)
node = node->rb_left;
else
node = node->rb_right;
@@ -523,18 +537,19 @@ static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}
-struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+ ib_cm_handler cm_handler,
void *context)
{
struct cm_id_private *cm_id_priv;
int ret;
- cm_id_priv = kmalloc(sizeof *cm_id_priv, GFP_KERNEL);
+ cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
if (!cm_id_priv)
return ERR_PTR(-ENOMEM);
- memset(cm_id_priv, 0, sizeof *cm_id_priv);
cm_id_priv->id.state = IB_CM_IDLE;
+ cm_id_priv->id.device = device;
cm_id_priv->id.cm_handler = cm_handler;
cm_id_priv->id.context = context;
cm_id_priv->id.remote_cm_qpn = 1;
@@ -605,10 +620,9 @@ static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
{
struct cm_timewait_info *timewait_info;
- timewait_info = kmalloc(sizeof *timewait_info, GFP_KERNEL);
+ timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
if (!timewait_info)
return ERR_PTR(-ENOMEM);
- memset(timewait_info, 0, sizeof *timewait_info);
timewait_info->work.local_id = local_id;
INIT_WORK(&timewait_info->work.work, cm_work_handler,
@@ -662,8 +676,7 @@ retest:
break;
case IB_CM_SIDR_REQ_SENT:
cm_id->state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
break;
case IB_CM_SIDR_REQ_RCVD:
@@ -674,8 +687,7 @@ retest:
case IB_CM_MRA_REQ_RCVD:
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
/* Fall through */
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
@@ -692,8 +704,7 @@ retest:
ib_send_cm_dreq(cm_id, NULL, 0);
goto retest;
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
cm_enter_timewait(cm_id_priv);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
break;
@@ -867,7 +878,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
struct ib_cm_req_param *param)
{
struct cm_id_private *cm_id_priv;
- struct ib_send_wr *bad_send_wr;
struct cm_req_msg *req_msg;
unsigned long flags;
int ret;
@@ -911,6 +921,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_id_priv->responder_resources = param->responder_resources;
cm_id_priv->retry_count = param->retry_count;
cm_id_priv->path_mtu = param->primary_path->mtu;
+ cm_id_priv->qp_type = param->qp_type;
ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
if (ret)
@@ -919,7 +930,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
cm_format_req(req_msg, cm_id_priv, param);
cm_id_priv->tid = req_msg->hdr.tid;
- cm_id_priv->msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+ cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
@@ -928,8 +939,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_req_get_primary_local_ack_timeout(req_msg);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &cm_id_priv->msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(cm_id_priv->msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
goto error2;
@@ -952,7 +962,6 @@ static int cm_issue_rej(struct cm_port *port,
void *ari, u8 ari_length)
{
struct ib_mad_send_buf *msg = NULL;
- struct ib_send_wr *bad_send_wr;
struct cm_rej_msg *rej_msg, *rcv_msg;
int ret;
@@ -975,7 +984,7 @@ static int cm_issue_rej(struct cm_port *port,
memcpy(rej_msg->ari, ari, ari_length);
}
- ret = ib_post_send_mad(port->mad_agent, &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@@ -1047,7 +1056,6 @@ static void cm_format_req_event(struct cm_work *work,
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.req_rcvd;
param->listen_id = listen_id;
- param->device = cm_id_priv->av.port->mad_agent->device;
param->port = cm_id_priv->av.port->port_num;
param->primary_path = &work->path[0];
if (req_msg->alt_local_lid)
@@ -1156,7 +1164,6 @@ static void cm_dup_req_handler(struct cm_work *work,
struct cm_id_private *cm_id_priv)
{
struct ib_mad_send_buf *msg = NULL;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -1185,8 +1192,7 @@ static void cm_dup_req_handler(struct cm_work *work,
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
- &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
return;
@@ -1226,7 +1232,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
}
/* Find matching listen request. */
- listen_cm_id_priv = cm_find_listen(req_msg->service_id);
+ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+ req_msg->service_id);
if (!listen_cm_id_priv) {
spin_unlock_irqrestore(&cm.lock, flags);
cm_issue_rej(work->port, work->mad_recv_wc,
@@ -1254,7 +1261,7 @@ static int cm_req_handler(struct cm_work *work)
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id = ib_create_cm_id(NULL, NULL);
+ cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
@@ -1305,6 +1312,7 @@ static int cm_req_handler(struct cm_work *work)
cm_req_get_primary_local_ack_timeout(req_msg);
cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
+ cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
cm_process_work(cm_id_priv, work);
@@ -1349,7 +1357,6 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
struct cm_rep_msg *rep_msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -1371,11 +1378,10 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
rep_msg = (struct cm_rep_msg *) msg->mad;
cm_format_rep(rep_msg, cm_id_priv, param);
- msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+ msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
@@ -1413,7 +1419,6 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
void *data;
int ret;
@@ -1440,8 +1445,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
@@ -1486,7 +1490,6 @@ static void cm_dup_rep_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
struct cm_rep_msg *rep_msg;
struct ib_mad_send_buf *msg = NULL;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -1514,8 +1517,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
goto unlock;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
- &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
goto deref;
@@ -1583,8 +1585,7 @@ static int cm_rep_handler(struct cm_work *work)
/* todo: handle peer_to_peer */
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1618,8 +1619,7 @@ static int cm_establish_handler(struct cm_work *work)
goto out;
}
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1658,8 +1658,7 @@ static int cm_rtu_handler(struct cm_work *work)
}
cm_id_priv->id.state = IB_CM_ESTABLISHED;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1696,7 +1695,6 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -1718,11 +1716,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
- msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+ msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_enter_timewait(cm_id_priv);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1756,7 +1753,6 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
void *data;
int ret;
@@ -1786,8 +1782,7 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
- &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
@@ -1804,7 +1799,6 @@ static int cm_dreq_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
struct cm_dreq_msg *dreq_msg;
struct ib_mad_send_buf *msg = NULL;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -1823,8 +1817,7 @@ static int cm_dreq_handler(struct cm_work *work)
switch (cm_id_priv->id.state) {
case IB_CM_REP_SENT:
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
break;
case IB_CM_ESTABLISHED:
case IB_CM_MRA_REP_RCVD:
@@ -1838,8 +1831,7 @@ static int cm_dreq_handler(struct cm_work *work)
cm_id_priv->private_data_len);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- if (ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr))
+ if (ib_post_send_mad(msg, NULL))
cm_free_msg(msg);
goto deref;
default:
@@ -1886,8 +1878,7 @@ static int cm_drep_handler(struct cm_work *work)
}
cm_enter_timewait(cm_id_priv);
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1912,7 +1903,6 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -1956,8 +1946,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
if (ret)
goto out;
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@@ -2033,8 +2022,7 @@ static int cm_rej_handler(struct cm_work *work)
case IB_CM_MRA_REQ_RCVD:
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
/* fall through */
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
@@ -2044,8 +2032,7 @@ static int cm_rej_handler(struct cm_work *work)
cm_reset_to_idle(cm_id_priv);
break;
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
/* fall through */
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
@@ -2080,7 +2067,6 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
void *data;
unsigned long flags;
int ret;
@@ -2104,8 +2090,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
CM_MSG_RESPONSE_REQ, service_timeout,
private_data, private_data_len);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
cm_id->state = IB_CM_MRA_REQ_SENT;
@@ -2118,8 +2103,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
CM_MSG_RESPONSE_REP, service_timeout,
private_data, private_data_len);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
cm_id->state = IB_CM_MRA_REP_SENT;
@@ -2132,8 +2116,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
CM_MSG_RESPONSE_OTHER, service_timeout,
private_data, private_data_len);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
cm_id->lap_state = IB_CM_MRA_LAP_SENT;
@@ -2195,14 +2178,14 @@ static int cm_mra_handler(struct cm_work *work)
case IB_CM_REQ_SENT:
if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
ib_modify_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg, timeout))
+ cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
break;
case IB_CM_REP_SENT:
if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
ib_modify_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg, timeout))
+ cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
break;
@@ -2210,7 +2193,7 @@ static int cm_mra_handler(struct cm_work *work)
if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
ib_modify_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg, timeout))
+ cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
break;
@@ -2273,7 +2256,6 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -2294,11 +2276,10 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
alternate_path, private_data, private_data_len);
- msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+ msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
@@ -2342,7 +2323,6 @@ static int cm_lap_handler(struct cm_work *work)
struct cm_lap_msg *lap_msg;
struct ib_cm_lap_event_param *param;
struct ib_mad_send_buf *msg = NULL;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -2376,8 +2356,7 @@ static int cm_lap_handler(struct cm_work *work)
cm_id_priv->private_data_len);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- if (ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr))
+ if (ib_post_send_mad(msg, NULL))
cm_free_msg(msg);
goto deref;
default:
@@ -2433,7 +2412,6 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -2456,8 +2434,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id,
cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
info, info_length, private_data, private_data_len);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
@@ -2496,8 +2473,7 @@ static int cm_apr_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
cm_id_priv->msg = NULL;
ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -2572,7 +2548,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -2595,13 +2570,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
param);
- msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+ msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state == IB_CM_IDLE)
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
else
ret = -EINVAL;
@@ -2629,7 +2603,6 @@ static void cm_format_sidr_req_event(struct cm_work *work,
param = &work->cm_event.param.sidr_req_rcvd;
param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
param->listen_id = listen_id;
- param->device = work->port->mad_agent->device;
param->port = work->port->port_num;
work->cm_event.private_data = &sidr_req_msg->private_data;
}
@@ -2642,7 +2615,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
struct ib_wc *wc;
unsigned long flags;
- cm_id = ib_create_cm_id(NULL, NULL);
+ cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -2666,7 +2639,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
spin_unlock_irqrestore(&cm.lock, flags);
goto out; /* Duplicate message. */
}
- cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id);
+ cur_cm_id_priv = cm_find_listen(cm_id->device,
+ sidr_req_msg->service_id);
if (!cur_cm_id_priv) {
rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
spin_unlock_irqrestore(&cm.lock, flags);
@@ -2715,7 +2689,6 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -2737,8 +2710,7 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
param);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
@@ -2791,8 +2763,7 @@ static int cm_sidr_rep_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_format_sidr_rep_event(work);
@@ -2860,9 +2831,7 @@ discard:
static void cm_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
- struct ib_mad_send_buf *msg;
-
- msg = (struct ib_mad_send_buf *)(unsigned long)mad_send_wc->wr_id;
+ struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
switch (mad_send_wc->status) {
case IB_WC_SUCCESS:
@@ -3064,10 +3033,10 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
case IB_CM_ESTABLISHED:
*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX | IB_QP_PORT;
- qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+ qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE;
if (cm_id_priv->responder_resources)
- qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ;
+ qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
qp_attr->pkey_index = cm_id_priv->av.pkey_index;
qp_attr->port_num = cm_id_priv->av.port->port_num;
ret = 0;
@@ -3097,14 +3066,18 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
case IB_CM_MRA_REP_RCVD:
case IB_CM_ESTABLISHED:
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
- IB_QP_DEST_QPN | IB_QP_RQ_PSN |
- IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
+ IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
- qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources;
- qp_attr->min_rnr_timer = 0;
+ if (cm_id_priv->qp_type == IB_QPT_RC) {
+ *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_MIN_RNR_TIMER;
+ qp_attr->max_dest_rd_atomic =
+ cm_id_priv->responder_resources;
+ qp_attr->min_rnr_timer = 0;
+ }
if (cm_id_priv->alt_av.ah_attr.dlid) {
*qp_attr_mask |= IB_QP_ALT_PATH;
qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
@@ -3133,14 +3106,17 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
case IB_CM_ESTABLISHED:
- *qp_attr_mask = IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
- IB_QP_RNR_RETRY | IB_QP_SQ_PSN |
- IB_QP_MAX_QP_RD_ATOMIC;
- qp_attr->timeout = cm_id_priv->local_ack_timeout;
- qp_attr->retry_cnt = cm_id_priv->retry_count;
- qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
+ *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
- qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
+ if (cm_id_priv->qp_type == IB_QPT_RC) {
+ *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_MAX_QP_RD_ATOMIC;
+ qp_attr->timeout = cm_id_priv->local_ack_timeout;
+ qp_attr->retry_cnt = cm_id_priv->retry_count;
+ qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
+ qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
+ }
if (cm_id_priv->alt_av.ah_attr.dlid) {
*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
qp_attr->path_mig_state = IB_MIG_REARM;
@@ -3323,6 +3299,7 @@ static void __exit ib_cm_cleanup(void)
flush_workqueue(cm.wq);
destroy_workqueue(cm.wq);
ib_unregister_client(&cm_client);
+ idr_destroy(&cm.local_id_table);
}
module_init(ib_cm_init);
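
[Editor's note: the net effect of the cm.c changes on consumers is that CM IDs are bound to a device at creation, listens match on (device, service_id) rather than service_id alone, and MAD timeout/retry/AH state now lives in the ib_mad_send_buf itself. A minimal sketch under the new signature; my_cm_handler and my_context are placeholder names.]

/* Sketch only: creating a device-bound CM ID under the new API. */
struct ib_cm_id *cm_id;

cm_id = ib_create_cm_id(device, my_cm_handler, my_context);
if (IS_ERR(cm_id))
	return PTR_ERR(cm_id);
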
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 813ab70bf6d..4d3aee90c24 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -186,6 +186,7 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
req_msg->offset40 = cpu_to_be32((be32_to_cpu(
req_msg->offset40) &
0xFFFFFFF9) | 0x2);
+ break;
default:
req_msg->offset40 = cpu_to_be32(be32_to_cpu(
req_msg->offset40) &
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index d3cf84e0158..e169e798354 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -161,17 +161,9 @@ static int alloc_name(char *name)
*/
struct ib_device *ib_alloc_device(size_t size)
{
- void *dev;
-
BUG_ON(size < sizeof (struct ib_device));
- dev = kmalloc(size, GFP_KERNEL);
- if (!dev)
- return NULL;
-
- memset(dev, 0, size);
-
- return dev;
+ return kzalloc(size, GFP_KERNEL);
}
EXPORT_SYMBOL(ib_alloc_device);
@@ -514,6 +506,12 @@ int ib_query_port(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr)
{
+ if (device->node_type == IB_NODE_SWITCH) {
+ if (port_num)
+ return -EINVAL;
+ } else if (port_num < 1 || port_num > device->phys_port_cnt)
+ return -EINVAL;
+
return device->query_port(device, port_num, port_attr);
}
EXPORT_SYMBOL(ib_query_port);
@@ -583,6 +581,12 @@ int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
+ if (device->node_type == IB_NODE_SWITCH) {
+ if (port_num)
+ return -EINVAL;
+ } else if (port_num < 1 || port_num > device->phys_port_cnt)
+ return -EINVAL;
+
return device->modify_port(device, port_num, port_modify_mask,
port_modify);
}
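
[Editor's note: the added range checks encode the verbs convention that a switch exposes only management port 0, while CA and router ports run from 1 to phys_port_cnt. A sketch of what callers may now rely on; attr and port_num are local to the example.]

/* Sketch: an out-of-range port now fails with -EINVAL before the
 * driver's query_port() method is ever invoked.
 */
struct ib_port_attr attr;

if (ib_query_port(device, port_num, &attr) == -EINVAL)
	printk(KERN_WARNING "port %d invalid for this node type\n", port_num);
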
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a4a4d9c1eef..d393b504bf2 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -255,12 +255,11 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
}
/* Allocate structures */
- mad_agent_priv = kmalloc(sizeof *mad_agent_priv, GFP_KERNEL);
+ mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
if (!mad_agent_priv) {
ret = ERR_PTR(-ENOMEM);
goto error1;
}
- memset(mad_agent_priv, 0, sizeof *mad_agent_priv);
mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd,
IB_ACCESS_LOCAL_WRITE);
@@ -356,9 +355,9 @@ error4:
spin_unlock_irqrestore(&port_priv->reg_lock, flags);
kfree(reg_req);
error3:
- kfree(mad_agent_priv);
-error2:
ib_dereg_mr(mad_agent_priv->agent.mr);
+error2:
+ kfree(mad_agent_priv);
error1:
return ret;
}
@@ -448,14 +447,13 @@ struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
goto error1;
}
/* Allocate structures */
- mad_snoop_priv = kmalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
+ mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
if (!mad_snoop_priv) {
ret = ERR_PTR(-ENOMEM);
goto error1;
}
/* Now, fill in the various structures */
- memset(mad_snoop_priv, 0, sizeof *mad_snoop_priv);
mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
mad_snoop_priv->agent.device = device;
mad_snoop_priv->agent.recv_handler = recv_handler;
@@ -510,8 +508,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
wait_event(mad_agent_priv->wait,
!atomic_read(&mad_agent_priv->refcount));
- if (mad_agent_priv->reg_req)
- kfree(mad_agent_priv->reg_req);
+ kfree(mad_agent_priv->reg_req);
ib_dereg_mr(mad_agent_priv->agent.mr);
kfree(mad_agent_priv);
}
@@ -579,7 +576,7 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list)
}
static void snoop_send(struct ib_mad_qp_info *qp_info,
- struct ib_send_wr *send_wr,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_send_wc *mad_send_wc,
int mad_snoop_flags)
{
@@ -597,7 +594,7 @@ static void snoop_send(struct ib_mad_qp_info *qp_info,
atomic_inc(&mad_snoop_priv->refcount);
spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
- send_wr, mad_send_wc);
+ send_buf, mad_send_wc);
if (atomic_dec_and_test(&mad_snoop_priv->refcount))
wake_up(&mad_snoop_priv->wait);
spin_lock_irqsave(&qp_info->snoop_lock, flags);
@@ -654,10 +651,10 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
* Return < 0 if error
*/
static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
- struct ib_smp *smp,
- struct ib_send_wr *send_wr)
+ struct ib_mad_send_wr_private *mad_send_wr)
{
int ret;
+ struct ib_smp *smp = mad_send_wr->send_buf.mad;
unsigned long flags;
struct ib_mad_local_private *local;
struct ib_mad_private *mad_priv;
@@ -666,6 +663,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
struct ib_device *device = mad_agent_priv->agent.device;
u8 port_num = mad_agent_priv->agent.port_num;
struct ib_wc mad_wc;
+ struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) {
ret = -EINVAL;
@@ -745,13 +743,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
goto out;
}
- local->send_wr = *send_wr;
- local->send_wr.sg_list = local->sg_list;
- memcpy(local->sg_list, send_wr->sg_list,
- sizeof *send_wr->sg_list * send_wr->num_sge);
- local->send_wr.next = NULL;
- local->tid = send_wr->wr.ud.mad_hdr->tid;
- local->wr_id = send_wr->wr_id;
+ local->mad_send_wr = mad_send_wr;
/* Reference MAD agent until send side of local completion handled */
atomic_inc(&mad_agent_priv->refcount);
/* Queue local completion to local list */
@@ -781,17 +773,17 @@ static int get_buf_length(int hdr_len, int data_len)
struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
u32 remote_qpn, u16 pkey_index,
- struct ib_ah *ah, int rmpp_active,
+ int rmpp_active,
int hdr_len, int data_len,
- unsigned int __nocast gfp_mask)
+ gfp_t gfp_mask)
{
struct ib_mad_agent_private *mad_agent_priv;
- struct ib_mad_send_buf *send_buf;
+ struct ib_mad_send_wr_private *mad_send_wr;
int buf_size;
void *buf;
- mad_agent_priv = container_of(mad_agent,
- struct ib_mad_agent_private, agent);
+ mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
+ agent);
buf_size = get_buf_length(hdr_len, data_len);
if ((!mad_agent->rmpp_version &&
@@ -799,45 +791,39 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
(!rmpp_active && buf_size > sizeof(struct ib_mad)))
return ERR_PTR(-EINVAL);
- buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask);
+ buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask);
if (!buf)
return ERR_PTR(-ENOMEM);
- memset(buf, 0, sizeof *send_buf + buf_size);
-
- send_buf = buf + buf_size;
- send_buf->mad = buf;
-
- send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device,
- buf, buf_size, DMA_TO_DEVICE);
- pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr);
- send_buf->sge.length = buf_size;
- send_buf->sge.lkey = mad_agent->mr->lkey;
-
- send_buf->send_wr.wr_id = (unsigned long) send_buf;
- send_buf->send_wr.sg_list = &send_buf->sge;
- send_buf->send_wr.num_sge = 1;
- send_buf->send_wr.opcode = IB_WR_SEND;
- send_buf->send_wr.send_flags = IB_SEND_SIGNALED;
- send_buf->send_wr.wr.ud.ah = ah;
- send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr;
- send_buf->send_wr.wr.ud.remote_qpn = remote_qpn;
- send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
- send_buf->send_wr.wr.ud.pkey_index = pkey_index;
+
+ mad_send_wr = buf + buf_size;
+ mad_send_wr->send_buf.mad = buf;
+
+ mad_send_wr->mad_agent_priv = mad_agent_priv;
+ mad_send_wr->sg_list[0].length = buf_size;
+ mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
+
+ mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
+ mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
+ mad_send_wr->send_wr.num_sge = 1;
+ mad_send_wr->send_wr.opcode = IB_WR_SEND;
+ mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
+ mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
+ mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
+ mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
if (rmpp_active) {
- struct ib_rmpp_mad *rmpp_mad;
- rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad;
+ struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad;
rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
- offsetof(struct ib_rmpp_mad, data) + data_len);
+ IB_MGMT_RMPP_HDR + data_len);
rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version;
rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
IB_MGMT_RMPP_FLAG_ACTIVE);
}
- send_buf->mad_agent = mad_agent;
+ mad_send_wr->send_buf.mad_agent = mad_agent;
atomic_inc(&mad_agent_priv->refcount);
- return send_buf;
+ return &mad_send_wr->send_buf;
}
EXPORT_SYMBOL(ib_create_send_mad);
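
[Editor's note: with the ah parameter dropped from ib_create_send_mad(), callers stash the address handle in the buffer before posting, as the cm.c hunks above do. A short sketch; agent, remote_qpn, pkey_index, and ah are assumed to be in scope.]

/* Sketch: allocate a non-RMPP send buffer and attach the AH to it. */
struct ib_mad_send_buf *msg;

msg = ib_create_send_mad(agent, remote_qpn, pkey_index, 0,
			 IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_KERNEL);
if (IS_ERR(msg))
	return PTR_ERR(msg);
msg->ah = ah;	/* the AH now travels with the buffer */
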
@@ -847,10 +833,6 @@ void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
mad_agent_priv = container_of(send_buf->mad_agent,
struct ib_mad_agent_private, agent);
-
- dma_unmap_single(send_buf->mad_agent->device->dma_device,
- pci_unmap_addr(send_buf, mapping),
- send_buf->sge.length, DMA_TO_DEVICE);
kfree(send_buf->mad);
if (atomic_dec_and_test(&mad_agent_priv->refcount))
@@ -861,8 +843,10 @@ EXPORT_SYMBOL(ib_free_send_mad);
int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_mad_qp_info *qp_info;
- struct ib_send_wr *bad_send_wr;
struct list_head *list;
+ struct ib_send_wr *bad_send_wr;
+ struct ib_mad_agent *mad_agent;
+ struct ib_sge *sge;
unsigned long flags;
int ret;
@@ -871,10 +855,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
+ mad_agent = mad_send_wr->send_buf.mad_agent;
+ sge = mad_send_wr->sg_list;
+ sge->addr = dma_map_single(mad_agent->device->dma_device,
+ mad_send_wr->send_buf.mad, sge->length,
+ DMA_TO_DEVICE);
+ pci_unmap_addr_set(mad_send_wr, mapping, sge->addr);
+
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
- ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp,
- &mad_send_wr->send_wr, &bad_send_wr);
+ ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
+ &bad_send_wr);
list = &qp_info->send_queue.list;
} else {
ret = 0;
@@ -886,6 +877,11 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
list_add_tail(&mad_send_wr->mad_list.list, list);
}
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
+ if (ret)
+ dma_unmap_single(mad_agent->device->dma_device,
+ pci_unmap_addr(mad_send_wr, mapping),
+ sge->length, DMA_TO_DEVICE);
+
return ret;
}
@@ -893,45 +889,28 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
* ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
* with the registered client
*/
-int ib_post_send_mad(struct ib_mad_agent *mad_agent,
- struct ib_send_wr *send_wr,
- struct ib_send_wr **bad_send_wr)
+int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
+ struct ib_mad_send_buf **bad_send_buf)
{
- int ret = -EINVAL;
struct ib_mad_agent_private *mad_agent_priv;
-
- /* Validate supplied parameters */
- if (!bad_send_wr)
- goto error1;
-
- if (!mad_agent || !send_wr)
- goto error2;
-
- if (!mad_agent->send_handler)
- goto error2;
-
- mad_agent_priv = container_of(mad_agent,
- struct ib_mad_agent_private,
- agent);
+ struct ib_mad_send_buf *next_send_buf;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ unsigned long flags;
+ int ret = -EINVAL;
/* Walk list of send WRs and post each on send list */
- while (send_wr) {
- unsigned long flags;
- struct ib_send_wr *next_send_wr;
- struct ib_mad_send_wr_private *mad_send_wr;
- struct ib_smp *smp;
-
- /* Validate more parameters */
- if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG)
- goto error2;
+ for (; send_buf; send_buf = next_send_buf) {
- if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler)
- goto error2;
-
- if (!send_wr->wr.ud.mad_hdr) {
- printk(KERN_ERR PFX "MAD header must be supplied "
- "in WR %p\n", send_wr);
- goto error2;
+ mad_send_wr = container_of(send_buf,
+ struct ib_mad_send_wr_private,
+ send_buf);
+ mad_agent_priv = mad_send_wr->mad_agent_priv;
+
+ if (!send_buf->mad_agent->send_handler ||
+ (send_buf->timeout_ms &&
+ !send_buf->mad_agent->recv_handler)) {
+ ret = -EINVAL;
+ goto error;
}
/*
@@ -939,40 +918,24 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent,
	 * Save pointer to next work request to post in case the
	 * current one completes, and the user modifies the work
* request associated with the completion
*/
- next_send_wr = (struct ib_send_wr *)send_wr->next;
+ next_send_buf = send_buf->next;
+ mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
- smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr;
- if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- ret = handle_outgoing_dr_smp(mad_agent_priv, smp,
- send_wr);
+ if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ ret = handle_outgoing_dr_smp(mad_agent_priv,
+ mad_send_wr);
if (ret < 0) /* error */
- goto error2;
+ goto error;
else if (ret == 1) /* locally consumed */
- goto next;
+ continue;
}
- /* Allocate MAD send WR tracking structure */
- mad_send_wr = kmalloc(sizeof *mad_send_wr, GFP_ATOMIC);
- if (!mad_send_wr) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_send_wr_private\n");
- ret = -ENOMEM;
- goto error2;
- }
- memset(mad_send_wr, 0, sizeof *mad_send_wr);
-
- mad_send_wr->send_wr = *send_wr;
- mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
- memcpy(mad_send_wr->sg_list, send_wr->sg_list,
- sizeof *send_wr->sg_list * send_wr->num_sge);
- mad_send_wr->wr_id = send_wr->wr_id;
- mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid;
- mad_send_wr->mad_agent_priv = mad_agent_priv;
+ mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
/* Timeout will be updated after send completes */
- mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr.
- ud.timeout_ms);
- mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries;
- /* One reference for each work request to QP + response */
+ mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
+ mad_send_wr->retries = send_buf->retries;
+ /* Reference for work request to QP + response */
mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
mad_send_wr->status = IB_WC_SUCCESS;
@@ -995,16 +958,13 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent,
list_del(&mad_send_wr->agent_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
atomic_dec(&mad_agent_priv->refcount);
- goto error2;
+ goto error;
}
-next:
- send_wr = next_send_wr;
}
return 0;
-
-error2:
- *bad_send_wr = send_wr;
-error1:
+error:
+ if (bad_send_buf)
+ *bad_send_buf = send_buf;
return ret;
}
EXPORT_SYMBOL(ib_post_send_mad);
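With the entry point re-keyed to struct ib_mad_send_buf, a client chains
requests through send_buf->next and carries the address handle, timeout and
retry budget on the buffer itself. A hedged sketch continuing the
allocation above (ah, my_context and the numeric values are assumptions):

	int ret;

	msg->ah = ah;			/* AH now lives on the buffer */
	msg->timeout_ms = 1000;		/* illustrative response timeout */
	msg->retries = 2;		/* illustrative retry budget */
	msg->context[0] = my_context;	/* per-request cookie, hypothetical */
	msg->next = NULL;		/* single request; chain more here */

	ret = ib_post_send_mad(msg, NULL);
	if (ret)
		ib_free_send_mad(msg);	/* caller still owns it on failure */

If a bad_send_buf pointer is supplied instead of NULL, it is set to the
first buffer that failed to post, as the error path above shows.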
@@ -1075,14 +1035,12 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method,
static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
{
/* Allocate management method table */
- *method = kmalloc(sizeof **method, GFP_ATOMIC);
+ *method = kzalloc(sizeof **method, GFP_ATOMIC);
if (!*method) {
printk(KERN_ERR PFX "No memory for "
"ib_mad_mgmt_method_table\n");
return -ENOMEM;
}
- /* Clear management method table */
- memset(*method, 0, sizeof **method);
return 0;
}
@@ -1173,15 +1131,14 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
if (!*class) {
/* Allocate management class table for "new" class version */
- *class = kmalloc(sizeof **class, GFP_ATOMIC);
+ *class = kzalloc(sizeof **class, GFP_ATOMIC);
if (!*class) {
printk(KERN_ERR PFX "No memory for "
"ib_mad_mgmt_class_table\n");
ret = -ENOMEM;
goto error1;
}
- /* Clear management class table */
- memset(*class, 0, sizeof(**class));
+
/* Allocate method table for this management class */
method = &(*class)->method_table[mgmt_class];
if ((ret = allocate_method_table(method)))
@@ -1245,25 +1202,24 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
mad_reg_req->mgmt_class_version].vendor;
if (!*vendor_table) {
/* Allocate mgmt vendor class table for "new" class version */
- vendor = kmalloc(sizeof *vendor, GFP_ATOMIC);
+ vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
if (!vendor) {
printk(KERN_ERR PFX "No memory for "
"ib_mad_mgmt_vendor_class_table\n");
goto error1;
}
- /* Clear management vendor class table */
- memset(vendor, 0, sizeof(*vendor));
+
*vendor_table = vendor;
}
if (!(*vendor_table)->vendor_class[vclass]) {
/* Allocate table for this management vendor class */
- vendor_class = kmalloc(sizeof *vendor_class, GFP_ATOMIC);
+ vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
if (!vendor_class) {
printk(KERN_ERR PFX "No memory for "
"ib_mad_mgmt_vendor_class\n");
goto error2;
}
- memset(vendor_class, 0, sizeof(*vendor_class));
+
(*vendor_table)->vendor_class[vclass] = vendor_class;
}
for (i = 0; i < MAX_MGMT_OUI; i++) {
@@ -1447,8 +1403,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
* of MAD.
*/
hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
- list_for_each_entry(entry, &port_priv->agent_list,
- agent_list) {
+ list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
if (entry->agent.hi_tid == hi_tid) {
mad_agent = entry;
break;
@@ -1571,8 +1526,7 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid)
*/
list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
agent_list) {
- if (is_data_mad(mad_agent_priv,
- mad_send_wr->send_wr.wr.ud.mad_hdr) &&
+ if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
mad_send_wr->tid == tid && mad_send_wr->timeout) {
/* Verify request has not been canceled */
return (mad_send_wr->status == IB_WC_SUCCESS) ?
@@ -1628,14 +1582,14 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Defined behavior is to complete response before request */
- mad_recv_wc->wc->wr_id = mad_send_wr->wr_id;
+ mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
mad_recv_wc);
atomic_dec(&mad_agent_priv->refcount);
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
- mad_send_wc.wr_id = mad_send_wr->wr_id;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
} else {
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
@@ -1728,11 +1682,11 @@ local:
if (ret & IB_MAD_RESULT_CONSUMED)
goto out;
if (ret & IB_MAD_RESULT_REPLY) {
- /* Send response */
- if (!agent_send(response, &recv->grh, wc,
- port_priv->device,
- port_priv->port_num))
- response = NULL;
+ agent_send_response(&response->mad.mad,
+ &recv->grh, wc,
+ port_priv->device,
+ port_priv->port_num,
+ qp_info->qp->qp_num);
goto out;
}
}
@@ -1866,15 +1820,15 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
if (mad_send_wr->status != IB_WC_SUCCESS )
mad_send_wc->status = mad_send_wr->status;
- if (ret != IB_RMPP_RESULT_INTERNAL)
+ if (ret == IB_RMPP_RESULT_INTERNAL)
+ ib_rmpp_send_handler(mad_send_wc);
+ else
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
mad_send_wc);
/* Release reference on agent taken when sending */
if (atomic_dec_and_test(&mad_agent_priv->refcount))
wake_up(&mad_agent_priv->wait);
-
- kfree(mad_send_wr);
return;
done:
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
@@ -1888,6 +1842,7 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
struct ib_mad_qp_info *qp_info;
struct ib_mad_queue *send_queue;
struct ib_send_wr *bad_send_wr;
+ struct ib_mad_send_wc mad_send_wc;
unsigned long flags;
int ret;
@@ -1898,6 +1853,9 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
qp_info = send_queue->qp_info;
retry:
+ dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
+ pci_unmap_addr(mad_send_wr, mapping),
+ mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
queued_send_wr = NULL;
spin_lock_irqsave(&send_queue->lock, flags);
list_del(&mad_list->list);
@@ -1914,17 +1872,17 @@ retry:
}
spin_unlock_irqrestore(&send_queue->lock, flags);
- /* Restore client wr_id in WC and complete send */
- wc->wr_id = mad_send_wr->wr_id;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ mad_send_wc.status = wc->status;
+ mad_send_wc.vendor_err = wc->vendor_err;
if (atomic_read(&qp_info->snoop_count))
- snoop_send(qp_info, &mad_send_wr->send_wr,
- (struct ib_mad_send_wc *)wc,
+ snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
IB_MAD_SNOOP_SEND_COMPLETIONS);
- ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc);
+ ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
if (queued_send_wr) {
ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
- &bad_send_wr);
+ &bad_send_wr);
if (ret) {
printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
mad_send_wr = queued_send_wr;
@@ -2066,38 +2024,37 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
&cancel_list, agent_list) {
- mad_send_wc.wr_id = mad_send_wr->wr_id;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ list_del(&mad_send_wr->agent_list);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
-
- list_del(&mad_send_wr->agent_list);
- kfree(mad_send_wr);
atomic_dec(&mad_agent_priv->refcount);
}
}
static struct ib_mad_send_wr_private*
-find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id)
+find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_send_buf *send_buf)
{
struct ib_mad_send_wr_private *mad_send_wr;
list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
agent_list) {
- if (mad_send_wr->wr_id == wr_id)
+ if (&mad_send_wr->send_buf == send_buf)
return mad_send_wr;
}
list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
agent_list) {
- if (is_data_mad(mad_agent_priv,
- mad_send_wr->send_wr.wr.ud.mad_hdr) &&
- mad_send_wr->wr_id == wr_id)
+ if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
+ &mad_send_wr->send_buf == send_buf)
return mad_send_wr;
}
return NULL;
}
-int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
+int ib_modify_mad(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf, u32 timeout_ms)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
@@ -2107,7 +2064,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
agent);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id);
+ mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
return -EINVAL;
@@ -2119,7 +2076,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
}
- mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms;
+ mad_send_wr->send_buf.timeout_ms = timeout_ms;
if (active)
mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
else
@@ -2130,9 +2087,10 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
}
EXPORT_SYMBOL(ib_modify_mad);
-void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id)
+void ib_cancel_mad(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf)
{
- ib_modify_mad(mad_agent, wr_id, 0);
+ ib_modify_mad(mad_agent, send_buf, 0);
}
EXPORT_SYMBOL(ib_cancel_mad);
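Cancellation is likewise re-keyed from the old 64-bit wr_id to the send
buffer pointer, so a client that kept its msg from above can simply do
(sketch; the timeout value is illustrative):

	ib_modify_mad(agent, msg, 5000);	/* stretch the timeout */
	ib_cancel_mad(agent, msg);		/* or complete it immediately */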
@@ -2166,10 +2124,9 @@ static void local_completions(void *data)
* Defined behavior is to complete response
* before request
*/
- build_smp_wc(local->wr_id,
+ build_smp_wc((unsigned long) local->mad_send_wr,
be16_to_cpu(IB_LID_PERMISSIVE),
- 0 /* pkey index */,
- recv_mad_agent->agent.port_num, &wc);
+ 0, recv_mad_agent->agent.port_num, &wc);
local->mad_priv->header.recv_wc.wc = &wc;
local->mad_priv->header.recv_wc.mad_len =
@@ -2196,11 +2153,11 @@ local_send_completion:
/* Complete send */
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
- mad_send_wc.wr_id = local->wr_id;
+ mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
- snoop_send(mad_agent_priv->qp_info, &local->send_wr,
- &mad_send_wc,
- IB_MAD_SNOOP_SEND_COMPLETIONS);
+ snoop_send(mad_agent_priv->qp_info,
+ &local->mad_send_wr->send_buf,
+ &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
@@ -2221,8 +2178,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
if (!mad_send_wr->retries--)
return -ETIMEDOUT;
- mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr.
- wr.ud.timeout_ms);
+ mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
ret = ib_retry_rmpp(mad_send_wr);
@@ -2285,11 +2241,10 @@ static void timeout_sends(void *data)
mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
else
mad_send_wc.status = mad_send_wr->status;
- mad_send_wc.wr_id = mad_send_wr->wr_id;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
- kfree(mad_send_wr);
atomic_dec(&mad_agent_priv->refcount);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
}
@@ -2544,8 +2499,7 @@ error:
static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
{
ib_destroy_qp(qp_info->qp);
- if (qp_info->snoop_table)
- kfree(qp_info->snoop_table);
+ kfree(qp_info->snoop_table);
}
/*
@@ -2561,12 +2515,12 @@ static int ib_mad_port_open(struct ib_device *device,
char name[sizeof "ib_mad123"];
/* Create new device info */
- port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
+ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
return -ENOMEM;
}
- memset(port_priv, 0, sizeof *port_priv);
+
port_priv->device = device;
port_priv->port_num = port_num;
spin_lock_init(&port_priv->reg_lock);
@@ -2683,40 +2637,47 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
static void ib_mad_init_device(struct ib_device *device)
{
- int num_ports, cur_port, i;
+ int start, end, i;
if (device->node_type == IB_NODE_SWITCH) {
- num_ports = 1;
- cur_port = 0;
+ start = 0;
+ end = 0;
} else {
- num_ports = device->phys_port_cnt;
- cur_port = 1;
+ start = 1;
+ end = device->phys_port_cnt;
}
- for (i = 0; i < num_ports; i++, cur_port++) {
- if (ib_mad_port_open(device, cur_port)) {
+
+ for (i = start; i <= end; i++) {
+ if (ib_mad_port_open(device, i)) {
printk(KERN_ERR PFX "Couldn't open %s port %d\n",
- device->name, cur_port);
- goto error_device_open;
+ device->name, i);
+ goto error;
}
- if (ib_agent_port_open(device, cur_port)) {
+ if (ib_agent_port_open(device, i)) {
printk(KERN_ERR PFX "Couldn't open %s port %d "
"for agents\n",
- device->name, cur_port);
- goto error_device_open;
+ device->name, i);
+ goto error_agent;
}
}
return;
-error_device_open:
- while (i > 0) {
- cur_port--;
- if (ib_agent_port_close(device, cur_port))
+error_agent:
+ if (ib_mad_port_close(device, i))
+ printk(KERN_ERR PFX "Couldn't close %s port %d\n",
+ device->name, i);
+
+error:
+ i--;
+
+ while (i >= start) {
+ if (ib_agent_port_close(device, i))
printk(KERN_ERR PFX "Couldn't close %s port %d "
"for agents\n",
- device->name, cur_port);
- if (ib_mad_port_close(device, cur_port))
+ device->name, i);
+ if (ib_mad_port_close(device, i))
printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, cur_port);
+ device->name, i);
i--;
}
}
@@ -2754,7 +2715,6 @@ static int __init ib_mad_init_module(void)
int ret;
spin_lock_init(&ib_mad_port_list_lock);
- spin_lock_init(&ib_agent_port_list_lock);
ib_mad_cache = kmem_cache_create("ib_mad",
sizeof(struct ib_mad_private),
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index f1ba794e0da..570f78682af 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -118,9 +118,10 @@ struct ib_mad_send_wr_private {
struct ib_mad_list_head mad_list;
struct list_head agent_list;
struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_buf send_buf;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
struct ib_send_wr send_wr;
struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
- u64 wr_id; /* client WR ID */
__be64 tid;
unsigned long timeout;
int retries;
@@ -141,10 +142,7 @@ struct ib_mad_local_private {
struct list_head completion_list;
struct ib_mad_private *mad_priv;
struct ib_mad_agent_private *recv_mad_agent;
- struct ib_send_wr send_wr;
- struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
- u64 wr_id; /* client WR ID */
- __be64 tid;
+ struct ib_mad_send_wr_private *mad_send_wr;
};
struct ib_mad_mgmt_method_table {
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index e23836d0e21..3249e1d8c07 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -103,12 +103,12 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
static int data_offset(u8 mgmt_class)
{
if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
- return offsetof(struct ib_sa_mad, data);
+ return IB_MGMT_SA_HDR;
else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
(mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
- return offsetof(struct ib_vendor_mad, data);
+ return IB_MGMT_VENDOR_HDR;
else
- return offsetof(struct ib_rmpp_mad, data);
+ return IB_MGMT_RMPP_HDR;
}
static void format_ack(struct ib_rmpp_mad *ack,
@@ -135,55 +135,52 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
struct ib_mad_recv_wc *recv_wc)
{
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
- int hdr_len, ret;
+ int ret;
- hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr);
msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
- recv_wc->wc->pkey_index, rmpp_recv->ah, 1,
- hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len,
- GFP_KERNEL);
+ recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR,
+ IB_MGMT_RMPP_DATA, GFP_KERNEL);
if (!msg)
return;
- format_ack((struct ib_rmpp_mad *) msg->mad,
- (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
- ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr,
- &bad_send_wr);
+ format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad,
+ rmpp_recv);
+ msg->ah = rmpp_recv->ah;
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
ib_free_send_mad(msg);
}
-static int alloc_response_msg(struct ib_mad_agent *agent,
- struct ib_mad_recv_wc *recv_wc,
- struct ib_mad_send_buf **msg)
+static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
+ struct ib_mad_recv_wc *recv_wc)
{
- struct ib_mad_send_buf *m;
+ struct ib_mad_send_buf *msg;
struct ib_ah *ah;
- int hdr_len;
ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
recv_wc->recv_buf.grh, agent->port_num);
if (IS_ERR(ah))
- return PTR_ERR(ah);
-
- hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr);
- m = ib_create_send_mad(agent, recv_wc->wc->src_qp,
- recv_wc->wc->pkey_index, ah, 1, hdr_len,
- sizeof(struct ib_rmpp_mad) - hdr_len,
- GFP_KERNEL);
- if (IS_ERR(m)) {
+ return (void *) ah;
+
+ msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
+ recv_wc->wc->pkey_index, 1,
+ IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA,
+ GFP_KERNEL);
+ if (IS_ERR(msg))
ib_destroy_ah(ah);
- return PTR_ERR(m);
- }
- *msg = m;
- return 0;
+ else
+ msg->ah = ah;
+
+ return msg;
}
-static void free_msg(struct ib_mad_send_buf *msg)
+void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
{
- ib_destroy_ah(msg->send_wr.wr.ud.ah);
- ib_free_send_mad(msg);
+ struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
+
+ if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_ACK)
+ ib_destroy_ah(mad_send_wc->send_buf->ah);
+ ib_free_send_mad(mad_send_wc->send_buf);
}
static void nack_recv(struct ib_mad_agent_private *agent,
@@ -191,14 +188,13 @@ static void nack_recv(struct ib_mad_agent_private *agent,
{
struct ib_mad_send_buf *msg;
struct ib_rmpp_mad *rmpp_mad;
- struct ib_send_wr *bad_send_wr;
int ret;
- ret = alloc_response_msg(&agent->agent, recv_wc, &msg);
- if (ret)
+ msg = alloc_response_msg(&agent->agent, recv_wc);
+ if (IS_ERR(msg))
return;
- rmpp_mad = (struct ib_rmpp_mad *) msg->mad;
+ rmpp_mad = msg->mad;
memcpy(rmpp_mad, recv_wc->recv_buf.mad,
data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class));
@@ -210,9 +206,11 @@ static void nack_recv(struct ib_mad_agent_private *agent,
rmpp_mad->rmpp_hdr.seg_num = 0;
rmpp_mad->rmpp_hdr.paylen_newwin = 0;
- ret = ib_post_send_mad(&agent->agent, &msg->send_wr, &bad_send_wr);
- if (ret)
- free_msg(msg);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret) {
+ ib_destroy_ah(msg->ah);
+ ib_free_send_mad(msg);
+ }
}
static void recv_timeout_handler(void *data)
@@ -585,7 +583,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
int timeout;
u32 paylen;
- rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+ rmpp_mad = mad_send_wr->send_buf.mad;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num);
@@ -612,7 +610,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
}
/* 2 seconds for an ACK until we can find the packet lifetime */
- timeout = mad_send_wr->send_wr.wr.ud.timeout_ms;
+ timeout = mad_send_wr->send_buf.timeout_ms;
if (!timeout || timeout > 2000)
mad_send_wr->timeout = msecs_to_jiffies(2000);
mad_send_wr->seg_num++;
@@ -640,7 +638,7 @@ static void abort_send(struct ib_mad_agent_private *agent, __be64 tid,
wc.status = IB_WC_REM_ABORT_ERR;
wc.vendor_err = rmpp_status;
- wc.wr_id = mad_send_wr->wr_id;
+ wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
out:
@@ -694,12 +692,12 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
if (seg_num > mad_send_wr->last_ack) {
mad_send_wr->last_ack = seg_num;
- mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries;
+ mad_send_wr->retries = mad_send_wr->send_buf.retries;
}
mad_send_wr->newwin = newwin;
if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
/* If no response is expected, the ACK completes the send */
- if (!mad_send_wr->send_wr.wr.ud.timeout_ms) {
+ if (!mad_send_wr->send_buf.timeout_ms) {
struct ib_mad_send_wc wc;
ib_mark_mad_done(mad_send_wr);
@@ -707,13 +705,13 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
wc.status = IB_WC_SUCCESS;
wc.vendor_err = 0;
- wc.wr_id = mad_send_wr->wr_id;
+ wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
}
if (mad_send_wr->refcount == 1)
- ib_reset_mad_timeout(mad_send_wr, mad_send_wr->
- send_wr.wr.ud.timeout_ms);
+ ib_reset_mad_timeout(mad_send_wr,
+ mad_send_wr->send_buf.timeout_ms);
} else if (mad_send_wr->refcount == 1 &&
mad_send_wr->seg_num < mad_send_wr->newwin &&
mad_send_wr->seg_num <= mad_send_wr->total_seg) {
@@ -842,7 +840,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
struct ib_rmpp_mad *rmpp_mad;
int i, total_len, ret;
- rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+ rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED;
@@ -863,7 +861,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) /
(sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset);
- mad_send_wr->pad = total_len - offsetof(struct ib_rmpp_mad, data) -
+ mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR -
be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
/* We need to wait for the final ACK even if there isn't a response */
@@ -878,23 +876,15 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_rmpp_mad *rmpp_mad;
- struct ib_mad_send_buf *msg;
int ret;
- rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+ rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
- if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
- msg = (struct ib_mad_send_buf *) (unsigned long)
- mad_send_wc->wr_id;
- if (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_ACK)
- ib_free_send_mad(msg);
- else
- free_msg(msg);
+ if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */
- }
if (mad_send_wc->status != IB_WC_SUCCESS ||
mad_send_wr->status != IB_WC_SUCCESS)
@@ -905,7 +895,7 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
mad_send_wr->timeout =
- msecs_to_jiffies(mad_send_wr->send_wr.wr.ud.timeout_ms);
+ msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
return IB_RMPP_RESULT_PROCESSED; /* Send done */
}
@@ -926,7 +916,7 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr)
struct ib_rmpp_mad *rmpp_mad;
int ret;
- rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+ rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h
index c4924dfb8e7..f0616fd2249 100644
--- a/drivers/infiniband/core/mad_rmpp.h
+++ b/drivers/infiniband/core/mad_rmpp.h
@@ -51,6 +51,8 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_send_wc *mad_send_wc);
+void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc);
+
void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent);
int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr);
diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c
index 35df5010e72..c972d723576 100644
--- a/drivers/infiniband/core/packer.c
+++ b/drivers/infiniband/core/packer.c
@@ -33,6 +33,8 @@
* $Id: packer.c 1349 2004-12-16 21:09:43Z roland $
*/
+#include <linux/string.h>
+
#include <rdma/ib_pack.h>
static u64 value_read(int offset, int size, void *structure)
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 78de2dd1a4f..acda7d63d6f 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -43,6 +43,7 @@
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/idr.h>
+#include <linux/workqueue.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_sa.h>
@@ -73,11 +74,10 @@ struct ib_sa_device {
struct ib_sa_query {
void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
void (*release)(struct ib_sa_query *);
- struct ib_sa_port *port;
- struct ib_sa_mad *mad;
- struct ib_sa_sm_ah *sm_ah;
- DECLARE_PCI_UNMAP_ADDR(mapping)
- int id;
+ struct ib_sa_port *port;
+ struct ib_mad_send_buf *mad_buf;
+ struct ib_sa_sm_ah *sm_ah;
+ int id;
};
struct ib_sa_service_query {
@@ -426,6 +426,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
unsigned long flags;
struct ib_mad_agent *agent;
+ struct ib_mad_send_buf *mad_buf;
spin_lock_irqsave(&idr_lock, flags);
if (idr_find(&query_idr, id) != query) {
@@ -433,9 +434,10 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
return;
}
agent = query->port->agent;
+ mad_buf = query->mad_buf;
spin_unlock_irqrestore(&idr_lock, flags);
- ib_cancel_mad(agent, id);
+ ib_cancel_mad(agent, mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
@@ -457,71 +459,46 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
static int send_mad(struct ib_sa_query *query, int timeout_ms)
{
- struct ib_sa_port *port = query->port;
unsigned long flags;
- int ret;
- struct ib_sge gather_list;
- struct ib_send_wr *bad_wr, wr = {
- .opcode = IB_WR_SEND,
- .sg_list = &gather_list,
- .num_sge = 1,
- .send_flags = IB_SEND_SIGNALED,
- .wr = {
- .ud = {
- .mad_hdr = &query->mad->mad_hdr,
- .remote_qpn = 1,
- .remote_qkey = IB_QP1_QKEY,
- .timeout_ms = timeout_ms,
- }
- }
- };
+ int ret, id;
retry:
if (!idr_pre_get(&query_idr, GFP_ATOMIC))
return -ENOMEM;
spin_lock_irqsave(&idr_lock, flags);
- ret = idr_get_new(&query_idr, query, &query->id);
+ ret = idr_get_new(&query_idr, query, &id);
spin_unlock_irqrestore(&idr_lock, flags);
if (ret == -EAGAIN)
goto retry;
if (ret)
return ret;
- wr.wr_id = query->id;
+ query->mad_buf->timeout_ms = timeout_ms;
+ query->mad_buf->context[0] = query;
+ query->id = id;
- spin_lock_irqsave(&port->ah_lock, flags);
- kref_get(&port->sm_ah->ref);
- query->sm_ah = port->sm_ah;
- wr.wr.ud.ah = port->sm_ah->ah;
- spin_unlock_irqrestore(&port->ah_lock, flags);
+ spin_lock_irqsave(&query->port->ah_lock, flags);
+ kref_get(&query->port->sm_ah->ref);
+ query->sm_ah = query->port->sm_ah;
+ spin_unlock_irqrestore(&query->port->ah_lock, flags);
- gather_list.addr = dma_map_single(port->agent->device->dma_device,
- query->mad,
- sizeof (struct ib_sa_mad),
- DMA_TO_DEVICE);
- gather_list.length = sizeof (struct ib_sa_mad);
- gather_list.lkey = port->agent->mr->lkey;
- pci_unmap_addr_set(query, mapping, gather_list.addr);
+ query->mad_buf->ah = query->sm_ah->ah;
- ret = ib_post_send_mad(port->agent, &wr, &bad_wr);
+ ret = ib_post_send_mad(query->mad_buf, NULL);
if (ret) {
- dma_unmap_single(port->agent->device->dma_device,
- pci_unmap_addr(query, mapping),
- sizeof (struct ib_sa_mad),
- DMA_TO_DEVICE);
- kref_put(&query->sm_ah->ref, free_sm_ah);
spin_lock_irqsave(&idr_lock, flags);
- idr_remove(&query_idr, query->id);
+ idr_remove(&query_idr, id);
spin_unlock_irqrestore(&idr_lock, flags);
+
+ kref_put(&query->sm_ah->ref, free_sm_ah);
}
/*
* It's not safe to dereference query any more, because the
* send may already have completed and freed the query in
- * another context. So use wr.wr_id, which has a copy of the
- * query's id.
+ * another context.
*/
- return ret ? ret : wr.wr_id;
+ return ret ? ret : id;
}
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
@@ -543,7 +520,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
- kfree(sa_query->mad);
kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}
@@ -574,7 +550,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
struct ib_sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, unsigned int __nocast gfp_mask,
+ int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_path_rec *resp,
void *context),
@@ -583,43 +559,58 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
{
struct ib_sa_path_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port];
- struct ib_mad_agent *agent = port->agent;
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
int ret;
+ if (!sa_dev)
+ return -ENODEV;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
query = kmalloc(sizeof *query, gfp_mask);
if (!query)
return -ENOMEM;
- query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
- if (!query->sa_query.mad) {
- kfree(query);
- return -ENOMEM;
+
+ query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
+ 0, IB_MGMT_SA_HDR,
+ IB_MGMT_SA_DATA, gfp_mask);
+ if (!query->sa_query.mad_buf) {
+ ret = -ENOMEM;
+ goto err1;
}
query->callback = callback;
query->context = context;
- init_mad(query->sa_query.mad, agent);
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
- query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
- query->sa_query.release = ib_sa_path_rec_release;
- query->sa_query.port = port;
- query->sa_query.mad->mad_hdr.method = IB_MGMT_METHOD_GET;
- query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
- query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+ query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
+ query->sa_query.release = ib_sa_path_rec_release;
+ query->sa_query.port = port;
+ mad->mad_hdr.method = IB_MGMT_METHOD_GET;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
- ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
- rec, query->sa_query.mad->data);
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
*sa_query = &query->sa_query;
ret = send_mad(&query->sa_query, timeout_ms);
- if (ret < 0) {
- *sa_query = NULL;
- kfree(query->sa_query.mad);
- kfree(query);
- }
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+err2:
+ *sa_query = NULL;
+ ib_free_send_mad(query->sa_query.mad_buf);
+
+err1:
+ kfree(query);
return ret;
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
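A caller-side sketch of the rewritten query path; my_callback, my_context
and the chosen component-mask bits are illustrative (the IB_SA_PATH_REC_*
masks come from <rdma/ib_sa.h>):

	struct ib_sa_query *query;
	struct ib_sa_path_rec rec;
	int id;

	memset(&rec, 0, sizeof rec);
	/* rec.sgid / rec.dgid filled in by the caller */
	id = ib_sa_path_rec_get(device, port_num, &rec,
				IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_DGID,
				1000, GFP_KERNEL,
				my_callback, my_context, &query);
	if (id < 0)
		return id;

	/* on success, (id, query) can later feed ib_sa_cancel_query() */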
@@ -643,7 +634,6 @@ static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
{
- kfree(sa_query->mad);
kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
}
@@ -676,7 +666,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
struct ib_sa_service_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, unsigned int __nocast gfp_mask,
+ int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_service_rec *resp,
void *context),
@@ -685,10 +675,17 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
{
struct ib_sa_service_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port];
- struct ib_mad_agent *agent = port->agent;
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
int ret;
+ if (!sa_dev)
+ return -ENODEV;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
if (method != IB_MGMT_METHOD_GET &&
method != IB_MGMT_METHOD_SET &&
method != IB_SA_METHOD_DELETE)
@@ -697,37 +694,45 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
query = kmalloc(sizeof *query, gfp_mask);
if (!query)
return -ENOMEM;
- query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
- if (!query->sa_query.mad) {
- kfree(query);
- return -ENOMEM;
+
+ query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
+ 0, IB_MGMT_SA_HDR,
+ IB_MGMT_SA_DATA, gfp_mask);
+ if (!query->sa_query.mad_buf) {
+ ret = -ENOMEM;
+ goto err1;
}
query->callback = callback;
query->context = context;
- init_mad(query->sa_query.mad, agent);
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
- query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
- query->sa_query.release = ib_sa_service_rec_release;
- query->sa_query.port = port;
- query->sa_query.mad->mad_hdr.method = method;
- query->sa_query.mad->mad_hdr.attr_id =
- cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
- query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+ query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
+ query->sa_query.release = ib_sa_service_rec_release;
+ query->sa_query.port = port;
+ mad->mad_hdr.method = method;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
- rec, query->sa_query.mad->data);
+ rec, mad->data);
*sa_query = &query->sa_query;
ret = send_mad(&query->sa_query, timeout_ms);
- if (ret < 0) {
- *sa_query = NULL;
- kfree(query->sa_query.mad);
- kfree(query);
- }
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+err2:
+ *sa_query = NULL;
+ ib_free_send_mad(query->sa_query.mad_buf);
+
+err1:
+ kfree(query);
return ret;
}
EXPORT_SYMBOL(ib_sa_service_rec_query);
@@ -751,7 +756,6 @@ static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
{
- kfree(sa_query->mad);
kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
}
@@ -759,7 +763,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, unsigned int __nocast gfp_mask,
+ int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_mcmember_rec *resp,
void *context),
@@ -768,60 +772,69 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
{
struct ib_sa_mcmember_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port];
- struct ib_mad_agent *agent = port->agent;
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
int ret;
+ if (!sa_dev)
+ return -ENODEV;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
query = kmalloc(sizeof *query, gfp_mask);
if (!query)
return -ENOMEM;
- query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
- if (!query->sa_query.mad) {
- kfree(query);
- return -ENOMEM;
+
+ query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
+ 0, IB_MGMT_SA_HDR,
+ IB_MGMT_SA_DATA, gfp_mask);
+ if (!query->sa_query.mad_buf) {
+ ret = -ENOMEM;
+ goto err1;
}
query->callback = callback;
query->context = context;
- init_mad(query->sa_query.mad, agent);
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
- query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
- query->sa_query.release = ib_sa_mcmember_rec_release;
- query->sa_query.port = port;
- query->sa_query.mad->mad_hdr.method = method;
- query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
- query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+ query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
+ query->sa_query.release = ib_sa_mcmember_rec_release;
+ query->sa_query.port = port;
+ mad->mad_hdr.method = method;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
- rec, query->sa_query.mad->data);
+ rec, mad->data);
*sa_query = &query->sa_query;
ret = send_mad(&query->sa_query, timeout_ms);
- if (ret < 0) {
- *sa_query = NULL;
- kfree(query->sa_query.mad);
- kfree(query);
- }
+ if (ret < 0)
+ goto err2;
return ret;
+
+err2:
+ *sa_query = NULL;
+ ib_free_send_mad(query->sa_query.mad_buf);
+
+err1:
+ kfree(query);
+ return ret;
}
EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
- struct ib_sa_query *query;
+ struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
unsigned long flags;
- spin_lock_irqsave(&idr_lock, flags);
- query = idr_find(&query_idr, mad_send_wc->wr_id);
- spin_unlock_irqrestore(&idr_lock, flags);
-
- if (!query)
- return;
-
if (query->callback)
switch (mad_send_wc->status) {
case IB_WC_SUCCESS:
@@ -838,30 +851,25 @@ static void send_handler(struct ib_mad_agent *agent,
break;
}
- dma_unmap_single(agent->device->dma_device,
- pci_unmap_addr(query, mapping),
- sizeof (struct ib_sa_mad),
- DMA_TO_DEVICE);
- kref_put(&query->sm_ah->ref, free_sm_ah);
-
- query->release(query);
-
spin_lock_irqsave(&idr_lock, flags);
- idr_remove(&query_idr, mad_send_wc->wr_id);
+ idr_remove(&query_idr, query->id);
spin_unlock_irqrestore(&idr_lock, flags);
+
+ ib_free_send_mad(mad_send_wc->send_buf);
+ kref_put(&query->sm_ah->ref, free_sm_ah);
+ query->release(query);
}
static void recv_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_sa_query *query;
- unsigned long flags;
+ struct ib_mad_send_buf *mad_buf;
- spin_lock_irqsave(&idr_lock, flags);
- query = idr_find(&query_idr, mad_recv_wc->wc->wr_id);
- spin_unlock_irqrestore(&idr_lock, flags);
+ mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
+ query = mad_buf->context[0];
- if (query && query->callback) {
+ if (query->callback) {
if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
query->callback(query,
mad_recv_wc->recv_buf.mad->mad_hdr.status ?
@@ -975,6 +983,7 @@ static int __init ib_sa_init(void)
static void __exit ib_sa_cleanup(void)
{
ib_unregister_client(&sa_client);
+ idr_destroy(&query_idr);
}
module_init(ib_sa_init);
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index db25503a073..2b3c40198f8 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -39,6 +39,8 @@
#ifndef __SMI_H_
#define __SMI_H_
+#include <rdma/ib_smi.h>
+
int smi_handle_dr_smp_recv(struct ib_smp *smp,
u8 node_type,
int port_num,
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 211ba3223f6..08648b1a387 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -36,6 +36,9 @@
#include "core_priv.h"
+#include <linux/slab.h>
+#include <linux/string.h>
+
#include <rdma/ib_mad.h>
struct ib_port {
@@ -65,6 +68,11 @@ struct port_table_attribute {
int index;
};
+static inline int ibdev_is_alive(const struct ib_device *dev)
+{
+ return dev->reg_state == IB_DEV_REGISTERED;
+}
+
static ssize_t port_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -74,6 +82,8 @@ static ssize_t port_attr_show(struct kobject *kobj,
if (!port_attr->show)
return -EIO;
+ if (!ibdev_is_alive(p->ibdev))
+ return -ENODEV;
return port_attr->show(p, port_attr, buf);
}
@@ -300,14 +310,13 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
if (!p->ibdev->process_mad)
return sprintf(buf, "N/A (no PMA)\n");
- in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
if (!in_mad || !out_mad) {
ret = -ENOMEM;
goto out;
}
- memset(in_mad, 0, sizeof *in_mad);
in_mad->mad_hdr.base_version = 1;
in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
in_mad->mad_hdr.class_version = 1;
@@ -501,10 +510,9 @@ static int add_port(struct ib_device *device, int port_num)
if (ret)
return ret;
- p = kmalloc(sizeof *p, GFP_KERNEL);
+ p = kzalloc(sizeof *p, GFP_KERNEL);
if (!p)
return -ENOMEM;
- memset(p, 0, sizeof *p);
p->ibdev = device;
p->port_num = port_num;
@@ -581,6 +589,9 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf)
{
struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
+ if (!ibdev_is_alive(dev))
+ return -ENODEV;
+
switch (dev->node_type) {
case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
@@ -595,6 +606,9 @@ static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf)
struct ib_device_attr attr;
ssize_t ret;
+ if (!ibdev_is_alive(dev))
+ return -ENODEV;
+
ret = ib_query_device(dev, &attr);
if (ret)
return ret;
@@ -612,6 +626,9 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf)
struct ib_device_attr attr;
ssize_t ret;
+ if (!ibdev_is_alive(dev))
+ return -ENODEV;
+
ret = ib_query_device(dev, &attr);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index d0f0b0a2edd..6e15787d1de 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -41,37 +41,81 @@
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/cdev.h>
+#include <linux/idr.h>
#include <asm/uaccess.h>
-#include "ucm.h"
+#include <rdma/ib_cm.h>
+#include <rdma/ib_user_cm.h>
MODULE_AUTHOR("Libor Michalek");
MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
MODULE_LICENSE("Dual BSD/GPL");
-static int ucm_debug_level;
+struct ib_ucm_device {
+ int devnum;
+ struct cdev dev;
+ struct class_device class_dev;
+ struct ib_device *ib_dev;
+};
+
+struct ib_ucm_file {
+ struct semaphore mutex;
+ struct file *filp;
+ struct ib_ucm_device *device;
+
+ struct list_head ctxs;
+ struct list_head events;
+ wait_queue_head_t poll_wait;
+};
+
+struct ib_ucm_context {
+ int id;
+ wait_queue_head_t wait;
+ atomic_t ref;
+ int events_reported;
+
+ struct ib_ucm_file *file;
+ struct ib_cm_id *cm_id;
+ __u64 uid;
+
+ struct list_head events; /* list of pending events. */
+ struct list_head file_list; /* member in file ctx list */
+};
+
+struct ib_ucm_event {
+ struct ib_ucm_context *ctx;
+ struct list_head file_list; /* member in file event list */
+ struct list_head ctx_list; /* member in ctx event list */
-module_param_named(debug_level, ucm_debug_level, int, 0644);
-MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+ struct ib_cm_id *cm_id;
+ struct ib_ucm_event_resp resp;
+ void *data;
+ void *info;
+ int data_len;
+ int info_len;
+};
enum {
IB_UCM_MAJOR = 231,
- IB_UCM_MINOR = 255
+ IB_UCM_BASE_MINOR = 224,
+ IB_UCM_MAX_DEVICES = 32
};
-#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR)
+#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
-#define PFX "UCM: "
+static void ib_ucm_add_one(struct ib_device *device);
+static void ib_ucm_remove_one(struct ib_device *device);
-#define ucm_dbg(format, arg...) \
- do { \
- if (ucm_debug_level > 0) \
- printk(KERN_DEBUG PFX format, ## arg); \
- } while (0)
+static struct ib_client ucm_client = {
+ .name = "ucm",
+ .add = ib_ucm_add_one,
+ .remove = ib_ucm_remove_one
+};
-static struct semaphore ctx_id_mutex;
-static struct idr ctx_id_table;
+static DECLARE_MUTEX(ctx_id_mutex);
+static DEFINE_IDR(ctx_id_table);
+static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES);
static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
{
@@ -128,11 +172,10 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
struct ib_ucm_context *ctx;
int result;
- ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
if (!ctx)
return NULL;
- memset(ctx, 0, sizeof *ctx);
atomic_set(&ctx->ref, 1);
init_waitqueue_head(&ctx->wait);
ctx->file = file;
@@ -152,17 +195,13 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
goto error;
list_add_tail(&ctx->file_list, &file->ctxs);
- ucm_dbg("Allocated CM ID <%d>\n", ctx->id);
return ctx;
error:
kfree(ctx);
return NULL;
}
-/*
- * Event portion of the API, handle CM events
- * and allow event polling.
- */
+
static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath,
struct ib_sa_path_rec *kpath)
{
@@ -209,6 +248,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
ureq->retry_count = kreq->retry_count;
ureq->rnr_retry_count = kreq->rnr_retry_count;
ureq->srq = kreq->srq;
+ ureq->port = kreq->port;
ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path);
ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path);
@@ -295,6 +335,8 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
case IB_CM_SIDR_REQ_RECEIVED:
uvt->resp.u.sidr_req_resp.pkey =
evt->param.sidr_req_rcvd.pkey;
+ uvt->resp.u.sidr_req_resp.port =
+ evt->param.sidr_req_rcvd.port;
uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
break;
case IB_CM_SIDR_REP_RECEIVED:
@@ -343,11 +385,10 @@ static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
ctx = cm_id->context;
- uevent = kmalloc(sizeof(*uevent), GFP_KERNEL);
+ uevent = kzalloc(sizeof *uevent, GFP_KERNEL);
if (!uevent)
goto err1;
- memset(uevent, 0, sizeof(*uevent));
uevent->ctx = ctx;
uevent->cm_id = cm_id;
uevent->resp.uid = ctx->uid;
@@ -387,9 +428,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- /*
- * wait
- */
+
down(&file->mutex);
while (list_empty(&file->events)) {
@@ -471,7 +510,6 @@ done:
return result;
}
-
static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
const char __user *inbuf,
int in_len, int out_len)
@@ -494,29 +532,27 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
return -ENOMEM;
ctx->uid = cmd.uid;
- ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx);
+ ctx->cm_id = ib_create_cm_id(file->device->ib_dev,
+ ib_ucm_event_handler, ctx);
if (IS_ERR(ctx->cm_id)) {
result = PTR_ERR(ctx->cm_id);
- goto err;
+ goto err1;
}
resp.id = ctx->id;
if (copy_to_user((void __user *)(unsigned long)cmd.response,
&resp, sizeof(resp))) {
result = -EFAULT;
- goto err;
+ goto err2;
}
-
return 0;
-err:
+err2:
+ ib_destroy_cm_id(ctx->cm_id);
+err1:
down(&ctx_id_mutex);
idr_remove(&ctx_id_table, ctx->id);
up(&ctx_id_mutex);
-
- if (!IS_ERR(ctx->cm_id))
- ib_destroy_cm_id(ctx->cm_id);
-
kfree(ctx);
return result;
}
@@ -1184,9 +1220,6 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
- ucm_dbg("Write. cmd <%d> in <%d> out <%d> len <%Zu>\n",
- hdr.cmd, hdr.in, hdr.out, len);
-
if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
return -EINVAL;
@@ -1231,8 +1264,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp)
filp->private_data = file;
file->filp = filp;
-
- ucm_dbg("Created struct\n");
+ file->device = container_of(inode->i_cdev, struct ib_ucm_device, dev);
return 0;
}
@@ -1263,7 +1295,17 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
return 0;
}
-static struct file_operations ib_ucm_fops = {
+static void ib_ucm_release_class_dev(struct class_device *class_dev)
+{
+ struct ib_ucm_device *dev;
+
+ dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+ cdev_del(&dev->dev);
+ clear_bit(dev->devnum, dev_map);
+ kfree(dev);
+}
+
+static struct file_operations ucm_fops = {
.owner = THIS_MODULE,
.open = ib_ucm_open,
.release = ib_ucm_close,
@@ -1271,55 +1313,141 @@ static struct file_operations ib_ucm_fops = {
.poll = ib_ucm_poll,
};
+static struct class ucm_class = {
+ .name = "infiniband_cm",
+ .release = ib_ucm_release_class_dev
+};
-static struct class *ib_ucm_class;
-static struct cdev ib_ucm_cdev;
+static ssize_t show_dev(struct class_device *class_dev, char *buf)
+{
+ struct ib_ucm_device *dev;
+
+ dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+ return print_dev_t(buf, dev->dev.dev);
+}
+static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
-static int __init ib_ucm_init(void)
+static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
{
- int result;
+ struct ib_ucm_device *dev;
+
+ dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+ return sprintf(buf, "%s\n", dev->ib_dev->name);
+}
+static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
- result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm");
- if (result) {
- ucm_dbg("Error <%d> registering dev\n", result);
- goto err_chr;
- }
+static void ib_ucm_add_one(struct ib_device *device)
+{
+ struct ib_ucm_device *ucm_dev;
+
+ if (!device->alloc_ucontext)
+ return;
+
+ ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
+ if (!ucm_dev)
+ return;
- cdev_init(&ib_ucm_cdev, &ib_ucm_fops);
+ ucm_dev->ib_dev = device;
+
+ ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
+ if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES)
+ goto err;
+
+ set_bit(ucm_dev->devnum, dev_map);
+
+ cdev_init(&ucm_dev->dev, &ucm_fops);
+ ucm_dev->dev.owner = THIS_MODULE;
+ kobject_set_name(&ucm_dev->dev.kobj, "ucm%d", ucm_dev->devnum);
+ if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
+ goto err;
- result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1);
- if (result) {
- ucm_dbg("Error <%d> adding cdev\n", result);
+ ucm_dev->class_dev.class = &ucm_class;
+ ucm_dev->class_dev.dev = device->dma_device;
+ snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d",
+ ucm_dev->devnum);
+ if (class_device_register(&ucm_dev->class_dev))
goto err_cdev;
- }
- ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm");
- if (IS_ERR(ib_ucm_class)) {
- result = PTR_ERR(ib_ucm_class);
- ucm_dbg("Error <%d> creating class\n", result);
+ if (class_device_create_file(&ucm_dev->class_dev,
+ &class_device_attr_dev))
+ goto err_class;
+ if (class_device_create_file(&ucm_dev->class_dev,
+ &class_device_attr_ibdev))
goto err_class;
+
+ ib_set_client_data(device, &ucm_client, ucm_dev);
+ return;
+
+err_class:
+ class_device_unregister(&ucm_dev->class_dev);
+err_cdev:
+ cdev_del(&ucm_dev->dev);
+ clear_bit(ucm_dev->devnum, dev_map);
+err:
+ kfree(ucm_dev);
+ return;
+}
+
+static void ib_ucm_remove_one(struct ib_device *device)
+{
+ struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client);
+
+ if (!ucm_dev)
+ return;
+
+ class_device_unregister(&ucm_dev->class_dev);
+}
+
+static ssize_t show_abi_version(struct class *class, char *buf)
+{
+ return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
+}
+static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static int __init ib_ucm_init(void)
+{
+ int ret;
+
+ ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
+ "infiniband_cm");
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't register device number\n");
+ goto err;
}
- class_device_create(ib_ucm_class, IB_UCM_DEV, NULL, "ucm");
+ ret = class_register(&ucm_class);
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n");
+ goto err_chrdev;
+ }
- idr_init(&ctx_id_table);
- init_MUTEX(&ctx_id_mutex);
+ ret = class_create_file(&ucm_class, &class_attr_abi_version);
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't create abi_version attribute\n");
+ goto err_class;
+ }
+ ret = ib_register_client(&ucm_client);
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't register client\n");
+ goto err_class;
+ }
return 0;
+
err_class:
- cdev_del(&ib_ucm_cdev);
-err_cdev:
- unregister_chrdev_region(IB_UCM_DEV, 1);
-err_chr:
- return result;
+ class_unregister(&ucm_class);
+err_chrdev:
+ unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+err:
+ return ret;
}
static void __exit ib_ucm_cleanup(void)
{
- class_device_destroy(ib_ucm_class, IB_UCM_DEV);
- class_destroy(ib_ucm_class);
- cdev_del(&ib_ucm_cdev);
- unregister_chrdev_region(IB_UCM_DEV, 1);
+ ib_unregister_client(&ucm_client);
+ class_unregister(&ucm_class);
+ unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+ idr_destroy(&ctx_id_table);
}
module_init(ib_ucm_init);
diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h
deleted file mode 100644
index f46f37bc120..00000000000
--- a/drivers/infiniband/core/ucm.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $
- */
-
-#ifndef UCM_H
-#define UCM_H
-
-#include <linux/fs.h>
-#include <linux/device.h>
-#include <linux/cdev.h>
-#include <linux/idr.h>
-
-#include <rdma/ib_cm.h>
-#include <rdma/ib_user_cm.h>
-
-struct ib_ucm_file {
- struct semaphore mutex;
- struct file *filp;
-
- struct list_head ctxs; /* list of active connections */
- struct list_head events; /* list of pending events */
- wait_queue_head_t poll_wait;
-};
-
-struct ib_ucm_context {
- int id;
- wait_queue_head_t wait;
- atomic_t ref;
- int events_reported;
-
- struct ib_ucm_file *file;
- struct ib_cm_id *cm_id;
- __u64 uid;
-
- struct list_head events; /* list of pending events. */
- struct list_head file_list; /* member in file ctx list */
-};
-
-struct ib_ucm_event {
- struct ib_ucm_context *ctx;
- struct list_head file_list; /* member in file event list */
- struct list_head ctx_list; /* member in ctx event list */
-
- struct ib_cm_id *cm_id;
- struct ib_ucm_event_resp resp;
- void *data;
- void *info;
- int data_len;
- int info_len;
-};
-
-#endif /* UCM_H */
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 527b23450ab..997c07db6d8 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -34,6 +34,7 @@
*/
#include <linux/errno.h>
+#include <linux/string.h>
#include <rdma/ib_pack.h>
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index a64d6b4dcc1..eb7f52537cc 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -31,7 +31,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- * $Id: user_mad.c 2814 2005-07-06 19:14:09Z halr $
+ * $Id: user_mad.c 4010 2005-11-09 23:11:56Z roland $
*/
#include <linux/module.h>
@@ -64,18 +64,42 @@ enum {
IB_UMAD_MINOR_BASE = 0
};
+/*
+ * Our lifetime rules for these structs are the following: each time a
+ * device special file is opened, we look up the corresponding struct
+ * ib_umad_port by minor in the umad_port[] table while holding the
+ * port_lock. If this lookup succeeds, we take a reference on the
+ * ib_umad_port's struct ib_umad_device while still holding the
+ * port_lock; if the lookup fails, we fail the open(). We drop these
+ * references in the corresponding close().
+ *
+ * In addition to references coming from open character devices, there
+ * is one more reference to each ib_umad_device representing the
+ * module's reference taken when allocating the ib_umad_device in
+ * ib_umad_add_one().
+ *
+ * When destroying an ib_umad_device, we clear all of its
+ * ib_umad_ports from umad_port[] while holding port_lock before
+ * dropping the module's reference to the ib_umad_device. This is
+ * always safe because any open() calls will either succeed and obtain
+ * a reference before we clear the umad_port[] entries, or fail after
+ * we clear the umad_port[] entries.
+ */
+
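
In code, the rule in the first paragraph reduces to a lookup-then-ref sequence done entirely under port_lock; ib_umad_open() and ib_umad_sm_open() below open-code it. A condensed sketch (the helper name get_port() is hypothetical):

static struct ib_umad_port *get_port(int minor)
{
	struct ib_umad_port *port;

	spin_lock(&port_lock);
	port = umad_port[minor];
	/*
	 * Take the device reference while still holding port_lock:
	 * ib_umad_kill_port() clears umad_port[] under the same lock
	 * before dropping the final reference, so a non-NULL entry
	 * here is guaranteed to still be backed by a live device.
	 */
	if (port)
		kref_get(&port->umad_dev->ref);
	spin_unlock(&port_lock);

	return port;		/* NULL => fail the open() with -ENXIO */
}
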
struct ib_umad_port {
- int devnum;
- struct cdev dev;
- struct class_device class_dev;
+ struct cdev *dev;
+ struct class_device *class_dev;
- int sm_devnum;
- struct cdev sm_dev;
- struct class_device sm_class_dev;
+ struct cdev *sm_dev;
+ struct class_device *sm_class_dev;
struct semaphore sm_sem;
+ struct rw_semaphore mutex;
+ struct list_head file_list;
+
struct ib_device *ib_dev;
struct ib_umad_device *umad_dev;
+ int dev_num;
u8 port_num;
};
@@ -86,42 +110,59 @@ struct ib_umad_device {
};
struct ib_umad_file {
- struct ib_umad_port *port;
- spinlock_t recv_lock;
- struct list_head recv_list;
- wait_queue_head_t recv_wait;
- struct rw_semaphore agent_mutex;
- struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
- struct ib_mr *mr[IB_UMAD_MAX_AGENTS];
+ struct ib_umad_port *port;
+ struct list_head recv_list;
+ struct list_head port_list;
+ spinlock_t recv_lock;
+ wait_queue_head_t recv_wait;
+ struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
+ int agents_dead;
};
struct ib_umad_packet {
- struct ib_ah *ah;
struct ib_mad_send_buf *msg;
struct list_head list;
int length;
- DECLARE_PCI_UNMAP_ADDR(mapping)
struct ib_user_mad mad;
};
+static struct class *umad_class;
+
static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
-static spinlock_t map_lock;
+
+static DEFINE_SPINLOCK(port_lock);
+static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device);
+static void ib_umad_release_dev(struct kref *ref)
+{
+ struct ib_umad_device *dev =
+ container_of(ref, struct ib_umad_device, ref);
+
+ kfree(dev);
+}
+
+/* caller must hold port->mutex at least for reading */
+static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
+{
+ return file->agents_dead ? NULL : file->agent[id];
+}
+
static int queue_packet(struct ib_umad_file *file,
struct ib_mad_agent *agent,
struct ib_umad_packet *packet)
{
int ret = 1;
- down_read(&file->agent_mutex);
+ down_read(&file->port->mutex);
+
for (packet->mad.hdr.id = 0;
packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
packet->mad.hdr.id++)
- if (agent == file->agent[packet->mad.hdr.id]) {
+ if (agent == __get_agent(file, packet->mad.hdr.id)) {
spin_lock_irq(&file->recv_lock);
list_add_tail(&packet->list, &file->recv_list);
spin_unlock_irq(&file->recv_lock);
@@ -130,7 +171,7 @@ static int queue_packet(struct ib_umad_file *file,
break;
}
- up_read(&file->agent_mutex);
+ up_read(&file->port->mutex);
return ret;
}
@@ -139,22 +180,19 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *send_wc)
{
struct ib_umad_file *file = agent->context;
- struct ib_umad_packet *timeout, *packet =
- (void *) (unsigned long) send_wc->wr_id;
+ struct ib_umad_packet *timeout;
+ struct ib_umad_packet *packet = send_wc->send_buf->context[0];
- ib_destroy_ah(packet->msg->send_wr.wr.ud.ah);
+ ib_destroy_ah(packet->msg->ah);
ib_free_send_mad(packet->msg);
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
- timeout = kmalloc(sizeof *timeout + sizeof (struct ib_mad_hdr),
- GFP_KERNEL);
+ timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL);
if (!timeout)
goto out;
- memset(timeout, 0, sizeof *timeout + sizeof (struct ib_mad_hdr));
-
- timeout->length = sizeof (struct ib_mad_hdr);
- timeout->mad.hdr.id = packet->mad.hdr.id;
+ timeout->length = IB_MGMT_MAD_HDR;
+ timeout->mad.hdr.id = packet->mad.hdr.id;
timeout->mad.hdr.status = ETIMEDOUT;
memcpy(timeout->mad.data, packet->mad.data,
sizeof (struct ib_mad_hdr));
@@ -177,11 +215,10 @@ static void recv_handler(struct ib_mad_agent *agent,
goto out;
length = mad_recv_wc->mad_len;
- packet = kmalloc(sizeof *packet + length, GFP_KERNEL);
+ packet = kzalloc(sizeof *packet + length, GFP_KERNEL);
if (!packet)
goto out;
- memset(packet, 0, sizeof *packet + length);
packet->length = length;
ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data);
@@ -247,7 +284,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
else
ret = -ENOSPC;
} else if (copy_to_user(buf, &packet->mad,
- packet->length + sizeof (struct ib_user_mad)))
+ packet->length + sizeof (struct ib_user_mad)))
ret = -EFAULT;
else
ret = packet->length + sizeof (struct ib_user_mad);
@@ -268,26 +305,23 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
struct ib_umad_packet *packet;
struct ib_mad_agent *agent;
struct ib_ah_attr ah_attr;
- struct ib_send_wr *bad_wr;
+ struct ib_ah *ah;
struct ib_rmpp_mad *rmpp_mad;
u8 method;
__be64 *tid;
- int ret, length, hdr_len, data_len, rmpp_hdr_size;
- int rmpp_active = 0;
+ int ret, length, hdr_len, copy_offset;
+ int rmpp_active, has_rmpp_header;
- if (count < sizeof (struct ib_user_mad))
+ if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
return -EINVAL;
length = count - sizeof (struct ib_user_mad);
- packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) +
- sizeof(struct ib_rmpp_hdr), GFP_KERNEL);
+ packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
if (!packet)
return -ENOMEM;
if (copy_from_user(&packet->mad, buf,
- sizeof (struct ib_user_mad) +
- sizeof(struct ib_mad_hdr) +
- sizeof(struct ib_rmpp_hdr))) {
+ sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) {
ret = -EFAULT;
goto err;
}
@@ -298,11 +332,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
goto err;
}
- packet->length = length;
-
- down_read(&file->agent_mutex);
+ down_read(&file->port->mutex);
- agent = file->agent[packet->mad.hdr.id];
+ agent = __get_agent(file, packet->mad.hdr.id);
if (!agent) {
ret = -EINVAL;
goto err_up;
@@ -321,80 +353,63 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
}
- packet->ah = ib_create_ah(agent->qp->pd, &ah_attr);
- if (IS_ERR(packet->ah)) {
- ret = PTR_ERR(packet->ah);
+ ah = ib_create_ah(agent->qp->pd, &ah_attr);
+ if (IS_ERR(ah)) {
+ ret = PTR_ERR(ah);
goto err_up;
}
rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
- if (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE) {
- /* RMPP active */
- if (!agent->rmpp_version) {
- ret = -EINVAL;
- goto err_ah;
- }
-
- /* Validate that the management class can support RMPP */
- if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) {
- hdr_len = offsetof(struct ib_sa_mad, data);
- data_len = length - hdr_len;
- } else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
- (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) {
- hdr_len = offsetof(struct ib_vendor_mad, data);
- data_len = length - hdr_len;
- } else {
- ret = -EINVAL;
- goto err_ah;
- }
- rmpp_active = 1;
+ if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) {
+ hdr_len = IB_MGMT_SA_HDR;
+ copy_offset = IB_MGMT_RMPP_HDR;
+ has_rmpp_header = 1;
+ } else if (rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START &&
+ rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END) {
+ hdr_len = IB_MGMT_VENDOR_HDR;
+ copy_offset = IB_MGMT_RMPP_HDR;
+ has_rmpp_header = 1;
} else {
- if (length > sizeof(struct ib_mad)) {
- ret = -EINVAL;
- goto err_ah;
- }
- hdr_len = offsetof(struct ib_mad, data);
- data_len = length - hdr_len;
+ hdr_len = IB_MGMT_MAD_HDR;
+ copy_offset = IB_MGMT_MAD_HDR;
+ has_rmpp_header = 0;
+ }
+
+ if (has_rmpp_header)
+ rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+ IB_MGMT_RMPP_FLAG_ACTIVE;
+ else
+ rmpp_active = 0;
+
+ /* Validate that the management class can support RMPP */
+ if (rmpp_active && !agent->rmpp_version) {
+ ret = -EINVAL;
+ goto err_ah;
}
packet->msg = ib_create_send_mad(agent,
be32_to_cpu(packet->mad.hdr.qpn),
- 0, packet->ah, rmpp_active,
- hdr_len, data_len,
+ 0, rmpp_active,
+ hdr_len, length - hdr_len,
GFP_KERNEL);
if (IS_ERR(packet->msg)) {
ret = PTR_ERR(packet->msg);
goto err_ah;
}
- packet->msg->send_wr.wr.ud.timeout_ms = packet->mad.hdr.timeout_ms;
- packet->msg->send_wr.wr.ud.retries = packet->mad.hdr.retries;
-
- /* Override send WR WRID initialized in ib_create_send_mad */
- packet->msg->send_wr.wr_id = (unsigned long) packet;
-
- if (!rmpp_active) {
- /* Copy message from user into send buffer */
- if (copy_from_user(packet->msg->mad,
- buf + sizeof(struct ib_user_mad), length)) {
- ret = -EFAULT;
- goto err_msg;
- }
- } else {
- rmpp_hdr_size = sizeof(struct ib_mad_hdr) +
- sizeof(struct ib_rmpp_hdr);
-
- /* Only copy MAD headers (RMPP header in place) */
- memcpy(packet->msg->mad, packet->mad.data,
- sizeof(struct ib_mad_hdr));
+ packet->msg->ah = ah;
+ packet->msg->timeout_ms = packet->mad.hdr.timeout_ms;
+ packet->msg->retries = packet->mad.hdr.retries;
+ packet->msg->context[0] = packet;
- /* Now, copy rest of message from user into send buffer */
- if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data,
- buf + sizeof (struct ib_user_mad) + rmpp_hdr_size,
- length - rmpp_hdr_size)) {
- ret = -EFAULT;
- goto err_msg;
- }
+ /* Copy MAD headers (RMPP header in place) */
+ memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
+ /* Now, copy rest of message from user into send buffer */
+ if (copy_from_user(packet->msg->mad + copy_offset,
+ buf + sizeof (struct ib_user_mad) + copy_offset,
+ length - copy_offset)) {
+ ret = -EFAULT;
+ goto err_msg;
}
/*
@@ -403,32 +418,32 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
* transaction ID matches the agent being used to send the
* MAD.
*/
- method = packet->msg->mad->mad_hdr.method;
+ method = ((struct ib_mad_hdr *) packet->msg->mad)->method;
if (!(method & IB_MGMT_METHOD_RESP) &&
method != IB_MGMT_METHOD_TRAP_REPRESS &&
method != IB_MGMT_METHOD_SEND) {
- tid = &packet->msg->mad->mad_hdr.tid;
+ tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
*tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
(be64_to_cpup(tid) & 0xffffffff));
}
- ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr);
+ ret = ib_post_send_mad(packet->msg, NULL);
if (ret)
goto err_msg;
- up_read(&file->agent_mutex);
+ up_read(&file->port->mutex);
- return sizeof (struct ib_user_mad_hdr) + packet->length;
+ return count;
err_msg:
ib_free_send_mad(packet->msg);
err_ah:
- ib_destroy_ah(packet->ah);
+ ib_destroy_ah(ah);
err_up:
- up_read(&file->agent_mutex);
+ up_read(&file->port->mutex);
err:
kfree(packet);
@@ -458,7 +473,12 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
int agent_id;
int ret;
- down_write(&file->agent_mutex);
+ down_write(&file->port->mutex);
+
+ if (!file->port->ib_dev) {
+ ret = -EPIPE;
+ goto out;
+ }
if (copy_from_user(&ureq, (void __user *) arg, sizeof ureq)) {
ret = -EFAULT;
@@ -471,7 +491,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
}
for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
- if (!file->agent[agent_id])
+ if (!__get_agent(file, agent_id))
goto found;
ret = -ENOMEM;
@@ -495,58 +515,46 @@ found:
goto out;
}
- file->agent[agent_id] = agent;
-
- file->mr[agent_id] = ib_get_dma_mr(agent->qp->pd, IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(file->mr[agent_id])) {
- ret = -ENOMEM;
- goto err;
- }
-
if (put_user(agent_id,
(u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) {
ret = -EFAULT;
- goto err_mr;
+ ib_unregister_mad_agent(agent);
+ goto out;
}
+ file->agent[agent_id] = agent;
ret = 0;
- goto out;
-
-err_mr:
- ib_dereg_mr(file->mr[agent_id]);
-
-err:
- file->agent[agent_id] = NULL;
- ib_unregister_mad_agent(agent);
out:
- up_write(&file->agent_mutex);
+ up_write(&file->port->mutex);
return ret;
}
static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg)
{
+ struct ib_mad_agent *agent = NULL;
u32 id;
int ret = 0;
- down_write(&file->agent_mutex);
+ if (get_user(id, (u32 __user *) arg))
+ return -EFAULT;
- if (get_user(id, (u32 __user *) arg)) {
- ret = -EFAULT;
- goto out;
- }
+ down_write(&file->port->mutex);
- if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) {
+ if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
ret = -EINVAL;
goto out;
}
- ib_dereg_mr(file->mr[id]);
- ib_unregister_mad_agent(file->agent[id]);
+ agent = file->agent[id];
file->agent[id] = NULL;
out:
- up_write(&file->agent_mutex);
+ up_write(&file->port->mutex);
+
+ if (agent)
+ ib_unregister_mad_agent(agent);
+
return ret;
}
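
The rewrite of ib_umad_unreg_agent() above is the detach-then-destroy idiom: the pointer is unlinked while holding port->mutex for writing, but ib_unregister_mad_agent() runs only after the lock is dropped, because unregistration waits for MAD handlers that themselves take the lock (see queue_packet()). Boiled down:

	down_write(&file->port->mutex);
	agent = file->agent[id];	/* detach under the write lock ... */
	file->agent[id] = NULL;
	up_write(&file->port->mutex);

	if (agent)			/* ... destroy with no locks held */
		ib_unregister_mad_agent(agent);
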
@@ -565,43 +573,76 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
static int ib_umad_open(struct inode *inode, struct file *filp)
{
- struct ib_umad_port *port =
- container_of(inode->i_cdev, struct ib_umad_port, dev);
+ struct ib_umad_port *port;
struct ib_umad_file *file;
+ int ret = 0;
- file = kmalloc(sizeof *file, GFP_KERNEL);
- if (!file)
- return -ENOMEM;
+ spin_lock(&port_lock);
+ port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE];
+ if (port)
+ kref_get(&port->umad_dev->ref);
+ spin_unlock(&port_lock);
+
+ if (!port)
+ return -ENXIO;
- memset(file, 0, sizeof *file);
+ down_write(&port->mutex);
+
+ if (!port->ib_dev) {
+ ret = -ENXIO;
+ goto out;
+ }
+
+ file = kzalloc(sizeof *file, GFP_KERNEL);
+ if (!file) {
+ kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+ ret = -ENOMEM;
+ goto out;
+ }
spin_lock_init(&file->recv_lock);
- init_rwsem(&file->agent_mutex);
INIT_LIST_HEAD(&file->recv_list);
init_waitqueue_head(&file->recv_wait);
file->port = port;
filp->private_data = file;
- return 0;
+ list_add_tail(&file->port_list, &port->file_list);
+
+out:
+ up_write(&port->mutex);
+ return ret;
}
static int ib_umad_close(struct inode *inode, struct file *filp)
{
struct ib_umad_file *file = filp->private_data;
+ struct ib_umad_device *dev = file->port->umad_dev;
struct ib_umad_packet *packet, *tmp;
+ int already_dead;
int i;
- for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
- if (file->agent[i]) {
- ib_dereg_mr(file->mr[i]);
- ib_unregister_mad_agent(file->agent[i]);
- }
+ down_write(&file->port->mutex);
+
+ already_dead = file->agents_dead;
+ file->agents_dead = 1;
list_for_each_entry_safe(packet, tmp, &file->recv_list, list)
kfree(packet);
+ list_del(&file->port_list);
+
+ downgrade_write(&file->port->mutex);
+
+ if (!already_dead)
+ for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
+ if (file->agent[i])
+ ib_unregister_mad_agent(file->agent[i]);
+
+ up_read(&file->port->mutex);
+
kfree(file);
+ kref_put(&dev->ref, ib_umad_release_dev);
return 0;
}
@@ -619,30 +660,46 @@ static struct file_operations umad_fops = {
static int ib_umad_sm_open(struct inode *inode, struct file *filp)
{
- struct ib_umad_port *port =
- container_of(inode->i_cdev, struct ib_umad_port, sm_dev);
+ struct ib_umad_port *port;
struct ib_port_modify props = {
.set_port_cap_mask = IB_PORT_SM
};
int ret;
+ spin_lock(&port_lock);
+ port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS];
+ if (port)
+ kref_get(&port->umad_dev->ref);
+ spin_unlock(&port_lock);
+
+ if (!port)
+ return -ENXIO;
+
if (filp->f_flags & O_NONBLOCK) {
- if (down_trylock(&port->sm_sem))
- return -EAGAIN;
+ if (down_trylock(&port->sm_sem)) {
+ ret = -EAGAIN;
+ goto fail;
+ }
} else {
- if (down_interruptible(&port->sm_sem))
- return -ERESTARTSYS;
+ if (down_interruptible(&port->sm_sem)) {
+ ret = -ERESTARTSYS;
+ goto fail;
+ }
}
ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
if (ret) {
up(&port->sm_sem);
- return ret;
+ goto fail;
}
filp->private_data = port;
return 0;
+
+fail:
+ kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+ return ret;
}
static int ib_umad_sm_close(struct inode *inode, struct file *filp)
@@ -651,11 +708,17 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
struct ib_port_modify props = {
.clr_port_cap_mask = IB_PORT_SM
};
- int ret;
+ int ret = 0;
+
+ down_write(&port->mutex);
+ if (port->ib_dev)
+ ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+ up_write(&port->mutex);
- ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
up(&port->sm_sem);
+ kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+
return ret;
}
@@ -671,21 +734,13 @@ static struct ib_client umad_client = {
.remove = ib_umad_remove_one
};
-static ssize_t show_dev(struct class_device *class_dev, char *buf)
-{
- struct ib_umad_port *port = class_get_devdata(class_dev);
-
- if (class_dev == &port->class_dev)
- return print_dev_t(buf, port->dev.dev);
- else
- return print_dev_t(buf, port->sm_dev.dev);
-}
-static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
-
static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
{
struct ib_umad_port *port = class_get_devdata(class_dev);
+ if (!port)
+ return -ENODEV;
+
return sprintf(buf, "%s\n", port->ib_dev->name);
}
static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -694,38 +749,13 @@ static ssize_t show_port(struct class_device *class_dev, char *buf)
{
struct ib_umad_port *port = class_get_devdata(class_dev);
+ if (!port)
+ return -ENODEV;
+
return sprintf(buf, "%d\n", port->port_num);
}
static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
-static void ib_umad_release_dev(struct kref *ref)
-{
- struct ib_umad_device *dev =
- container_of(ref, struct ib_umad_device, ref);
-
- kfree(dev);
-}
-
-static void ib_umad_release_port(struct class_device *class_dev)
-{
- struct ib_umad_port *port = class_get_devdata(class_dev);
-
- if (class_dev == &port->class_dev) {
- cdev_del(&port->dev);
- clear_bit(port->devnum, dev_map);
- } else {
- cdev_del(&port->sm_dev);
- clear_bit(port->sm_devnum, dev_map);
- }
-
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
-}
-
-static struct class umad_class = {
- .name = "infiniband_mad",
- .release = ib_umad_release_port
-};
-
static ssize_t show_abi_version(struct class *class, char *buf)
{
return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
@@ -735,91 +765,144 @@ static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
static int ib_umad_init_port(struct ib_device *device, int port_num,
struct ib_umad_port *port)
{
- spin_lock(&map_lock);
- port->devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
- if (port->devnum >= IB_UMAD_MAX_PORTS) {
- spin_unlock(&map_lock);
- return -1;
- }
- port->sm_devnum = find_next_zero_bit(dev_map, IB_UMAD_MAX_PORTS * 2, IB_UMAD_MAX_PORTS);
- if (port->sm_devnum >= IB_UMAD_MAX_PORTS * 2) {
- spin_unlock(&map_lock);
+ spin_lock(&port_lock);
+ port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
+ if (port->dev_num >= IB_UMAD_MAX_PORTS) {
+ spin_unlock(&port_lock);
return -1;
}
- set_bit(port->devnum, dev_map);
- set_bit(port->sm_devnum, dev_map);
- spin_unlock(&map_lock);
+ set_bit(port->dev_num, dev_map);
+ spin_unlock(&port_lock);
port->ib_dev = device;
port->port_num = port_num;
init_MUTEX(&port->sm_sem);
+ init_rwsem(&port->mutex);
+ INIT_LIST_HEAD(&port->file_list);
- cdev_init(&port->dev, &umad_fops);
- port->dev.owner = THIS_MODULE;
- kobject_set_name(&port->dev.kobj, "umad%d", port->devnum);
- if (cdev_add(&port->dev, base_dev + port->devnum, 1))
+ port->dev = cdev_alloc();
+ if (!port->dev)
return -1;
-
- port->class_dev.class = &umad_class;
- port->class_dev.dev = device->dma_device;
-
- snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum);
-
- if (class_device_register(&port->class_dev))
+ port->dev->owner = THIS_MODULE;
+ port->dev->ops = &umad_fops;
+ kobject_set_name(&port->dev->kobj, "umad%d", port->dev_num);
+ if (cdev_add(port->dev, base_dev + port->dev_num, 1))
goto err_cdev;
- class_set_devdata(&port->class_dev, port);
- kref_get(&port->umad_dev->ref);
+ port->class_dev = class_device_create(umad_class, NULL, port->dev->dev,
+ device->dma_device,
+ "umad%d", port->dev_num);
+ if (IS_ERR(port->class_dev))
+ goto err_cdev;
- if (class_device_create_file(&port->class_dev, &class_device_attr_dev))
- goto err_class;
- if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev))
+ if (class_device_create_file(port->class_dev, &class_device_attr_ibdev))
goto err_class;
- if (class_device_create_file(&port->class_dev, &class_device_attr_port))
+ if (class_device_create_file(port->class_dev, &class_device_attr_port))
goto err_class;
- cdev_init(&port->sm_dev, &umad_sm_fops);
- port->sm_dev.owner = THIS_MODULE;
- kobject_set_name(&port->dev.kobj, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
- if (cdev_add(&port->sm_dev, base_dev + port->sm_devnum, 1))
- return -1;
-
- port->sm_class_dev.class = &umad_class;
- port->sm_class_dev.dev = device->dma_device;
-
- snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
+ port->sm_dev = cdev_alloc();
+ if (!port->sm_dev)
+ goto err_class;
+ port->sm_dev->owner = THIS_MODULE;
+ port->sm_dev->ops = &umad_sm_fops;
+ kobject_set_name(&port->sm_dev->kobj, "issm%d", port->dev_num);
+ if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
+ goto err_sm_cdev;
- if (class_device_register(&port->sm_class_dev))
+ port->sm_class_dev = class_device_create(umad_class, NULL, port->sm_dev->dev,
+ device->dma_device,
+ "issm%d", port->dev_num);
+ if (IS_ERR(port->sm_class_dev))
goto err_sm_cdev;
- class_set_devdata(&port->sm_class_dev, port);
- kref_get(&port->umad_dev->ref);
+ class_set_devdata(port->class_dev, port);
+ class_set_devdata(port->sm_class_dev, port);
- if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev))
+ if (class_device_create_file(port->sm_class_dev, &class_device_attr_ibdev))
goto err_sm_class;
- if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev))
- goto err_sm_class;
- if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port))
+ if (class_device_create_file(port->sm_class_dev, &class_device_attr_port))
goto err_sm_class;
+ spin_lock(&port_lock);
+ umad_port[port->dev_num] = port;
+ spin_unlock(&port_lock);
+
return 0;
err_sm_class:
- class_device_unregister(&port->sm_class_dev);
+ class_device_destroy(umad_class, port->sm_dev->dev);
err_sm_cdev:
- cdev_del(&port->sm_dev);
+ cdev_del(port->sm_dev);
err_class:
- class_device_unregister(&port->class_dev);
+ class_device_destroy(umad_class, port->dev->dev);
err_cdev:
- cdev_del(&port->dev);
- clear_bit(port->devnum, dev_map);
+ cdev_del(port->dev);
+ clear_bit(port->dev_num, dev_map);
return -1;
}
+static void ib_umad_kill_port(struct ib_umad_port *port)
+{
+ struct ib_umad_file *file;
+ int id;
+
+ class_set_devdata(port->class_dev, NULL);
+ class_set_devdata(port->sm_class_dev, NULL);
+
+ class_device_destroy(umad_class, port->dev->dev);
+ class_device_destroy(umad_class, port->sm_dev->dev);
+
+ cdev_del(port->dev);
+ cdev_del(port->sm_dev);
+
+ spin_lock(&port_lock);
+ umad_port[port->dev_num] = NULL;
+ spin_unlock(&port_lock);
+
+ down_write(&port->mutex);
+
+ port->ib_dev = NULL;
+
+ /*
+ * Now go through the list of files attached to this port and
+ * unregister all of their MAD agents. We need to hold
+ * port->mutex while doing this to avoid racing with
+ * ib_umad_close(), but we can't hold the mutex for writing
+ * while calling ib_unregister_mad_agent(), since that might
+ * deadlock by calling back into queue_packet(). So we
+ * downgrade our lock to a read lock, and then drop and
+ * reacquire the write lock for the next iteration.
+ *
+ * We do list_del_init() on the file's list_head so that the
+ * list_del in ib_umad_close() is still OK, even after the
+ * file is removed from the list.
+ */
+ while (!list_empty(&port->file_list)) {
+ file = list_entry(port->file_list.next, struct ib_umad_file,
+ port_list);
+
+ file->agents_dead = 1;
+ list_del_init(&file->port_list);
+
+ downgrade_write(&port->mutex);
+
+ for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id)
+ if (file->agent[id])
+ ib_unregister_mad_agent(file->agent[id]);
+
+ up_read(&port->mutex);
+ down_write(&port->mutex);
+ }
+
+ up_write(&port->mutex);
+
+ clear_bit(port->dev_num, dev_map);
+}
+
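
The comment inside ib_umad_kill_port() describes a pattern worth isolating: teardown holds a rwsem for writing, but each per-item destructor must not be called with the write lock held, because it waits on contexts that take the same rwsem for reading. The shape of the loop, reduced to its essentials (struct item and destroy() are placeholders):

	down_write(&sem);
	while (!list_empty(&head)) {
		item = list_entry(head.next, struct item, list);
		list_del_init(&item->list);	/* a later list_del() stays safe */

		downgrade_write(&sem);		/* writers excluded, readers may run */
		destroy(item);			/* may wait on readers of sem */
		up_read(&sem);

		down_write(&sem);		/* retake for the next item */
	}
	up_write(&sem);
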
static void ib_umad_add_one(struct ib_device *device)
{
struct ib_umad_device *umad_dev;
@@ -832,15 +915,12 @@ static void ib_umad_add_one(struct ib_device *device)
e = device->phys_port_cnt;
}
- umad_dev = kmalloc(sizeof *umad_dev +
+ umad_dev = kzalloc(sizeof *umad_dev +
(e - s + 1) * sizeof (struct ib_umad_port),
GFP_KERNEL);
if (!umad_dev)
return;
- memset(umad_dev, 0, sizeof *umad_dev +
- (e - s + 1) * sizeof (struct ib_umad_port));
-
kref_init(&umad_dev->ref);
umad_dev->start_port = s;
@@ -858,10 +938,8 @@ static void ib_umad_add_one(struct ib_device *device)
return;
err:
- while (--i >= s) {
- class_device_unregister(&umad_dev->port[i - s].class_dev);
- class_device_unregister(&umad_dev->port[i - s].sm_class_dev);
- }
+ while (--i >= s)
+ ib_umad_kill_port(&umad_dev->port[i - s]);
kref_put(&umad_dev->ref, ib_umad_release_dev);
}
@@ -874,10 +952,8 @@ static void ib_umad_remove_one(struct ib_device *device)
if (!umad_dev)
return;
- for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) {
- class_device_unregister(&umad_dev->port[i].class_dev);
- class_device_unregister(&umad_dev->port[i].sm_class_dev);
- }
+ for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
+ ib_umad_kill_port(&umad_dev->port[i]);
kref_put(&umad_dev->ref, ib_umad_release_dev);
}
@@ -886,8 +962,6 @@ static int __init ib_umad_init(void)
{
int ret;
- spin_lock_init(&map_lock);
-
ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
@@ -895,13 +969,14 @@ static int __init ib_umad_init(void)
goto out;
}
- ret = class_register(&umad_class);
- if (ret) {
+ umad_class = class_create(THIS_MODULE, "infiniband_mad");
+ if (IS_ERR(umad_class)) {
+ ret = PTR_ERR(umad_class);
printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
goto out_chrdev;
}
- ret = class_create_file(&umad_class, &class_attr_abi_version);
+ ret = class_create_file(umad_class, &class_attr_abi_version);
if (ret) {
printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
goto out_class;
@@ -916,7 +991,7 @@ static int __init ib_umad_init(void)
return 0;
out_class:
- class_unregister(&umad_class);
+ class_destroy(umad_class);
out_chrdev:
unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
@@ -928,7 +1003,7 @@ out:
static void __exit ib_umad_cleanup(void)
{
ib_unregister_client(&umad_client);
- class_unregister(&umad_class);
+ class_destroy(umad_class);
unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
}
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index b1897bed14a..7114e3fbab0 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -3,6 +3,7 @@
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -38,29 +39,47 @@
#ifndef UVERBS_H
#define UVERBS_H
-/* Include device.h and fs.h until cdev.h is self-sufficient */
-#include <linux/fs.h>
-#include <linux/device.h>
-#include <linux/cdev.h>
#include <linux/kref.h>
#include <linux/idr.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
+/*
+ * Our lifetime rules for these structs are the following:
+ *
+ * struct ib_uverbs_device: One reference is held by the module and
+ * released in ib_uverbs_remove_one(). Another reference is taken by
+ * ib_uverbs_open() each time the character special file is opened,
+ * and released in ib_uverbs_release_file() when the file is released.
+ *
+ * struct ib_uverbs_file: One reference is held by the VFS and
+ * released when the file is closed. Another reference is taken when
+ * an asynchronous event queue file is created and released when the
+ * event file is closed.
+ *
+ * struct ib_uverbs_event_file: One reference is held by the VFS and
+ * released when the file is closed. For asynchronous event files,
+ * another reference is held by the corresponding main context file
+ * and released when that file is closed. For completion event files,
+ * a reference is taken when a CQ is created that uses the file, and
+ * released when the CQ is destroyed.
+ */
+
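
Each "reference" in these rules is a kref on the struct, and every holder releases it through the same callback, so whichever holder drops last frees the memory; there is no single owner. The pattern in miniature (obj and obj_release() are illustrative names):

struct obj {
	struct kref ref;
	/* ... payload ... */
};

static void obj_release(struct kref *ref)
{
	kfree(container_of(ref, struct obj, ref));
}

static void holder_attach(struct obj *obj)
{
	kref_get(&obj->ref);		/* e.g. open(), or a CQ taking its ev_file */
}

static void holder_detach(struct obj *obj)
{
	kref_put(&obj->ref, obj_release);	/* the last put frees */
}
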
struct ib_uverbs_device {
+ struct kref ref;
int devnum;
- struct cdev dev;
- struct class_device class_dev;
+ struct cdev *dev;
+ struct class_device *class_dev;
struct ib_device *ib_dev;
- int num_comp;
+ int num_comp_vectors;
};
struct ib_uverbs_event_file {
struct kref ref;
+ struct file *file;
struct ib_uverbs_file *uverbs_file;
spinlock_t lock;
- int fd;
int is_async;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
@@ -69,11 +88,11 @@ struct ib_uverbs_event_file {
struct ib_uverbs_file {
struct kref ref;
+ struct semaphore mutex;
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
- struct ib_uverbs_event_file async_file;
- struct ib_uverbs_event_file comp_file[1];
+ struct ib_uverbs_event_file *async_file;
};
struct ib_uverbs_event {
@@ -86,14 +105,26 @@ struct ib_uverbs_event {
u32 *counter;
};
+struct ib_uverbs_mcast_entry {
+ struct list_head list;
+ union ib_gid gid;
+ u16 lid;
+};
+
struct ib_uevent_object {
struct ib_uobject uobject;
struct list_head event_list;
u32 events_reported;
};
+struct ib_uqp_object {
+ struct ib_uevent_object uevent;
+ struct list_head mcast_list;
+};
+
struct ib_ucq_object {
struct ib_uobject uobject;
+ struct ib_uverbs_file *uverbs_file;
struct list_head comp_list;
struct list_head async_list;
u32 comp_events_reported;
@@ -109,10 +140,23 @@ extern struct idr ib_uverbs_cq_idr;
extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
+struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
+ int is_async, int *fd);
+void ib_uverbs_release_event_file(struct kref *ref);
+struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
+
+void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
+ struct ib_uverbs_event_file *ev_file,
+ struct ib_ucq_object *uobj);
+void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
+ struct ib_uevent_object *uobj);
+
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
+void ib_uverbs_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event);
int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
void *addr, size_t size, int write);
@@ -124,21 +168,26 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
const char __user *buf, int in_len, \
int out_len)
-IB_UVERBS_DECLARE_CMD(query_params);
IB_UVERBS_DECLARE_CMD(get_context);
IB_UVERBS_DECLARE_CMD(query_device);
IB_UVERBS_DECLARE_CMD(query_port);
-IB_UVERBS_DECLARE_CMD(query_gid);
-IB_UVERBS_DECLARE_CMD(query_pkey);
IB_UVERBS_DECLARE_CMD(alloc_pd);
IB_UVERBS_DECLARE_CMD(dealloc_pd);
IB_UVERBS_DECLARE_CMD(reg_mr);
IB_UVERBS_DECLARE_CMD(dereg_mr);
+IB_UVERBS_DECLARE_CMD(create_comp_channel);
IB_UVERBS_DECLARE_CMD(create_cq);
+IB_UVERBS_DECLARE_CMD(poll_cq);
+IB_UVERBS_DECLARE_CMD(req_notify_cq);
IB_UVERBS_DECLARE_CMD(destroy_cq);
IB_UVERBS_DECLARE_CMD(create_qp);
IB_UVERBS_DECLARE_CMD(modify_qp);
IB_UVERBS_DECLARE_CMD(destroy_qp);
+IB_UVERBS_DECLARE_CMD(post_send);
+IB_UVERBS_DECLARE_CMD(post_recv);
+IB_UVERBS_DECLARE_CMD(post_srq_recv);
+IB_UVERBS_DECLARE_CMD(create_ah);
+IB_UVERBS_DECLARE_CMD(destroy_ah);
IB_UVERBS_DECLARE_CMD(attach_mcast);
IB_UVERBS_DECLARE_CMD(detach_mcast);
IB_UVERBS_DECLARE_CMD(create_srq);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index e91ebde4648..a57d021d435 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -33,6 +34,9 @@
* $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $
*/
+#include <linux/file.h>
+#include <linux/fs.h>
+
#include <asm/uaccess.h>
#include "uverbs.h"
@@ -45,29 +49,6 @@
(udata)->outlen = (olen); \
} while (0)
-ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
-{
- struct ib_uverbs_query_params cmd;
- struct ib_uverbs_query_params_resp resp;
-
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- memset(&resp, 0, sizeof resp);
-
- resp.num_cq_events = file->device->num_comp;
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
-}
-
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
@@ -76,8 +57,9 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
struct ib_uverbs_get_context_resp resp;
struct ib_udata udata;
struct ib_device *ibdev = file->device->ib_dev;
- int i;
- int ret = in_len;
+ struct ib_ucontext *ucontext;
+ struct file *filp;
+ int ret;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -85,45 +67,72 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
+ down(&file->mutex);
+
+ if (file->ucontext) {
+ ret = -EINVAL;
+ goto err;
+ }
+
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- file->ucontext = ibdev->alloc_ucontext(ibdev, &udata);
- if (IS_ERR(file->ucontext)) {
- ret = PTR_ERR(file->ucontext);
- file->ucontext = NULL;
- return ret;
- }
+	ucontext = ibdev->alloc_ucontext(ibdev, &udata);
+	if (IS_ERR(ucontext)) {
+		ret = PTR_ERR(ucontext);
+		goto err;
+	}
- file->ucontext->device = ibdev;
- INIT_LIST_HEAD(&file->ucontext->pd_list);
- INIT_LIST_HEAD(&file->ucontext->mr_list);
- INIT_LIST_HEAD(&file->ucontext->mw_list);
- INIT_LIST_HEAD(&file->ucontext->cq_list);
- INIT_LIST_HEAD(&file->ucontext->qp_list);
- INIT_LIST_HEAD(&file->ucontext->srq_list);
- INIT_LIST_HEAD(&file->ucontext->ah_list);
- spin_lock_init(&file->ucontext->lock);
-
- resp.async_fd = file->async_file.fd;
- for (i = 0; i < file->device->num_comp; ++i)
- if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab +
- i * sizeof (__u32),
- &file->comp_file[i].fd, sizeof (__u32)))
- goto err;
+ ucontext->device = ibdev;
+ INIT_LIST_HEAD(&ucontext->pd_list);
+ INIT_LIST_HEAD(&ucontext->mr_list);
+ INIT_LIST_HEAD(&ucontext->mw_list);
+ INIT_LIST_HEAD(&ucontext->cq_list);
+ INIT_LIST_HEAD(&ucontext->qp_list);
+ INIT_LIST_HEAD(&ucontext->srq_list);
+ INIT_LIST_HEAD(&ucontext->ah_list);
+
+ resp.num_comp_vectors = file->device->num_comp_vectors;
+
+ filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd);
+ if (IS_ERR(filp)) {
+ ret = PTR_ERR(filp);
+ goto err_free;
+ }
if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- goto err;
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_file;
+ }
+
+ file->async_file = filp->private_data;
+
+ INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev,
+ ib_uverbs_event_handler);
+ ret = ib_register_event_handler(&file->event_handler);
+ if (ret)
+ goto err_file;
+
+ kref_get(&file->async_file->ref);
+ kref_get(&file->ref);
+ file->ucontext = ucontext;
+
+ fd_install(resp.async_fd, filp);
+
+ up(&file->mutex);
return in_len;
-err:
- ibdev->dealloc_ucontext(file->ucontext);
- file->ucontext = NULL;
+err_file:
+ put_unused_fd(resp.async_fd);
+ fput(filp);
- return -EFAULT;
+err_free:
+ ibdev->dealloc_ucontext(ucontext);
+
+err:
+ up(&file->mutex);
+ return ret;
}
ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
@@ -243,62 +252,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
return in_len;
}
-ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
-{
- struct ib_uverbs_query_gid cmd;
- struct ib_uverbs_query_gid_resp resp;
- int ret;
-
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- memset(&resp, 0, sizeof resp);
-
- ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index,
- (union ib_gid *) resp.gid);
- if (ret)
- return ret;
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
-}
-
-ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
-{
- struct ib_uverbs_query_pkey cmd;
- struct ib_uverbs_query_pkey_resp resp;
- int ret;
-
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- memset(&resp, 0, sizeof resp);
-
- ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index,
- &resp.pkey);
- if (ret)
- return ret;
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
-}
-
ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
@@ -337,24 +290,20 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
pd->uobject = uobj;
atomic_set(&pd->usecnt, 0);
+ down(&ib_uverbs_idr_mutex);
+
retry:
if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) {
ret = -ENOMEM;
- goto err_pd;
+ goto err_up;
}
- down(&ib_uverbs_idr_mutex);
ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id);
- up(&ib_uverbs_idr_mutex);
if (ret == -EAGAIN)
goto retry;
if (ret)
- goto err_pd;
-
- spin_lock_irq(&file->ucontext->lock);
- list_add_tail(&uobj->list, &file->ucontext->pd_list);
- spin_unlock_irq(&file->ucontext->lock);
+ goto err_up;
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
@@ -362,21 +311,22 @@ retry:
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_list;
+ goto err_idr;
}
- return in_len;
+ down(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->pd_list);
+ up(&file->mutex);
-err_list:
- spin_lock_irq(&file->ucontext->lock);
- list_del(&uobj->list);
- spin_unlock_irq(&file->ucontext->lock);
+ up(&ib_uverbs_idr_mutex);
- down(&ib_uverbs_idr_mutex);
+ return in_len;
+
+err_idr:
idr_remove(&ib_uverbs_pd_idr, uobj->id);
- up(&ib_uverbs_idr_mutex);
-err_pd:
+err_up:
+ up(&ib_uverbs_idr_mutex);
ib_dealloc_pd(pd);
err:
@@ -410,9 +360,9 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle);
- spin_lock_irq(&file->ucontext->lock);
+ down(&file->mutex);
list_del(&uobj->list);
- spin_unlock_irq(&file->ucontext->lock);
+ up(&file->mutex);
kfree(uobj);
@@ -447,6 +397,14 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
+ /*
+ * Local write permission is required if remote write or
+ * remote atomic permission is also requested.
+ */
+ if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
+ !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
+ return -EINVAL;
+
obj = kmalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
@@ -512,24 +470,22 @@ retry:
resp.mr_handle = obj->uobject.id;
- spin_lock_irq(&file->ucontext->lock);
- list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
- spin_unlock_irq(&file->ucontext->lock);
-
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_list;
+ goto err_idr;
}
+ down(&file->mutex);
+ list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
+ up(&file->mutex);
+
up(&ib_uverbs_idr_mutex);
return in_len;
-err_list:
- spin_lock_irq(&file->ucontext->lock);
- list_del(&obj->uobject.list);
- spin_unlock_irq(&file->ucontext->lock);
+err_idr:
+ idr_remove(&ib_uverbs_mr_idr, obj->uobject.id);
err_unreg:
ib_dereg_mr(mr);
@@ -570,9 +526,9 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle);
- spin_lock_irq(&file->ucontext->lock);
+ down(&file->mutex);
list_del(&memobj->uobject.list);
- spin_unlock_irq(&file->ucontext->lock);
+ up(&file->mutex);
ib_umem_release(file->device->ib_dev, &memobj->umem);
kfree(memobj);
@@ -583,6 +539,35 @@ out:
return ret ? ret : in_len;
}
+ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_comp_channel cmd;
+ struct ib_uverbs_create_comp_channel_resp resp;
+ struct file *filp;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd);
+ if (IS_ERR(filp))
+ return PTR_ERR(filp);
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ put_unused_fd(resp.fd);
+ fput(filp);
+ return -EFAULT;
+ }
+
+ fd_install(resp.fd, filp);
+ return in_len;
+}
+
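
The ordering inside ib_uverbs_create_comp_channel() is deliberate, and recurs in ib_uverbs_get_context() above: the fd number is reserved and the struct file allocated first, the number is copied out to user space, and only then does fd_install() publish the file. Annotated sketch (response stands in for the user-space pointer):

	/* 1. reserve an fd and allocate the file; both still kernel-private */
	filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd);
	if (IS_ERR(filp))
		return PTR_ERR(filp);

	/* 2. hand the number to user space while unwinding is still possible */
	if (copy_to_user(response, &resp, sizeof resp)) {
		put_unused_fd(resp.fd);		/* release the reserved slot */
		fput(filp);			/* and the file's reference */
		return -EFAULT;
	}

	/* 3. make the fd live; from here on only close(2) can release it */
	fd_install(resp.fd, filp);
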
ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -591,6 +576,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
struct ib_uverbs_create_cq_resp resp;
struct ib_udata udata;
struct ib_ucq_object *uobj;
+ struct ib_uverbs_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
@@ -604,15 +590,19 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- if (cmd.event_handler >= file->device->num_comp)
+ if (cmd.comp_vector >= file->device->num_comp_vectors)
return -EINVAL;
+	if (cmd.comp_channel >= 0) {
+		ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
+		if (!ev_file)
+			return -EINVAL;
+	}
+
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
return -ENOMEM;
uobj->uobject.user_handle = cmd.user_handle;
uobj->uobject.context = file->ucontext;
+ uobj->uverbs_file = file;
uobj->comp_events_reported = 0;
uobj->async_events_reported = 0;
INIT_LIST_HEAD(&uobj->comp_list);
@@ -629,27 +619,23 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
cq->uobject = &uobj->uobject;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
- cq->cq_context = file;
+ cq->cq_context = ev_file;
atomic_set(&cq->usecnt, 0);
+ down(&ib_uverbs_idr_mutex);
+
retry:
if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) {
ret = -ENOMEM;
- goto err_cq;
+ goto err_up;
}
- down(&ib_uverbs_idr_mutex);
ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id);
- up(&ib_uverbs_idr_mutex);
if (ret == -EAGAIN)
goto retry;
if (ret)
- goto err_cq;
-
- spin_lock_irq(&file->ucontext->lock);
- list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list);
- spin_unlock_irq(&file->ucontext->lock);
+ goto err_up;
memset(&resp, 0, sizeof resp);
resp.cq_handle = uobj->uobject.id;
@@ -658,21 +644,22 @@ retry:
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_list;
+ goto err_idr;
}
- return in_len;
+ down(&file->mutex);
+ list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list);
+ up(&file->mutex);
-err_list:
- spin_lock_irq(&file->ucontext->lock);
- list_del(&uobj->uobject.list);
- spin_unlock_irq(&file->ucontext->lock);
+ up(&ib_uverbs_idr_mutex);
- down(&ib_uverbs_idr_mutex);
+ return in_len;
+
+err_idr:
idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id);
- up(&ib_uverbs_idr_mutex);
-err_cq:
+err_up:
+ up(&ib_uverbs_idr_mutex);
ib_destroy_cq(cq);
err:
@@ -680,6 +667,93 @@ err:
return ret;
}
+ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_poll_cq cmd;
+ struct ib_uverbs_poll_cq_resp *resp;
+ struct ib_cq *cq;
+ struct ib_wc *wc;
+ int ret = 0;
+ int i;
+ int rsize;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL);
+ if (!wc)
+ return -ENOMEM;
+
+ rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc);
+ resp = kmalloc(rsize, GFP_KERNEL);
+ if (!resp) {
+ ret = -ENOMEM;
+ goto out_wc;
+ }
+
+ down(&ib_uverbs_idr_mutex);
+ cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+ if (!cq || cq->uobject->context != file->ucontext) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ resp->count = ib_poll_cq(cq, cmd.ne, wc);
+
+ for (i = 0; i < resp->count; i++) {
+ resp->wc[i].wr_id = wc[i].wr_id;
+ resp->wc[i].status = wc[i].status;
+ resp->wc[i].opcode = wc[i].opcode;
+ resp->wc[i].vendor_err = wc[i].vendor_err;
+ resp->wc[i].byte_len = wc[i].byte_len;
+ resp->wc[i].imm_data = (__u32 __force) wc[i].imm_data;
+ resp->wc[i].qp_num = wc[i].qp_num;
+ resp->wc[i].src_qp = wc[i].src_qp;
+ resp->wc[i].wc_flags = wc[i].wc_flags;
+ resp->wc[i].pkey_index = wc[i].pkey_index;
+ resp->wc[i].slid = wc[i].slid;
+ resp->wc[i].sl = wc[i].sl;
+ resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits;
+ resp->wc[i].port_num = wc[i].port_num;
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize))
+ ret = -EFAULT;
+
+out:
+ up(&ib_uverbs_idr_mutex);
+ kfree(resp);
+
+out_wc:
+ kfree(wc);
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_req_notify_cq cmd;
+ struct ib_cq *cq;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+ cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+ if (cq && cq->uobject->context == file->ucontext) {
+ ib_req_notify_cq(cq, cmd.solicited_only ?
+ IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
+ ret = in_len;
+ }
+ up(&ib_uverbs_idr_mutex);
+
+ return ret;
+}
+
ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -688,7 +762,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_cq_resp resp;
struct ib_cq *cq;
struct ib_ucq_object *uobj;
- struct ib_uverbs_event *evt, *tmp;
+ struct ib_uverbs_event_file *ev_file;
u64 user_handle;
int ret = -EINVAL;
@@ -704,7 +778,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
goto out;
user_handle = cq->uobject->user_handle;
- uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+ uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+ ev_file = cq->cq_context;
ret = ib_destroy_cq(cq);
if (ret)
@@ -712,23 +787,11 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle);
- spin_lock_irq(&file->ucontext->lock);
+ down(&file->mutex);
list_del(&uobj->uobject.list);
- spin_unlock_irq(&file->ucontext->lock);
+ up(&file->mutex);
- spin_lock_irq(&file->comp_file[0].lock);
- list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
- list_del(&evt->list);
- kfree(evt);
- }
- spin_unlock_irq(&file->comp_file[0].lock);
-
- spin_lock_irq(&file->async_file.lock);
- list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
- list_del(&evt->list);
- kfree(evt);
- }
- spin_unlock_irq(&file->async_file.lock);
+ ib_uverbs_release_ucq(file, ev_file, uobj);
resp.comp_events_reported = uobj->comp_events_reported;
resp.async_events_reported = uobj->async_events_reported;
@@ -752,7 +815,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
struct ib_uverbs_create_qp cmd;
struct ib_uverbs_create_qp_resp resp;
struct ib_udata udata;
- struct ib_uevent_object *uobj;
+ struct ib_uqp_object *uobj;
struct ib_pd *pd;
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
@@ -803,10 +866,11 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
attr.cap.max_recv_sge = cmd.max_recv_sge;
attr.cap.max_inline_data = cmd.max_inline_data;
- uobj->uobject.user_handle = cmd.user_handle;
- uobj->uobject.context = file->ucontext;
- uobj->events_reported = 0;
- INIT_LIST_HEAD(&uobj->event_list);
+ uobj->uevent.uobject.user_handle = cmd.user_handle;
+ uobj->uevent.uobject.context = file->ucontext;
+ uobj->uevent.events_reported = 0;
+ INIT_LIST_HEAD(&uobj->uevent.event_list);
+ INIT_LIST_HEAD(&uobj->mcast_list);
qp = pd->device->create_qp(pd, &attr, &udata);
if (IS_ERR(qp)) {
@@ -819,7 +883,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
qp->send_cq = attr.send_cq;
qp->recv_cq = attr.recv_cq;
qp->srq = attr.srq;
- qp->uobject = &uobj->uobject;
+ qp->uobject = &uobj->uevent.uobject;
qp->event_handler = attr.event_handler;
qp->qp_context = attr.qp_context;
qp->qp_type = attr.qp_type;
@@ -838,33 +902,36 @@ retry:
goto err_destroy;
}
- ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->uobject.id);
+ ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->uevent.uobject.id);
if (ret == -EAGAIN)
goto retry;
if (ret)
goto err_destroy;
- resp.qp_handle = uobj->uobject.id;
-
- spin_lock_irq(&file->ucontext->lock);
- list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list);
- spin_unlock_irq(&file->ucontext->lock);
+ resp.qp_handle = uobj->uevent.uobject.id;
+ resp.max_recv_sge = attr.cap.max_recv_sge;
+ resp.max_send_sge = attr.cap.max_send_sge;
+ resp.max_recv_wr = attr.cap.max_recv_wr;
+ resp.max_send_wr = attr.cap.max_send_wr;
+ resp.max_inline_data = attr.cap.max_inline_data;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_list;
+ goto err_idr;
}
+ down(&file->mutex);
+ list_add_tail(&uobj->uevent.uobject.list, &file->ucontext->qp_list);
+ up(&file->mutex);
+
up(&ib_uverbs_idr_mutex);
return in_len;
-err_list:
- spin_lock_irq(&file->ucontext->lock);
- list_del(&uobj->uobject.list);
- spin_unlock_irq(&file->ucontext->lock);
+err_idr:
+ idr_remove(&ib_uverbs_qp_idr, uobj->uevent.uobject.id);
err_destroy:
ib_destroy_qp(qp);
@@ -966,8 +1033,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_qp cmd;
struct ib_uverbs_destroy_qp_resp resp;
struct ib_qp *qp;
- struct ib_uevent_object *uobj;
- struct ib_uverbs_event *evt, *tmp;
+ struct ib_uqp_object *uobj;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -981,7 +1047,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
if (!qp || qp->uobject->context != file->ucontext)
goto out;
- uobj = container_of(qp->uobject, struct ib_uevent_object, uobject);
+ uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+
+ if (!list_empty(&uobj->mcast_list)) {
+ ret = -EBUSY;
+ goto out;
+ }
ret = ib_destroy_qp(qp);
if (ret)
@@ -989,18 +1060,13 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle);
- spin_lock_irq(&file->ucontext->lock);
- list_del(&uobj->uobject.list);
- spin_unlock_irq(&file->ucontext->lock);
+ down(&file->mutex);
+ list_del(&uobj->uevent.uobject.list);
+ up(&file->mutex);
- spin_lock_irq(&file->async_file.lock);
- list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
- list_del(&evt->list);
- kfree(evt);
- }
- spin_unlock_irq(&file->async_file.lock);
+ ib_uverbs_release_uevent(file, &uobj->uevent);
- resp.events_reported = uobj->events_reported;
+ resp.events_reported = uobj->uevent.events_reported;
kfree(uobj);
@@ -1014,12 +1080,476 @@ out:
return ret ? ret : in_len;
}
+ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_post_send cmd;
+ struct ib_uverbs_post_send_resp resp;
+ struct ib_uverbs_send_wr *user_wr;
+ struct ib_send_wr *wr = NULL, *last, *next, *bad_wr;
+ struct ib_qp *qp;
+ int i, sg_ind;
+ ssize_t ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
+ cmd.sge_count * sizeof (struct ib_uverbs_sge))
+ return -EINVAL;
+
+ if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
+ return -EINVAL;
+
+ user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
+ if (!user_wr)
+ return -ENOMEM;
+
+ down(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (!qp || qp->uobject->context != file->ucontext)
+ goto out;
+
+ sg_ind = 0;
+ last = NULL;
+ for (i = 0; i < cmd.wr_count; ++i) {
+ if (copy_from_user(user_wr,
+ buf + sizeof cmd + i * cmd.wqe_size,
+ cmd.wqe_size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (user_wr->num_sge + sg_ind > cmd.sge_count) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+ user_wr->num_sge * sizeof (struct ib_sge),
+ GFP_KERNEL);
+ if (!next) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (!last)
+ wr = next;
+ else
+ last->next = next;
+ last = next;
+
+ next->next = NULL;
+ next->wr_id = user_wr->wr_id;
+ next->num_sge = user_wr->num_sge;
+ next->opcode = user_wr->opcode;
+ next->send_flags = user_wr->send_flags;
+ next->imm_data = (__be32 __force) user_wr->imm_data;
+
+ if (qp->qp_type == IB_QPT_UD) {
+ next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr,
+ user_wr->wr.ud.ah);
+ if (!next->wr.ud.ah) {
+ ret = -EINVAL;
+ goto out;
+ }
+ next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
+ next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+ } else {
+ switch (next->opcode) {
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ case IB_WR_RDMA_READ:
+ next->wr.rdma.remote_addr =
+ user_wr->wr.rdma.remote_addr;
+ next->wr.rdma.rkey =
+ user_wr->wr.rdma.rkey;
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ next->wr.atomic.remote_addr =
+ user_wr->wr.atomic.remote_addr;
+ next->wr.atomic.compare_add =
+ user_wr->wr.atomic.compare_add;
+ next->wr.atomic.swap = user_wr->wr.atomic.swap;
+ next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (next->num_sge) {
+ next->sg_list = (void *) next +
+ ALIGN(sizeof *next, sizeof (struct ib_sge));
+ if (copy_from_user(next->sg_list,
+ buf + sizeof cmd +
+ cmd.wr_count * cmd.wqe_size +
+ sg_ind * sizeof (struct ib_sge),
+ next->num_sge * sizeof (struct ib_sge))) {
+ ret = -EFAULT;
+ goto out;
+ }
+ sg_ind += next->num_sge;
+ } else
+ next->sg_list = NULL;
+ }
+
+ resp.bad_wr = 0;
+ ret = qp->device->post_send(qp, wr, &bad_wr);
+ if (ret)
+ for (next = wr; next; next = next->next) {
+ ++resp.bad_wr;
+ if (next == bad_wr)
+ break;
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ while (wr) {
+ next = wr->next;
+ kfree(wr);
+ wr = next;
+ }
+
+ kfree(user_wr);
+
+ return ret ? ret : in_len;
+}
+
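+/*
+ * Common unmarshalling for post_recv and post_srq_recv: each kernel
+ * WR is allocated together with its scatter list, with the SGE array
+ * placed at an ib_sge-aligned offset past the WR itself.
+ */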
+static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
+ int in_len,
+ u32 wr_count,
+ u32 sge_count,
+ u32 wqe_size)
+{
+ struct ib_uverbs_recv_wr *user_wr;
+ struct ib_recv_wr *wr = NULL, *last, *next;
+ int sg_ind;
+ int i;
+ int ret;
+
+ if (in_len < wqe_size * wr_count +
+ sge_count * sizeof (struct ib_uverbs_sge))
+ return ERR_PTR(-EINVAL);
+
+ if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
+ return ERR_PTR(-EINVAL);
+
+ user_wr = kmalloc(wqe_size, GFP_KERNEL);
+ if (!user_wr)
+ return ERR_PTR(-ENOMEM);
+
+ sg_ind = 0;
+ last = NULL;
+ for (i = 0; i < wr_count; ++i) {
+ if (copy_from_user(user_wr, buf + i * wqe_size,
+ wqe_size)) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ if (user_wr->num_sge + sg_ind > sge_count) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+ user_wr->num_sge * sizeof (struct ib_sge),
+ GFP_KERNEL);
+ if (!next) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (!last)
+ wr = next;
+ else
+ last->next = next;
+ last = next;
+
+ next->next = NULL;
+ next->wr_id = user_wr->wr_id;
+ next->num_sge = user_wr->num_sge;
+
+ if (next->num_sge) {
+ next->sg_list = (void *) next +
+ ALIGN(sizeof *next, sizeof (struct ib_sge));
+ if (copy_from_user(next->sg_list,
+ buf + wr_count * wqe_size +
+ sg_ind * sizeof (struct ib_sge),
+ next->num_sge * sizeof (struct ib_sge))) {
+ ret = -EFAULT;
+ goto err;
+ }
+ sg_ind += next->num_sge;
+ } else
+ next->sg_list = NULL;
+ }
+
+ kfree(user_wr);
+ return wr;
+
+err:
+ kfree(user_wr);
+
+ while (wr) {
+ next = wr->next;
+ kfree(wr);
+ wr = next;
+ }
+
+ return ERR_PTR(ret);
+}
+
+ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_post_recv cmd;
+ struct ib_uverbs_post_recv_resp resp;
+ struct ib_recv_wr *wr, *next, *bad_wr;
+ struct ib_qp *qp;
+ ssize_t ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+ in_len - sizeof cmd, cmd.wr_count,
+ cmd.sge_count, cmd.wqe_size);
+ if (IS_ERR(wr))
+ return PTR_ERR(wr);
+
+ down(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (!qp || qp->uobject->context != file->ucontext)
+ goto out;
+
+ resp.bad_wr = 0;
+ ret = qp->device->post_recv(qp, wr, &bad_wr);
+ if (ret)
+ for (next = wr; next; next = next->next) {
+ ++resp.bad_wr;
+ if (next == bad_wr)
+ break;
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ while (wr) {
+ next = wr->next;
+ kfree(wr);
+ wr = next;
+ }
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_post_srq_recv cmd;
+ struct ib_uverbs_post_srq_recv_resp resp;
+ struct ib_recv_wr *wr, *next, *bad_wr;
+ struct ib_srq *srq;
+ ssize_t ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+ in_len - sizeof cmd, cmd.wr_count,
+ cmd.sge_count, cmd.wqe_size);
+ if (IS_ERR(wr))
+ return PTR_ERR(wr);
+
+ down(&ib_uverbs_idr_mutex);
+
+ srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
+ if (!srq || srq->uobject->context != file->ucontext)
+ goto out;
+
+ resp.bad_wr = 0;
+ ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
+ if (ret)
+ for (next = wr; next; next = next->next) {
+ ++resp.bad_wr;
+ if (next == bad_wr)
+ break;
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ while (wr) {
+ next = wr->next;
+ kfree(wr);
+ wr = next;
+ }
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_ah cmd;
+ struct ib_uverbs_create_ah_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ struct ib_ah *ah;
+ struct ib_ah_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ down(&ib_uverbs_idr_mutex);
+
+ pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+ if (!pd || pd->uobject->context != file->ucontext) {
+ ret = -EINVAL;
+ goto err_up;
+ }
+
+ uobj->user_handle = cmd.user_handle;
+ uobj->context = file->ucontext;
+
+ attr.dlid = cmd.attr.dlid;
+ attr.sl = cmd.attr.sl;
+ attr.src_path_bits = cmd.attr.src_path_bits;
+ attr.static_rate = cmd.attr.static_rate;
+ attr.port_num = cmd.attr.port_num;
+ attr.grh.flow_label = cmd.attr.grh.flow_label;
+ attr.grh.sgid_index = cmd.attr.grh.sgid_index;
+ attr.grh.hop_limit = cmd.attr.grh.hop_limit;
+ attr.grh.traffic_class = cmd.attr.grh.traffic_class;
+ memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
+
+ ah = ib_create_ah(pd, &attr);
+ if (IS_ERR(ah)) {
+ ret = PTR_ERR(ah);
+ goto err_up;
+ }
+
+ ah->uobject = uobj;
+
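+	/*
+	 * Usual idr idiom: preallocate with idr_pre_get(), then try
+	 * the insert; -EAGAIN means the preallocation was used up and
+	 * we have to go around again.
+	 */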
+retry:
+ if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_destroy;
+ }
+
+ ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id);
+
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ goto err_destroy;
+
+ resp.ah_handle = uobj->id;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_idr;
+ }
+
+ down(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->ah_list);
+ up(&file->mutex);
+
+ up(&ib_uverbs_idr_mutex);
+
+ return in_len;
+
+err_idr:
+ idr_remove(&ib_uverbs_ah_idr, uobj->id);
+
+err_destroy:
+ ib_destroy_ah(ah);
+
+err_up:
+ up(&ib_uverbs_idr_mutex);
+
+ kfree(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len, int out_len)
+{
+ struct ib_uverbs_destroy_ah cmd;
+ struct ib_ah *ah;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle);
+ if (!ah || ah->uobject->context != file->ucontext)
+ goto out;
+
+ uobj = ah->uobject;
+
+ ret = ib_destroy_ah(ah);
+ if (ret)
+ goto out;
+
+ idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle);
+
+ down(&file->mutex);
+ list_del(&uobj->list);
+ up(&file->mutex);
+
+ kfree(uobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
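+/*
+ * Attachments are remembered on the QP's uobject so that
+ * ib_uverbs_cleanup_ucontext() can detach any groups a process
+ * leaves behind when it exits.
+ */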
ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_attach_mcast cmd;
struct ib_qp *qp;
+ struct ib_uqp_object *uobj;
+ struct ib_uverbs_mcast_entry *mcast;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1028,9 +1558,36 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
down(&ib_uverbs_idr_mutex);
qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
- if (qp && qp->uobject->context == file->ucontext)
- ret = ib_attach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
+ if (!qp || qp->uobject->context != file->ucontext)
+ goto out;
+
+ uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ list_for_each_entry(mcast, &uobj->mcast_list, list)
+ if (cmd.mlid == mcast->lid &&
+ !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
+ ret = 0;
+ goto out;
+ }
+
+ mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
+ if (!mcast) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ mcast->lid = cmd.mlid;
+ memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw);
+
+ ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid);
+	if (!ret)
+		list_add_tail(&mcast->list, &uobj->mcast_list);
+	else
+		kfree(mcast);
+
+out:
up(&ib_uverbs_idr_mutex);
return ret ? ret : in_len;
@@ -1041,7 +1598,9 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_detach_mcast cmd;
+ struct ib_uqp_object *uobj;
struct ib_qp *qp;
+ struct ib_uverbs_mcast_entry *mcast;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1050,9 +1609,24 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
down(&ib_uverbs_idr_mutex);
qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
- if (qp && qp->uobject->context == file->ucontext)
- ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
+ if (!qp || qp->uobject->context != file->ucontext)
+ goto out;
+
+ ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
+ if (ret)
+ goto out;
+ uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+
+ list_for_each_entry(mcast, &uobj->mcast_list, list)
+ if (cmd.mlid == mcast->lid &&
+ !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
+ list_del(&mcast->list);
+ kfree(mcast);
+ break;
+ }
+
+out:
up(&ib_uverbs_idr_mutex);
return ret ? ret : in_len;
@@ -1136,24 +1710,22 @@ retry:
resp.srq_handle = uobj->uobject.id;
- spin_lock_irq(&file->ucontext->lock);
- list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list);
- spin_unlock_irq(&file->ucontext->lock);
-
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_list;
+ goto err_idr;
}
+ down(&file->mutex);
+ list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list);
+ up(&file->mutex);
+
up(&ib_uverbs_idr_mutex);
return in_len;
-err_list:
- spin_lock_irq(&file->ucontext->lock);
- list_del(&uobj->uobject.list);
- spin_unlock_irq(&file->ucontext->lock);
+err_idr:
+ idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id);
err_destroy:
ib_destroy_srq(srq);
@@ -1186,7 +1758,6 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
}
attr.max_wr = cmd.max_wr;
- attr.max_sge = cmd.max_sge;
attr.srq_limit = cmd.srq_limit;
ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
@@ -1205,7 +1776,6 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_srq_resp resp;
struct ib_srq *srq;
struct ib_uevent_object *uobj;
- struct ib_uverbs_event *evt, *tmp;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1227,16 +1797,11 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle);
- spin_lock_irq(&file->ucontext->lock);
+ down(&file->mutex);
list_del(&uobj->uobject.list);
- spin_unlock_irq(&file->ucontext->lock);
+ up(&file->mutex);
- spin_lock_irq(&file->async_file.lock);
- list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
- list_del(&evt->list);
- kfree(evt);
- }
- spin_unlock_irq(&file->async_file.lock);
+ ib_uverbs_release_uevent(file, uobj);
resp.events_reported = uobj->events_reported;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index ce5bdb7af30..81737bd6fae 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -3,6 +3,7 @@
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -43,6 +44,7 @@
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/mount.h>
+#include <linux/cdev.h>
#include <asm/uaccess.h>
@@ -62,6 +64,8 @@ enum {
#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
+static struct class *uverbs_class;
+
DECLARE_MUTEX(ib_uverbs_idr_mutex);
DEFINE_IDR(ib_uverbs_pd_idr);
DEFINE_IDR(ib_uverbs_mr_idr);
@@ -72,31 +76,37 @@ DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
static spinlock_t map_lock;
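+/* Minor-number -> device lookup used by open(); protected by map_lock. */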
+static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len) = {
- [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params,
- [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
- [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
- [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
- [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid,
- [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey,
- [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
- [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
- [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
- [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
- [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
- [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
- [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
- [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
- [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
- [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
- [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
- [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
- [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
- [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
+ [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
+ [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
+ [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
+ [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
+ [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
+ [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
+ [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
+ [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
+ [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
+ [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
+ [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
+ [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
+ [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
+ [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
+ [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
+ [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
+ [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
+ [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
+ [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
+ [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
+ [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
+ [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
+ [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
+ [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
+ [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
};
static struct vfsmount *uverbs_event_mnt;
@@ -104,7 +114,66 @@ static struct vfsmount *uverbs_event_mnt;
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device);
-static int ib_dealloc_ucontext(struct ib_ucontext *context)
+static void ib_uverbs_release_dev(struct kref *ref)
+{
+ struct ib_uverbs_device *dev =
+ container_of(ref, struct ib_uverbs_device, ref);
+
+ kfree(dev);
+}
+
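+/*
+ * Drain events still queued for a CQ that is going away: completion
+ * events live on the CQ's completion channel (if any), async events
+ * on the file's async event queue.  Dropping the channel reference
+ * here balances the one taken when the CQ was bound to the channel.
+ */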
+void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
+ struct ib_uverbs_event_file *ev_file,
+ struct ib_ucq_object *uobj)
+{
+ struct ib_uverbs_event *evt, *tmp;
+
+ if (ev_file) {
+ spin_lock_irq(&ev_file->lock);
+ list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
+ list_del(&evt->list);
+ kfree(evt);
+ }
+ spin_unlock_irq(&ev_file->lock);
+
+ kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ }
+
+ spin_lock_irq(&file->async_file->lock);
+ list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
+ list_del(&evt->list);
+ kfree(evt);
+ }
+ spin_unlock_irq(&file->async_file->lock);
+}
+
+void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
+ struct ib_uevent_object *uobj)
+{
+ struct ib_uverbs_event *evt, *tmp;
+
+ spin_lock_irq(&file->async_file->lock);
+ list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
+ list_del(&evt->list);
+ kfree(evt);
+ }
+ spin_unlock_irq(&file->async_file->lock);
+}
+
+static void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj)
+{
+ struct ib_uverbs_mcast_entry *mcast, *tmp;
+
+ list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
+ ib_detach_mcast(qp, &mcast->gid, mcast->lid);
+ list_del(&mcast->list);
+ kfree(mcast);
+ }
+}
+
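+/*
+ * Tear down everything a process left behind: AHs first (they only
+ * reference PDs), then QPs (detaching any multicast groups first),
+ * then CQs and SRQs, matching the dependency order between the
+ * object types.
+ */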
+static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
+ struct ib_ucontext *context)
{
struct ib_uobject *uobj, *tmp;
@@ -113,30 +182,47 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context)
down(&ib_uverbs_idr_mutex);
- /* XXX Free AHs */
+ list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
+ struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id);
+ idr_remove(&ib_uverbs_ah_idr, uobj->id);
+ ib_destroy_ah(ah);
+ list_del(&uobj->list);
+ kfree(uobj);
+ }
list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id);
+ struct ib_uqp_object *uqp =
+ container_of(uobj, struct ib_uqp_object, uevent.uobject);
idr_remove(&ib_uverbs_qp_idr, uobj->id);
+ ib_uverbs_detach_umcast(qp, uqp);
ib_destroy_qp(qp);
list_del(&uobj->list);
- kfree(container_of(uobj, struct ib_uevent_object, uobject));
+ ib_uverbs_release_uevent(file, &uqp->uevent);
+ kfree(uqp);
}
list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id);
+ struct ib_uverbs_event_file *ev_file = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobj, struct ib_ucq_object, uobject);
idr_remove(&ib_uverbs_cq_idr, uobj->id);
ib_destroy_cq(cq);
list_del(&uobj->list);
- kfree(container_of(uobj, struct ib_ucq_object, uobject));
+ ib_uverbs_release_ucq(file, ev_file, ucq);
+ kfree(ucq);
}
list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id);
+ struct ib_uevent_object *uevent =
+ container_of(uobj, struct ib_uevent_object, uobject);
idr_remove(&ib_uverbs_srq_idr, uobj->id);
ib_destroy_srq(srq);
list_del(&uobj->list);
- kfree(container_of(uobj, struct ib_uevent_object, uobject));
+ ib_uverbs_release_uevent(file, uevent);
+ kfree(uevent);
}
/* XXX Free MWs */
@@ -175,6 +261,8 @@ static void ib_uverbs_release_file(struct kref *ref)
container_of(ref, struct ib_uverbs_file, ref);
module_put(file->device->ib_dev->owner);
+ kref_put(&file->device->ref, ib_uverbs_release_dev);
+
kfree(file);
}
@@ -188,25 +276,19 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
spin_lock_irq(&file->lock);
- while (list_empty(&file->event_list) && file->fd >= 0) {
+ while (list_empty(&file->event_list)) {
spin_unlock_irq(&file->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
if (wait_event_interruptible(file->poll_wait,
- !list_empty(&file->event_list) ||
- file->fd < 0))
+ !list_empty(&file->event_list)))
return -ERESTARTSYS;
spin_lock_irq(&file->lock);
}
- if (file->fd < 0) {
- spin_unlock_irq(&file->lock);
- return -ENODEV;
- }
-
event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
if (file->is_async)
@@ -248,26 +330,19 @@ static unsigned int ib_uverbs_event_poll(struct file *filp,
poll_wait(filp, &file->poll_wait, wait);
spin_lock_irq(&file->lock);
- if (file->fd < 0)
- pollflags = POLLERR;
- else if (!list_empty(&file->event_list))
+ if (!list_empty(&file->event_list))
pollflags = POLLIN | POLLRDNORM;
spin_unlock_irq(&file->lock);
return pollflags;
}
-static void ib_uverbs_event_release(struct ib_uverbs_event_file *file)
+void ib_uverbs_release_event_file(struct kref *ref)
{
- struct ib_uverbs_event *entry, *tmp;
+ struct ib_uverbs_event_file *file =
+ container_of(ref, struct ib_uverbs_event_file, ref);
- spin_lock_irq(&file->lock);
- if (file->fd != -1) {
- file->fd = -1;
- list_for_each_entry_safe(entry, tmp, &file->event_list, list)
- kfree(entry);
- }
- spin_unlock_irq(&file->lock);
+ kfree(file);
}
static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
@@ -280,21 +355,30 @@ static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_event *entry, *tmp;
+
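+	/*
+	 * Flush the queue; entries with a counter are still linked on
+	 * their uobject's list, so unlink them before freeing.
+	 */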
+ spin_lock_irq(&file->lock);
+ file->file = NULL;
+ list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
+ if (entry->counter)
+ list_del(&entry->obj_list);
+ kfree(entry);
+ }
+ spin_unlock_irq(&file->lock);
- ib_uverbs_event_release(file);
ib_uverbs_event_fasync(-1, filp, 0);
- kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+
+ if (file->is_async) {
+ ib_unregister_event_handler(&file->uverbs_file->event_handler);
+ kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+ }
+ kref_put(&file->ref, ib_uverbs_release_event_file);
return 0;
}
static struct file_operations uverbs_event_fops = {
- /*
- * No .owner field since we artificially create event files,
- * so there is no increment to the module reference count in
- * the open path. All event files come from a uverbs command
- * file, which already takes a module reference, so this is OK.
- */
+ .owner = THIS_MODULE,
.read = ib_uverbs_event_read,
.poll = ib_uverbs_event_poll,
.release = ib_uverbs_event_close,
@@ -303,27 +387,37 @@ static struct file_operations uverbs_event_fops = {
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
- struct ib_uverbs_file *file = cq_context;
- struct ib_ucq_object *uobj;
- struct ib_uverbs_event *entry;
- unsigned long flags;
+ struct ib_uverbs_event_file *file = cq_context;
+ struct ib_ucq_object *uobj;
+ struct ib_uverbs_event *entry;
+ unsigned long flags;
+
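+	/*
+	 * A NULL event file means the CQ was created without a
+	 * completion channel; a cleared file->file means the channel
+	 * was already closed.  Either way there is nobody to deliver
+	 * the event to.
+	 */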
+ if (!file)
+ return;
+
+ spin_lock_irqsave(&file->lock, flags);
+ if (!file->file) {
+ spin_unlock_irqrestore(&file->lock, flags);
+ return;
+ }
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
- if (!entry)
+ if (!entry) {
+ spin_unlock_irqrestore(&file->lock, flags);
return;
+ }
uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
entry->desc.comp.cq_handle = cq->uobject->user_handle;
entry->counter = &uobj->comp_events_reported;
- spin_lock_irqsave(&file->comp_file[0].lock, flags);
- list_add_tail(&entry->list, &file->comp_file[0].event_list);
+ list_add_tail(&entry->list, &file->event_list);
list_add_tail(&entry->obj_list, &uobj->comp_list);
- spin_unlock_irqrestore(&file->comp_file[0].lock, flags);
+ spin_unlock_irqrestore(&file->lock, flags);
- wake_up_interruptible(&file->comp_file[0].poll_wait);
- kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&file->poll_wait);
+ kill_fasync(&file->async_queue, SIGIO, POLL_IN);
}
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
@@ -334,32 +428,37 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
struct ib_uverbs_event *entry;
unsigned long flags;
+ spin_lock_irqsave(&file->async_file->lock, flags);
+ if (!file->async_file->file) {
+ spin_unlock_irqrestore(&file->async_file->lock, flags);
+ return;
+ }
+
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
- if (!entry)
+ if (!entry) {
+ spin_unlock_irqrestore(&file->async_file->lock, flags);
return;
+ }
entry->desc.async.element = element;
entry->desc.async.event_type = event;
entry->counter = counter;
- spin_lock_irqsave(&file->async_file.lock, flags);
- list_add_tail(&entry->list, &file->async_file.event_list);
+ list_add_tail(&entry->list, &file->async_file->event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
- spin_unlock_irqrestore(&file->async_file.lock, flags);
+ spin_unlock_irqrestore(&file->async_file->lock, flags);
- wake_up_interruptible(&file->async_file.poll_wait);
- kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&file->async_file->poll_wait);
+ kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
}
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_ucq_object *uobj;
+ struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
+ struct ib_ucq_object, uobject);
- uobj = container_of(event->element.cq->uobject,
- struct ib_ucq_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
+ ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
event->event, &uobj->async_list,
&uobj->async_events_reported);
@@ -389,8 +488,8 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
&uobj->events_reported);
}
-static void ib_uverbs_event_handler(struct ib_event_handler *handler,
- struct ib_event *event)
+void ib_uverbs_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
{
struct ib_uverbs_file *file =
container_of(handler, struct ib_uverbs_file, event_handler);
@@ -399,38 +498,90 @@ static void ib_uverbs_event_handler(struct ib_event_handler *handler,
NULL, NULL);
}
-static int ib_uverbs_event_init(struct ib_uverbs_event_file *file,
- struct ib_uverbs_file *uverbs_file)
+struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
+ int is_async, int *fd)
{
+ struct ib_uverbs_event_file *ev_file;
struct file *filp;
+ int ret;
- spin_lock_init(&file->lock);
- INIT_LIST_HEAD(&file->event_list);
- init_waitqueue_head(&file->poll_wait);
- file->uverbs_file = uverbs_file;
- file->async_queue = NULL;
-
- file->fd = get_unused_fd();
- if (file->fd < 0)
- return file->fd;
+ ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
+ if (!ev_file)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&ev_file->ref);
+ spin_lock_init(&ev_file->lock);
+ INIT_LIST_HEAD(&ev_file->event_list);
+ init_waitqueue_head(&ev_file->poll_wait);
+ ev_file->uverbs_file = uverbs_file;
+ ev_file->async_queue = NULL;
+ ev_file->is_async = is_async;
+
+ *fd = get_unused_fd();
+ if (*fd < 0) {
+ ret = *fd;
+ goto err;
+ }
filp = get_empty_filp();
if (!filp) {
- put_unused_fd(file->fd);
- return -ENFILE;
+ ret = -ENFILE;
+ goto err_fd;
}
- filp->f_op = &uverbs_event_fops;
+ ev_file->file = filp;
+
+ /*
+ * fops_get() can't fail here, because we're coming from a
+ * system call on a uverbs file, which will already have a
+ * module reference.
+ */
+ filp->f_op = fops_get(&uverbs_event_fops);
filp->f_vfsmnt = mntget(uverbs_event_mnt);
filp->f_dentry = dget(uverbs_event_mnt->mnt_root);
filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
filp->f_flags = O_RDONLY;
filp->f_mode = FMODE_READ;
- filp->private_data = file;
+ filp->private_data = ev_file;
- fd_install(file->fd, filp);
+ return filp;
- return 0;
+err_fd:
+ put_unused_fd(*fd);
+
+err:
+ kfree(ev_file);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Look up a completion event file by FD. If lookup is successful,
+ * takes a ref to the event file struct that it returns; if
+ * unsuccessful, returns NULL.
+ */
+struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
+{
+ struct ib_uverbs_event_file *ev_file = NULL;
+ struct file *filp;
+
+ filp = fget(fd);
+ if (!filp)
+ return NULL;
+
+ if (filp->f_op != &uverbs_event_fops)
+ goto out;
+
+ ev_file = filp->private_data;
+ if (ev_file->is_async) {
+ ev_file = NULL;
+ goto out;
+ }
+
+ kref_get(&ev_file->ref);
+
+out:
+ fput(filp);
+ return ev_file;
}
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
@@ -448,11 +599,13 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (hdr.in_words * 4 != count)
return -EINVAL;
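+	/*
+	 * Reject commands the table doesn't implement, as well as
+	 * ones the low-level driver hasn't advertised in its
+	 * uverbs_cmd_mask.
+	 */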
- if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table))
+ if (hdr.command < 0 ||
+ hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
+ !uverbs_cmd_table[hdr.command] ||
+ !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
return -EINVAL;
- if (!file->ucontext &&
- hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS &&
+ if (!file->ucontext &&
hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
return -EINVAL;
@@ -472,60 +625,45 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
- struct ib_uverbs_device *dev =
- container_of(inode->i_cdev, struct ib_uverbs_device, dev);
+ struct ib_uverbs_device *dev;
struct ib_uverbs_file *file;
- int i = 0;
int ret;
- if (!try_module_get(dev->ib_dev->owner))
- return -ENODEV;
-
- file = kmalloc(sizeof *file +
- (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file),
- GFP_KERNEL);
- if (!file)
- return -ENOMEM;
-
- file->device = dev;
- kref_init(&file->ref);
+ spin_lock(&map_lock);
+ dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR];
+ if (dev)
+ kref_get(&dev->ref);
+ spin_unlock(&map_lock);
- file->ucontext = NULL;
+ if (!dev)
+ return -ENXIO;
- ret = ib_uverbs_event_init(&file->async_file, file);
- if (ret)
+ if (!try_module_get(dev->ib_dev->owner)) {
+ ret = -ENODEV;
goto err;
+ }
- file->async_file.is_async = 1;
-
- kref_get(&file->ref);
-
- for (i = 0; i < dev->num_comp; ++i) {
- ret = ib_uverbs_event_init(&file->comp_file[i], file);
- if (ret)
- goto err_async;
- kref_get(&file->ref);
- file->comp_file[i].is_async = 0;
+ file = kmalloc(sizeof *file, GFP_KERNEL);
+ if (!file) {
+ ret = -ENOMEM;
+ goto err_module;
}
+ file->device = dev;
+ file->ucontext = NULL;
+ file->async_file = NULL;
+ kref_init(&file->ref);
+ init_MUTEX(&file->mutex);
filp->private_data = file;
- INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev,
- ib_uverbs_event_handler);
- if (ib_register_event_handler(&file->event_handler))
- goto err_async;
-
return 0;
-err_async:
- while (i--)
- ib_uverbs_event_release(&file->comp_file[i]);
-
- ib_uverbs_event_release(&file->async_file);
+err_module:
+ module_put(dev->ib_dev->owner);
err:
- kref_put(&file->ref, ib_uverbs_release_file);
+ kref_put(&dev->ref, ib_uverbs_release_dev);
return ret;
}
@@ -533,14 +671,11 @@ err:
static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_file *file = filp->private_data;
- int i;
- ib_unregister_event_handler(&file->event_handler);
- ib_uverbs_event_release(&file->async_file);
- ib_dealloc_ucontext(file->ucontext);
+ ib_uverbs_cleanup_ucontext(file, file->ucontext);
- for (i = 0; i < file->device->num_comp; ++i)
- ib_uverbs_event_release(&file->comp_file[i]);
+ if (file->async_file)
+ kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
kref_put(&file->ref, ib_uverbs_release_file);
@@ -570,27 +705,25 @@ static struct ib_client uverbs_client = {
static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
{
- struct ib_uverbs_device *dev =
- container_of(class_dev, struct ib_uverbs_device, class_dev);
+ struct ib_uverbs_device *dev = class_get_devdata(class_dev);
+
+ if (!dev)
+ return -ENODEV;
return sprintf(buf, "%s\n", dev->ib_dev->name);
}
static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
-static void ib_uverbs_release_class_dev(struct class_device *class_dev)
+static ssize_t show_dev_abi_version(struct class_device *class_dev, char *buf)
{
- struct ib_uverbs_device *dev =
- container_of(class_dev, struct ib_uverbs_device, class_dev);
+ struct ib_uverbs_device *dev = class_get_devdata(class_dev);
- cdev_del(&dev->dev);
- clear_bit(dev->devnum, dev_map);
- kfree(dev);
-}
+ if (!dev)
+ return -ENODEV;
-static struct class uverbs_class = {
- .name = "infiniband_verbs",
- .release = ib_uverbs_release_class_dev
-};
+ return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
+}
+static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
static ssize_t show_abi_version(struct class *class, char *buf)
{
@@ -605,11 +738,11 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (!device->alloc_ucontext)
return;
- uverbs_dev = kmalloc(sizeof *uverbs_dev, GFP_KERNEL);
+ uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
if (!uverbs_dev)
return;
- memset(uverbs_dev, 0, sizeof *uverbs_dev);
+ kref_init(&uverbs_dev->ref);
spin_lock(&map_lock);
uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -620,41 +753,49 @@ static void ib_uverbs_add_one(struct ib_device *device)
set_bit(uverbs_dev->devnum, dev_map);
spin_unlock(&map_lock);
- uverbs_dev->ib_dev = device;
- uverbs_dev->num_comp = 1;
+ uverbs_dev->ib_dev = device;
+ uverbs_dev->num_comp_vectors = 1;
- if (device->mmap)
- cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops);
- else
- cdev_init(&uverbs_dev->dev, &uverbs_fops);
- uverbs_dev->dev.owner = THIS_MODULE;
- kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum);
- if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+ uverbs_dev->dev = cdev_alloc();
+ if (!uverbs_dev->dev)
goto err;
+ uverbs_dev->dev->owner = THIS_MODULE;
+ uverbs_dev->dev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
+ kobject_set_name(&uverbs_dev->dev->kobj, "uverbs%d", uverbs_dev->devnum);
+ if (cdev_add(uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+ goto err_cdev;
- uverbs_dev->class_dev.class = &uverbs_class;
- uverbs_dev->class_dev.dev = device->dma_device;
- uverbs_dev->class_dev.devt = uverbs_dev->dev.dev;
- snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum);
- if (class_device_register(&uverbs_dev->class_dev))
+ uverbs_dev->class_dev = class_device_create(uverbs_class, NULL,
+ uverbs_dev->dev->dev,
+ device->dma_device,
+ "uverbs%d", uverbs_dev->devnum);
+ if (IS_ERR(uverbs_dev->class_dev))
goto err_cdev;
- if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev))
+ class_set_devdata(uverbs_dev->class_dev, uverbs_dev);
+
+ if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_ibdev))
+ goto err_class;
+ if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_abi_version))
goto err_class;
+ spin_lock(&map_lock);
+ dev_table[uverbs_dev->devnum] = uverbs_dev;
+ spin_unlock(&map_lock);
+
ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
err_class:
- class_device_unregister(&uverbs_dev->class_dev);
+ class_device_destroy(uverbs_class, uverbs_dev->dev->dev);
err_cdev:
- cdev_del(&uverbs_dev->dev);
+ cdev_del(uverbs_dev->dev);
clear_bit(uverbs_dev->devnum, dev_map);
err:
- kfree(uverbs_dev);
+ kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
return;
}
@@ -665,7 +806,16 @@ static void ib_uverbs_remove_one(struct ib_device *device)
if (!uverbs_dev)
return;
- class_device_unregister(&uverbs_dev->class_dev);
+ class_set_devdata(uverbs_dev->class_dev, NULL);
+ class_device_destroy(uverbs_class, uverbs_dev->dev->dev);
+ cdev_del(uverbs_dev->dev);
+
+ spin_lock(&map_lock);
+ dev_table[uverbs_dev->devnum] = NULL;
+ spin_unlock(&map_lock);
+
+ clear_bit(uverbs_dev->devnum, dev_map);
+ kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
}
static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
@@ -695,13 +845,14 @@ static int __init ib_uverbs_init(void)
goto out;
}
- ret = class_register(&uverbs_class);
- if (ret) {
+ uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
+ if (IS_ERR(uverbs_class)) {
+ ret = PTR_ERR(uverbs_class);
printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
goto out_chrdev;
}
- ret = class_create_file(&uverbs_class, &class_attr_abi_version);
+ ret = class_create_file(uverbs_class, &class_attr_abi_version);
if (ret) {
printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
goto out_class;
@@ -735,7 +886,7 @@ out_fs:
unregister_filesystem(&uverbs_event_fs);
out_class:
- class_unregister(&uverbs_class);
+ class_destroy(uverbs_class);
out_chrdev:
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
@@ -749,8 +900,15 @@ static void __exit ib_uverbs_cleanup(void)
ib_unregister_client(&uverbs_client);
mntput(uverbs_event_mnt);
unregister_filesystem(&uverbs_event_fs);
- class_unregister(&uverbs_class);
+ class_destroy(uverbs_class);
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+ idr_destroy(&ib_uverbs_pd_idr);
+ idr_destroy(&ib_uverbs_mr_idr);
+ idr_destroy(&ib_uverbs_mw_idr);
+ idr_destroy(&ib_uverbs_ah_idr);
+ idr_destroy(&ib_uverbs_cq_idr);
+ idr_destroy(&ib_uverbs_qp_idr);
+ idr_destroy(&ib_uverbs_srq_idr);
}
module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 5081d903e56..4c15e112736 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -40,6 +40,7 @@
#include <linux/errno.h>
#include <linux/err.h>
+#include <linux/string.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -324,16 +325,8 @@ EXPORT_SYMBOL(ib_destroy_cq);
int ib_resize_cq(struct ib_cq *cq,
int cqe)
{
- int ret;
-
- if (!cq->device->resize_cq)
- return -ENOSYS;
-
- ret = cq->device->resize_cq(cq, &cqe);
- if (!ret)
- cq->cqe = cqe;
-
- return ret;
+ return cq->device->resize_cq ?
+ cq->device->resize_cq(cq, cqe) : -ENOSYS;
}
EXPORT_SYMBOL(ib_resize_cq);
@@ -523,16 +516,22 @@ EXPORT_SYMBOL(ib_dealloc_fmr);
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
- return qp->device->attach_mcast ?
- qp->device->attach_mcast(qp, gid, lid) :
- -ENOSYS;
+ if (!qp->device->attach_mcast)
+ return -ENOSYS;
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ return -EINVAL;
+
+ return qp->device->attach_mcast(qp, gid, lid);
}
EXPORT_SYMBOL(ib_attach_mcast);
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
- return qp->device->detach_mcast ?
- qp->device->detach_mcast(qp, gid, lid) :
- -ENOSYS;
+ if (!qp->device->detach_mcast)
+ return -ENOSYS;
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ return -EINVAL;
+
+ return qp->device->detach_mcast(qp, gid, lid);
}
EXPORT_SYMBOL(ib_detach_mcast);
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile
index c44f7bae542..47ec5a7cba0 100644
--- a/drivers/infiniband/hw/mthca/Makefile
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -7,4 +7,5 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
- mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o
+ mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o \
+ mthca_catas.o
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index 889e8509673..22fdc446f25 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -34,6 +34,8 @@
*/
#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
new file mode 100644
index 00000000000..c3bec7490f5
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+
+#include "mthca_dev.h"
+
+enum {
+ MTHCA_CATAS_POLL_INTERVAL = 5 * HZ,
+
+ MTHCA_CATAS_TYPE_INTERNAL = 0,
+ MTHCA_CATAS_TYPE_UPLINK = 3,
+ MTHCA_CATAS_TYPE_DDR = 4,
+ MTHCA_CATAS_TYPE_PARITY = 5,
+};
+
+static DEFINE_SPINLOCK(catas_lock);
+
+static void handle_catas(struct mthca_dev *dev)
+{
+ struct ib_event event;
+ const char *type;
+ int i;
+
+ event.device = &dev->ib_dev;
+ event.event = IB_EVENT_DEVICE_FATAL;
+ event.element.port_num = 0;
+
+ ib_dispatch_event(&event);
+
+ switch (swab32(readl(dev->catas_err.map)) >> 24) {
+ case MTHCA_CATAS_TYPE_INTERNAL:
+ type = "internal error";
+ break;
+ case MTHCA_CATAS_TYPE_UPLINK:
+ type = "uplink bus error";
+ break;
+ case MTHCA_CATAS_TYPE_DDR:
+ type = "DDR data error";
+ break;
+ case MTHCA_CATAS_TYPE_PARITY:
+ type = "internal parity error";
+ break;
+ default:
+ type = "unknown error";
+ break;
+ }
+
+ mthca_err(dev, "Catastrophic error detected: %s\n", type);
+ for (i = 0; i < dev->catas_err.size; ++i)
+ mthca_err(dev, " buf[%02x]: %08x\n",
+ i, swab32(readl(dev->catas_err.map + i)));
+}
+
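+/*
+ * Poll the catastrophic error buffer rather than relying on an
+ * event: once the HCA has hit a fatal error it may no longer be able
+ * to generate interrupts.  The timer rearms itself every
+ * MTHCA_CATAS_POLL_INTERVAL until mthca_stop_catas_poll() sets the
+ * stop flag under catas_lock.
+ */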
+static void poll_catas(unsigned long dev_ptr)
+{
+ struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
+ unsigned long flags;
+ int i;
+
+ for (i = 0; i < dev->catas_err.size; ++i)
+ if (readl(dev->catas_err.map + i)) {
+ handle_catas(dev);
+ return;
+ }
+
+ spin_lock_irqsave(&catas_lock, flags);
+ if (!dev->catas_err.stop)
+ mod_timer(&dev->catas_err.timer,
+ jiffies + MTHCA_CATAS_POLL_INTERVAL);
+ spin_unlock_irqrestore(&catas_lock, flags);
+}
+
+void mthca_start_catas_poll(struct mthca_dev *dev)
+{
+ unsigned long addr;
+
+ init_timer(&dev->catas_err.timer);
+ dev->catas_err.stop = 0;
+ dev->catas_err.map = NULL;
+
+ addr = pci_resource_start(dev->pdev, 0) +
+ ((pci_resource_len(dev->pdev, 0) - 1) &
+ dev->catas_err.addr);
+
+ if (!request_mem_region(addr, dev->catas_err.size * 4,
+ DRV_NAME)) {
+ mthca_warn(dev, "couldn't request catastrophic error region "
+ "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
+ return;
+ }
+
+ dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
+ if (!dev->catas_err.map) {
+ mthca_warn(dev, "couldn't map catastrophic error region "
+ "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
+ release_mem_region(addr, dev->catas_err.size * 4);
+ return;
+ }
+
+ dev->catas_err.timer.data = (unsigned long) dev;
+ dev->catas_err.timer.function = poll_catas;
+ dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL;
+ add_timer(&dev->catas_err.timer);
+}
+
+void mthca_stop_catas_poll(struct mthca_dev *dev)
+{
+ spin_lock_irq(&catas_lock);
+ dev->catas_err.stop = 1;
+ spin_unlock_irq(&catas_lock);
+
+ del_timer_sync(&dev->catas_err.timer);
+
+ if (dev->catas_err.map) {
+ iounmap(dev->catas_err.map);
+ release_mem_region(pci_resource_start(dev->pdev, 0) +
+ ((pci_resource_len(dev->pdev, 0) - 1) &
+ dev->catas_err.addr),
+ dev->catas_err.size * 4);
+ }
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index cc758a2d2bc..9ed34587fc5 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -524,7 +525,7 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
}
struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
- unsigned int gfp_mask)
+ gfp_t gfp_mask)
{
struct mthca_mailbox *mailbox;
@@ -605,7 +606,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
err = -EINVAL;
goto out;
}
- for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) {
+ for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i) {
if (virt != -1) {
pages[nent * 2] = cpu_to_be64(virt);
virt += 1 << lg;
@@ -616,7 +617,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
ts += 1 << (lg - 10);
++tc;
- if (nent == MTHCA_MAILBOX_SIZE / 16) {
+ if (++nent == MTHCA_MAILBOX_SIZE / 16) {
err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
CMD_TIME_CLASS_B, status);
if (err || *status)
@@ -706,9 +707,13 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
dev->cmd.max_cmds = 1 << lg;
+ MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET);
+ MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
mthca_dbg(dev, "FW version %012llx, max commands %d\n",
(unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
+ mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n",
+ (unsigned long long) dev->catas_err.addr, dev->catas_err.size);
if (mthca_is_memfree(dev)) {
MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
@@ -933,9 +938,9 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
goto out;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
- dev_lim->max_srq_sz = 1 << field;
+ dev_lim->max_srq_sz = (1 << field) - 1;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
- dev_lim->max_qp_sz = 1 << field;
+ dev_lim->max_qp_sz = (1 << field) - 1;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET);
dev_lim->reserved_qps = 1 << (field & 0xf);
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET);
@@ -1045,6 +1050,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars);
mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
		  dev_lim->max_qp_per_mcg, dev_lim->reserved_mgms);
+ mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n",
+ dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz);
mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
@@ -1053,6 +1060,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
dev_lim->hca.arbel.resize_srq = field & 1;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
dev_lim->max_sg = min_t(int, field, dev_lim->max_sg);
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET);
+ dev_lim->max_desc_sz = min_t(int, size, dev_lim->max_desc_sz);
MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET);
dev_lim->mpt_entry_sz = size;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET);
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index 65f976a13e0..18175bec84c 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -248,7 +248,7 @@ void mthca_cmd_event(struct mthca_dev *dev, u16 token,
u8 status, u64 out_param);
struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
- unsigned int gfp_mask);
+ gfp_t gfp_mask);
void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 8600b6c3e0c..4a8adcef207 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -208,7 +208,7 @@ static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
}
}
-void mthca_cq_event(struct mthca_dev *dev, u32 cqn)
+void mthca_cq_completion(struct mthca_dev *dev, u32 cqn)
{
struct mthca_cq *cq;
@@ -224,12 +224,41 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
+void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
+ enum ib_event_type event_type)
+{
+ struct mthca_cq *cq;
+ struct ib_event event;
+
+ spin_lock(&dev->cq_table.lock);
+
+ cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
+
+ if (cq)
+ atomic_inc(&cq->refcount);
+ spin_unlock(&dev->cq_table.lock);
+
+ if (!cq) {
+ mthca_warn(dev, "Async event for bogus CQ %08x\n", cqn);
+ return;
+ }
+
+ event.device = &dev->ib_dev;
+ event.event = event_type;
+ event.element.cq = &cq->ibcq;
+ if (cq->ibcq.event_handler)
+ cq->ibcq.event_handler(&event, cq->ibcq.cq_context);
+
+ if (atomic_dec_and_test(&cq->refcount))
+ wake_up(&cq->wait);
+}
+
void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
struct mthca_srq *srq)
{
struct mthca_cq *cq;
struct mthca_cqe *cqe;
- int prod_index;
+ u32 prod_index;
int nfreed = 0;
spin_lock_irq(&dev->cq_table.lock);
@@ -264,19 +293,15 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
* Now sweep backwards through the CQ, removing CQ entries
* that match our QP by copying older entries on top of them.
*/
- while (prod_index > cq->cons_index) {
- cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe);
+ while ((int) --prod_index - (int) cq->cons_index >= 0) {
+ cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
if (cqe->my_qpn == cpu_to_be32(qpn)) {
if (srq)
mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe));
++nfreed;
- }
- else if (nfreed)
- memcpy(get_cqe(cq, (prod_index - 1 + nfreed) &
- cq->ibcq.cqe),
- cqe,
- MTHCA_CQ_ENTRY_SIZE);
- --prod_index;
+ } else if (nfreed)
+ memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe),
+ cqe, MTHCA_CQ_ENTRY_SIZE);
}
if (nfreed) {
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 7bff5a8425f..497ff794ef6 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -83,6 +83,8 @@ enum {
/* Arbel FW gives us these, but we need them for Tavor */
MTHCA_MPT_ENTRY_SIZE = 0x40,
MTHCA_MTT_SEG_SIZE = 0x40,
+
+ MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
};
enum {
@@ -128,12 +130,17 @@ struct mthca_limits {
int num_uars;
int max_sg;
int num_qps;
+ int max_wqes;
+ int max_desc_sz;
+ int max_qp_init_rdma;
int reserved_qps;
int num_srqs;
+ int max_srq_wqes;
int reserved_srqs;
int num_eecs;
int reserved_eecs;
int num_cqs;
+ int max_cqes;
int reserved_cqs;
int num_eqs;
int reserved_eqs;
@@ -148,6 +155,8 @@ struct mthca_limits {
int reserved_mcgs;
int num_pds;
int reserved_pds;
+ u32 page_size_cap;
+ u32 flags;
u8 port_width_cap;
};
@@ -251,6 +260,14 @@ struct mthca_mcg_table {
struct mthca_icm_table *table;
};
+struct mthca_catas_err {
+ u64 addr;
+ u32 __iomem *map;
+ unsigned long stop;
+ u32 size;
+ struct timer_list timer;
+};
+
struct mthca_dev {
struct ib_device ib_dev;
struct pci_dev *pdev;
@@ -311,6 +328,8 @@ struct mthca_dev {
struct mthca_av_table av_table;
struct mthca_mcg_table mcg_table;
+ struct mthca_catas_err catas_err;
+
struct mthca_uar driver_uar;
struct mthca_db_table *db_tab;
struct mthca_pd driver_pd;
@@ -398,6 +417,9 @@ void mthca_cleanup_mcg_table(struct mthca_dev *dev);
int mthca_register_device(struct mthca_dev *dev);
void mthca_unregister_device(struct mthca_dev *dev);
+void mthca_start_catas_poll(struct mthca_dev *dev);
+void mthca_stop_catas_poll(struct mthca_dev *dev);
+
int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
@@ -440,13 +462,17 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
struct mthca_cq *cq);
void mthca_free_cq(struct mthca_dev *dev,
struct mthca_cq *cq);
-void mthca_cq_event(struct mthca_dev *dev, u32 cqn);
+void mthca_cq_completion(struct mthca_dev *dev, u32 cqn);
+void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
+ enum ib_event_type event_type);
void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
struct mthca_srq *srq);
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
struct ib_srq_attr *attr, struct mthca_srq *srq);
void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
+int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask);
void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
enum ib_event_type event_type);
void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 78152a8ad17..34d68e5a72d 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -83,7 +83,8 @@ enum {
MTHCA_EVENT_TYPE_PATH_MIG = 0x01,
MTHCA_EVENT_TYPE_COMM_EST = 0x02,
MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03,
- MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13,
+ MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13,
+ MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14,
MTHCA_EVENT_TYPE_CQ_ERROR = 0x04,
MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05,
MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06,
@@ -110,8 +111,9 @@ enum {
(1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \
(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
-#define MTHCA_SRQ_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \
- (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE)
+#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
+ (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
#define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD)
#define MTHCA_EQ_DB_INC_CI (1 << 24)
@@ -142,6 +144,9 @@ struct mthca_eqe {
__be32 qpn;
} __attribute__((packed)) qp;
struct {
+ __be32 srqn;
+ } __attribute__((packed)) srq;
+ struct {
__be32 cqn;
u32 reserved1;
u8 reserved2[3];
@@ -287,7 +292,7 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
case MTHCA_EVENT_TYPE_COMP:
disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
disarm_cq(dev, eq->eqn, disarm_cqn);
- mthca_cq_event(dev, disarm_cqn);
+ mthca_cq_completion(dev, disarm_cqn);
break;
case MTHCA_EVENT_TYPE_PATH_MIG:
@@ -305,6 +310,16 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
IB_EVENT_SQ_DRAINED);
break;
+ case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_QP_LAST_WQE_REACHED);
+ break;
+
+ case MTHCA_EVENT_TYPE_SRQ_LIMIT:
+ mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
+ IB_EVENT_SRQ_LIMIT_REACHED);
+ break;
+
case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
IB_EVENT_QP_FATAL);
@@ -349,6 +364,8 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
eqe->event.cq_err.syndrome == 1 ?
"overrun" : "access violation",
be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
+ mthca_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn),
+ IB_EVENT_CQ_ERR);
break;
case MTHCA_EVENT_TYPE_EQ_OVERFLOW:
@@ -396,20 +413,21 @@ static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs
writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
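+	/*
+	 * The ECR bits identify event queues with pending events; a
+	 * zero value means the interrupt came from another device
+	 * sharing the line, so report IRQ_NONE.
+	 */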
- if (ecr) {
- writel(ecr, dev->eq_regs.tavor.ecr_base +
- MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
+ if (!ecr)
+ return IRQ_NONE;
- for (i = 0; i < MTHCA_NUM_EQ; ++i)
- if (ecr & dev->eq_table.eq[i].eqn_mask &&
- mthca_eq_int(dev, &dev->eq_table.eq[i])) {
+ writel(ecr, dev->eq_regs.tavor.ecr_base +
+ MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
+
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
+ if (ecr & dev->eq_table.eq[i].eqn_mask) {
+ if (mthca_eq_int(dev, &dev->eq_table.eq[i]))
tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
dev->eq_table.eq[i].cons_index);
- tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
- }
- }
+ tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
+ }
- return IRQ_RETVAL(ecr);
+ return IRQ_HANDLED;
}
static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr,
@@ -836,7 +854,7 @@ int __devinit mthca_init_eq_table(struct mthca_dev *dev)
dev->eq_table.clr_mask =
swab32(1 << (dev->eq_table.inta_pin & 31));
dev->eq_table.clr_int = dev->clr_base +
- (dev->eq_table.inta_pin < 31 ? 4 : 0);
+ (dev->eq_table.inta_pin < 32 ? 4 : 0);
}
dev->eq_table.arm_mask = 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 9804174f7f3..1229c604c6e 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -34,6 +34,9 @@
* $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $
*/
+#include <linux/string.h>
+#include <linux/slab.h>
+
#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
@@ -46,11 +49,6 @@ enum {
MTHCA_VENDOR_CLASS2 = 0xa
};
-struct mthca_trap_mad {
- struct ib_mad *mad;
- DECLARE_PCI_UNMAP_ADDR(mapping)
-};
-
static void update_sm_ah(struct mthca_dev *dev,
u8 port_num, u16 lid, u8 sl)
{
@@ -116,49 +114,14 @@ static void forward_trap(struct mthca_dev *dev,
struct ib_mad *mad)
{
int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
- struct mthca_trap_mad *tmad;
- struct ib_sge gather_list;
- struct ib_send_wr *bad_wr, wr = {
- .opcode = IB_WR_SEND,
- .sg_list = &gather_list,
- .num_sge = 1,
- .send_flags = IB_SEND_SIGNALED,
- .wr = {
- .ud = {
- .remote_qpn = qpn,
- .remote_qkey = qpn ? IB_QP1_QKEY : 0,
- .timeout_ms = 0
- }
- }
- };
+ struct ib_mad_send_buf *send_buf;
struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
int ret;
unsigned long flags;
if (agent) {
- tmad = kmalloc(sizeof *tmad, GFP_KERNEL);
- if (!tmad)
- return;
-
- tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL);
- if (!tmad->mad) {
- kfree(tmad);
- return;
- }
-
- memcpy(tmad->mad, mad, sizeof *mad);
-
- wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr;
- wr.wr_id = (unsigned long) tmad;
-
- gather_list.addr = dma_map_single(agent->device->dma_device,
- tmad->mad,
- sizeof *tmad->mad,
- DMA_TO_DEVICE);
- gather_list.length = sizeof *tmad->mad;
- gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey;
- pci_unmap_addr_set(tmad, mapping, gather_list.addr);
-
+ send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
+ IB_MGMT_MAD_DATA, GFP_ATOMIC);
/*
* We rely here on the fact that MLX QPs don't use the
* address handle after the send is posted (this is
@@ -166,21 +129,15 @@ static void forward_trap(struct mthca_dev *dev,
	 * wrong following the IB spec strictly, but we know
	 * it's OK for our devices).

*/
spin_lock_irqsave(&dev->sm_lock, flags);
- wr.wr.ud.ah = dev->sm_ah[port_num - 1];
- if (wr.wr.ud.ah)
- ret = ib_post_send_mad(agent, &wr, &bad_wr);
+ memcpy(send_buf->mad, mad, sizeof *mad);
+ if ((send_buf->ah = dev->sm_ah[port_num - 1]))
+ ret = ib_post_send_mad(send_buf, NULL);
else
ret = -EINVAL;
spin_unlock_irqrestore(&dev->sm_lock, flags);
- if (ret) {
- dma_unmap_single(agent->device->dma_device,
- pci_unmap_addr(tmad, mapping),
- sizeof *tmad->mad,
- DMA_TO_DEVICE);
- kfree(tmad->mad);
- kfree(tmad);
- }
+ if (ret)
+ ib_free_send_mad(send_buf);
}
}
@@ -267,15 +224,7 @@ int mthca_process_mad(struct ib_device *ibdev,
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
- struct mthca_trap_mad *tmad =
- (void *) (unsigned long) mad_send_wc->wr_id;
-
- dma_unmap_single(agent->device->dma_device,
- pci_unmap_addr(tmad, mapping),
- sizeof *tmad->mad,
- DMA_TO_DEVICE);
- kfree(tmad->mad);
- kfree(tmad);
+ ib_free_send_mad(mad_send_wc->send_buf);
}
int mthca_create_agents(struct mthca_dev *dev)
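The forward_trap() rewrite above moves mthca onto the generic MAD send-buffer API, which owns the allocation, DMA mapping and work request that the removed mthca_trap_mad bookkeeping used to manage by hand. A minimal sketch of the lifecycle, using only the calls that appear in this patch; note that ib_create_send_mad() returns an ERR_PTR on failure, which a defensive caller would test with IS_ERR() before dereferencing:

	struct ib_mad_send_buf *buf;

	buf = ib_create_send_mad(agent, remote_qpn, 0, 0, IB_MGMT_MAD_HDR,
				 IB_MGMT_MAD_DATA, GFP_ATOMIC);
	if (IS_ERR(buf))
		return;

	memcpy(buf->mad, mad, sizeof *mad);	/* header + payload */
	buf->ah = ah;				/* caller supplies the AH */
	if (ib_post_send_mad(buf, NULL))
		ib_free_send_mad(buf);		/* posting failed: free now */

On successful completion the agent's send handler gets the buffer back in mad_send_wc->send_buf and frees it, exactly as the new send_handler() above does.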
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index ffbcd40418d..6f94b25f3ac 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -162,9 +162,19 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
mdev->limits.pkey_table_len = dev_lim->max_pkeys;
mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
mdev->limits.max_sg = dev_lim->max_sg;
+ mdev->limits.max_wqes = dev_lim->max_qp_sz;
+ mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp;
mdev->limits.reserved_qps = dev_lim->reserved_qps;
+ mdev->limits.max_srq_wqes = dev_lim->max_srq_sz;
mdev->limits.reserved_srqs = dev_lim->reserved_srqs;
mdev->limits.reserved_eecs = dev_lim->reserved_eecs;
+ mdev->limits.max_desc_sz = dev_lim->max_desc_sz;
+ /*
+ * Subtract 1 from the limit because we need to allocate a
+ * spare CQE so the HCA HW can tell the difference between an
+ * empty CQ and a full CQ.
+ */
+ mdev->limits.max_cqes = dev_lim->max_cq_sz - 1;
mdev->limits.reserved_cqs = dev_lim->reserved_cqs;
mdev->limits.reserved_eqs = dev_lim->reserved_eqs;
mdev->limits.reserved_mtts = dev_lim->reserved_mtts;
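The spare-CQE comment above is the classic ring-buffer disambiguation: with only producer and consumer indices, a completely full ring looks identical to an empty one, so one entry is kept permanently unused. A generic sketch of the invariant (not mthca-specific):

	/* ring of N slots, at most N - 1 ever occupied */
	int empty = (head == tail);
	int full  = ((head + 1) % N == tail);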
@@ -172,6 +182,8 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
mdev->limits.reserved_uars = dev_lim->reserved_uars;
mdev->limits.reserved_pds = dev_lim->reserved_pds;
mdev->limits.port_width_cap = dev_lim->max_port_width;
+ mdev->limits.page_size_cap = ~(u32) (dev_lim->min_page_sz - 1);
+ mdev->limits.flags = dev_lim->flags;
/* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
May be doable since hardware supports it for SRQ.
@@ -503,6 +515,25 @@ err_free_aux:
return err;
}
+static void mthca_free_icms(struct mthca_dev *mdev)
+{
+ u8 status;
+
+ mthca_free_icm_table(mdev, mdev->mcg_table.table);
+ if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+ mthca_free_icm_table(mdev, mdev->srq_table.table);
+ mthca_free_icm_table(mdev, mdev->cq_table.table);
+ mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
+ mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
+ mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
+ mthca_unmap_eq_icm(mdev);
+
+ mthca_UNMAP_ICM_AUX(mdev, &status);
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+}
+
static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
{
struct mthca_dev_lim dev_lim;
@@ -580,18 +611,7 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
return 0;
err_free_icm:
- if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
- mthca_free_icm_table(mdev, mdev->srq_table.table);
- mthca_free_icm_table(mdev, mdev->cq_table.table);
- mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
- mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
- mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
- mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
- mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
- mthca_unmap_eq_icm(mdev);
-
- mthca_UNMAP_ICM_AUX(mdev, &status);
- mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+ mthca_free_icms(mdev);
err_stop_fw:
mthca_UNMAP_FA(mdev, &status);
@@ -611,18 +631,7 @@ static void mthca_close_hca(struct mthca_dev *mdev)
mthca_CLOSE_HCA(mdev, 0, &status);
if (mthca_is_memfree(mdev)) {
- if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
- mthca_free_icm_table(mdev, mdev->srq_table.table);
- mthca_free_icm_table(mdev, mdev->cq_table.table);
- mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
- mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
- mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
- mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
- mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
- mthca_unmap_eq_icm(mdev);
-
- mthca_UNMAP_ICM_AUX(mdev, &status);
- mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+ mthca_free_icms(mdev);
mthca_UNMAP_FA(mdev, &status);
mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
@@ -1050,7 +1059,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
goto err_cmd;
if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
- mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n",
+ mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n",
(int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
(int) (mdev->fw_ver & 0xffff),
(int) (mthca_hca_table[id->driver_data].latest_fw >> 32),
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index a2707605f4c..2fc449da418 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -33,14 +33,12 @@
*/
#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/slab.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
-enum {
- MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
-};
-
struct mthca_mgm {
__be32 next_gid_index;
u32 reserved[3];
@@ -189,7 +187,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
- if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
+ if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) {
+ mthca_dbg(dev, "QP %06x already a member of MGM\n",
+ ibqp->qp_num);
+ err = 0;
+ goto out;
+ } else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31));
break;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 1827400f189..d72fe95cba0 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -82,7 +82,7 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
}
struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
- unsigned int gfp_mask)
+ gfp_t gfp_mask)
{
struct mthca_icm *icm;
struct mthca_icm_chunk *chunk = NULL;
@@ -290,7 +290,7 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
int i;
u8 status;
- num_icm = obj_size * nobj / MTHCA_TABLE_CHUNK_SIZE;
+ num_icm = (obj_size * nobj + MTHCA_TABLE_CHUNK_SIZE - 1) / MTHCA_TABLE_CHUNK_SIZE;
table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
if (!table)
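The old expression truncated, so whenever obj_size * nobj was not a whole multiple of the chunk size the table came up one chunk short. The fix is the standard round-up-division idiom, which later kernels spell DIV_ROUND_UP():

	/* allocate ceil(total_bytes / chunk_size) chunks */
	num_icm = (obj_size * nobj + MTHCA_TABLE_CHUNK_SIZE - 1) /
		  MTHCA_TABLE_CHUNK_SIZE;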
@@ -487,7 +487,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
}
}
-int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db)
+int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
+ u32 qn, __be32 **db)
{
int group;
int start, end, dir;
@@ -529,12 +530,25 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db)
goto found;
}
+ for (i = start; i != end; i += dir)
+ if (!dev->db_tab->page[i].db_rec) {
+ page = dev->db_tab->page + i;
+ goto alloc;
+ }
+
if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) {
ret = -ENOMEM;
goto out;
}
+ if (group == 0)
+ ++dev->db_tab->max_group1;
+ else
+ --dev->db_tab->min_group2;
+
page = dev->db_tab->page + end;
+
+alloc:
page->db_rec = dma_alloc_coherent(&dev->pdev->dev, 4096,
&page->mapping, GFP_KERNEL);
if (!page->db_rec) {
@@ -554,10 +568,6 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db)
}
bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE);
- if (group == 0)
- ++dev->db_tab->max_group1;
- else
- --dev->db_tab->min_group2;
found:
j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE);
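For context on the reordering above: the doorbell page table is carved from both ends, with max_group1 and min_group2 tracking how far each group has grown, roughly (layout inferred from the max_group1/min_group2 bookkeeping):

	/*
	 *  index 0                                  npages - 1
	 *  [ group 1 ->  ...      free      ...  <- group 2 ]
	 *            ^ max_group1        min_group2 ^
	 */

The new scan reuses a page that was allocated and later freed (db_rec == NULL) before widening either group, and the group bound is now bumped before the dma_alloc_coherent() call that can fail, so a page left empty by a failed allocation still falls inside the group range and is picked up by that same scan on the next call.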
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index bafa51544aa..4fdca26eea8 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -77,7 +77,7 @@ struct mthca_icm_iter {
struct mthca_dev;
struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
- unsigned int gfp_mask);
+ gfp_t gfp_mask);
void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm);
struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
@@ -173,7 +173,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
int mthca_init_db_tab(struct mthca_dev *dev);
void mthca_cleanup_db_tab(struct mthca_dev *dev);
-int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db);
+int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
+ u32 qn, __be32 **db);
void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
#endif /* MTHCA_MEMFREE_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 1f97a44477f..e995e2aa016 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -140,13 +140,11 @@ static int __devinit mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
- buddy->bits = kmalloc((buddy->max_order + 1) * sizeof (long *),
+ buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
GFP_KERNEL);
if (!buddy->bits)
goto err_out;
- memset(buddy->bits, 0, (buddy->max_order + 1) * sizeof (long *));
-
for (i = 0; i <= buddy->max_order; ++i) {
s = BITS_TO_LONGS(1 << (buddy->max_order - i));
buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 0576056b34f..08a909371b0 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -35,6 +35,8 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/string.h>
+#include <linux/slab.h>
#include "mthca_profile.h"
@@ -80,12 +82,10 @@ u64 mthca_make_profile(struct mthca_dev *dev,
struct mthca_resource tmp;
int i, j;
- profile = kmalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL);
+ profile = kzalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL);
if (!profile)
return -ENOMEM;
- memset(profile, 0, MTHCA_RES_NUM * sizeof *profile);
-
profile[MTHCA_RES_QP].size = dev_lim->qpc_entry_sz;
profile[MTHCA_RES_EEC].size = dev_lim->eec_entry_sz;
profile[MTHCA_RES_SRQ].size = dev_lim->srq_entry_sz;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 1c1c2e23087..4cc7e2846df 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
*/
#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
#include <linux/mm.h>
#include "mthca_dev.h"
@@ -84,21 +85,33 @@ static int mthca_query_device(struct ib_device *ibdev,
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
- props->hw_ver = be16_to_cpup((__be16 *) (out_mad->data + 32));
+ props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
memcpy(&props->node_guid, out_mad->data + 12, 8);
props->max_mr_size = ~0ull;
+ props->page_size_cap = mdev->limits.page_size_cap;
props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps;
- props->max_qp_wr = 0xffff;
+ props->max_qp_wr = mdev->limits.max_wqes;
props->max_sge = mdev->limits.max_sg;
props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
- props->max_cqe = 0xffff;
+ props->max_cqe = mdev->limits.max_cqes;
props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds;
props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift;
- props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift;
+ props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
+ props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
+ props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
+ props->max_srq_wr = mdev->limits.max_srq_wqes;
+ props->max_srq_sge = mdev->limits.max_sg;
props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay;
+ props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
+ IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+ props->max_pkeys = mdev->limits.pkey_table_len;
+ props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms;
+ props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
+ props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+ props->max_mcast_grp;
err = 0;
out:
@@ -150,9 +163,13 @@ static int mthca_query_port(struct ib_device *ibdev,
props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len;
props->max_msg_sz = 0x80000000;
props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len;
+ props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
props->active_width = out_mad->data[31] & 0xf;
props->active_speed = out_mad->data[35] >> 4;
+ props->max_mtu = out_mad->data[41] & 0xf;
+ props->active_mtu = out_mad->data[36] >> 4;
+ props->subnet_timeout = out_mad->data[51] & 0x1f;
out:
kfree(in_mad);
@@ -599,11 +616,11 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
return ERR_PTR(err);
}
- init_attr->cap.max_inline_data = 0;
init_attr->cap.max_send_wr = qp->sq.max;
init_attr->cap.max_recv_wr = qp->rq.max;
init_attr->cap.max_send_sge = qp->sq.max_gs;
init_attr->cap.max_recv_sge = qp->rq.max_gs;
+ init_attr->cap.max_inline_data = qp->max_inline_data;
return &qp->ibqp;
}
@@ -634,6 +651,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
int nent;
int err;
+ if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
+ return ERR_PTR(-EINVAL);
+
if (context) {
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
return ERR_PTR(-EFAULT);
@@ -1009,7 +1029,7 @@ static ssize_t show_rev(struct class_device *cdev, char *buf)
static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
{
struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
- return sprintf(buf, "%x.%x.%x\n", (int) (dev->fw_ver >> 32),
+ return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32),
(int) (dev->fw_ver >> 16) & 0xffff,
(int) dev->fw_ver & 0xffff);
}
@@ -1058,6 +1078,26 @@ int mthca_register_device(struct mthca_dev *dev)
strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
+ dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION;
+ dev->ib_dev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
dev->ib_dev.node_type = IB_NODE_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.dma_device = &dev->pdev->dev;
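uverbs_cmd_mask is a 64-bit bitmap advertising which userspace verbs commands this device implements, letting the uverbs core reject anything the driver did not opt into. A hedged sketch of the core-side gate (the exact test in uverbs_main.c is an assumption here):

	if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
	    !(ib_dev->uverbs_cmd_mask & (1ull << command)))
		return -EINVAL;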
@@ -1077,6 +1117,7 @@ int mthca_register_device(struct mthca_dev *dev)
if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
dev->ib_dev.create_srq = mthca_create_srq;
+ dev->ib_dev.modify_srq = mthca_modify_srq;
dev->ib_dev.destroy_srq = mthca_destroy_srq;
if (mthca_is_memfree(dev))
@@ -1135,10 +1176,13 @@ int mthca_register_device(struct mthca_dev *dev)
}
}
+ mthca_start_catas_poll(dev);
+
return 0;
}
void mthca_unregister_device(struct mthca_dev *dev)
{
+ mthca_stop_catas_poll(dev);
ib_unregister_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index bcd4b01a339..1e73947b470 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -251,6 +251,7 @@ struct mthca_qp {
struct mthca_wq sq;
enum ib_sig_type sq_policy;
int send_wqe_offset;
+ int max_inline_data;
u64 *wrid;
union mthca_buf queue;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 5fa00669f9b..7450550db73 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -36,6 +36,8 @@
*/
#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -338,8 +340,7 @@ static const struct {
[UC] = (IB_QP_AV |
IB_QP_PATH_MTU |
IB_QP_DEST_QPN |
- IB_QP_RQ_PSN |
- IB_QP_MAX_DEST_RD_ATOMIC),
+ IB_QP_RQ_PSN),
[RC] = (IB_QP_AV |
IB_QP_PATH_MTU |
IB_QP_DEST_QPN |
@@ -368,8 +369,7 @@ static const struct {
.trans = MTHCA_TRANS_RTR2RTS,
.req_param = {
[UD] = IB_QP_SQ_PSN,
- [UC] = (IB_QP_SQ_PSN |
- IB_QP_MAX_QP_RD_ATOMIC),
+ [UC] = IB_QP_SQ_PSN,
[RC] = (IB_QP_TIMEOUT |
IB_QP_RETRY_CNT |
IB_QP_RNR_RETRY |
@@ -446,8 +446,6 @@ static const struct {
[UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[UC] = (IB_QP_AV |
- IB_QP_MAX_QP_RD_ATOMIC |
- IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
@@ -478,7 +476,7 @@ static const struct {
.opt_param = {
[UD] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
- [UC] = (IB_QP_CUR_STATE),
+ [UC] = IB_QP_CUR_STATE,
[RC] = (IB_QP_CUR_STATE |
IB_QP_MIN_RNR_TIMER),
[MLX] = (IB_QP_CUR_STATE |
@@ -586,6 +584,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
return -EINVAL;
}
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= dev->limits.pkey_table_len) {
+ mthca_dbg(dev, "PKey index (%u) too large. max is %d\n",
+			  attr->pkey_index, dev->limits.pkey_table_len - 1);
+ return -EINVAL;
+ }
+
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
@@ -725,15 +730,16 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
}
if (attr_mask & IB_QP_ACCESS_FLAGS) {
+ qp_context->params2 |=
+ cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ?
+ MTHCA_QP_BIT_RWE : 0);
+
/*
- * Only enable RDMA/atomics if we have responder
- * resources set to a non-zero value.
+ * Only enable RDMA reads and atomics if we have
+ * responder resources set to a non-zero value.
*/
if (qp->resp_depth) {
qp_context->params2 |=
- cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ?
- MTHCA_QP_BIT_RWE : 0);
- qp_context->params2 |=
cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ?
MTHCA_QP_BIT_RRE : 0);
qp_context->params2 |=
@@ -754,31 +760,27 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
if (qp->resp_depth && !attr->max_dest_rd_atomic) {
/*
* Lowering our responder resources to zero.
- * Turn off RDMA/atomics as responder.
- * (RWE/RRE/RAE in params2 already zero)
+				 * Turn off RDMA reads and atomics as responder.
+ * (RRE/RAE in params2 already zero)
*/
- qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
- MTHCA_QP_OPTPAR_RRE |
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE |
MTHCA_QP_OPTPAR_RAE);
}
if (!qp->resp_depth && attr->max_dest_rd_atomic) {
/*
* Increasing our responder resources from
- * zero. Turn on RDMA/atomics as appropriate.
+ * zero. Turn on RDMA reads and atomics as
+ * appropriate.
*/
qp_context->params2 |=
- cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_WRITE ?
- MTHCA_QP_BIT_RWE : 0);
- qp_context->params2 |=
cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ?
MTHCA_QP_BIT_RRE : 0);
qp_context->params2 |=
cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ?
MTHCA_QP_BIT_RAE : 0);
- qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
- MTHCA_QP_OPTPAR_RRE |
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE |
MTHCA_QP_OPTPAR_RAE);
}
@@ -869,7 +871,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
mthca_wq_init(&qp->sq);
+ qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
+
mthca_wq_init(&qp->rq);
+ qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
if (mthca_is_memfree(dev)) {
*qp->sq.db = 0;
@@ -880,6 +885,50 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
return err;
}
+static void mthca_adjust_qp_caps(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_qp *qp)
+{
+ int max_data_size;
+
+ /*
+ * Calculate the maximum size of WQE s/g segments, excluding
+ * the next segment and other non-data segments.
+ */
+ max_data_size = min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift) -
+ sizeof (struct mthca_next_seg);
+
+ switch (qp->transport) {
+ case MLX:
+ max_data_size -= 2 * sizeof (struct mthca_data_seg);
+ break;
+
+ case UD:
+ if (mthca_is_memfree(dev))
+ max_data_size -= sizeof (struct mthca_arbel_ud_seg);
+ else
+ max_data_size -= sizeof (struct mthca_tavor_ud_seg);
+ break;
+
+ default:
+ max_data_size -= sizeof (struct mthca_raddr_seg);
+ break;
+ }
+
+ /* We don't support inline data for kernel QPs (yet). */
+ if (!pd->ibpd.uobject)
+ qp->max_inline_data = 0;
+ else
+ qp->max_inline_data = max_data_size - MTHCA_INLINE_HEADER_SIZE;
+
+ qp->sq.max_gs = min_t(int, dev->limits.max_sg,
+ max_data_size / sizeof (struct mthca_data_seg));
+ qp->rq.max_gs = min_t(int, dev->limits.max_sg,
+ (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) -
+ sizeof (struct mthca_next_seg)) /
+ sizeof (struct mthca_data_seg));
+}
+
/*
* Allocate and register buffer for WQEs. qp->rq.max, sq.max,
* rq.max_gs and sq.max_gs must all be assigned.
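A worked pass through mthca_adjust_qp_caps() above, assuming the mthca_wqe.h segment sizes (next, data and raddr segments 16 bytes each): an RC QP with 128-byte send WQEs (sq.wqe_shift = 7) on a device whose max_desc_sz is at least 128 gets

	max_data_size = 128 - 16 /* next */ - 16 /* raddr */;	/* = 96 */
	sq.max_gs     = min(max_sg, 96 / 16 /* data */);	/* <= 6 */

and a userspace QP would then see max_inline_data = 96 - MTHCA_INLINE_HEADER_SIZE.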
@@ -897,27 +946,53 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
size = sizeof (struct mthca_next_seg) +
qp->rq.max_gs * sizeof (struct mthca_data_seg);
+ if (size > dev->limits.max_desc_sz)
+ return -EINVAL;
+
for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
qp->rq.wqe_shift++)
; /* nothing */
- size = sizeof (struct mthca_next_seg) +
- qp->sq.max_gs * sizeof (struct mthca_data_seg);
+ size = qp->sq.max_gs * sizeof (struct mthca_data_seg);
switch (qp->transport) {
case MLX:
size += 2 * sizeof (struct mthca_data_seg);
break;
+
case UD:
- if (mthca_is_memfree(dev))
- size += sizeof (struct mthca_arbel_ud_seg);
- else
- size += sizeof (struct mthca_tavor_ud_seg);
+ size += mthca_is_memfree(dev) ?
+ sizeof (struct mthca_arbel_ud_seg) :
+ sizeof (struct mthca_tavor_ud_seg);
+ break;
+
+ case UC:
+ size += sizeof (struct mthca_raddr_seg);
+ break;
+
+ case RC:
+ size += sizeof (struct mthca_raddr_seg);
+ /*
+ * An atomic op will require an atomic segment, a
+ * remote address segment and one scatter entry.
+ */
+ size = max_t(int, size,
+ sizeof (struct mthca_atomic_seg) +
+ sizeof (struct mthca_raddr_seg) +
+ sizeof (struct mthca_data_seg));
break;
+
default:
- /* bind seg is as big as atomic + raddr segs */
- size += sizeof (struct mthca_bind_seg);
+ break;
}
+ /* Make sure that we have enough space for a bind request */
+ size = max_t(int, size, sizeof (struct mthca_bind_seg));
+
+ size += sizeof (struct mthca_next_seg);
+
+ if (size > dev->limits.max_desc_sz)
+ return -EINVAL;
+
for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
qp->sq.wqe_shift++)
; /* nothing */
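Both shift loops round the descriptor size up to the next power of two with a floor of 64 bytes (shift 6), since the hardware indexes WQEs by a power-of-two stride. An equivalent closed form, assuming the kernel's fls():

	qp->sq.wqe_shift = max(6, fls(size - 1));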
@@ -1061,6 +1136,8 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
return ret;
}
+ mthca_adjust_qp_caps(dev, pd, qp);
+
/*
* If this is a userspace QP, we're done now. The doorbells
* will be allocated and buffers will be initialized in
@@ -1112,8 +1189,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
struct mthca_qp *qp)
{
/* Sanity check QP size before proceeding */
- if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 ||
- cap->max_send_sge > 64 || cap->max_recv_sge > 64)
+ if (cap->max_send_wr > dev->limits.max_wqes ||
+ cap->max_recv_wr > dev->limits.max_wqes ||
+ cap->max_send_sge > dev->limits.max_sg ||
+ cap->max_recv_sge > dev->limits.max_sg)
return -EINVAL;
if (mthca_is_memfree(dev)) {
@@ -1479,8 +1558,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
wqe += sizeof (struct mthca_atomic_seg);
- size += sizeof (struct mthca_raddr_seg) / 16 +
- sizeof (struct mthca_atomic_seg);
+ size += (sizeof (struct mthca_raddr_seg) +
+ sizeof (struct mthca_atomic_seg)) / 16;
break;
case IB_WR_RDMA_WRITE:
@@ -1630,6 +1709,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
+ __be32 doorbell[2];
unsigned long flags;
int err = 0;
int nreq;
@@ -1647,6 +1727,22 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
ind = qp->rq.next_ind;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
+ nreq = 0;
+
+ doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
+ doorbell[1] = cpu_to_be32(qp->qpn << 8);
+
+ wmb();
+
+ mthca_write64(doorbell,
+ dev->kar + MTHCA_RECEIVE_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+
+ qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
+ size0 = 0;
+ }
+
if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
mthca_err(dev, "RQ %06x full (%u head, %u tail,"
" %d max, %d nreq)\n", qp->qpn,
@@ -1704,8 +1800,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
out:
if (likely(nreq)) {
- __be32 doorbell[2];
-
doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
@@ -1728,6 +1822,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
+ __be32 doorbell[2];
void *wqe;
void *prev_wqe;
unsigned long flags;
@@ -1747,6 +1842,34 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
ind = qp->sq.head & (qp->sq.max - 1);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
+ nreq = 0;
+
+ doorbell[0] = cpu_to_be32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
+ ((qp->sq.head & 0xffff) << 8) |
+ f0 | op0);
+ doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
+
+ qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
+ size0 = 0;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+ *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
+
+ /*
+ * Make sure doorbell record is written before we
+ * write MMIO send doorbell.
+ */
+ wmb();
+ mthca_write64(doorbell,
+ dev->kar + MTHCA_SEND_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+
if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
mthca_err(dev, "SQ %06x full (%u head, %u tail,"
" %d max, %d nreq)\n", qp->qpn,
@@ -1799,8 +1922,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
wqe += sizeof (struct mthca_atomic_seg);
- size += sizeof (struct mthca_raddr_seg) / 16 +
- sizeof (struct mthca_atomic_seg);
+ size += (sizeof (struct mthca_raddr_seg) +
+ sizeof (struct mthca_atomic_seg)) / 16;
break;
case IB_WR_RDMA_READ:
@@ -1923,8 +2046,6 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
out:
if (likely(nreq)) {
- __be32 doorbell[2];
-
doorbell[0] = cpu_to_be32((nreq << 24) |
((qp->sq.head & 0xffff) << 8) |
f0 | op0);
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index 4f995391dd1..df5e494a9d3 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -37,6 +37,7 @@
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/delay.h>
+#include <linux/slab.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 18998d48c53..f7d234295ef 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -32,6 +32,9 @@
* $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $
*/
+#include <linux/slab.h>
+#include <linux/string.h>
+
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
@@ -75,15 +78,16 @@ static void *get_wqe(struct mthca_srq *srq, int n)
/*
* Return a pointer to the location within a WQE that we're using as a
- * link when the WQE is in the free list. We use an offset of 4
- * because in the Tavor case, posting a WQE may overwrite the first
- * four bytes of the previous WQE. The offset avoids corrupting our
- * free list if the WQE has already completed and been put on the free
- * list when we post the next WQE.
+ * link when the WQE is in the free list. We use the imm field
+ * because in the Tavor case, posting a WQE may overwrite the next
+ * segment of the previous WQE, but a receive WQE will never touch the
+ * imm field. This avoids corrupting our free list if the previous
+ * WQE has already completed and been put on the free list when we
+ * post the next WQE.
*/
static inline int *wqe_to_link(void *wqe)
{
- return (int *) (wqe + 4);
+ return (int *) (wqe + offsetof(struct mthca_next_seg, imm));
}
static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
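The layout behind the new wqe_to_link(): in mthca_next_seg the imm word sits last, well away from the nda_op word that gets rewritten when the following WQE is chained in (struct shape assumed from mthca_wqe.h):

	struct mthca_next_seg {
		__be32 nda_op;	/* rewritten when the next WQE is posted */
		__be32 ee_nds;
		__be32 flags;
		__be32 imm;	/* untouched on receive: safe free-list link */
	};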
@@ -186,7 +190,8 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
int err;
/* Sanity check SRQ size before proceeding */
- if (attr->max_wr > 16 << 20 || attr->max_sge > 64)
+ if (attr->max_wr > dev->limits.max_srq_wqes ||
+ attr->max_sge > dev->limits.max_sg)
return -EINVAL;
srq->max = attr->max_wr;
@@ -332,6 +337,29 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
mthca_free_mailbox(dev, mailbox);
}
+int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask)
+{
+ struct mthca_dev *dev = to_mdev(ibsrq->device);
+ struct mthca_srq *srq = to_msrq(ibsrq);
+ int ret;
+ u8 status;
+
+ /* We don't support resizing SRQs (yet?) */
+ if (attr_mask & IB_SRQ_MAX_WR)
+ return -EINVAL;
+
+ if (attr_mask & IB_SRQ_LIMIT) {
+ ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status);
+ if (ret)
+ return ret;
+ if (status)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
enum ib_event_type event_type)
{
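From the consumer's side, arming the limit through the new hook asks the HCA to raise IB_EVENT_SRQ_LIMIT_REACHED once the number of posted receive WQEs drops below the threshold; the event is then delivered through mthca_srq_event() below. A minimal usage sketch (the threshold value is arbitrary):

	struct ib_srq_attr attr = {
		.srq_limit = 16,	/* fire when fewer than 16 WQEs remain */
	};
	int err = ib_modify_srq(srq, &attr, IB_SRQ_LIMIT);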
@@ -354,7 +382,7 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
event.device = &dev->ib_dev;
event.event = event_type;
- event.element.srq = &srq->ibsrq;
+ event.element.srq = &srq->ibsrq;
srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);
out:
@@ -389,6 +417,7 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
{
struct mthca_dev *dev = to_mdev(ibsrq->device);
struct mthca_srq *srq = to_msrq(ibsrq);
+ __be32 doorbell[2];
unsigned long flags;
int err = 0;
int first_ind;
@@ -404,6 +433,25 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
first_ind = srq->first_free;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
+ nreq = 0;
+
+ doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
+ doorbell[1] = cpu_to_be32(srq->srqn << 8);
+
+ /*
+ * Make sure that descriptors are written
+ * before doorbell is rung.
+ */
+ wmb();
+
+ mthca_write64(doorbell,
+ dev->kar + MTHCA_RECEIVE_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+
+ first_ind = srq->first_free;
+ }
+
ind = srq->first_free;
if (ind < 0) {
@@ -415,6 +463,14 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
wqe = get_wqe(srq, ind);
next_ind = *wqe_to_link(wqe);
+
+ if (next_ind < 0) {
+ mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
prev_wqe = srq->last;
srq->last = wqe;
@@ -458,8 +514,6 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
}
if (likely(nreq)) {
- __be32 doorbell[2];
-
doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq);
@@ -506,6 +560,13 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
wqe = get_wqe(srq, ind);
next_ind = *wqe_to_link(wqe);
+ if (next_ind < 0) {
+ mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
((struct mthca_next_seg *) wqe)->nda_op =
cpu_to_be32((next_ind << srq->wqe_shift) | 1);
((struct mthca_next_seg *) wqe)->ee_nds = 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c
index 1c8791ded6f..8e9219842be 100644
--- a/drivers/infiniband/hw/mthca/mthca_uar.c
+++ b/drivers/infiniband/hw/mthca/mthca_uar.c
@@ -32,6 +32,8 @@
* $Id$
*/
+#include <asm/page.h> /* PAGE_SHIFT */
+
#include "mthca_dev.h"
#include "mthca_memfree.h"
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
index 41613ec8a04..bb015c6494c 100644
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -38,6 +38,12 @@
#include <linux/types.h>
/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MTHCA_UVERBS_ABI_VERSION 1
+
+/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h
index 1f4c0ff28f7..e7d2c1e8619 100644
--- a/drivers/infiniband/hw/mthca/mthca_wqe.h
+++ b/drivers/infiniband/hw/mthca/mthca_wqe.h
@@ -49,7 +49,9 @@ enum {
};
enum {
- MTHCA_INVAL_LKEY = 0x100
+ MTHCA_INVAL_LKEY = 0x100,
+ MTHCA_TAVOR_MAX_WQES_PER_RECV_DB = 256,
+ MTHCA_ARBEL_MAX_WQES_PER_SEND_DB = 255
};
struct mthca_next_seg {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 4ea1c1ca85b..9923a15a999 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -100,7 +100,12 @@ struct ipoib_pseudoheader {
struct ipoib_mcast;
-struct ipoib_buf {
+struct ipoib_rx_buf {
+ struct sk_buff *skb;
+ dma_addr_t mapping;
+};
+
+struct ipoib_tx_buf {
struct sk_buff *skb;
DECLARE_PCI_UNMAP_ADDR(mapping)
};
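The split is more than a rename: DECLARE_PCI_UNMAP_ADDR() expands to nothing on architectures that can unmap without remembering the bus address, which suits the TX ring (map, send, unmap) but not the RX ring, where the mapping must survive so the buffer can be reposted. Hence the unconditional dma_addr_t. Roughly (the arch condition is simplified here):

	#ifdef ARCH_NEEDS_UNMAP_ADDR			/* per-arch in <asm/pci.h> */
	#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)	dma_addr_t ADDR_NAME;
	#else
	#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
	#endif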
@@ -150,14 +155,14 @@ struct ipoib_dev_priv {
unsigned int admin_mtu;
unsigned int mcast_mtu;
- struct ipoib_buf *rx_ring;
+ struct ipoib_rx_buf *rx_ring;
- spinlock_t tx_lock;
- struct ipoib_buf *tx_ring;
- unsigned tx_head;
- unsigned tx_tail;
- struct ib_sge tx_sge;
- struct ib_send_wr tx_wr;
+ spinlock_t tx_lock;
+ struct ipoib_tx_buf *tx_ring;
+ unsigned tx_head;
+ unsigned tx_tail;
+ struct ib_sge tx_sge;
+ struct ib_send_wr tx_wr;
struct ib_wc ibwc[IPOIB_NUM_WC];
@@ -174,6 +179,7 @@ struct ipoib_dev_priv {
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
struct list_head fs_list;
struct dentry *mcg_dentry;
+ struct dentry *path_dentry;
#endif
};
@@ -230,6 +236,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
kref_put(&ah->ref, ipoib_free_ah);
}
+int ipoib_open(struct net_device *dev);
int ipoib_add_pkey_attr(struct net_device *dev);
void ipoib_send(struct net_device *dev, struct sk_buff *skb,
@@ -262,8 +269,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
void ipoib_mcast_dev_down(struct net_device *dev);
void ipoib_mcast_dev_flush(struct net_device *dev);
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev);
-void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter);
int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter);
void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
union ib_gid *gid,
@@ -272,12 +279,18 @@ void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
unsigned int *complete,
unsigned int *send_only);
+struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev);
+int ipoib_path_iter_next(struct ipoib_path_iter *iter);
+void ipoib_path_iter_read(struct ipoib_path_iter *iter,
+ struct ipoib_path *path);
+#endif
+
int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
union ib_gid *mgid);
int ipoib_mcast_detach(struct net_device *dev, u16 mlid,
union ib_gid *mgid);
-int ipoib_qp_create(struct net_device *dev);
+int ipoib_init_qp(struct net_device *dev);
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
void ipoib_transport_dev_cleanup(struct net_device *dev);
@@ -291,13 +304,13 @@ void ipoib_pkey_poll(void *dev);
int ipoib_pkey_dev_delay_open(struct net_device *dev);
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
-int ipoib_create_debug_file(struct net_device *dev);
-void ipoib_delete_debug_file(struct net_device *dev);
+void ipoib_create_debug_files(struct net_device *dev);
+void ipoib_delete_debug_files(struct net_device *dev);
int ipoib_register_debugfs(void);
void ipoib_unregister_debugfs(void);
#else
-static inline int ipoib_create_debug_file(struct net_device *dev) { return 0; }
-static inline void ipoib_delete_debug_file(struct net_device *dev) { }
+static inline void ipoib_create_debug_files(struct net_device *dev) { }
+static inline void ipoib_delete_debug_files(struct net_device *dev) { }
static inline int ipoib_register_debugfs(void) { return 0; }
static inline void ipoib_unregister_debugfs(void) { }
#endif
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 38b150f775e..685258e3403 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -43,6 +43,18 @@ struct file_operations;
static struct dentry *ipoib_root;
+static void format_gid(union ib_gid *gid, char *buf)
+{
+ int i, n;
+
+ for (n = 0, i = 0; i < 8; ++i) {
+ n += sprintf(buf + n, "%x",
+ be16_to_cpu(((__be16 *) gid->raw)[i]));
+ if (i < 7)
+ buf[n++] = ':';
+ }
+}
+
static void *ipoib_mcg_seq_start(struct seq_file *file, loff_t *pos)
{
struct ipoib_mcast_iter *iter;
@@ -54,7 +66,7 @@ static void *ipoib_mcg_seq_start(struct seq_file *file, loff_t *pos)
while (n--) {
if (ipoib_mcast_iter_next(iter)) {
- ipoib_mcast_iter_free(iter);
+ kfree(iter);
return NULL;
}
}
@@ -70,7 +82,7 @@ static void *ipoib_mcg_seq_next(struct seq_file *file, void *iter_ptr,
(*pos)++;
if (ipoib_mcast_iter_next(iter)) {
- ipoib_mcast_iter_free(iter);
+ kfree(iter);
return NULL;
}
@@ -87,32 +99,32 @@ static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr)
struct ipoib_mcast_iter *iter = iter_ptr;
char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"];
union ib_gid mgid;
- int i, n;
unsigned long created;
unsigned int queuelen, complete, send_only;
- if (iter) {
- ipoib_mcast_iter_read(iter, &mgid, &created, &queuelen,
- &complete, &send_only);
+ if (!iter)
+ return 0;
- for (n = 0, i = 0; i < sizeof mgid / 2; ++i) {
- n += sprintf(gid_buf + n, "%x",
- be16_to_cpu(((__be16 *) mgid.raw)[i]));
- if (i < sizeof mgid / 2 - 1)
- gid_buf[n++] = ':';
- }
- }
+ ipoib_mcast_iter_read(iter, &mgid, &created, &queuelen,
+ &complete, &send_only);
- seq_printf(file, "GID: %*s", -(1 + (int) sizeof gid_buf), gid_buf);
+ format_gid(&mgid, gid_buf);
seq_printf(file,
- " created: %10ld queuelen: %4d complete: %d send_only: %d\n",
- created, queuelen, complete, send_only);
+ "GID: %s\n"
+ " created: %10ld\n"
+ " queuelen: %9d\n"
+ " complete: %9s\n"
+ " send_only: %8s\n"
+ "\n",
+ gid_buf, created, queuelen,
+ complete ? "yes" : "no",
+ send_only ? "yes" : "no");
return 0;
}
-static struct seq_operations ipoib_seq_ops = {
+static struct seq_operations ipoib_mcg_seq_ops = {
.start = ipoib_mcg_seq_start,
.next = ipoib_mcg_seq_next,
.stop = ipoib_mcg_seq_stop,
@@ -124,7 +136,7 @@ static int ipoib_mcg_open(struct inode *inode, struct file *file)
struct seq_file *seq;
int ret;
- ret = seq_open(file, &ipoib_seq_ops);
+ ret = seq_open(file, &ipoib_mcg_seq_ops);
if (ret)
return ret;
@@ -134,7 +146,7 @@ static int ipoib_mcg_open(struct inode *inode, struct file *file)
return 0;
}
-static struct file_operations ipoib_fops = {
+static struct file_operations ipoib_mcg_fops = {
.owner = THIS_MODULE,
.open = ipoib_mcg_open,
.read = seq_read,
@@ -142,25 +154,138 @@ static struct file_operations ipoib_fops = {
.release = seq_release
};
-int ipoib_create_debug_file(struct net_device *dev)
+static void *ipoib_path_seq_start(struct seq_file *file, loff_t *pos)
+{
+ struct ipoib_path_iter *iter;
+ loff_t n = *pos;
+
+ iter = ipoib_path_iter_init(file->private);
+ if (!iter)
+ return NULL;
+
+ while (n--) {
+ if (ipoib_path_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+ }
+
+ return iter;
+}
+
+static void *ipoib_path_seq_next(struct seq_file *file, void *iter_ptr,
+ loff_t *pos)
+{
+ struct ipoib_path_iter *iter = iter_ptr;
+
+ (*pos)++;
+
+ if (ipoib_path_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+static void ipoib_path_seq_stop(struct seq_file *file, void *iter_ptr)
+{
+ /* nothing for now */
+}
+
+static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
+{
+ struct ipoib_path_iter *iter = iter_ptr;
+ char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"];
+ struct ipoib_path path;
+ int rate;
+
+ if (!iter)
+ return 0;
+
+ ipoib_path_iter_read(iter, &path);
+
+ format_gid(&path.pathrec.dgid, gid_buf);
+
+ seq_printf(file,
+ "GID: %s\n"
+ " complete: %6s\n",
+ gid_buf, path.pathrec.dlid ? "yes" : "no");
+
+ if (path.pathrec.dlid) {
+ rate = ib_sa_rate_enum_to_int(path.pathrec.rate) * 25;
+
+ seq_printf(file,
+ " DLID: 0x%04x\n"
+ " SL: %12d\n"
+ " rate: %*d%s Gb/sec\n",
+ be16_to_cpu(path.pathrec.dlid),
+ path.pathrec.sl,
+ 10 - ((rate % 10) ? 2 : 0),
+ rate / 10, rate % 10 ? ".5" : "");
+ }
+
+ seq_putc(file, '\n');
+
+ return 0;
+}
+
+static struct seq_operations ipoib_path_seq_ops = {
+ .start = ipoib_path_seq_start,
+ .next = ipoib_path_seq_next,
+ .stop = ipoib_path_seq_stop,
+ .show = ipoib_path_seq_show,
+};
+
+static int ipoib_path_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int ret;
+
+ ret = seq_open(file, &ipoib_path_seq_ops);
+ if (ret)
+ return ret;
+
+ seq = file->private_data;
+ seq->private = inode->u.generic_ip;
+
+ return 0;
+}
+
+static struct file_operations ipoib_path_fops = {
+ .owner = THIS_MODULE,
+ .open = ipoib_path_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+void ipoib_create_debug_files(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- char name[IFNAMSIZ + sizeof "_mcg"];
+ char name[IFNAMSIZ + sizeof "_path"];
snprintf(name, sizeof name, "%s_mcg", dev->name);
-
priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
- ipoib_root, dev, &ipoib_fops);
-
- return priv->mcg_dentry ? 0 : -ENOMEM;
+ ipoib_root, dev, &ipoib_mcg_fops);
+ if (!priv->mcg_dentry)
+ ipoib_warn(priv, "failed to create mcg debug file\n");
+
+ snprintf(name, sizeof name, "%s_path", dev->name);
+ priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
+ ipoib_root, dev, &ipoib_path_fops);
+ if (!priv->path_dentry)
+ ipoib_warn(priv, "failed to create path debug file\n");
}
-void ipoib_delete_debug_file(struct net_device *dev)
+void ipoib_delete_debug_files(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
if (priv->mcg_dentry)
debugfs_remove(priv->mcg_dentry);
+ if (priv->path_dentry)
+ debugfs_remove(priv->path_dentry);
}
int ipoib_register_debugfs(void)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f7440096b5e..23885801b6d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -95,57 +95,65 @@ void ipoib_free_ah(struct kref *kref)
}
}
-static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv,
- unsigned int wr_id,
- dma_addr_t addr)
+static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
- struct ib_sge list = {
- .addr = addr,
- .length = IPOIB_BUF_SIZE,
- .lkey = priv->mr->lkey,
- };
- struct ib_recv_wr param = {
- .wr_id = wr_id | IPOIB_OP_RECV,
- .sg_list = &list,
- .num_sge = 1,
- };
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ib_sge list;
+ struct ib_recv_wr param;
struct ib_recv_wr *bad_wr;
+ int ret;
+
+ list.addr = priv->rx_ring[id].mapping;
+ list.length = IPOIB_BUF_SIZE;
+ list.lkey = priv->mr->lkey;
+
+ param.next = NULL;
+ param.wr_id = id | IPOIB_OP_RECV;
+ param.sg_list = &list;
+ param.num_sge = 1;
+
+ ret = ib_post_recv(priv->qp, &param, &bad_wr);
+ if (unlikely(ret)) {
+ ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
+ dma_unmap_single(priv->ca->dma_device,
+ priv->rx_ring[id].mapping,
+ IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+ dev_kfree_skb_any(priv->rx_ring[id].skb);
+ priv->rx_ring[id].skb = NULL;
+ }
- return ib_post_recv(priv->qp, &param, &bad_wr);
+ return ret;
}
-static int ipoib_ib_post_receive(struct net_device *dev, int id)
+static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
dma_addr_t addr;
- int ret;
skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
- if (!skb) {
- ipoib_warn(priv, "failed to allocate receive buffer\n");
-
- priv->rx_ring[id].skb = NULL;
+ if (!skb)
return -ENOMEM;
- }
- skb_reserve(skb, 4); /* 16 byte align IP header */
- priv->rx_ring[id].skb = skb;
+
+ /*
+ * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte
+ * header. So we need 4 more bytes to get to 48 and align the
+ * IP header to a multiple of 16.
+ */
+ skb_reserve(skb, 4);
+
addr = dma_map_single(priv->ca->dma_device,
skb->data, IPOIB_BUF_SIZE,
DMA_FROM_DEVICE);
- pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr);
-
- ret = ipoib_ib_receive(priv, id, addr);
- if (ret) {
- ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n",
- id, ret);
- dma_unmap_single(priv->ca->dma_device, addr,
- IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(addr))) {
dev_kfree_skb_any(skb);
- priv->rx_ring[id].skb = NULL;
+ return -EIO;
}
- return ret;
+ priv->rx_ring[id].skb = skb;
+ priv->rx_ring[id].mapping = addr;
+
+ return 0;
}
static int ipoib_ib_post_receives(struct net_device *dev)
@@ -154,6 +162,10 @@ static int ipoib_ib_post_receives(struct net_device *dev)
int i;
for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
+ if (ipoib_alloc_rx_skb(dev, i)) {
+ ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
+ return -ENOMEM;
+ }
if (ipoib_ib_post_receive(dev, i)) {
ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
return -EIO;
@@ -176,28 +188,36 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
wr_id &= ~IPOIB_OP_RECV;
if (wr_id < IPOIB_RX_RING_SIZE) {
- struct sk_buff *skb = priv->rx_ring[wr_id].skb;
-
- priv->rx_ring[wr_id].skb = NULL;
-
- dma_unmap_single(priv->ca->dma_device,
- pci_unmap_addr(&priv->rx_ring[wr_id],
- mapping),
- IPOIB_BUF_SIZE,
- DMA_FROM_DEVICE);
+ struct sk_buff *skb = priv->rx_ring[wr_id].skb;
+ dma_addr_t addr = priv->rx_ring[wr_id].mapping;
- if (wc->status != IB_WC_SUCCESS) {
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
if (wc->status != IB_WC_WR_FLUSH_ERR)
ipoib_warn(priv, "failed recv event "
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
+ dma_unmap_single(priv->ca->dma_device, addr,
+ IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
dev_kfree_skb_any(skb);
+ priv->rx_ring[wr_id].skb = NULL;
return;
}
+ /*
+ * If we can't allocate a new RX buffer, dump
+ * this packet and reuse the old buffer.
+ */
+ if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+ ++priv->stats.rx_dropped;
+ goto repost;
+ }
+
ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid);
+ dma_unmap_single(priv->ca->dma_device, addr,
+ IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+
skb_put(skb, wc->byte_len);
skb_pull(skb, IB_GRH_BYTES);
@@ -220,8 +240,8 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
dev_kfree_skb_any(skb);
}
- /* repost receive */
- if (ipoib_ib_post_receive(dev, wr_id))
+ repost:
+ if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
ipoib_warn(priv, "ipoib_ib_post_receive failed "
"for buf %d\n", wr_id);
} else
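The reworked completion path follows an allocate-before-unmap discipline so a ring slot is never left without a mapped buffer. In outline:

	/* on a receive completion for slot i:
	 *  1. error status               -> unmap, free the skb, return
	 *  2. replacement skb alloc fails -> bump rx_dropped, repost the
	 *     old, still-mapped buffer and drop this packet
	 *  3. otherwise                  -> unmap the old skb, hand it to
	 *     the stack, post the freshly mapped replacement
	 */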
@@ -229,7 +249,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
wr_id);
} else {
- struct ipoib_buf *tx_req;
+ struct ipoib_tx_buf *tx_req;
unsigned long flags;
if (wr_id >= IPOIB_TX_RING_SIZE) {
@@ -302,7 +322,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_ah *address, u32 qpn)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ipoib_buf *tx_req;
+ struct ipoib_tx_buf *tx_req;
dma_addr_t addr;
if (skb->len > dev->mtu + INFINIBAND_ALEN) {
@@ -387,9 +407,9 @@ int ipoib_ib_dev_open(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
- ret = ipoib_qp_create(dev);
+ ret = ipoib_init_qp(dev);
if (ret) {
- ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret);
+ ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
return -1;
}
@@ -466,15 +486,16 @@ int ipoib_ib_dev_stop(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
- int attr_mask;
unsigned long begin;
- struct ipoib_buf *tx_req;
+ struct ipoib_tx_buf *tx_req;
int i;
- /* Kill the existing QP and allocate a new one */
+ /*
+	 * Move our QP to the error state and then reinitialize it
+ * when all work requests have completed or have been flushed.
+ */
qp_attr.qp_state = IB_QPS_ERR;
- attr_mask = IB_QP_STATE;
- if (ib_modify_qp(priv->qp, &qp_attr, attr_mask))
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
/* Wait for all sends and receives to complete */
@@ -521,8 +542,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
timeout:
qp_attr.qp_state = IB_QPS_RESET;
- attr_mask = IB_QP_STATE;
- if (ib_modify_qp(priv->qp, &qp_attr, attr_mask))
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
ipoib_warn(priv, "Failed to modify QP to RESET state\n");
/* Wait for all AHs to be reaped */
@@ -588,9 +608,13 @@ void ipoib_ib_dev_flush(void *_dev)
if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
ipoib_ib_dev_up(dev);
+ down(&priv->vlan_mutex);
+
/* Flush any child interfaces too */
list_for_each_entry(cpriv, &priv->child_intfs, list)
ipoib_ib_dev_flush(&cpriv->dev);
+
+ up(&priv->vlan_mutex);
}
void ipoib_ib_dev_cleanup(struct net_device *dev)
@@ -616,7 +640,6 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
* Bug #2507. This implementation will probably be removed when the P_Key
* change async notification is available.
*/
-int ipoib_open(struct net_device *dev);
static void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 704f48e0b6a..475d98fa9e2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -58,6 +58,11 @@ module_param_named(debug_level, ipoib_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
#endif
+struct ipoib_path_iter {
+ struct net_device *dev;
+ struct ipoib_path path;
+};
+
static const u8 ipv4_bcast_addr[] = {
0x00, 0xff, 0xff, 0xff,
0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
@@ -89,8 +94,10 @@ int ipoib_open(struct net_device *dev)
if (ipoib_ib_dev_open(dev))
return -EINVAL;
- if (ipoib_ib_dev_up(dev))
+ if (ipoib_ib_dev_up(dev)) {
+ ipoib_ib_dev_stop(dev);
return -EINVAL;
+ }
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -250,6 +257,64 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
kfree(path);
}
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+
+struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
+{
+ struct ipoib_path_iter *iter;
+
+ iter = kmalloc(sizeof *iter, GFP_KERNEL);
+ if (!iter)
+ return NULL;
+
+ iter->dev = dev;
+ memset(iter->path.pathrec.dgid.raw, 0, 16);
+
+ if (ipoib_path_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+int ipoib_path_iter_next(struct ipoib_path_iter *iter)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+ struct rb_node *n;
+ struct ipoib_path *path;
+ int ret = 1;
+
+ spin_lock_irq(&priv->lock);
+
+ n = rb_first(&priv->path_tree);
+
+ while (n) {
+ path = rb_entry(n, struct ipoib_path, rb_node);
+
+ if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw,
+ sizeof (union ib_gid)) < 0) {
+ iter->path = *path;
+ ret = 0;
+ break;
+ }
+
+ n = rb_next(n);
+ }
+
+ spin_unlock_irq(&priv->lock);
+
+ return ret;
+}
+
+void ipoib_path_iter_read(struct ipoib_path_iter *iter,
+ struct ipoib_path *path)
+{
+ *path = iter->path;
+}
+
+#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
+
void ipoib_flush_paths(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
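The iterator above holds no lock or node reference between calls: each ipoib_path_iter_next() re-walks the tree from the root to the first GID strictly greater than the one last copied out, so entries added or removed mid-walk may be skipped or seen twice, which is acceptable for a debug view. A usage sketch following the ipoib_fs.c convention that the caller kfree()s the iterator:

	struct ipoib_path_iter *iter = ipoib_path_iter_init(dev);
	struct ipoib_path path;

	while (iter) {
		ipoib_path_iter_read(iter, &path);
		/* ... consume the snapshot in 'path' ... */
		if (ipoib_path_iter_next(iter)) {
			kfree(iter);
			iter = NULL;
		}
	}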
@@ -335,9 +400,9 @@ static void path_rec_completion(int status,
while ((skb = __skb_dequeue(&neigh->queue)))
__skb_queue_tail(&skqueue, skb);
}
- } else
- path->query = NULL;
+ }
+ path->query = NULL;
complete(&path->done);
spin_unlock_irqrestore(&priv->lock, flags);
@@ -356,19 +421,15 @@ static struct ipoib_path *path_rec_create(struct net_device *dev,
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path;
- path = kmalloc(sizeof *path, GFP_ATOMIC);
+ path = kzalloc(sizeof *path, GFP_ATOMIC);
if (!path)
return NULL;
- path->dev = dev;
- path->pathrec.dlid = 0;
- path->ah = NULL;
+ path->dev = dev;
skb_queue_head_init(&path->queue);
INIT_LIST_HEAD(&path->neigh_list);
- path->query = NULL;
- init_completion(&path->done);
memcpy(path->pathrec.dgid.raw, gid->raw, sizeof (union ib_gid));
path->pathrec.sgid = priv->local_gid;
@@ -386,6 +447,8 @@ static int path_rec_start(struct net_device *dev,
ipoib_dbg(priv, "Start path record lookup for " IPOIB_GID_FMT "\n",
IPOIB_GID_ARG(path->pathrec.dgid));
+ init_completion(&path->done);
+
path->query_id =
ib_sa_path_rec_get(priv->ca, priv->port,
&path->pathrec,
@@ -474,7 +537,7 @@ err:
spin_unlock(&priv->lock);
}
-static void path_lookup(struct sk_buff *skb, struct net_device *dev)
+static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
@@ -551,11 +614,8 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct ipoib_neigh *neigh;
unsigned long flags;
- local_irq_save(flags);
- if (!spin_trylock(&priv->tx_lock)) {
- local_irq_restore(flags);
+ if (!spin_trylock_irqsave(&priv->tx_lock, flags))
return NETDEV_TX_LOCKED;
- }
/*
* Check if our queue is stopped. Since we have the LLTX bit
@@ -569,7 +629,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (skb->dst && skb->dst->neighbour) {
if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
- path_lookup(skb, dev);
+ ipoib_path_lookup(skb, dev);
goto out;
}
@@ -637,8 +697,11 @@ static void ipoib_timeout(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- ipoib_warn(priv, "transmit timeout: latency %ld\n",
- jiffies - dev->trans_start);
+ ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
+ jiffies_to_msecs(jiffies - dev->trans_start));
+ ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
+ netif_queue_stopped(dev),
+ priv->tx_head, priv->tx_tail);
/* XXX reset QP, etc. */
}
@@ -729,25 +792,21 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
/* Allocate RX/TX "rings" to hold queued skbs */
- priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf),
+ priv->rx_ring = kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
GFP_KERNEL);
if (!priv->rx_ring) {
printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
ca->name, IPOIB_RX_RING_SIZE);
goto out;
}
- memset(priv->rx_ring, 0,
- IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf));
- priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf),
+ priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
GFP_KERNEL);
if (!priv->tx_ring) {
printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
ca->name, IPOIB_TX_RING_SIZE);
goto out_rx_ring_cleanup;
}
- memset(priv->tx_ring, 0,
- IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf));
/* priv->tx_head & tx_tail are already 0 */
@@ -770,7 +829,7 @@ void ipoib_dev_cleanup(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
- ipoib_delete_debug_file(dev);
+ ipoib_delete_debug_files(dev);
/* Delete any child interfaces first */
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
@@ -804,10 +863,6 @@ static void ipoib_setup(struct net_device *dev)
dev->watchdog_timeo = HZ;
- dev->rebuild_header = NULL;
- dev->set_mac_address = NULL;
- dev->header_cache_update = NULL;
-
dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
/*
@@ -983,8 +1038,7 @@ static struct net_device *ipoib_add_port(const char *format,
goto register_failed;
}
- if (ipoib_create_debug_file(priv->dev))
- goto debug_failed;
+ ipoib_create_debug_files(priv->dev);
if (ipoib_add_pkey_attr(priv->dev))
goto sysfs_failed;
@@ -998,9 +1052,7 @@ static struct net_device *ipoib_add_port(const char *format,
return priv->dev;
sysfs_failed:
- ipoib_delete_debug_file(priv->dev);
-
-debug_failed:
+ ipoib_delete_debug_files(priv->dev);
unregister_netdev(priv->dev);
register_failed:
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 36ce29836bf..ef3ee035bbc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -120,12 +120,8 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
if (mcast->ah)
ipoib_put_ah(mcast->ah);
- while (!skb_queue_empty(&mcast->pkt_queue)) {
- struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
-
- skb->dev = dev;
- dev_kfree_skb_any(skb);
- }
+ while (!skb_queue_empty(&mcast->pkt_queue))
+ dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
kfree(mcast);
}
@@ -135,26 +131,18 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
{
struct ipoib_mcast *mcast;
- mcast = kmalloc(sizeof (*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC);
+ mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
if (!mcast)
return NULL;
- memset(mcast, 0, sizeof (*mcast));
-
- init_completion(&mcast->done);
-
mcast->dev = dev;
mcast->created = jiffies;
mcast->backoff = 1;
- mcast->logcount = 0;
INIT_LIST_HEAD(&mcast->list);
INIT_LIST_HEAD(&mcast->neigh_list);
skb_queue_head_init(&mcast->pkt_queue);
- mcast->ah = NULL;
- mcast->query = NULL;
-
return mcast;
}
@@ -319,13 +307,8 @@ ipoib_mcast_sendonly_join_complete(int status,
IPOIB_GID_ARG(mcast->mcmember.mgid), status);
/* Flush out any queued packets */
- while (!skb_queue_empty(&mcast->pkt_queue)) {
- struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
-
- skb->dev = dev;
-
- dev_kfree_skb_any(skb);
- }
+ while (!skb_queue_empty(&mcast->pkt_queue))
+ dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
/* Clear the busy flag so we try again */
clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
@@ -361,6 +344,8 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
rec.port_gid = priv->local_gid;
rec.pkey = cpu_to_be16(priv->pkey);
+ init_completion(&mcast->done);
+
ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec,
IB_SA_MCMEMBER_REC_MGID |
IB_SA_MCMEMBER_REC_PORT_GID |
@@ -480,6 +465,8 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
rec.traffic_class = priv->broadcast->mcmember.traffic_class;
}
+ init_completion(&mcast->done);
+
ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask,
mcast->backoff * 1000, GFP_ATOMIC,
ipoib_mcast_join_complete,
@@ -919,6 +906,8 @@ void ipoib_mcast_restart_task(void *dev_ptr)
ipoib_mcast_start_thread(dev);
}
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+
struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
{
struct ipoib_mcast_iter *iter;
@@ -928,21 +917,16 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
return NULL;
iter->dev = dev;
- memset(iter->mgid.raw, 0, sizeof iter->mgid);
+ memset(iter->mgid.raw, 0, 16);
if (ipoib_mcast_iter_next(iter)) {
- ipoib_mcast_iter_free(iter);
+ kfree(iter);
return NULL;
}
return iter;
}
-void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter)
-{
- kfree(iter);
-}
-
int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
{
struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
@@ -991,3 +975,5 @@ void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
*complete = iter->complete;
*send_only = iter->send_only;
}
+
+#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 79f59d0563e..e829e10400e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -41,7 +41,6 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr *qp_attr;
- int attr_mask;
int ret;
u16 pkey_index;
@@ -59,8 +58,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
/* set correct QKey for QP */
qp_attr->qkey = priv->qkey;
- attr_mask = IB_QP_QKEY;
- ret = ib_modify_qp(priv->qp, qp_attr, attr_mask);
+ ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
if (ret) {
ipoib_warn(priv, "failed to modify QP, ret = %d\n", ret);
goto out;
@@ -92,7 +90,7 @@ int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
return ret;
}
-int ipoib_qp_create(struct net_device *dev)
+int ipoib_init_qp(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
@@ -149,10 +147,11 @@ int ipoib_qp_create(struct net_device *dev)
return 0;
out_fail:
- ib_destroy_qp(priv->qp);
- priv->qp = NULL;
+ qp_attr.qp_state = IB_QPS_RESET;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ ipoib_warn(priv, "Failed to modify QP to RESET state\n");
- return -EINVAL;
+ return ret;
}
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 332d730e60c..d280b341a37 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -113,8 +113,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
priv->parent = ppriv->dev;
- if (ipoib_create_debug_file(priv->dev))
- goto debug_failed;
+ ipoib_create_debug_files(priv->dev);
if (ipoib_add_pkey_attr(priv->dev))
goto sysfs_failed;
@@ -130,9 +129,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
return 0;
sysfs_failed:
- ipoib_delete_debug_file(priv->dev);
-
-debug_failed:
+ ipoib_delete_debug_files(priv->dev);
unregister_netdev(priv->dev);
register_failed:
diff --git a/drivers/infiniband/ulp/srp/Kbuild b/drivers/infiniband/ulp/srp/Kbuild
new file mode 100644
index 00000000000..a16c73c667c
--- /dev/null
+++ b/drivers/infiniband/ulp/srp/Kbuild
@@ -0,0 +1 @@
+obj-$(CONFIG_INFINIBAND_SRP) += ib_srp.o
diff --git a/drivers/infiniband/ulp/srp/Kconfig b/drivers/infiniband/ulp/srp/Kconfig
new file mode 100644
index 00000000000..8fe3be4e991
--- /dev/null
+++ b/drivers/infiniband/ulp/srp/Kconfig
@@ -0,0 +1,11 @@
+config INFINIBAND_SRP
+ tristate "InfiniBand SCSI RDMA Protocol"
+ depends on INFINIBAND && SCSI
+ ---help---
+ Support for the SCSI RDMA Protocol over InfiniBand. This
+ allows you to access storage devices that speak SRP over
+ InfiniBand.
+
+ The SRP protocol is defined by the INCITS T10 technical
+ committee. See <http://www.t10.org/>.
+
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
new file mode 100644
index 00000000000..ee9fe226ae9
--- /dev/null
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -0,0 +1,1704 @@
+/*
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: ib_srp.c 3932 2005-11-01 17:19:29Z roland $
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/parser.h>
+#include <linux/random.h>
+
+#include <asm/atomic.h>
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_dbg.h>
+#include <scsi/srp.h>
+
+#include <rdma/ib_cache.h>
+
+#include "ib_srp.h"
+
+#define DRV_NAME "ib_srp"
+#define PFX DRV_NAME ": "
+#define DRV_VERSION "0.2"
+#define DRV_RELDATE "November 1, 2005"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
+ "v" DRV_VERSION " (" DRV_RELDATE ")");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static int topspin_workarounds = 1;
+
+module_param(topspin_workarounds, int, 0444);
+MODULE_PARM_DESC(topspin_workarounds,
+ "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
+
+static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
+
+static void srp_add_one(struct ib_device *device);
+static void srp_remove_one(struct ib_device *device);
+static void srp_completion(struct ib_cq *cq, void *target_ptr);
+static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+
+static struct ib_client srp_client = {
+ .name = "srp",
+ .add = srp_add_one,
+ .remove = srp_remove_one
+};
+
+static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
+{
+ return (struct srp_target_port *) host->hostdata;
+}
+
+static const char *srp_target_info(struct Scsi_Host *host)
+{
+ return host_to_target(host)->target_name;
+}
+
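+/*
+ * An information unit (IU) is a message buffer that is DMA-mapped
+ * once at allocation time and then reused for every send or receive
+ * posted to the QP.
+ */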
+static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
+ gfp_t gfp_mask,
+ enum dma_data_direction direction)
+{
+ struct srp_iu *iu;
+
+ iu = kmalloc(sizeof *iu, gfp_mask);
+ if (!iu)
+ goto out;
+
+ iu->buf = kzalloc(size, gfp_mask);
+ if (!iu->buf)
+ goto out_free_iu;
+
+ iu->dma = dma_map_single(host->dev->dma_device, iu->buf, size, direction);
+ if (dma_mapping_error(iu->dma))
+ goto out_free_buf;
+
+ iu->size = size;
+ iu->direction = direction;
+
+ return iu;
+
+out_free_buf:
+ kfree(iu->buf);
+out_free_iu:
+ kfree(iu);
+out:
+ return NULL;
+}
+
+static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
+{
+ if (!iu)
+ return;
+
+ dma_unmap_single(host->dev->dma_device, iu->dma, iu->size, iu->direction);
+ kfree(iu->buf);
+ kfree(iu);
+}
+
+static void srp_qp_event(struct ib_event *event, void *context)
+{
+ printk(KERN_ERR PFX "QP event %d\n", event->event);
+}
+
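+/*
+ * Move a freshly created QP to the INIT state, binding it to the
+ * P_Key and port used to reach the target. The later transitions to
+ * RTR and RTS are driven from the connection state machine in
+ * srp_cm_handler().
+ */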
+static int srp_init_qp(struct srp_target_port *target,
+ struct ib_qp *qp)
+{
+ struct ib_qp_attr *attr;
+ int ret;
+
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ if (!attr)
+ return -ENOMEM;
+
+ ret = ib_find_cached_pkey(target->srp_host->dev,
+ target->srp_host->port,
+ be16_to_cpu(target->path.pkey),
+ &attr->pkey_index);
+ if (ret)
+ goto out;
+
+ attr->qp_state = IB_QPS_INIT;
+ attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE);
+ attr->port_num = target->srp_host->port;
+
+ ret = ib_modify_qp(qp, attr,
+ IB_QP_STATE |
+ IB_QP_PKEY_INDEX |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PORT);
+
+out:
+ kfree(attr);
+ return ret;
+}
+
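+/*
+ * Allocate the per-target IB resources: one completion queue shared
+ * by the send and receive queues, and a reliable connected (RC) QP
+ * sized for SRP_SQ_SIZE sends and SRP_RQ_SIZE receives.
+ */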
+static int srp_create_target_ib(struct srp_target_port *target)
+{
+ struct ib_qp_init_attr *init_attr;
+ int ret;
+
+ init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
+ if (!init_attr)
+ return -ENOMEM;
+
+ target->cq = ib_create_cq(target->srp_host->dev, srp_completion,
+ NULL, target, SRP_CQ_SIZE);
+ if (IS_ERR(target->cq)) {
+ ret = PTR_ERR(target->cq);
+ goto out;
+ }
+
+ ib_req_notify_cq(target->cq, IB_CQ_NEXT_COMP);
+
+ init_attr->event_handler = srp_qp_event;
+ init_attr->cap.max_send_wr = SRP_SQ_SIZE;
+ init_attr->cap.max_recv_wr = SRP_RQ_SIZE;
+ init_attr->cap.max_recv_sge = 1;
+ init_attr->cap.max_send_sge = 1;
+ init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr->qp_type = IB_QPT_RC;
+ init_attr->send_cq = target->cq;
+ init_attr->recv_cq = target->cq;
+
+ target->qp = ib_create_qp(target->srp_host->pd, init_attr);
+ if (IS_ERR(target->qp)) {
+ ret = PTR_ERR(target->qp);
+ ib_destroy_cq(target->cq);
+ goto out;
+ }
+
+ ret = srp_init_qp(target, target->qp);
+ if (ret) {
+ ib_destroy_qp(target->qp);
+ ib_destroy_cq(target->cq);
+ goto out;
+ }
+
+out:
+ kfree(init_attr);
+ return ret;
+}
+
+static void srp_free_target_ib(struct srp_target_port *target)
+{
+ int i;
+
+ ib_destroy_qp(target->qp);
+ ib_destroy_cq(target->cq);
+
+ for (i = 0; i < SRP_RQ_SIZE; ++i)
+ srp_free_iu(target->srp_host, target->rx_ring[i]);
+ for (i = 0; i < SRP_SQ_SIZE + 1; ++i)
+ srp_free_iu(target->srp_host, target->tx_ring[i]);
+}
+
+static void srp_path_rec_completion(int status,
+ struct ib_sa_path_rec *pathrec,
+ void *target_ptr)
+{
+ struct srp_target_port *target = target_ptr;
+
+ target->status = status;
+ if (status)
+ printk(KERN_ERR PFX "Got failed path rec status %d\n", status);
+ else
+ target->path = *pathrec;
+ complete(&target->done);
+}
+
+static int srp_lookup_path(struct srp_target_port *target)
+{
+ target->path.numb_path = 1;
+
+ init_completion(&target->done);
+
+ target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev,
+ target->srp_host->port,
+ &target->path,
+ IB_SA_PATH_REC_DGID |
+ IB_SA_PATH_REC_SGID |
+ IB_SA_PATH_REC_NUMB_PATH |
+ IB_SA_PATH_REC_PKEY,
+ SRP_PATH_REC_TIMEOUT_MS,
+ GFP_KERNEL,
+ srp_path_rec_completion,
+ target, &target->path_query);
+ if (target->path_query_id < 0)
+ return target->path_query_id;
+
+ wait_for_completion(&target->done);
+
+ if (target->status < 0)
+ printk(KERN_WARNING PFX "Path record query failed\n");
+
+ return target->status;
+}
+
+static int srp_send_req(struct srp_target_port *target)
+{
+ struct {
+ struct ib_cm_req_param param;
+ struct srp_login_req priv;
+ } *req = NULL;
+ int status;
+
+ req = kzalloc(sizeof *req, GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ req->param.primary_path = &target->path;
+ req->param.alternate_path = NULL;
+ req->param.service_id = target->service_id;
+ req->param.qp_num = target->qp->qp_num;
+ req->param.qp_type = target->qp->qp_type;
+ req->param.private_data = &req->priv;
+ req->param.private_data_len = sizeof req->priv;
+ req->param.flow_control = 1;
+
+ get_random_bytes(&req->param.starting_psn, 4);
+ req->param.starting_psn &= 0xffffff;
+
+ /*
+ * Pick some arbitrary defaults here; we could make these
+ * module parameters if anyone cared about setting them.
+ */
+ req->param.responder_resources = 4;
+ req->param.remote_cm_response_timeout = 20;
+ req->param.local_cm_response_timeout = 20;
+ req->param.retry_count = 7;
+ req->param.rnr_retry_count = 7;
+ req->param.max_cm_retries = 15;
+
+ req->priv.opcode = SRP_LOGIN_REQ;
+ req->priv.tag = 0;
+ req->priv.req_it_iu_len = cpu_to_be32(SRP_MAX_IU_LEN);
+ req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
+ SRP_BUF_FORMAT_INDIRECT);
+ memcpy(req->priv.initiator_port_id, target->srp_host->initiator_port_id, 16);
+ /*
+ * Topspin/Cisco SRP targets will reject our login unless we
+ * zero out the first 8 bytes of our initiator port ID. The
+ * second 8 bytes must be our local node GUID, but we always
+ * use that anyway.
+ */
+ if (topspin_workarounds && !memcmp(&target->ioc_guid, topspin_oui, 3)) {
+ printk(KERN_DEBUG PFX "Topspin/Cisco initiator port ID workaround "
+ "activated for target GUID %016llx\n",
+ (unsigned long long) be64_to_cpu(target->ioc_guid));
+ memset(req->priv.initiator_port_id, 0, 8);
+ }
+ memcpy(req->priv.target_port_id, &target->id_ext, 8);
+ memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
+
+ status = ib_send_cm_req(target->cm_id, &req->param);
+
+ kfree(req);
+
+ return status;
+}
+
+static void srp_disconnect_target(struct srp_target_port *target)
+{
+ /* XXX should send SRP_I_LOGOUT request */
+
+ init_completion(&target->done);
+ ib_send_cm_dreq(target->cm_id, NULL, 0);
+ wait_for_completion(&target->done);
+}
+
+static void srp_remove_work(void *target_ptr)
+{
+ struct srp_target_port *target = target_ptr;
+
+ spin_lock_irq(target->scsi_host->host_lock);
+ if (target->state != SRP_TARGET_DEAD) {
+ spin_unlock_irq(target->scsi_host->host_lock);
+ scsi_host_put(target->scsi_host);
+ return;
+ }
+ target->state = SRP_TARGET_REMOVED;
+ spin_unlock_irq(target->scsi_host->host_lock);
+
+ down(&target->srp_host->target_mutex);
+ list_del(&target->list);
+ up(&target->srp_host->target_mutex);
+
+ scsi_remove_host(target->scsi_host);
+ ib_destroy_cm_id(target->cm_id);
+ srp_free_target_ib(target);
+ scsi_host_put(target->scsi_host);
+ /* And another put to really free the target port... */
+ scsi_host_put(target->scsi_host);
+}
+
+static int srp_connect_target(struct srp_target_port *target)
+{
+ int ret;
+
+ ret = srp_lookup_path(target);
+ if (ret)
+ return ret;
+
+ while (1) {
+ init_completion(&target->done);
+ ret = srp_send_req(target);
+ if (ret)
+ return ret;
+ wait_for_completion(&target->done);
+
+ /*
+ * The CM event handling code will set status to
+ * SRP_PORT_REDIRECT if we get a port redirect REJ
+ * back, or SRP_DLID_REDIRECT if we get a lid/qp
+ * redirect REJ back.
+ */
+ switch (target->status) {
+ case 0:
+ return 0;
+
+ case SRP_PORT_REDIRECT:
+ ret = srp_lookup_path(target);
+ if (ret)
+ return ret;
+ break;
+
+ case SRP_DLID_REDIRECT:
+ break;
+
+ default:
+ return target->status;
+ }
+ }
+}
+
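+/*
+ * Tear the connection down and bring it back up: get a fresh CM ID,
+ * reset and re-init the QP, drain stale completions, fail any
+ * outstanding commands with DID_RESET, rebuild the request rings and
+ * redo the login. If any step fails, the target port is scheduled
+ * for removal.
+ */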
+static int srp_reconnect_target(struct srp_target_port *target)
+{
+ struct ib_cm_id *new_cm_id;
+ struct ib_qp_attr qp_attr;
+ struct srp_request *req;
+ struct ib_wc wc;
+ int ret;
+ int i;
+
+ spin_lock_irq(target->scsi_host->host_lock);
+ if (target->state != SRP_TARGET_LIVE) {
+ spin_unlock_irq(target->scsi_host->host_lock);
+ return -EAGAIN;
+ }
+ target->state = SRP_TARGET_CONNECTING;
+ spin_unlock_irq(target->scsi_host->host_lock);
+
+ srp_disconnect_target(target);
+ /*
+ * Now get a new local CM ID so that we avoid confusing the
+ * target in case things are really fouled up.
+ */
+ new_cm_id = ib_create_cm_id(target->srp_host->dev,
+ srp_cm_handler, target);
+ if (IS_ERR(new_cm_id)) {
+ ret = PTR_ERR(new_cm_id);
+ goto err;
+ }
+ ib_destroy_cm_id(target->cm_id);
+ target->cm_id = new_cm_id;
+
+ qp_attr.qp_state = IB_QPS_RESET;
+ ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE);
+ if (ret)
+ goto err;
+
+ ret = srp_init_qp(target, target->qp);
+ if (ret)
+ goto err;
+
+ while (ib_poll_cq(target->cq, 1, &wc) > 0)
+ ; /* nothing */
+
+ list_for_each_entry(req, &target->req_queue, list) {
+ req->scmnd->result = DID_RESET << 16;
+ req->scmnd->scsi_done(req->scmnd);
+ }
+
+ target->rx_head = 0;
+ target->tx_head = 0;
+ target->tx_tail = 0;
+ target->req_head = 0;
+ for (i = 0; i < SRP_SQ_SIZE - 1; ++i)
+ target->req_ring[i].next = i + 1;
+ target->req_ring[SRP_SQ_SIZE - 1].next = -1;
+ INIT_LIST_HEAD(&target->req_queue);
+
+ ret = srp_connect_target(target);
+ if (ret)
+ goto err;
+
+ spin_lock_irq(target->scsi_host->host_lock);
+ if (target->state == SRP_TARGET_CONNECTING) {
+ ret = 0;
+ target->state = SRP_TARGET_LIVE;
+ } else
+ ret = -EAGAIN;
+ spin_unlock_irq(target->scsi_host->host_lock);
+
+ return ret;
+
+err:
+ printk(KERN_ERR PFX "reconnect failed (%d), removing target port.\n", ret);
+
+ /*
+ * We couldn't reconnect, so kill our target port off.
+ * However, we have to defer the real removal because we might
+ * be in the context of the SCSI error handler now, which
+ * would deadlock if we call scsi_remove_host().
+ */
+ spin_lock_irq(target->scsi_host->host_lock);
+ if (target->state == SRP_TARGET_CONNECTING) {
+ target->state = SRP_TARGET_DEAD;
+ INIT_WORK(&target->work, srp_remove_work, target);
+ schedule_work(&target->work);
+ }
+ spin_unlock_irq(target->scsi_host->host_lock);
+
+ return ret;
+}
+
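+/*
+ * Fill in the data descriptors of an SRP_CMD: a single direct
+ * descriptor if the command maps to one DMA segment, otherwise an
+ * indirect descriptor table with one entry per segment. Returns the
+ * total length of the resulting IU, or a negative error code.
+ */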
+static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
+ struct srp_request *req)
+{
+ struct srp_cmd *cmd = req->cmd->buf;
+ int len;
+ u8 fmt;
+
+ if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE)
+ return sizeof (struct srp_cmd);
+
+ if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
+ scmnd->sc_data_direction != DMA_TO_DEVICE) {
+ printk(KERN_WARNING PFX "Unhandled data direction %d\n",
+ scmnd->sc_data_direction);
+ return -EINVAL;
+ }
+
+ if (scmnd->use_sg) {
+ struct scatterlist *scat = scmnd->request_buffer;
+ int n;
+ int i;
+
+ n = dma_map_sg(target->srp_host->dev->dma_device,
+ scat, scmnd->use_sg, scmnd->sc_data_direction);
+
+ if (n == 1) {
+ struct srp_direct_buf *buf = (void *) cmd->add_data;
+
+ fmt = SRP_DATA_DESC_DIRECT;
+
+ buf->va = cpu_to_be64(sg_dma_address(scat));
+ buf->key = cpu_to_be32(target->srp_host->mr->rkey);
+ buf->len = cpu_to_be32(sg_dma_len(scat));
+
+ len = sizeof (struct srp_cmd) +
+ sizeof (struct srp_direct_buf);
+ } else {
+ struct srp_indirect_buf *buf = (void *) cmd->add_data;
+ u32 datalen = 0;
+
+ fmt = SRP_DATA_DESC_INDIRECT;
+
+ if (scmnd->sc_data_direction == DMA_TO_DEVICE)
+ cmd->data_out_desc_cnt = n;
+ else
+ cmd->data_in_desc_cnt = n;
+
+ buf->table_desc.va = cpu_to_be64(req->cmd->dma +
+ sizeof *cmd +
+ sizeof *buf);
+ buf->table_desc.key =
+ cpu_to_be32(target->srp_host->mr->rkey);
+ buf->table_desc.len =
+ cpu_to_be32(n * sizeof (struct srp_direct_buf));
+
+ for (i = 0; i < n; ++i) {
+ buf->desc_list[i].va = cpu_to_be64(sg_dma_address(&scat[i]));
+ buf->desc_list[i].key =
+ cpu_to_be32(target->srp_host->mr->rkey);
+ buf->desc_list[i].len = cpu_to_be32(sg_dma_len(&scat[i]));
+
+ datalen += sg_dma_len(&scat[i]);
+ }
+
+ buf->len = cpu_to_be32(datalen);
+
+ len = sizeof (struct srp_cmd) +
+ sizeof (struct srp_indirect_buf) +
+ n * sizeof (struct srp_direct_buf);
+ }
+ } else {
+ struct srp_direct_buf *buf = (void *) cmd->add_data;
+ dma_addr_t dma;
+
+ dma = dma_map_single(target->srp_host->dev->dma_device,
+ scmnd->request_buffer, scmnd->request_bufflen,
+ scmnd->sc_data_direction);
+ if (dma_mapping_error(dma)) {
+ printk(KERN_WARNING PFX "unable to map %p/%d (dir %d)\n",
+ scmnd->request_buffer, (int) scmnd->request_bufflen,
+ scmnd->sc_data_direction);
+ return -EINVAL;
+ }
+
+ pci_unmap_addr_set(req, direct_mapping, dma);
+
+ buf->va = cpu_to_be64(dma);
+ buf->key = cpu_to_be32(target->srp_host->mr->rkey);
+ buf->len = cpu_to_be32(scmnd->request_bufflen);
+
+ fmt = SRP_DATA_DESC_DIRECT;
+
+ len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
+ }
+
+ if (scmnd->sc_data_direction == DMA_TO_DEVICE)
+ cmd->buf_fmt = fmt << 4;
+ else
+ cmd->buf_fmt = fmt;
+
+ return len;
+}
+
+static void srp_unmap_data(struct scsi_cmnd *scmnd,
+ struct srp_target_port *target,
+ struct srp_request *req)
+{
+ if (!scmnd->request_buffer ||
+ (scmnd->sc_data_direction != DMA_TO_DEVICE &&
+ scmnd->sc_data_direction != DMA_FROM_DEVICE))
+ return;
+
+ if (scmnd->use_sg)
+ dma_unmap_sg(target->srp_host->dev->dma_device,
+ (struct scatterlist *) scmnd->request_buffer,
+ scmnd->use_sg, scmnd->sc_data_direction);
+ else
+ dma_unmap_single(target->srp_host->dev->dma_device,
+ pci_unmap_addr(req, direct_mapping),
+ scmnd->request_bufflen,
+ scmnd->sc_data_direction);
+}
+
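+/*
+ * The target grants us request credits via the req_lim_delta field
+ * of each RSP; apply the (possibly negative) delta under the host
+ * lock before completing the task management request or SCSI command
+ * that the tag refers to.
+ */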
+static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
+{
+ struct srp_request *req;
+ struct scsi_cmnd *scmnd;
+ unsigned long flags;
+ s32 delta;
+
+ delta = (s32) be32_to_cpu(rsp->req_lim_delta);
+
+ spin_lock_irqsave(target->scsi_host->host_lock, flags);
+
+ target->req_lim += delta;
+
+ req = &target->req_ring[rsp->tag & ~SRP_TAG_TSK_MGMT];
+
+ if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
+ if (be32_to_cpu(rsp->resp_data_len) < 4)
+ req->tsk_status = -1;
+ else
+ req->tsk_status = rsp->data[3];
+ complete(&req->done);
+ } else {
+ scmnd = req->scmnd;
+ if (!scmnd) {
+ printk(KERN_ERR "Null scmnd for RSP w/tag %016llx\n",
+ (unsigned long long) rsp->tag);
+ spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
+ return;
+ }
+ scmnd->result = rsp->status;
+
+ if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
+ memcpy(scmnd->sense_buffer, rsp->data +
+ be32_to_cpu(rsp->resp_data_len),
+ min_t(int, be32_to_cpu(rsp->sense_data_len),
+ SCSI_SENSE_BUFFERSIZE));
+ }
+
+ if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER))
+ scmnd->resid = be32_to_cpu(rsp->data_out_res_cnt);
+ else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))
+ scmnd->resid = be32_to_cpu(rsp->data_in_res_cnt);
+
+ srp_unmap_data(scmnd, target, req);
+
+ if (!req->tsk_mgmt) {
+ req->scmnd = NULL;
+ scmnd->host_scribble = (void *) -1L;
+ scmnd->scsi_done(scmnd);
+
+ list_del(&req->list);
+ req->next = target->req_head;
+ target->req_head = rsp->tag & ~SRP_TAG_TSK_MGMT;
+ } else
+ req->cmd_done = 1;
+ }
+
+ spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
+}
+
+static void srp_reconnect_work(void *target_ptr)
+{
+ struct srp_target_port *target = target_ptr;
+
+ srp_reconnect_target(target);
+}
+
+static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
+{
+ struct srp_iu *iu;
+ u8 opcode;
+
+ iu = target->rx_ring[wc->wr_id & ~SRP_OP_RECV];
+
+ dma_sync_single_for_cpu(target->srp_host->dev->dma_device, iu->dma,
+ target->max_ti_iu_len, DMA_FROM_DEVICE);
+
+ opcode = *(u8 *) iu->buf;
+
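+ /* Debugging aid: change to "if (1)" to hex-dump every received IU. */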
+ if (0) {
+ int i;
+
+ printk(KERN_ERR PFX "recv completion, opcode 0x%02x\n", opcode);
+
+ for (i = 0; i < wc->byte_len; ++i) {
+ if (i % 8 == 0)
+ printk(KERN_ERR " [%02x] ", i);
+ printk(" %02x", ((u8 *) iu->buf)[i]);
+ if ((i + 1) % 8 == 0)
+ printk("\n");
+ }
+
+ if (wc->byte_len % 8)
+ printk("\n");
+ }
+
+ switch (opcode) {
+ case SRP_RSP:
+ srp_process_rsp(target, iu->buf);
+ break;
+
+ case SRP_T_LOGOUT:
+ /* XXX Handle target logout */
+ printk(KERN_WARNING PFX "Got target logout request\n");
+ break;
+
+ default:
+ printk(KERN_WARNING PFX "Unhandled SRP opcode 0x%02x\n", opcode);
+ break;
+ }
+
+ dma_sync_single_for_device(target->srp_host->dev->dma_device, iu->dma,
+ target->max_ti_iu_len, DMA_FROM_DEVICE);
+}
+
+static void srp_completion(struct ib_cq *cq, void *target_ptr)
+{
+ struct srp_target_port *target = target_ptr;
+ struct ib_wc wc;
+ unsigned long flags;
+
+ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ while (ib_poll_cq(cq, 1, &wc) > 0) {
+ if (wc.status) {
+ printk(KERN_ERR PFX "failed %s status %d\n",
+ wc.wr_id & SRP_OP_RECV ? "receive" : "send",
+ wc.status);
+ spin_lock_irqsave(target->scsi_host->host_lock, flags);
+ if (target->state == SRP_TARGET_LIVE)
+ schedule_work(&target->work);
+ spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
+ break;
+ }
+
+ if (wc.wr_id & SRP_OP_RECV)
+ srp_handle_recv(target, &wc);
+ else
+ ++target->tx_tail;
+ }
+}
+
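+/*
+ * Post a receive on the next free rx_ring slot. SRP_RQ_SIZE is a
+ * power of two, so masking rx_head gives the ring index; the index
+ * is also encoded in the work request ID, tagged with SRP_OP_RECV so
+ * the completion handler can tell receives from sends.
+ */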
+static int __srp_post_recv(struct srp_target_port *target)
+{
+ struct srp_iu *iu;
+ struct ib_sge list;
+ struct ib_recv_wr wr, *bad_wr;
+ unsigned int next;
+ int ret;
+
+ next = target->rx_head & (SRP_RQ_SIZE - 1);
+ wr.wr_id = next | SRP_OP_RECV;
+ iu = target->rx_ring[next];
+
+ list.addr = iu->dma;
+ list.length = iu->size;
+ list.lkey = target->srp_host->mr->lkey;
+
+ wr.next = NULL;
+ wr.sg_list = &list;
+ wr.num_sge = 1;
+
+ ret = ib_post_recv(target->qp, &wr, &bad_wr);
+ if (!ret)
+ ++target->rx_head;
+
+ return ret;
+}
+
+static int srp_post_recv(struct srp_target_port *target)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(target->scsi_host->host_lock, flags);
+ ret = __srp_post_recv(target);
+ spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
+
+ return ret;
+}
+
+/*
+ * Must be called with target->scsi_host->host_lock held to protect
+ * req_lim and tx_head. The lock must not be dropped between this
+ * call and the following call to __srp_post_send().
+ */
+static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target)
+{
+ if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE)
+ return NULL;
+
+ if (unlikely(target->req_lim < 1)) {
+ if (printk_ratelimit())
+ printk(KERN_DEBUG PFX "Target has req_lim %d\n",
+ target->req_lim);
+ return NULL;
+ }
+
+ return target->tx_ring[target->tx_head & SRP_SQ_SIZE];
+}
+
+/*
+ * Must be called with target->scsi_host->host_lock held to protect
+ * req_lim and tx_head.
+ */
+static int __srp_post_send(struct srp_target_port *target,
+ struct srp_iu *iu, int len)
+{
+ struct ib_sge list;
+ struct ib_send_wr wr, *bad_wr;
+ int ret = 0;
+
+ list.addr = iu->dma;
+ list.length = len;
+ list.lkey = target->srp_host->mr->lkey;
+
+ wr.next = NULL;
+ wr.wr_id = target->tx_head & SRP_SQ_SIZE;
+ wr.sg_list = &list;
+ wr.num_sge = 1;
+ wr.opcode = IB_WR_SEND;
+ wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = ib_post_send(target->qp, &wr, &bad_wr);
+
+ if (!ret) {
+ ++target->tx_head;
+ --target->req_lim;
+ }
+
+ return ret;
+}
+
+static int srp_queuecommand(struct scsi_cmnd *scmnd,
+ void (*done)(struct scsi_cmnd *))
+{
+ struct srp_target_port *target = host_to_target(scmnd->device->host);
+ struct srp_request *req;
+ struct srp_iu *iu;
+ struct srp_cmd *cmd;
+ long req_index;
+ int len;
+
+ if (target->state == SRP_TARGET_CONNECTING)
+ goto err;
+
+ if (target->state == SRP_TARGET_DEAD ||
+ target->state == SRP_TARGET_REMOVED) {
+ scmnd->result = DID_BAD_TARGET << 16;
+ done(scmnd);
+ return 0;
+ }
+
+ iu = __srp_get_tx_iu(target);
+ if (!iu)
+ goto err;
+
+ dma_sync_single_for_cpu(target->srp_host->dev->dma_device, iu->dma,
+ SRP_MAX_IU_LEN, DMA_TO_DEVICE);
+
+ req_index = target->req_head;
+
+ scmnd->scsi_done = done;
+ scmnd->result = 0;
+ scmnd->host_scribble = (void *) req_index;
+
+ cmd = iu->buf;
+ memset(cmd, 0, sizeof *cmd);
+
+ cmd->opcode = SRP_CMD;
+ cmd->lun = cpu_to_be64((u64) scmnd->device->lun << 48);
+ cmd->tag = req_index;
+ memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
+
+ req = &target->req_ring[req_index];
+
+ req->scmnd = scmnd;
+ req->cmd = iu;
+ req->cmd_done = 0;
+ req->tsk_mgmt = NULL;
+
+ len = srp_map_data(scmnd, target, req);
+ if (len < 0) {
+ printk(KERN_ERR PFX "Failed to map data\n");
+ goto err;
+ }
+
+ if (__srp_post_recv(target)) {
+ printk(KERN_ERR PFX "Recv failed\n");
+ goto err_unmap;
+ }
+
+ dma_sync_single_for_device(target->srp_host->dev->dma_device, iu->dma,
+ SRP_MAX_IU_LEN, DMA_TO_DEVICE);
+
+ if (__srp_post_send(target, iu, len)) {
+ printk(KERN_ERR PFX "Send failed\n");
+ goto err_unmap;
+ }
+
+ target->req_head = req->next;
+ list_add_tail(&req->list, &target->req_queue);
+
+ return 0;
+
+err_unmap:
+ srp_unmap_data(scmnd, target, req);
+
+err:
+ return SCSI_MLQUEUE_HOST_BUSY;
+}
+
+static int srp_alloc_iu_bufs(struct srp_target_port *target)
+{
+ int i;
+
+ for (i = 0; i < SRP_RQ_SIZE; ++i) {
+ target->rx_ring[i] = srp_alloc_iu(target->srp_host,
+ target->max_ti_iu_len,
+ GFP_KERNEL, DMA_FROM_DEVICE);
+ if (!target->rx_ring[i])
+ goto err;
+ }
+
+ for (i = 0; i < SRP_SQ_SIZE + 1; ++i) {
+ target->tx_ring[i] = srp_alloc_iu(target->srp_host,
+ SRP_MAX_IU_LEN,
+ GFP_KERNEL, DMA_TO_DEVICE);
+ if (!target->tx_ring[i])
+ goto err;
+ }
+
+ return 0;
+
+err:
+ for (i = 0; i < SRP_RQ_SIZE; ++i) {
+ srp_free_iu(target->srp_host, target->rx_ring[i]);
+ target->rx_ring[i] = NULL;
+ }
+
+ for (i = 0; i < SRP_SQ_SIZE + 1; ++i) {
+ srp_free_iu(target->srp_host, target->tx_ring[i]);
+ target->tx_ring[i] = NULL;
+ }
+
+ return -ENOMEM;
+}
+
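+/*
+ * Decode a connection reject. Redirect rejections set target->status
+ * to SRP_PORT_REDIRECT or SRP_DLID_REDIRECT so srp_connect_target()
+ * can retry the login; everything else becomes -ECONNRESET.
+ */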
+static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
+ struct ib_cm_event *event,
+ struct srp_target_port *target)
+{
+ struct ib_class_port_info *cpi;
+ int opcode;
+
+ switch (event->param.rej_rcvd.reason) {
+ case IB_CM_REJ_PORT_CM_REDIRECT:
+ cpi = event->param.rej_rcvd.ari;
+ target->path.dlid = cpi->redirect_lid;
+ target->path.pkey = cpi->redirect_pkey;
+ cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
+ memcpy(target->path.dgid.raw, cpi->redirect_gid, 16);
+
+ target->status = target->path.dlid ?
+ SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
+ break;
+
+ case IB_CM_REJ_PORT_REDIRECT:
+ if (topspin_workarounds &&
+ !memcmp(&target->ioc_guid, topspin_oui, 3)) {
+ /*
+ * Topspin/Cisco SRP gateways incorrectly send
+ * reject reason code 25 when they mean 24
+ * (port redirect).
+ */
+ memcpy(target->path.dgid.raw,
+ event->param.rej_rcvd.ari, 16);
+
+ printk(KERN_DEBUG PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
+ (unsigned long long) be64_to_cpu(target->path.dgid.global.subnet_prefix),
+ (unsigned long long) be64_to_cpu(target->path.dgid.global.interface_id));
+
+ target->status = SRP_PORT_REDIRECT;
+ } else {
+ printk(KERN_WARNING " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
+ target->status = -ECONNRESET;
+ }
+ break;
+
+ case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
+ printk(KERN_WARNING " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
+ target->status = -ECONNRESET;
+ break;
+
+ case IB_CM_REJ_CONSUMER_DEFINED:
+ opcode = *(u8 *) event->private_data;
+ if (opcode == SRP_LOGIN_REJ) {
+ struct srp_login_rej *rej = event->private_data;
+ u32 reason = be32_to_cpu(rej->reason);
+
+ if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
+ printk(KERN_WARNING PFX
+ "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
+ else
+ printk(KERN_WARNING PFX
+ "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
+ } else
+ printk(KERN_WARNING " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
+ " opcode 0x%02x\n", opcode);
+ target->status = -ECONNRESET;
+ break;
+
+ default:
+ printk(KERN_WARNING " REJ reason 0x%x\n",
+ event->param.rej_rcvd.reason);
+ target->status = -ECONNRESET;
+ }
+}
+
+static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+ struct srp_target_port *target = cm_id->context;
+ struct ib_qp_attr *qp_attr = NULL;
+ int attr_mask = 0;
+ int comp = 0;
+ int opcode = 0;
+
+ switch (event->event) {
+ case IB_CM_REQ_ERROR:
+ printk(KERN_DEBUG PFX "Sending CM REQ failed\n");
+ comp = 1;
+ target->status = -ECONNRESET;
+ break;
+
+ case IB_CM_REP_RECEIVED:
+ comp = 1;
+ opcode = *(u8 *) event->private_data;
+
+ if (opcode == SRP_LOGIN_RSP) {
+ struct srp_login_rsp *rsp = event->private_data;
+
+ target->max_ti_iu_len = be32_to_cpu(rsp->max_ti_iu_len);
+ target->req_lim = be32_to_cpu(rsp->req_lim_delta);
+
+ target->scsi_host->can_queue = min(target->req_lim,
+ target->scsi_host->can_queue);
+ } else {
+ printk(KERN_WARNING PFX "Unhandled RSP opcode %#x\n", opcode);
+ target->status = -ECONNRESET;
+ break;
+ }
+
+ target->status = srp_alloc_iu_bufs(target);
+ if (target->status)
+ break;
+
+ qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
+ if (!qp_attr) {
+ target->status = -ENOMEM;
+ break;
+ }
+
+ qp_attr->qp_state = IB_QPS_RTR;
+ target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
+ if (target->status)
+ break;
+
+ target->status = ib_modify_qp(target->qp, qp_attr, attr_mask);
+ if (target->status)
+ break;
+
+ target->status = srp_post_recv(target);
+ if (target->status)
+ break;
+
+ qp_attr->qp_state = IB_QPS_RTS;
+ target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
+ if (target->status)
+ break;
+
+ target->status = ib_modify_qp(target->qp, qp_attr, attr_mask);
+ if (target->status)
+ break;
+
+ target->status = ib_send_cm_rtu(cm_id, NULL, 0);
+ if (target->status)
+ break;
+
+ break;
+
+ case IB_CM_REJ_RECEIVED:
+ printk(KERN_DEBUG PFX "REJ received\n");
+ comp = 1;
+
+ srp_cm_rej_handler(cm_id, event, target);
+ break;
+
+ case IB_CM_MRA_RECEIVED:
+ printk(KERN_ERR PFX "MRA received\n");
+ break;
+
+ case IB_CM_DREP_RECEIVED:
+ break;
+
+ case IB_CM_TIMEWAIT_EXIT:
+ printk(KERN_ERR PFX "connection closed\n");
+
+ comp = 1;
+ target->status = 0;
+ break;
+
+ default:
+ printk(KERN_WARNING PFX "Unhandled CM event %d\n", event->event);
+ break;
+ }
+
+ if (comp)
+ complete(&target->done);
+
+ kfree(qp_attr);
+
+ return 0;
+}
+
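+/*
+ * Send an SRP task management request (abort task or LUN reset) for
+ * the command's request slot. The response is matched back to the
+ * slot via SRP_TAG_TSK_MGMT in the tag; we wait up to
+ * SRP_ABORT_TIMEOUT_MS for it to arrive.
+ */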
+static int srp_send_tsk_mgmt(struct scsi_cmnd *scmnd, u8 func)
+{
+ struct srp_target_port *target = host_to_target(scmnd->device->host);
+ struct srp_request *req;
+ struct srp_iu *iu;
+ struct srp_tsk_mgmt *tsk_mgmt;
+ int req_index;
+ int ret = FAILED;
+
+ spin_lock_irq(target->scsi_host->host_lock);
+
+ if (scmnd->host_scribble == (void *) -1L)
+ goto out;
+
+ req_index = (long) scmnd->host_scribble;
+ printk(KERN_ERR "Abort for req_index %d\n", req_index);
+
+ req = &target->req_ring[req_index];
+ init_completion(&req->done);
+
+ iu = __srp_get_tx_iu(target);
+ if (!iu)
+ goto out;
+
+ tsk_mgmt = iu->buf;
+ memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
+
+ tsk_mgmt->opcode = SRP_TSK_MGMT;
+ tsk_mgmt->lun = cpu_to_be64((u64) scmnd->device->lun << 48);
+ tsk_mgmt->tag = req_index | SRP_TAG_TSK_MGMT;
+ tsk_mgmt->tsk_mgmt_func = func;
+ tsk_mgmt->task_tag = req_index;
+
+ if (__srp_post_send(target, iu, sizeof *tsk_mgmt))
+ goto out;
+
+ req->tsk_mgmt = iu;
+
+ spin_unlock_irq(target->scsi_host->host_lock);
+ if (!wait_for_completion_timeout(&req->done,
+ msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
+ return FAILED;
+ spin_lock_irq(target->scsi_host->host_lock);
+
+ if (req->cmd_done) {
+ list_del(&req->list);
+ req->next = target->req_head;
+ target->req_head = req_index;
+
+ scmnd->scsi_done(scmnd);
+ } else if (!req->tsk_status) {
+ scmnd->result = DID_ABORT << 16;
+ ret = SUCCESS;
+ }
+
+out:
+ spin_unlock_irq(target->scsi_host->host_lock);
+ return ret;
+}
+
+static int srp_abort(struct scsi_cmnd *scmnd)
+{
+ printk(KERN_ERR "SRP abort called\n");
+
+ return srp_send_tsk_mgmt(scmnd, SRP_TSK_ABORT_TASK);
+}
+
+static int srp_reset_device(struct scsi_cmnd *scmnd)
+{
+ printk(KERN_ERR "SRP reset_device called\n");
+
+ return srp_send_tsk_mgmt(scmnd, SRP_TSK_LUN_RESET);
+}
+
+static int srp_reset_host(struct scsi_cmnd *scmnd)
+{
+ struct srp_target_port *target = host_to_target(scmnd->device->host);
+ int ret = FAILED;
+
+ printk(KERN_ERR PFX "SRP reset_host called\n");
+
+ if (!srp_reconnect_target(target))
+ ret = SUCCESS;
+
+ return ret;
+}
+
+static struct scsi_host_template srp_template = {
+ .module = THIS_MODULE,
+ .name = DRV_NAME,
+ .info = srp_target_info,
+ .queuecommand = srp_queuecommand,
+ .eh_abort_handler = srp_abort,
+ .eh_device_reset_handler = srp_reset_device,
+ .eh_host_reset_handler = srp_reset_host,
+ .can_queue = SRP_SQ_SIZE,
+ .this_id = -1,
+ .sg_tablesize = SRP_MAX_INDIRECT,
+ .cmd_per_lun = SRP_SQ_SIZE,
+ .use_clustering = ENABLE_CLUSTERING
+};
+
+static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
+{
+ sprintf(target->target_name, "SRP.T10:%016llX",
+ (unsigned long long) be64_to_cpu(target->id_ext));
+
+ if (scsi_add_host(target->scsi_host, host->dev->dma_device))
+ return -ENODEV;
+
+ down(&host->target_mutex);
+ list_add_tail(&target->list, &host->target_list);
+ up(&host->target_mutex);
+
+ target->state = SRP_TARGET_LIVE;
+
+ /* XXX: are we supposed to have a definition of SCAN_WILD_CARD ?? */
+ scsi_scan_target(&target->scsi_host->shost_gendev,
+ 0, target->scsi_id, ~0, 0);
+
+ return 0;
+}
+
+static void srp_release_class_dev(struct class_device *class_dev)
+{
+ struct srp_host *host =
+ container_of(class_dev, struct srp_host, class_dev);
+
+ complete(&host->released);
+}
+
+static struct class srp_class = {
+ .name = "infiniband_srp",
+ .release = srp_release_class_dev
+};
+
+/*
+ * Target ports are added by writing
+ *
+ * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
+ * pkey=<P_Key>,service_id=<service ID>
+ *
+ * to the add_target sysfs attribute.
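+ *
+ * For example (all values illustrative, device mthca0, port 1):
+ *
+ * echo id_ext=0002c90200402fd0,ioc_guid=0002c90200402fd0,dgid=fe800000000000000002c90200402fd1,pkey=ffff,service_id=0002c90200402fd0 > /sys/class/infiniband_srp/srp-mthca0-1/add_target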
+ */
+enum {
+ SRP_OPT_ERR = 0,
+ SRP_OPT_ID_EXT = 1 << 0,
+ SRP_OPT_IOC_GUID = 1 << 1,
+ SRP_OPT_DGID = 1 << 2,
+ SRP_OPT_PKEY = 1 << 3,
+ SRP_OPT_SERVICE_ID = 1 << 4,
+ SRP_OPT_MAX_SECT = 1 << 5,
+ SRP_OPT_ALL = (SRP_OPT_ID_EXT |
+ SRP_OPT_IOC_GUID |
+ SRP_OPT_DGID |
+ SRP_OPT_PKEY |
+ SRP_OPT_SERVICE_ID),
+};
+
+static match_table_t srp_opt_tokens = {
+ { SRP_OPT_ID_EXT, "id_ext=%s" },
+ { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
+ { SRP_OPT_DGID, "dgid=%s" },
+ { SRP_OPT_PKEY, "pkey=%x" },
+ { SRP_OPT_SERVICE_ID, "service_id=%s" },
+ { SRP_OPT_MAX_SECT, "max_sect=%d" },
+ { SRP_OPT_ERR, NULL }
+};
+
+static int srp_parse_options(const char *buf, struct srp_target_port *target)
+{
+ char *options, *sep_opt;
+ char *p;
+ char dgid[3];
+ substring_t args[MAX_OPT_ARGS];
+ int opt_mask = 0;
+ int token;
+ int ret = -EINVAL;
+ int i;
+
+ options = kstrdup(buf, GFP_KERNEL);
+ if (!options)
+ return -ENOMEM;
+
+ sep_opt = options;
+ while ((p = strsep(&sep_opt, ",")) != NULL) {
+ if (!*p)
+ continue;
+
+ token = match_token(p, srp_opt_tokens, args);
+ opt_mask |= token;
+
+ switch (token) {
+ case SRP_OPT_ID_EXT:
+ p = match_strdup(args);
+ target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
+ kfree(p);
+ break;
+
+ case SRP_OPT_IOC_GUID:
+ p = match_strdup(args);
+ target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
+ kfree(p);
+ break;
+
+ case SRP_OPT_DGID:
+ p = match_strdup(args);
+ if (strlen(p) != 32) {
+ printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p);
+ goto out;
+ }
+
+ for (i = 0; i < 16; ++i) {
+ strlcpy(dgid, p + i * 2, 3);
+ target->path.dgid.raw[i] = simple_strtoul(dgid, NULL, 16);
+ }
+ break;
+
+ case SRP_OPT_PKEY:
+ if (match_hex(args, &token)) {
+ printk(KERN_WARNING PFX "bad P_Key parameter '%s'\n", p);
+ goto out;
+ }
+ target->path.pkey = cpu_to_be16(token);
+ break;
+
+ case SRP_OPT_SERVICE_ID:
+ p = match_strdup(args);
+ target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
+ kfree(p);
+ break;
+
+ case SRP_OPT_MAX_SECT:
+ if (match_int(args, &token)) {
+ printk(KERN_WARNING PFX "bad max sect parameter '%s'\n", p);
+ goto out;
+ }
+ target->scsi_host->max_sectors = token;
+ break;
+
+ default:
+ printk(KERN_WARNING PFX "unknown parameter or missing value "
+ "'%s' in target creation request\n", p);
+ goto out;
+ }
+ }
+
+ if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
+ ret = 0;
+ else
+ for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
+ if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
+ !(srp_opt_tokens[i].token & opt_mask))
+ printk(KERN_WARNING PFX "target creation request is "
+ "missing parameter '%s'\n",
+ srp_opt_tokens[i].pattern);
+
+out:
+ kfree(options);
+ return ret;
+}
+
+static ssize_t srp_create_target(struct class_device *class_dev,
+ const char *buf, size_t count)
+{
+ struct srp_host *host =
+ container_of(class_dev, struct srp_host, class_dev);
+ struct Scsi_Host *target_host;
+ struct srp_target_port *target;
+ int ret;
+ int i;
+
+ target_host = scsi_host_alloc(&srp_template,
+ sizeof (struct srp_target_port));
+ if (!target_host)
+ return -ENOMEM;
+
+ target_host->max_lun = SRP_MAX_LUN;
+
+ target = host_to_target(target_host);
+ memset(target, 0, sizeof *target);
+
+ target->scsi_host = target_host;
+ target->srp_host = host;
+
+ INIT_WORK(&target->work, srp_reconnect_work, target);
+
+ for (i = 0; i < SRP_SQ_SIZE - 1; ++i)
+ target->req_ring[i].next = i + 1;
+ target->req_ring[SRP_SQ_SIZE - 1].next = -1;
+ INIT_LIST_HEAD(&target->req_queue);
+
+ ret = srp_parse_options(buf, target);
+ if (ret)
+ goto err;
+
+ ib_get_cached_gid(host->dev, host->port, 0, &target->path.sgid);
+
+ printk(KERN_DEBUG PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x "
+ "service_id %016llx dgid %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ (unsigned long long) be64_to_cpu(target->id_ext),
+ (unsigned long long) be64_to_cpu(target->ioc_guid),
+ be16_to_cpu(target->path.pkey),
+ (unsigned long long) be64_to_cpu(target->service_id),
+ (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[0]),
+ (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[2]),
+ (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[4]),
+ (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[6]),
+ (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[8]),
+ (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[10]),
+ (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[12]),
+ (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[14]));
+
+ ret = srp_create_target_ib(target);
+ if (ret)
+ goto err;
+
+ target->cm_id = ib_create_cm_id(host->dev, srp_cm_handler, target);
+ if (IS_ERR(target->cm_id)) {
+ ret = PTR_ERR(target->cm_id);
+ goto err_free;
+ }
+
+ ret = srp_connect_target(target);
+ if (ret) {
+ printk(KERN_ERR PFX "Connection failed\n");
+ goto err_cm_id;
+ }
+
+ ret = srp_add_target(host, target);
+ if (ret)
+ goto err_disconnect;
+
+ return count;
+
+err_disconnect:
+ srp_disconnect_target(target);
+
+err_cm_id:
+ ib_destroy_cm_id(target->cm_id);
+
+err_free:
+ srp_free_target_ib(target);
+
+err:
+ scsi_host_put(target_host);
+
+ return ret;
+}
+
+static CLASS_DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
+
+static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
+{
+ struct srp_host *host =
+ container_of(class_dev, struct srp_host, class_dev);
+
+ return sprintf(buf, "%s\n", host->dev->name);
+}
+
+static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+
+static ssize_t show_port(struct class_device *class_dev, char *buf)
+{
+ struct srp_host *host =
+ container_of(class_dev, struct srp_host, class_dev);
+
+ return sprintf(buf, "%d\n", host->port);
+}
+
+static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
+
+static struct srp_host *srp_add_port(struct ib_device *device,
+ __be64 node_guid, u8 port)
+{
+ struct srp_host *host;
+
+ host = kzalloc(sizeof *host, GFP_KERNEL);
+ if (!host)
+ return NULL;
+
+ INIT_LIST_HEAD(&host->target_list);
+ init_MUTEX(&host->target_mutex);
+ init_completion(&host->released);
+ host->dev = device;
+ host->port = port;
+
+ host->initiator_port_id[7] = port;
+ memcpy(host->initiator_port_id + 8, &node_guid, 8);
+
+ host->pd = ib_alloc_pd(device);
+ if (IS_ERR(host->pd))
+ goto err_free;
+
+ host->mr = ib_get_dma_mr(host->pd,
+ IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE);
+ if (IS_ERR(host->mr))
+ goto err_pd;
+
+ host->class_dev.class = &srp_class;
+ host->class_dev.dev = device->dma_device;
+ snprintf(host->class_dev.class_id, BUS_ID_SIZE, "srp-%s-%d",
+ device->name, port);
+
+ if (class_device_register(&host->class_dev))
+ goto err_mr;
+ if (class_device_create_file(&host->class_dev, &class_device_attr_add_target))
+ goto err_class;
+ if (class_device_create_file(&host->class_dev, &class_device_attr_ibdev))
+ goto err_class;
+ if (class_device_create_file(&host->class_dev, &class_device_attr_port))
+ goto err_class;
+
+ return host;
+
+err_class:
+ class_device_unregister(&host->class_dev);
+
+err_mr:
+ ib_dereg_mr(host->mr);
+
+err_pd:
+ ib_dealloc_pd(host->pd);
+
+err_free:
+ kfree(host);
+
+ return NULL;
+}
+
+static void srp_add_one(struct ib_device *device)
+{
+ struct list_head *dev_list;
+ struct srp_host *host;
+ struct ib_device_attr *dev_attr;
+ int s, e, p;
+
+ dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
+ if (!dev_attr)
+ return;
+
+ if (ib_query_device(device, dev_attr)) {
+ printk(KERN_WARNING PFX "Couldn't query node GUID for %s.\n",
+ device->name);
+ goto out;
+ }
+
+ dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
+ if (!dev_list)
+ goto out;
+
+ INIT_LIST_HEAD(dev_list);
+
+ if (device->node_type == IB_NODE_SWITCH) {
+ s = 0;
+ e = 0;
+ } else {
+ s = 1;
+ e = device->phys_port_cnt;
+ }
+
+ for (p = s; p <= e; ++p) {
+ host = srp_add_port(device, dev_attr->node_guid, p);
+ if (host)
+ list_add_tail(&host->list, dev_list);
+ }
+
+ ib_set_client_data(device, &srp_client, dev_list);
+
+out:
+ kfree(dev_attr);
+}
+
+static void srp_remove_one(struct ib_device *device)
+{
+ struct list_head *dev_list;
+ struct srp_host *host, *tmp_host;
+ LIST_HEAD(target_list);
+ struct srp_target_port *target, *tmp_target;
+ unsigned long flags;
+
+ dev_list = ib_get_client_data(device, &srp_client);
+
+ list_for_each_entry_safe(host, tmp_host, dev_list, list) {
+ class_device_unregister(&host->class_dev);
+ /*
+ * Wait for the sysfs entry to go away, so that no new
+ * target ports can be created.
+ */
+ wait_for_completion(&host->released);
+
+ /*
+ * Mark all target ports as removed, so we stop queueing
+ * commands and don't try to reconnect.
+ */
+ down(&host->target_mutex);
+ list_for_each_entry_safe(target, tmp_target,
+ &host->target_list, list) {
+ spin_lock_irqsave(target->scsi_host->host_lock, flags);
+ if (target->state != SRP_TARGET_REMOVED)
+ target->state = SRP_TARGET_REMOVED;
+ spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
+ }
+ up(&host->target_mutex);
+
+ /*
+ * Wait for any reconnection tasks that may have
+ * started before we marked our target ports as
+ * removed, and any target port removal tasks.
+ */
+ flush_scheduled_work();
+
+ list_for_each_entry_safe(target, tmp_target,
+ &host->target_list, list) {
+ scsi_remove_host(target->scsi_host);
+ srp_disconnect_target(target);
+ ib_destroy_cm_id(target->cm_id);
+ srp_free_target_ib(target);
+ scsi_host_put(target->scsi_host);
+ }
+
+ ib_dereg_mr(host->mr);
+ ib_dealloc_pd(host->pd);
+ kfree(host);
+ }
+
+ kfree(dev_list);
+}
+
+static int __init srp_init_module(void)
+{
+ int ret;
+
+ ret = class_register(&srp_class);
+ if (ret) {
+ printk(KERN_ERR PFX "couldn't register class infiniband_srp\n");
+ return ret;
+ }
+
+ ret = ib_register_client(&srp_client);
+ if (ret) {
+ printk(KERN_ERR PFX "couldn't register IB client\n");
+ class_unregister(&srp_class);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit srp_cleanup_module(void)
+{
+ ib_unregister_client(&srp_client);
+ class_unregister(&srp_class);
+}
+
+module_init(srp_init_module);
+module_exit(srp_cleanup_module);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
new file mode 100644
index 00000000000..b564f18caf7
--- /dev/null
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: ib_srp.h 3932 2005-11-01 17:19:29Z roland $
+ */
+
+#ifndef IB_SRP_H
+#define IB_SRP_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+#include <asm/semaphore.h>
+
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_cmnd.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_cm.h>
+
+enum {
+ SRP_PATH_REC_TIMEOUT_MS = 1000,
+ SRP_ABORT_TIMEOUT_MS = 5000,
+
+ SRP_PORT_REDIRECT = 1,
+ SRP_DLID_REDIRECT = 2,
+
+ SRP_MAX_LUN = 512,
+ SRP_MAX_IU_LEN = 256,
+
+ SRP_RQ_SHIFT = 6,
+ SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT,
+ SRP_SQ_SIZE = SRP_RQ_SIZE - 1,
+ SRP_CQ_SIZE = SRP_SQ_SIZE + SRP_RQ_SIZE,
+
+ SRP_TAG_TSK_MGMT = 1 << (SRP_RQ_SHIFT + 1)
+};
+
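+/*
+ * SRP_OP_RECV is or'ed into work request IDs to mark receives, and
+ * SRP_MAX_INDIRECT is the number of 16-byte scatter/gather
+ * descriptors that fit in an IU after the SRP_CMD header and the
+ * indirect buffer descriptor.
+ */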
+#define SRP_OP_RECV (1 << 31)
+#define SRP_MAX_INDIRECT ((SRP_MAX_IU_LEN - \
+ sizeof (struct srp_cmd) - \
+ sizeof (struct srp_indirect_buf)) / 16)
+
+enum srp_target_state {
+ SRP_TARGET_LIVE,
+ SRP_TARGET_CONNECTING,
+ SRP_TARGET_DEAD,
+ SRP_TARGET_REMOVED
+};
+
+struct srp_host {
+ u8 initiator_port_id[16];
+ struct ib_device *dev;
+ u8 port;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ struct class_device class_dev;
+ struct list_head target_list;
+ struct semaphore target_mutex;
+ struct completion released;
+ struct list_head list;
+};
+
+struct srp_request {
+ struct list_head list;
+ struct scsi_cmnd *scmnd;
+ struct srp_iu *cmd;
+ struct srp_iu *tsk_mgmt;
+ DECLARE_PCI_UNMAP_ADDR(direct_mapping)
+ struct completion done;
+ short next;
+ u8 cmd_done;
+ u8 tsk_status;
+};
+
+struct srp_target_port {
+ __be64 id_ext;
+ __be64 ioc_guid;
+ __be64 service_id;
+ struct srp_host *srp_host;
+ struct Scsi_Host *scsi_host;
+ char target_name[32];
+ unsigned int scsi_id;
+
+ struct ib_sa_path_rec path;
+ struct ib_sa_query *path_query;
+ int path_query_id;
+
+ struct ib_cm_id *cm_id;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+
+ int max_ti_iu_len;
+ s32 req_lim;
+
+ unsigned rx_head;
+ struct srp_iu *rx_ring[SRP_RQ_SIZE];
+
+ unsigned tx_head;
+ unsigned tx_tail;
+ struct srp_iu *tx_ring[SRP_SQ_SIZE + 1];
+
+ int req_head;
+ struct list_head req_queue;
+ struct srp_request req_ring[SRP_SQ_SIZE];
+
+ struct work_struct work;
+
+ struct list_head list;
+ struct completion done;
+ int status;
+ enum srp_target_state state;
+};
+
+struct srp_iu {
+ dma_addr_t dma;
+ void *buf;
+ size_t size;
+ enum dma_data_direction direction;
+};
+
+#endif /* IB_SRP_H */