From 0c33aeedb2cf99d877ad9adc7c3df07870f60293 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 26 Sep 2005 11:47:53 -0700 Subject: [IB] Add checks to multicast attach and detach Add checks so that we only allow multicast attach/detach with a valid multicast GID and the correct QP type. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5081d903e56..72d3ef786db 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -523,16 +523,22 @@ EXPORT_SYMBOL(ib_dealloc_fmr); int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { - return qp->device->attach_mcast ? - qp->device->attach_mcast(qp, gid, lid) : - -ENOSYS; + if (!qp->device->attach_mcast) + return -ENOSYS; + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + return -EINVAL; + + return qp->device->attach_mcast(qp, gid, lid); } EXPORT_SYMBOL(ib_attach_mcast); int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { - return qp->device->detach_mcast ? - qp->device->detach_mcast(qp, gid, lid) : - -ENOSYS; + if (!qp->device->detach_mcast) + return -ENOSYS; + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + return -EINVAL; + + return qp->device->detach_mcast(qp, gid, lid); } EXPORT_SYMBOL(ib_detach_mcast); -- cgit From 33033b797225553e48ca68d8d8dc5e64ec22e02b Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 26 Sep 2005 12:30:02 -0700 Subject: [IB] mthca: Report correct atomic capability Return correct atomic capability flag from mthca query function. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_dev.h | 1 + drivers/infiniband/hw/mthca/mthca_main.c | 1 + drivers/infiniband/hw/mthca/mthca_provider.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7bff5a8425f..67a2f324a27 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -148,6 +148,7 @@ struct mthca_limits { int reserved_mcgs; int num_pds; int reserved_pds; + u32 flags; u8 port_width_cap; }; diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 23a3f56c789..576e7fcd053 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -172,6 +172,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.reserved_uars = dev_lim->reserved_uars; mdev->limits.reserved_pds = dev_lim->reserved_pds; mdev->limits.port_width_cap = dev_lim->max_port_width; + mdev->limits.flags = dev_lim->flags; /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. May be doable since hardware supports it for SRQ. diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 3f5319a4657..141509dced7 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -99,6 +99,8 @@ static int mthca_query_device(struct ib_device *ibdev, props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift; props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift; props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; + props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? + IB_ATOMIC_HCA : IB_ATOMIC_NONE; err = 0; out: -- cgit From 6b73597e7062118c0549c2702bfb7d273518c906 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 26 Sep 2005 13:53:25 -0700 Subject: [IB] uverbs: ABI-breaking fixes for userspace verbs Introduce new userspace verbs ABI version 3. This eliminates some unneeded commands, and adds support for user-created completion channels. This cleans up problems with file leaks on error paths, and also makes sure that file descriptors are always installed into the correct process. Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 18 ++- drivers/infiniband/core/uverbs_cmd.c | 186 ++++++++++------------ drivers/infiniband/core/uverbs_main.c | 292 ++++++++++++++++++---------------- include/rdma/ib_user_verbs.h | 58 ++----- 4 files changed, 258 insertions(+), 296 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index cc124344dd2..475153e510a 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -53,14 +53,14 @@ struct ib_uverbs_device { struct cdev dev; struct class_device class_dev; struct ib_device *ib_dev; - int num_comp; + int num_comp_vectors; }; struct ib_uverbs_event_file { struct kref ref; + struct file *file; struct ib_uverbs_file *uverbs_file; spinlock_t lock; - int fd; int is_async; wait_queue_head_t poll_wait; struct fasync_struct *async_queue; @@ -73,8 +73,7 @@ struct ib_uverbs_file { struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; - struct ib_uverbs_event_file async_file; - struct ib_uverbs_event_file comp_file[1]; + struct ib_uverbs_event_file *async_file; }; struct ib_uverbs_event { @@ -110,10 +109,17 @@ extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; +struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + int is_async, int *fd); +void ib_uverbs_release_event_file(struct kref *ref); +struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); + void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event); int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, void *addr, size_t size, int write); @@ -125,16 +131,14 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem); const char __user *buf, int in_len, \ int out_len) -IB_UVERBS_DECLARE_CMD(query_params); IB_UVERBS_DECLARE_CMD(get_context); IB_UVERBS_DECLARE_CMD(query_device); IB_UVERBS_DECLARE_CMD(query_port); -IB_UVERBS_DECLARE_CMD(query_gid); -IB_UVERBS_DECLARE_CMD(query_pkey); IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 562445165d2..79b60c3dc8d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -33,6 +33,8 @@ * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $ */ +#include + #include #include "uverbs.h" @@ -45,29 +47,6 @@ (udata)->outlen = (olen); \ } while (0) -ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_params cmd; - struct ib_uverbs_query_params_resp resp; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - resp.num_cq_events = file->device->num_comp; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -77,7 +56,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_udata udata; struct ib_device *ibdev = file->device->ib_dev; struct ib_ucontext *ucontext; - int i; + struct file *filp; int ret; if (out_len < sizeof resp) @@ -110,26 +89,42 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); - resp.async_fd = file->async_file.fd; - for (i = 0; i < file->device->num_comp; ++i) - if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab + - i * sizeof (__u32), - &file->comp_file[i].fd, sizeof (__u32))) { - ret = -EFAULT; - goto err_free; - } + resp.num_comp_vectors = file->device->num_comp_vectors; + + filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd); + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto err_free; + } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_free; + goto err_file; } - file->ucontext = ucontext; + file->async_file = filp->private_data; + + INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev, + ib_uverbs_event_handler); + ret = ib_register_event_handler(&file->event_handler); + if (ret) + goto err_file; + + kref_get(&file->async_file->ref); + kref_get(&file->ref); + file->ucontext = ucontext; + + fd_install(resp.async_fd, filp); + up(&file->mutex); return in_len; +err_file: + put_unused_fd(resp.async_fd); + fput(filp); + err_free: ibdev->dealloc_ucontext(ucontext); @@ -255,62 +250,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_gid cmd; - struct ib_uverbs_query_gid_resp resp; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index, - (union ib_gid *) resp.gid); - if (ret) - return ret; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - -ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_pkey cmd; - struct ib_uverbs_query_pkey_resp resp; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index, - &resp.pkey); - if (ret) - return ret; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -595,6 +534,35 @@ out: return ret ? ret : in_len; } +ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_comp_channel cmd; + struct ib_uverbs_create_comp_channel_resp resp; + struct file *filp; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + put_unused_fd(resp.fd); + fput(filp); + return -EFAULT; + } + + fd_install(resp.fd, filp); + return in_len; +} + ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -603,6 +571,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, struct ib_uverbs_create_cq_resp resp; struct ib_udata udata; struct ib_ucq_object *uobj; + struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; @@ -616,9 +585,12 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - if (cmd.event_handler >= file->device->num_comp) + if (cmd.comp_vector >= file->device->num_comp_vectors) return -EINVAL; + if (cmd.comp_channel >= 0) + ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; @@ -641,7 +613,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, cq->uobject = &uobj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = file; + cq->cq_context = ev_file; atomic_set(&cq->usecnt, 0); retry: @@ -700,6 +672,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_cq_resp resp; struct ib_cq *cq; struct ib_ucq_object *uobj; + struct ib_uverbs_event_file *ev_file; struct ib_uverbs_event *evt, *tmp; u64 user_handle; int ret = -EINVAL; @@ -716,7 +689,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, goto out; user_handle = cq->uobject->user_handle; - uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + ev_file = cq->cq_context; ret = ib_destroy_cq(cq); if (ret) @@ -728,19 +702,23 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->comp_file[0].lock); - list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { - list_del(&evt->list); - kfree(evt); + if (ev_file) { + spin_lock_irq(&ev_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&ev_file->lock); + + kref_put(&ev_file->ref, ib_uverbs_release_event_file); } - spin_unlock_irq(&file->comp_file[0].lock); - spin_lock_irq(&file->async_file.lock); + spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file.lock); + spin_unlock_irq(&file->async_file->lock); resp.comp_events_reported = uobj->comp_events_reported; resp.async_events_reported = uobj->async_events_reported; @@ -1005,12 +983,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->async_file.lock); + spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file.lock); + spin_unlock_irq(&file->async_file->lock); resp.events_reported = uobj->events_reported; @@ -1243,12 +1221,12 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->async_file.lock); + spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file.lock); + spin_unlock_irq(&file->async_file->lock); resp.events_reported = uobj->events_reported; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 12511808de2..e7058fbc607 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -77,26 +77,24 @@ static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) = { - [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params, - [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, - [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, - [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, - [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid, - [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey, - [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, - [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, - [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, - [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, - [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, - [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, - [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, - [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, - [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, - [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, - [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, - [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, - [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, - [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, + [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, + [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, + [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, + [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, + [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, + [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, + [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, + [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, + [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, + [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, + [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, }; static struct vfsmount *uverbs_event_mnt; @@ -188,25 +186,19 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, spin_lock_irq(&file->lock); - while (list_empty(&file->event_list) && file->fd >= 0) { + while (list_empty(&file->event_list)) { spin_unlock_irq(&file->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, - !list_empty(&file->event_list) || - file->fd < 0)) + !list_empty(&file->event_list))) return -ERESTARTSYS; spin_lock_irq(&file->lock); } - if (file->fd < 0) { - spin_unlock_irq(&file->lock); - return -ENODEV; - } - event = list_entry(file->event_list.next, struct ib_uverbs_event, list); if (file->is_async) @@ -248,26 +240,19 @@ static unsigned int ib_uverbs_event_poll(struct file *filp, poll_wait(filp, &file->poll_wait, wait); spin_lock_irq(&file->lock); - if (file->fd < 0) - pollflags = POLLERR; - else if (!list_empty(&file->event_list)) + if (!list_empty(&file->event_list)) pollflags = POLLIN | POLLRDNORM; spin_unlock_irq(&file->lock); return pollflags; } -static void ib_uverbs_event_release(struct ib_uverbs_event_file *file) +void ib_uverbs_release_event_file(struct kref *ref) { - struct ib_uverbs_event *entry, *tmp; + struct ib_uverbs_event_file *file = + container_of(ref, struct ib_uverbs_event_file, ref); - spin_lock_irq(&file->lock); - if (file->fd != -1) { - file->fd = -1; - list_for_each_entry_safe(entry, tmp, &file->event_list, list) - kfree(entry); - } - spin_unlock_irq(&file->lock); + kfree(file); } static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) @@ -280,21 +265,30 @@ static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) static int ib_uverbs_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; + struct ib_uverbs_event *entry, *tmp; + + spin_lock_irq(&file->lock); + file->file = NULL; + list_for_each_entry_safe(entry, tmp, &file->event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + kfree(entry); + } + spin_unlock_irq(&file->lock); - ib_uverbs_event_release(file); ib_uverbs_event_fasync(-1, filp, 0); - kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + + if (file->is_async) { + ib_unregister_event_handler(&file->uverbs_file->event_handler); + kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + } + kref_put(&file->ref, ib_uverbs_release_event_file); return 0; } static struct file_operations uverbs_event_fops = { - /* - * No .owner field since we artificially create event files, - * so there is no increment to the module reference count in - * the open path. All event files come from a uverbs command - * file, which already takes a module reference, so this is OK. - */ + .owner = THIS_MODULE, .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, @@ -303,10 +297,19 @@ static struct file_operations uverbs_event_fops = { void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { - struct ib_uverbs_file *file = cq_context; - struct ib_ucq_object *uobj; - struct ib_uverbs_event *entry; - unsigned long flags; + struct ib_uverbs_event_file *file = cq_context; + struct ib_ucq_object *uobj; + struct ib_uverbs_event *entry; + unsigned long flags; + + if (!file) + return; + + spin_lock_irqsave(&file->lock, flags); + if (!file->file) { + spin_unlock_irqrestore(&file->lock, flags); + return; + } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) @@ -317,13 +320,12 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) entry->desc.comp.cq_handle = cq->uobject->user_handle; entry->counter = &uobj->comp_events_reported; - spin_lock_irqsave(&file->comp_file[0].lock, flags); - list_add_tail(&entry->list, &file->comp_file[0].event_list); + list_add_tail(&entry->list, &file->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); - spin_unlock_irqrestore(&file->comp_file[0].lock, flags); + spin_unlock_irqrestore(&file->lock, flags); - wake_up_interruptible(&file->comp_file[0].poll_wait); - kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->poll_wait); + kill_fasync(&file->async_queue, SIGIO, POLL_IN); } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, @@ -334,6 +336,12 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, struct ib_uverbs_event *entry; unsigned long flags; + spin_lock_irqsave(&file->async_file->lock, flags); + if (!file->async_file->file) { + spin_unlock_irqrestore(&file->async_file->lock, flags); + return; + } + entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) return; @@ -342,24 +350,24 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, entry->desc.async.event_type = event; entry->counter = counter; - spin_lock_irqsave(&file->async_file.lock, flags); - list_add_tail(&entry->list, &file->async_file.event_list); + list_add_tail(&entry->list, &file->async_file->event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); - spin_unlock_irqrestore(&file->async_file.lock, flags); + spin_unlock_irqrestore(&file->async_file->lock, flags); - wake_up_interruptible(&file->async_file.poll_wait); - kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->async_file->poll_wait); + kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { + struct ib_uverbs_event_file *ev_file = context_ptr; struct ib_ucq_object *uobj; uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + ib_uverbs_async_handler(ev_file->uverbs_file, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); @@ -389,8 +397,8 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) &uobj->events_reported); } -static void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event) +void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) { struct ib_uverbs_file *file = container_of(handler, struct ib_uverbs_file, event_handler); @@ -399,38 +407,90 @@ static void ib_uverbs_event_handler(struct ib_event_handler *handler, NULL, NULL); } -static int ib_uverbs_event_init(struct ib_uverbs_event_file *file, - struct ib_uverbs_file *uverbs_file) +struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + int is_async, int *fd) { + struct ib_uverbs_event_file *ev_file; struct file *filp; + int ret; - spin_lock_init(&file->lock); - INIT_LIST_HEAD(&file->event_list); - init_waitqueue_head(&file->poll_wait); - file->uverbs_file = uverbs_file; - file->async_queue = NULL; - - file->fd = get_unused_fd(); - if (file->fd < 0) - return file->fd; + ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL); + if (!ev_file) + return ERR_PTR(-ENOMEM); + + kref_init(&ev_file->ref); + spin_lock_init(&ev_file->lock); + INIT_LIST_HEAD(&ev_file->event_list); + init_waitqueue_head(&ev_file->poll_wait); + ev_file->uverbs_file = uverbs_file; + ev_file->async_queue = NULL; + ev_file->is_async = is_async; + + *fd = get_unused_fd(); + if (*fd < 0) { + ret = *fd; + goto err; + } filp = get_empty_filp(); if (!filp) { - put_unused_fd(file->fd); - return -ENFILE; + ret = -ENFILE; + goto err_fd; } - filp->f_op = &uverbs_event_fops; + ev_file->file = filp; + + /* + * fops_get() can't fail here, because we're coming from a + * system call on a uverbs file, which will already have a + * module reference. + */ + filp->f_op = fops_get(&uverbs_event_fops); filp->f_vfsmnt = mntget(uverbs_event_mnt); filp->f_dentry = dget(uverbs_event_mnt->mnt_root); filp->f_mapping = filp->f_dentry->d_inode->i_mapping; filp->f_flags = O_RDONLY; filp->f_mode = FMODE_READ; - filp->private_data = file; + filp->private_data = ev_file; - fd_install(file->fd, filp); + return filp; - return 0; +err_fd: + put_unused_fd(*fd); + +err: + kfree(ev_file); + return ERR_PTR(ret); +} + +/* + * Look up a completion event file by FD. If lookup is successful, + * takes a ref to the event file struct that it returns; if + * unsuccessful, returns NULL. + */ +struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) +{ + struct ib_uverbs_event_file *ev_file = NULL; + struct file *filp; + + filp = fget(fd); + if (!filp) + return NULL; + + if (filp->f_op != &uverbs_event_fops) + goto out; + + ev_file = filp->private_data; + if (ev_file->is_async) { + ev_file = NULL; + goto out; + } + + kref_get(&ev_file->ref); + +out: + fput(filp); + return ev_file; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, @@ -453,8 +513,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, !uverbs_cmd_table[hdr.command]) return -EINVAL; - if (!file->ucontext && - hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS && + if (!file->ucontext && hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) return -EINVAL; @@ -477,82 +536,33 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) struct ib_uverbs_device *dev = container_of(inode->i_cdev, struct ib_uverbs_device, dev); struct ib_uverbs_file *file; - int i = 0; - int ret; if (!try_module_get(dev->ib_dev->owner)) return -ENODEV; - file = kmalloc(sizeof *file + - (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file), - GFP_KERNEL); + file = kmalloc(sizeof *file, GFP_KERNEL); if (!file) { - ret = -ENOMEM; - goto err; + module_put(dev->ib_dev->owner); + return -ENOMEM; } - file->device = dev; + file->device = dev; + file->ucontext = NULL; kref_init(&file->ref); init_MUTEX(&file->mutex); - file->ucontext = NULL; - - kref_get(&file->ref); - ret = ib_uverbs_event_init(&file->async_file, file); - if (ret) - goto err_kref; - - file->async_file.is_async = 1; - - for (i = 0; i < dev->num_comp; ++i) { - kref_get(&file->ref); - ret = ib_uverbs_event_init(&file->comp_file[i], file); - if (ret) - goto err_async; - file->comp_file[i].is_async = 0; - } - - filp->private_data = file; - INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev, - ib_uverbs_event_handler); - if (ib_register_event_handler(&file->event_handler)) - goto err_async; - return 0; - -err_async: - while (i--) - ib_uverbs_event_release(&file->comp_file[i]); - - ib_uverbs_event_release(&file->async_file); - -err_kref: - /* - * One extra kref_put() because we took a reference before the - * event file creation that failed and got us here. - */ - kref_put(&file->ref, ib_uverbs_release_file); - kref_put(&file->ref, ib_uverbs_release_file); - -err: - module_put(dev->ib_dev->owner); - return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - int i; - ib_unregister_event_handler(&file->event_handler); - ib_uverbs_event_release(&file->async_file); ib_dealloc_ucontext(file->ucontext); - for (i = 0; i < file->device->num_comp; ++i) - ib_uverbs_event_release(&file->comp_file[i]); - + kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); return 0; @@ -631,8 +641,8 @@ static void ib_uverbs_add_one(struct ib_device *device) set_bit(uverbs_dev->devnum, dev_map); spin_unlock(&map_lock); - uverbs_dev->ib_dev = device; - uverbs_dev->num_comp = 1; + uverbs_dev->ib_dev = device; + uverbs_dev->num_comp_vectors = 1; if (device->mmap) cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops); diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index fd85725391a..0532b78200c 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -42,15 +42,12 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IB_USER_VERBS_ABI_VERSION 2 +#define IB_USER_VERBS_ABI_VERSION 3 enum { - IB_USER_VERBS_CMD_QUERY_PARAMS, IB_USER_VERBS_CMD_GET_CONTEXT, IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_CMD_QUERY_PORT, - IB_USER_VERBS_CMD_QUERY_GID, - IB_USER_VERBS_CMD_QUERY_PKEY, IB_USER_VERBS_CMD_ALLOC_PD, IB_USER_VERBS_CMD_DEALLOC_PD, IB_USER_VERBS_CMD_CREATE_AH, @@ -65,6 +62,7 @@ enum { IB_USER_VERBS_CMD_ALLOC_MW, IB_USER_VERBS_CMD_BIND_MW, IB_USER_VERBS_CMD_DEALLOC_MW, + IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, IB_USER_VERBS_CMD_CREATE_CQ, IB_USER_VERBS_CMD_RESIZE_CQ, IB_USER_VERBS_CMD_DESTROY_CQ, @@ -118,27 +116,14 @@ struct ib_uverbs_cmd_hdr { __u16 out_words; }; -/* - * No driver_data for "query params" command, since this is intended - * to be a core function with no possible device dependence. - */ -struct ib_uverbs_query_params { - __u64 response; -}; - -struct ib_uverbs_query_params_resp { - __u32 num_cq_events; -}; - struct ib_uverbs_get_context { __u64 response; - __u64 cq_fd_tab; __u64 driver_data[0]; }; struct ib_uverbs_get_context_resp { __u32 async_fd; - __u32 reserved; + __u32 num_comp_vectors; }; struct ib_uverbs_query_device { @@ -220,31 +205,6 @@ struct ib_uverbs_query_port_resp { __u8 reserved[3]; }; -struct ib_uverbs_query_gid { - __u64 response; - __u8 port_num; - __u8 index; - __u8 reserved[6]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_gid_resp { - __u8 gid[16]; -}; - -struct ib_uverbs_query_pkey { - __u64 response; - __u8 port_num; - __u8 index; - __u8 reserved[6]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_pkey_resp { - __u16 pkey; - __u16 reserved; -}; - struct ib_uverbs_alloc_pd { __u64 response; __u64 driver_data[0]; @@ -278,11 +238,21 @@ struct ib_uverbs_dereg_mr { __u32 mr_handle; }; +struct ib_uverbs_create_comp_channel { + __u64 response; +}; + +struct ib_uverbs_create_comp_channel_resp { + __u32 fd; +}; + struct ib_uverbs_create_cq { __u64 response; __u64 user_handle; __u32 cqe; - __u32 event_handler; + __u32 comp_vector; + __s32 comp_channel; + __u32 reserved; __u64 driver_data[0]; }; -- cgit From eb9d3cd5ce2828fbb99ed7270089ea152c8f64b3 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 27 Sep 2005 15:07:25 -0700 Subject: [IB] uverbs: Fix up resource creation error paths By waiting to add resources to our lists until after the last operation that can fail, we don't have to remove them from their lists in the error path. Also, we should hold the idr mutex until we know whether resource creation has succeed or failed, to avoid someone finding a resource in our table before we're ready. Loosely based on work by Robert Walsh . Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 111 ++++++++++++++++------------------- 1 file changed, 50 insertions(+), 61 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 79b60c3dc8d..a0ceb1cbed5 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -288,24 +289,20 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, pd->uobject = uobj; atomic_set(&pd->usecnt, 0); + down(&ib_uverbs_idr_mutex); + retry: if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) { ret = -ENOMEM; - goto err_pd; + goto err_up; } - down(&ib_uverbs_idr_mutex); ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id); - up(&ib_uverbs_idr_mutex); if (ret == -EAGAIN) goto retry; if (ret) - goto err_pd; - - down(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->pd_list); - up(&file->mutex); + goto err_up; memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; @@ -313,21 +310,22 @@ retry: if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } - return in_len; - -err_list: - down(&file->mutex); - list_del(&uobj->list); + down(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->pd_list); up(&file->mutex); - down(&ib_uverbs_idr_mutex); - idr_remove(&ib_uverbs_pd_idr, uobj->id); up(&ib_uverbs_idr_mutex); -err_pd: + return in_len; + +err_idr: + idr_remove(&ib_uverbs_pd_idr, uobj->id); + +err_up: + up(&ib_uverbs_idr_mutex); ib_dealloc_pd(pd); err: @@ -463,24 +461,22 @@ retry: resp.mr_handle = obj->uobject.id; - down(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); - up(&file->mutex); - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } + down(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); + up(&file->mutex); + up(&ib_uverbs_idr_mutex); return in_len; -err_list: - down(&file->mutex); - list_del(&obj->uobject.list); - up(&file->mutex); +err_idr: + idr_remove(&ib_uverbs_mr_idr, obj->uobject.id); err_unreg: ib_dereg_mr(mr); @@ -616,24 +612,20 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, cq->cq_context = ev_file; atomic_set(&cq->usecnt, 0); + down(&ib_uverbs_idr_mutex); + retry: if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) { ret = -ENOMEM; - goto err_cq; + goto err_up; } - down(&ib_uverbs_idr_mutex); ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id); - up(&ib_uverbs_idr_mutex); if (ret == -EAGAIN) goto retry; if (ret) - goto err_cq; - - down(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); - up(&file->mutex); + goto err_up; memset(&resp, 0, sizeof resp); resp.cq_handle = uobj->uobject.id; @@ -642,21 +634,22 @@ retry: if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } - return in_len; - -err_list: - down(&file->mutex); - list_del(&uobj->uobject.list); + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); up(&file->mutex); - down(&ib_uverbs_idr_mutex); - idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); up(&ib_uverbs_idr_mutex); -err_cq: + return in_len; + +err_idr: + idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); + +err_up: + up(&ib_uverbs_idr_mutex); ib_destroy_cq(cq); err: @@ -837,24 +830,22 @@ retry: resp.qp_handle = uobj->uobject.id; - down(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list); - up(&file->mutex); - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list); + up(&file->mutex); + up(&ib_uverbs_idr_mutex); return in_len; -err_list: - down(&file->mutex); - list_del(&uobj->uobject.list); - up(&file->mutex); +err_idr: + idr_remove(&ib_uverbs_qp_idr, uobj->uobject.id); err_destroy: ib_destroy_qp(qp); @@ -1126,24 +1117,22 @@ retry: resp.srq_handle = uobj->uobject.id; - down(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); - up(&file->mutex); - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); + up(&file->mutex); + up(&ib_uverbs_idr_mutex); return in_len; -err_list: - down(&file->mutex); - list_del(&uobj->uobject.list); - up(&file->mutex); +err_idr: + idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id); err_destroy: ib_destroy_srq(srq); -- cgit From 274c0891637c44ac71f3ac40be91b43c2318883a Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 29 Sep 2005 14:17:48 -0700 Subject: [IB] uverbs: Add device-specific ABI version attribute Add abi_version attribute to uverbs class devices to allow for ABI versioning of device-specific interfaces. Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_main.c | 11 +++++++++++ drivers/infiniband/hw/mthca/mthca_provider.c | 1 + drivers/infiniband/hw/mthca/mthca_user.h | 6 ++++++ include/rdma/ib_verbs.h | 2 ++ 4 files changed, 20 insertions(+) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index e7058fbc607..c84cc28b60e 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -598,6 +598,15 @@ static ssize_t show_ibdev(struct class_device *class_dev, char *buf) } static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static ssize_t show_dev_abi_version(struct class_device *class_dev, char *buf) +{ + struct ib_uverbs_device *dev = + container_of(class_dev, struct ib_uverbs_device, class_dev); + + return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver); +} +static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); + static void ib_uverbs_release_class_dev(struct class_device *class_dev) { struct ib_uverbs_device *dev = @@ -662,6 +671,8 @@ static void ib_uverbs_add_one(struct ib_device *device) if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev)) goto err_class; + if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_abi_version)) + goto err_class; ib_set_client_data(device, &uverbs_client, uverbs_dev); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 141509dced7..939b09dcfe0 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1060,6 +1060,7 @@ int mthca_register_device(struct mthca_dev *dev) strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION; dev->ib_dev.node_type = IB_NODE_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h index 41613ec8a04..bb015c6494c 100644 --- a/drivers/infiniband/hw/mthca/mthca_user.h +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -37,6 +37,12 @@ #include +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define MTHCA_UVERBS_ABI_VERSION 1 + /* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e6f4c9e55df..d13f25a7819 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -951,6 +951,8 @@ struct ib_device { IB_DEV_UNREGISTERED } reg_state; + int uverbs_abi_ver; + u8 node_type; u8 phys_port_cnt; }; -- cgit From 9825051e8c81cbd4400333e7cdc04be77a0bab75 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sat, 1 Oct 2005 13:09:44 -0700 Subject: [IB] mthca: Fill in more fields in query_port method Add code to fill in the bad_pkey_cntr, max_mtu, active_mtu and subnet_timeout fields in mthca_query_port(). Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_provider.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 939b09dcfe0..4e87ad11571 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -152,9 +152,13 @@ static int mthca_query_port(struct ib_device *ibdev, props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; props->max_msg_sz = 0x80000000; props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; + props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; out: kfree(in_mad); -- cgit From f575394f1de9b4afa4b474f1882c7e2d3b8e51d7 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 3 Oct 2005 09:18:02 -0700 Subject: [IB] uverbs: reject invalid memory registration permission flags Reject userspace memory registrations with invalid permission flags: "local write" is required if "remote write" or "remote atomic" is also requested. Pointed out by Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a0ceb1cbed5..2e959acf2ff 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -396,6 +396,14 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; + /* + * Local write permission is required if remote write or + * remote atomic permission is also requested. + */ + if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && + !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) + return -EINVAL; + obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; -- cgit From 116c0074ecfd6f061570856bec52b691d54dbd3c Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 3 Oct 2005 09:32:33 -0700 Subject: [IB] Check port number in ib_query_port()/ib_modify_port() Check port number before passing query_port or modify_port operations on to device driver. Signed-off-by: Roland Dreier --- drivers/infiniband/core/device.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d3cf84e0158..5a6e4497640 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -514,6 +514,12 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { + if (device->node_type == IB_NODE_SWITCH) { + if (port_num) + return -EINVAL; + } else if (port_num < 1 || port_num > device->phys_port_cnt) + return -EINVAL; + return device->query_port(device, port_num, port_attr); } EXPORT_SYMBOL(ib_query_port); @@ -583,6 +589,12 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { + if (device->node_type == IB_NODE_SWITCH) { + if (port_num) + return -EINVAL; + } else if (port_num < 1 || port_num > device->phys_port_cnt) + return -EINVAL; + return device->modify_port(device, port_num, port_modify_mask, port_modify); } -- cgit From 90f104da22bbf2e2b8a2136c12fb4e013fccf504 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 6 Oct 2005 13:15:56 -0700 Subject: [IB] mthca: SRQ limit reached events Our hardware supports generating an event when the number of receives posted to a shared receive queue (SRQ) falls below a user-specified limit. Implement mthca_modify_srq() to arm the limit, and add code to handle dispatching SRQ events when they occur. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_dev.h | 2 ++ drivers/infiniband/hw/mthca/mthca_eq.c | 21 ++++++++++++++++++--- drivers/infiniband/hw/mthca/mthca_provider.c | 1 + drivers/infiniband/hw/mthca/mthca_srq.c | 25 ++++++++++++++++++++++++- 4 files changed, 45 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 67a2f324a27..6e18c128af4 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -448,6 +448,8 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, struct ib_srq_attr *attr, struct mthca_srq *srq); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); +int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask); void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type); void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index c81fa8e975e..f2afdc6c7e6 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -83,7 +83,8 @@ enum { MTHCA_EVENT_TYPE_PATH_MIG = 0x01, MTHCA_EVENT_TYPE_COMM_EST = 0x02, MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, - MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14, MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, @@ -110,8 +111,9 @@ enum { (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \ (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT)) -#define MTHCA_SRQ_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ - (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE) +#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT)) #define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD) #define MTHCA_EQ_DB_INC_CI (1 << 24) @@ -141,6 +143,9 @@ struct mthca_eqe { struct { __be32 qpn; } __attribute__((packed)) qp; + struct { + __be32 srqn; + } __attribute__((packed)) srq; struct { __be32 cqn; u32 reserved1; @@ -305,6 +310,16 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) IB_EVENT_SQ_DRAINED); break; + case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_LAST_WQE_REACHED); + break; + + case MTHCA_EVENT_TYPE_SRQ_LIMIT: + mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff, + IB_EVENT_SRQ_LIMIT_REACHED); + break; + case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR: mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, IB_EVENT_QP_FATAL); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 4e87ad11571..53b29a0841b 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1084,6 +1084,7 @@ int mthca_register_device(struct mthca_dev *dev) if (dev->mthca_flags & MTHCA_FLAG_SRQ) { dev->ib_dev.create_srq = mthca_create_srq; + dev->ib_dev.modify_srq = mthca_modify_srq; dev->ib_dev.destroy_srq = mthca_destroy_srq; if (mthca_is_memfree(dev)) diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 18998d48c53..13d2290261d 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -332,6 +332,29 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) mthca_free_mailbox(dev, mailbox); } +int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + int ret; + u8 status; + + /* We don't support resizing SRQs (yet?) */ + if (attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + + if (attr_mask & IB_SRQ_LIMIT) { + ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status); + if (ret) + return ret; + if (status) + return -EINVAL; + } + + return 0; +} + void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type) { @@ -354,7 +377,7 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn, event.device = &dev->ib_dev; event.event = event_type; - event.element.srq = &srq->ibsrq; + event.element.srq = &srq->ibsrq; srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); out: -- cgit From e23d6d2b090658007732770720a44375cba23200 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 6 Oct 2005 13:25:16 -0700 Subject: [IB] mthca: detect SRQ overflow The hardware relies on us keeping one extra work request that never gets used in SRQs. Add checks to the SRQ work request posting functions so that they fail when someone is about to use up that extra work request, rather than when someone uses the very last work request. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_srq.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 13d2290261d..e464321a7aa 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -438,6 +438,14 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); + + if (next_ind < 0) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + break; + } + prev_wqe = srq->last; srq->last = wqe; @@ -529,6 +537,13 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); + if (next_ind < 0) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + break; + } + ((struct mthca_next_seg *) wqe)->nda_op = cpu_to_be32((next_ind << srq->wqe_shift) | 1); ((struct mthca_next_seg *) wqe)->ee_nds = 0; -- cgit From 4ab6fb7e5b3d34b65a1c3473d80d9d1a462d3a49 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 6 Oct 2005 13:28:16 -0700 Subject: [IB] Fix leak on MAD initialization failure There is a bug in ib_mad_init_device(): if ib_agent_port_open() fails for a given port, then the current code doesn't call ib_mad_port_close() for that port. Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad.c | 45 +++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a14ca87fda1..af302e83056 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2683,40 +2683,47 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) static void ib_mad_init_device(struct ib_device *device) { - int num_ports, cur_port, i; + int start, end, i; if (device->node_type == IB_NODE_SWITCH) { - num_ports = 1; - cur_port = 0; + start = 0; + end = 0; } else { - num_ports = device->phys_port_cnt; - cur_port = 1; + start = 1; + end = device->phys_port_cnt; } - for (i = 0; i < num_ports; i++, cur_port++) { - if (ib_mad_port_open(device, cur_port)) { + + for (i = start; i <= end; i++) { + if (ib_mad_port_open(device, i)) { printk(KERN_ERR PFX "Couldn't open %s port %d\n", - device->name, cur_port); - goto error_device_open; + device->name, i); + goto error; } - if (ib_agent_port_open(device, cur_port)) { + if (ib_agent_port_open(device, i)) { printk(KERN_ERR PFX "Couldn't open %s port %d " "for agents\n", - device->name, cur_port); - goto error_device_open; + device->name, i); + goto error_agent; } } return; -error_device_open: - while (i > 0) { - cur_port--; - if (ib_agent_port_close(device, cur_port)) +error_agent: + if (ib_mad_port_close(device, i)) + printk(KERN_ERR PFX "Couldn't close %s port %d\n", + device->name, i); + +error: + i--; + + while (i >= start) { + if (ib_agent_port_close(device, i)) printk(KERN_ERR PFX "Couldn't close %s port %d " "for agents\n", - device->name, cur_port); - if (ib_mad_port_close(device, cur_port)) + device->name, i); + if (ib_mad_port_close(device, i)) printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, cur_port); + device->name, i); i--; } } -- cgit From efaae8f71f3088cc73c9e5ceabbd314aa82ac768 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 10 Oct 2005 13:48:07 -0700 Subject: [IB] mthca: Better limit checking and reporting Check the sizes of CQs, QPs and SRQs when creating objects, and fail instead of creating too-big queues. Also return real limits instead of just plausible-sounding values from mthca_query_device(). Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 6 ++++-- drivers/infiniband/hw/mthca/mthca_dev.h | 6 ++++++ drivers/infiniband/hw/mthca/mthca_main.c | 9 +++++++++ drivers/infiniband/hw/mthca/mthca_mcg.c | 4 ---- drivers/infiniband/hw/mthca/mthca_provider.c | 18 +++++++++++++++--- drivers/infiniband/hw/mthca/mthca_qp.c | 6 ++++-- drivers/infiniband/hw/mthca/mthca_srq.c | 3 ++- 7 files changed, 40 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index f6a8ac02655..1bd7dc8f778 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -933,9 +933,9 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, goto out; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); - dev_lim->max_srq_sz = 1 << field; + dev_lim->max_srq_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); - dev_lim->max_qp_sz = 1 << field; + dev_lim->max_qp_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET); dev_lim->reserved_qps = 1 << (field & 0xf); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET); @@ -1045,6 +1045,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars); mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", dev_lim->max_pds, dev_lim->reserved_mgms); + mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", + dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz); mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 6e18c128af4..f106bac0f92 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -83,6 +83,8 @@ enum { /* Arbel FW gives us these, but we need them for Tavor */ MTHCA_MPT_ENTRY_SIZE = 0x40, MTHCA_MTT_SEG_SIZE = 0x40, + + MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) }; enum { @@ -128,12 +130,16 @@ struct mthca_limits { int num_uars; int max_sg; int num_qps; + int max_wqes; + int max_qp_init_rdma; int reserved_qps; int num_srqs; + int max_srq_wqes; int reserved_srqs; int num_eecs; int reserved_eecs; int num_cqs; + int max_cqes; int reserved_cqs; int num_eqs; int reserved_eqs; diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 576e7fcd053..7c35abec81e 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -162,9 +162,18 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.pkey_table_len = dev_lim->max_pkeys; mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; mdev->limits.max_sg = dev_lim->max_sg; + mdev->limits.max_wqes = dev_lim->max_qp_sz; + mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp; mdev->limits.reserved_qps = dev_lim->reserved_qps; + mdev->limits.max_srq_wqes = dev_lim->max_srq_sz; mdev->limits.reserved_srqs = dev_lim->reserved_srqs; mdev->limits.reserved_eecs = dev_lim->reserved_eecs; + /* + * Subtract 1 from the limit because we need to allocate a + * spare CQE so the HCA HW can tell the difference between an + * empty CQ and a full CQ. + */ + mdev->limits.max_cqes = dev_lim->max_cq_sz - 1; mdev->limits.reserved_cqs = dev_lim->reserved_cqs; mdev->limits.reserved_eqs = dev_lim->reserved_eqs; mdev->limits.reserved_mtts = dev_lim->reserved_mtts; diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index a2707605f4c..9a0612a9d30 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -37,10 +37,6 @@ #include "mthca_dev.h" #include "mthca_cmd.h" -enum { - MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) -}; - struct mthca_mgm { __be32 next_gid_index; u32 reserved[3]; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 53b29a0841b..46864d18827 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -90,17 +90,26 @@ static int mthca_query_device(struct ib_device *ibdev, props->max_mr_size = ~0ull; props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; - props->max_qp_wr = 0xffff; + props->max_qp_wr = mdev->limits.max_wqes; props->max_sge = mdev->limits.max_sg; props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; - props->max_cqe = 0xffff; + props->max_cqe = mdev->limits.max_cqes; props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds; props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift; - props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift; + props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma; + props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; + props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs; + props->max_srq_wr = mdev->limits.max_srq_wqes; + props->max_srq_sge = mdev->limits.max_sg; props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; + props->max_pkeys = mdev->limits.pkey_table_len; + props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms; + props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; err = 0; out: @@ -640,6 +649,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, int nent; int err; + if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) + return ERR_PTR(-EINVAL); + if (context) { if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) return ERR_PTR(-EFAULT); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 5fa00669f9b..2ee0a2b0fd4 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1112,8 +1112,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, struct mthca_qp *qp) { /* Sanity check QP size before proceeding */ - if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 || - cap->max_send_sge > 64 || cap->max_recv_sge > 64) + if (cap->max_send_wr > dev->limits.max_wqes || + cap->max_recv_wr > dev->limits.max_wqes || + cap->max_send_sge > dev->limits.max_sg || + cap->max_recv_sge > dev->limits.max_sg) return -EINVAL; if (mthca_is_memfree(dev)) { diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index e464321a7aa..64f70aa1b3c 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -186,7 +186,8 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, int err; /* Sanity check SRQ size before proceeding */ - if (attr->max_wr > 16 << 20 || attr->max_sge > 64) + if (attr->max_wr > dev->limits.max_srq_wqes || + attr->max_sge > dev->limits.max_sg) return -EINVAL; srq->max = attr->max_wr; -- cgit From 5b6810e048435de508ef66aebd6b78db13d651b8 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 11 Oct 2005 11:08:24 -0700 Subject: [IPoIB] Rename ipoib_create_qp() -> ipoib_init_qp() and fix error cleanup ipoib_create_qp() no longer creates IPoIB's QP, so it shouldn't destroy the QP on failure -- that unwinding happens elsewhere, so the current code can cause a double free. While we're at it, the function's name should match what it actually does, so rename it to ipoib_init_qp(). Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 9 +++++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4ea1c1ca85b..6b14bd1c60a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -277,7 +277,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid); -int ipoib_qp_create(struct net_device *dev); +int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); void ipoib_transport_dev_cleanup(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f7440096b5e..02d0e000657 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -387,9 +387,9 @@ int ipoib_ib_dev_open(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; - ret = ipoib_qp_create(dev); + ret = ipoib_init_qp(dev); if (ret) { - ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret); + ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); return -1; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 79f59d0563e..b5902a7ec24 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -92,7 +92,7 @@ int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) return ret; } -int ipoib_qp_create(struct net_device *dev) +int ipoib_init_qp(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; @@ -149,10 +149,11 @@ int ipoib_qp_create(struct net_device *dev) return 0; out_fail: - ib_destroy_qp(priv->qp); - priv->qp = NULL; + qp_attr.qp_state = IB_QPS_RESET; + if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) + ipoib_warn(priv, "Failed to modify QP to RESET state\n"); - return -EINVAL; + return ret; } int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) -- cgit From 305a7e87055dde2466a855c24ef51a606915d149 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 11 Oct 2005 15:39:38 -0700 Subject: [IB] uverbs: unlock correctly in error paths A couple of functions were missing spin_unlock calls in error paths. Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index c84cc28b60e..3a887006b25 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -312,8 +312,10 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) } entry = kmalloc(sizeof *entry, GFP_ATOMIC); - if (!entry) + if (!entry) { + spin_unlock_irqrestore(&file->lock, flags); return; + } uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); @@ -343,8 +345,10 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, } entry = kmalloc(sizeof *entry, GFP_ATOMIC); - if (!entry) + if (!entry) { + spin_unlock_irqrestore(&file->async_file->lock, flags); return; + } entry->desc.async.element = element; entry->desc.async.event_type = event; -- cgit From 56c202d6e4f542468fe79b8a735cf128898b87fb Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 13 Oct 2005 10:45:02 -0700 Subject: [IB] fail SA queries if device initialization failed If the SA query module's initialization fails for a device, then that device won't have a struct ib_sa_device associated. We should fail SA queries in that case, rather than blindly dereferencing the NULL pointer we get back from ib_get_client_data(). Signed-off-by: Roland Dreier --- drivers/infiniband/core/sa_query.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 262618210c1..a2c4234ca8a 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -583,10 +583,16 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, { struct ib_sa_path_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; - struct ib_mad_agent *agent = port->agent; + struct ib_sa_port *port; + struct ib_mad_agent *agent; int ret; + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; @@ -685,10 +691,16 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, { struct ib_sa_service_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; - struct ib_mad_agent *agent = port->agent; + struct ib_sa_port *port; + struct ib_mad_agent *agent; int ret; + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + if (method != IB_MGMT_METHOD_GET && method != IB_MGMT_METHOD_SET && method != IB_SA_METHOD_DELETE) @@ -768,10 +780,16 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, { struct ib_sa_mcmember_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; - struct ib_mad_agent *agent = port->agent; + struct ib_sa_port *port; + struct ib_mad_agent *agent; int ret; + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; -- cgit From 883a99c7024c5763d6d4f22d9239c133893e8d74 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 14 Oct 2005 14:00:58 -0700 Subject: [IB] uverbs: Add a mask of device methods allowed for userspace Give each device a uverbs_cmd_mask, so that a low-level driver can control which methods may be called on behalf of userspace. Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_main.c | 3 ++- drivers/infiniband/hw/mthca/mthca_provider.c | 20 ++++++++++++++++++++ include/rdma/ib_verbs.h | 1 + 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 3a887006b25..6a5e5084db7 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -514,7 +514,8 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || - !uverbs_cmd_table[hdr.command]) + !uverbs_cmd_table[hdr.command] || + !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) return -EINVAL; if (!file->ucontext && diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 46864d18827..9e911a1ea41 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -37,6 +37,7 @@ */ #include +#include #include #include "mthca_dev.h" @@ -1077,6 +1078,25 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION; + dev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ib_dev.node_type = IB_NODE_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d13f25a7819..a5a963cb567 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -951,6 +951,7 @@ struct ib_device { IB_DEV_UNREGISTERED } reg_state; + u64 uverbs_cmd_mask; int uverbs_abi_ver; u8 node_type; -- cgit From 91ecd4ae178bed83da4f6a94ced7992e4d7711eb Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 14 Oct 2005 15:21:44 -0700 Subject: [IB] uverbs: Add ABI structures for more commands Add kernel/user ABI structures for marshalling poll CQ, request CQ notification, post send, post receive, post SRQ receive, create AH and destroy AH commands. These commands allow us to support userspace verbs for devices that can't perform these operations directly from userspace (eg the PathScale HCA). Signed-off-by: Roland Dreier --- include/rdma/ib_user_verbs.h | 164 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 162 insertions(+), 2 deletions(-) diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index 0532b78200c..072f3a2edac 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -88,8 +89,11 @@ enum { * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 - * instead. + * Specifically: + * - Do not use pointer types -- pass pointers in __u64 instead. + * - Make sure that any structure larger than 4 bytes is padded to a + * multiple of 8 bytes. Otherwise the structure size will be + * different between 32-bit and 64-bit architectures. */ struct ib_uverbs_async_event_desc { @@ -261,6 +265,41 @@ struct ib_uverbs_create_cq_resp { __u32 cqe; }; +struct ib_uverbs_poll_cq { + __u64 response; + __u32 cq_handle; + __u32 ne; +}; + +struct ib_uverbs_wc { + __u64 wr_id; + __u32 status; + __u32 opcode; + __u32 vendor_err; + __u32 byte_len; + __u32 imm_data; + __u32 qp_num; + __u32 src_qp; + __u32 wc_flags; + __u16 pkey_index; + __u16 slid; + __u8 sl; + __u8 dlid_path_bits; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_poll_cq_resp { + __u32 count; + __u32 reserved; + struct ib_uverbs_wc wc[0]; +}; + +struct ib_uverbs_req_notify_cq { + __u32 cq_handle; + __u32 solicited_only; +}; + struct ib_uverbs_destroy_cq { __u64 response; __u32 cq_handle; @@ -358,6 +397,127 @@ struct ib_uverbs_destroy_qp_resp { __u32 events_reported; }; +/* + * The ib_uverbs_sge structure isn't used anywhere, since we assume + * the ib_sge structure is packed the same way on 32-bit and 64-bit + * architectures in both kernel and user space. It's just here to + * document the ABI. + */ +struct ib_uverbs_sge { + __u64 addr; + __u32 length; + __u32 lkey; +}; + +struct ib_uverbs_send_wr { + __u64 wr_id; + __u32 num_sge; + __u32 opcode; + __u32 send_flags; + __u32 imm_data; + union { + struct { + __u64 remote_addr; + __u32 rkey; + __u32 reserved; + } rdma; + struct { + __u64 remote_addr; + __u64 compare_add; + __u64 swap; + __u32 rkey; + __u32 reserved; + } atomic; + struct { + __u32 ah; + __u32 remote_qpn; + __u32 remote_qkey; + __u32 reserved; + } ud; + } wr; +}; + +struct ib_uverbs_post_send { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_send_wr send_wr[0]; +}; + +struct ib_uverbs_post_send_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_recv_wr { + __u64 wr_id; + __u32 num_sge; + __u32 reserved; +}; + +struct ib_uverbs_post_recv { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv_wr[0]; +}; + +struct ib_uverbs_post_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_post_srq_recv { + __u64 response; + __u32 srq_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv[0]; +}; + +struct ib_uverbs_post_srq_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_global_route { + __u8 dgid[16]; + __u32 flow_label; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 reserved; +}; + +struct ib_uverbs_ah_attr { + struct ib_uverbs_global_route grh; + __u16 dlid; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_create_ah { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 reserved; + struct ib_uverbs_ah_attr attr; +}; + +struct ib_uverbs_create_ah_resp { + __u32 ah_handle; +}; + +struct ib_uverbs_destroy_ah { + __u32 ah_handle; +}; + struct ib_uverbs_attach_mcast { __u8 gid[16]; __u32 qp_handle; -- cgit From 67cdb40ca444c09853ab4d8a41cf547ac26a4de4 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 14 Oct 2005 15:26:04 -0700 Subject: [IB] uverbs: Implement more commands Add kernel support for userspace calling poll CQ, request CQ notification, post send, post receive, post SRQ receive, create AH and destroy AH commands. These commands allow us to support userspace verbs for devices that can't perform these operations directly from userspace (eg the PathScale HCA). Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 8 + drivers/infiniband/core/uverbs_cmd.c | 549 ++++++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 16 +- 3 files changed, 572 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 475153e510a..63c8085c0c9 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -3,6 +3,7 @@ * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -140,10 +141,17 @@ IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); +IB_UVERBS_DECLARE_CMD(poll_cq); +IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); +IB_UVERBS_DECLARE_CMD(post_send); +IB_UVERBS_DECLARE_CMD(post_recv); +IB_UVERBS_DECLARE_CMD(post_srq_recv); +IB_UVERBS_DECLARE_CMD(create_ah); +IB_UVERBS_DECLARE_CMD(destroy_ah); IB_UVERBS_DECLARE_CMD(attach_mcast); IB_UVERBS_DECLARE_CMD(detach_mcast); IB_UVERBS_DECLARE_CMD(create_srq); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2e959acf2ff..14583bb6e2c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -665,6 +665,93 @@ err: return ret; } +ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_poll_cq cmd; + struct ib_uverbs_poll_cq_resp *resp; + struct ib_cq *cq; + struct ib_wc *wc; + int ret = 0; + int i; + int rsize; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL); + if (!wc) + return -ENOMEM; + + rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc); + resp = kmalloc(rsize, GFP_KERNEL); + if (!resp) { + ret = -ENOMEM; + goto out_wc; + } + + down(&ib_uverbs_idr_mutex); + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (!cq || cq->uobject->context != file->ucontext) { + ret = -EINVAL; + goto out; + } + + resp->count = ib_poll_cq(cq, cmd.ne, wc); + + for (i = 0; i < resp->count; i++) { + resp->wc[i].wr_id = wc[i].wr_id; + resp->wc[i].status = wc[i].status; + resp->wc[i].opcode = wc[i].opcode; + resp->wc[i].vendor_err = wc[i].vendor_err; + resp->wc[i].byte_len = wc[i].byte_len; + resp->wc[i].imm_data = wc[i].imm_data; + resp->wc[i].qp_num = wc[i].qp_num; + resp->wc[i].src_qp = wc[i].src_qp; + resp->wc[i].wc_flags = wc[i].wc_flags; + resp->wc[i].pkey_index = wc[i].pkey_index; + resp->wc[i].slid = wc[i].slid; + resp->wc[i].sl = wc[i].sl; + resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits; + resp->wc[i].port_num = wc[i].port_num; + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + kfree(resp); + +out_wc: + kfree(wc); + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_req_notify_cq cmd; + struct ib_cq *cq; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (cq && cq->uobject->context == file->ucontext) { + ib_req_notify_cq(cq, cmd.solicited_only ? + IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); + ret = in_len; + } + up(&ib_uverbs_idr_mutex); + + return ret; +} + ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1003,6 +1090,468 @@ out: return ret ? ret : in_len; } +ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_send cmd; + struct ib_uverbs_post_send_resp resp; + struct ib_uverbs_send_wr *user_wr; + struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; + struct ib_qp *qp; + int i, sg_ind; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + + cmd.sge_count * sizeof (struct ib_uverbs_sge)) + return -EINVAL; + + if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) + return -EINVAL; + + user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); + if (!user_wr) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + sg_ind = 0; + last = NULL; + for (i = 0; i < cmd.wr_count; ++i) { + if (copy_from_user(user_wr, + buf + sizeof cmd + i * cmd.wqe_size, + cmd.wqe_size)) { + ret = -EFAULT; + goto out; + } + + if (user_wr->num_sge + sg_ind > cmd.sge_count) { + ret = -EINVAL; + goto out; + } + + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto out; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + next->opcode = user_wr->opcode; + next->send_flags = user_wr->send_flags; + next->imm_data = user_wr->imm_data; + + if (qp->qp_type == IB_QPT_UD) { + next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, + user_wr->wr.ud.ah); + if (!next->wr.ud.ah) { + ret = -EINVAL; + goto out; + } + next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; + next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; + } else { + switch (next->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + case IB_WR_RDMA_READ: + next->wr.rdma.remote_addr = + user_wr->wr.rdma.remote_addr; + next->wr.rdma.rkey = + user_wr->wr.rdma.rkey; + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + next->wr.atomic.remote_addr = + user_wr->wr.atomic.remote_addr; + next->wr.atomic.compare_add = + user_wr->wr.atomic.compare_add; + next->wr.atomic.swap = user_wr->wr.atomic.swap; + next->wr.atomic.rkey = user_wr->wr.atomic.rkey; + break; + default: + break; + } + } + + if (next->num_sge) { + next->sg_list = (void *) next + + ALIGN(sizeof *next, sizeof (struct ib_sge)); + if (copy_from_user(next->sg_list, + buf + sizeof cmd + + cmd.wr_count * cmd.wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto out; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + resp.bad_wr = 0; + ret = qp->device->post_send(qp, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + kfree(user_wr); + + return ret ? ret : in_len; +} + +static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, + int in_len, + u32 wr_count, + u32 sge_count, + u32 wqe_size) +{ + struct ib_uverbs_recv_wr *user_wr; + struct ib_recv_wr *wr = NULL, *last, *next; + int sg_ind; + int i; + int ret; + + if (in_len < wqe_size * wr_count + + sge_count * sizeof (struct ib_uverbs_sge)) + return ERR_PTR(-EINVAL); + + if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) + return ERR_PTR(-EINVAL); + + user_wr = kmalloc(wqe_size, GFP_KERNEL); + if (!user_wr) + return ERR_PTR(-ENOMEM); + + sg_ind = 0; + last = NULL; + for (i = 0; i < wr_count; ++i) { + if (copy_from_user(user_wr, buf + i * wqe_size, + wqe_size)) { + ret = -EFAULT; + goto err; + } + + if (user_wr->num_sge + sg_ind > sge_count) { + ret = -EINVAL; + goto err; + } + + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto err; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + + if (next->num_sge) { + next->sg_list = (void *) next + + ALIGN(sizeof *next, sizeof (struct ib_sge)); + if (copy_from_user(next->sg_list, + buf + wr_count * wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto err; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + kfree(user_wr); + return wr; + +err: + kfree(user_wr); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ERR_PTR(ret); +} + +ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_recv cmd; + struct ib_uverbs_post_recv_resp resp; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_qp *qp; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + resp.bad_wr = 0; + ret = qp->device->post_recv(qp, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_srq_recv cmd; + struct ib_uverbs_post_srq_recv_resp resp; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_srq *srq; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + + down(&ib_uverbs_idr_mutex); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (!srq || srq->uobject->context != file->ucontext) + goto out; + + resp.bad_wr = 0; + ret = srq->device->post_srq_recv(srq, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_ah cmd; + struct ib_uverbs_create_ah_resp resp; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_ah *ah; + struct ib_ah_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + attr.dlid = cmd.attr.dlid; + attr.sl = cmd.attr.sl; + attr.src_path_bits = cmd.attr.src_path_bits; + attr.static_rate = cmd.attr.static_rate; + attr.port_num = cmd.attr.port_num; + attr.grh.flow_label = cmd.attr.grh.flow_label; + attr.grh.sgid_index = cmd.attr.grh.sgid_index; + attr.grh.hop_limit = cmd.attr.grh.hop_limit; + attr.grh.traffic_class = cmd.attr.grh.traffic_class; + memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); + + ah = ib_create_ah(pd, &attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + goto err_up; + } + + ah->uobject = uobj; + +retry: + if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_destroy; + } + + ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_destroy; + + resp.ah_handle = uobj->id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_idr; + } + + down(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->ah_list); + up(&file->mutex); + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_idr: + idr_remove(&ib_uverbs_ah_idr, uobj->id); + +err_destroy: + ib_destroy_ah(ah); + +err_up: + up(&ib_uverbs_idr_mutex); + + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_destroy_ah cmd; + struct ib_ah *ah; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle); + if (!ah || ah->uobject->context != file->ucontext) + goto out; + + uobj = ah->uobject; + + ret = ib_destroy_ah(ah); + if (ret) + goto out; + + idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle); + + down(&file->mutex); + list_del(&uobj->list); + up(&file->mutex); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 6a5e5084db7..ef2312a9ea7 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -3,6 +3,7 @@ * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -86,10 +87,17 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, + [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, + [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, + [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, + [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, + [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, @@ -111,7 +119,13 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context) down(&ib_uverbs_idr_mutex); - /* XXX Free AHs */ + list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { + struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id); + idr_remove(&ib_uverbs_ah_idr, uobj->id); + ib_destroy_ah(ah); + list_del(&uobj->list); + kfree(uobj); + } list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); -- cgit From 595e726a1f28420c5fc7970b1a87cbce77a1cd45 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 17 Oct 2005 15:33:47 -0700 Subject: [IB] merge ucm.h into ucm.c Eliminate ucm.h. Replace ucm_dbg with direct call to printk KERN_ERR. Signed-off-by: Sean Hefty --- drivers/infiniband/core/ucm.c | 62 +++++++++++++++++++++----------- drivers/infiniband/core/ucm.h | 83 ------------------------------------------- 2 files changed, 41 insertions(+), 104 deletions(-) delete mode 100644 drivers/infiniband/core/ucm.h diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index d0f0b0a2edd..b7470f0fe83 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -41,19 +41,52 @@ #include #include #include +#include #include -#include "ucm.h" +#include +#include MODULE_AUTHOR("Libor Michalek"); MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); MODULE_LICENSE("Dual BSD/GPL"); -static int ucm_debug_level; +struct ib_ucm_file { + struct semaphore mutex; + struct file *filp; -module_param_named(debug_level, ucm_debug_level, int, 0644); -MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); + struct list_head ctxs; /* list of active connections */ + struct list_head events; /* list of pending events */ + wait_queue_head_t poll_wait; +}; + +struct ib_ucm_context { + int id; + wait_queue_head_t wait; + atomic_t ref; + int events_reported; + + struct ib_ucm_file *file; + struct ib_cm_id *cm_id; + __u64 uid; + + struct list_head events; /* list of pending events. */ + struct list_head file_list; /* member in file ctx list */ +}; + +struct ib_ucm_event { + struct ib_ucm_context *ctx; + struct list_head file_list; /* member in file event list */ + struct list_head ctx_list; /* member in ctx event list */ + + struct ib_cm_id *cm_id; + struct ib_ucm_event_resp resp; + void *data; + void *info; + int data_len; + int info_len; +}; enum { IB_UCM_MAJOR = 231, @@ -62,17 +95,10 @@ enum { #define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR) -#define PFX "UCM: " - -#define ucm_dbg(format, arg...) \ - do { \ - if (ucm_debug_level > 0) \ - printk(KERN_DEBUG PFX format, ## arg); \ - } while (0) - static struct semaphore ctx_id_mutex; static struct idr ctx_id_table; + static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { struct ib_ucm_context *ctx; @@ -152,7 +178,6 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) goto error; list_add_tail(&ctx->file_list, &file->ctxs); - ucm_dbg("Allocated CM ID <%d>\n", ctx->id); return ctx; error: @@ -1184,9 +1209,6 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - ucm_dbg("Write. cmd <%d> in <%d> out <%d> len <%Zu>\n", - hdr.cmd, hdr.in, hdr.out, len); - if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; @@ -1232,8 +1254,6 @@ static int ib_ucm_open(struct inode *inode, struct file *filp) filp->private_data = file; file->filp = filp; - ucm_dbg("Created struct\n"); - return 0; } @@ -1281,7 +1301,7 @@ static int __init ib_ucm_init(void) result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm"); if (result) { - ucm_dbg("Error <%d> registering dev\n", result); + printk(KERN_ERR "ucm: Error <%d> registering dev\n", result); goto err_chr; } @@ -1289,14 +1309,14 @@ static int __init ib_ucm_init(void) result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1); if (result) { - ucm_dbg("Error <%d> adding cdev\n", result); + printk(KERN_ERR "ucm: Error <%d> adding cdev\n", result); goto err_cdev; } ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm"); if (IS_ERR(ib_ucm_class)) { result = PTR_ERR(ib_ucm_class); - ucm_dbg("Error <%d> creating class\n", result); + printk(KERN_ERR "Error <%d> creating class\n", result); goto err_class; } diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h deleted file mode 100644 index f46f37bc120..00000000000 --- a/drivers/infiniband/core/ucm.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $ - */ - -#ifndef UCM_H -#define UCM_H - -#include -#include -#include -#include - -#include -#include - -struct ib_ucm_file { - struct semaphore mutex; - struct file *filp; - - struct list_head ctxs; /* list of active connections */ - struct list_head events; /* list of pending events */ - wait_queue_head_t poll_wait; -}; - -struct ib_ucm_context { - int id; - wait_queue_head_t wait; - atomic_t ref; - int events_reported; - - struct ib_ucm_file *file; - struct ib_cm_id *cm_id; - __u64 uid; - - struct list_head events; /* list of pending events. */ - struct list_head file_list; /* member in file ctx list */ -}; - -struct ib_ucm_event { - struct ib_ucm_context *ctx; - struct list_head file_list; /* member in file event list */ - struct list_head ctx_list; /* member in ctx event list */ - - struct ib_cm_id *cm_id; - struct ib_ucm_event_resp resp; - void *data; - void *info; - int data_len; - int info_len; -}; - -#endif /* UCM_H */ -- cgit From 07d357d0cbf89d9980b1769d5444a3c70f000e00 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 17 Oct 2005 15:37:43 -0700 Subject: [IB] CM: bind IDs to a specific device Bind communication identifiers to a device to support device removal. Export per HCA CM devices to userspace. Signed-off-by: Sean Hefty --- drivers/infiniband/core/cm.c | 41 +++++--- drivers/infiniband/core/ucm.c | 218 +++++++++++++++++++++++++++++++----------- include/rdma/ib_cm.h | 10 +- include/rdma/ib_user_cm.h | 10 +- 4 files changed, 202 insertions(+), 77 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 54db6d4831f..6f747debca9 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -366,9 +366,15 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); if ((cur_cm_id_priv->id.service_mask & service_id) == - (service_mask & cur_cm_id_priv->id.service_id)) - return cm_id_priv; - if (service_id < cur_cm_id_priv->id.service_id) + (service_mask & cur_cm_id_priv->id.service_id) && + (cm_id_priv->id.device == cur_cm_id_priv->id.device)) + return cur_cm_id_priv; + + if (cm_id_priv->id.device < cur_cm_id_priv->id.device) + link = &(*link)->rb_left; + else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) + link = &(*link)->rb_right; + else if (service_id < cur_cm_id_priv->id.service_id) link = &(*link)->rb_left; else link = &(*link)->rb_right; @@ -378,7 +384,8 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) return NULL; } -static struct cm_id_private * cm_find_listen(__be64 service_id) +static struct cm_id_private * cm_find_listen(struct ib_device *device, + __be64 service_id) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; @@ -386,9 +393,15 @@ static struct cm_id_private * cm_find_listen(__be64 service_id) while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); if ((cm_id_priv->id.service_mask & service_id) == - (cm_id_priv->id.service_mask & cm_id_priv->id.service_id)) + cm_id_priv->id.service_id && + (cm_id_priv->id.device == device)) return cm_id_priv; - if (service_id < cm_id_priv->id.service_id) + + if (device < cm_id_priv->id.device) + node = node->rb_left; + else if (device > cm_id_priv->id.device) + node = node->rb_right; + else if (service_id < cm_id_priv->id.service_id) node = node->rb_left; else node = node->rb_right; @@ -523,7 +536,8 @@ static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, ib_send_cm_sidr_rep(&cm_id_priv->id, ¶m); } -struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, +struct ib_cm_id *ib_create_cm_id(struct ib_device *device, + ib_cm_handler cm_handler, void *context) { struct cm_id_private *cm_id_priv; @@ -535,6 +549,7 @@ struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, memset(cm_id_priv, 0, sizeof *cm_id_priv); cm_id_priv->id.state = IB_CM_IDLE; + cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.remote_cm_qpn = 1; @@ -1047,7 +1062,6 @@ static void cm_format_req_event(struct cm_work *work, req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.req_rcvd; param->listen_id = listen_id; - param->device = cm_id_priv->av.port->mad_agent->device; param->port = cm_id_priv->av.port->port_num; param->primary_path = &work->path[0]; if (req_msg->alt_local_lid) @@ -1226,7 +1240,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, } /* Find matching listen request. */ - listen_cm_id_priv = cm_find_listen(req_msg->service_id); + listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, + req_msg->service_id); if (!listen_cm_id_priv) { spin_unlock_irqrestore(&cm.lock, flags); cm_issue_rej(work->port, work->mad_recv_wc, @@ -1254,7 +1269,7 @@ static int cm_req_handler(struct cm_work *work) req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id = ib_create_cm_id(NULL, NULL); + cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); @@ -2629,7 +2644,6 @@ static void cm_format_sidr_req_event(struct cm_work *work, param = &work->cm_event.param.sidr_req_rcvd; param->pkey = __be16_to_cpu(sidr_req_msg->pkey); param->listen_id = listen_id; - param->device = work->port->mad_agent->device; param->port = work->port->port_num; work->cm_event.private_data = &sidr_req_msg->private_data; } @@ -2642,7 +2656,7 @@ static int cm_sidr_req_handler(struct cm_work *work) struct ib_wc *wc; unsigned long flags; - cm_id = ib_create_cm_id(NULL, NULL); + cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -2666,7 +2680,8 @@ static int cm_sidr_req_handler(struct cm_work *work) spin_unlock_irqrestore(&cm.lock, flags); goto out; /* Duplicate message. */ } - cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id); + cur_cm_id_priv = cm_find_listen(cm_id->device, + sidr_req_msg->service_id); if (!cur_cm_id_priv) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); spin_unlock_irqrestore(&cm.lock, flags); diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index b7470f0fe83..d208ea29e07 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -52,12 +52,20 @@ MODULE_AUTHOR("Libor Michalek"); MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); MODULE_LICENSE("Dual BSD/GPL"); +struct ib_ucm_device { + int devnum; + struct cdev dev; + struct class_device class_dev; + struct ib_device *ib_dev; +}; + struct ib_ucm_file { struct semaphore mutex; struct file *filp; + struct ib_ucm_device *device; - struct list_head ctxs; /* list of active connections */ - struct list_head events; /* list of pending events */ + struct list_head ctxs; + struct list_head events; wait_queue_head_t poll_wait; }; @@ -90,14 +98,24 @@ struct ib_ucm_event { enum { IB_UCM_MAJOR = 231, - IB_UCM_MINOR = 255 + IB_UCM_BASE_MINOR = 224, + IB_UCM_MAX_DEVICES = 32 }; -#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR) +#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) -static struct semaphore ctx_id_mutex; -static struct idr ctx_id_table; +static void ib_ucm_add_one(struct ib_device *device); +static void ib_ucm_remove_one(struct ib_device *device); +static struct ib_client ucm_client = { + .name = "ucm", + .add = ib_ucm_add_one, + .remove = ib_ucm_remove_one +}; + +DECLARE_MUTEX(ctx_id_mutex); +DEFINE_IDR(ctx_id_table); +static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { @@ -184,10 +202,7 @@ error: kfree(ctx); return NULL; } -/* - * Event portion of the API, handle CM events - * and allow event polling. - */ + static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath, struct ib_sa_path_rec *kpath) { @@ -234,6 +249,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, ureq->retry_count = kreq->retry_count; ureq->rnr_retry_count = kreq->rnr_retry_count; ureq->srq = kreq->srq; + ureq->port = kreq->port; ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path); ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path); @@ -320,6 +336,8 @@ static int ib_ucm_event_process(struct ib_cm_event *evt, case IB_CM_SIDR_REQ_RECEIVED: uvt->resp.u.sidr_req_resp.pkey = evt->param.sidr_req_rcvd.pkey; + uvt->resp.u.sidr_req_resp.port = + evt->param.sidr_req_rcvd.port; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; case IB_CM_SIDR_REP_RECEIVED: @@ -412,9 +430,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - /* - * wait - */ + down(&file->mutex); while (list_empty(&file->events)) { @@ -496,7 +512,6 @@ done: return result; } - static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) @@ -519,29 +534,27 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx); + ctx->cm_id = ib_create_cm_id(file->device->ib_dev, + ib_ucm_event_handler, ctx); if (IS_ERR(ctx->cm_id)) { result = PTR_ERR(ctx->cm_id); - goto err; + goto err1; } resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { result = -EFAULT; - goto err; + goto err2; } - return 0; -err: +err2: + ib_destroy_cm_id(ctx->cm_id); +err1: down(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); up(&ctx_id_mutex); - - if (!IS_ERR(ctx->cm_id)) - ib_destroy_cm_id(ctx->cm_id); - kfree(ctx); return result; } @@ -1253,6 +1266,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp) filp->private_data = file; file->filp = filp; + file->device = container_of(inode->i_cdev, struct ib_ucm_device, dev); return 0; } @@ -1283,7 +1297,17 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) return 0; } -static struct file_operations ib_ucm_fops = { +static void ib_ucm_release_class_dev(struct class_device *class_dev) +{ + struct ib_ucm_device *dev; + + dev = container_of(class_dev, struct ib_ucm_device, class_dev); + cdev_del(&dev->dev); + clear_bit(dev->devnum, dev_map); + kfree(dev); +} + +static struct file_operations ucm_fops = { .owner = THIS_MODULE, .open = ib_ucm_open, .release = ib_ucm_close, @@ -1291,55 +1315,141 @@ static struct file_operations ib_ucm_fops = { .poll = ib_ucm_poll, }; +static struct class ucm_class = { + .name = "infiniband_cm", + .release = ib_ucm_release_class_dev +}; + +static ssize_t show_dev(struct class_device *class_dev, char *buf) +{ + struct ib_ucm_device *dev; + + dev = container_of(class_dev, struct ib_ucm_device, class_dev); + return print_dev_t(buf, dev->dev.dev); +} +static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); -static struct class *ib_ucm_class; -static struct cdev ib_ucm_cdev; +static ssize_t show_ibdev(struct class_device *class_dev, char *buf) +{ + struct ib_ucm_device *dev; + + dev = container_of(class_dev, struct ib_ucm_device, class_dev); + return sprintf(buf, "%s\n", dev->ib_dev->name); +} +static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); -static int __init ib_ucm_init(void) +static void ib_ucm_add_one(struct ib_device *device) { - int result; + struct ib_ucm_device *ucm_dev; - result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm"); - if (result) { - printk(KERN_ERR "ucm: Error <%d> registering dev\n", result); - goto err_chr; - } + if (!device->alloc_ucontext) + return; + + ucm_dev = kmalloc(sizeof *ucm_dev, GFP_KERNEL); + if (!ucm_dev) + return; - cdev_init(&ib_ucm_cdev, &ib_ucm_fops); + memset(ucm_dev, 0, sizeof *ucm_dev); + ucm_dev->ib_dev = device; - result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1); - if (result) { - printk(KERN_ERR "ucm: Error <%d> adding cdev\n", result); + ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); + if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES) + goto err; + + set_bit(ucm_dev->devnum, dev_map); + + cdev_init(&ucm_dev->dev, &ucm_fops); + ucm_dev->dev.owner = THIS_MODULE; + kobject_set_name(&ucm_dev->dev.kobj, "ucm%d", ucm_dev->devnum); + if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) + goto err; + + ucm_dev->class_dev.class = &ucm_class; + ucm_dev->class_dev.dev = device->dma_device; + snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d", + ucm_dev->devnum); + if (class_device_register(&ucm_dev->class_dev)) goto err_cdev; - } - ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm"); - if (IS_ERR(ib_ucm_class)) { - result = PTR_ERR(ib_ucm_class); - printk(KERN_ERR "Error <%d> creating class\n", result); + if (class_device_create_file(&ucm_dev->class_dev, + &class_device_attr_dev)) + goto err_class; + if (class_device_create_file(&ucm_dev->class_dev, + &class_device_attr_ibdev)) goto err_class; + + ib_set_client_data(device, &ucm_client, ucm_dev); + return; + +err_class: + class_device_unregister(&ucm_dev->class_dev); +err_cdev: + cdev_del(&ucm_dev->dev); + clear_bit(ucm_dev->devnum, dev_map); +err: + kfree(ucm_dev); + return; +} + +static void ib_ucm_remove_one(struct ib_device *device) +{ + struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client); + + if (!ucm_dev) + return; + + class_device_unregister(&ucm_dev->class_dev); +} + +static ssize_t show_abi_version(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION); +} +static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + +static int __init ib_ucm_init(void) +{ + int ret; + + ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, + "infiniband_cm"); + if (ret) { + printk(KERN_ERR "ucm: couldn't register device number\n"); + goto err; } - class_device_create(ib_ucm_class, IB_UCM_DEV, NULL, "ucm"); + ret = class_register(&ucm_class); + if (ret) { + printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n"); + goto err_chrdev; + } - idr_init(&ctx_id_table); - init_MUTEX(&ctx_id_mutex); + ret = class_create_file(&ucm_class, &class_attr_abi_version); + if (ret) { + printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); + goto err_class; + } + ret = ib_register_client(&ucm_client); + if (ret) { + printk(KERN_ERR "ucm: couldn't register client\n"); + goto err_class; + } return 0; + err_class: - cdev_del(&ib_ucm_cdev); -err_cdev: - unregister_chrdev_region(IB_UCM_DEV, 1); -err_chr: - return result; + class_unregister(&ucm_class); +err_chrdev: + unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); +err: + return ret; } static void __exit ib_ucm_cleanup(void) { - class_device_destroy(ib_ucm_class, IB_UCM_DEV); - class_destroy(ib_ucm_class); - cdev_del(&ib_ucm_cdev); - unregister_chrdev_region(IB_UCM_DEV, 1); + ib_unregister_client(&ucm_client); + class_unregister(&ucm_class); + unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); } module_init(ib_ucm_init); diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 5308683c8c4..0a9fcd59eb4 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -109,7 +109,6 @@ struct ib_cm_id; struct ib_cm_req_event_param { struct ib_cm_id *listen_id; - struct ib_device *device; u8 port; struct ib_sa_path_rec *primary_path; @@ -220,7 +219,6 @@ struct ib_cm_apr_event_param { struct ib_cm_sidr_req_event_param { struct ib_cm_id *listen_id; - struct ib_device *device; u8 port; u16 pkey; }; @@ -284,6 +282,7 @@ typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id, struct ib_cm_id { ib_cm_handler cm_handler; void *context; + struct ib_device *device; __be64 service_id; __be64 service_mask; enum ib_cm_state state; /* internal CM/debug use */ @@ -295,6 +294,8 @@ struct ib_cm_id { /** * ib_create_cm_id - Allocate a communication identifier. + * @device: Device associated with the cm_id. All related communication will + * be associated with the specified device. * @cm_handler: Callback invoked to notify the user of CM events. * @context: User specified context associated with the communication * identifier. @@ -302,7 +303,8 @@ struct ib_cm_id { * Communication identifiers are used to track connection states, service * ID resolution requests, and listen requests. */ -struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, +struct ib_cm_id *ib_create_cm_id(struct ib_device *device, + ib_cm_handler cm_handler, void *context); /** diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h index e4d1654276a..3037588b846 100644 --- a/include/rdma/ib_user_cm.h +++ b/include/rdma/ib_user_cm.h @@ -38,7 +38,7 @@ #include -#define IB_USER_CM_ABI_VERSION 2 +#define IB_USER_CM_ABI_VERSION 3 enum { IB_USER_CM_CMD_CREATE_ID, @@ -299,8 +299,6 @@ struct ib_ucm_event_get { }; struct ib_ucm_req_event_resp { - /* device */ - /* port */ struct ib_ucm_path_rec primary_path; struct ib_ucm_path_rec alternate_path; __be64 remote_ca_guid; @@ -316,6 +314,7 @@ struct ib_ucm_req_event_resp { __u8 retry_count; __u8 rnr_retry_count; __u8 srq; + __u8 port; }; struct ib_ucm_rep_event_resp { @@ -353,10 +352,9 @@ struct ib_ucm_apr_event_resp { }; struct ib_ucm_sidr_req_event_resp { - /* device */ - /* port */ __u16 pkey; - __u8 reserved[2]; + __u8 port; + __u8 reserved; }; struct ib_ucm_sidr_rep_event_resp { -- cgit From 762a03e21ed08daa5170d9cd98e83539861e7692 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 17 Oct 2005 15:38:50 -0700 Subject: [IB] ucm: quiet sparse warnings Make ctx_id_mutex and ctx_id_table static to quiet sparse warnings. Signed-off-by: Roland Dreier --- drivers/infiniband/core/ucm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index d208ea29e07..02ca642089b 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -113,8 +113,8 @@ static struct ib_client ucm_client = { .remove = ib_ucm_remove_one }; -DECLARE_MUTEX(ctx_id_mutex); -DEFINE_IDR(ctx_id_table); +static DECLARE_MUTEX(ctx_id_mutex); +static DEFINE_IDR(ctx_id_table); static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) -- cgit From 4b2d319b53810ab00ef3d8fdfc1c1ab0647ab0a7 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 18 Oct 2005 12:20:06 -0700 Subject: [IPoIB] Improve ipoib_timeout() output Use jiffies_to_msecs() so we print a human-readable time so we don't have to worry about what HZ is configured to, and print out a few values to make post-mortem analysis easier. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6c5bf07489f..ee303859b04 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -637,8 +637,11 @@ static void ipoib_timeout(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_warn(priv, "transmit timeout: latency %ld\n", - jiffies - dev->trans_start); + ipoib_warn(priv, "transmit timeout: latency %d msecs\n", + jiffies_to_msecs(jiffies - dev->trans_start)); + ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n", + netif_queue_stopped(dev), + priv->tx_head, priv->tx_tail); /* XXX reset QP, etc. */ } -- cgit From c6f5cb7be0ab478e0618e3c2c6ada27f56d1e7fb Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 18 Oct 2005 13:22:16 -0700 Subject: [IB] mthca: Use enum in mthca_alloc_db() prototype Make the type parameter of mthca_alloc_db() be an enum mthca_db_type instead of an int. This doesn't have any practical effect but documents the functions a little better. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_memfree.c | 3 ++- drivers/infiniband/hw/mthca/mthca_memfree.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 7bd7a4bec7b..d63b1a14710 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -487,7 +487,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, } } -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, + u32 qn, __be32 **db) { int group; int start, end, dir; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index bafa51544aa..29c01a4b265 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -173,7 +173,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, int mthca_init_db_tab(struct mthca_dev *dev); void mthca_cleanup_db_tab(struct mthca_dev *dev); -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db); +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, + u32 qn, __be32 **db); void mthca_free_db(struct mthca_dev *dev, int type, int db_index); #endif /* MTHCA_MEMFREE_H */ -- cgit From d476306f1cc9156ce4b42586aacce70a356ef656 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 18 Oct 2005 14:07:07 -0700 Subject: [IB] mthca: Add struct pci_driver.owner field Set mthca_driver.owner to THIS_MODULE. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 7c35abec81e..883d1e5a79b 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1196,6 +1196,7 @@ MODULE_DEVICE_TABLE(pci, mthca_pci_table); static struct pci_driver mthca_driver = { .name = DRV_NAME, + .owner = THIS_MODULE, .id_table = mthca_pci_table, .probe = mthca_init_one, .remove = __devexit_p(mthca_remove_one) -- cgit From ba8e9310244180bbc820e865d2e4378809b4a443 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 18 Oct 2005 14:14:56 -0700 Subject: [IB] Fail sysfs queries after device is unregistered We keep IB device structures around until the last sysfs reference is gone, but we shouldn't ask the low-level driver to do anything after the LLD unregisters the device. To handle this, check the reg_state field and just fail sysfs show() requests if the device has already been unregistered. Signed-off-by: Roland Dreier --- drivers/infiniband/core/sysfs.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 211ba3223f6..7ce7a6c782f 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -65,6 +65,11 @@ struct port_table_attribute { int index; }; +static inline int ibdev_is_alive(const struct ib_device *dev) +{ + return dev->reg_state == IB_DEV_REGISTERED; +} + static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -74,6 +79,8 @@ static ssize_t port_attr_show(struct kobject *kobj, if (!port_attr->show) return -EIO; + if (!ibdev_is_alive(p->ibdev)) + return -ENODEV; return port_attr->show(p, port_attr, buf); } @@ -581,6 +588,9 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf) { struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); + if (!ibdev_is_alive(dev)) + return -ENODEV; + switch (dev->node_type) { case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type); case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); @@ -595,6 +605,9 @@ static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf) struct ib_device_attr attr; ssize_t ret; + if (!ibdev_is_alive(dev)) + return -ENODEV; + ret = ib_query_device(dev, &attr); if (ret) return ret; @@ -612,6 +625,9 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf) struct ib_device_attr attr; ssize_t ret; + if (!ibdev_is_alive(dev)) + return -ENODEV; + ret = ib_query_device(dev, &attr); if (ret) return ret; -- cgit From 7150bf8a98f14f1ba67e090ef2778004c746f465 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 18 Oct 2005 14:46:38 -0700 Subject: [IB] mthca: Don't enter QP into MCG more than once. Avoid entering a QP as member of a multicast group multiple times. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_mcg.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index 9a0612a9d30..b47ea7daf08 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -185,7 +185,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } for (i = 0; i < MTHCA_QP_PER_MGM; ++i) - if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { + if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) { + mthca_dbg(dev, "QP %06x already a member of MGM\n", + ibqp->qp_num); + err = 0; + goto out; + } else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31)); break; } -- cgit From 3910f44d79a8a4a26fcfefbf16c5b8e1a7743175 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 20 Oct 2005 12:29:36 -0700 Subject: [IB] cm: Add missing break in switch Add missing "break" in switch statement. Without the break, the CM ended up always falling through and setting every connection request to use RC transport, which meant that UC connections didn't work. Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm_msgs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 813ab70bf6d..4d3aee90c24 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -186,6 +186,7 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, req_msg->offset40 = cpu_to_be32((be32_to_cpu( req_msg->offset40) & 0xFFFFFFF9) | 0x2); + break; default: req_msg->offset40 = cpu_to_be32(be32_to_cpu( req_msg->offset40) & -- cgit From 2e0c512aff978a7040464e81fc9c0dfdf4639c23 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 20 Oct 2005 12:30:16 -0700 Subject: [IB] user_mad: trivial coding style fixes Add spaces after "sizeof" operator to match the rest of file. Signed-off-by: Roland Dreier --- drivers/infiniband/core/user_mad.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index a64d6b4dcc1..38f4bfbb713 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -280,14 +280,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, length = count - sizeof (struct ib_user_mad); packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) + - sizeof(struct ib_rmpp_hdr), GFP_KERNEL); + sizeof (struct ib_rmpp_hdr), GFP_KERNEL); if (!packet) return -ENOMEM; if (copy_from_user(&packet->mad, buf, sizeof (struct ib_user_mad) + - sizeof(struct ib_mad_hdr) + - sizeof(struct ib_rmpp_hdr))) { + sizeof (struct ib_mad_hdr) + + sizeof (struct ib_rmpp_hdr))) { ret = -EFAULT; goto err; } @@ -349,7 +349,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, } rmpp_active = 1; } else { - if (length > sizeof(struct ib_mad)) { + if (length > sizeof (struct ib_mad)) { ret = -EINVAL; goto err_ah; } @@ -376,17 +376,17 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (!rmpp_active) { /* Copy message from user into send buffer */ if (copy_from_user(packet->msg->mad, - buf + sizeof(struct ib_user_mad), length)) { + buf + sizeof (struct ib_user_mad), length)) { ret = -EFAULT; goto err_msg; } } else { - rmpp_hdr_size = sizeof(struct ib_mad_hdr) + - sizeof(struct ib_rmpp_hdr); + rmpp_hdr_size = sizeof (struct ib_mad_hdr) + + sizeof (struct ib_rmpp_hdr); /* Only copy MAD headers (RMPP header in place) */ memcpy(packet->msg->mad, packet->mad.data, - sizeof(struct ib_mad_hdr)); + sizeof (struct ib_mad_hdr)); /* Now, copy rest of message from user into send buffer */ if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data, -- cgit From bbf207860931b6a033d0fbcd170ae2332c0d8216 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 20 Oct 2005 12:54:01 -0700 Subject: [IB] user_mad: Use class_device.devt Use devt member of struct class_device so that we don't have to create our own "dev" file in sysfs. Signed-off-by: Roland Dreier --- drivers/infiniband/core/user_mad.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 38f4bfbb713..fd200c064a2 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -671,17 +671,6 @@ static struct ib_client umad_client = { .remove = ib_umad_remove_one }; -static ssize_t show_dev(struct class_device *class_dev, char *buf) -{ - struct ib_umad_port *port = class_get_devdata(class_dev); - - if (class_dev == &port->class_dev) - return print_dev_t(buf, port->dev.dev); - else - return print_dev_t(buf, port->sm_dev.dev); -} -static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); - static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_umad_port *port = class_get_devdata(class_dev); @@ -762,6 +751,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, port->class_dev.class = &umad_class; port->class_dev.dev = device->dma_device; + port->class_dev.devt = port->dev.dev; snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum); @@ -771,8 +761,6 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, class_set_devdata(&port->class_dev, port); kref_get(&port->umad_dev->ref); - if (class_device_create_file(&port->class_dev, &class_device_attr_dev)) - goto err_class; if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev)) goto err_class; if (class_device_create_file(&port->class_dev, &class_device_attr_port)) @@ -786,6 +774,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, port->sm_class_dev.class = &umad_class; port->sm_class_dev.dev = device->dma_device; + port->sm_class_dev.devt = port->sm_dev.dev; snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); @@ -795,8 +784,6 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, class_set_devdata(&port->sm_class_dev, port); kref_get(&port->umad_dev->ref); - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev)) - goto err_sm_class; if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev)) goto err_sm_class; if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port)) -- cgit From c8e0ca683dfea7242ff29814561dfe761945e5b3 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sat, 22 Oct 2005 09:43:29 -0700 Subject: [IB] mthca: Always re-arm EQs in mthca_tavor_interrupt() We should always re-arm an event queue's interrupt in mthca_tavor_interrupt() if the corresponding bit is set in the event cause register (ECR), even if we didn't find any entries in the EQ. If we don't, then there's a window where we miss an EQ entry and then get stuck because we don't get another EQ event. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_eq.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index c81fa8e975e..8dfafda5ed2 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -396,20 +396,21 @@ static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); ecr = readl(dev->eq_regs.tavor.ecr_base + 4); - if (ecr) { - writel(ecr, dev->eq_regs.tavor.ecr_base + - MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4); + if (!ecr) + return IRQ_NONE; - for (i = 0; i < MTHCA_NUM_EQ; ++i) - if (ecr & dev->eq_table.eq[i].eqn_mask && - mthca_eq_int(dev, &dev->eq_table.eq[i])) { + writel(ecr, dev->eq_regs.tavor.ecr_base + + MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (ecr & dev->eq_table.eq[i].eqn_mask) { + if (mthca_eq_int(dev, &dev->eq_table.eq[i])) tavor_set_eq_ci(dev, &dev->eq_table.eq[i], dev->eq_table.eq[i].cons_index); - tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn); - } - } + tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn); + } - return IRQ_RETVAL(ecr); + return IRQ_HANDLED; } static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr, -- cgit From 5d7edb3c1a01310725d86f0d83fb3be45685dc82 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 24 Oct 2005 10:53:25 -0700 Subject: [IB] Add idr_destroy() calls on module unload Add idr_destroy() calls to the module_exit() functions of the four IB driver modules that use idrs, so we don't leak idr_layer_cache objects when these modules are unloaded. Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 1 + drivers/infiniband/core/sa_query.c | 1 + drivers/infiniband/core/ucm.c | 1 + drivers/infiniband/core/uverbs_main.c | 7 +++++++ 4 files changed, 10 insertions(+) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 54db6d4831f..151ef83cc14 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3323,6 +3323,7 @@ static void __exit ib_cm_cleanup(void) flush_workqueue(cm.wq); destroy_workqueue(cm.wq); ib_unregister_client(&cm_client); + idr_destroy(&cm.local_id_table); } module_init(ib_cm_init); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 262618210c1..e215cf0478d 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -975,6 +975,7 @@ static int __init ib_sa_init(void) static void __exit ib_sa_cleanup(void) { ib_unregister_client(&sa_client); + idr_destroy(&query_idr); } module_init(ib_sa_init); diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index d0f0b0a2edd..5a6ba4030d4 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1320,6 +1320,7 @@ static void __exit ib_ucm_cleanup(void) class_destroy(ib_ucm_class); cdev_del(&ib_ucm_cdev); unregister_chrdev_region(IB_UCM_DEV, 1); + idr_destroy(&ctx_id_table); } module_init(ib_ucm_init); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 12511808de2..add45f7faa5 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -762,6 +762,13 @@ static void __exit ib_uverbs_cleanup(void) unregister_filesystem(&uverbs_event_fs); class_unregister(&uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); + idr_destroy(&ib_uverbs_pd_idr); + idr_destroy(&ib_uverbs_mr_idr); + idr_destroy(&ib_uverbs_mw_idr); + idr_destroy(&ib_uverbs_ah_idr); + idr_destroy(&ib_uverbs_cq_idr); + idr_destroy(&ib_uverbs_qp_idr); + idr_destroy(&ib_uverbs_srq_idr); } module_init(ib_uverbs_init); -- cgit From ae7971a7706384ca373fb7e212fe195698e6c5a1 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 24 Oct 2005 12:33:56 -0700 Subject: [IB] CM: Fix initialization of QP attributes for UC QPs. Fix cm_init_qp_init_attr(), cm_init_qp_rtr_attr() and cm_init_qp_rts_attr() so that they correctly handle the differences between UC and RC QPs. This fixes problems with setting up UC QPs through the CM. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 3fe6f4754fa..389fecbaf66 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -135,6 +135,7 @@ struct cm_id_private { __be64 tid; __be32 local_qpn; __be32 remote_qpn; + enum ib_qp_type qp_type; __be32 sq_psn; __be32 rq_psn; int timeout_ms; @@ -926,6 +927,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->retry_count = param->retry_count; cm_id_priv->path_mtu = param->primary_path->mtu; + cm_id_priv->qp_type = param->qp_type; ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); if (ret) @@ -1320,6 +1322,7 @@ static int cm_req_handler(struct cm_work *work) cm_req_get_primary_local_ack_timeout(req_msg); cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); + cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); cm_process_work(cm_id_priv, work); @@ -3079,10 +3082,10 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE; if (cm_id_priv->responder_resources) - qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ; + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ; qp_attr->pkey_index = cm_id_priv->av.pkey_index; qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; @@ -3112,14 +3115,18 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | - IB_QP_DEST_QPN | IB_QP_RQ_PSN | - IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; + IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); - qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources; - qp_attr->min_rnr_timer = 0; + if (cm_id_priv->qp_type == IB_QPT_RC) { + *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER; + qp_attr->max_dest_rd_atomic = + cm_id_priv->responder_resources; + qp_attr->min_rnr_timer = 0; + } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; @@ -3148,14 +3155,17 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: - *qp_attr_mask = IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY | IB_QP_SQ_PSN | - IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->timeout = cm_id_priv->local_ack_timeout; - qp_attr->retry_cnt = cm_id_priv->retry_count; - qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; + *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); - qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + if (cm_id_priv->qp_type == IB_QPT_RC) { + *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC; + qp_attr->timeout = cm_id_priv->local_ack_timeout; + qp_attr->retry_cnt = cm_id_priv->retry_count; + qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; + qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_PATH_MIG_STATE; qp_attr->path_mig_state = IB_MIG_REARM; -- cgit From 34816ad98efe4d47ffd858a0345321f9d85d9420 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Tue, 25 Oct 2005 10:51:39 -0700 Subject: [IB] Fix MAD layer DMA mappings to avoid touching data buffer once mapped The MAD layer was violating the DMA API by touching data buffers used for sends after the DMA mapping was done. This causes problems on non-cache-coherent architectures, because the device doing DMA won't see updates to the payload buffers that exist only in the CPU cache. Fix this by having all MAD consumers use ib_create_send_mad() to allocate their send buffers, and moving the DMA mapping into the MAD layer so it can be done just before calling send (and after any modifications of the send buffer by the MAD layer). Tested on a non-cache-coherent PowerPC 440SPe system. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/agent.c | 293 +++++++++----------------------- drivers/infiniband/core/agent.h | 13 +- drivers/infiniband/core/agent_priv.h | 62 ------- drivers/infiniband/core/cm.c | 137 +++++---------- drivers/infiniband/core/mad.c | 288 ++++++++++++++----------------- drivers/infiniband/core/mad_priv.h | 8 +- drivers/infiniband/core/mad_rmpp.c | 87 +++++----- drivers/infiniband/core/mad_rmpp.h | 2 + drivers/infiniband/core/sa_query.c | 239 +++++++++++++------------- drivers/infiniband/core/smi.h | 2 + drivers/infiniband/core/user_mad.c | 47 +++-- drivers/infiniband/hw/mthca/mthca_mad.c | 72 +------- include/rdma/ib_mad.h | 66 +++---- include/rdma/ib_verbs.h | 3 - 14 files changed, 475 insertions(+), 844 deletions(-) delete mode 100644 drivers/infiniband/core/agent_priv.h diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 5ac86f566dc..0c3c6952faa 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -37,58 +37,41 @@ * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $ */ -#include - -#include +#include "agent.h" +#include "smi.h" -#include +#define SPFX "ib_agent: " -#include "smi.h" -#include "agent_priv.h" -#include "mad_priv.h" -#include "agent.h" +struct ib_agent_port_private { + struct list_head port_list; + struct ib_mad_agent *agent[2]; +}; -spinlock_t ib_agent_port_list_lock; +static DEFINE_SPINLOCK(ib_agent_port_list_lock); static LIST_HEAD(ib_agent_port_list); -/* - * Caller must hold ib_agent_port_list_lock - */ -static inline struct ib_agent_port_private * -__ib_get_agent_port(struct ib_device *device, int port_num, - struct ib_mad_agent *mad_agent) +static struct ib_agent_port_private * +__ib_get_agent_port(struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; - BUG_ON(!(!!device ^ !!mad_agent)); /* Exactly one MUST be (!NULL) */ - - if (device) { - list_for_each_entry(entry, &ib_agent_port_list, port_list) { - if (entry->smp_agent->device == device && - entry->port_num == port_num) - return entry; - } - } else { - list_for_each_entry(entry, &ib_agent_port_list, port_list) { - if ((entry->smp_agent == mad_agent) || - (entry->perf_mgmt_agent == mad_agent)) - return entry; - } + list_for_each_entry(entry, &ib_agent_port_list, port_list) { + if (entry->agent[0]->device == device && + entry->agent[0]->port_num == port_num) + return entry; } return NULL; } -static inline struct ib_agent_port_private * -ib_get_agent_port(struct ib_device *device, int port_num, - struct ib_mad_agent *mad_agent) +static struct ib_agent_port_private * +ib_get_agent_port(struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; unsigned long flags; spin_lock_irqsave(&ib_agent_port_list_lock, flags); - entry = __ib_get_agent_port(device, port_num, mad_agent); + entry = __ib_get_agent_port(device, port_num); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - return entry; } @@ -100,192 +83,76 @@ int smi_check_local_dr_smp(struct ib_smp *smp, if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return 1; - port_priv = ib_get_agent_port(device, port_num, NULL); + + port_priv = ib_get_agent_port(device, port_num); if (!port_priv) { printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d " - "not open\n", - device->name, port_num); + "not open\n", device->name, port_num); return 1; } - return smi_check_local_smp(port_priv->smp_agent, smp); + return smi_check_local_smp(port_priv->agent[0], smp); } -static int agent_mad_send(struct ib_mad_agent *mad_agent, - struct ib_agent_port_private *port_priv, - struct ib_mad_private *mad_priv, - struct ib_grh *grh, - struct ib_wc *wc) +int agent_send_response(struct ib_mad *mad, struct ib_grh *grh, + struct ib_wc *wc, struct ib_device *device, + int port_num, int qpn) { - struct ib_agent_send_wr *agent_send_wr; - struct ib_sge gather_list; - struct ib_send_wr send_wr; - struct ib_send_wr *bad_send_wr; - struct ib_ah_attr ah_attr; - unsigned long flags; - int ret = 1; - - agent_send_wr = kmalloc(sizeof(*agent_send_wr), GFP_KERNEL); - if (!agent_send_wr) - goto out; - agent_send_wr->mad = mad_priv; - - gather_list.addr = dma_map_single(mad_agent->device->dma_device, - &mad_priv->mad, - sizeof(mad_priv->mad), - DMA_TO_DEVICE); - gather_list.length = sizeof(mad_priv->mad); - gather_list.lkey = mad_agent->mr->lkey; - - send_wr.next = NULL; - send_wr.opcode = IB_WR_SEND; - send_wr.sg_list = &gather_list; - send_wr.num_sge = 1; - send_wr.wr.ud.remote_qpn = wc->src_qp; /* DQPN */ - send_wr.wr.ud.timeout_ms = 0; - send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; + struct ib_agent_port_private *port_priv; + struct ib_mad_agent *agent; + struct ib_mad_send_buf *send_buf; + struct ib_ah *ah; + int ret; - ah_attr.dlid = wc->slid; - ah_attr.port_num = mad_agent->port_num; - ah_attr.src_path_bits = wc->dlid_path_bits; - ah_attr.sl = wc->sl; - ah_attr.static_rate = 0; - ah_attr.ah_flags = 0; /* No GRH */ - if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { - if (wc->wc_flags & IB_WC_GRH) { - ah_attr.ah_flags = IB_AH_GRH; - /* Should sgid be looked up ? */ - ah_attr.grh.sgid_index = 0; - ah_attr.grh.hop_limit = grh->hop_limit; - ah_attr.grh.flow_label = be32_to_cpu( - grh->version_tclass_flow) & 0xfffff; - ah_attr.grh.traffic_class = (be32_to_cpu( - grh->version_tclass_flow) >> 20) & 0xff; - memcpy(ah_attr.grh.dgid.raw, - grh->sgid.raw, - sizeof(ah_attr.grh.dgid)); - } + port_priv = ib_get_agent_port(device, port_num); + if (!port_priv) { + printk(KERN_ERR SPFX "Unable to find port agent\n"); + return -ENODEV; } - agent_send_wr->ah = ib_create_ah(mad_agent->qp->pd, &ah_attr); - if (IS_ERR(agent_send_wr->ah)) { - printk(KERN_ERR SPFX "No memory for address handle\n"); - kfree(agent_send_wr); - goto out; + agent = port_priv->agent[qpn]; + ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + printk(KERN_ERR SPFX "ib_create_ah_from_wc error:%d\n", ret); + return ret; } - send_wr.wr.ud.ah = agent_send_wr->ah; - if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { - send_wr.wr.ud.pkey_index = wc->pkey_index; - send_wr.wr.ud.remote_qkey = IB_QP1_QKEY; - } else { /* for SMPs */ - send_wr.wr.ud.pkey_index = 0; - send_wr.wr.ud.remote_qkey = 0; + send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0, + IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, + GFP_KERNEL); + if (IS_ERR(send_buf)) { + ret = PTR_ERR(send_buf); + printk(KERN_ERR SPFX "ib_create_send_mad error:%d\n", ret); + goto err1; } - send_wr.wr.ud.mad_hdr = &mad_priv->mad.mad.mad_hdr; - send_wr.wr_id = (unsigned long)agent_send_wr; - pci_unmap_addr_set(agent_send_wr, mapping, gather_list.addr); - - /* Send */ - spin_lock_irqsave(&port_priv->send_list_lock, flags); - if (ib_post_send_mad(mad_agent, &send_wr, &bad_send_wr)) { - spin_unlock_irqrestore(&port_priv->send_list_lock, flags); - dma_unmap_single(mad_agent->device->dma_device, - pci_unmap_addr(agent_send_wr, mapping), - sizeof(mad_priv->mad), - DMA_TO_DEVICE); - ib_destroy_ah(agent_send_wr->ah); - kfree(agent_send_wr); - } else { - list_add_tail(&agent_send_wr->send_list, - &port_priv->send_posted_list); - spin_unlock_irqrestore(&port_priv->send_list_lock, flags); - ret = 0; + memcpy(send_buf->mad, mad, sizeof *mad); + send_buf->ah = ah; + if ((ret = ib_post_send_mad(send_buf, NULL))) { + printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret); + goto err2; } - -out: + return 0; +err2: + ib_free_send_mad(send_buf); +err1: + ib_destroy_ah(ah); return ret; } -int agent_send(struct ib_mad_private *mad, - struct ib_grh *grh, - struct ib_wc *wc, - struct ib_device *device, - int port_num) -{ - struct ib_agent_port_private *port_priv; - struct ib_mad_agent *mad_agent; - - port_priv = ib_get_agent_port(device, port_num, NULL); - if (!port_priv) { - printk(KERN_DEBUG SPFX "agent_send %s port %d not open\n", - device->name, port_num); - return 1; - } - - /* Get mad agent based on mgmt_class in MAD */ - switch (mad->mad.mad.mad_hdr.mgmt_class) { - case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: - case IB_MGMT_CLASS_SUBN_LID_ROUTED: - mad_agent = port_priv->smp_agent; - break; - case IB_MGMT_CLASS_PERF_MGMT: - mad_agent = port_priv->perf_mgmt_agent; - break; - default: - return 1; - } - - return agent_mad_send(mad_agent, port_priv, mad, grh, wc); -} - static void agent_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { - struct ib_agent_port_private *port_priv; - struct ib_agent_send_wr *agent_send_wr; - unsigned long flags; - - /* Find matching MAD agent */ - port_priv = ib_get_agent_port(NULL, 0, mad_agent); - if (!port_priv) { - printk(KERN_ERR SPFX "agent_send_handler: no matching MAD " - "agent %p\n", mad_agent); - return; - } - - agent_send_wr = (struct ib_agent_send_wr *)(unsigned long)mad_send_wc->wr_id; - spin_lock_irqsave(&port_priv->send_list_lock, flags); - /* Remove completed send from posted send MAD list */ - list_del(&agent_send_wr->send_list); - spin_unlock_irqrestore(&port_priv->send_list_lock, flags); - - dma_unmap_single(mad_agent->device->dma_device, - pci_unmap_addr(agent_send_wr, mapping), - sizeof(agent_send_wr->mad->mad), - DMA_TO_DEVICE); - - ib_destroy_ah(agent_send_wr->ah); - - /* Release allocated memory */ - kmem_cache_free(ib_mad_cache, agent_send_wr->mad); - kfree(agent_send_wr); + ib_destroy_ah(mad_send_wc->send_buf->ah); + ib_free_send_mad(mad_send_wc->send_buf); } int ib_agent_port_open(struct ib_device *device, int port_num) { - int ret; struct ib_agent_port_private *port_priv; unsigned long flags; - - /* First, check if port already open for SMI */ - port_priv = ib_get_agent_port(device, port_num, NULL); - if (port_priv) { - printk(KERN_DEBUG SPFX "%s port %d already open\n", - device->name, port_num); - return 0; - } + int ret; /* Create new device info */ port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL); @@ -294,32 +161,25 @@ int ib_agent_port_open(struct ib_device *device, int port_num) ret = -ENOMEM; goto error1; } - memset(port_priv, 0, sizeof *port_priv); - port_priv->port_num = port_num; - spin_lock_init(&port_priv->send_list_lock); - INIT_LIST_HEAD(&port_priv->send_posted_list); - /* Obtain send only MAD agent for SM class (SMI QP) */ - port_priv->smp_agent = ib_register_mad_agent(device, port_num, - IB_QPT_SMI, - NULL, 0, + /* Obtain send only MAD agent for SMI QP */ + port_priv->agent[0] = ib_register_mad_agent(device, port_num, + IB_QPT_SMI, NULL, 0, &agent_send_handler, - NULL, NULL); - - if (IS_ERR(port_priv->smp_agent)) { - ret = PTR_ERR(port_priv->smp_agent); + NULL, NULL); + if (IS_ERR(port_priv->agent[0])) { + ret = PTR_ERR(port_priv->agent[0]); goto error2; } - /* Obtain send only MAD agent for PerfMgmt class (GSI QP) */ - port_priv->perf_mgmt_agent = ib_register_mad_agent(device, port_num, - IB_QPT_GSI, - NULL, 0, - &agent_send_handler, - NULL, NULL); - if (IS_ERR(port_priv->perf_mgmt_agent)) { - ret = PTR_ERR(port_priv->perf_mgmt_agent); + /* Obtain send only MAD agent for GSI QP */ + port_priv->agent[1] = ib_register_mad_agent(device, port_num, + IB_QPT_GSI, NULL, 0, + &agent_send_handler, + NULL, NULL); + if (IS_ERR(port_priv->agent[1])) { + ret = PTR_ERR(port_priv->agent[1]); goto error3; } @@ -330,7 +190,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num) return 0; error3: - ib_unregister_mad_agent(port_priv->smp_agent); + ib_unregister_mad_agent(port_priv->agent[0]); error2: kfree(port_priv); error1: @@ -343,7 +203,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num) unsigned long flags; spin_lock_irqsave(&ib_agent_port_list_lock, flags); - port_priv = __ib_get_agent_port(device, port_num, NULL); + port_priv = __ib_get_agent_port(device, port_num); if (port_priv == NULL) { spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); printk(KERN_ERR SPFX "Port %d not found\n", port_num); @@ -352,9 +212,8 @@ int ib_agent_port_close(struct ib_device *device, int port_num) list_del(&port_priv->port_list); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - ib_unregister_mad_agent(port_priv->perf_mgmt_agent); - ib_unregister_mad_agent(port_priv->smp_agent); + ib_unregister_mad_agent(port_priv->agent[1]); + ib_unregister_mad_agent(port_priv->agent[0]); kfree(port_priv); - return 0; } diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h index d9426842254..c5f3cfec942 100644 --- a/drivers/infiniband/core/agent.h +++ b/drivers/infiniband/core/agent.h @@ -39,17 +39,14 @@ #ifndef __AGENT_H_ #define __AGENT_H_ -extern spinlock_t ib_agent_port_list_lock; +#include -extern int ib_agent_port_open(struct ib_device *device, - int port_num); +extern int ib_agent_port_open(struct ib_device *device, int port_num); extern int ib_agent_port_close(struct ib_device *device, int port_num); -extern int agent_send(struct ib_mad_private *mad, - struct ib_grh *grh, - struct ib_wc *wc, - struct ib_device *device, - int port_num); +extern int agent_send_response(struct ib_mad *mad, struct ib_grh *grh, + struct ib_wc *wc, struct ib_device *device, + int port_num, int qpn); #endif /* __AGENT_H_ */ diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h deleted file mode 100644 index 2ec6d7f1b7d..00000000000 --- a/drivers/infiniband/core/agent_priv.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: agent_priv.h 1640 2005-01-24 22:39:02Z halr $ - */ - -#ifndef __IB_AGENT_PRIV_H__ -#define __IB_AGENT_PRIV_H__ - -#include - -#define SPFX "ib_agent: " - -struct ib_agent_send_wr { - struct list_head send_list; - struct ib_ah *ah; - struct ib_mad_private *mad; - DECLARE_PCI_UNMAP_ADDR(mapping) -}; - -struct ib_agent_port_private { - struct list_head port_list; - struct list_head send_posted_list; - spinlock_t send_list_lock; - int port_num; - struct ib_mad_agent *smp_agent; /* SM class */ - struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */ -}; - -#endif /* __IB_AGENT_PRIV_H__ */ diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 389fecbaf66..580c3a2bb10 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -176,8 +176,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, cm_id_priv->av.pkey_index, - ah, 0, sizeof(struct ib_mad_hdr), - sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr), + 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); if (IS_ERR(m)) { ib_destroy_ah(ah); @@ -185,7 +184,8 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, } /* Timeout set by caller if response is expected. */ - m->send_wr.wr.ud.retries = cm_id_priv->max_cm_retries; + m->ah = ah; + m->retries = cm_id_priv->max_cm_retries; atomic_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; @@ -206,20 +206,20 @@ static int cm_alloc_response_msg(struct cm_port *port, return PTR_ERR(ah); m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, - ah, 0, sizeof(struct ib_mad_hdr), - sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr), + 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); } + m->ah = ah; *msg = m; return 0; } static void cm_free_msg(struct ib_mad_send_buf *msg) { - ib_destroy_ah(msg->send_wr.wr.ud.ah); + ib_destroy_ah(msg->ah); if (msg->context[0]) cm_deref_id(msg->context[0]); ib_free_send_mad(msg); @@ -678,8 +678,7 @@ retest: break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irqrestore(&cm_id_priv->lock, flags); break; case IB_CM_SIDR_REQ_RCVD: @@ -690,8 +689,7 @@ retest: case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* Fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: @@ -708,8 +706,7 @@ retest: ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); break; @@ -883,7 +880,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param) { struct cm_id_private *cm_id_priv; - struct ib_send_wr *bad_send_wr; struct cm_req_msg *req_msg; unsigned long flags; int ret; @@ -936,7 +932,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; cm_format_req(req_msg, cm_id_priv, param); cm_id_priv->tid = req_msg->hdr.tid; - cm_id_priv->msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); @@ -945,8 +941,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_req_get_primary_local_ack_timeout(req_msg); spin_lock_irqsave(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &cm_id_priv->msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(cm_id_priv->msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); goto error2; @@ -969,7 +964,6 @@ static int cm_issue_rej(struct cm_port *port, void *ari, u8 ari_length) { struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; struct cm_rej_msg *rej_msg, *rcv_msg; int ret; @@ -992,7 +986,7 @@ static int cm_issue_rej(struct cm_port *port, memcpy(rej_msg->ari, ari, ari_length); } - ret = ib_post_send_mad(port->mad_agent, &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); @@ -1172,7 +1166,6 @@ static void cm_dup_req_handler(struct cm_work *work, struct cm_id_private *cm_id_priv) { struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1201,8 +1194,7 @@ static void cm_dup_req_handler(struct cm_work *work, } spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, - &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto free; return; @@ -1367,7 +1359,6 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; struct cm_rep_msg *rep_msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1389,11 +1380,10 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, rep_msg = (struct cm_rep_msg *) msg->mad; cm_format_rep(rep_msg, cm_id_priv, param); - msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -1431,7 +1421,6 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; void *data; int ret; @@ -1458,8 +1447,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -1504,7 +1492,6 @@ static void cm_dup_rep_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1532,8 +1519,7 @@ static void cm_dup_rep_handler(struct cm_work *work) goto unlock; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, - &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto free; goto deref; @@ -1601,8 +1587,7 @@ static int cm_rep_handler(struct cm_work *work) /* todo: handle peer_to_peer */ - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1636,8 +1621,7 @@ static int cm_establish_handler(struct cm_work *work) goto out; } - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1676,8 +1660,7 @@ static int cm_rtu_handler(struct cm_work *work) } cm_id_priv->id.state = IB_CM_ESTABLISHED; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1714,7 +1697,6 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1736,11 +1718,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id, cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -1774,7 +1755,6 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; void *data; int ret; @@ -1804,8 +1784,7 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id, cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, - &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -1822,7 +1801,6 @@ static int cm_dreq_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_dreq_msg *dreq_msg; struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1841,8 +1819,7 @@ static int cm_dreq_handler(struct cm_work *work) switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); break; case IB_CM_ESTABLISHED: case IB_CM_MRA_REP_RCVD: @@ -1856,8 +1833,7 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv->private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr)) + if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; default: @@ -1904,8 +1880,7 @@ static int cm_drep_handler(struct cm_work *work) } cm_enter_timewait(cm_id_priv); - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1930,7 +1905,6 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1974,8 +1948,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, if (ret) goto out; - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); @@ -2051,8 +2024,7 @@ static int cm_rej_handler(struct cm_work *work) case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: @@ -2062,8 +2034,7 @@ static int cm_rej_handler(struct cm_work *work) cm_reset_to_idle(cm_id_priv); break; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* fall through */ case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: @@ -2098,7 +2069,6 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; void *data; unsigned long flags; int ret; @@ -2122,8 +2092,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REQ, service_timeout, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; cm_id->state = IB_CM_MRA_REQ_SENT; @@ -2136,8 +2105,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REP, service_timeout, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; cm_id->state = IB_CM_MRA_REP_SENT; @@ -2150,8 +2118,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_OTHER, service_timeout, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; cm_id->lap_state = IB_CM_MRA_LAP_SENT; @@ -2213,14 +2180,14 @@ static int cm_mra_handler(struct cm_work *work) case IB_CM_REQ_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || ib_modify_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg, timeout)) + cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; break; case IB_CM_REP_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || ib_modify_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg, timeout)) + cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; break; @@ -2228,7 +2195,7 @@ static int cm_mra_handler(struct cm_work *work) if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || ib_modify_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg, timeout)) + cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; break; @@ -2291,7 +2258,6 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2312,11 +2278,10 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, alternate_path, private_data, private_data_len); - msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2360,7 +2325,6 @@ static int cm_lap_handler(struct cm_work *work) struct cm_lap_msg *lap_msg; struct ib_cm_lap_event_param *param; struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2394,8 +2358,7 @@ static int cm_lap_handler(struct cm_work *work) cm_id_priv->private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr)) + if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; default: @@ -2451,7 +2414,6 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2474,8 +2436,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id, cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, info, info_length, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2514,8 +2475,7 @@ static int cm_apr_handler(struct cm_work *work) goto out; } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_id_priv->msg = NULL; ret = atomic_inc_and_test(&cm_id_priv->work_count); @@ -2590,7 +2550,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2613,13 +2572,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, param); - msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state == IB_CM_IDLE) - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); else ret = -EINVAL; @@ -2733,7 +2691,6 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2755,8 +2712,7 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, param); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2809,8 +2765,7 @@ static int cm_sidr_rep_handler(struct cm_work *work) goto out; } cm_id_priv->id.state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_format_sidr_rep_event(work); @@ -2878,9 +2833,7 @@ discard: static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { - struct ib_mad_send_buf *msg; - - msg = (struct ib_mad_send_buf *)(unsigned long)mad_send_wc->wr_id; + struct ib_mad_send_buf *msg = mad_send_wc->send_buf; switch (mad_send_wc->status) { case IB_WC_SUCCESS: diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index af302e83056..88f9f8c9eac 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -579,7 +579,7 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list) } static void snoop_send(struct ib_mad_qp_info *qp_info, - struct ib_send_wr *send_wr, + struct ib_mad_send_buf *send_buf, struct ib_mad_send_wc *mad_send_wc, int mad_snoop_flags) { @@ -597,7 +597,7 @@ static void snoop_send(struct ib_mad_qp_info *qp_info, atomic_inc(&mad_snoop_priv->refcount); spin_unlock_irqrestore(&qp_info->snoop_lock, flags); mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, - send_wr, mad_send_wc); + send_buf, mad_send_wc); if (atomic_dec_and_test(&mad_snoop_priv->refcount)) wake_up(&mad_snoop_priv->wait); spin_lock_irqsave(&qp_info->snoop_lock, flags); @@ -654,10 +654,10 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, * Return < 0 if error */ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, - struct ib_smp *smp, - struct ib_send_wr *send_wr) + struct ib_mad_send_wr_private *mad_send_wr) { int ret; + struct ib_smp *smp = mad_send_wr->send_buf.mad; unsigned long flags; struct ib_mad_local_private *local; struct ib_mad_private *mad_priv; @@ -666,6 +666,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_device *device = mad_agent_priv->agent.device; u8 port_num = mad_agent_priv->agent.port_num; struct ib_wc mad_wc; + struct ib_send_wr *send_wr = &mad_send_wr->send_wr; if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) { ret = -EINVAL; @@ -745,13 +746,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, goto out; } - local->send_wr = *send_wr; - local->send_wr.sg_list = local->sg_list; - memcpy(local->sg_list, send_wr->sg_list, - sizeof *send_wr->sg_list * send_wr->num_sge); - local->send_wr.next = NULL; - local->tid = send_wr->wr.ud.mad_hdr->tid; - local->wr_id = send_wr->wr_id; + local->mad_send_wr = mad_send_wr; /* Reference MAD agent until send side of local completion handled */ atomic_inc(&mad_agent_priv->refcount); /* Queue local completion to local list */ @@ -781,17 +776,17 @@ static int get_buf_length(int hdr_len, int data_len) struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, - struct ib_ah *ah, int rmpp_active, + int rmpp_active, int hdr_len, int data_len, gfp_t gfp_mask) { struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_buf *send_buf; + struct ib_mad_send_wr_private *mad_send_wr; int buf_size; void *buf; - mad_agent_priv = container_of(mad_agent, - struct ib_mad_agent_private, agent); + mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, + agent); buf_size = get_buf_length(hdr_len, data_len); if ((!mad_agent->rmpp_version && @@ -799,45 +794,40 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, (!rmpp_active && buf_size > sizeof(struct ib_mad))) return ERR_PTR(-EINVAL); - buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask); + buf = kmalloc(sizeof *mad_send_wr + buf_size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); - memset(buf, 0, sizeof *send_buf + buf_size); - - send_buf = buf + buf_size; - send_buf->mad = buf; - - send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device, - buf, buf_size, DMA_TO_DEVICE); - pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr); - send_buf->sge.length = buf_size; - send_buf->sge.lkey = mad_agent->mr->lkey; - - send_buf->send_wr.wr_id = (unsigned long) send_buf; - send_buf->send_wr.sg_list = &send_buf->sge; - send_buf->send_wr.num_sge = 1; - send_buf->send_wr.opcode = IB_WR_SEND; - send_buf->send_wr.send_flags = IB_SEND_SIGNALED; - send_buf->send_wr.wr.ud.ah = ah; - send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr; - send_buf->send_wr.wr.ud.remote_qpn = remote_qpn; - send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - send_buf->send_wr.wr.ud.pkey_index = pkey_index; + memset(buf, 0, sizeof *mad_send_wr + buf_size); + + mad_send_wr = buf + buf_size; + mad_send_wr->send_buf.mad = buf; + + mad_send_wr->mad_agent_priv = mad_agent_priv; + mad_send_wr->sg_list[0].length = buf_size; + mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey; + + mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; + mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; + mad_send_wr->send_wr.num_sge = 1; + mad_send_wr->send_wr.opcode = IB_WR_SEND; + mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; + mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; + mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; + mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; if (rmpp_active) { - struct ib_rmpp_mad *rmpp_mad; - rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad; + struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad; rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len - - offsetof(struct ib_rmpp_mad, data) + data_len); + IB_MGMT_RMPP_HDR + data_len); rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version; rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); } - send_buf->mad_agent = mad_agent; + mad_send_wr->send_buf.mad_agent = mad_agent; atomic_inc(&mad_agent_priv->refcount); - return send_buf; + return &mad_send_wr->send_buf; } EXPORT_SYMBOL(ib_create_send_mad); @@ -847,10 +837,6 @@ void ib_free_send_mad(struct ib_mad_send_buf *send_buf) mad_agent_priv = container_of(send_buf->mad_agent, struct ib_mad_agent_private, agent); - - dma_unmap_single(send_buf->mad_agent->device->dma_device, - pci_unmap_addr(send_buf, mapping), - send_buf->sge.length, DMA_TO_DEVICE); kfree(send_buf->mad); if (atomic_dec_and_test(&mad_agent_priv->refcount)) @@ -861,8 +847,10 @@ EXPORT_SYMBOL(ib_free_send_mad); int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; - struct ib_send_wr *bad_send_wr; struct list_head *list; + struct ib_send_wr *bad_send_wr; + struct ib_mad_agent *mad_agent; + struct ib_sge *sge; unsigned long flags; int ret; @@ -871,10 +859,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; + mad_agent = mad_send_wr->send_buf.mad_agent; + sge = mad_send_wr->sg_list; + sge->addr = dma_map_single(mad_agent->device->dma_device, + mad_send_wr->send_buf.mad, sge->length, + DMA_TO_DEVICE); + pci_unmap_addr_set(mad_send_wr, mapping, sge->addr); + spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { - ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp, - &mad_send_wr->send_wr, &bad_send_wr); + ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr, + &bad_send_wr); list = &qp_info->send_queue.list; } else { ret = 0; @@ -886,6 +881,11 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) list_add_tail(&mad_send_wr->mad_list.list, list); } spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); + if (ret) + dma_unmap_single(mad_agent->device->dma_device, + pci_unmap_addr(mad_send_wr, mapping), + sge->length, DMA_TO_DEVICE); + return ret; } @@ -893,45 +893,28 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client */ -int ib_post_send_mad(struct ib_mad_agent *mad_agent, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr) +int ib_post_send_mad(struct ib_mad_send_buf *send_buf, + struct ib_mad_send_buf **bad_send_buf) { - int ret = -EINVAL; struct ib_mad_agent_private *mad_agent_priv; - - /* Validate supplied parameters */ - if (!bad_send_wr) - goto error1; - - if (!mad_agent || !send_wr) - goto error2; - - if (!mad_agent->send_handler) - goto error2; - - mad_agent_priv = container_of(mad_agent, - struct ib_mad_agent_private, - agent); + struct ib_mad_send_buf *next_send_buf; + struct ib_mad_send_wr_private *mad_send_wr; + unsigned long flags; + int ret = -EINVAL; /* Walk list of send WRs and post each on send list */ - while (send_wr) { - unsigned long flags; - struct ib_send_wr *next_send_wr; - struct ib_mad_send_wr_private *mad_send_wr; - struct ib_smp *smp; - - /* Validate more parameters */ - if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG) - goto error2; + for (; send_buf; send_buf = next_send_buf) { - if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler) - goto error2; + mad_send_wr = container_of(send_buf, + struct ib_mad_send_wr_private, + send_buf); + mad_agent_priv = mad_send_wr->mad_agent_priv; - if (!send_wr->wr.ud.mad_hdr) { - printk(KERN_ERR PFX "MAD header must be supplied " - "in WR %p\n", send_wr); - goto error2; + if (!send_buf->mad_agent->send_handler || + (send_buf->timeout_ms && + !send_buf->mad_agent->recv_handler)) { + ret = -EINVAL; + goto error; } /* @@ -939,40 +922,24 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, * current one completes, and the user modifies the work * request associated with the completion */ - next_send_wr = (struct ib_send_wr *)send_wr->next; + next_send_buf = send_buf->next; + mad_send_wr->send_wr.wr.ud.ah = send_buf->ah; - smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr; - if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - ret = handle_outgoing_dr_smp(mad_agent_priv, smp, - send_wr); + if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + ret = handle_outgoing_dr_smp(mad_agent_priv, + mad_send_wr); if (ret < 0) /* error */ - goto error2; + goto error; else if (ret == 1) /* locally consumed */ - goto next; + continue; } - /* Allocate MAD send WR tracking structure */ - mad_send_wr = kmalloc(sizeof *mad_send_wr, GFP_ATOMIC); - if (!mad_send_wr) { - printk(KERN_ERR PFX "No memory for " - "ib_mad_send_wr_private\n"); - ret = -ENOMEM; - goto error2; - } - memset(mad_send_wr, 0, sizeof *mad_send_wr); - - mad_send_wr->send_wr = *send_wr; - mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; - memcpy(mad_send_wr->sg_list, send_wr->sg_list, - sizeof *send_wr->sg_list * send_wr->num_sge); - mad_send_wr->wr_id = send_wr->wr_id; - mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid; - mad_send_wr->mad_agent_priv = mad_agent_priv; + mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; /* Timeout will be updated after send completes */ - mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr. - ud.timeout_ms); - mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; - /* One reference for each work request to QP + response */ + mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); + mad_send_wr->retries = send_buf->retries; + /* Reference for work request to QP + response */ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; @@ -995,16 +962,13 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, list_del(&mad_send_wr->agent_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); atomic_dec(&mad_agent_priv->refcount); - goto error2; + goto error; } -next: - send_wr = next_send_wr; } return 0; - -error2: - *bad_send_wr = send_wr; -error1: +error: + if (bad_send_buf) + *bad_send_buf = send_buf; return ret; } EXPORT_SYMBOL(ib_post_send_mad); @@ -1447,8 +1411,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, * of MAD. */ hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32; - list_for_each_entry(entry, &port_priv->agent_list, - agent_list) { + list_for_each_entry(entry, &port_priv->agent_list, agent_list) { if (entry->agent.hi_tid == hi_tid) { mad_agent = entry; break; @@ -1571,8 +1534,7 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid) */ list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, - mad_send_wr->send_wr.wr.ud.mad_hdr) && + if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && mad_send_wr->tid == tid && mad_send_wr->timeout) { /* Verify request has not been canceled */ return (mad_send_wr->status == IB_WC_SUCCESS) ? @@ -1628,14 +1590,14 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Defined behavior is to complete response before request */ - mad_recv_wc->wc->wr_id = mad_send_wr->wr_id; + mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf; mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, mad_recv_wc); atomic_dec(&mad_agent_priv->refcount); mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; - mad_send_wc.wr_id = mad_send_wr->wr_id; + mad_send_wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); } else { mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, @@ -1728,11 +1690,11 @@ local: if (ret & IB_MAD_RESULT_CONSUMED) goto out; if (ret & IB_MAD_RESULT_REPLY) { - /* Send response */ - if (!agent_send(response, &recv->grh, wc, - port_priv->device, - port_priv->port_num)) - response = NULL; + agent_send_response(&response->mad.mad, + &recv->grh, wc, + port_priv->device, + port_priv->port_num, + qp_info->qp->qp_num); goto out; } } @@ -1866,15 +1828,15 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, if (mad_send_wr->status != IB_WC_SUCCESS ) mad_send_wc->status = mad_send_wr->status; - if (ret != IB_RMPP_RESULT_INTERNAL) + if (ret == IB_RMPP_RESULT_INTERNAL) + ib_rmpp_send_handler(mad_send_wc); + else mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, mad_send_wc); /* Release reference on agent taken when sending */ if (atomic_dec_and_test(&mad_agent_priv->refcount)) wake_up(&mad_agent_priv->wait); - - kfree(mad_send_wr); return; done: spin_unlock_irqrestore(&mad_agent_priv->lock, flags); @@ -1888,6 +1850,7 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info; struct ib_mad_queue *send_queue; struct ib_send_wr *bad_send_wr; + struct ib_mad_send_wc mad_send_wc; unsigned long flags; int ret; @@ -1898,6 +1861,9 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, qp_info = send_queue->qp_info; retry: + dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device, + pci_unmap_addr(mad_send_wr, mapping), + mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); queued_send_wr = NULL; spin_lock_irqsave(&send_queue->lock, flags); list_del(&mad_list->list); @@ -1914,17 +1880,17 @@ retry: } spin_unlock_irqrestore(&send_queue->lock, flags); - /* Restore client wr_id in WC and complete send */ - wc->wr_id = mad_send_wr->wr_id; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_send_wc.status = wc->status; + mad_send_wc.vendor_err = wc->vendor_err; if (atomic_read(&qp_info->snoop_count)) - snoop_send(qp_info, &mad_send_wr->send_wr, - (struct ib_mad_send_wc *)wc, + snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); - ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc); + ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, - &bad_send_wr); + &bad_send_wr); if (ret) { printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); mad_send_wr = queued_send_wr; @@ -2066,38 +2032,37 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &cancel_list, agent_list) { - mad_send_wc.wr_id = mad_send_wr->wr_id; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + list_del(&mad_send_wr->agent_list); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); - - list_del(&mad_send_wr->agent_list); - kfree(mad_send_wr); atomic_dec(&mad_agent_priv->refcount); } } static struct ib_mad_send_wr_private* -find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id) +find_send_wr(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_buf *send_buf) { struct ib_mad_send_wr_private *mad_send_wr; list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, agent_list) { - if (mad_send_wr->wr_id == wr_id) + if (&mad_send_wr->send_buf == send_buf) return mad_send_wr; } list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, - mad_send_wr->send_wr.wr.ud.mad_hdr) && - mad_send_wr->wr_id == wr_id) + if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && + &mad_send_wr->send_buf == send_buf) return mad_send_wr; } return NULL; } -int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) +int ib_modify_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, u32 timeout_ms) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; @@ -2107,7 +2072,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); - mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id); + mad_send_wr = find_send_wr(mad_agent_priv, send_buf); if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return -EINVAL; @@ -2119,7 +2084,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } - mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms; + mad_send_wr->send_buf.timeout_ms = timeout_ms; if (active) mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); else @@ -2130,9 +2095,10 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) } EXPORT_SYMBOL(ib_modify_mad); -void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id) +void ib_cancel_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf) { - ib_modify_mad(mad_agent, wr_id, 0); + ib_modify_mad(mad_agent, send_buf, 0); } EXPORT_SYMBOL(ib_cancel_mad); @@ -2166,10 +2132,9 @@ static void local_completions(void *data) * Defined behavior is to complete response * before request */ - build_smp_wc(local->wr_id, + build_smp_wc((unsigned long) local->mad_send_wr, be16_to_cpu(IB_LID_PERMISSIVE), - 0 /* pkey index */, - recv_mad_agent->agent.port_num, &wc); + 0, recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; local->mad_priv->header.recv_wc.mad_len = @@ -2196,11 +2161,11 @@ local_send_completion: /* Complete send */ mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; - mad_send_wc.wr_id = local->wr_id; + mad_send_wc.send_buf = &local->mad_send_wr->send_buf; if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) - snoop_send(mad_agent_priv->qp_info, &local->send_wr, - &mad_send_wc, - IB_MAD_SNOOP_SEND_COMPLETIONS); + snoop_send(mad_agent_priv->qp_info, + &local->mad_send_wr->send_buf, + &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); @@ -2221,8 +2186,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) if (!mad_send_wr->retries--) return -ETIMEDOUT; - mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr. - wr.ud.timeout_ms); + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { ret = ib_retry_rmpp(mad_send_wr); @@ -2285,11 +2249,10 @@ static void timeout_sends(void *data) mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; else mad_send_wc.status = mad_send_wr->status; - mad_send_wc.wr_id = mad_send_wr->wr_id; + mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); - kfree(mad_send_wr); atomic_dec(&mad_agent_priv->refcount); spin_lock_irqsave(&mad_agent_priv->lock, flags); } @@ -2761,7 +2724,6 @@ static int __init ib_mad_init_module(void) int ret; spin_lock_init(&ib_mad_port_list_lock); - spin_lock_init(&ib_agent_port_list_lock); ib_mad_cache = kmem_cache_create("ib_mad", sizeof(struct ib_mad_private), diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index f1ba794e0da..570f78682af 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -118,9 +118,10 @@ struct ib_mad_send_wr_private { struct ib_mad_list_head mad_list; struct list_head agent_list; struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_buf send_buf; + DECLARE_PCI_UNMAP_ADDR(mapping) struct ib_send_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; - u64 wr_id; /* client WR ID */ __be64 tid; unsigned long timeout; int retries; @@ -141,10 +142,7 @@ struct ib_mad_local_private { struct list_head completion_list; struct ib_mad_private *mad_priv; struct ib_mad_agent_private *recv_mad_agent; - struct ib_send_wr send_wr; - struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; - u64 wr_id; /* client WR ID */ - __be64 tid; + struct ib_mad_send_wr_private *mad_send_wr; }; struct ib_mad_mgmt_method_table { diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index e23836d0e21..ba112cd5f93 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -103,12 +103,12 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) static int data_offset(u8 mgmt_class) { if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) - return offsetof(struct ib_sa_mad, data); + return IB_MGMT_SA_HDR; else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) - return offsetof(struct ib_vendor_mad, data); + return IB_MGMT_VENDOR_HDR; else - return offsetof(struct ib_rmpp_mad, data); + return IB_MGMT_RMPP_HDR; } static void format_ack(struct ib_rmpp_mad *ack, @@ -135,21 +135,18 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv, struct ib_mad_recv_wc *recv_wc) { struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; - int hdr_len, ret; + int ret; - hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, rmpp_recv->ah, 1, - hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len, - GFP_KERNEL); + recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR, + IB_MGMT_RMPP_DATA, GFP_KERNEL); if (!msg) return; - format_ack((struct ib_rmpp_mad *) msg->mad, - (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); - ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr, - &bad_send_wr); + format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, + rmpp_recv); + msg->ah = rmpp_recv->ah; + ret = ib_post_send_mad(msg, NULL); if (ret) ib_free_send_mad(msg); } @@ -160,30 +157,31 @@ static int alloc_response_msg(struct ib_mad_agent *agent, { struct ib_mad_send_buf *m; struct ib_ah *ah; - int hdr_len; ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, recv_wc->recv_buf.grh, agent->port_num); if (IS_ERR(ah)) return PTR_ERR(ah); - hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); m = ib_create_send_mad(agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, ah, 1, hdr_len, - sizeof(struct ib_rmpp_mad) - hdr_len, - GFP_KERNEL); + recv_wc->wc->pkey_index, 1, + IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA, GFP_KERNEL); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); } + m->ah = ah; *msg = m; return 0; } -static void free_msg(struct ib_mad_send_buf *msg) +void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) { - ib_destroy_ah(msg->send_wr.wr.ud.ah); - ib_free_send_mad(msg); + struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad; + + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_ACK) + ib_destroy_ah(mad_send_wc->send_buf->ah); + ib_free_send_mad(mad_send_wc->send_buf); } static void nack_recv(struct ib_mad_agent_private *agent, @@ -191,14 +189,13 @@ static void nack_recv(struct ib_mad_agent_private *agent, { struct ib_mad_send_buf *msg; struct ib_rmpp_mad *rmpp_mad; - struct ib_send_wr *bad_send_wr; int ret; ret = alloc_response_msg(&agent->agent, recv_wc, &msg); if (ret) return; - rmpp_mad = (struct ib_rmpp_mad *) msg->mad; + rmpp_mad = msg->mad; memcpy(rmpp_mad, recv_wc->recv_buf.mad, data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class)); @@ -210,9 +207,11 @@ static void nack_recv(struct ib_mad_agent_private *agent, rmpp_mad->rmpp_hdr.seg_num = 0; rmpp_mad->rmpp_hdr.paylen_newwin = 0; - ret = ib_post_send_mad(&agent->agent, &msg->send_wr, &bad_send_wr); - if (ret) - free_msg(msg); + ret = ib_post_send_mad(msg, NULL); + if (ret) { + ib_destroy_ah(msg->ah); + ib_free_send_mad(msg); + } } static void recv_timeout_handler(void *data) @@ -585,7 +584,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) int timeout; u32 paylen; - rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + rmpp_mad = mad_send_wr->send_buf.mad; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num); @@ -612,7 +611,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) } /* 2 seconds for an ACK until we can find the packet lifetime */ - timeout = mad_send_wr->send_wr.wr.ud.timeout_ms; + timeout = mad_send_wr->send_buf.timeout_ms; if (!timeout || timeout > 2000) mad_send_wr->timeout = msecs_to_jiffies(2000); mad_send_wr->seg_num++; @@ -640,7 +639,7 @@ static void abort_send(struct ib_mad_agent_private *agent, __be64 tid, wc.status = IB_WC_REM_ABORT_ERR; wc.vendor_err = rmpp_status; - wc.wr_id = mad_send_wr->wr_id; + wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &wc); return; out: @@ -694,12 +693,12 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, if (seg_num > mad_send_wr->last_ack) { mad_send_wr->last_ack = seg_num; - mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; + mad_send_wr->retries = mad_send_wr->send_buf.retries; } mad_send_wr->newwin = newwin; if (mad_send_wr->last_ack == mad_send_wr->total_seg) { /* If no response is expected, the ACK completes the send */ - if (!mad_send_wr->send_wr.wr.ud.timeout_ms) { + if (!mad_send_wr->send_buf.timeout_ms) { struct ib_mad_send_wc wc; ib_mark_mad_done(mad_send_wr); @@ -707,13 +706,13 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, wc.status = IB_WC_SUCCESS; wc.vendor_err = 0; - wc.wr_id = mad_send_wr->wr_id; + wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &wc); return; } if (mad_send_wr->refcount == 1) - ib_reset_mad_timeout(mad_send_wr, mad_send_wr-> - send_wr.wr.ud.timeout_ms); + ib_reset_mad_timeout(mad_send_wr, + mad_send_wr->send_buf.timeout_ms); } else if (mad_send_wr->refcount == 1 && mad_send_wr->seg_num < mad_send_wr->newwin && mad_send_wr->seg_num <= mad_send_wr->total_seg) { @@ -842,7 +841,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) struct ib_rmpp_mad *rmpp_mad; int i, total_len, ret; - rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; @@ -863,7 +862,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) / (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset); - mad_send_wr->pad = total_len - offsetof(struct ib_rmpp_mad, data) - + mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); /* We need to wait for the final ACK even if there isn't a response */ @@ -878,23 +877,15 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc) { struct ib_rmpp_mad *rmpp_mad; - struct ib_mad_send_buf *msg; int ret; - rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ - if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { - msg = (struct ib_mad_send_buf *) (unsigned long) - mad_send_wc->wr_id; - if (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_ACK) - ib_free_send_mad(msg); - else - free_msg(msg); + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */ - } if (mad_send_wc->status != IB_WC_SUCCESS || mad_send_wr->status != IB_WC_SUCCESS) @@ -905,7 +896,7 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, if (mad_send_wr->last_ack == mad_send_wr->total_seg) { mad_send_wr->timeout = - msecs_to_jiffies(mad_send_wr->send_wr.wr.ud.timeout_ms); + msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); return IB_RMPP_RESULT_PROCESSED; /* Send done */ } @@ -926,7 +917,7 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr) struct ib_rmpp_mad *rmpp_mad; int ret; - rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h index c4924dfb8e7..f0616fd2249 100644 --- a/drivers/infiniband/core/mad_rmpp.h +++ b/drivers/infiniband/core/mad_rmpp.h @@ -51,6 +51,8 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc); +void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc); + void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent); int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 0e5ef97f763..89ce9dc210d 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -73,11 +73,10 @@ struct ib_sa_device { struct ib_sa_query { void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); void (*release)(struct ib_sa_query *); - struct ib_sa_port *port; - struct ib_sa_mad *mad; - struct ib_sa_sm_ah *sm_ah; - DECLARE_PCI_UNMAP_ADDR(mapping) - int id; + struct ib_sa_port *port; + struct ib_mad_send_buf *mad_buf; + struct ib_sa_sm_ah *sm_ah; + int id; }; struct ib_sa_service_query { @@ -426,6 +425,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) { unsigned long flags; struct ib_mad_agent *agent; + struct ib_mad_send_buf *mad_buf; spin_lock_irqsave(&idr_lock, flags); if (idr_find(&query_idr, id) != query) { @@ -433,9 +433,10 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) return; } agent = query->port->agent; + mad_buf = query->mad_buf; spin_unlock_irqrestore(&idr_lock, flags); - ib_cancel_mad(agent, id); + ib_cancel_mad(agent, mad_buf); } EXPORT_SYMBOL(ib_sa_cancel_query); @@ -457,71 +458,46 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, int timeout_ms) { - struct ib_sa_port *port = query->port; unsigned long flags; - int ret; - struct ib_sge gather_list; - struct ib_send_wr *bad_wr, wr = { - .opcode = IB_WR_SEND, - .sg_list = &gather_list, - .num_sge = 1, - .send_flags = IB_SEND_SIGNALED, - .wr = { - .ud = { - .mad_hdr = &query->mad->mad_hdr, - .remote_qpn = 1, - .remote_qkey = IB_QP1_QKEY, - .timeout_ms = timeout_ms, - } - } - }; + int ret, id; retry: if (!idr_pre_get(&query_idr, GFP_ATOMIC)) return -ENOMEM; spin_lock_irqsave(&idr_lock, flags); - ret = idr_get_new(&query_idr, query, &query->id); + ret = idr_get_new(&query_idr, query, &id); spin_unlock_irqrestore(&idr_lock, flags); if (ret == -EAGAIN) goto retry; if (ret) return ret; - wr.wr_id = query->id; + query->mad_buf->timeout_ms = timeout_ms; + query->mad_buf->context[0] = query; + query->id = id; - spin_lock_irqsave(&port->ah_lock, flags); - kref_get(&port->sm_ah->ref); - query->sm_ah = port->sm_ah; - wr.wr.ud.ah = port->sm_ah->ah; - spin_unlock_irqrestore(&port->ah_lock, flags); + spin_lock_irqsave(&query->port->ah_lock, flags); + kref_get(&query->port->sm_ah->ref); + query->sm_ah = query->port->sm_ah; + spin_unlock_irqrestore(&query->port->ah_lock, flags); - gather_list.addr = dma_map_single(port->agent->device->dma_device, - query->mad, - sizeof (struct ib_sa_mad), - DMA_TO_DEVICE); - gather_list.length = sizeof (struct ib_sa_mad); - gather_list.lkey = port->agent->mr->lkey; - pci_unmap_addr_set(query, mapping, gather_list.addr); + query->mad_buf->ah = query->sm_ah->ah; - ret = ib_post_send_mad(port->agent, &wr, &bad_wr); + ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { - dma_unmap_single(port->agent->device->dma_device, - pci_unmap_addr(query, mapping), - sizeof (struct ib_sa_mad), - DMA_TO_DEVICE); - kref_put(&query->sm_ah->ref, free_sm_ah); spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, query->id); + idr_remove(&query_idr, id); spin_unlock_irqrestore(&idr_lock, flags); + + kref_put(&query->sm_ah->ref, free_sm_ah); } /* * It's not safe to dereference query any more, because the * send may already have completed and freed the query in - * another context. So use wr.wr_id, which has a copy of the - * query's id. + * another context. */ - return ret ? ret : wr.wr_id; + return ret ? ret : id; } static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, @@ -543,7 +519,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) { - kfree(sa_query->mad); kfree(container_of(sa_query, struct ib_sa_path_query, sa_query)); } @@ -585,6 +560,7 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; + struct ib_sa_mad *mad; int ret; if (!sa_dev) @@ -596,36 +572,44 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; - query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); - if (!query->sa_query.mad) { - kfree(query); - return -ENOMEM; + + query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, + 0, IB_MGMT_SA_HDR, + IB_MGMT_SA_DATA, gfp_mask); + if (!query->sa_query.mad_buf) { + ret = -ENOMEM; + goto err1; } query->callback = callback; query->context = context; - init_mad(query->sa_query.mad, agent); + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); - query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; - query->sa_query.release = ib_sa_path_rec_release; - query->sa_query.port = port; - query->sa_query.mad->mad_hdr.method = IB_MGMT_METHOD_GET; - query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); - query->sa_query.mad->sa_hdr.comp_mask = comp_mask; + query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; + query->sa_query.release = ib_sa_path_rec_release; + query->sa_query.port = port; + mad->mad_hdr.method = IB_MGMT_METHOD_GET; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); + mad->sa_hdr.comp_mask = comp_mask; - ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), - rec, query->sa_query.mad->data); + ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms); - if (ret < 0) { - *sa_query = NULL; - kfree(query->sa_query.mad); - kfree(query); - } + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_free_send_mad(query->sa_query.mad_buf); +err1: + kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_path_rec_get); @@ -649,7 +633,6 @@ static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) { - kfree(sa_query->mad); kfree(container_of(sa_query, struct ib_sa_service_query, sa_query)); } @@ -693,6 +676,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; + struct ib_sa_mad *mad; int ret; if (!sa_dev) @@ -709,37 +693,45 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; - query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); - if (!query->sa_query.mad) { - kfree(query); - return -ENOMEM; + + query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, + 0, IB_MGMT_SA_HDR, + IB_MGMT_SA_DATA, gfp_mask); + if (!query->sa_query.mad_buf) { + ret = -ENOMEM; + goto err1; } query->callback = callback; query->context = context; - init_mad(query->sa_query.mad, agent); + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); - query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; - query->sa_query.release = ib_sa_service_rec_release; - query->sa_query.port = port; - query->sa_query.mad->mad_hdr.method = method; - query->sa_query.mad->mad_hdr.attr_id = - cpu_to_be16(IB_SA_ATTR_SERVICE_REC); - query->sa_query.mad->sa_hdr.comp_mask = comp_mask; + query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; + query->sa_query.release = ib_sa_service_rec_release; + query->sa_query.port = port; + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); + mad->sa_hdr.comp_mask = comp_mask; ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), - rec, query->sa_query.mad->data); + rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms); - if (ret < 0) { - *sa_query = NULL; - kfree(query->sa_query.mad); - kfree(query); - } + if (ret < 0) + goto err2; + + return ret; +err2: + *sa_query = NULL; + ib_free_send_mad(query->sa_query.mad_buf); + +err1: + kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_service_rec_query); @@ -763,7 +755,6 @@ static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) { - kfree(sa_query->mad); kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); } @@ -782,6 +773,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; + struct ib_sa_mad *mad; int ret; if (!sa_dev) @@ -793,53 +785,55 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; - query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); - if (!query->sa_query.mad) { - kfree(query); - return -ENOMEM; + + query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, + 0, IB_MGMT_SA_HDR, + IB_MGMT_SA_DATA, gfp_mask); + if (!query->sa_query.mad_buf) { + ret = -ENOMEM; + goto err1; } query->callback = callback; query->context = context; - init_mad(query->sa_query.mad, agent); + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); - query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; - query->sa_query.release = ib_sa_mcmember_rec_release; - query->sa_query.port = port; - query->sa_query.mad->mad_hdr.method = method; - query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); - query->sa_query.mad->sa_hdr.comp_mask = comp_mask; + query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; + query->sa_query.release = ib_sa_mcmember_rec_release; + query->sa_query.port = port; + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); + mad->sa_hdr.comp_mask = comp_mask; ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), - rec, query->sa_query.mad->data); + rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms); - if (ret < 0) { - *sa_query = NULL; - kfree(query->sa_query.mad); - kfree(query); - } + if (ret < 0) + goto err2; return ret; + +err2: + *sa_query = NULL; + ib_free_send_mad(query->sa_query.mad_buf); + +err1: + kfree(query); + return ret; } EXPORT_SYMBOL(ib_sa_mcmember_rec_query); static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { - struct ib_sa_query *query; + struct ib_sa_query *query = mad_send_wc->send_buf->context[0]; unsigned long flags; - spin_lock_irqsave(&idr_lock, flags); - query = idr_find(&query_idr, mad_send_wc->wr_id); - spin_unlock_irqrestore(&idr_lock, flags); - - if (!query) - return; - if (query->callback) switch (mad_send_wc->status) { case IB_WC_SUCCESS: @@ -856,30 +850,25 @@ static void send_handler(struct ib_mad_agent *agent, break; } - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(query, mapping), - sizeof (struct ib_sa_mad), - DMA_TO_DEVICE); - kref_put(&query->sm_ah->ref, free_sm_ah); - - query->release(query); - spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, mad_send_wc->wr_id); + idr_remove(&query_idr, query->id); spin_unlock_irqrestore(&idr_lock, flags); + + ib_free_send_mad(mad_send_wc->send_buf); + kref_put(&query->sm_ah->ref, free_sm_ah); + query->release(query); } static void recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_sa_query *query; - unsigned long flags; + struct ib_mad_send_buf *mad_buf; - spin_lock_irqsave(&idr_lock, flags); - query = idr_find(&query_idr, mad_recv_wc->wc->wr_id); - spin_unlock_irqrestore(&idr_lock, flags); + mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id; + query = mad_buf->context[0]; - if (query && query->callback) { + if (query->callback) { if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->callback(query, mad_recv_wc->recv_buf.mad->mad_hdr.status ? diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index db25503a073..2b3c40198f8 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -39,6 +39,8 @@ #ifndef __SMI_H_ #define __SMI_H_ +#include + int smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, int port_num, diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index fd200c064a2..fc5519a3de9 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -96,7 +96,6 @@ struct ib_umad_file { }; struct ib_umad_packet { - struct ib_ah *ah; struct ib_mad_send_buf *msg; struct list_head list; int length; @@ -139,10 +138,10 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; - struct ib_umad_packet *timeout, *packet = - (void *) (unsigned long) send_wc->wr_id; + struct ib_umad_packet *timeout; + struct ib_umad_packet *packet = send_wc->send_buf->context[0]; - ib_destroy_ah(packet->msg->send_wr.wr.ud.ah); + ib_destroy_ah(packet->msg->ah); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { @@ -268,11 +267,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct ib_ah_attr ah_attr; - struct ib_send_wr *bad_wr; + struct ib_ah *ah; struct ib_rmpp_mad *rmpp_mad; u8 method; __be64 *tid; - int ret, length, hdr_len, data_len, rmpp_hdr_size; + int ret, length, hdr_len, rmpp_hdr_size; int rmpp_active = 0; if (count < sizeof (struct ib_user_mad)) @@ -321,9 +320,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } - packet->ah = ib_create_ah(agent->qp->pd, &ah_attr); - if (IS_ERR(packet->ah)) { - ret = PTR_ERR(packet->ah); + ah = ib_create_ah(agent->qp->pd, &ah_attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); goto err_up; } @@ -337,12 +336,10 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, /* Validate that the management class can support RMPP */ if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { - hdr_len = offsetof(struct ib_sa_mad, data); - data_len = length - hdr_len; + hdr_len = IB_MGMT_SA_HDR; } else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) { - hdr_len = offsetof(struct ib_vendor_mad, data); - data_len = length - hdr_len; + hdr_len = IB_MGMT_VENDOR_HDR; } else { ret = -EINVAL; goto err_ah; @@ -353,25 +350,23 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, ret = -EINVAL; goto err_ah; } - hdr_len = offsetof(struct ib_mad, data); - data_len = length - hdr_len; + hdr_len = IB_MGMT_MAD_HDR; } packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), - 0, packet->ah, rmpp_active, - hdr_len, data_len, + 0, rmpp_active, + hdr_len, length - hdr_len, GFP_KERNEL); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; } - packet->msg->send_wr.wr.ud.timeout_ms = packet->mad.hdr.timeout_ms; - packet->msg->send_wr.wr.ud.retries = packet->mad.hdr.retries; - - /* Override send WR WRID initialized in ib_create_send_mad */ - packet->msg->send_wr.wr_id = (unsigned long) packet; + packet->msg->ah = ah; + packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; + packet->msg->retries = packet->mad.hdr.retries; + packet->msg->context[0] = packet; if (!rmpp_active) { /* Copy message from user into send buffer */ @@ -403,17 +398,17 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, * transaction ID matches the agent being used to send the * MAD. */ - method = packet->msg->mad->mad_hdr.method; + method = ((struct ib_mad_hdr *) packet->msg)->method; if (!(method & IB_MGMT_METHOD_RESP) && method != IB_MGMT_METHOD_TRAP_REPRESS && method != IB_MGMT_METHOD_SEND) { - tid = &packet->msg->mad->mad_hdr.tid; + tid = &((struct ib_mad_hdr *) packet->msg)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); } - ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr); + ret = ib_post_send_mad(packet->msg, NULL); if (ret) goto err_msg; @@ -425,7 +420,7 @@ err_msg: ib_free_send_mad(packet->msg); err_ah: - ib_destroy_ah(packet->ah); + ib_destroy_ah(ah); err_up: up_read(&file->agent_mutex); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 9804174f7f3..8561b297a19 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -46,11 +46,6 @@ enum { MTHCA_VENDOR_CLASS2 = 0xa }; -struct mthca_trap_mad { - struct ib_mad *mad; - DECLARE_PCI_UNMAP_ADDR(mapping) -}; - static void update_sm_ah(struct mthca_dev *dev, u8 port_num, u16 lid, u8 sl) { @@ -116,49 +111,14 @@ static void forward_trap(struct mthca_dev *dev, struct ib_mad *mad) { int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; - struct mthca_trap_mad *tmad; - struct ib_sge gather_list; - struct ib_send_wr *bad_wr, wr = { - .opcode = IB_WR_SEND, - .sg_list = &gather_list, - .num_sge = 1, - .send_flags = IB_SEND_SIGNALED, - .wr = { - .ud = { - .remote_qpn = qpn, - .remote_qkey = qpn ? IB_QP1_QKEY : 0, - .timeout_ms = 0 - } - } - }; + struct ib_mad_send_buf *send_buf; struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; int ret; unsigned long flags; if (agent) { - tmad = kmalloc(sizeof *tmad, GFP_KERNEL); - if (!tmad) - return; - - tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL); - if (!tmad->mad) { - kfree(tmad); - return; - } - - memcpy(tmad->mad, mad, sizeof *mad); - - wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr; - wr.wr_id = (unsigned long) tmad; - - gather_list.addr = dma_map_single(agent->device->dma_device, - tmad->mad, - sizeof *tmad->mad, - DMA_TO_DEVICE); - gather_list.length = sizeof *tmad->mad; - gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey; - pci_unmap_addr_set(tmad, mapping, gather_list.addr); - + send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, + IB_MGMT_MAD_DATA, GFP_ATOMIC); /* * We rely here on the fact that MLX QPs don't use the * address handle after the send is posted (this is @@ -166,21 +126,15 @@ static void forward_trap(struct mthca_dev *dev, * it's OK for our devices). */ spin_lock_irqsave(&dev->sm_lock, flags); - wr.wr.ud.ah = dev->sm_ah[port_num - 1]; - if (wr.wr.ud.ah) - ret = ib_post_send_mad(agent, &wr, &bad_wr); + memcpy(send_buf->mad, mad, sizeof *mad); + if ((send_buf->ah = dev->sm_ah[port_num - 1])) + ret = ib_post_send_mad(send_buf, NULL); else ret = -EINVAL; spin_unlock_irqrestore(&dev->sm_lock, flags); - if (ret) { - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(tmad, mapping), - sizeof *tmad->mad, - DMA_TO_DEVICE); - kfree(tmad->mad); - kfree(tmad); - } + if (ret) + ib_free_send_mad(send_buf); } } @@ -267,15 +221,7 @@ int mthca_process_mad(struct ib_device *ibdev, static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { - struct mthca_trap_mad *tmad = - (void *) (unsigned long) mad_send_wc->wr_id; - - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(tmad, mapping), - sizeof *tmad->mad, - DMA_TO_DEVICE); - kfree(tmad->mad); - kfree(tmad); + ib_free_send_mad(mad_send_wc->send_buf); } int mthca_create_agents(struct mthca_dev *dev) diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 4172e6841e3..2c133506742 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -109,10 +109,14 @@ #define IB_QP_SET_QKEY 0x80000000 enum { + IB_MGMT_MAD_HDR = 24, IB_MGMT_MAD_DATA = 232, + IB_MGMT_RMPP_HDR = 36, IB_MGMT_RMPP_DATA = 220, + IB_MGMT_VENDOR_HDR = 40, IB_MGMT_VENDOR_DATA = 216, - IB_MGMT_SA_DATA = 200 + IB_MGMT_SA_HDR = 56, + IB_MGMT_SA_DATA = 200, }; struct ib_mad_hdr { @@ -203,26 +207,25 @@ struct ib_class_port_info /** * ib_mad_send_buf - MAD data buffer and work request for sends. - * @mad: References an allocated MAD data buffer. The size of the data - * buffer is specified in the @send_wr.length field. - * @mapping: DMA mapping information. + * @next: A pointer used to chain together MADs for posting. + * @mad: References an allocated MAD data buffer. * @mad_agent: MAD agent that allocated the buffer. + * @ah: The address handle to use when sending the MAD. * @context: User-controlled context fields. - * @send_wr: An initialized work request structure used when sending the MAD. - * The wr_id field of the work request is initialized to reference this - * data structure. - * @sge: A scatter-gather list referenced by the work request. + * @timeout_ms: Time to wait for a response. + * @retries: Number of times to retry a request for a response. * * Users are responsible for initializing the MAD buffer itself, with the * exception of specifying the payload length field in any RMPP MAD. */ struct ib_mad_send_buf { - struct ib_mad *mad; - DECLARE_PCI_UNMAP_ADDR(mapping) + struct ib_mad_send_buf *next; + void *mad; struct ib_mad_agent *mad_agent; + struct ib_ah *ah; void *context[2]; - struct ib_send_wr send_wr; - struct ib_sge sge; + int timeout_ms; + int retries; }; /** @@ -287,7 +290,7 @@ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, * or @mad_send_wc. */ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, - struct ib_send_wr *send_wr, + struct ib_mad_send_buf *send_buf, struct ib_mad_send_wc *mad_send_wc); /** @@ -334,13 +337,13 @@ struct ib_mad_agent { /** * ib_mad_send_wc - MAD send completion information. - * @wr_id: Work request identifier associated with the send MAD request. + * @send_buf: Send MAD data buffer associated with the send MAD request. * @status: Completion status. * @vendor_err: Optional vendor error information returned with a failed * request. */ struct ib_mad_send_wc { - u64 wr_id; + struct ib_mad_send_buf *send_buf; enum ib_wc_status status; u32 vendor_err; }; @@ -366,7 +369,7 @@ struct ib_mad_recv_buf { * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers. * @mad_len: The length of the received MAD, without duplicated headers. * - * For received response, the wr_id field of the wc is set to the wr_id + * For received response, the wr_id contains a pointer to the ib_mad_send_buf * for the corresponding send request. */ struct ib_mad_recv_wc { @@ -463,9 +466,9 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); /** * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client. - * @mad_agent: Specifies the associated registration to post the send to. - * @send_wr: Specifies the information needed to send the MAD(s). - * @bad_send_wr: Specifies the MAD on which an error was encountered. + * @send_buf: Specifies the information needed to send the MAD(s). + * @bad_send_buf: Specifies the MAD on which an error was encountered. This + * parameter is optional if only a single MAD is posted. * * Sent MADs are not guaranteed to complete in the order that they were posted. * @@ -479,9 +482,8 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); * defined data being transferred. The paylen_newwin field should be * specified in network-byte order. */ -int ib_post_send_mad(struct ib_mad_agent *mad_agent, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); +int ib_post_send_mad(struct ib_mad_send_buf *send_buf, + struct ib_mad_send_buf **bad_send_buf); /** * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer. @@ -507,23 +509,25 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); /** * ib_cancel_mad - Cancels an outstanding send MAD operation. * @mad_agent: Specifies the registration associated with sent MAD. - * @wr_id: Indicates the work request identifier of the MAD to cancel. + * @send_buf: Indicates the MAD to cancel. * * MADs will be returned to the user through the corresponding * ib_mad_send_handler. */ -void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id); +void ib_cancel_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf); /** * ib_modify_mad - Modifies an outstanding send MAD operation. * @mad_agent: Specifies the registration associated with sent MAD. - * @wr_id: Indicates the work request identifier of the MAD to modify. + * @send_buf: Indicates the MAD to modify. * @timeout_ms: New timeout value for sent MAD. * * This call will reset the timeout value for a sent MAD to the specified * value. */ -int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms); +int ib_modify_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, u32 timeout_ms); /** * ib_redirect_mad_qp - Registers a QP for MAD services. @@ -572,7 +576,6 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * @remote_qpn: Specifies the QPN of the receiving node. * @pkey_index: Specifies which PKey the MAD will be sent using. This field * is valid only if the remote_qpn is QP 1. - * @ah: References the address handle used to transfer to the remote node. * @rmpp_active: Indicates if the send will enable RMPP. * @hdr_len: Indicates the size of the data header of the MAD. This length * should include the common MAD header, RMPP header, plus any class @@ -582,11 +585,10 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * additional padding that may be necessary. * @gfp_mask: GFP mask used for the memory allocation. * - * This is a helper routine that may be used to allocate a MAD. Users are - * not required to allocate outbound MADs using this call. The returned - * MAD send buffer will reference a data buffer usable for sending a MAD, along + * This routine allocates a MAD for sending. The returned MAD send buffer + * will reference a data buffer usable for sending a MAD, along * with an initialized work request structure. Users may modify the returned - * MAD data buffer or work request before posting the send. + * MAD data buffer before posting the send. * * The returned data buffer will be cleared. Users are responsible for * initializing the common MAD and any class specific headers. If @rmpp_active @@ -594,7 +596,7 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, */ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, - struct ib_ah *ah, int rmpp_active, + int rmpp_active, int hdr_len, int data_len, gfp_t gfp_mask); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a5a963cb567..f72d46d54e0 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -595,11 +595,8 @@ struct ib_send_wr { } atomic; struct { struct ib_ah *ah; - struct ib_mad_hdr *mad_hdr; u32 remote_qpn; u32 remote_qkey; - int timeout_ms; /* valid for MADs only */ - int retries; /* valid for MADs only */ u16 pkey_index; /* valid for GSI only */ u8 port_num; /* valid for DR SMPs on switch only */ } ud; -- cgit From 547e3090738b04be650770b64265835dbb6ddf92 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 25 Oct 2005 10:57:32 -0700 Subject: [IB] mthca: correct modify QP attribute masks for UC The UC transport does not support RDMA reads or atomic operations, so we shouldn't require or even allow the consumer to set attributes relating to these operations for UC QPs. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 2ee0a2b0fd4..62ff091505d 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -338,8 +338,7 @@ static const struct { [UC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | - IB_QP_RQ_PSN | - IB_QP_MAX_DEST_RD_ATOMIC), + IB_QP_RQ_PSN), [RC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | @@ -368,8 +367,7 @@ static const struct { .trans = MTHCA_TRANS_RTR2RTS, .req_param = { [UD] = IB_QP_SQ_PSN, - [UC] = (IB_QP_SQ_PSN | - IB_QP_MAX_QP_RD_ATOMIC), + [UC] = IB_QP_SQ_PSN, [RC] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | @@ -446,8 +444,6 @@ static const struct { [UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [UC] = (IB_QP_AV | - IB_QP_MAX_QP_RD_ATOMIC | - IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | @@ -478,7 +474,7 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), - [UC] = (IB_QP_CUR_STATE), + [UC] = IB_QP_CUR_STATE, [RC] = (IB_QP_CUR_STATE | IB_QP_MIN_RNR_TIMER), [MLX] = (IB_QP_CUR_STATE | -- cgit From 7cc656efb560cda66b5ed48444cad7556ea4fe99 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 25 Oct 2005 15:13:54 -0700 Subject: [IB] simplify mad_rmpp.c:alloc_response_msg() Change alloc_response_msg() in mad_rmpp.c to return the struct it allocates directly (or an error code a la ERR_PTR), rather than returning a status and passing the struct back in a pointer param. This simplifies the code and gets rid of warnings like drivers/infiniband/core/mad_rmpp.c: In function nack_recv: drivers/infiniband/core/mad_rmpp.c:192: warning: msg may be used uninitialized in this function with newer versions of gcc. Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad_rmpp.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index ba112cd5f93..3249e1d8c07 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -151,28 +151,27 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv, ib_free_send_mad(msg); } -static int alloc_response_msg(struct ib_mad_agent *agent, - struct ib_mad_recv_wc *recv_wc, - struct ib_mad_send_buf **msg) +static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, + struct ib_mad_recv_wc *recv_wc) { - struct ib_mad_send_buf *m; + struct ib_mad_send_buf *msg; struct ib_ah *ah; ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, recv_wc->recv_buf.grh, agent->port_num); if (IS_ERR(ah)) - return PTR_ERR(ah); + return (void *) ah; - m = ib_create_send_mad(agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, 1, - IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA, GFP_KERNEL); - if (IS_ERR(m)) { + msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, + recv_wc->wc->pkey_index, 1, + IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA, + GFP_KERNEL); + if (IS_ERR(msg)) ib_destroy_ah(ah); - return PTR_ERR(m); - } - m->ah = ah; - *msg = m; - return 0; + else + msg->ah = ah; + + return msg; } void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) @@ -191,8 +190,8 @@ static void nack_recv(struct ib_mad_agent_private *agent, struct ib_rmpp_mad *rmpp_mad; int ret; - ret = alloc_response_msg(&agent->agent, recv_wc, &msg); - if (ret) + msg = alloc_response_msg(&agent->agent, recv_wc); + if (IS_ERR(msg)) return; rmpp_mad = msg->mad; -- cgit From 3d155f8cd0d077938d271225d26ee52f8eb26082 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 27 Oct 2005 11:03:38 -0700 Subject: [IB] mthca: first pass at catastrophic error reporting Add some initial support for detecting and reporting catastrophic errors reported by Mellanox HCAs. We start a periodic timer which polls the catastrophic error reporting buffer in device memory. If an error is detected, we dump the contents of the buffer for port-mortem debugging, and report a fatal asynchronous error to higher levels. In the future we can try to recover from these errors by resetting the device, but this will require some work in higher-level code as well. Let's get this in now, so that we at least get catastrophic errors reported in logs. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/Makefile | 3 +- drivers/infiniband/hw/mthca/mthca_catas.c | 153 +++++++++++++++++++++++++++ drivers/infiniband/hw/mthca/mthca_cmd.c | 5 + drivers/infiniband/hw/mthca/mthca_dev.h | 13 +++ drivers/infiniband/hw/mthca/mthca_provider.c | 3 + 5 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/hw/mthca/mthca_catas.c diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile index c44f7bae542..47ec5a7cba0 100644 --- a/drivers/infiniband/hw/mthca/Makefile +++ b/drivers/infiniband/hw/mthca/Makefile @@ -7,4 +7,5 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \ mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \ - mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o + mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o \ + mthca_catas.o diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c new file mode 100644 index 00000000000..7ac52af43b9 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mthca_dev.h" + +enum { + MTHCA_CATAS_POLL_INTERVAL = 5 * HZ, + + MTHCA_CATAS_TYPE_INTERNAL = 0, + MTHCA_CATAS_TYPE_UPLINK = 3, + MTHCA_CATAS_TYPE_DDR = 4, + MTHCA_CATAS_TYPE_PARITY = 5, +}; + +static DEFINE_SPINLOCK(catas_lock); + +static void handle_catas(struct mthca_dev *dev) +{ + struct ib_event event; + const char *type; + int i; + + event.device = &dev->ib_dev; + event.event = IB_EVENT_DEVICE_FATAL; + event.element.port_num = 0; + + ib_dispatch_event(&event); + + switch (swab32(readl(dev->catas_err.map)) >> 24) { + case MTHCA_CATAS_TYPE_INTERNAL: + type = "internal error"; + break; + case MTHCA_CATAS_TYPE_UPLINK: + type = "uplink bus error"; + break; + case MTHCA_CATAS_TYPE_DDR: + type = "DDR data error"; + break; + case MTHCA_CATAS_TYPE_PARITY: + type = "internal parity error"; + break; + default: + type = "unknown error"; + break; + } + + mthca_err(dev, "Catastrophic error detected: %s\n", type); + for (i = 0; i < dev->catas_err.size; ++i) + mthca_err(dev, " buf[%02x]: %08x\n", + i, swab32(readl(dev->catas_err.map + i))); +} + +static void poll_catas(unsigned long dev_ptr) +{ + struct mthca_dev *dev = (struct mthca_dev *) dev_ptr; + unsigned long flags; + int i; + + for (i = 0; i < dev->catas_err.size; ++i) + if (readl(dev->catas_err.map + i)) { + handle_catas(dev); + return; + } + + spin_lock_irqsave(&catas_lock, flags); + if (dev->catas_err.stop) + mod_timer(&dev->catas_err.timer, + jiffies + MTHCA_CATAS_POLL_INTERVAL); + spin_unlock_irqrestore(&catas_lock, flags); + + return; +} + +void mthca_start_catas_poll(struct mthca_dev *dev) +{ + unsigned long addr; + + init_timer(&dev->catas_err.timer); + dev->catas_err.stop = 0; + dev->catas_err.map = NULL; + + addr = pci_resource_start(dev->pdev, 0) + + ((pci_resource_len(dev->pdev, 0) - 1) & + dev->catas_err.addr); + + if (!request_mem_region(addr, dev->catas_err.size * 4, + DRV_NAME)) { + mthca_warn(dev, "couldn't request catastrophic error region " + "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); + return; + } + + dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); + if (!dev->catas_err.map) { + mthca_warn(dev, "couldn't map catastrophic error region " + "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); + release_mem_region(addr, dev->catas_err.size * 4); + return; + } + + dev->catas_err.timer.data = (unsigned long) dev; + dev->catas_err.timer.function = poll_catas; + dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL; + add_timer(&dev->catas_err.timer); +} + +void mthca_stop_catas_poll(struct mthca_dev *dev) +{ + spin_lock_irq(&catas_lock); + dev->catas_err.stop = 1; + spin_unlock_irq(&catas_lock); + + del_timer_sync(&dev->catas_err.timer); + + if (dev->catas_err.map) { + iounmap(dev->catas_err.map); + release_mem_region(pci_resource_start(dev->pdev, 0) + + ((pci_resource_len(dev->pdev, 0) - 1) & + dev->catas_err.addr), + dev->catas_err.size * 4); + } +} diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 1bd7dc8f778..9220473dbfb 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -706,9 +707,13 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); dev->cmd.max_cmds = 1 << lg; + MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET); + MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET); mthca_dbg(dev, "FW version %012llx, max commands %d\n", (unsigned long long) dev->fw_ver, dev->cmd.max_cmds); + mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n", + (unsigned long long) dev->catas_err.addr, dev->catas_err.size); if (mthca_is_memfree(dev)) { MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index f106bac0f92..7e68bd4a378 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -258,6 +258,14 @@ struct mthca_mcg_table { struct mthca_icm_table *table; }; +struct mthca_catas_err { + u64 addr; + u32 __iomem *map; + unsigned long stop; + u32 size; + struct timer_list timer; +}; + struct mthca_dev { struct ib_device ib_dev; struct pci_dev *pdev; @@ -318,6 +326,8 @@ struct mthca_dev { struct mthca_av_table av_table; struct mthca_mcg_table mcg_table; + struct mthca_catas_err catas_err; + struct mthca_uar driver_uar; struct mthca_db_table *db_tab; struct mthca_pd driver_pd; @@ -405,6 +415,9 @@ void mthca_cleanup_mcg_table(struct mthca_dev *dev); int mthca_register_device(struct mthca_dev *dev); void mthca_unregister_device(struct mthca_dev *dev); +void mthca_start_catas_poll(struct mthca_dev *dev); +void mthca_stop_catas_poll(struct mthca_dev *dev); + int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 9e911a1ea41..1b9477edbd7 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1175,10 +1175,13 @@ int mthca_register_device(struct mthca_dev *dev) } } + mthca_start_catas_poll(dev); + return 0; } void mthca_unregister_device(struct mthca_dev *dev) { + mthca_stop_catas_poll(dev); ib_unregister_device(&dev->ib_dev); } -- cgit From 089a1bedd84be16a4f49a319e7ccb4a128da5ce9 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 27 Oct 2005 20:33:43 -0700 Subject: [IB] ib_umad: fix crash when freeing send buffers The conversion of user_mad.c to the new MAD send API was slightly off: in a few places, we used packet->msg instead of packet->msg->mad when referring to the actual data buffer, which ended up corrupting the underlying data structure and crashing when we free an invalid pointer. Signed-off-by: Roland Dreier --- drivers/infiniband/core/user_mad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index fc5519a3de9..a48166a8e04 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -398,12 +398,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, * transaction ID matches the agent being used to send the * MAD. */ - method = ((struct ib_mad_hdr *) packet->msg)->method; + method = ((struct ib_mad_hdr *) packet->msg->mad)->method; if (!(method & IB_MGMT_METHOD_RESP) && method != IB_MGMT_METHOD_TRAP_REPRESS && method != IB_MGMT_METHOD_SEND) { - tid = &((struct ib_mad_hdr *) packet->msg)->tid; + tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); } -- cgit From cb0f0910f4b41772a6771bdb4fb2d419b27bcd77 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 27 Oct 2005 20:48:11 -0700 Subject: [IB] ib_umad: various cleanups Simplify user_mad.c code in a few places, and convert from kmalloc() + memset() to kzalloc(). This also fixes a theoretical race window by not accessing packet->length after posting the send buffer (the send could complete and packet could be freed before we get to the return statement at the end of ib_umad_write()). Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/user_mad.c | 73 +++++++++++--------------------------- 1 file changed, 21 insertions(+), 52 deletions(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index a48166a8e04..17ec0a19dbc 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -99,7 +99,6 @@ struct ib_umad_packet { struct ib_mad_send_buf *msg; struct list_head list; int length; - DECLARE_PCI_UNMAP_ADDR(mapping) struct ib_user_mad mad; }; @@ -145,15 +144,12 @@ static void send_handler(struct ib_mad_agent *agent, ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { - timeout = kmalloc(sizeof *timeout + sizeof (struct ib_mad_hdr), - GFP_KERNEL); + timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL); if (!timeout) goto out; - memset(timeout, 0, sizeof *timeout + sizeof (struct ib_mad_hdr)); - - timeout->length = sizeof (struct ib_mad_hdr); - timeout->mad.hdr.id = packet->mad.hdr.id; + timeout->length = IB_MGMT_MAD_HDR; + timeout->mad.hdr.id = packet->mad.hdr.id; timeout->mad.hdr.status = ETIMEDOUT; memcpy(timeout->mad.data, packet->mad.data, sizeof (struct ib_mad_hdr)); @@ -176,11 +172,10 @@ static void recv_handler(struct ib_mad_agent *agent, goto out; length = mad_recv_wc->mad_len; - packet = kmalloc(sizeof *packet + length, GFP_KERNEL); + packet = kzalloc(sizeof *packet + length, GFP_KERNEL); if (!packet) goto out; - memset(packet, 0, sizeof *packet + length); packet->length = length; ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data); @@ -246,7 +241,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, else ret = -ENOSPC; } else if (copy_to_user(buf, &packet->mad, - packet->length + sizeof (struct ib_user_mad))) + packet->length + sizeof (struct ib_user_mad))) ret = -EFAULT; else ret = packet->length + sizeof (struct ib_user_mad); @@ -271,22 +266,19 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_rmpp_mad *rmpp_mad; u8 method; __be64 *tid; - int ret, length, hdr_len, rmpp_hdr_size; + int ret, length, hdr_len, copy_offset; int rmpp_active = 0; if (count < sizeof (struct ib_user_mad)) return -EINVAL; length = count - sizeof (struct ib_user_mad); - packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) + - sizeof (struct ib_rmpp_hdr), GFP_KERNEL); + packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; if (copy_from_user(&packet->mad, buf, - sizeof (struct ib_user_mad) + - sizeof (struct ib_mad_hdr) + - sizeof (struct ib_rmpp_hdr))) { + sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) { ret = -EFAULT; goto err; } @@ -297,8 +289,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err; } - packet->length = length; - down_read(&file->agent_mutex); agent = file->agent[packet->mad.hdr.id]; @@ -345,12 +335,10 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err_ah; } rmpp_active = 1; + copy_offset = IB_MGMT_RMPP_HDR; } else { - if (length > sizeof (struct ib_mad)) { - ret = -EINVAL; - goto err_ah; - } hdr_len = IB_MGMT_MAD_HDR; + copy_offset = IB_MGMT_MAD_HDR; } packet->msg = ib_create_send_mad(agent, @@ -368,28 +356,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, packet->msg->retries = packet->mad.hdr.retries; packet->msg->context[0] = packet; - if (!rmpp_active) { - /* Copy message from user into send buffer */ - if (copy_from_user(packet->msg->mad, - buf + sizeof (struct ib_user_mad), length)) { - ret = -EFAULT; - goto err_msg; - } - } else { - rmpp_hdr_size = sizeof (struct ib_mad_hdr) + - sizeof (struct ib_rmpp_hdr); - - /* Only copy MAD headers (RMPP header in place) */ - memcpy(packet->msg->mad, packet->mad.data, - sizeof (struct ib_mad_hdr)); - - /* Now, copy rest of message from user into send buffer */ - if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data, - buf + sizeof (struct ib_user_mad) + rmpp_hdr_size, - length - rmpp_hdr_size)) { - ret = -EFAULT; - goto err_msg; - } + /* Copy MAD headers (RMPP header in place) */ + memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); + /* Now, copy rest of message from user into send buffer */ + if (copy_from_user(packet->msg->mad + copy_offset, + buf + sizeof (struct ib_user_mad) + copy_offset, + length - copy_offset)) { + ret = -EFAULT; + goto err_msg; } /* @@ -414,7 +388,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, up_read(&file->agent_mutex); - return sizeof (struct ib_user_mad_hdr) + packet->length; + return count; err_msg: ib_free_send_mad(packet->msg); @@ -564,12 +538,10 @@ static int ib_umad_open(struct inode *inode, struct file *filp) container_of(inode->i_cdev, struct ib_umad_port, dev); struct ib_umad_file *file; - file = kmalloc(sizeof *file, GFP_KERNEL); + file = kzalloc(sizeof *file, GFP_KERNEL); if (!file) return -ENOMEM; - memset(file, 0, sizeof *file); - spin_lock_init(&file->recv_lock); init_rwsem(&file->agent_mutex); INIT_LIST_HEAD(&file->recv_list); @@ -814,15 +786,12 @@ static void ib_umad_add_one(struct ib_device *device) e = device->phys_port_cnt; } - umad_dev = kmalloc(sizeof *umad_dev + + umad_dev = kzalloc(sizeof *umad_dev + (e - s + 1) * sizeof (struct ib_umad_port), GFP_KERNEL); if (!umad_dev) return; - memset(umad_dev, 0, sizeof *umad_dev + - (e - s + 1) * sizeof (struct ib_umad_port)); - kref_init(&umad_dev->ref); umad_dev->start_port = s; -- cgit From 1993d683f39f77ddb46a662d7146247877d50b8f Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 28 Oct 2005 15:30:34 -0700 Subject: [IPoIB] Drop RX packets when out of memory Change the way IPoIB handles RX packets when it can't allocate a new receive skbuff. If the allocation of a new receive skb fails, we now drop the packet we just received and repost the original receive skb. This means that the receive ring always stays full and we don't have to monkey around with trying to schedule a refill task for later. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 21 +++--- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 116 +++++++++++++++++------------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 +-- 3 files changed, 85 insertions(+), 60 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 6b14bd1c60a..c994a916a58 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -100,7 +100,12 @@ struct ipoib_pseudoheader { struct ipoib_mcast; -struct ipoib_buf { +struct ipoib_rx_buf { + struct sk_buff *skb; + dma_addr_t mapping; +}; + +struct ipoib_tx_buf { struct sk_buff *skb; DECLARE_PCI_UNMAP_ADDR(mapping) }; @@ -150,14 +155,14 @@ struct ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; - struct ipoib_buf *rx_ring; + struct ipoib_rx_buf *rx_ring; - spinlock_t tx_lock; - struct ipoib_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; - struct ib_sge tx_sge; - struct ib_send_wr tx_wr; + spinlock_t tx_lock; + struct ipoib_tx_buf *tx_ring; + unsigned tx_head; + unsigned tx_tail; + struct ib_sge tx_sge; + struct ib_send_wr tx_wr; struct ib_wc ibwc[IPOIB_NUM_WC]; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 02d0e000657..192fef884e2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -95,57 +95,65 @@ void ipoib_free_ah(struct kref *kref) } } -static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv, - unsigned int wr_id, - dma_addr_t addr) +static int ipoib_ib_post_receive(struct net_device *dev, int id) { - struct ib_sge list = { - .addr = addr, - .length = IPOIB_BUF_SIZE, - .lkey = priv->mr->lkey, - }; - struct ib_recv_wr param = { - .wr_id = wr_id | IPOIB_OP_RECV, - .sg_list = &list, - .num_sge = 1, - }; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_sge list; + struct ib_recv_wr param; struct ib_recv_wr *bad_wr; + int ret; + + list.addr = priv->rx_ring[id].mapping; + list.length = IPOIB_BUF_SIZE; + list.lkey = priv->mr->lkey; + + param.next = NULL; + param.wr_id = id | IPOIB_OP_RECV; + param.sg_list = &list; + param.num_sge = 1; + + ret = ib_post_recv(priv->qp, ¶m, &bad_wr); + if (unlikely(ret)) { + ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); + dma_unmap_single(priv->ca->dma_device, + priv->rx_ring[id].mapping, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + dev_kfree_skb_any(priv->rx_ring[id].skb); + priv->rx_ring[id].skb = NULL; + } - return ib_post_recv(priv->qp, ¶m, &bad_wr); + return ret; } -static int ipoib_ib_post_receive(struct net_device *dev, int id) +static int ipoib_alloc_rx_skb(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; dma_addr_t addr; - int ret; skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); - if (!skb) { - ipoib_warn(priv, "failed to allocate receive buffer\n"); - - priv->rx_ring[id].skb = NULL; + if (!skb) return -ENOMEM; - } - skb_reserve(skb, 4); /* 16 byte align IP header */ - priv->rx_ring[id].skb = skb; + + /* + * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte + * header. So we need 4 more bytes to get to 48 and align the + * IP header to a multiple of 16. + */ + skb_reserve(skb, 4); + addr = dma_map_single(priv->ca->dma_device, skb->data, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr); - - ret = ipoib_ib_receive(priv, id, addr); - if (ret) { - ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n", - id, ret); - dma_unmap_single(priv->ca->dma_device, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(addr))) { dev_kfree_skb_any(skb); - priv->rx_ring[id].skb = NULL; + return -EIO; } - return ret; + priv->rx_ring[id].skb = skb; + priv->rx_ring[id].mapping = addr; + + return 0; } static int ipoib_ib_post_receives(struct net_device *dev) @@ -154,6 +162,10 @@ static int ipoib_ib_post_receives(struct net_device *dev) int i; for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) { + if (ipoib_alloc_rx_skb(dev, i)) { + ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); + return -ENOMEM; + } if (ipoib_ib_post_receive(dev, i)) { ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); return -EIO; @@ -176,28 +188,36 @@ static void ipoib_ib_handle_wc(struct net_device *dev, wr_id &= ~IPOIB_OP_RECV; if (wr_id < IPOIB_RX_RING_SIZE) { - struct sk_buff *skb = priv->rx_ring[wr_id].skb; - - priv->rx_ring[wr_id].skb = NULL; + struct sk_buff *skb = priv->rx_ring[wr_id].skb; + dma_addr_t addr = priv->rx_ring[wr_id].mapping; - dma_unmap_single(priv->ca->dma_device, - pci_unmap_addr(&priv->rx_ring[wr_id], - mapping), - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); - - if (wc->status != IB_WC_SUCCESS) { + if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); + priv->rx_ring[wr_id].skb = NULL; return; } + /* + * If we can't allocate a new RX buffer, dump + * this packet and reuse the old buffer. + */ + if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + ++priv->stats.rx_dropped; + goto repost; + } + ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + skb_put(skb, wc->byte_len); skb_pull(skb, IB_GRH_BYTES); @@ -220,8 +240,8 @@ static void ipoib_ib_handle_wc(struct net_device *dev, dev_kfree_skb_any(skb); } - /* repost receive */ - if (ipoib_ib_post_receive(dev, wr_id)) + repost: + if (unlikely(ipoib_ib_post_receive(dev, wr_id))) ipoib_warn(priv, "ipoib_ib_post_receive failed " "for buf %d\n", wr_id); } else @@ -229,7 +249,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev, wr_id); } else { - struct ipoib_buf *tx_req; + struct ipoib_tx_buf *tx_req; unsigned long flags; if (wr_id >= IPOIB_TX_RING_SIZE) { @@ -302,7 +322,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_buf *tx_req; + struct ipoib_tx_buf *tx_req; dma_addr_t addr; if (skb->len > dev->mtu + INFINIBAND_ALEN) { @@ -468,7 +488,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) struct ib_qp_attr qp_attr; int attr_mask; unsigned long begin; - struct ipoib_buf *tx_req; + struct ipoib_tx_buf *tx_req; int i; /* Kill the existing QP and allocate a new one */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ee303859b04..cd4f42328db 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -732,7 +732,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) /* Allocate RX/TX "rings" to hold queued skbs */ - priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf), + priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf), GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", @@ -740,9 +740,9 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out; } memset(priv->rx_ring, 0, - IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf)); + IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf)); - priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf), + priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf), GFP_KERNEL); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", @@ -750,7 +750,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out_rx_ring_cleanup; } memset(priv->tx_ring, 0, - IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf)); + IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf)); /* priv->tx_head & tx_tail are already 0 */ -- cgit From a74968f8c3b1166cfe0942901b56165f06ab6f60 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 28 Oct 2005 15:37:23 -0700 Subject: [IB] umad: Fix device lifetime problems Move ib_umad module to using cdev_alloc() and class_device_create() so that we can handle device lifetime properly. Now we can make sure we keep all of our data structures around until the last way to reach them is gone. Signed-off-by: Roland Dreier --- drivers/infiniband/core/user_mad.c | 268 ++++++++++++++++++++++--------------- 1 file changed, 159 insertions(+), 109 deletions(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 17ec0a19dbc..9e49816d728 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -64,18 +64,39 @@ enum { IB_UMAD_MINOR_BASE = 0 }; +/* + * Our lifetime rules for these structs are the following: each time a + * device special file is opened, we look up the corresponding struct + * ib_umad_port by minor in the umad_port[] table while holding the + * port_lock. If this lookup succeeds, we take a reference on the + * ib_umad_port's struct ib_umad_device while still holding the + * port_lock; if the lookup fails, we fail the open(). We drop these + * references in the corresponding close(). + * + * In addition to references coming from open character devices, there + * is one more reference to each ib_umad_device representing the + * module's reference taken when allocating the ib_umad_device in + * ib_umad_add_one(). + * + * When destroying an ib_umad_device, we clear all of its + * ib_umad_ports from umad_port[] while holding port_lock before + * dropping the module's reference to the ib_umad_device. This is + * always safe because any open() calls will either succeed and obtain + * a reference before we clear the umad_port[] entries, or fail after + * we clear the umad_port[] entries. + */ + struct ib_umad_port { - int devnum; - struct cdev dev; - struct class_device class_dev; + struct cdev *dev; + struct class_device *class_dev; - int sm_devnum; - struct cdev sm_dev; - struct class_device sm_class_dev; + struct cdev *sm_dev; + struct class_device *sm_class_dev; struct semaphore sm_sem; struct ib_device *ib_dev; struct ib_umad_device *umad_dev; + int dev_num; u8 port_num; }; @@ -102,13 +123,25 @@ struct ib_umad_packet { struct ib_user_mad mad; }; +static struct class *umad_class; + static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); -static spinlock_t map_lock; + +static DEFINE_SPINLOCK(port_lock); +static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS]; static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device); +static void ib_umad_release_dev(struct kref *ref) +{ + struct ib_umad_device *dev = + container_of(ref, struct ib_umad_device, ref); + + kfree(dev); +} + static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent, struct ib_umad_packet *packet) @@ -534,13 +567,23 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd, static int ib_umad_open(struct inode *inode, struct file *filp) { - struct ib_umad_port *port = - container_of(inode->i_cdev, struct ib_umad_port, dev); + struct ib_umad_port *port; struct ib_umad_file *file; + spin_lock(&port_lock); + port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE]; + if (port) + kref_get(&port->umad_dev->ref); + spin_unlock(&port_lock); + + if (!port) + return -ENXIO; + file = kzalloc(sizeof *file, GFP_KERNEL); - if (!file) + if (!file) { + kref_put(&port->umad_dev->ref, ib_umad_release_dev); return -ENOMEM; + } spin_lock_init(&file->recv_lock); init_rwsem(&file->agent_mutex); @@ -556,6 +599,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp) static int ib_umad_close(struct inode *inode, struct file *filp) { struct ib_umad_file *file = filp->private_data; + struct ib_umad_device *dev = file->port->umad_dev; struct ib_umad_packet *packet, *tmp; int i; @@ -570,6 +614,8 @@ static int ib_umad_close(struct inode *inode, struct file *filp) kfree(file); + kref_put(&dev->ref, ib_umad_release_dev); + return 0; } @@ -586,30 +632,46 @@ static struct file_operations umad_fops = { static int ib_umad_sm_open(struct inode *inode, struct file *filp) { - struct ib_umad_port *port = - container_of(inode->i_cdev, struct ib_umad_port, sm_dev); + struct ib_umad_port *port; struct ib_port_modify props = { .set_port_cap_mask = IB_PORT_SM }; int ret; + spin_lock(&port_lock); + port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS]; + if (port) + kref_get(&port->umad_dev->ref); + spin_unlock(&port_lock); + + if (!port) + return -ENXIO; + if (filp->f_flags & O_NONBLOCK) { - if (down_trylock(&port->sm_sem)) - return -EAGAIN; + if (down_trylock(&port->sm_sem)) { + ret = -EAGAIN; + goto fail; + } } else { - if (down_interruptible(&port->sm_sem)) - return -ERESTARTSYS; + if (down_interruptible(&port->sm_sem)) { + ret = -ERESTARTSYS; + goto fail; + } } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); if (ret) { up(&port->sm_sem); - return ret; + goto fail; } filp->private_data = port; return 0; + +fail: + kref_put(&port->umad_dev->ref, ib_umad_release_dev); + return ret; } static int ib_umad_sm_close(struct inode *inode, struct file *filp) @@ -623,6 +685,8 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); up(&port->sm_sem); + kref_put(&port->umad_dev->ref, ib_umad_release_dev); + return ret; } @@ -642,6 +706,9 @@ static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_umad_port *port = class_get_devdata(class_dev); + if (!port) + return -ENODEV; + return sprintf(buf, "%s\n", port->ib_dev->name); } static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); @@ -650,38 +717,13 @@ static ssize_t show_port(struct class_device *class_dev, char *buf) { struct ib_umad_port *port = class_get_devdata(class_dev); + if (!port) + return -ENODEV; + return sprintf(buf, "%d\n", port->port_num); } static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL); -static void ib_umad_release_dev(struct kref *ref) -{ - struct ib_umad_device *dev = - container_of(ref, struct ib_umad_device, ref); - - kfree(dev); -} - -static void ib_umad_release_port(struct class_device *class_dev) -{ - struct ib_umad_port *port = class_get_devdata(class_dev); - - if (class_dev == &port->class_dev) { - cdev_del(&port->dev); - clear_bit(port->devnum, dev_map); - } else { - cdev_del(&port->sm_dev); - clear_bit(port->sm_devnum, dev_map); - } - - kref_put(&port->umad_dev->ref, ib_umad_release_dev); -} - -static struct class umad_class = { - .name = "infiniband_mad", - .release = ib_umad_release_port -}; - static ssize_t show_abi_version(struct class *class, char *buf) { return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); @@ -691,89 +733,102 @@ static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_port *port) { - spin_lock(&map_lock); - port->devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); - if (port->devnum >= IB_UMAD_MAX_PORTS) { - spin_unlock(&map_lock); + spin_lock(&port_lock); + port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); + if (port->dev_num >= IB_UMAD_MAX_PORTS) { + spin_unlock(&port_lock); return -1; } - port->sm_devnum = find_next_zero_bit(dev_map, IB_UMAD_MAX_PORTS * 2, IB_UMAD_MAX_PORTS); - if (port->sm_devnum >= IB_UMAD_MAX_PORTS * 2) { - spin_unlock(&map_lock); - return -1; - } - set_bit(port->devnum, dev_map); - set_bit(port->sm_devnum, dev_map); - spin_unlock(&map_lock); + set_bit(port->dev_num, dev_map); + spin_unlock(&port_lock); port->ib_dev = device; port->port_num = port_num; init_MUTEX(&port->sm_sem); - cdev_init(&port->dev, &umad_fops); - port->dev.owner = THIS_MODULE; - kobject_set_name(&port->dev.kobj, "umad%d", port->devnum); - if (cdev_add(&port->dev, base_dev + port->devnum, 1)) + port->dev = cdev_alloc(); + if (!port->dev) return -1; - - port->class_dev.class = &umad_class; - port->class_dev.dev = device->dma_device; - port->class_dev.devt = port->dev.dev; - - snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum); - - if (class_device_register(&port->class_dev)) + port->dev->owner = THIS_MODULE; + port->dev->ops = &umad_fops; + kobject_set_name(&port->dev->kobj, "umad%d", port->dev_num); + if (cdev_add(port->dev, base_dev + port->dev_num, 1)) goto err_cdev; - class_set_devdata(&port->class_dev, port); - kref_get(&port->umad_dev->ref); + port->class_dev = class_device_create(umad_class, port->dev->dev, + device->dma_device, + "umad%d", port->dev_num); + if (IS_ERR(port->class_dev)) + goto err_cdev; - if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev)) + if (class_device_create_file(port->class_dev, &class_device_attr_ibdev)) goto err_class; - if (class_device_create_file(&port->class_dev, &class_device_attr_port)) + if (class_device_create_file(port->class_dev, &class_device_attr_port)) goto err_class; - cdev_init(&port->sm_dev, &umad_sm_fops); - port->sm_dev.owner = THIS_MODULE; - kobject_set_name(&port->dev.kobj, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); - if (cdev_add(&port->sm_dev, base_dev + port->sm_devnum, 1)) - return -1; - - port->sm_class_dev.class = &umad_class; - port->sm_class_dev.dev = device->dma_device; - port->sm_class_dev.devt = port->sm_dev.dev; - - snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); + port->sm_dev = cdev_alloc(); + if (!port->sm_dev) + goto err_class; + port->sm_dev->owner = THIS_MODULE; + port->sm_dev->ops = &umad_sm_fops; + kobject_set_name(&port->dev->kobj, "issm%d", port->dev_num); + if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) + goto err_sm_cdev; - if (class_device_register(&port->sm_class_dev)) + port->sm_class_dev = class_device_create(umad_class, port->sm_dev->dev, + device->dma_device, + "issm%d", port->dev_num); + if (IS_ERR(port->sm_class_dev)) goto err_sm_cdev; - class_set_devdata(&port->sm_class_dev, port); - kref_get(&port->umad_dev->ref); + class_set_devdata(port->class_dev, port); + class_set_devdata(port->sm_class_dev, port); - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev)) + if (class_device_create_file(port->sm_class_dev, &class_device_attr_ibdev)) goto err_sm_class; - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port)) + if (class_device_create_file(port->sm_class_dev, &class_device_attr_port)) goto err_sm_class; + spin_lock(&port_lock); + umad_port[port->dev_num] = port; + spin_unlock(&port_lock); + return 0; err_sm_class: - class_device_unregister(&port->sm_class_dev); + class_device_destroy(umad_class, port->sm_dev->dev); err_sm_cdev: - cdev_del(&port->sm_dev); + cdev_del(port->sm_dev); err_class: - class_device_unregister(&port->class_dev); + class_device_destroy(umad_class, port->dev->dev); err_cdev: - cdev_del(&port->dev); - clear_bit(port->devnum, dev_map); + cdev_del(port->dev); + clear_bit(port->dev_num, dev_map); return -1; } +static void ib_umad_kill_port(struct ib_umad_port *port) +{ + class_set_devdata(port->class_dev, NULL); + class_set_devdata(port->sm_class_dev, NULL); + + class_device_destroy(umad_class, port->dev->dev); + class_device_destroy(umad_class, port->sm_dev->dev); + + cdev_del(port->dev); + cdev_del(port->sm_dev); + + spin_lock(&port_lock); + umad_port[port->dev_num] = NULL; + spin_unlock(&port_lock); + + clear_bit(port->dev_num, dev_map); +} + static void ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; @@ -809,10 +864,8 @@ static void ib_umad_add_one(struct ib_device *device) return; err: - while (--i >= s) { - class_device_unregister(&umad_dev->port[i - s].class_dev); - class_device_unregister(&umad_dev->port[i - s].sm_class_dev); - } + while (--i >= s) + ib_umad_kill_port(&umad_dev->port[i]); kref_put(&umad_dev->ref, ib_umad_release_dev); } @@ -825,10 +878,8 @@ static void ib_umad_remove_one(struct ib_device *device) if (!umad_dev) return; - for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) { - class_device_unregister(&umad_dev->port[i].class_dev); - class_device_unregister(&umad_dev->port[i].sm_class_dev); - } + for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) + ib_umad_kill_port(&umad_dev->port[i]); kref_put(&umad_dev->ref, ib_umad_release_dev); } @@ -837,8 +888,6 @@ static int __init ib_umad_init(void) { int ret; - spin_lock_init(&map_lock); - ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { @@ -846,13 +895,14 @@ static int __init ib_umad_init(void) goto out; } - ret = class_register(&umad_class); - if (ret) { + umad_class = class_create(THIS_MODULE, "infiniband_mad"); + if (IS_ERR(umad_class)) { + ret = PTR_ERR(umad_class); printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n"); goto out_chrdev; } - ret = class_create_file(&umad_class, &class_attr_abi_version); + ret = class_create_file(umad_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); goto out_class; @@ -867,7 +917,7 @@ static int __init ib_umad_init(void) return 0; out_class: - class_unregister(&umad_class); + class_destroy(umad_class); out_chrdev: unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); @@ -879,7 +929,7 @@ out: static void __exit ib_umad_cleanup(void) { ib_unregister_client(&umad_client); - class_unregister(&umad_class); + class_destroy(umad_class); unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); } -- cgit From 70a30e16a8a9d22396a4d1e96af86e43594df584 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 28 Oct 2005 15:38:26 -0700 Subject: [IB] uverbs: Fix device lifetime problems Move ib_uverbs module to using cdev_alloc() and class_device_create() so that we can handle device lifetime properly. Now we can make sure we keep all of our data structures around until the last way to reach them is gone. Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 36 +++++- drivers/infiniband/core/uverbs_cmd.c | 38 +------ drivers/infiniband/core/uverbs_main.c | 203 ++++++++++++++++++++++++---------- 3 files changed, 182 insertions(+), 95 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 63c8085c0c9..031cdf3c066 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -39,20 +39,38 @@ #ifndef UVERBS_H #define UVERBS_H -/* Include device.h and fs.h until cdev.h is self-sufficient */ -#include -#include -#include #include #include #include #include +/* + * Our lifetime rules for these structs are the following: + * + * struct ib_uverbs_device: One reference is held by the module and + * released in ib_uverbs_remove_one(). Another reference is taken by + * ib_uverbs_open() each time the character special file is opened, + * and released in ib_uverbs_release_file() when the file is released. + * + * struct ib_uverbs_file: One reference is held by the VFS and + * released when the file is closed. Another reference is taken when + * an asynchronous event queue file is created and released when the + * event file is closed. + * + * struct ib_uverbs_event_file: One reference is held by the VFS and + * released when the file is closed. For asynchronous event files, + * another reference is held by the corresponding main context file + * and released when that file is closed. For completion event files, + * a reference is taken when a CQ is created that uses the file, and + * released when the CQ is destroyed. + */ + struct ib_uverbs_device { + struct kref ref; int devnum; - struct cdev dev; - struct class_device class_dev; + struct cdev *dev; + struct class_device *class_dev; struct ib_device *ib_dev; int num_comp_vectors; }; @@ -115,6 +133,12 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, void ib_uverbs_release_event_file(struct kref *ref); struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); +void ib_uverbs_release_ucq(struct ib_uverbs_file *file, + struct ib_uverbs_event_file *ev_file, + struct ib_ucq_object *uobj); +void ib_uverbs_release_uevent(struct ib_uverbs_file *file, + struct ib_uevent_object *uobj); + void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 14583bb6e2c..8c89abc8c76 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -35,6 +35,7 @@ */ #include +#include #include @@ -114,7 +115,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, kref_get(&file->async_file->ref); kref_get(&file->ref); - file->ucontext = ucontext; + file->ucontext = ucontext; fd_install(resp.async_fd, filp); @@ -761,7 +762,6 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, struct ib_cq *cq; struct ib_ucq_object *uobj; struct ib_uverbs_event_file *ev_file; - struct ib_uverbs_event *evt, *tmp; u64 user_handle; int ret = -EINVAL; @@ -790,23 +790,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - if (ev_file) { - spin_lock_irq(&ev_file->lock); - list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&ev_file->lock); - - kref_put(&ev_file->ref, ib_uverbs_release_event_file); - } - - spin_lock_irq(&file->async_file->lock); - list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file->lock); + ib_uverbs_release_ucq(file, ev_file, uobj); resp.comp_events_reported = uobj->comp_events_reported; resp.async_events_reported = uobj->async_events_reported; @@ -1043,7 +1027,6 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, struct ib_uverbs_destroy_qp_resp resp; struct ib_qp *qp; struct ib_uevent_object *uobj; - struct ib_uverbs_event *evt, *tmp; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1069,12 +1052,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->async_file->lock); - list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file->lock); + ib_uverbs_release_uevent(file, uobj); resp.events_reported = uobj->events_reported; @@ -1741,7 +1719,6 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_srq_resp resp; struct ib_srq *srq; struct ib_uevent_object *uobj; - struct ib_uverbs_event *evt, *tmp; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1767,12 +1744,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->async_file->lock); - list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file->lock); + ib_uverbs_release_uevent(file, uobj); resp.events_reported = uobj->events_reported; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 251c752a7ae..ac08d2c93ea 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -44,6 +44,7 @@ #include #include #include +#include #include @@ -63,6 +64,8 @@ enum { #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) +static struct class *uverbs_class; + DECLARE_MUTEX(ib_uverbs_idr_mutex); DEFINE_IDR(ib_uverbs_pd_idr); DEFINE_IDR(ib_uverbs_mr_idr); @@ -73,6 +76,7 @@ DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); static spinlock_t map_lock; +static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES]; static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, @@ -110,7 +114,54 @@ static struct vfsmount *uverbs_event_mnt; static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); -static int ib_dealloc_ucontext(struct ib_ucontext *context) +static void ib_uverbs_release_dev(struct kref *ref) +{ + struct ib_uverbs_device *dev = + container_of(ref, struct ib_uverbs_device, ref); + + kfree(dev); +} + +void ib_uverbs_release_ucq(struct ib_uverbs_file *file, + struct ib_uverbs_event_file *ev_file, + struct ib_ucq_object *uobj) +{ + struct ib_uverbs_event *evt, *tmp; + + if (ev_file) { + spin_lock_irq(&ev_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&ev_file->lock); + + kref_put(&ev_file->ref, ib_uverbs_release_event_file); + } + + spin_lock_irq(&file->async_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file->lock); +} + +void ib_uverbs_release_uevent(struct ib_uverbs_file *file, + struct ib_uevent_object *uobj) +{ + struct ib_uverbs_event *evt, *tmp; + + spin_lock_irq(&file->async_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file->lock); +} + +static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, + struct ib_ucontext *context) { struct ib_uobject *uobj, *tmp; @@ -129,26 +180,36 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context) list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); + struct ib_uevent_object *uevent = + container_of(uobj, struct ib_uevent_object, uobject); idr_remove(&ib_uverbs_qp_idr, uobj->id); ib_destroy_qp(qp); list_del(&uobj->list); - kfree(container_of(uobj, struct ib_uevent_object, uobject)); + ib_uverbs_release_uevent(file, uevent); + kfree(uevent); } list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id); + struct ib_uverbs_event_file *ev_file = cq->cq_context; + struct ib_ucq_object *ucq = + container_of(uobj, struct ib_ucq_object, uobject); idr_remove(&ib_uverbs_cq_idr, uobj->id); ib_destroy_cq(cq); list_del(&uobj->list); - kfree(container_of(uobj, struct ib_ucq_object, uobject)); + ib_uverbs_release_ucq(file, ev_file, ucq); + kfree(ucq); } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id); + struct ib_uevent_object *uevent = + container_of(uobj, struct ib_uevent_object, uobject); idr_remove(&ib_uverbs_srq_idr, uobj->id); ib_destroy_srq(srq); list_del(&uobj->list); - kfree(container_of(uobj, struct ib_uevent_object, uobject)); + ib_uverbs_release_uevent(file, uevent); + kfree(uevent); } /* XXX Free MWs */ @@ -187,6 +248,8 @@ static void ib_uverbs_release_file(struct kref *ref) container_of(ref, struct ib_uverbs_file, ref); module_put(file->device->ib_dev->owner); + kref_put(&file->device->ref, ib_uverbs_release_dev); + kfree(file); } @@ -552,36 +615,58 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) static int ib_uverbs_open(struct inode *inode, struct file *filp) { - struct ib_uverbs_device *dev = - container_of(inode->i_cdev, struct ib_uverbs_device, dev); + struct ib_uverbs_device *dev; struct ib_uverbs_file *file; + int ret; - if (!try_module_get(dev->ib_dev->owner)) - return -ENODEV; + spin_lock(&map_lock); + dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR]; + if (dev) + kref_get(&dev->ref); + spin_unlock(&map_lock); + + if (!dev) + return -ENXIO; + + if (!try_module_get(dev->ib_dev->owner)) { + ret = -ENODEV; + goto err; + } file = kmalloc(sizeof *file, GFP_KERNEL); if (!file) { - module_put(dev->ib_dev->owner); - return -ENOMEM; + ret = -ENOMEM; + goto err_module; } - file->device = dev; - file->ucontext = NULL; + file->device = dev; + file->ucontext = NULL; + file->async_file = NULL; kref_init(&file->ref); init_MUTEX(&file->mutex); filp->private_data = file; return 0; + +err_module: + module_put(dev->ib_dev->owner); + +err: + kref_put(&dev->ref, ib_uverbs_release_dev); + + return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - ib_dealloc_ucontext(file->ucontext); + ib_uverbs_cleanup_ucontext(file, file->ucontext); + + if (file->async_file) + kref_put(&file->async_file->ref, ib_uverbs_release_event_file); - kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); return 0; @@ -610,8 +695,10 @@ static struct ib_client uverbs_client = { static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { - struct ib_uverbs_device *dev = - container_of(class_dev, struct ib_uverbs_device, class_dev); + struct ib_uverbs_device *dev = class_get_devdata(class_dev); + + if (!dev) + return -ENODEV; return sprintf(buf, "%s\n", dev->ib_dev->name); } @@ -619,28 +706,15 @@ static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static ssize_t show_dev_abi_version(struct class_device *class_dev, char *buf) { - struct ib_uverbs_device *dev = - container_of(class_dev, struct ib_uverbs_device, class_dev); + struct ib_uverbs_device *dev = class_get_devdata(class_dev); + + if (!dev) + return -ENODEV; return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver); } static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); -static void ib_uverbs_release_class_dev(struct class_device *class_dev) -{ - struct ib_uverbs_device *dev = - container_of(class_dev, struct ib_uverbs_device, class_dev); - - cdev_del(&dev->dev); - clear_bit(dev->devnum, dev_map); - kfree(dev); -} - -static struct class uverbs_class = { - .name = "infiniband_verbs", - .release = ib_uverbs_release_class_dev -}; - static ssize_t show_abi_version(struct class *class, char *buf) { return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION); @@ -660,6 +734,8 @@ static void ib_uverbs_add_one(struct ib_device *device) memset(uverbs_dev, 0, sizeof *uverbs_dev); + kref_init(&uverbs_dev->ref); + spin_lock(&map_lock); uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) { @@ -672,40 +748,45 @@ static void ib_uverbs_add_one(struct ib_device *device) uverbs_dev->ib_dev = device; uverbs_dev->num_comp_vectors = 1; - if (device->mmap) - cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops); - else - cdev_init(&uverbs_dev->dev, &uverbs_fops); - uverbs_dev->dev.owner = THIS_MODULE; - kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum); - if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) + uverbs_dev->dev = cdev_alloc(); + if (!uverbs_dev->dev) goto err; + uverbs_dev->dev->owner = THIS_MODULE; + uverbs_dev->dev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; + kobject_set_name(&uverbs_dev->dev->kobj, "uverbs%d", uverbs_dev->devnum); + if (cdev_add(uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) + goto err_cdev; - uverbs_dev->class_dev.class = &uverbs_class; - uverbs_dev->class_dev.dev = device->dma_device; - uverbs_dev->class_dev.devt = uverbs_dev->dev.dev; - snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum); - if (class_device_register(&uverbs_dev->class_dev)) + uverbs_dev->class_dev = class_device_create(uverbs_class, uverbs_dev->dev->dev, + device->dma_device, + "uverbs%d", uverbs_dev->devnum); + if (IS_ERR(uverbs_dev->class_dev)) goto err_cdev; - if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev)) + class_set_devdata(uverbs_dev->class_dev, uverbs_dev); + + if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_ibdev)) goto err_class; - if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_abi_version)) + if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_abi_version)) goto err_class; + spin_lock(&map_lock); + dev_table[uverbs_dev->devnum] = uverbs_dev; + spin_unlock(&map_lock); + ib_set_client_data(device, &uverbs_client, uverbs_dev); return; err_class: - class_device_unregister(&uverbs_dev->class_dev); + class_device_destroy(uverbs_class, uverbs_dev->dev->dev); err_cdev: - cdev_del(&uverbs_dev->dev); + cdev_del(uverbs_dev->dev); clear_bit(uverbs_dev->devnum, dev_map); err: - kfree(uverbs_dev); + kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); return; } @@ -716,7 +797,16 @@ static void ib_uverbs_remove_one(struct ib_device *device) if (!uverbs_dev) return; - class_device_unregister(&uverbs_dev->class_dev); + class_set_devdata(uverbs_dev->class_dev, NULL); + class_device_destroy(uverbs_class, uverbs_dev->dev->dev); + cdev_del(uverbs_dev->dev); + + spin_lock(&map_lock); + dev_table[uverbs_dev->devnum] = NULL; + spin_unlock(&map_lock); + + clear_bit(uverbs_dev->devnum, dev_map); + kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); } static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags, @@ -746,13 +836,14 @@ static int __init ib_uverbs_init(void) goto out; } - ret = class_register(&uverbs_class); - if (ret) { + uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); + if (IS_ERR(uverbs_class)) { + ret = PTR_ERR(uverbs_class); printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } - ret = class_create_file(&uverbs_class, &class_attr_abi_version); + ret = class_create_file(uverbs_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); goto out_class; @@ -786,7 +877,7 @@ out_fs: unregister_filesystem(&uverbs_event_fs); out_class: - class_unregister(&uverbs_class); + class_destroy(uverbs_class); out_chrdev: unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); @@ -800,7 +891,7 @@ static void __exit ib_uverbs_cleanup(void) ib_unregister_client(&uverbs_client); mntput(uverbs_event_mnt); unregister_filesystem(&uverbs_event_fs); - class_unregister(&uverbs_class); + class_destroy(uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); idr_destroy(&ib_uverbs_pd_idr); idr_destroy(&ib_uverbs_mr_idr); -- cgit From 4cce3390c998600f6648e075e475cf8f6dd8cebe Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 28 Oct 2005 16:38:15 -0700 Subject: [IB] fix up class_device_create() calls Fix class_device_create() calls to match the new prototype which takes a parent device pointer. Signed-off-by: Roland Dreier --- drivers/infiniband/core/user_mad.c | 4 ++-- drivers/infiniband/core/uverbs_main.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 9e49816d728..97128e25f78 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -755,7 +755,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (cdev_add(port->dev, base_dev + port->dev_num, 1)) goto err_cdev; - port->class_dev = class_device_create(umad_class, port->dev->dev, + port->class_dev = class_device_create(umad_class, NULL, port->dev->dev, device->dma_device, "umad%d", port->dev_num); if (IS_ERR(port->class_dev)) @@ -775,7 +775,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) goto err_sm_cdev; - port->sm_class_dev = class_device_create(umad_class, port->sm_dev->dev, + port->sm_class_dev = class_device_create(umad_class, NULL, port->sm_dev->dev, device->dma_device, "issm%d", port->dev_num); if (IS_ERR(port->sm_class_dev)) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index ac08d2c93ea..0eb38f479b3 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -757,7 +757,8 @@ static void ib_uverbs_add_one(struct ib_device *device) if (cdev_add(uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) goto err_cdev; - uverbs_dev->class_dev = class_device_create(uverbs_class, uverbs_dev->dev->dev, + uverbs_dev->class_dev = class_device_create(uverbs_class, NULL, + uverbs_dev->dev->dev, device->dma_device, "uverbs%d", uverbs_dev->devnum); if (IS_ERR(uverbs_dev->class_dev)) -- cgit