
[2/2] IB/mad: use CQ abstraction

Message ID 1451913359-25074-3-git-send-email-hch@lst.de (mailing list archive)
State Superseded

Commit Message

Christoph Hellwig Jan. 4, 2016, 1:15 p.m. UTC
Remove the local workqueue to process mad completions and use the CQ API
instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/infiniband/core/mad.c      | 159 +++++++++++++------------------------
 drivers/infiniband/core/mad_priv.h |   2 +-
 2 files changed, 58 insertions(+), 103 deletions(-)
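
The CQ abstraction referred to here is the ib_alloc_cq()/ib_cqe interface: instead of stashing a pointer in wr_id and polling the CQ from a private work item, the consumer embeds a struct ib_cqe with a done callback in its per-request data and points wr_cqe at it; the core then dispatches each completion to that callback. A minimal sketch of the pattern, using made-up names (my_request, my_send_done, my_post_send) rather than code from mad.c:

#include <rdma/ib_verbs.h>

struct my_request {
	struct ib_cqe cqe;		/* completion callback entry */
	/* ... per-request state ... */
};

static void my_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	/* recover the request from the completed work request */
	struct my_request *req =
		container_of(wc->wr_cqe, struct my_request, cqe);

	if (wc->status != IB_WC_SUCCESS)
		return;			/* error handling elided */
	/* ... complete req ... */
}

static struct ib_cq *my_create_cq(struct ib_device *dev, void *ctx, int nr_cqe)
{
	/* completions are polled and dispatched from workqueue context */
	return ib_alloc_cq(dev, ctx, nr_cqe, 0, IB_POLL_WORKQUEUE);
}

static int my_post_send(struct ib_qp *qp, struct my_request *req,
			struct ib_sge *sge)
{
	struct ib_send_wr wr = { }, *bad_wr;

	req->cqe.done = my_send_done;	/* replaces the old wr_id cookie */
	wr.wr_cqe = &req->cqe;
	wr.sg_list = sge;
	wr.num_sge = 1;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	return ib_post_send(qp, &wr, &bad_wr);
}

With IB_POLL_WORKQUEUE the core runs the done callbacks from workqueue context, which is what lets this patch drop the private work item and the MAD_COMPLETION_PROC_LIMIT batching below.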

Comments

Hal Rosenstock Jan. 4, 2016, 4:12 p.m. UTC | #1
On 1/4/2016 9:16 AM, Christoph Hellwig wrote:
> Remove the local workqueue to process mad completions and use the CQ API
> instead.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Hal Rosenstock <hal@mellanox.com>
Ira Weiny Jan. 5, 2016, 12:04 a.m. UTC | #2
On Mon, Jan 04, 2016 at 02:15:59PM +0100, Christoph Hellwig wrote:
> Remove the local workqueue to process mad completions and use the CQ API
> instead.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

One minor nit below...


> ---
>  drivers/infiniband/core/mad.c      | 159 +++++++++++++------------------------
>  drivers/infiniband/core/mad_priv.h |   2 +-
>  2 files changed, 58 insertions(+), 103 deletions(-)
> 
> diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
> index cbe232a..286d1a9 100644
> --- a/drivers/infiniband/core/mad.c
> +++ b/drivers/infiniband/core/mad.c
> @@ -61,18 +61,6 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
>  module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
>  MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
>  
> -/*
> - * Define a limit on the number of completions which will be processed by the
> - * worker thread in a single work item.  This ensures that other work items
> - * (potentially from other users) are processed fairly.
> - *
> - * The number of completions was derived from the default queue sizes above.
> - * We use a value which is double the larger of the 2 queues (receive @ 512)
> - * but keep it fixed such that an increase in that value does not introduce
> - * unfairness.
> - */
> -#define MAD_COMPLETION_PROC_LIMIT 1024
> -
>  static struct list_head ib_mad_port_list;
>  static u32 ib_mad_client_id = 0;
>  
> @@ -96,6 +84,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
>  			      u8 mgmt_class);
>  static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
>  			   struct ib_mad_agent_private *agent_priv);
> +static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
> +			      struct ib_wc *wc);
> +static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
>  
>  /*
>   * Returns a ib_mad_port_private structure or NULL for a device/port
> @@ -702,11 +693,11 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
>  }
>  
>  static void build_smp_wc(struct ib_qp *qp,
> -			 u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
> +			 void *wr_cqe, u16 slid, u16 pkey_index, u8 port_num,

Sorry I did not catch this before, but rather than void *, wouldn't it be better
to use struct ib_cqe?

Regardless:

Reviewed-by: Ira Weiny <ira.weiny@intel.com>

>  			 struct ib_wc *wc)
>  {
>  	memset(wc, 0, sizeof *wc);
> -	wc->wr_id = wr_id;
> +	wc->wr_cqe = wr_cqe;
>  	wc->status = IB_WC_SUCCESS;
>  	wc->opcode = IB_WC_RECV;
>  	wc->pkey_index = pkey_index;
> @@ -844,7 +835,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
>  	}
>  
>  	build_smp_wc(mad_agent_priv->agent.qp,
> -		     send_wr->wr.wr_id, drslid,
> +		     send_wr->wr.wr_cqe, drslid,
>  		     send_wr->pkey_index,
>  		     send_wr->port_num, &mad_wc);
>  
> @@ -1051,7 +1042,9 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
>  
>  	mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
>  
> -	mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
> +	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
> +
> +	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
>  	mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
>  	mad_send_wr->send_wr.wr.num_sge = 2;
>  	mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
> @@ -1163,8 +1156,9 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
>  
>  	/* Set WR ID to find mad_send_wr upon completion */
>  	qp_info = mad_send_wr->mad_agent_priv->qp_info;
> -	mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
>  	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
> +	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
> +	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
>  
>  	mad_agent = mad_send_wr->send_buf.mad_agent;
>  	sge = mad_send_wr->sg_list;
> @@ -2185,13 +2179,14 @@ handle_smi(struct ib_mad_port_private *port_priv,
>  	return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
>  }
>  
> -static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
> -				     struct ib_wc *wc)
> +static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
>  {
> +	struct ib_mad_port_private *port_priv = cq->cq_context;
> +	struct ib_mad_list_head *mad_list =
> +		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
>  	struct ib_mad_qp_info *qp_info;
>  	struct ib_mad_private_header *mad_priv_hdr;
>  	struct ib_mad_private *recv, *response = NULL;
> -	struct ib_mad_list_head *mad_list;
>  	struct ib_mad_agent_private *mad_agent;
>  	int port_num;
>  	int ret = IB_MAD_RESULT_SUCCESS;
> @@ -2199,7 +2194,17 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
>  	u16 resp_mad_pkey_index = 0;
>  	bool opa;
>  
> -	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
> +	if (list_empty_careful(&port_priv->port_list))
> +		return;
> +
> +	if (wc->status != IB_WC_SUCCESS) {
> +		/*
> +		 * Receive errors indicate that the QP has entered the error
> +		 * state - error handling/shutdown code will cleanup
> +		 */
> +		return;
> +	}
> +
>  	qp_info = mad_list->mad_queue->qp_info;
>  	dequeue_mad(mad_list);
>  
> @@ -2240,7 +2245,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
>  	response = alloc_mad_private(mad_size, GFP_KERNEL);
>  	if (!response) {
>  		dev_err(&port_priv->device->dev,
> -			"ib_mad_recv_done_handler no memory for response buffer\n");
> +			"%s: no memory for response buffer\n", __func__);
>  		goto out;
>  	}
>  
> @@ -2426,11 +2431,12 @@ done:
>  	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
>  }
>  
> -static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
> -				     struct ib_wc *wc)
> +static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
>  {
> +	struct ib_mad_port_private *port_priv = cq->cq_context;
> +	struct ib_mad_list_head *mad_list =
> +		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
>  	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
> -	struct ib_mad_list_head		*mad_list;
>  	struct ib_mad_qp_info		*qp_info;
>  	struct ib_mad_queue		*send_queue;
>  	struct ib_send_wr		*bad_send_wr;
> @@ -2438,7 +2444,14 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
>  	unsigned long flags;
>  	int ret;
>  
> -	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
> +	if (list_empty_careful(&port_priv->port_list))
> +		return;
> +
> +	if (wc->status != IB_WC_SUCCESS) {
> +		if (!ib_mad_send_error(port_priv, wc))
> +			return;
> +	}
> +
>  	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
>  				   mad_list);
>  	send_queue = mad_list->mad_queue;
> @@ -2503,24 +2516,15 @@ static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
>  	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
>  }
>  
> -static void mad_error_handler(struct ib_mad_port_private *port_priv,
> -			      struct ib_wc *wc)
> +static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
> +		struct ib_wc *wc)
>  {
> -	struct ib_mad_list_head *mad_list;
> -	struct ib_mad_qp_info *qp_info;
> +	struct ib_mad_list_head *mad_list =
> +		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
> +	struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
>  	struct ib_mad_send_wr_private *mad_send_wr;
>  	int ret;
>  
> -	/* Determine if failure was a send or receive */
> -	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
> -	qp_info = mad_list->mad_queue->qp_info;
> -	if (mad_list->mad_queue == &qp_info->recv_queue)
> -		/*
> -		 * Receive errors indicate that the QP has entered the error
> -		 * state - error handling/shutdown code will cleanup
> -		 */
> -		return;
> -
>  	/*
>  	 * Send errors will transition the QP to SQE - move
>  	 * QP to RTS and repost flushed work requests
> @@ -2535,10 +2539,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
>  			mad_send_wr->retry = 0;
>  			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
>  					&bad_send_wr);
> -			if (ret)
> -				ib_mad_send_done_handler(port_priv, wc);
> -		} else
> -			ib_mad_send_done_handler(port_priv, wc);
> +			if (!ret)
> +				return false;
> +		}
>  	} else {
>  		struct ib_qp_attr *attr;
>  
> @@ -2552,48 +2555,14 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
>  			kfree(attr);
>  			if (ret)
>  				dev_err(&port_priv->device->dev,
> -					"mad_error_handler - ib_modify_qp to RTS : %d\n",
> -					ret);
> +					"%s - ib_modify_qp to RTS: %d\n",
> +					__func__, ret);
>  			else
>  				mark_sends_for_retry(qp_info);
>  		}
> -		ib_mad_send_done_handler(port_priv, wc);
>  	}
> -}
>  
> -/*
> - * IB MAD completion callback
> - */
> -static void ib_mad_completion_handler(struct work_struct *work)
> -{
> -	struct ib_mad_port_private *port_priv;
> -	struct ib_wc wc;
> -	int count = 0;
> -
> -	port_priv = container_of(work, struct ib_mad_port_private, work);
> -	ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
> -
> -	while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
> -		if (wc.status == IB_WC_SUCCESS) {
> -			switch (wc.opcode) {
> -			case IB_WC_SEND:
> -				ib_mad_send_done_handler(port_priv, &wc);
> -				break;
> -			case IB_WC_RECV:
> -				ib_mad_recv_done_handler(port_priv, &wc);
> -				break;
> -			default:
> -				BUG_ON(1);
> -				break;
> -			}
> -		} else
> -			mad_error_handler(port_priv, &wc);
> -
> -		if (++count > MAD_COMPLETION_PROC_LIMIT) {
> -			queue_work(port_priv->wq, &port_priv->work);
> -			break;
> -		}
> -	}
> +	return true;
>  }
>  
>  static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
> @@ -2735,7 +2704,7 @@ static void local_completions(struct work_struct *work)
>  			 * before request
>  			 */
>  			build_smp_wc(recv_mad_agent->agent.qp,
> -				     (unsigned long) local->mad_send_wr,
> +				     local->mad_send_wr->send_wr.wr.wr_cqe,
>  				     be16_to_cpu(IB_LID_PERMISSIVE),
>  				     local->mad_send_wr->send_wr.pkey_index,
>  				     recv_mad_agent->agent.port_num, &wc);
> @@ -2875,17 +2844,6 @@ static void timeout_sends(struct work_struct *work)
>  	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
>  }
>  
> -static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
> -{
> -	struct ib_mad_port_private *port_priv = cq->cq_context;
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
> -	if (!list_empty(&port_priv->port_list))
> -		queue_work(port_priv->wq, &port_priv->work);
> -	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
> -}
> -
>  /*
>   * Allocate receive MADs and post receive WRs for them
>   */
> @@ -2933,8 +2891,9 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
>  			break;
>  		}
>  		mad_priv->header.mapping = sg_list.addr;
> -		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
>  		mad_priv->header.mad_list.mad_queue = recv_queue;
> +		mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
> +		recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
>  
>  		/* Post receive WR */
>  		spin_lock_irqsave(&recv_queue->lock, flags);
> @@ -3171,7 +3130,6 @@ static int ib_mad_port_open(struct ib_device *device,
>  	unsigned long flags;
>  	char name[sizeof "ib_mad123"];
>  	int has_smi;
> -	struct ib_cq_init_attr cq_attr = {};
>  
>  	if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
>  		return -EFAULT;
> @@ -3199,10 +3157,8 @@ static int ib_mad_port_open(struct ib_device *device,
>  	if (has_smi)
>  		cq_size *= 2;
>  
> -	cq_attr.cqe = cq_size;
> -	port_priv->cq = ib_create_cq(port_priv->device,
> -				     ib_mad_thread_completion_handler,
> -				     NULL, port_priv, &cq_attr);
> +	port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
> +			IB_POLL_WORKQUEUE);
>  	if (IS_ERR(port_priv->cq)) {
>  		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
>  		ret = PTR_ERR(port_priv->cq);
> @@ -3231,7 +3187,6 @@ static int ib_mad_port_open(struct ib_device *device,
>  		ret = -ENOMEM;
>  		goto error8;
>  	}
> -	INIT_WORK(&port_priv->work, ib_mad_completion_handler);
>  
>  	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
>  	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
> @@ -3258,7 +3213,7 @@ error7:
>  error6:
>  	ib_dealloc_pd(port_priv->pd);
>  error4:
> -	ib_destroy_cq(port_priv->cq);
> +	ib_free_cq(port_priv->cq);
>  	cleanup_recv_queue(&port_priv->qp_info[1]);
>  	cleanup_recv_queue(&port_priv->qp_info[0]);
>  error3:
> @@ -3291,7 +3246,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
>  	destroy_mad_qp(&port_priv->qp_info[1]);
>  	destroy_mad_qp(&port_priv->qp_info[0]);
>  	ib_dealloc_pd(port_priv->pd);
> -	ib_destroy_cq(port_priv->cq);
> +	ib_free_cq(port_priv->cq);
>  	cleanup_recv_queue(&port_priv->qp_info[1]);
>  	cleanup_recv_queue(&port_priv->qp_info[0]);
>  	/* XXX: Handle deallocation of MAD registration tables */
> diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
> index 990698a..28669f6 100644
> --- a/drivers/infiniband/core/mad_priv.h
> +++ b/drivers/infiniband/core/mad_priv.h
> @@ -64,6 +64,7 @@
>  
>  struct ib_mad_list_head {
>  	struct list_head list;
> +	struct ib_cqe cqe;
>  	struct ib_mad_queue *mad_queue;
>  };
>  
> @@ -204,7 +205,6 @@ struct ib_mad_port_private {
>  	struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
>  	struct list_head agent_list;
>  	struct workqueue_struct *wq;
> -	struct work_struct work;
>  	struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
>  };
>  
> -- 
> 1.9.1
> 
Christoph Hellwig Jan. 6, 2016, 6:40 a.m. UTC | #3
On Mon, Jan 04, 2016 at 07:04:03PM -0500, ira.weiny wrote:
> Sorry I did not catch this before, but rather than void *, wouldn't it be better
> to use struct ib_cqe?

Sure, I'll fix it up.
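
The suggested cleanup would type the build_smp_wc() parameter as struct ib_cqe * rather than void *, so the callers passing wr_cqe pointers are type-checked. A sketch of how the signature would presumably end up (assumed, not the reposted patch):

static void build_smp_wc(struct ib_qp *qp,
			 struct ib_cqe *wr_cqe, u16 slid, u16 pkey_index,
			 u8 port_num, struct ib_wc *wc)
{
	memset(wc, 0, sizeof *wc);
	wc->wr_cqe = wr_cqe;
	/* remaining fields filled in as in the patch above */
}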

Patch

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index cbe232a..286d1a9 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -61,18 +61,6 @@  MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
 module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
 
-/*
- * Define a limit on the number of completions which will be processed by the
- * worker thread in a single work item.  This ensures that other work items
- * (potentially from other users) are processed fairly.
- *
- * The number of completions was derived from the default queue sizes above.
- * We use a value which is double the larger of the 2 queues (receive @ 512)
- * but keep it fixed such that an increase in that value does not introduce
- * unfairness.
- */
-#define MAD_COMPLETION_PROC_LIMIT 1024
-
 static struct list_head ib_mad_port_list;
 static u32 ib_mad_client_id = 0;
 
@@ -96,6 +84,9 @@  static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 			      u8 mgmt_class);
 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 			   struct ib_mad_agent_private *agent_priv);
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+			      struct ib_wc *wc);
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
 
 /*
  * Returns a ib_mad_port_private structure or NULL for a device/port
@@ -702,11 +693,11 @@  static void snoop_recv(struct ib_mad_qp_info *qp_info,
 }
 
 static void build_smp_wc(struct ib_qp *qp,
-			 u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
+			 void *wr_cqe, u16 slid, u16 pkey_index, u8 port_num,
 			 struct ib_wc *wc)
 {
 	memset(wc, 0, sizeof *wc);
-	wc->wr_id = wr_id;
+	wc->wr_cqe = wr_cqe;
 	wc->status = IB_WC_SUCCESS;
 	wc->opcode = IB_WC_RECV;
 	wc->pkey_index = pkey_index;
@@ -844,7 +835,7 @@  static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 	}
 
 	build_smp_wc(mad_agent_priv->agent.qp,
-		     send_wr->wr.wr_id, drslid,
+		     send_wr->wr.wr_cqe, drslid,
 		     send_wr->pkey_index,
 		     send_wr->port_num, &mad_wc);
 
@@ -1051,7 +1042,9 @@  struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 
 	mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
 
-	mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
+	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+
+	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
 	mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
 	mad_send_wr->send_wr.wr.num_sge = 2;
 	mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
@@ -1163,8 +1156,9 @@  int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 
 	/* Set WR ID to find mad_send_wr upon completion */
 	qp_info = mad_send_wr->mad_agent_priv->qp_info;
-	mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
 	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
+	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
 
 	mad_agent = mad_send_wr->send_buf.mad_agent;
 	sge = mad_send_wr->sg_list;
@@ -2185,13 +2179,14 @@  handle_smi(struct ib_mad_port_private *port_priv,
 	return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
 }
 
-static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
-				     struct ib_wc *wc)
+static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+	struct ib_mad_port_private *port_priv = cq->cq_context;
+	struct ib_mad_list_head *mad_list =
+		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
 	struct ib_mad_qp_info *qp_info;
 	struct ib_mad_private_header *mad_priv_hdr;
 	struct ib_mad_private *recv, *response = NULL;
-	struct ib_mad_list_head *mad_list;
 	struct ib_mad_agent_private *mad_agent;
 	int port_num;
 	int ret = IB_MAD_RESULT_SUCCESS;
@@ -2199,7 +2194,17 @@  static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 	u16 resp_mad_pkey_index = 0;
 	bool opa;
 
-	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+	if (list_empty_careful(&port_priv->port_list))
+		return;
+
+	if (wc->status != IB_WC_SUCCESS) {
+		/*
+		 * Receive errors indicate that the QP has entered the error
+		 * state - error handling/shutdown code will cleanup
+		 */
+		return;
+	}
+
 	qp_info = mad_list->mad_queue->qp_info;
 	dequeue_mad(mad_list);
 
@@ -2240,7 +2245,7 @@  static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 	response = alloc_mad_private(mad_size, GFP_KERNEL);
 	if (!response) {
 		dev_err(&port_priv->device->dev,
-			"ib_mad_recv_done_handler no memory for response buffer\n");
+			"%s: no memory for response buffer\n", __func__);
 		goto out;
 	}
 
@@ -2426,11 +2431,12 @@  done:
 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 }
 
-static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
-				     struct ib_wc *wc)
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+	struct ib_mad_port_private *port_priv = cq->cq_context;
+	struct ib_mad_list_head *mad_list =
+		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
 	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
-	struct ib_mad_list_head		*mad_list;
 	struct ib_mad_qp_info		*qp_info;
 	struct ib_mad_queue		*send_queue;
 	struct ib_send_wr		*bad_send_wr;
@@ -2438,7 +2444,14 @@  static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
 	unsigned long flags;
 	int ret;
 
-	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+	if (list_empty_careful(&port_priv->port_list))
+		return;
+
+	if (wc->status != IB_WC_SUCCESS) {
+		if (!ib_mad_send_error(port_priv, wc))
+			return;
+	}
+
 	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
 				   mad_list);
 	send_queue = mad_list->mad_queue;
@@ -2503,24 +2516,15 @@  static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
 }
 
-static void mad_error_handler(struct ib_mad_port_private *port_priv,
-			      struct ib_wc *wc)
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+		struct ib_wc *wc)
 {
-	struct ib_mad_list_head *mad_list;
-	struct ib_mad_qp_info *qp_info;
+	struct ib_mad_list_head *mad_list =
+		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
+	struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
 	struct ib_mad_send_wr_private *mad_send_wr;
 	int ret;
 
-	/* Determine if failure was a send or receive */
-	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
-	qp_info = mad_list->mad_queue->qp_info;
-	if (mad_list->mad_queue == &qp_info->recv_queue)
-		/*
-		 * Receive errors indicate that the QP has entered the error
-		 * state - error handling/shutdown code will cleanup
-		 */
-		return;
-
 	/*
 	 * Send errors will transition the QP to SQE - move
 	 * QP to RTS and repost flushed work requests
@@ -2535,10 +2539,9 @@  static void mad_error_handler(struct ib_mad_port_private *port_priv,
 			mad_send_wr->retry = 0;
 			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
 					&bad_send_wr);
-			if (ret)
-				ib_mad_send_done_handler(port_priv, wc);
-		} else
-			ib_mad_send_done_handler(port_priv, wc);
+			if (!ret)
+				return false;
+		}
 	} else {
 		struct ib_qp_attr *attr;
 
@@ -2552,48 +2555,14 @@  static void mad_error_handler(struct ib_mad_port_private *port_priv,
 			kfree(attr);
 			if (ret)
 				dev_err(&port_priv->device->dev,
-					"mad_error_handler - ib_modify_qp to RTS : %d\n",
-					ret);
+					"%s - ib_modify_qp to RTS: %d\n",
+					__func__, ret);
 			else
 				mark_sends_for_retry(qp_info);
 		}
-		ib_mad_send_done_handler(port_priv, wc);
 	}
-}
 
-/*
- * IB MAD completion callback
- */
-static void ib_mad_completion_handler(struct work_struct *work)
-{
-	struct ib_mad_port_private *port_priv;
-	struct ib_wc wc;
-	int count = 0;
-
-	port_priv = container_of(work, struct ib_mad_port_private, work);
-	ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
-
-	while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
-		if (wc.status == IB_WC_SUCCESS) {
-			switch (wc.opcode) {
-			case IB_WC_SEND:
-				ib_mad_send_done_handler(port_priv, &wc);
-				break;
-			case IB_WC_RECV:
-				ib_mad_recv_done_handler(port_priv, &wc);
-				break;
-			default:
-				BUG_ON(1);
-				break;
-			}
-		} else
-			mad_error_handler(port_priv, &wc);
-
-		if (++count > MAD_COMPLETION_PROC_LIMIT) {
-			queue_work(port_priv->wq, &port_priv->work);
-			break;
-		}
-	}
+	return true;
 }
 
 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
@@ -2735,7 +2704,7 @@  static void local_completions(struct work_struct *work)
 			 * before request
 			 */
 			build_smp_wc(recv_mad_agent->agent.qp,
-				     (unsigned long) local->mad_send_wr,
+				     local->mad_send_wr->send_wr.wr.wr_cqe,
 				     be16_to_cpu(IB_LID_PERMISSIVE),
 				     local->mad_send_wr->send_wr.pkey_index,
 				     recv_mad_agent->agent.port_num, &wc);
@@ -2875,17 +2844,6 @@  static void timeout_sends(struct work_struct *work)
 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 }
 
-static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
-{
-	struct ib_mad_port_private *port_priv = cq->cq_context;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
-	if (!list_empty(&port_priv->port_list))
-		queue_work(port_priv->wq, &port_priv->work);
-	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
-}
-
 /*
  * Allocate receive MADs and post receive WRs for them
  */
@@ -2933,8 +2891,9 @@  static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 			break;
 		}
 		mad_priv->header.mapping = sg_list.addr;
-		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
 		mad_priv->header.mad_list.mad_queue = recv_queue;
+		mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
+		recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
 
 		/* Post receive WR */
 		spin_lock_irqsave(&recv_queue->lock, flags);
@@ -3171,7 +3130,6 @@  static int ib_mad_port_open(struct ib_device *device,
 	unsigned long flags;
 	char name[sizeof "ib_mad123"];
 	int has_smi;
-	struct ib_cq_init_attr cq_attr = {};
 
 	if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
 		return -EFAULT;
@@ -3199,10 +3157,8 @@  static int ib_mad_port_open(struct ib_device *device,
 	if (has_smi)
 		cq_size *= 2;
 
-	cq_attr.cqe = cq_size;
-	port_priv->cq = ib_create_cq(port_priv->device,
-				     ib_mad_thread_completion_handler,
-				     NULL, port_priv, &cq_attr);
+	port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
+			IB_POLL_WORKQUEUE);
 	if (IS_ERR(port_priv->cq)) {
 		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
 		ret = PTR_ERR(port_priv->cq);
@@ -3231,7 +3187,6 @@  static int ib_mad_port_open(struct ib_device *device,
 		ret = -ENOMEM;
 		goto error8;
 	}
-	INIT_WORK(&port_priv->work, ib_mad_completion_handler);
 
 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
 	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
@@ -3258,7 +3213,7 @@  error7:
 error6:
 	ib_dealloc_pd(port_priv->pd);
 error4:
-	ib_destroy_cq(port_priv->cq);
+	ib_free_cq(port_priv->cq);
 	cleanup_recv_queue(&port_priv->qp_info[1]);
 	cleanup_recv_queue(&port_priv->qp_info[0]);
 error3:
@@ -3291,7 +3246,7 @@  static int ib_mad_port_close(struct ib_device *device, int port_num)
 	destroy_mad_qp(&port_priv->qp_info[1]);
 	destroy_mad_qp(&port_priv->qp_info[0]);
 	ib_dealloc_pd(port_priv->pd);
-	ib_destroy_cq(port_priv->cq);
+	ib_free_cq(port_priv->cq);
 	cleanup_recv_queue(&port_priv->qp_info[1]);
 	cleanup_recv_queue(&port_priv->qp_info[0]);
 	/* XXX: Handle deallocation of MAD registration tables */
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 990698a..28669f6 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -64,6 +64,7 @@ 
 
 struct ib_mad_list_head {
 	struct list_head list;
+	struct ib_cqe cqe;
 	struct ib_mad_queue *mad_queue;
 };
 
@@ -204,7 +205,6 @@  struct ib_mad_port_private {
 	struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
 	struct list_head agent_list;
 	struct workqueue_struct *wq;
-	struct work_struct work;
 	struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
 };