diff mbox series

[for-next,2/3] IB/hfi1: Prepare resource waits for dual leg

Message ID 20180926172649.13912.7078.stgit@scvm10.sc.intel.com (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show
Series IB/hfi1: TID RDMA pre-reqs | expand

Commit Message

Dennis Dalessandro Sept. 26, 2018, 5:26 p.m. UTC
From: Kaike Wan <kaike.wan@intel.com>

Current implementation allows each qp to have only one send engine.
As such, each qp has only one list to queue prebuilt packets when send
engine resources are not available. To improve performance, it is
desired to support multiple send engines for each qp.

This patch creates the framework to support two send engines
(two legs) for each qp for the TID RDMA protocol, which can be easily
extended to support more send engines. It achieves the goal by creating
a leg specific struct, iowait_work in the iowait struct, to hold the
work_struct and the tx_list as well as a pointer to the parent iowait
struct.

The hfi1_pkt_state now has an additional field to record the current
leg's work structure, and that is now passed to all egress waiters to
determine the leg that needs to wait via a new iowait helper.  The
APIs are adjusted to use the new leg specific struct as required.

Many new and modified helpers are added to support this change.

Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
---
 drivers/infiniband/hw/hfi1/Makefile      |    1 
 drivers/infiniband/hw/hfi1/iowait.c      |  133 +++++++++++++++++
 drivers/infiniband/hw/hfi1/iowait.h      |  233 ++++++++++++++++++++----------
 drivers/infiniband/hw/hfi1/qp.c          |   67 ++++++---
 drivers/infiniband/hw/hfi1/qp.h          |   31 ++--
 drivers/infiniband/hw/hfi1/ruc.c         |   10 +
 drivers/infiniband/hw/hfi1/sdma.c        |   52 +++----
 drivers/infiniband/hw/hfi1/sdma.h        |    8 +
 drivers/infiniband/hw/hfi1/user_sdma.c   |   14 +-
 drivers/infiniband/hw/hfi1/verbs.c       |   11 +
 drivers/infiniband/hw/hfi1/verbs.h       |    4 -
 drivers/infiniband/hw/hfi1/verbs_txreq.h |   11 +
 drivers/infiniband/hw/hfi1/vnic_sdma.c   |   21 ++-
 drivers/infiniband/hw/qib/qib_verbs.c    |    9 +
 drivers/infiniband/hw/qib/qib_verbs.h    |    6 -
 include/rdma/rdma_vt.h                   |    4 -
 16 files changed, 427 insertions(+), 188 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/iowait.c

Comments

Jason Gunthorpe Sept. 27, 2018, 6:38 p.m. UTC | #1
On Wed, Sep 26, 2018 at 10:26:54AM -0700, Dennis Dalessandro wrote:
>  /**
>   * iowait_sdma_drain() - wait for DMAs to drain
> - *
>   * @wait: iowait structure
>   *
>   * This will delay until the iowait sdmas have
> @@ -215,7 +219,7 @@ static inline void iowait_sdma_inc(struct iowait *wait)
>  
>  /**
>   * iowait_sdma_add - add count to pending
> - * @wait: iowait structure
> + * @wait: iowait_work structure
>   */
>  static inline void iowait_sdma_add(struct iowait *wait, int count)
>  {
> @@ -223,15 +227,6 @@ static inline void iowait_sdma_add(struct iowait *wait, int count)
>  }
>  
>  /**
> - * iowait_sdma_dec - note sdma complete
> - * @wait: iowait structure
> - */
> -static inline int iowait_sdma_dec(struct iowait *wait)
> -{
> -	return atomic_dec_and_test(&wait->sdma_busy);
> -}
> -
> -/**
>   * iowait_pio_drain() - wait for pios to drain
>   *
>   * @wait: iowait structure
> @@ -252,9 +247,23 @@ static inline void iowait_pio_drain(struct iowait *wait)
>   * @wait: iowait structure
>   *
>   */
> -static inline int iowait_pio_pending(struct iowait *wait)
> +static inline int iowait_pio_pending(struct iowait *w)
> +{
> +	return atomic_read(&w->pio_busy);
> +}
> +
> +/**
> + * iowait_drain_wakeup() - trigger iowait_drain() waiter
> + * @wait: iowait structure
> + *
> + * This will trigger any waiters.
> + */
> +static inline void iowait_drain_wakeup(struct iowait *w)
>  {
> -	return atomic_read(&wait->pio_busy);
> +	wake_up(&w->wait_dma);
> +	wake_up(&w->wait_pio);
> +	if (w->sdma_drained)
> +		w->sdma_drained(w);
>  }

Why is there so much churn and code motion here that doesn't seem to
really change anything? This is bad practice, diffs should be reviewed
to remove unnecessary hunks.

Like iowait_drain_wakeup was moved up a bit, iowait_sdma_dec was moved
down a bit, etc.

Jason
Dennis Dalessandro Sept. 27, 2018, 6:56 p.m. UTC | #2
On 9/27/2018 2:38 PM, Jason Gunthorpe wrote:
> On Wed, Sep 26, 2018 at 10:26:54AM -0700, Dennis Dalessandro wrote:
>>   /**
>>    * iowait_sdma_drain() - wait for DMAs to drain
>> - *
>>    * @wait: iowait structure
>>    *
>>    * This will delay until the iowait sdmas have
>> @@ -215,7 +219,7 @@ static inline void iowait_sdma_inc(struct iowait *wait)
>>   
>>   /**
>>    * iowait_sdma_add - add count to pending
>> - * @wait: iowait structure
>> + * @wait: iowait_work structure
>>    */
>>   static inline void iowait_sdma_add(struct iowait *wait, int count)
>>   {
>> @@ -223,15 +227,6 @@ static inline void iowait_sdma_add(struct iowait *wait, int count)
>>   }
>>   
>>   /**
>> - * iowait_sdma_dec - note sdma complete
>> - * @wait: iowait structure
>> - */
>> -static inline int iowait_sdma_dec(struct iowait *wait)
>> -{
>> -	return atomic_dec_and_test(&wait->sdma_busy);
>> -}
>> -
>> -/**
>>    * iowait_pio_drain() - wait for pios to drain
>>    *
>>    * @wait: iowait structure
>> @@ -252,9 +247,23 @@ static inline void iowait_pio_drain(struct iowait *wait)
>>    * @wait: iowait structure
>>    *
>>    */
>> -static inline int iowait_pio_pending(struct iowait *wait)
>> +static inline int iowait_pio_pending(struct iowait *w)
>> +{
>> +	return atomic_read(&w->pio_busy);
>> +}
>> +
>> +/**
>> + * iowait_drain_wakeup() - trigger iowait_drain() waiter
>> + * @wait: iowait structure
>> + *
>> + * This will trigger any waiters.
>> + */
>> +static inline void iowait_drain_wakeup(struct iowait *w)
>>   {
>> -	return atomic_read(&wait->pio_busy);
>> +	wake_up(&w->wait_dma);
>> +	wake_up(&w->wait_pio);
>> +	if (w->sdma_drained)
>> +		w->sdma_drained(w);
>>   }
> 
> Why is there so much churn and code motion here that doesn't seem to
> really change anything? This is bad practice, diffs should be reviewed
> to remove unnecessary hunks.
> 
> Like iowait_drain_wakeup was moved up a bit, iowait_sdma_dec was moved
> down a bit, etc.

Just an oversight I'd say. Kaike is trying to merge and split and 
shuffle patches for the TID RDMA series to be more organized into 
logical pieces per your initial feedback.

-Denny
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index a8dcf82..ff79039 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -20,6 +20,7 @@  hfi1-y := \
 	firmware.o \
 	init.o \
 	intr.o \
+	iowait.o \
 	mad.o \
 	mmu_rb.o \
 	msix.o \
diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c
new file mode 100644
index 0000000..012b845
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/iowait.c
@@ -0,0 +1,133 @@ 
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include "iowait.h"
+
+void iowait_set_flag(struct iowait *wait, u32 flag)
+{
+	set_bit(flag, &wait->flags);
+}
+
+bool iowait_flag_set(struct iowait *wait, u32 flag)
+{
+	return test_bit(flag, &wait->flags);
+}
+
+void iowait_clear_flag(struct iowait *wait, u32 flag)
+{
+	clear_bit(flag, &wait->flags);
+}
+
+/**
+ * iowait_init() - initialize wait structure
+ * @wait: wait struct to initialize
+ * @tx_limit: limit for overflow queuing
+ * @func: restart function for workqueue
+ * @sleep: sleep function for no space
+ * @wakeup: wakeup function for no space
+ *
+ * This function initializes the iowait
+ * structure embedded in the QP or PQ.
+ *
+ */
+void iowait_init(struct iowait *wait, u32 tx_limit,
+		 void (*func)(struct work_struct *work),
+		 void (*tidfunc)(struct work_struct *work),
+		 int (*sleep)(struct sdma_engine *sde,
+			      struct iowait_work *wait,
+			      struct sdma_txreq *tx,
+			      uint seq,
+			      bool pkts_sent),
+		 void (*wakeup)(struct iowait *wait, int reason),
+		 void (*sdma_drained)(struct iowait *wait))
+{
+	int i;
+
+	wait->count = 0;
+	INIT_LIST_HEAD(&wait->list);
+	init_waitqueue_head(&wait->wait_dma);
+	init_waitqueue_head(&wait->wait_pio);
+	atomic_set(&wait->sdma_busy, 0);
+	atomic_set(&wait->pio_busy, 0);
+	wait->tx_limit = tx_limit;
+	wait->sleep = sleep;
+	wait->wakeup = wakeup;
+	wait->sdma_drained = sdma_drained;
+	wait->flags = 0;
+	for (i = 0; i < IOWAIT_SES; i++) {
+		wait->wait[i].iow = wait;
+		INIT_LIST_HEAD(&wait->wait[i].tx_head);
+		if (i == IOWAIT_IB_SE)
+			INIT_WORK(&wait->wait[i].iowork, func);
+		else
+			INIT_WORK(&wait->wait[i].iowork, tidfunc);
+	}
+}
+
+/**
+ * iowait_cancel_work - cancel all work in iowait
+ * @w: the iowait struct
+ */
+void iowait_cancel_work(struct iowait *w)
+{
+	cancel_work_sync(&iowait_get_ib_work(w)->iowork);
+	cancel_work_sync(&iowait_get_tid_work(w)->iowork);
+}
+
+/**
+ * iowait_set_work_flag - set work flag based on leg
+ * @w: the iowait work struct
+ */
+int iowait_set_work_flag(struct iowait_work *w)
+{
+	if (w == &w->iow->wait[IOWAIT_IB_SE]) {
+		iowait_set_flag(w->iow, IOWAIT_PENDING_IB);
+		return IOWAIT_IB_SE;
+	}
+	iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
+	return IOWAIT_TID_SE;
+}
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 3d9c32c..c20a093 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -1,7 +1,7 @@ 
 #ifndef _HFI1_IOWAIT_H
 #define _HFI1_IOWAIT_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@ 
 
 #include <linux/list.h>
 #include <linux/workqueue.h>
+#include <linux/wait.h>
 #include <linux/sched.h>
 
 #include "sdma_txreq.h"
@@ -59,16 +60,47 @@ 
  */
 typedef void (*restart_t)(struct work_struct *work);
 
+#define IOWAIT_PENDING_IB  0x0
+#define IOWAIT_PENDING_TID 0x1
+
+/*
+ * A QP can have multiple Send Engines (SEs).
+ *
+ * The current use case is for supporting a TID RDMA
+ * packet build/xmit mechanism independent from verbs.
+ */
+#define IOWAIT_SES 2
+#define IOWAIT_IB_SE 0
+#define IOWAIT_TID_SE 1
+
 struct sdma_txreq;
 struct sdma_engine;
 /**
- * struct iowait - linkage for delayed progress/waiting
+ * @iowork: the work struct
+ * @tx_head: list of prebuilt packets
+ * @iow: the parent iowait structure
+ *
+ * This structure is the work item (process) specific
+ * details associated with each of the two SEs of the
+ * QP.
+ *
+ * The workstruct and the queued TXs are unique to each
+ * SE.
+ */
+struct iowait;
+struct iowait_work {
+	struct work_struct iowork;
+	struct list_head tx_head;
+	struct iowait *iow;
+};
+
+/**
  * @list: used to add/insert into QP/PQ wait lists
- * @lock: uses to record the list head lock
  * @tx_head: overflow list of sdma_txreq's
  * @sleep: no space callback
  * @wakeup: space callback wakeup
  * @sdma_drained: sdma count drained
+ * @lock: lock protected head of wait queue
  * @iowork: workqueue overhead
  * @wait_dma: wait for sdma_busy == 0
  * @wait_pio: wait for pio_busy == 0
@@ -76,6 +108,8 @@ 
  * @count: total number of descriptors in tx_head'ed list
  * @tx_limit: limit for overflow queuing
  * @tx_count: number of tx entry's in tx_head'ed list
+ * @flags: wait flags (one per QP)
+ * @wait: SE array
  *
  * This is to be embedded in user's state structure
  * (QP or PQ).
@@ -98,13 +132,11 @@ 
  * Waiters explicity know that, but the destroy
  * code that unwaits QPs does not.
  */
-
 struct iowait {
 	struct list_head list;
-	struct list_head tx_head;
 	int (*sleep)(
 		struct sdma_engine *sde,
-		struct iowait *wait,
+		struct iowait_work *wait,
 		struct sdma_txreq *tx,
 		uint seq,
 		bool pkts_sent
@@ -112,7 +144,6 @@  struct iowait {
 	void (*wakeup)(struct iowait *wait, int reason);
 	void (*sdma_drained)(struct iowait *wait);
 	seqlock_t *lock;
-	struct work_struct iowork;
 	wait_queue_head_t wait_dma;
 	wait_queue_head_t wait_pio;
 	atomic_t sdma_busy;
@@ -121,68 +152,41 @@  struct iowait {
 	u32 tx_limit;
 	u32 tx_count;
 	u8 starved_cnt;
+	unsigned long flags;
+	struct iowait_work wait[IOWAIT_SES];
 };
 
 #define SDMA_AVAIL_REASON 0
 
-/**
- * iowait_init() - initialize wait structure
- * @wait: wait struct to initialize
- * @tx_limit: limit for overflow queuing
- * @func: restart function for workqueue
- * @sleep: sleep function for no space
- * @resume: wakeup function for no space
- *
- * This function initializes the iowait
- * structure embedded in the QP or PQ.
- *
- */
+void iowait_set_flag(struct iowait *wait, u32 flag);
+bool iowait_flag_set(struct iowait *wait, u32 flag);
+void iowait_clear_flag(struct iowait *wait, u32 flag);
 
-static inline void iowait_init(
-	struct iowait *wait,
-	u32 tx_limit,
-	void (*func)(struct work_struct *work),
-	int (*sleep)(
-		struct sdma_engine *sde,
-		struct iowait *wait,
-		struct sdma_txreq *tx,
-		uint seq,
-		bool pkts_sent),
-	void (*wakeup)(struct iowait *wait, int reason),
-	void (*sdma_drained)(struct iowait *wait))
-{
-	wait->count = 0;
-	wait->lock = NULL;
-	INIT_LIST_HEAD(&wait->list);
-	INIT_LIST_HEAD(&wait->tx_head);
-	INIT_WORK(&wait->iowork, func);
-	init_waitqueue_head(&wait->wait_dma);
-	init_waitqueue_head(&wait->wait_pio);
-	atomic_set(&wait->sdma_busy, 0);
-	atomic_set(&wait->pio_busy, 0);
-	wait->tx_limit = tx_limit;
-	wait->sleep = sleep;
-	wait->wakeup = wakeup;
-	wait->sdma_drained = sdma_drained;
-}
+void iowait_init(struct iowait *wait, u32 tx_limit,
+		 void (*func)(struct work_struct *work),
+		 void (*tidfunc)(struct work_struct *work),
+		 int (*sleep)(struct sdma_engine *sde,
+			      struct iowait_work *wait,
+			      struct sdma_txreq *tx,
+			      uint seq,
+			      bool pkts_sent),
+		 void (*wakeup)(struct iowait *wait, int reason),
+		 void (*sdma_drained)(struct iowait *wait));
 
 /**
- * iowait_schedule() - initialize wait structure
+ * iowait_schedule() - schedule the default send engine work
  * @wait: wait struct to schedule
  * @wq: workqueue for schedule
  * @cpu: cpu
  */
-static inline void iowait_schedule(
-	struct iowait *wait,
-	struct workqueue_struct *wq,
-	int cpu)
+static inline bool iowait_schedule(struct iowait *wait,
+				   struct workqueue_struct *wq, int cpu)
 {
-	queue_work_on(cpu, wq, &wait->iowork);
+	return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
 }
 
 /**
  * iowait_sdma_drain() - wait for DMAs to drain
- *
  * @wait: iowait structure
  *
  * This will delay until the iowait sdmas have
@@ -215,7 +219,7 @@  static inline void iowait_sdma_inc(struct iowait *wait)
 
 /**
  * iowait_sdma_add - add count to pending
- * @wait: iowait structure
+ * @wait: iowait structure
  */
 static inline void iowait_sdma_add(struct iowait *wait, int count)
 {
@@ -223,15 +227,6 @@  static inline void iowait_sdma_add(struct iowait *wait, int count)
 }
 
 /**
- * iowait_sdma_dec - note sdma complete
- * @wait: iowait structure
- */
-static inline int iowait_sdma_dec(struct iowait *wait)
-{
-	return atomic_dec_and_test(&wait->sdma_busy);
-}
-
-/**
  * iowait_pio_drain() - wait for pios to drain
  *
  * @wait: iowait structure
@@ -252,9 +247,23 @@  static inline void iowait_pio_drain(struct iowait *wait)
  * @wait: iowait structure
  *
  */
-static inline int iowait_pio_pending(struct iowait *wait)
+static inline int iowait_pio_pending(struct iowait *w)
+{
+	return atomic_read(&w->pio_busy);
+}
+
+/**
+ * iowait_drain_wakeup() - trigger iowait_drain() waiter
+ * @w: iowait structure
+ *
+ * This will trigger any waiters.
+ */
+static inline void iowait_drain_wakeup(struct iowait *w)
 {
-	return atomic_read(&wait->pio_busy);
+	wake_up(&w->wait_dma);
+	wake_up(&w->wait_pio);
+	if (w->sdma_drained)
+		w->sdma_drained(w);
 }
 
 /**
@@ -267,35 +276,32 @@  static inline void iowait_pio_inc(struct iowait *wait)
 }
 
 /**
- * iowait_sdma_dec - note pio complete
+ * iowait_pio_dec - note pio complete
  * @wait: iowait structure
  */
 static inline int iowait_pio_dec(struct iowait *wait)
 {
+	if (!wait)
+		return 0;
 	return atomic_dec_and_test(&wait->pio_busy);
 }
 
 /**
- * iowait_drain_wakeup() - trigger iowait_drain() waiter
- *
+ * iowait_sdma_dec - note sdma complete
  * @wait: iowait structure
- *
- * This will trigger any waiters.
  */
-static inline void iowait_drain_wakeup(struct iowait *wait)
+static inline int iowait_sdma_dec(struct iowait *wait)
 {
-	wake_up(&wait->wait_dma);
-	wake_up(&wait->wait_pio);
-	if (wait->sdma_drained)
-		wait->sdma_drained(wait);
+	if (!wait)
+		return 0;
+	return atomic_dec_and_test(&wait->sdma_busy);
 }
 
 /**
  * iowait_get_txhead() - get packet off of iowait list
- *
  * @wait wait struture
  */
-static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
+static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
 {
 	struct sdma_txreq *tx = NULL;
 
@@ -309,6 +315,28 @@  static inline void iowait_drain_wakeup(struct iowait *wait)
 	return tx;
 }
 
+static inline u16 iowait_get_desc(struct iowait_work *w)
+{
+	u16 num_desc = 0;
+	struct sdma_txreq *tx = NULL;
+
+	if (!list_empty(&w->tx_head)) {
+		tx = list_first_entry(&w->tx_head, struct sdma_txreq,
+				      list);
+		num_desc = tx->num_desc;
+	}
+	return num_desc;
+}
+
+static inline u32 iowait_get_all_desc(struct iowait *w)
+{
+	u32 num_desc = 0;
+
+	num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]);
+	num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]);
+	return num_desc;
+}
+
 /**
  * iowait_queue - Put the iowait on a wait queue
  * @pkts_sent: have some packets been sent before queuing?
@@ -372,12 +400,57 @@  static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
 }
 
 /**
- * iowait_packet_queued() - determine if a packet is already built
+ * iowait_packet_queued() - determine if a packet is queued
  * @wait: the wait structure
  */
-static inline bool iowait_packet_queued(struct iowait *wait)
+static inline bool iowait_packet_queued(struct iowait_work *w)
+{
+	return !list_empty(&w->tx_head);
+}
+
+/**
+ * iowait_inc_wait_count - increment wait counts
+ * @w: the leg work struct
+ * @n: the count
+ */
+static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n)
+{
+	if (!w)
+		return;
+	w->iow->tx_count++;
+	w->iow->count += n;
+}
+
+/**
+ * iowait_get_tid_work - return iowait_work for tid SE
+ * @w: the iowait struct
+ */
+static inline struct iowait_work *iowait_get_tid_work(struct iowait *w)
+{
+	return &w->wait[IOWAIT_TID_SE];
+}
+
+/**
+ * iowait_get_ib_work - return iowait_work for ib SE
+ * @w: the iowait struct
+ */
+static inline struct iowait_work *iowait_get_ib_work(struct iowait *w)
 {
-	return !list_empty(&wait->tx_head);
+	return &w->wait[IOWAIT_IB_SE];
 }
 
+/**
+ * iowait_ioww_to_iow - return iowait given iowait_work
+ * @w: the iowait_work struct
+ */
+static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w)
+{
+	if (likely(w))
+		return w->iow;
+	return NULL;
+}
+
+void iowait_cancel_work(struct iowait *w);
+int iowait_set_work_flag(struct iowait_work *w);
+
 #endif
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index edaca2e..c9fe0d9 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -66,7 +66,7 @@ 
 static void flush_tx_list(struct rvt_qp *qp);
 static int iowait_sleep(
 	struct sdma_engine *sde,
-	struct iowait *wait,
+	struct iowait_work *wait,
 	struct sdma_txreq *stx,
 	unsigned int seq,
 	bool pkts_sent);
@@ -134,15 +134,13 @@  static int iowait_sleep(
 
 };
 
-static void flush_tx_list(struct rvt_qp *qp)
+static void flush_list_head(struct list_head *l)
 {
-	struct hfi1_qp_priv *priv = qp->priv;
-
-	while (!list_empty(&priv->s_iowait.tx_head)) {
+	while (!list_empty(l)) {
 		struct sdma_txreq *tx;
 
 		tx = list_first_entry(
-			&priv->s_iowait.tx_head,
+			l,
 			struct sdma_txreq,
 			list);
 		list_del_init(&tx->list);
@@ -151,6 +149,14 @@  static void flush_tx_list(struct rvt_qp *qp)
 	}
 }
 
+static void flush_tx_list(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
+	flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
+}
+
 static void flush_iowait(struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
@@ -336,7 +342,7 @@  int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
  * It is only used in the post send, which doesn't hold
  * the s_lock.
  */
-void _hfi1_schedule_send(struct rvt_qp *qp)
+bool _hfi1_schedule_send(struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibport *ibp =
@@ -344,10 +350,10 @@  void _hfi1_schedule_send(struct rvt_qp *qp)
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 
-	iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
-			priv->s_sde ?
-			priv->s_sde->cpu :
-			cpumask_first(cpumask_of_node(dd->node)));
+	return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
+			       priv->s_sde ?
+			       priv->s_sde->cpu :
+			       cpumask_first(cpumask_of_node(dd->node)));
 }
 
 static void qp_pio_drain(struct rvt_qp *qp)
@@ -375,12 +381,32 @@  static void qp_pio_drain(struct rvt_qp *qp)
  *
  * This schedules qp progress and caller should hold
  * the s_lock.
+ * @return true if the first leg is scheduled;
+ * false if the first leg is not scheduled.
  */
-void hfi1_schedule_send(struct rvt_qp *qp)
+bool hfi1_schedule_send(struct rvt_qp *qp)
 {
 	lockdep_assert_held(&qp->s_lock);
-	if (hfi1_send_ok(qp))
+	if (hfi1_send_ok(qp)) {
 		_hfi1_schedule_send(qp);
+		return true;
+	}
+	if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+		iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
+				IOWAIT_PENDING_IB);
+	return false;
+}
+
+static void hfi1_qp_schedule(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+	bool ret;
+
+	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
+		ret = hfi1_schedule_send(qp);
+		if (ret)
+			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+	}
 }
 
 void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@@ -391,16 +417,22 @@  void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
 	if (qp->s_flags & flag) {
 		qp->s_flags &= ~flag;
 		trace_hfi1_qpwakeup(qp, flag);
-		hfi1_schedule_send(qp);
+		hfi1_qp_schedule(qp);
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	/* Notify hfi1_destroy_qp() if it is waiting. */
 	rvt_put_qp(qp);
 }
 
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
+{
+	if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
+		qp->s_flags &= ~RVT_S_BUSY;
+}
+
 static int iowait_sleep(
 	struct sdma_engine *sde,
-	struct iowait *wait,
+	struct iowait_work *wait,
 	struct sdma_txreq *stx,
 	uint seq,
 	bool pkts_sent)
@@ -441,7 +473,7 @@  static int iowait_sleep(
 			rvt_get_qp(qp);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~RVT_S_BUSY;
+		hfi1_qp_unbusy(qp, wait);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		ret = -EBUSY;
 	} else {
@@ -668,6 +700,7 @@  void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
 		&priv->s_iowait,
 		1,
 		_hfi1_do_send,
+		NULL,
 		iowait_sleep,
 		iowait_wakeup,
 		iowait_sdma_drained);
@@ -717,7 +750,7 @@  void stop_send_queue(struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
-	cancel_work_sync(&priv->s_iowait.iowork);
+	iowait_cancel_work(&priv->s_iowait);
 }
 
 void quiesce_qp(struct rvt_qp *qp)
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index 078cff7..7adb6df 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -58,18 +58,6 @@ 
 extern const struct rvt_operation_params hfi1_post_parms[];
 
 /*
- * Send if not busy or waiting for I/O and either
- * a RC response is pending or we can process send work requests.
- */
-static inline int hfi1_send_ok(struct rvt_qp *qp)
-{
-	return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
-		(verbs_txreq_queued(qp) ||
-		(qp->s_flags & RVT_S_RESP_PENDING) ||
-		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
-}
-
-/*
  * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
  *
  * HFI1_S_AHG_VALID - ahg header valid on chip
@@ -90,6 +78,20 @@  static inline int hfi1_send_ok(struct rvt_qp *qp)
 #define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
 
 /*
+ * Send if not busy or waiting for I/O and either
+ * a RC response is pending or we can process send work requests.
+ */
+static inline int hfi1_send_ok(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	return !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
+		(verbs_txreq_queued(iowait_get_ib_work(&priv->s_iowait)) ||
+		(qp->s_flags & RVT_S_RESP_PENDING) ||
+		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
+}
+
+/*
  * free_ahg - clear ahg from QP
  */
 static inline void clear_ahg(struct rvt_qp *qp)
@@ -129,8 +131,8 @@  struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
 
 void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter);
 
-void _hfi1_schedule_send(struct rvt_qp *qp);
-void hfi1_schedule_send(struct rvt_qp *qp);
+bool _hfi1_schedule_send(struct rvt_qp *qp);
+bool hfi1_schedule_send(struct rvt_qp *qp);
 
 void hfi1_migrate_qp(struct rvt_qp *qp);
 
@@ -150,4 +152,5 @@  int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
 int mtu_to_path_mtu(u32 mtu);
 void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait);
 #endif /* _QP_H */
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 5f56f3c..17b49b4 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -825,8 +825,8 @@  void hfi1_do_send_from_rvt(struct rvt_qp *qp)
 
 void _hfi1_do_send(struct work_struct *work)
 {
-	struct iowait *wait = container_of(work, struct iowait, iowork);
-	struct rvt_qp *qp = iowait_to_qp(wait);
+	struct iowait_work *w = container_of(work, struct iowait_work, iowork);
+	struct rvt_qp *qp = iowait_to_qp(w->iow);
 
 	hfi1_do_send(qp, true);
 }
@@ -850,6 +850,7 @@  void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ps.ppd = ppd_from_ibp(ps.ibp);
 	ps.in_thread = in_thread;
+	ps.wait = iowait_get_ib_work(&priv->s_iowait);
 
 	trace_hfi1_rc_do_send(qp, in_thread);
 
@@ -883,6 +884,8 @@  void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 
 	/* Return if we are already busy processing a work request. */
 	if (!hfi1_send_ok(qp)) {
+		if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+			iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
 		spin_unlock_irqrestore(&qp->s_lock, ps.flags);
 		return;
 	}
@@ -896,7 +899,7 @@  void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 	ps.pkts_sent = false;
 
 	/* insure a pre-built packet is handled  */
-	ps.s_txreq = get_waiting_verbs_txreq(qp);
+	ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
 	do {
 		/* Check for a constructed packet to be sent. */
 		if (ps.s_txreq) {
@@ -907,6 +910,7 @@  void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
 			 */
 			if (hfi1_verbs_send(qp, &ps))
 				return;
+
 			/* allow other tasks to run */
 			if (schedule_send_yield(qp, &ps))
 				return;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 7a9b67e..891d238 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -378,7 +378,7 @@  static inline void complete_tx(struct sdma_engine *sde,
 	__sdma_txclean(sde->dd, tx);
 	if (complete)
 		(*complete)(tx, res);
-	if (wait && iowait_sdma_dec(wait))
+	if (iowait_sdma_dec(wait))
 		iowait_drain_wakeup(wait);
 }
 
@@ -1758,7 +1758,6 @@  static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
 	struct iowait *wait, *nw;
 	struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
 	uint i, n = 0, seq, max_idx = 0;
-	struct sdma_txreq *stx;
 	struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
 	u8 max_starved_cnt = 0;
 
@@ -1779,19 +1778,13 @@  static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
 					nw,
 					&sde->dmawait,
 					list) {
-				u16 num_desc = 0;
+				u32 num_desc;
 
 				if (!wait->wakeup)
 					continue;
 				if (n == ARRAY_SIZE(waits))
 					break;
-				if (!list_empty(&wait->tx_head)) {
-					stx = list_first_entry(
-						&wait->tx_head,
-						struct sdma_txreq,
-						list);
-					num_desc = stx->num_desc;
-				}
+				num_desc = iowait_get_all_desc(wait);
 				if (num_desc > avail)
 					break;
 				avail -= num_desc;
@@ -2346,7 +2339,7 @@  static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
  */
 static int sdma_check_progress(
 	struct sdma_engine *sde,
-	struct iowait *wait,
+	struct iowait_work *wait,
 	struct sdma_txreq *tx,
 	bool pkts_sent)
 {
@@ -2356,12 +2349,12 @@  static int sdma_check_progress(
 	if (tx->num_desc <= sde->desc_avail)
 		return -EAGAIN;
 	/* pulse the head_lock */
-	if (wait && wait->sleep) {
+	if (wait && iowait_ioww_to_iow(wait)->sleep) {
 		unsigned seq;
 
 		seq = raw_seqcount_begin(
 			(const seqcount_t *)&sde->head_lock.seqcount);
-		ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
+		ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
 		if (ret == -EAGAIN)
 			sde->desc_avail = sdma_descq_freecnt(sde);
 	} else {
@@ -2373,7 +2366,7 @@  static int sdma_check_progress(
 /**
  * sdma_send_txreq() - submit a tx req to ring
  * @sde: sdma engine to use
- * @wait: wait structure to use when full (may be NULL)
+ * @wait: send engine (SE) wait structure to use when full (may be NULL)
  * @tx: sdma_txreq to submit
  * @pkts_sent: has any packet been sent yet?
  *
@@ -2386,7 +2379,7 @@  static int sdma_check_progress(
  * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
  */
 int sdma_send_txreq(struct sdma_engine *sde,
-		    struct iowait *wait,
+		    struct iowait_work *wait,
 		    struct sdma_txreq *tx,
 		    bool pkts_sent)
 {
@@ -2397,7 +2390,7 @@  int sdma_send_txreq(struct sdma_engine *sde,
 	/* user should have supplied entire packet */
 	if (unlikely(tx->tlen))
 		return -EINVAL;
-	tx->wait = wait;
+	tx->wait = iowait_ioww_to_iow(wait);
 	spin_lock_irqsave(&sde->tail_lock, flags);
 retry:
 	if (unlikely(!__sdma_running(sde)))
@@ -2406,14 +2399,14 @@  int sdma_send_txreq(struct sdma_engine *sde,
 		goto nodesc;
 	tail = submit_tx(sde, tx);
 	if (wait)
-		iowait_sdma_inc(wait);
+		iowait_sdma_inc(iowait_ioww_to_iow(wait));
 	sdma_update_tail(sde, tail);
 unlock:
 	spin_unlock_irqrestore(&sde->tail_lock, flags);
 	return ret;
 unlock_noconn:
 	if (wait)
-		iowait_sdma_inc(wait);
+		iowait_sdma_inc(iowait_ioww_to_iow(wait));
 	tx->next_descq_idx = 0;
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
 	tx->sn = sde->tail_sn++;
@@ -2422,10 +2415,7 @@  int sdma_send_txreq(struct sdma_engine *sde,
 	spin_lock(&sde->flushlist_lock);
 	list_add_tail(&tx->list, &sde->flushlist);
 	spin_unlock(&sde->flushlist_lock);
-	if (wait) {
-		wait->tx_count++;
-		wait->count += tx->num_desc;
-	}
+	iowait_inc_wait_count(wait, tx->num_desc);
 	schedule_work(&sde->flush_worker);
 	ret = -ECOMM;
 	goto unlock;
@@ -2442,7 +2432,7 @@  int sdma_send_txreq(struct sdma_engine *sde,
 /**
  * sdma_send_txlist() - submit a list of tx req to ring
  * @sde: sdma engine to use
- * @wait: wait structure to use when full (may be NULL)
+ * @wait: send engine (SE) wait structure to use when full (may be NULL)
  * @tx_list: list of sdma_txreqs to submit
  * @count: pointer to a u16 which, after return will contain the total number of
  *         sdma_txreqs removed from the tx_list. This will include sdma_txreqs
@@ -2467,7 +2457,7 @@  int sdma_send_txreq(struct sdma_engine *sde,
  * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
  * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
  */
-int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
+int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
 		     struct list_head *tx_list, u16 *count_out)
 {
 	struct sdma_txreq *tx, *tx_next;
@@ -2479,7 +2469,7 @@  int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
 	spin_lock_irqsave(&sde->tail_lock, flags);
 retry:
 	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
-		tx->wait = wait;
+		tx->wait = iowait_ioww_to_iow(wait);
 		if (unlikely(!__sdma_running(sde)))
 			goto unlock_noconn;
 		if (unlikely(tx->num_desc > sde->desc_avail))
@@ -2500,8 +2490,9 @@  int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
 update_tail:
 	total_count = submit_count + flush_count;
 	if (wait) {
-		iowait_sdma_add(wait, total_count);
-		iowait_starve_clear(submit_count > 0, wait);
+		iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
+		iowait_starve_clear(submit_count > 0,
+				    iowait_ioww_to_iow(wait));
 	}
 	if (tail != INVALID_TAIL)
 		sdma_update_tail(sde, tail);
@@ -2511,7 +2502,7 @@  int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
 unlock_noconn:
 	spin_lock(&sde->flushlist_lock);
 	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
-		tx->wait = wait;
+		tx->wait = iowait_ioww_to_iow(wait);
 		list_del_init(&tx->list);
 		tx->next_descq_idx = 0;
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
@@ -2520,10 +2511,7 @@  int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
 #endif
 		list_add_tail(&tx->list, &sde->flushlist);
 		flush_count++;
-		if (wait) {
-			wait->tx_count++;
-			wait->count += tx->num_desc;
-		}
+		iowait_inc_wait_count(wait, tx->num_desc);
 	}
 	spin_unlock(&sde->flushlist_lock);
 	schedule_work(&sde->flush_worker);
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index c076eef..6dc63d7 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -1,7 +1,7 @@ 
 #ifndef _HFI1_SDMA_H
 #define _HFI1_SDMA_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -840,14 +840,14 @@  static inline int sdma_txadd_kvaddr(
 			dd, SDMA_MAP_SINGLE, tx, addr, len);
 }
 
-struct iowait;
+struct iowait_work;
 
 int sdma_send_txreq(struct sdma_engine *sde,
-		    struct iowait *wait,
+		    struct iowait_work *wait,
 		    struct sdma_txreq *tx,
 		    bool pkts_sent);
 int sdma_send_txlist(struct sdma_engine *sde,
-		     struct iowait *wait,
+		     struct iowait_work *wait,
 		     struct list_head *tx_list,
 		     u16 *count_out);
 
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 704ad99..3f0aadc 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,5 +1,5 @@ 
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -100,7 +100,7 @@  static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
 
 static int defer_packet_queue(
 	struct sdma_engine *sde,
-	struct iowait *wait,
+	struct iowait_work *wait,
 	struct sdma_txreq *txreq,
 	uint seq,
 	bool pkts_sent);
@@ -123,13 +123,13 @@  static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
 
 static int defer_packet_queue(
 	struct sdma_engine *sde,
-	struct iowait *wait,
+	struct iowait_work *wait,
 	struct sdma_txreq *txreq,
 	uint seq,
 	bool pkts_sent)
 {
 	struct hfi1_user_sdma_pkt_q *pq =
-		container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
+		container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
 	struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
 	struct user_sdma_txreq *tx =
 		container_of(txreq, struct user_sdma_txreq, txreq);
@@ -191,7 +191,7 @@  int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
 	atomic_set(&pq->n_locked, 0);
 	pq->mm = fd->mm;
 
-	iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
+	iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
 		    activate_packet_queue, NULL);
 	pq->reqidx = 0;
 
@@ -912,7 +912,9 @@  static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
 		npkts++;
 	}
 dosend:
-	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
+	ret = sdma_send_txlist(req->sde,
+			       iowait_get_ib_work(&pq->busy),
+			       &req->txps, &count);
 	req->seqsubmitted += count;
 	if (req->seqsubmitted == req->info.npkts) {
 		/*
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 3cc549b..0bcd898 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -737,7 +737,7 @@  static int wait_kmem(struct hfi1_ibdev *dev,
 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		write_seqlock(&dev->iowait_lock);
 		list_add_tail(&ps->s_txreq->txreq.list,
-			      &priv->s_iowait.tx_head);
+			      &ps->wait->tx_head);
 		if (list_empty(&priv->s_iowait.list)) {
 			if (list_empty(&dev->memwait))
 				mod_timer(&dev->mem_timer, jiffies + 1);
@@ -748,7 +748,7 @@  static int wait_kmem(struct hfi1_ibdev *dev,
 			rvt_get_qp(qp);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~RVT_S_BUSY;
+		hfi1_qp_unbusy(qp, ps->wait);
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -950,8 +950,7 @@  int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 		if (unlikely(ret))
 			goto bail_build;
 	}
-	ret =  sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq,
-			       ps->pkts_sent);
+	ret =  sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent);
 	if (unlikely(ret < 0)) {
 		if (ret == -ECOMM)
 			goto bail_ecomm;
@@ -1001,7 +1000,7 @@  static int pio_wait(struct rvt_qp *qp,
 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		write_seqlock(&dev->iowait_lock);
 		list_add_tail(&ps->s_txreq->txreq.list,
-			      &priv->s_iowait.tx_head);
+			      &ps->wait->tx_head);
 		if (list_empty(&priv->s_iowait.list)) {
 			struct hfi1_ibdev *dev = &dd->verbs_dev;
 			int was_empty;
@@ -1020,7 +1019,7 @@  static int pio_wait(struct rvt_qp *qp,
 				hfi1_sc_wantpiobuf_intr(sc, 1);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~RVT_S_BUSY;
+		hfi1_qp_unbusy(qp, ps->wait);
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index bc77ffe..d416411 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -166,11 +166,13 @@  struct hfi1_qp_priv {
  * This structure is used to hold commonly lookedup and computed values during
  * the send engine progress.
  */
+struct iowait_work;
 struct hfi1_pkt_state {
 	struct hfi1_ibdev *dev;
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
 	struct verbs_txreq *s_txreq;
+	struct iowait_work *wait;
 	unsigned long flags;
 	unsigned long timeout;
 	unsigned long timeout_int;
@@ -247,7 +249,7 @@  struct hfi1_ibdev {
 	return container_of(rdi, struct hfi1_ibdev, rdi);
 }
 
-static inline struct rvt_qp *iowait_to_qp(struct  iowait *s_iowait)
+static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
 {
 	struct hfi1_qp_priv *priv;
 
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1c19bbc..2a77af2 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -102,22 +102,19 @@  struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 	return &tx->txreq;
 }
 
-static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp)
+static inline struct verbs_txreq *get_waiting_verbs_txreq(struct iowait_work *w)
 {
 	struct sdma_txreq *stx;
-	struct hfi1_qp_priv *priv = qp->priv;
 
-	stx = iowait_get_txhead(&priv->s_iowait);
+	stx = iowait_get_txhead(w);
 	if (stx)
 		return container_of(stx, struct verbs_txreq, txreq);
 	return NULL;
 }
 
-static inline bool verbs_txreq_queued(struct rvt_qp *qp)
+static inline bool verbs_txreq_queued(struct iowait_work *w)
 {
-	struct hfi1_qp_priv *priv = qp->priv;
-
-	return iowait_packet_queued(&priv->s_iowait);
+	return iowait_packet_queued(w);
 }
 
 void hfi1_put_txreq(struct verbs_txreq *tx);
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index c3c96c5..97bd940 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -1,5 +1,5 @@ 
 /*
- * Copyright(c) 2017 Intel Corporation.
+ * Copyright(c) 2017 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -198,8 +198,8 @@  int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
 		goto free_desc;
 	tx->retry_count = 0;
 
-	ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq,
-			      vnic_sdma->pkts_sent);
+	ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
+			      &tx->txreq, vnic_sdma->pkts_sent);
 	/* When -ECOMM, sdma callback will be called with ABORT status */
 	if (unlikely(ret && unlikely(ret != -ECOMM)))
 		goto free_desc;
@@ -230,13 +230,13 @@  int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
  * become available.
  */
 static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
-				struct iowait *wait,
+				struct iowait_work *wait,
 				struct sdma_txreq *txreq,
 				uint seq,
 				bool pkts_sent)
 {
 	struct hfi1_vnic_sdma *vnic_sdma =
-		container_of(wait, struct hfi1_vnic_sdma, wait);
+		container_of(wait->iow, struct hfi1_vnic_sdma, wait);
 	struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
 	struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
 
@@ -247,7 +247,7 @@  static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
 	vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
 	write_seqlock(&dev->iowait_lock);
 	if (list_empty(&vnic_sdma->wait.list))
-		iowait_queue(pkts_sent, wait, &sde->dmawait);
+		iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
 	write_sequnlock(&dev->iowait_lock);
 	return -EBUSY;
 }
@@ -285,7 +285,8 @@  void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
 	for (i = 0; i < vinfo->num_tx_q; i++) {
 		struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
 
-		iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep,
+		iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
+			    hfi1_vnic_sdma_sleep,
 			    hfi1_vnic_sdma_wakeup, NULL);
 		vnic_sdma->sde = &vinfo->dd->per_sdma[i];
 		vnic_sdma->dd = vinfo->dd;
@@ -295,10 +296,12 @@  void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
 
 		/* Add a free descriptor watermark for wakeups */
 		if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
+			struct iowait_work *work;
+
 			INIT_LIST_HEAD(&vnic_sdma->stx.list);
 			vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
-			list_add_tail(&vnic_sdma->stx.list,
-				      &vnic_sdma->wait.tx_head);
+			work = iowait_get_ib_work(&vnic_sdma->wait);
+			list_add_tail(&vnic_sdma->stx.list, &work->tx_head);
 		}
 	}
 }
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index ad9093d..26ab78e 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1716,14 +1716,14 @@  void qib_unregister_ib_device(struct qib_devdata *dd)
  * It is only used in post send, which doesn't hold
  * the s_lock.
  */
-void _qib_schedule_send(struct rvt_qp *qp)
+bool _qib_schedule_send(struct rvt_qp *qp)
 {
 	struct qib_ibport *ibp =
 		to_iport(qp->ibqp.device, qp->port_num);
 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 	struct qib_qp_priv *priv = qp->priv;
 
-	queue_work(ppd->qib_wq, &priv->s_work);
+	return queue_work(ppd->qib_wq, &priv->s_work);
 }
 
 /**
@@ -1733,8 +1733,9 @@  void _qib_schedule_send(struct rvt_qp *qp)
  * This schedules qp progress.  The s_lock
  * should be held.
  */
-void qib_schedule_send(struct rvt_qp *qp)
+bool qib_schedule_send(struct rvt_qp *qp)
 {
 	if (qib_send_ok(qp))
-		_qib_schedule_send(qp);
+		return _qib_schedule_send(qp);
+	return false;
 }
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 3d7b744..df90a7a 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1,5 +1,5 @@ 
 /*
- * Copyright (c) 2012 - 2017 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 - 2018 Intel Corporation.  All rights reserved.
  * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
@@ -223,8 +223,8 @@  static inline int qib_send_ok(struct rvt_qp *qp)
 		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
 }
 
-void _qib_schedule_send(struct rvt_qp *qp);
-void qib_schedule_send(struct rvt_qp *qp);
+bool _qib_schedule_send(struct rvt_qp *qp);
+bool qib_schedule_send(struct rvt_qp *qp);
 
 static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
 {
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 0fda98d..14d53c4 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -211,8 +211,8 @@  struct rvt_driver_provided {
 	 * version requires the s_lock not to be held. The other assumes the
 	 * s_lock is held.
 	 */
-	void (*schedule_send)(struct rvt_qp *qp);
-	void (*schedule_send_no_lock)(struct rvt_qp *qp);
+	bool (*schedule_send)(struct rvt_qp *qp);
+	bool (*schedule_send_no_lock)(struct rvt_qp *qp);
 
 	/*
 	 * Driver specific work request setup and checking.