
[rebase to for-next 2/2] rdma/cxgb4: Add support for kernel mode srqs

Message ID 20180517054828.26289-3-rajur@chelsio.com (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show

Commit Message

Raju Rangoju May 17, 2018, 5:48 a.m. UTC
- Add srq create/destroy/modify to support kernel mode
- Add post_srq function
- Update poll_cq code to deal with srqs
- Handle kernel mode SRQ_LIMIT events
- Handle flushed SRQ buffers

Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
---
 drivers/infiniband/hw/cxgb4/cm.c       |  42 +-
 drivers/infiniband/hw/cxgb4/cq.c       | 176 ++++++-
 drivers/infiniband/hw/cxgb4/device.c   |  19 +-
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |   3 +-
 drivers/infiniband/hw/cxgb4/provider.c |  10 +-
 drivers/infiniband/hw/cxgb4/qp.c       | 821 ++++++++++++++++++++++++++++-----
 drivers/infiniband/hw/cxgb4/resource.c |  51 +-
 drivers/infiniband/hw/cxgb4/t4.h       |  17 +-
 include/uapi/rdma/cxgb4-abi.h          |  12 +-
 9 files changed, 980 insertions(+), 171 deletions(-)
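
For readers who want a feel for how a kernel ULP would exercise the new verbs
(create/modify/destroy SRQ, post_srq, SRQ_LIMIT events), here is a minimal,
hypothetical usage sketch. The helper name and the sizes are illustrative
only; nothing below is part of this patch.

/*
 * Hypothetical kernel-ULP usage of SRQ verbs: create an SRQ, post one
 * receive buffer, and arm the SRQ limit so IB_EVENT_SRQ_LIMIT_REACHED
 * fires when the queue drains below the watermark.
 */
static struct ib_srq *example_srq_setup(struct ib_pd *pd,
					void (*ev)(struct ib_event *, void *),
					u64 dma_addr, u32 lkey, u32 len)
{
	struct ib_srq_init_attr init = {
		.event_handler = ev,
		.attr = { .max_wr = 128, .max_sge = 1 },
	};
	struct ib_srq_attr limit = { .srq_limit = 16 };
	struct ib_sge sge = { .addr = dma_addr, .length = len, .lkey = lkey };
	struct ib_recv_wr wr = { .wr_id = 1, .sg_list = &sge, .num_sge = 1 };
	struct ib_recv_wr *bad_wr;
	struct ib_srq *srq;

	srq = ib_create_srq(pd, &init);
	if (IS_ERR(srq))
		return srq;

	if (ib_post_srq_recv(srq, &wr, &bad_wr) ||
	    ib_modify_srq(srq, &limit, IB_SRQ_LIMIT)) {
		ib_destroy_srq(srq);
		return ERR_PTR(-EIO);
	}
	return srq;
}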

Comments

Jason Gunthorpe May 17, 2018, 4:17 p.m. UTC | #1
On Thu, May 17, 2018 at 11:18:28AM +0530, Raju Rangoju wrote:
>  
> +	/* Support for the new kernel mode SRQ extends the t4_cqe struct
> +	 * from the existing 32 bytes to 64 bytes. This change introduces
> +	 * library/kernel ABI and 32/64 byte CQE compatibility issues.
> +	 *
> +	 * To address these issues, the user <-> kernel response message
> +	 * c4iw_create_cq_resp's 'reserved' field has been renamed to 'flags'.
> +	 *
> +	 * A newer libcxgb4 pre-initializes the 'flags' field in the request
> +	 * struct with the flag C4IW_64B_CQE, and a new iw_cxgb4 can detect
> +	 * the new libcxgb4 by checking for the C4IW_64B_CQE flag in the
> +	 * c4iw_create_cq structure received from userspace.
> +	 * A new kernel driver cannot work with an older userspace library;
> +	 * in that case c4iw_create_cq() logs a warning asking the user to
> +	 * upgrade libcxgb4 and aborts the cq creation.
> +	 *
> +	 * Likewise, a new iw_cxgb4 that supports 64B CQEs sets the flag
> +	 * C4IW_64B_CQE in the c4iw_create_cq_resp struct. So, if the flags
> +	 * field in the response from the create_cq verb is still zero,
> +	 * libcxgb4 can destroy the cq, print a warning and fail the user's
> +	 * create_cq verb.
> +	 */
> +
> +	if (ib_context && udata->inlen < sizeof(ucmd)) {
> +		pr_err_once("WARNING: Downlevel libcxgb4. RDMA cannot be supported with this driver/lib combination. Please update libcxgb4.\n");
> +		return ERR_PTR(-EINVAL);
> +	}

Nope, hard NAK on this idea.

The driver must continue to support current user space with full
compatibility.

And user space must generally support all past kernels as well.

Jason
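
A common way to meet this requirement is to key off udata->inlen: keep
serving an old libcxgb4 with the existing 32B CQE layout and enable 64B CQEs
only when the new flag is present. The helper below is a rough, hypothetical
sketch of such a fallback (the helper name and the 32B/64B plumbing are
assumptions, not the driver's actual follow-up):

/*
 * Hypothetical sketch only: detect a down-level libcxgb4 by the size of
 * the request it passed in and fall back to 32B CQEs instead of failing
 * the verb.  Kernel-created CQs can always use 64B CQEs since the driver
 * owns both sides of that path.
 */
static int c4iw_user_cqe_size(struct ib_udata *udata)
{
	struct c4iw_create_cq ucmd = {};

	/* Old library: its request struct is shorter, keep 32B CQEs. */
	if (!udata || udata->inlen < sizeof(ucmd))
		return 32;

	if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
		return 32;	/* be conservative on copy failure */

	/* New library advertised 64B CQE support in the request flags. */
	return (ucmd.flags & C4IW_64B_CQE) ? 64 : 32;
}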
kernel test robot May 18, 2018, 3:01 a.m. UTC | #2
Hi Raju,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on rdma/for-next]
[also build test WARNING on v4.17-rc5 next-20180517]
[cannot apply to linus/master]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Raju-Rangoju/rdma-cxgb4-Add-SRQ-support-for-Chelsio-adapters/20180518-075228
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

   drivers/infiniband/hw/cxgb4/iw_cxgb4.h:422:16: sparse: expression using sizeof(void)
   drivers/infiniband/hw/cxgb4/cm.c:1752:35: sparse: expression using sizeof(void)
   drivers/infiniband/hw/cxgb4/cm.c:1756:35: sparse: expression using sizeof(void)
>> drivers/infiniband/hw/cxgb4/cm.c:1863:18: sparse: cast to restricted __be32
   drivers/infiniband/hw/cxgb4/cm.c:1864:18: sparse: cast to restricted __be32
   drivers/infiniband/hw/cxgb4/cm.c:2764:18: sparse: cast to restricted __be32
--
   drivers/infiniband/hw/cxgb4/qp.c:248:35: sparse: expression using sizeof(void)
   drivers/infiniband/hw/cxgb4/iw_cxgb4.h:422:16: sparse: expression using sizeof(void)
>> drivers/infiniband/hw/cxgb4/qp.c:2264:34: sparse: Using plain integer as NULL pointer
   drivers/infiniband/hw/cxgb4/qp.c:2538:18: sparse: expression using sizeof(void)
>> drivers/infiniband/hw/cxgb4/qp.c:2538:18: sparse: too many warnings

vim +1863 drivers/infiniband/hw/cxgb4/cm.c

  1651	
  1652	/*
  1653	 * process_mpa_request - process streaming mode MPA request
  1654	 *
  1655	 * Returns:
  1656	 *
  1657	 * 0 upon success indicating a connect request was delivered to the ULP
  1658	 * or the mpa request is incomplete but valid so far.
  1659	 *
  1660	 * 1 if a failure requires the caller to close the connection.
  1661	 *
  1662	 * 2 if a failure requires the caller to abort the connection.
  1663	 */
  1664	static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
  1665	{
  1666		struct mpa_message *mpa;
  1667		struct mpa_v2_conn_params *mpa_v2_params;
  1668		u16 plen;
  1669	
  1670		pr_debug("ep %p tid %u\n", ep, ep->hwtid);
  1671	
  1672		/*
  1673		 * If we get more than the supported amount of private data
  1674		 * then we must fail this connection.
  1675		 */
  1676		if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
  1677			goto err_stop_timer;
  1678	
  1679		pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
  1680	
  1681		/*
  1682		 * Copy the new data into our accumulation buffer.
  1683		 */
  1684		skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
  1685					  skb->len);
  1686		ep->mpa_pkt_len += skb->len;
  1687	
  1688		/*
  1689		 * If we don't even have the mpa message, then bail.
  1690		 * We'll continue process when more data arrives.
  1691		 */
  1692		if (ep->mpa_pkt_len < sizeof(*mpa))
  1693			return 0;
  1694	
  1695		pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
  1696		mpa = (struct mpa_message *) ep->mpa_pkt;
  1697	
  1698		/*
  1699		 * Validate MPA Header.
  1700		 */
  1701		if (mpa->revision > mpa_rev) {
  1702			pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
  1703			       __func__, mpa_rev, mpa->revision);
  1704			goto err_stop_timer;
  1705		}
  1706	
  1707		if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
  1708			goto err_stop_timer;
  1709	
  1710		plen = ntohs(mpa->private_data_size);
  1711	
  1712		/*
  1713		 * Fail if there's too much private data.
  1714		 */
  1715		if (plen > MPA_MAX_PRIVATE_DATA)
  1716			goto err_stop_timer;
  1717	
  1718		/*
  1719		 * If plen does not account for pkt size
  1720		 */
  1721		if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
  1722			goto err_stop_timer;
  1723		ep->plen = (u8) plen;
  1724	
  1725		/*
  1726		 * If we don't have all the pdata yet, then bail.
  1727		 */
  1728		if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
  1729			return 0;
  1730	
  1731		/*
  1732		 * If we get here we have accumulated the entire mpa
  1733		 * start reply message including private data.
  1734		 */
  1735		ep->mpa_attr.initiator = 0;
  1736		ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
  1737		ep->mpa_attr.recv_marker_enabled = markers_enabled;
  1738		ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
  1739		ep->mpa_attr.version = mpa->revision;
  1740		if (mpa->revision == 1)
  1741			ep->tried_with_mpa_v1 = 1;
  1742		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
  1743	
  1744		if (mpa->revision == 2) {
  1745			ep->mpa_attr.enhanced_rdma_conn =
  1746				mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
  1747			if (ep->mpa_attr.enhanced_rdma_conn) {
  1748				mpa_v2_params = (struct mpa_v2_conn_params *)
  1749					(ep->mpa_pkt + sizeof(*mpa));
  1750				ep->ird = ntohs(mpa_v2_params->ird) &
  1751					MPA_V2_IRD_ORD_MASK;
  1752				ep->ird = min_t(u32, ep->ird,
  1753						cur_max_read_depth(ep->com.dev));
  1754				ep->ord = ntohs(mpa_v2_params->ord) &
  1755					MPA_V2_IRD_ORD_MASK;
> 1756				ep->ord = min_t(u32, ep->ord,
  1757						cur_max_read_depth(ep->com.dev));
  1758				pr_debug("initiator ird %u ord %u\n",
  1759					 ep->ird, ep->ord);
  1760				if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
  1761					if (peer2peer) {
  1762						if (ntohs(mpa_v2_params->ord) &
  1763								MPA_V2_RDMA_WRITE_RTR)
  1764							ep->mpa_attr.p2p_type =
  1765							FW_RI_INIT_P2PTYPE_RDMA_WRITE;
  1766						else if (ntohs(mpa_v2_params->ord) &
  1767								MPA_V2_RDMA_READ_RTR)
  1768							ep->mpa_attr.p2p_type =
  1769							FW_RI_INIT_P2PTYPE_READ_REQ;
  1770					}
  1771			}
  1772		} else if (mpa->revision == 1)
  1773			if (peer2peer)
  1774				ep->mpa_attr.p2p_type = p2p_type;
  1775	
  1776		pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n",
  1777			 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
  1778			 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
  1779			 ep->mpa_attr.p2p_type);
  1780	
  1781		__state_set(&ep->com, MPA_REQ_RCVD);
  1782	
  1783		/* drive upcall */
  1784		mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING);
  1785		if (ep->parent_ep->com.state != DEAD) {
  1786			if (connect_request_upcall(ep))
  1787				goto err_unlock_parent;
  1788		} else {
  1789			goto err_unlock_parent;
  1790		}
  1791		mutex_unlock(&ep->parent_ep->com.mutex);
  1792		return 0;
  1793	
  1794	err_unlock_parent:
  1795		mutex_unlock(&ep->parent_ep->com.mutex);
  1796		goto err_out;
  1797	err_stop_timer:
  1798		(void)stop_ep_timer(ep);
  1799	err_out:
  1800		return 2;
  1801	}
  1802	
  1803	static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
  1804	{
  1805		struct c4iw_ep *ep;
  1806		struct cpl_rx_data *hdr = cplhdr(skb);
  1807		unsigned int dlen = ntohs(hdr->len);
  1808		unsigned int tid = GET_TID(hdr);
  1809		__u8 status = hdr->status;
  1810		int disconnect = 0;
  1811	
  1812		ep = get_ep_from_tid(dev, tid);
  1813		if (!ep)
  1814			return 0;
  1815		pr_debug("ep %p tid %u dlen %u\n", ep, ep->hwtid, dlen);
  1816		skb_pull(skb, sizeof(*hdr));
  1817		skb_trim(skb, dlen);
  1818		mutex_lock(&ep->com.mutex);
  1819	
  1820		switch (ep->com.state) {
  1821		case MPA_REQ_SENT:
  1822			update_rx_credits(ep, dlen);
  1823			ep->rcv_seq += dlen;
  1824			disconnect = process_mpa_reply(ep, skb);
  1825			break;
  1826		case MPA_REQ_WAIT:
  1827			update_rx_credits(ep, dlen);
  1828			ep->rcv_seq += dlen;
  1829			disconnect = process_mpa_request(ep, skb);
  1830			break;
  1831		case FPDU_MODE: {
  1832			struct c4iw_qp_attributes attrs;
  1833	
  1834			update_rx_credits(ep, dlen);
  1835			if (status)
  1836				pr_err("%s Unexpected streaming data." \
  1837				       " qpid %u ep %p state %d tid %u status %d\n",
  1838				       __func__, ep->com.qp->wq.sq.qid, ep,
  1839				       ep->com.state, ep->hwtid, status);
  1840			attrs.next_state = C4IW_QP_STATE_TERMINATE;
  1841			c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
  1842				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
  1843			disconnect = 1;
  1844			break;
  1845		}
  1846		default:
  1847			break;
  1848		}
  1849		mutex_unlock(&ep->com.mutex);
  1850		if (disconnect)
  1851			c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
  1852		c4iw_put_ep(&ep->com);
  1853		return 0;
  1854	}
  1855	
  1856	static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx_status)
  1857	{
  1858		enum chip_type adapter_type;
  1859		u32 srqidx;
  1860		u8 status;
  1861	
  1862		adapter_type = ep->com.dev->rdev.lldi.adapter_type;
> 1863		status = ABORT_RSS_STATUS_G(be32_to_cpu(srqidx_status));
  1864		srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));
  1865	
  1866		/*
  1867		 * If this TCB had a srq buffer cached, then we must complete
  1868		 * it. For user mode, that means saving the srqidx in the
  1869		 * user/kernel status page for this qp.  For kernel mode, just
  1870		 * synthesize the CQE now.
  1871		 */
  1872		if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
  1873			if (ep->com.qp->ibqp.uobject)
  1874				t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
  1875			else
  1876				c4iw_flush_srqidx(ep->com.qp, srqidx);
  1877		}
  1878	}
  1879	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
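
The two new warnings flagged above point at simple annotation issues. Below
is a hypothetical sketch of the usual fixes; the final form chosen by the
driver may differ.

/*
 * 1) cm.c:1863 "cast to restricted __be32": the srqidx_status word comes
 *    off the wire in big-endian form, so carry it as __be32 and the
 *    be32_to_cpu() calls become clean.
 */
static void complete_cached_srq_buffers(struct c4iw_ep *ep,
					__be32 srqidx_status)
{
	u8 status = ABORT_RSS_STATUS_G(be32_to_cpu(srqidx_status));
	u32 srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));

	/* ... rest of the function as in the patch ... */
}

/*
 * 2) qp.c:2264 "Using plain integer as NULL pointer": wherever a pointer
 *    is expected, use NULL rather than a literal 0, e.g.
 *
 *	struct c4iw_srq *srq = NULL;	// not "= 0"
 */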
Steve Wise May 21, 2018, 3:34 p.m. UTC | #3
> 
> On Thu, May 17, 2018 at 11:18:28AM +0530, Raju Rangoju wrote:
> >
> > +	/* Support for the new kernel mode SRQ extends the t4_cqe struct
> > +	 * from the existing 32 bytes to 64 bytes. This change introduces
> > +	 * library/kernel ABI and 32/64 byte CQE compatibility issues.
> > +	 *
> > +	 * To address these issues, the user <-> kernel response message
> > +	 * c4iw_create_cq_resp's 'reserved' field has been renamed to 'flags'.
> > +	 *
> > +	 * A newer libcxgb4 pre-initializes the 'flags' field in the request
> > +	 * struct with the flag C4IW_64B_CQE, and a new iw_cxgb4 can detect
> > +	 * the new libcxgb4 by checking for the C4IW_64B_CQE flag in the
> > +	 * c4iw_create_cq structure received from userspace.
> > +	 * A new kernel driver cannot work with an older userspace library;
> > +	 * in that case c4iw_create_cq() logs a warning asking the user to
> > +	 * upgrade libcxgb4 and aborts the cq creation.
> > +	 *
> > +	 * Likewise, a new iw_cxgb4 that supports 64B CQEs sets the flag
> > +	 * C4IW_64B_CQE in the c4iw_create_cq_resp struct. So, if the flags
> > +	 * field in the response from the create_cq verb is still zero,
> > +	 * libcxgb4 can destroy the cq, print a warning and fail the user's
> > +	 * create_cq verb.
> > +	 */
> > +
> > +	if (ib_context && udata->inlen < sizeof(ucmd)) {
> > +		pr_err_once("WARNING: Downlevel libcxgb4. RDMA cannot be supported with this driver/lib combination. Please update libcxgb4.\n");
> > +		return ERR_PTR(-EINVAL);
> > +	}
> 
> Nope, hard NAK on this idea.
> 
> The driver must continue to support current user space with full
> compatibility.
> 
> And user space must generally support all past kernels as well.
> 
> Jason

Hey Raju, please separate this cqe issue into its own patch series for both
kernel and rdma-core. It will simplify reviewing and testing 32/64 support.

Thanks! 

Steve.
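
For reference, the negotiation described in the patch comment would look
roughly like this on the libcxgb4 side. This is a hypothetical sketch:
cq_cmd() and cq_destroy() are stand-ins for the real rdma-core plumbing, and
only C4IW_64B_CQE and the 'flags' fields come from the patch.

static int negotiate_64b_cqe(struct ibv_context *ctx)
{
	struct c4iw_create_cq cmd = { 0 };
	struct c4iw_create_cq_resp resp = { 0 };

	cmd.flags = C4IW_64B_CQE;	/* new library: advertise 64B CQE support */

	if (cq_cmd(ctx, &cmd, sizeof(cmd), &resp, sizeof(resp)))
		return -1;

	if (!(resp.flags & C4IW_64B_CQE)) {
		/* An old iw_cxgb4 never sets the flag in its response, so
		 * the new library tears the cq down and fails the verb. */
		fprintf(stderr, "libcxgb4: kernel lacks 64B CQE support\n");
		cq_destroy(ctx, &resp);
		return -1;
	}
	return 0;
}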


Patch

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 4cf17c650c36..e758b6783b3b 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1853,10 +1853,34 @@  static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
 	return 0;
 }
 
+static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx_status)
+{
+	enum chip_type adapter_type;
+	u32 srqidx;
+	u8 status;
+
+	adapter_type = ep->com.dev->rdev.lldi.adapter_type;
+	status = ABORT_RSS_STATUS_G(be32_to_cpu(srqidx_status));
+	srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));
+
+	/*
+	 * If this TCB had a srq buffer cached, then we must complete
+	 * it. For user mode, that means saving the srqidx in the
+	 * user/kernel status page for this qp.  For kernel mode, just
+	 * synthesize the CQE now.
+	 */
+	if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
+		if (ep->com.qp->ibqp.uobject)
+			t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
+		else
+			c4iw_flush_srqidx(ep->com.qp, srqidx);
+	}
+}
+
 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct c4iw_ep *ep;
-	struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
+	struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
 	int release = 0;
 	unsigned int tid = GET_TID(rpl);
 
@@ -1865,6 +1889,9 @@  static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 		pr_warn("Abort rpl to freed endpoint\n");
 		return 0;
 	}
+
+	complete_cached_srq_buffers(ep, rpl->srqidx_status);
+
 	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
@@ -2719,28 +2746,35 @@  static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
 
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
-	struct cpl_abort_req_rss *req = cplhdr(skb);
+	struct cpl_abort_req_rss6 *req = cplhdr(skb);
 	struct c4iw_ep *ep;
 	struct sk_buff *rpl_skb;
 	struct c4iw_qp_attributes attrs;
 	int ret;
 	int release = 0;
 	unsigned int tid = GET_TID(req);
+	u8 status;
+
 	u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
 
 	ep = get_ep_from_tid(dev, tid);
 	if (!ep)
 		return 0;
 
-	if (cxgb_is_neg_adv(req->status)) {
+	status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
+
+	if (cxgb_is_neg_adv(status)) {
 		pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
-			 ep->hwtid, req->status, neg_adv_str(req->status));
+			 ep->hwtid, status, neg_adv_str(status));
 		ep->stats.abort_neg_adv++;
 		mutex_lock(&dev->rdev.stats.lock);
 		dev->rdev.stats.neg_adv++;
 		mutex_unlock(&dev->rdev.stats.lock);
 		goto deref_ep;
 	}
+
+	complete_cached_srq_buffers(ep, req->srqidx_status);
+
 	pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
 		 ep->com.state);
 	set_bit(PEER_ABORT, &ep->com.history);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 2be2e1ac1b5f..1bba37c1267d 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -132,7 +132,7 @@  static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 			FW_RI_RES_WR_IQPCIECH_V(2) |
 			FW_RI_RES_WR_IQINTCNTTHRESH_V(0) |
 			FW_RI_RES_WR_IQO_F |
-			FW_RI_RES_WR_IQESIZE_V(1));
+			FW_RI_RES_WR_IQESIZE_V(ilog2(sizeof(*cq->queue)) - 4));
 	res->u.cq.iqsize = cpu_to_be16(cq->size);
 	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);
 
@@ -166,7 +166,7 @@  static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	return ret;
 }
 
-static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
+static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx)
 {
 	struct t4_cqe cqe;
 
@@ -179,6 +179,8 @@  static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
 				 CQE_SWCQE_V(1) |
 				 CQE_QPID_V(wq->sq.qid));
 	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
+	if (srqidx)
+		cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx);
 	cq->sw_queue[cq->sw_pidx] = cqe;
 	t4_swcq_produce(cq);
 }
@@ -191,7 +193,7 @@  int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
 	pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
 		 wq, cq, wq->rq.in_use, count);
 	while (in_use--) {
-		insert_recv_cqe(wq, cq);
+		insert_recv_cqe(wq, cq, 0);
 		flushed++;
 	}
 	return flushed;
@@ -442,6 +444,72 @@  void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
 	pr_debug("cq %p count %d\n", cq, *count);
 }
 
+static void post_pending_srq_wrs(struct t4_srq *srq)
+{
+	struct t4_srq_pending_wr *pwr;
+	u16 idx = 0;
+
+	while (srq->pending_in_use) {
+		pwr = &srq->pending_wrs[srq->pending_cidx];
+		srq->sw_rq[srq->pidx].wr_id = pwr->wr_id;
+		srq->sw_rq[srq->pidx].valid = 1;
+
+		pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
+			 __func__,
+			 srq->cidx, srq->pidx, srq->wq_pidx,
+			 srq->in_use, srq->size,
+			 (unsigned long long)pwr->wr_id);
+
+		c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16);
+		t4_srq_consume_pending_wr(srq);
+		t4_srq_produce(srq, pwr->len16);
+		idx += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE);
+	}
+
+	if (idx) {
+		t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe);
+		srq->queue[srq->size].status.host_wq_pidx =
+			srq->wq_pidx;
+	}
+}
+
+static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq)
+{
+	int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx;
+	u64 wr_id;
+
+	srq->sw_rq[rel_idx].valid = 0;
+	wr_id = srq->sw_rq[rel_idx].wr_id;
+
+	if (rel_idx == srq->cidx) {
+		pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
+			 __func__, rel_idx, srq->cidx, srq->pidx,
+			 srq->wq_pidx, srq->in_use, srq->size,
+			 (unsigned long long)srq->sw_rq[rel_idx].wr_id);
+		t4_srq_consume(srq);
+		while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) {
+			pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
+				 __func__, srq->cidx, srq->pidx,
+				 srq->wq_pidx, srq->in_use,
+				 srq->size, srq->ooo_count,
+				 (unsigned long long)
+				 srq->sw_rq[srq->cidx].wr_id);
+			t4_srq_consume_ooo(srq);
+		}
+		if (srq->ooo_count == 0 && srq->pending_in_use)
+			post_pending_srq_wrs(srq);
+	} else {
+		pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
+			 __func__, rel_idx, srq->cidx,
+			 srq->pidx, srq->wq_pidx,
+			 srq->in_use, srq->size,
+			 srq->ooo_count,
+			 (unsigned long long)srq->sw_rq[rel_idx].wr_id);
+		t4_srq_produce_ooo(srq);
+	}
+	return wr_id;
+}
+
 /*
  * poll_cq
  *
@@ -459,7 +527,8 @@  void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
  *    -EOVERFLOW    CQ overflow detected.
  */
 static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
-		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
+		   u8 *cqe_flushed, u64 *cookie, u32 *credit,
+		   struct t4_srq *srq)
 {
 	int ret = 0;
 	struct t4_cqe *hw_cqe, read_cqe;
@@ -524,7 +593,7 @@  static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 		 */
 		if (CQE_TYPE(hw_cqe) == 1) {
 			if (CQE_STATUS(hw_cqe))
-				t4_set_wq_in_error(wq);
+				t4_set_wq_in_error(wq, 0);
 			ret = -EAGAIN;
 			goto skip_cqe;
 		}
@@ -535,7 +604,7 @@  static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 		 */
 		if (CQE_WRID_STAG(hw_cqe) == 1) {
 			if (CQE_STATUS(hw_cqe))
-				t4_set_wq_in_error(wq);
+				t4_set_wq_in_error(wq, 0);
 			ret = -EAGAIN;
 			goto skip_cqe;
 		}
@@ -560,7 +629,7 @@  static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 
 	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
 		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
-		t4_set_wq_in_error(wq);
+		t4_set_wq_in_error(wq, 0);
 	}
 
 	/*
@@ -574,15 +643,9 @@  static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 		 * then we complete this with T4_ERR_MSN and mark the wq in
 		 * error.
 		 */
-
-		if (t4_rq_empty(wq)) {
-			t4_set_wq_in_error(wq);
-			ret = -EAGAIN;
-			goto skip_cqe;
-		}
 		if (unlikely(!CQE_STATUS(hw_cqe) &&
 			     CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
-			t4_set_wq_in_error(wq);
+			t4_set_wq_in_error(wq, 0);
 			hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
 		}
 		goto proc_cqe;
@@ -641,11 +704,16 @@  static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 			c4iw_log_wr_stats(wq, hw_cqe);
 		t4_sq_consume(wq);
 	} else {
-		pr_debug("completing rq idx %u\n", wq->rq.cidx);
-		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
-		if (c4iw_wr_log)
-			c4iw_log_wr_stats(wq, hw_cqe);
-		t4_rq_consume(wq);
+		if (!srq) {
+			pr_debug("completing rq idx %u\n", wq->rq.cidx);
+			*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
+			if (c4iw_wr_log)
+				c4iw_log_wr_stats(wq, hw_cqe);
+			t4_rq_consume(wq);
+		} else {
+			*cookie = reap_srq_cqe(hw_cqe, srq);
+		}
+		wq->rq.msn++;
 		goto skip_cqe;
 	}
 
@@ -680,6 +748,7 @@  static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 {
 	struct c4iw_qp *qhp = NULL;
+	struct c4iw_srq *srq = NULL;
 	struct t4_cqe uninitialized_var(cqe), *rd_cqe;
 	struct t4_wq *wq;
 	u32 credit = 0;
@@ -698,8 +767,12 @@  static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 	else {
 		spin_lock(&qhp->lock);
 		wq = &(qhp->wq);
+		srq = qhp->srq;
+		if (srq)
+			spin_lock(&srq->lock);
 	}
-	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
+	ret = poll_cq(wq, &chp->cq, &cqe, &cqe_flushed, &cookie, &credit,
+		      srq ? &srq->wq : NULL);
 	if (ret)
 		goto out;
 
@@ -708,6 +781,13 @@  static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 	wc->vendor_err = CQE_STATUS(&cqe);
 	wc->wc_flags = 0;
 
+	/*
+	 * Simulate a SRQ_LIMIT_REACHED HW notification if required.
+	 */
+	if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed &&
+	    srq->wq.in_use < srq->srq_limit)
+		c4iw_dispatch_srq_limit_reached_event(srq);
+
 	pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
 		 CQE_QPID(&cqe),
 		 CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
@@ -819,8 +899,11 @@  static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 		}
 	}
 out:
-	if (wq)
+	if (wq) {
+		if (srq)
+			spin_unlock(&srq->lock);
 		spin_unlock(&qhp->lock);
+	}
 	return ret;
 }
 
@@ -877,6 +960,7 @@  struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 	struct c4iw_dev *rhp;
 	struct c4iw_cq *chp;
 	struct c4iw_create_cq_resp uresp;
+	struct c4iw_create_cq ucmd;
 	struct c4iw_ucontext *ucontext = NULL;
 	int ret, wr_len;
 	size_t memsize, hwentries;
@@ -888,6 +972,33 @@  struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 
 	rhp = to_c4iw_dev(ibdev);
 
+	/* Support for the new kernel mode SRQ extends the t4_cqe struct
+	 * from the existing 32 bytes to 64 bytes. This change introduces
+	 * library/kernel ABI and 32/64 byte CQE compatibility issues.
+	 *
+	 * To address these issues, the user <-> kernel response message
+	 * c4iw_create_cq_resp's 'reserved' field has been renamed to 'flags'.
+	 *
+	 * A newer libcxgb4 pre-initializes the 'flags' field in the request
+	 * struct with the flag C4IW_64B_CQE, and a new iw_cxgb4 can detect
+	 * the new libcxgb4 by checking for the C4IW_64B_CQE flag in the
+	 * c4iw_create_cq structure received from userspace.
+	 * A new kernel driver cannot work with an older userspace library;
+	 * in that case c4iw_create_cq() logs a warning asking the user to
+	 * upgrade libcxgb4 and aborts the cq creation.
+	 *
+	 * Likewise, a new iw_cxgb4 that supports 64B CQEs sets the flag
+	 * C4IW_64B_CQE in the c4iw_create_cq_resp struct. So, if the flags
+	 * field in the response from the create_cq verb is still zero,
+	 * libcxgb4 can destroy the cq, print a warning and fail the user's
+	 * create_cq verb.
+	 */
+
+	if (ib_context && udata->inlen < sizeof(ucmd)) {
+		pr_err_once("WARNING: Downlevel libcxgb4. RDMA cannot be supported with this driver/lib combination. Please update libcxgb4.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
 	if (vector >= rhp->rdev.lldi.nciq)
 		return ERR_PTR(-EINVAL);
 
@@ -980,9 +1091,12 @@  struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 		ucontext->key += PAGE_SIZE;
 		uresp.gts_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
+		/* communicate to the userspace that
+		 * kernel driver supports 64B CQE
+		 */
+		uresp.flags |= C4IW_64B_CQE;
 		spin_unlock(&ucontext->mmap_lock);
-		ret = ib_copy_to_udata(udata, &uresp,
-				       sizeof(uresp) - sizeof(uresp.reserved));
+		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 		if (ret)
 			goto err_free_mm2;
 
@@ -1039,3 +1153,19 @@  int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 	spin_unlock_irqrestore(&chp->lock, flag);
 	return ret;
 }
+
+void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx)
+{
+	struct c4iw_cq *rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
+	unsigned long flag;
+
+	/* locking hierarchy: cq lock first, then qp lock. */
+	spin_lock_irqsave(&rchp->lock, flag);
+	spin_lock(&qhp->lock);
+
+	/* create a SRQ RECV CQE for srqidx */
+	insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx);
+
+	spin_unlock(&qhp->lock);
+	spin_unlock_irqrestore(&rchp->lock, flag);
+}
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 44161ca4d2a8..864a44ae26ef 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -275,10 +275,11 @@  static int dump_qp(int id, void *p, void *data)
 
 			set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
 			cc = snprintf(qpd->buf + qpd->pos, space,
-				      "rc qp sq id %u rq id %u state %u "
+				      "rc qp sq id %u %s id %u state %u "
 				      "onchip %u ep tid %u state %u "
 				      "%pI4:%u/%u->%pI4:%u/%u\n",
-				      qp->wq.sq.qid, qp->wq.rq.qid,
+				      qp->wq.sq.qid, qp->srq ? "srq" : "rq",
+				      qp->srq ? qp->srq->idx : qp->wq.rq.qid,
 				      (int)qp->attr.state,
 				      qp->wq.sq.flags & T4_SQ_ONCHIP,
 				      ep->hwtid, (int)ep->com.state,
@@ -480,6 +481,9 @@  static int stats_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "      QID: %10llu %10llu %10llu %10llu\n",
 			dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
 			dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
+	seq_printf(seq, "     SRQS: %10llu %10llu %10llu %10llu\n",
+		   dev->rdev.stats.srqt.total, dev->rdev.stats.srqt.cur,
+			dev->rdev.stats.srqt.max, dev->rdev.stats.srqt.fail);
 	seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu %10llu\n",
 			dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
 			dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
@@ -530,6 +534,8 @@  static ssize_t stats_clear(struct file *file, const char __user *buf,
 	dev->rdev.stats.pbl.fail = 0;
 	dev->rdev.stats.rqt.max = 0;
 	dev->rdev.stats.rqt.fail = 0;
+	dev->rdev.stats.srqt.max = 0;
+	dev->rdev.stats.srqt.fail = 0;
 	dev->rdev.stats.ocqp.max = 0;
 	dev->rdev.stats.ocqp.fail = 0;
 	dev->rdev.stats.db_full = 0;
@@ -802,7 +808,7 @@  static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 
 	rdev->qpmask = rdev->lldi.udb_density - 1;
 	rdev->cqmask = rdev->lldi.ucq_density - 1;
-	pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n",
+	pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n",
 		 pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
 		 rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
 		 rdev->lldi.vr->pbl.start,
@@ -811,7 +817,8 @@  static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 		 rdev->lldi.vr->qp.start,
 		 rdev->lldi.vr->qp.size,
 		 rdev->lldi.vr->cq.start,
-		 rdev->lldi.vr->cq.size);
+		 rdev->lldi.vr->cq.size,
+		 rdev->lldi.vr->srq.size);
 	pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n",
 		 &rdev->lldi.pdev->resource[2],
 		 rdev->lldi.db_reg, rdev->lldi.gts_reg,
@@ -824,10 +831,12 @@  static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 	rdev->stats.stag.total = rdev->lldi.vr->stag.size;
 	rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
 	rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
+	rdev->stats.srqt.total = rdev->lldi.vr->srq.size;
 	rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
 	rdev->stats.qid.total = rdev->lldi.vr->qp.size;
 
-	err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
+	err = c4iw_init_resource(rdev, c4iw_num_stags(rdev),
+				 T4_MAX_NUM_PD, rdev->lldi.vr->srq.size);
 	if (err) {
 		pr_err("error %d initializing resources\n", err);
 		return err;
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 06fd98ec12f9..b163914993b1 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -1012,7 +1012,8 @@  void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid,
 		   struct c4iw_dev_ucontext *uctx);
 u32 c4iw_get_resource(struct c4iw_id_table *id_table);
 void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry);
-int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid);
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
+		       u32 nr_pdid, u32 nr_srqt);
 int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev);
 int c4iw_pblpool_create(struct c4iw_rdev *rdev);
 int c4iw_rqtpool_create(struct c4iw_rdev *rdev);
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 1feade8bb4b3..966059cc4c1b 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -343,6 +343,7 @@  static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
 	props->max_mr_size = T4_MAX_MR_SIZE;
 	props->max_qp = dev->rdev.lldi.vr->qp.size / 2;
 	props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth;
+	props->max_srq_wr = dev->rdev.hw_queue.t4_max_qp_depth;
 	props->max_sge = T4_MAX_RECV_SGE;
 	props->max_sge_rd = 1;
 	props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter;
@@ -592,7 +593,10 @@  void c4iw_register_device(struct work_struct *work)
 	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
 	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
 	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
-	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
 	dev->ibdev.node_type = RDMA_NODE_RNIC;
 	BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
 	memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
@@ -614,6 +618,9 @@  void c4iw_register_device(struct work_struct *work)
 	dev->ibdev.modify_qp = c4iw_ib_modify_qp;
 	dev->ibdev.query_qp = c4iw_ib_query_qp;
 	dev->ibdev.destroy_qp = c4iw_destroy_qp;
+	dev->ibdev.create_srq = c4iw_create_srq;
+	dev->ibdev.modify_srq = c4iw_modify_srq;
+	dev->ibdev.destroy_srq = c4iw_destroy_srq;
 	dev->ibdev.create_cq = c4iw_create_cq;
 	dev->ibdev.destroy_cq = c4iw_destroy_cq;
 	dev->ibdev.resize_cq = c4iw_resize_cq;
@@ -631,6 +638,7 @@  void c4iw_register_device(struct work_struct *work)
 	dev->ibdev.req_notify_cq = c4iw_arm_cq;
 	dev->ibdev.post_send = c4iw_post_send;
 	dev->ibdev.post_recv = c4iw_post_receive;
+	dev->ibdev.post_srq_recv = c4iw_post_srq_recv;
 	dev->ibdev.alloc_hw_stats = c4iw_alloc_stats;
 	dev->ibdev.get_hw_stats = c4iw_get_mib;
 	dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 4106eed1b8fb..f25011943efc 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -147,21 +147,24 @@  static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user)
 }
 
 static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
-		      struct c4iw_dev_ucontext *uctx)
+		      struct c4iw_dev_ucontext *uctx, int has_rq)
 {
 	/*
 	 * uP clears EQ contexts when the connection exits rdma mode,
 	 * so no need to post a RESET WR for these EQs.
 	 */
-	dma_free_coherent(&(rdev->lldi.pdev->dev),
-			  wq->rq.memsize, wq->rq.queue,
-			  dma_unmap_addr(&wq->rq, mapping));
 	dealloc_sq(rdev, &wq->sq);
-	c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
-	kfree(wq->rq.sw_rq);
 	kfree(wq->sq.sw_sq);
-	c4iw_put_qpid(rdev, wq->rq.qid, uctx);
 	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
+
+	if (has_rq) {
+		dma_free_coherent(&rdev->lldi.pdev->dev,
+				  wq->rq.memsize, wq->rq.queue,
+				  dma_unmap_addr(&wq->rq, mapping));
+		c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+		kfree(wq->rq.sw_rq);
+		c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+	}
 	return 0;
 }
 
@@ -195,7 +198,8 @@  void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
 static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 		     struct t4_cq *rcq, struct t4_cq *scq,
 		     struct c4iw_dev_ucontext *uctx,
-		     struct c4iw_wr_wait *wr_waitp)
+		     struct c4iw_wr_wait *wr_waitp,
+		     int need_rq)
 {
 	int user = (uctx != &rdev->uctx);
 	struct fw_ri_res_wr *res_wr;
@@ -209,36 +213,45 @@  static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	if (!wq->sq.qid)
 		return -ENOMEM;
 
-	wq->rq.qid = c4iw_get_qpid(rdev, uctx);
-	if (!wq->rq.qid) {
-		ret = -ENOMEM;
-		goto free_sq_qid;
+	if (need_rq) {
+		wq->rq.qid = c4iw_get_qpid(rdev, uctx);
+		if (!wq->rq.qid) {
+			ret = -ENOMEM;
+			goto free_sq_qid;
+		}
 	}
 
 	if (!user) {
-		wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq,
-				 GFP_KERNEL);
+		wq->sq.sw_sq = kcalloc(wq->sq.size,
+				       sizeof(*wq->sq.sw_sq),
+				       GFP_KERNEL);
 		if (!wq->sq.sw_sq) {
 			ret = -ENOMEM;
 			goto free_rq_qid;
 		}
 
-		wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq,
-				 GFP_KERNEL);
-		if (!wq->rq.sw_rq) {
-			ret = -ENOMEM;
-			goto free_sw_sq;
+		if (need_rq) {
+			wq->rq.sw_rq = kcalloc(wq->rq.size,
+					       sizeof(*wq->rq.sw_rq),
+					       GFP_KERNEL);
+			if (!wq->rq.sw_rq) {
+				ret = -ENOMEM;
+				goto free_sw_sq;
+			}
 		}
 	}
 
-	/*
-	 * RQT must be a power of 2 and at least 16 deep.
-	 */
-	wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
-	wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
-	if (!wq->rq.rqt_hwaddr) {
-		ret = -ENOMEM;
-		goto free_sw_rq;
+	if (need_rq) {
+		/*
+		 * RQT must be a power of 2 and at least 16 deep.
+		 */
+		wq->rq.rqt_size = roundup_pow_of_two(max_t(u16,
+							   wq->rq.size, 16));
+		wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
+		if (!wq->rq.rqt_hwaddr) {
+			ret = -ENOMEM;
+			goto free_sw_rq;
+		}
 	}
 
 	ret = alloc_sq(rdev, &wq->sq, user);
@@ -247,34 +260,39 @@  static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	memset(wq->sq.queue, 0, wq->sq.memsize);
 	dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
 
-	wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev),
-					  wq->rq.memsize, &(wq->rq.dma_addr),
-					  GFP_KERNEL);
-	if (!wq->rq.queue) {
-		ret = -ENOMEM;
-		goto free_sq;
+	if (need_rq) {
+		wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+						  wq->rq.memsize,
+						  &wq->rq.dma_addr,
+						  GFP_KERNEL);
+		if (!wq->rq.queue) {
+			ret = -ENOMEM;
+			goto free_sq;
+		}
+		pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
+			 wq->sq.queue,
+			 (unsigned long long)virt_to_phys(wq->sq.queue),
+			 wq->rq.queue,
+			 (unsigned long long)virt_to_phys(wq->rq.queue));
+		memset(wq->rq.queue, 0, wq->rq.memsize);
+		dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
 	}
-	pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
-		 wq->sq.queue,
-		 (unsigned long long)virt_to_phys(wq->sq.queue),
-		 wq->rq.queue,
-		 (unsigned long long)virt_to_phys(wq->rq.queue));
-	memset(wq->rq.queue, 0, wq->rq.memsize);
-	dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
 
 	wq->db = rdev->lldi.db_reg;
 
 	wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
 					 &wq->sq.bar2_qid,
 					 user ? &wq->sq.bar2_pa : NULL);
-	wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS,
-					 &wq->rq.bar2_qid,
-					 user ? &wq->rq.bar2_pa : NULL);
+	if (need_rq)
+		wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
+						 T4_BAR2_QTYPE_EGRESS,
+						 &wq->rq.bar2_qid,
+						 user ? &wq->rq.bar2_pa : NULL);
 
 	/*
 	 * User mode must have bar2 access.
 	 */
-	if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
+	if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) {
 		pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
 			pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
 		goto free_dma;
@@ -285,7 +303,8 @@  static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 
 	/* build fw_ri_res_wr */
 	wr_len = sizeof *res_wr + 2 * sizeof *res;
-
+	if (need_rq)
+		wr_len += sizeof(*res);
 	skb = alloc_skb(wr_len, GFP_KERNEL);
 	if (!skb) {
 		ret = -ENOMEM;
@@ -296,7 +315,7 @@  static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	res_wr = __skb_put_zero(skb, wr_len);
 	res_wr->op_nres = cpu_to_be32(
 			FW_WR_OP_V(FW_RI_RES_WR) |
-			FW_RI_RES_WR_NRES_V(2) |
+			FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) |
 			FW_WR_COMPL_F);
 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
 	res_wr->cookie = (uintptr_t)wr_waitp;
@@ -327,30 +346,36 @@  static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 		FW_RI_RES_WR_EQSIZE_V(eqsize));
 	res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
 	res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
-	res++;
-	res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
-	res->u.sqrq.op = FW_RI_RES_OP_WRITE;
 
-	/*
-	 * eqsize is the number of 64B entries plus the status page size.
-	 */
-	eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
-		rdev->hw_queue.t4_eq_status_entries;
-	res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
-		FW_RI_RES_WR_HOSTFCMODE_V(0) |	/* no host cidx updates */
-		FW_RI_RES_WR_CPRIO_V(0) |	/* don't keep in chip cache */
-		FW_RI_RES_WR_PCIECHN_V(0) |	/* set by uP at ri_init time */
-		FW_RI_RES_WR_IQID_V(rcq->cqid));
-	res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
-		FW_RI_RES_WR_DCAEN_V(0) |
-		FW_RI_RES_WR_DCACPU_V(0) |
-		FW_RI_RES_WR_FBMIN_V(2) |
-		FW_RI_RES_WR_FBMAX_V(3) |
-		FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
-		FW_RI_RES_WR_CIDXFTHRESH_V(0) |
-		FW_RI_RES_WR_EQSIZE_V(eqsize));
-	res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
-	res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
+	if (need_rq) {
+		res++;
+		res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
+		res->u.sqrq.op = FW_RI_RES_OP_WRITE;
+
+		/*
+		 * eqsize is the number of 64B entries plus the status page size
+		 */
+		eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
+			rdev->hw_queue.t4_eq_status_entries;
+		res->u.sqrq.fetchszm_to_iqid =
+			/* no host cidx updates */
+			cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
+			/* don't keep in chip cache */
+			FW_RI_RES_WR_CPRIO_V(0) |
+			/* set by uP at ri_init time */
+			FW_RI_RES_WR_PCIECHN_V(0) |
+			FW_RI_RES_WR_IQID_V(rcq->cqid));
+		res->u.sqrq.dcaen_to_eqsize =
+			cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
+			FW_RI_RES_WR_DCACPU_V(0) |
+			FW_RI_RES_WR_FBMIN_V(2) |
+			FW_RI_RES_WR_FBMAX_V(3) |
+			FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
+			FW_RI_RES_WR_CIDXFTHRESH_V(0) |
+			FW_RI_RES_WR_EQSIZE_V(eqsize));
+		res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
+		res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
+	}
 
 	c4iw_init_wr_wait(wr_waitp);
 	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__);
@@ -363,19 +388,23 @@  static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 
 	return 0;
 free_dma:
-	dma_free_coherent(&(rdev->lldi.pdev->dev),
-			  wq->rq.memsize, wq->rq.queue,
-			  dma_unmap_addr(&wq->rq, mapping));
+	if (need_rq)
+		dma_free_coherent(&rdev->lldi.pdev->dev,
+				  wq->rq.memsize, wq->rq.queue,
+				  dma_unmap_addr(&wq->rq, mapping));
 free_sq:
 	dealloc_sq(rdev, &wq->sq);
 free_hwaddr:
-	c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+	if (need_rq)
+		c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
 free_sw_rq:
-	kfree(wq->rq.sw_rq);
+	if (need_rq)
+		kfree(wq->rq.sw_rq);
 free_sw_sq:
 	kfree(wq->sq.sw_sq);
 free_rq_qid:
-	c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+	if (need_rq)
+		c4iw_put_qpid(rdev, wq->rq.qid, uctx);
 free_sq_qid:
 	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
 	return ret;
@@ -605,6 +634,20 @@  static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
 	return 0;
 }
 
+static int build_srq_recv(union t4_recv_wr *wqe, struct ib_recv_wr *wr,
+			  u8 *len16)
+{
+	int ret;
+
+	ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1),
+			 &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
+	if (ret)
+		return ret;
+	*len16 = DIV_ROUND_UP(sizeof(wqe->recv) +
+			      wr->num_sge * sizeof(struct fw_ri_sge), 16);
+	return 0;
+}
+
 static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
 			      struct ib_reg_wr *wr, struct c4iw_mr *mhp,
 			      u8 *len16)
@@ -721,7 +764,7 @@  static void free_qp_work(struct work_struct *work)
 
 	pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
 	destroy_qp(&rhp->rdev, &qhp->wq,
-		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
 
 	if (ucontext)
 		c4iw_put_ucontext(ucontext);
@@ -1145,6 +1188,89 @@  int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	return err;
 }
 
+static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe,
+			 u64 wr_id, u8 len16)
+{
+	struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx];
+
+	pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n",
+		 __func__, srq->cidx, srq->pidx, srq->wq_pidx,
+		 srq->in_use, srq->ooo_count,
+		 (unsigned long long)wr_id, srq->pending_cidx,
+		 srq->pending_pidx, srq->pending_in_use);
+	pwr->wr_id = wr_id;
+	pwr->len16 = len16;
+	memcpy(&pwr->wqe, wqe, len16 * 16);
+	t4_srq_produce_pending_wr(srq);
+}
+
+int c4iw_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+		       struct ib_recv_wr **bad_wr)
+{
+	union t4_recv_wr *wqe, lwqe;
+	struct c4iw_srq *srq;
+	unsigned long flag;
+	u8 len16 = 0;
+	u16 idx = 0;
+	int err = 0;
+	u32 num_wrs;
+
+	srq = to_c4iw_srq(ibsrq);
+	spin_lock_irqsave(&srq->lock, flag);
+	num_wrs = t4_srq_avail(&srq->wq);
+	if (num_wrs == 0) {
+		spin_unlock_irqrestore(&srq->lock, flag);
+		return -ENOMEM;
+	}
+	while (wr) {
+		if (wr->num_sge > T4_MAX_RECV_SGE) {
+			err = -EINVAL;
+			*bad_wr = wr;
+			break;
+		}
+		wqe = &lwqe;
+		if (num_wrs)
+			err = build_srq_recv(wqe, wr, &len16);
+		else
+			err = -ENOMEM;
+		if (err) {
+			*bad_wr = wr;
+			break;
+		}
+
+		wqe->recv.opcode = FW_RI_RECV_WR;
+		wqe->recv.r1 = 0;
+		wqe->recv.wrid = srq->wq.pidx;
+		wqe->recv.r2[0] = 0;
+		wqe->recv.r2[1] = 0;
+		wqe->recv.r2[2] = 0;
+		wqe->recv.len16 = len16;
+
+		if (srq->wq.ooo_count ||
+		    srq->wq.pending_in_use ||
+		    srq->wq.sw_rq[srq->wq.pidx].valid) {
+			defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16);
+		} else {
+			srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id;
+			srq->wq.sw_rq[srq->wq.pidx].valid = 1;
+			c4iw_copy_wr_to_srq(&srq->wq, wqe, len16);
+			pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n",
+				 __func__, srq->wq.cidx,
+				 srq->wq.pidx, srq->wq.wq_pidx,
+				 srq->wq.in_use,
+				 (unsigned long long)wr->wr_id);
+			t4_srq_produce(&srq->wq, len16);
+			idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
+		}
+		wr = wr->next;
+		num_wrs--;
+	}
+	if (idx)
+		t4_ring_srq_db(&srq->wq, idx, len16, wqe);
+	spin_unlock_irqrestore(&srq->lock, flag);
+	return err;
+}
+
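For context, a minimal sketch of how a kernel consumer would reach
c4iw_post_srq_recv() through the core verbs layer. The helper name, and
the assumption that the caller already holds a DMA-mapped buffer and an
lkey, are illustrative only and not part of this patch:

#include <rdma/ib_verbs.h>

/* Post one receive buffer to an SRQ; dma_addr/len/lkey are assumed to
 * come from ib_dma_map_single() and pd->local_dma_lkey. */
static int example_post_one_srq_recv(struct ib_srq *srq, u64 dma_addr,
				     u32 len, u32 lkey, u64 wr_id)
{
	struct ib_sge sge = {
		.addr	= dma_addr,
		.length	= len,
		.lkey	= lkey,
	};
	struct ib_recv_wr wr = {
		.wr_id	 = wr_id,
		.sg_list = &sge,
		.num_sge = 1,
	};
	struct ib_recv_wr *bad_wr;

	/* Dispatches to c4iw_post_srq_recv() for an iw_cxgb4 SRQ. */
	return ib_post_srq_recv(srq, &wr, &bad_wr);
}
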
 static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
 				    u8 *ecode)
 {
@@ -1321,7 +1447,7 @@  static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 		       struct c4iw_cq *schp)
 {
 	int count;
-	int rq_flushed, sq_flushed;
+	int rq_flushed = 0, sq_flushed;
 	unsigned long flag;
 
 	pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp);
@@ -1340,11 +1466,13 @@  static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 		return;
 	}
 	qhp->wq.flushed = 1;
-	t4_set_wq_in_error(&qhp->wq);
+	t4_set_wq_in_error(&qhp->wq, 0);
 
 	c4iw_flush_hw_cq(rchp, qhp);
-	c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
-	rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+	if (!qhp->srq) {
+		c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
+		rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+	}
 
 	if (schp != rchp)
 		c4iw_flush_hw_cq(schp, qhp);
@@ -1388,7 +1516,7 @@  static void flush_qp(struct c4iw_qp *qhp)
 	schp = to_c4iw_cq(qhp->ibqp.send_cq);
 
 	if (qhp->ibqp.uobject) {
-		t4_set_wq_in_error(&qhp->wq);
+		t4_set_wq_in_error(&qhp->wq, 0);
 		t4_set_cq_in_error(&rchp->cq);
 		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
 		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
@@ -1517,16 +1645,21 @@  static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
 	wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
 	wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
 	wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
-	wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
+	if (qhp->srq) {
+		wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ |
+						  qhp->srq->idx);
+	} else {
+		wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
+		wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
+		wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
+						   rhp->rdev.lldi.vr->rq.start);
+	}
 	wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
 	wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
 	wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
 	wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
 	wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq);
 	wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq);
-	wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
-	wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
-					 rhp->rdev.lldi.vr->rq.start);
 	if (qhp->attr.mpa_attr.initiator)
 		build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
 
@@ -1643,7 +1776,7 @@  int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 	case C4IW_QP_STATE_RTS:
 		switch (attrs->next_state) {
 		case C4IW_QP_STATE_CLOSING:
-			t4_set_wq_in_error(&qhp->wq);
+			t4_set_wq_in_error(&qhp->wq, 0);
 			set_state(qhp, C4IW_QP_STATE_CLOSING);
 			ep = qhp->ep;
 			if (!internal) {
@@ -1656,7 +1789,7 @@  int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 				goto err;
 			break;
 		case C4IW_QP_STATE_TERMINATE:
-			t4_set_wq_in_error(&qhp->wq);
+			t4_set_wq_in_error(&qhp->wq, 0);
 			set_state(qhp, C4IW_QP_STATE_TERMINATE);
 			qhp->attr.layer_etype = attrs->layer_etype;
 			qhp->attr.ecode = attrs->ecode;
@@ -1673,7 +1806,7 @@  int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 			}
 			break;
 		case C4IW_QP_STATE_ERROR:
-			t4_set_wq_in_error(&qhp->wq);
+			t4_set_wq_in_error(&qhp->wq, 0);
 			set_state(qhp, C4IW_QP_STATE_ERROR);
 			if (!internal) {
 				abort = 1;
@@ -1819,7 +1952,7 @@  struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	struct c4iw_cq *schp;
 	struct c4iw_cq *rchp;
 	struct c4iw_create_qp_resp uresp;
-	unsigned int sqsize, rqsize;
+	unsigned int sqsize, rqsize = 0;
 	struct c4iw_ucontext *ucontext;
 	int ret;
 	struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
@@ -1840,11 +1973,13 @@  struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
 		return ERR_PTR(-EINVAL);
 
-	if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
-		return ERR_PTR(-E2BIG);
-	rqsize = attrs->cap.max_recv_wr + 1;
-	if (rqsize < 8)
-		rqsize = 8;
+	if (!attrs->srq) {
+		if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
+			return ERR_PTR(-E2BIG);
+		rqsize = attrs->cap.max_recv_wr + 1;
+		if (rqsize < 8)
+			rqsize = 8;
+	}
 
 	if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size)
 		return ERR_PTR(-E2BIG);
@@ -1869,19 +2004,23 @@  struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		(sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
 		sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64);
 	qhp->wq.sq.flush_cidx = -1;
-	qhp->wq.rq.size = rqsize;
-	qhp->wq.rq.memsize =
-		(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
-		sizeof(*qhp->wq.rq.queue);
+	if (!attrs->srq) {
+		qhp->wq.rq.size = rqsize;
+		qhp->wq.rq.memsize =
+			(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
+			sizeof(*qhp->wq.rq.queue);
+	}
 
 	if (ucontext) {
 		qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
-		qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE);
+		if (!attrs->srq)
+			qhp->wq.rq.memsize =
+				roundup(qhp->wq.rq.memsize, PAGE_SIZE);
 	}
 
 	ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
 			ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
-			qhp->wr_waitp);
+			qhp->wr_waitp, !attrs->srq);
 	if (ret)
 		goto err_free_wr_wait;
 
@@ -1894,10 +2033,12 @@  struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
 	qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
 	qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
-	qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
 	qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
 	qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
-	qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+	if (!attrs->srq) {
+		qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
+		qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+	}
 	qhp->attr.state = C4IW_QP_STATE_IDLE;
 	qhp->attr.next_state = C4IW_QP_STATE_IDLE;
 	qhp->attr.enable_rdma_read = 1;
@@ -1922,20 +2063,25 @@  struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 			ret = -ENOMEM;
 			goto err_remove_handle;
 		}
-		rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
-		if (!rq_key_mm) {
-			ret = -ENOMEM;
-			goto err_free_sq_key;
+		if (!attrs->srq) {
+			rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
+			if (!rq_key_mm) {
+				ret = -ENOMEM;
+				goto err_free_sq_key;
+			}
 		}
 		sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL);
 		if (!sq_db_key_mm) {
 			ret = -ENOMEM;
 			goto err_free_rq_key;
 		}
-		rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
-		if (!rq_db_key_mm) {
-			ret = -ENOMEM;
-			goto err_free_sq_db_key;
+		if (!attrs->srq) {
+			rq_db_key_mm =
+				kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
+			if (!rq_db_key_mm) {
+				ret = -ENOMEM;
+				goto err_free_sq_db_key;
+			}
 		}
 		if (t4_sq_onchip(&qhp->wq.sq)) {
 			ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm),
@@ -1951,9 +2097,11 @@  struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		uresp.sqid = qhp->wq.sq.qid;
 		uresp.sq_size = qhp->wq.sq.size;
 		uresp.sq_memsize = qhp->wq.sq.memsize;
-		uresp.rqid = qhp->wq.rq.qid;
-		uresp.rq_size = qhp->wq.rq.size;
-		uresp.rq_memsize = qhp->wq.rq.memsize;
+		if (!attrs->srq) {
+			uresp.rqid = qhp->wq.rq.qid;
+			uresp.rq_size = qhp->wq.rq.size;
+			uresp.rq_memsize = qhp->wq.rq.memsize;
+		}
 		spin_lock(&ucontext->mmap_lock);
 		if (ma_sync_key_mm) {
 			uresp.ma_sync_key = ucontext->key;
@@ -1963,12 +2111,16 @@  struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		}
 		uresp.sq_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
-		uresp.rq_key = ucontext->key;
-		ucontext->key += PAGE_SIZE;
+		if (!attrs->srq) {
+			uresp.rq_key = ucontext->key;
+			ucontext->key += PAGE_SIZE;
+		}
 		uresp.sq_db_gts_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
-		uresp.rq_db_gts_key = ucontext->key;
-		ucontext->key += PAGE_SIZE;
+		if (!attrs->srq) {
+			uresp.rq_db_gts_key = ucontext->key;
+			ucontext->key += PAGE_SIZE;
+		}
 		spin_unlock(&ucontext->mmap_lock);
 		ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
 		if (ret)
@@ -1977,18 +2129,23 @@  struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		sq_key_mm->addr = qhp->wq.sq.phys_addr;
 		sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
 		insert_mmap(ucontext, sq_key_mm);
-		rq_key_mm->key = uresp.rq_key;
-		rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
-		rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
-		insert_mmap(ucontext, rq_key_mm);
+		if (!attrs->srq) {
+			rq_key_mm->key = uresp.rq_key;
+			rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
+			rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
+			insert_mmap(ucontext, rq_key_mm);
+		}
 		sq_db_key_mm->key = uresp.sq_db_gts_key;
 		sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
 		sq_db_key_mm->len = PAGE_SIZE;
 		insert_mmap(ucontext, sq_db_key_mm);
-		rq_db_key_mm->key = uresp.rq_db_gts_key;
-		rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa;
-		rq_db_key_mm->len = PAGE_SIZE;
-		insert_mmap(ucontext, rq_db_key_mm);
+		if (!attrs->srq) {
+			rq_db_key_mm->key = uresp.rq_db_gts_key;
+			rq_db_key_mm->addr =
+				(u64)(unsigned long)qhp->wq.rq.bar2_pa;
+			rq_db_key_mm->len = PAGE_SIZE;
+			insert_mmap(ucontext, rq_db_key_mm);
+		}
 		if (ma_sync_key_mm) {
 			ma_sync_key_mm->key = uresp.ma_sync_key;
 			ma_sync_key_mm->addr =
@@ -2001,7 +2158,19 @@  struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		c4iw_get_ucontext(ucontext);
 		qhp->ucontext = ucontext;
 	}
+	if (!attrs->srq) {
+		qhp->wq.qp_errp =
+			&qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err;
+	} else {
+		qhp->wq.qp_errp =
+			&qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err;
+		qhp->wq.srqidxp =
+			&qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx;
+	}
+
 	qhp->ibqp.qp_num = qhp->wq.sq.qid;
+	if (attrs->srq)
+		qhp->srq = to_c4iw_srq(attrs->srq);
 	INIT_LIST_HEAD(&qhp->db_fc_entry);
 	pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
 		 qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
@@ -2011,18 +2180,20 @@  struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 err_free_ma_sync_key:
 	kfree(ma_sync_key_mm);
 err_free_rq_db_key:
-	kfree(rq_db_key_mm);
+	if (!attrs->srq)
+		kfree(rq_db_key_mm);
 err_free_sq_db_key:
 	kfree(sq_db_key_mm);
 err_free_rq_key:
-	kfree(rq_key_mm);
+	if (!attrs->srq)
+		kfree(rq_key_mm);
 err_free_sq_key:
 	kfree(sq_key_mm);
 err_remove_handle:
 	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
 err_destroy_qp:
 	destroy_qp(&rhp->rdev, &qhp->wq,
-		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq);
 err_free_wr_wait:
 	c4iw_put_wr_wait(qhp->wr_waitp);
 err_free_qhp:
@@ -2088,6 +2259,45 @@  struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
 	return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
 }
 
+void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq)
+{
+	struct ib_event event = {0};
+
+	event.device = &srq->rhp->ibdev;
+	event.element.srq = &srq->ibsrq;
+	event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+	ib_dispatch_event(&event);
+}
+
+int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
+		    enum ib_srq_attr_mask srq_attr_mask,
+		    struct ib_udata *udata)
+{
+	struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
+	int ret = 0;
+
+	/*
+	 * XXX: a zero attr mask is a SW trigger to report srq_limit reached.
+	 */
+	if (udata && !srq_attr_mask) {
+		c4iw_dispatch_srq_limit_reached_event(srq);
+		goto out;
+	}
+
+	/* no support for this yet */
+	if (srq_attr_mask & IB_SRQ_MAX_WR) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) {
+		srq->armed = true;
+		srq->srq_limit = attr->srq_limit;
+	}
+out:
+	return ret;
+}
+
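As a hedged illustration of the consumer side of the two paths above
(the function and variable names here are made up for the example): a
kernel ULP arms the limit with ib_modify_srq(), and the driver's
ib_dispatch_event() call lands in the SRQ's async event handler.

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

/* Re-arm the SRQ limit so an event fires when fewer than 'watermark'
 * receive buffers remain. */
static int example_arm_srq_limit(struct ib_srq *srq, u32 watermark)
{
	struct ib_srq_attr attr = {
		.srq_limit = watermark,
	};

	/* For iw_cxgb4 this records attr->srq_limit and sets srq->armed. */
	return ib_modify_srq(srq, &attr, IB_SRQ_LIMIT);
}

/* Registered via ib_srq_init_attr.event_handler at SRQ creation time. */
static void example_srq_event_handler(struct ib_event *event, void *ctx)
{
	if (event->event == IB_EVENT_SRQ_LIMIT_REACHED)
		pr_info("SRQ %p is running low on receive buffers\n",
			event->element.srq);
}
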
 int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		     int attr_mask, struct ib_qp_init_attr *init_attr)
 {
@@ -2104,3 +2314,358 @@  int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
 	return 0;
 }
+
+static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
+			   struct c4iw_wr_wait *wr_waitp)
+{
+	struct c4iw_rdev *rdev = &srq->rhp->rdev;
+	struct sk_buff *skb = srq->destroy_skb;
+	struct t4_srq *wq = &srq->wq;
+	struct fw_ri_res_wr *res_wr;
+	struct fw_ri_res *res;
+	int wr_len;
+
+	wr_len = sizeof(*res_wr) + sizeof(*res);
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+	memset(res_wr, 0, wr_len);
+	res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
+			FW_RI_RES_WR_NRES_V(1) |
+			FW_WR_COMPL_F);
+	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+	res_wr->cookie = (uintptr_t)wr_waitp;
+	res = res_wr->res;
+	res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
+	res->u.srq.op = FW_RI_RES_OP_RESET;
+	res->u.srq.srqid = cpu_to_be32(srq->idx);
+	res->u.srq.eqid = cpu_to_be32(wq->qid);
+
+	c4iw_init_wr_wait(wr_waitp);
+	c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
+
+	dma_free_coherent(&rdev->lldi.pdev->dev,
+			  wq->memsize, wq->queue,
+			  dma_unmap_addr(wq, mapping));
+	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
+	kfree(wq->sw_rq);
+	c4iw_put_qpid(rdev, wq->qid, uctx);
+}
+
+static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
+			   struct c4iw_wr_wait *wr_waitp)
+{
+	struct c4iw_rdev *rdev = &srq->rhp->rdev;
+	int user = (uctx != &rdev->uctx);
+	struct t4_srq *wq = &srq->wq;
+	struct fw_ri_res_wr *res_wr;
+	struct fw_ri_res *res;
+	struct sk_buff *skb;
+	int wr_len;
+	int eqsize;
+	int ret = -ENOMEM;
+
+	wq->qid = c4iw_get_qpid(rdev, uctx);
+	if (!wq->qid)
+		goto err;
+
+	if (!user) {
+		wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq),
+				    GFP_KERNEL);
+		if (!wq->sw_rq)
+			goto err_put_qpid;
+		wq->pending_wrs = kcalloc(srq->wq.size,
+					  sizeof(*srq->wq.pending_wrs),
+					  GFP_KERNEL);
+		if (!wq->pending_wrs)
+			goto err_free_sw_rq;
+	}
+
+	wq->rqt_size = wq->size;
+	wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size);
+	if (!wq->rqt_hwaddr)
+		goto err_free_pending_wrs;
+	wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
+		T4_RQT_ENTRY_SHIFT;
+
+	wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+				       wq->memsize, &wq->dma_addr,
+			GFP_KERNEL);
+	if (!wq->queue)
+		goto err_free_rqtpool;
+
+	memset(wq->queue, 0, wq->memsize);
+	dma_unmap_addr_set(wq, mapping, wq->dma_addr);
+
+	wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
+				      &wq->bar2_qid,
+			user ? &wq->bar2_pa : NULL);
+
+	/*
+	 * User mode must have bar2 access.
+	 */
+
+	if (user && !wq->bar2_va) {
+		pr_warn(MOD "%s: srqid %u not in BAR2 range.\n",
+			pci_name(rdev->lldi.pdev), wq->qid);
+		ret = -EINVAL;
+		goto err_free_queue;
+	}
+
+	/* build fw_ri_res_wr */
+	wr_len = sizeof(*res_wr) + sizeof(*res);
+
+	skb = alloc_skb(wr_len, GFP_KERNEL);
+	if (!skb)
+		goto err_free_queue;
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+	memset(res_wr, 0, wr_len);
+	res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
+			FW_RI_RES_WR_NRES_V(1) |
+			FW_WR_COMPL_F);
+	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+	res_wr->cookie = (uintptr_t)wr_waitp;
+	res = res_wr->res;
+	res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
+	res->u.srq.op = FW_RI_RES_OP_WRITE;
+
+	/*
+	 * eqsize is the number of 64B entries plus the status page size.
+	 */
+	eqsize = wq->size * T4_RQ_NUM_SLOTS +
+		rdev->hw_queue.t4_eq_status_entries;
+	res->u.srq.eqid = cpu_to_be32(wq->qid);
+	res->u.srq.fetchszm_to_iqid =
+						/* no host cidx updates */
+		cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
+		FW_RI_RES_WR_CPRIO_V(0) |       /* don't keep in chip cache */
+		FW_RI_RES_WR_PCIECHN_V(0) |     /* set by uP at ri_init time */
+		FW_RI_RES_WR_FETCHRO_V(0));     /* relaxed_ordering */
+	res->u.srq.dcaen_to_eqsize =
+		cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
+		FW_RI_RES_WR_DCACPU_V(0) |
+		FW_RI_RES_WR_FBMIN_V(2) |
+		FW_RI_RES_WR_FBMAX_V(3) |
+		FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
+		FW_RI_RES_WR_CIDXFTHRESH_V(0) |
+		FW_RI_RES_WR_EQSIZE_V(eqsize));
+	res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr);
+	res->u.srq.srqid = cpu_to_be32(srq->idx);
+	res->u.srq.pdid = cpu_to_be32(srq->pdid);
+	res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size);
+	res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr -
+			rdev->lldi.vr->rq.start);
+
+	c4iw_init_wr_wait(wr_waitp);
+
+	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__);
+	if (ret)
+		goto err_free_queue;
+
+	pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx"
+		 " bar2_addr %p rqt addr 0x%x size %d\n",
+		 __func__, srq->idx, wq->qid, srq->pdid, wq->queue,
+		 (u64)virt_to_phys(wq->queue), wq->bar2_va,
+		 wq->rqt_hwaddr, wq->rqt_size);
+
+	return 0;
+err_free_queue:
+	dma_free_coherent(&rdev->lldi.pdev->dev,
+			  wq->memsize, wq->queue,
+			  dma_unmap_addr(wq, mapping));
+err_free_rqtpool:
+	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
+err_free_pending_wrs:
+	if (!user)
+		kfree(wq->pending_wrs);
+err_free_sw_rq:
+	if (!user)
+		kfree(wq->sw_rq);
+err_put_qpid:
+	c4iw_put_qpid(rdev, wq->qid, uctx);
+err:
+	return ret;
+}
+
+void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16)
+{
+	u64 *src, *dst;
+
+	src = (u64 *)wqe;
+	dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE);
+	while (len16) {
+		*dst++ = *src++;
+		if (dst >= (u64 *)&srq->queue[srq->size])
+			dst = (u64 *)srq->queue;
+		*dst++ = *src++;
+		if (dst >= (u64 *)&srq->queue[srq->size])
+			dst = (u64 *)srq->queue;
+		len16--;
+	}
+}
+
+struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
+			       struct ib_udata *udata)
+{
+	struct c4iw_dev *rhp;
+	struct c4iw_srq *srq;
+	struct c4iw_pd *php;
+	struct c4iw_create_srq_resp uresp;
+	struct c4iw_ucontext *ucontext;
+	struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm;
+	int rqsize;
+	int ret;
+	int wr_len;
+
+	pr_debug("%s ib_pd %p\n", __func__, pd);
+
+	php = to_c4iw_pd(pd);
+	rhp = php->rhp;
+
+	if (!rhp->rdev.lldi.vr->srq.size)
+		return ERR_PTR(-EINVAL);
+	if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size)
+		return ERR_PTR(-E2BIG);
+	if (attrs->attr.max_sge > T4_MAX_RECV_SGE)
+		return ERR_PTR(-E2BIG);
+
+	/*
+	 * SRQ RQT and RQ must be a power of 2 and at least 16 deep.
+	 */
+	rqsize = attrs->attr.max_wr + 1;
+	rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
+
+	ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+
+	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+	if (!srq)
+		return ERR_PTR(-ENOMEM);
+
+	srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
+	if (!srq->wr_waitp) {
+		ret = -ENOMEM;
+		goto err_free_srq;
+	}
+
+	srq->idx = c4iw_alloc_srq_idx(&rhp->rdev);
+	if (srq->idx < 0) {
+		ret = -ENOMEM;
+		goto err_free_wr_wait;
+	}
+
+	wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
+	srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
+	if (!srq->destroy_skb) {
+		ret = -ENOMEM;
+		goto err_free_srq_idx;
+	}
+
+	srq->rhp = rhp;
+	srq->pdid = php->pdid;
+
+	srq->wq.size = rqsize;
+	srq->wq.memsize =
+		(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
+		sizeof(*srq->wq.queue);
+	if (ucontext)
+		srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE);
+
+	ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx :
+			&rhp->rdev.uctx, srq->wr_waitp);
+	if (ret)
+		goto err_free_skb;
+	attrs->attr.max_wr = rqsize - 1;
+
+	if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
+		srq->flags = T4_SRQ_LIMIT_SUPPORT;
+
+	ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid);
+	if (ret)
+		goto err_free_queue;
+
+	if (udata) {
+		srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
+		if (!srq_key_mm) {
+			ret = -ENOMEM;
+			goto err_remove_handle;
+		}
+		srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
+		if (!srq_db_key_mm) {
+			ret = -ENOMEM;
+			goto err_free_srq_key_mm;
+		}
+		uresp.flags = srq->flags;
+		uresp.qid_mask = rhp->rdev.qpmask;
+		uresp.srqid = srq->wq.qid;
+		uresp.srq_size = srq->wq.size;
+		uresp.srq_memsize = srq->wq.memsize;
+		uresp.rqt_abs_idx = srq->wq.rqt_abs_idx;
+		spin_lock(&ucontext->mmap_lock);
+		uresp.srq_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		uresp.srq_db_gts_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		spin_unlock(&ucontext->mmap_lock);
+		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+		if (ret)
+			goto err_free_srq_db_key_mm;
+		srq_key_mm->key = uresp.srq_key;
+		srq_key_mm->addr = virt_to_phys(srq->wq.queue);
+		srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
+		insert_mmap(ucontext, srq_key_mm);
+		srq_db_key_mm->key = uresp.srq_db_gts_key;
+		srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
+		srq_db_key_mm->len = PAGE_SIZE;
+		insert_mmap(ucontext, srq_db_key_mm);
+	}
+
+	pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n",
+		 __func__, srq->wq.qid, srq->idx, srq->wq.size,
+			(unsigned long)srq->wq.memsize, attrs->attr.max_wr);
+
+	spin_lock_init(&srq->lock);
+	return &srq->ibsrq;
+err_free_srq_db_key_mm:
+	kfree(srq_db_key_mm);
+err_free_srq_key_mm:
+	kfree(srq_key_mm);
+err_remove_handle:
+	remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
+err_free_queue:
+	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+		       srq->wr_waitp);
+err_free_skb:
+	if (srq->destroy_skb)
+		kfree_skb(srq->destroy_skb);
+err_free_srq_idx:
+	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
+err_free_wr_wait:
+	c4iw_put_wr_wait(srq->wr_waitp);
+err_free_srq:
+	kfree(srq);
+	return ERR_PTR(ret);
+}
+
+int c4iw_destroy_srq(struct ib_srq *ibsrq)
+{
+	struct c4iw_dev *rhp;
+	struct c4iw_srq *srq;
+	struct c4iw_ucontext *ucontext;
+
+	srq = to_c4iw_srq(ibsrq);
+	rhp = srq->rhp;
+
+	pr_debug("%s id %d\n", __func__, srq->wq.qid);
+
+	remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
+	ucontext = ibsrq->uobject ?
+		to_c4iw_ucontext(ibsrq->uobject->context) : NULL;
+	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+		       srq->wr_waitp);
+	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
+	c4iw_put_wr_wait(srq->wr_waitp);
+	kfree(srq);
+	return 0;
+}
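To tie the new verbs together, here is a hedged sketch (illustrative
names and queue depths, not part of the patch) of a kernel ULP creating
an SRQ and then building a QP on top of it, which is what drives the
attrs->srq handling added to c4iw_create_qp() earlier in this file:

#include <rdma/ib_verbs.h>

static struct ib_srq *example_create_srq(struct ib_pd *pd,
					 void (*handler)(struct ib_event *,
							 void *))
{
	struct ib_srq_init_attr srq_attr = {
		.event_handler	= handler,
		.attr = {
			.max_wr		= 256,	/* rounded up by the driver */
			.max_sge	= 1,
		},
	};

	/* Reaches c4iw_create_srq() above. */
	return ib_create_srq(pd, &srq_attr);
}

static struct ib_qp *example_create_qp_on_srq(struct ib_pd *pd,
					      struct ib_cq *cq,
					      struct ib_srq *srq)
{
	struct ib_qp_init_attr qp_attr = {
		.send_cq	= cq,
		.recv_cq	= cq,
		.srq		= srq,	/* QP is built without its own RQ */
		.cap = {
			.max_send_wr	= 128,
			.max_send_sge	= 1,
		},
		.qp_type	= IB_QPT_RC,
	};

	return ib_create_qp(pd, &qp_attr);
}
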
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index 0ef25ae05e6f..57ed26b3cc21 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -53,7 +53,8 @@  static int c4iw_init_qid_table(struct c4iw_rdev *rdev)
 }
 
 /* nr_* must be power of 2 */
-int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
+		       u32 nr_pdid, u32 nr_srqt)
 {
 	int err = 0;
 	err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1,
@@ -67,7 +68,17 @@  int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
 					nr_pdid, 1, 0);
 	if (err)
 		goto pdid_err;
+	if (!nr_srqt)
+		err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0,
+					  1, 1, 0);
+	else
+		err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0,
+					  nr_srqt, 0, 0);
+	if (err)
+		goto srq_err;
 	return 0;
+ srq_err:
+	c4iw_id_table_free(&rdev->resource.pdid_table);
  pdid_err:
 	c4iw_id_table_free(&rdev->resource.qid_table);
  qid_err:
@@ -371,13 +382,21 @@  void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
 int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
 {
 	unsigned rqt_start, rqt_chunk, rqt_top;
+	int skip = 0;
 
 	rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
 	if (!rdev->rqt_pool)
 		return -ENOMEM;
 
-	rqt_start = rdev->lldi.vr->rq.start;
-	rqt_chunk = rdev->lldi.vr->rq.size;
+	/*
+	 * If SRQs are supported, then never use the first RQE from
+	 * the RQT region. This is because HW uses RQT index 0 as NULL.
+	 */
+	if (rdev->lldi.vr->srq.size)
+		skip = T4_RQT_ENTRY_SIZE;
+
+	rqt_start = rdev->lldi.vr->rq.start + skip;
+	rqt_chunk = rdev->lldi.vr->rq.size - skip;
 	rqt_top = rqt_start + rqt_chunk;
 
 	while (rqt_start < rqt_top) {
@@ -405,6 +424,32 @@  void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
 	kref_put(&rdev->rqt_kref, destroy_rqtpool);
 }
 
+int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev)
+{
+	int idx;
+
+	idx = c4iw_id_alloc(&rdev->resource.srq_table);
+	mutex_lock(&rdev->stats.lock);
+	if (idx == -1) {
+		rdev->stats.srqt.fail++;
+		mutex_unlock(&rdev->stats.lock);
+		return -ENOMEM;
+	}
+	rdev->stats.srqt.cur++;
+	if (rdev->stats.srqt.cur > rdev->stats.srqt.max)
+		rdev->stats.srqt.max = rdev->stats.srqt.cur;
+	mutex_unlock(&rdev->stats.lock);
+	return idx;
+}
+
+void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx)
+{
+	c4iw_id_free(&rdev->resource.srq_table, idx);
+	mutex_lock(&rdev->stats.lock);
+	rdev->stats.srqt.cur--;
+	mutex_unlock(&rdev->stats.lock);
+}
+
 /*
  * On-Chip QP Memory.
  */
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 0aa8044058d5..e62fb7d164ee 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -183,9 +183,15 @@  struct t4_cqe {
 			__be32 wrid_hi;
 			__be32 wrid_low;
 		} gen;
+		struct {
+			__be32 stag;
+			__be32 msn;
+			__be32 reserved;
+			__be32 abs_rqe_idx;
+		} srcqe;
 		u64 drain_cookie;
 	} u;
-	__be64 reserved;
+	__be64 reserved[4];
 	__be64 bits_type_ts;
 };
 
@@ -480,7 +486,6 @@  static inline void t4_rq_produce(struct t4_wq *wq, u8 len16)
 static inline void t4_rq_consume(struct t4_wq *wq)
 {
 	wq->rq.in_use--;
-	wq->rq.msn++;
 	if (++wq->rq.cidx == wq->rq.size)
 		wq->rq.cidx = 0;
 }
@@ -633,12 +638,14 @@  static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
 
 static inline int t4_wq_in_error(struct t4_wq *wq)
 {
-	return wq->rq.queue[wq->rq.size].status.qp_err;
+	return *wq->qp_errp;
 }
 
-static inline void t4_set_wq_in_error(struct t4_wq *wq)
+static inline void t4_set_wq_in_error(struct t4_wq *wq, u32 srqidx)
 {
-	wq->rq.queue[wq->rq.size].status.qp_err = 1;
+	if (srqidx)
+		*wq->srqidxp = srqidx;
+	*wq->qp_errp = 1;
 }
 
 static inline void t4_disable_wq_db(struct t4_wq *wq)
diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h
index c1e846d442c2..4b95a1d306c1 100644
--- a/include/uapi/rdma/cxgb4-abi.h
+++ b/include/uapi/rdma/cxgb4-abi.h
@@ -44,6 +44,16 @@ 
  * In particular do not use pointer types -- pass pointers in __aligned_u64
  * instead.
  */
+
+enum {
+	C4IW_64B_CQE = (1 << 0)
+};
+
+struct c4iw_create_cq {
+	__u32 flags;
+	__u32 reserved;
+};
+
 struct c4iw_create_cq_resp {
 	__aligned_u64 key;
 	__aligned_u64 gts_key;
@@ -51,7 +61,7 @@  struct c4iw_create_cq_resp {
 	__u32 cqid;
 	__u32 size;
 	__u32 qid_mask;
-	__u32 reserved; /* explicit padding (optional for i386) */
+	__u32 flags;
 };
 
 enum {