diff mbox

[RFC,2/2] RDMA/isert: Support iWARP transport

Message ID 20150625153922.13272.41789.stgit@build.ogc.int (mailing list archive)
State Not Applicable
Headers show

Commit Message

Steve Wise June 25, 2015, 3:39 p.m. UTC
Memory regions that are the target of an iWARP RDMA READ RESPONSE need
REMOTE_WRITE access rights.  So enable REMOTE_WRITE for iWARP devices.

iWARP RDMA READ target sge depth is 1.  So save the max_read_sge in
the target device structure and use that when creating RDMA_READ work
requests.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Tested-by: Vasu Dev <vasu.dev@intel.com>
---
 drivers/infiniband/ulp/isert/ib_isert.c |   55 ++++++++++++++++++++++++++-----
 drivers/infiniband/ulp/isert/ib_isert.h |    1 +
 2 files changed, 48 insertions(+), 8 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Sagi Grimberg June 25, 2015, 4:51 p.m. UTC | #1
On 6/25/2015 6:39 PM, Steve Wise wrote:
> Memory regions that are the target of an iWARP RDMA READ RESPONSE need
> REMOTE_WRITE access rights.  So enable REMOTE_WRITE for iWARP devices.
>
> iWARP RDMA READ target sge depth is 1.  So save the max_read_sge in
> the target device structure and use that when creating RDMA_READ work
> requests.

Hi Steve,

CC'ing target-devel...

>
> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> Tested-by: Vasu Dev <vasu.dev@intel.com>
> ---
>   drivers/infiniband/ulp/isert/ib_isert.c |   55 ++++++++++++++++++++++++++-----
>   drivers/infiniband/ulp/isert/ib_isert.h |    1 +
>   2 files changed, 48 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
> index 9e7b492..b5cde5d 100644
> --- a/drivers/infiniband/ulp/isert/ib_isert.c
> +++ b/drivers/infiniband/ulp/isert/ib_isert.c
> @@ -165,6 +165,11 @@ isert_create_qp(struct isert_conn *isert_conn,
>   	attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2);
>   	isert_conn->max_sge = attr.cap.max_send_sge;
>
> +	if (rdma_cap_read_multi_sge(device->ib_device, cma_id->port_num))
> +		isert_conn->max_read_sge = isert_conn->max_sge;
> +	else
> +		isert_conn->max_read_sge = 1;
> +

I think it would make sense to change now max_sge to max_write_sge.

And, shouldn't we just use:
max_wr_sge = max(2, device->dev_attr.max_sge - 2);
max_rd_sge = device->dev_attr.max_sge_rd;

Does the Chelsio device reports max_sge_rd != 1 ?


>   	attr.cap.max_recv_sge = 1;
>   	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
>   	attr.qp_type = IB_QPT_RC;
> @@ -348,6 +353,17 @@ out_cq:
>   	return ret;
>   }
>
> +static int any_port_is_iwarp(struct isert_device *device)
> +{
> +	int i;
> +
> +	for (i = rdma_start_port(device->ib_device);
> +	     i <= rdma_end_port(device->ib_device); i++)
> +		if (rdma_protocol_iwarp(device->ib_device, i))
> +			return 1;
> +	return 0;
> +}
> +

This does not look like it belongs here... Can we
place this in rdma_cm?

>   static int
>   isert_create_device_ib_res(struct isert_device *device)
>   {
> @@ -383,7 +399,17 @@ isert_create_device_ib_res(struct isert_device *device)
>   		goto out_cq;
>   	}
>
> -	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE);
> +	/*
> +	 * IWARP transports need REMOTE_WRITE for MRs used as the target of
> +	 * an RDMA_READ.  Since the DMA MR is used for all ports, then if
> +	 * any port is running IWARP, add REMOTE_WRITE.
> +	 */
> +	if (any_port_is_iwarp(device))
> +		device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
> +						       IB_ACCESS_REMOTE_WRITE);
> +	else
> +		device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE);
> +
>   	if (IS_ERR(device->mr)) {
>   		ret = PTR_ERR(device->mr);
>   		isert_err("failed to create dma mr, device %p, ret=%d\n",
> @@ -2375,7 +2401,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
>   static int
>   isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
>   		    struct ib_sge *ib_sge, struct ib_send_wr *send_wr,
> -		    u32 data_left, u32 offset)
> +		    u32 data_left, u32 offset, u32 max_sge)
>   {
>   	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
>   	struct scatterlist *sg_start, *tmp_sg;
> @@ -2386,7 +2412,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
>
>   	sg_off = offset / PAGE_SIZE;
>   	sg_start = &cmd->se_cmd.t_data_sg[sg_off];
> -	sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge);
> +	sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, max_sge);
>   	page_off = offset % PAGE_SIZE;
>
>   	send_wr->sg_list = ib_sge;
> @@ -2430,8 +2456,9 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
>   	struct isert_data_buf *data = &wr->data;
>   	struct ib_send_wr *send_wr;
>   	struct ib_sge *ib_sge;
> -	u32 offset, data_len, data_left, rdma_write_max, va_offset = 0;
> +	u32 offset, data_len, data_left, rdma_max_len, va_offset = 0;
>   	int ret = 0, i, ib_sge_cnt;
> +	u32 max_sge;
>
>   	isert_cmd->tx_desc.isert_cmd = isert_cmd;
>
> @@ -2453,7 +2480,12 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
>   	}
>   	wr->ib_sge = ib_sge;
>
> -	wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
> +	if (wr->iser_ib_op == ISER_IB_RDMA_WRITE)
> +		max_sge = isert_conn->max_sge;
> +	else
> +		max_sge =  isert_conn->max_read_sge;
> +
> +	wr->send_wr_num = DIV_ROUND_UP(data->nents, max_sge);
>   	wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
>   				GFP_KERNEL);
>   	if (!wr->send_wr) {
> @@ -2463,11 +2495,11 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
>   	}
>
>   	wr->isert_cmd = isert_cmd;
> -	rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
>
> +	rdma_max_len = max_sge * PAGE_SIZE;
>   	for (i = 0; i < wr->send_wr_num; i++) {
>   		send_wr = &isert_cmd->rdma_wr.send_wr[i];
> -		data_len = min(data_left, rdma_write_max);
> +		data_len = min(data_left, rdma_max_len);
>
>   		send_wr->send_flags = 0;
>   		if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
> @@ -2489,7 +2521,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
>   		}
>
>   		ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
> -					send_wr, data_len, offset);
> +					send_wr, data_len, offset, max_sge);
>   		ib_sge += ib_sge_cnt;
>
>   		offset += data_len;
> @@ -2618,6 +2650,13 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
>   	fr_wr.wr.fast_reg.rkey = mr->rkey;
>   	fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE;
>
> +	/*
> +	 * IWARP transports need REMOTE_WRITE for MRs used as the target of
> +	 * an RDMA_READ.
> +	 */
> +	if (rdma_protocol_iwarp(ib_dev, isert_conn->cm_id->port_num))
> +		fr_wr.wr.fast_reg.access_flags |= IB_ACCESS_REMOTE_WRITE;
> +
>   	if (!wr)
>   		wr = &fr_wr;
>   	else
> diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
> index 9ec23a7..47cf11b 100644
> --- a/drivers/infiniband/ulp/isert/ib_isert.h
> +++ b/drivers/infiniband/ulp/isert/ib_isert.h
> @@ -153,6 +153,7 @@ struct isert_conn {
>   	u32			initiator_depth;
>   	bool			pi_support;
>   	u32			max_sge;
> +	u32			max_read_sge;
>   	char			*login_buf;
>   	char			*login_req_buf;
>   	char			*login_rsp_buf;
>

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise June 25, 2015, 5:06 p.m. UTC | #2
> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-owner@vger.kernel.org] On Behalf Of Sagi Grimberg
> Sent: Thursday, June 25, 2015 11:51 AM
> To: Steve Wise; linux-rdma@vger.kernel.org
> Cc: Or Gerlitz; Roi Dayan; target-devel
> Subject: Re: [PATCH RFC 2/2] RDMA/isert: Support iWARP transport
> 
> On 6/25/2015 6:39 PM, Steve Wise wrote:
> > Memory regions that are the target of an iWARP RDMA READ RESPONSE need
> > REMOTE_WRITE access rights.  So enable REMOTE_WRITE for iWARP devices.
> >
> > iWARP RDMA READ target sge depth is 1.  So save the max_read_sge in
> > the target device structure and use that when creating RDMA_READ work
> > requests.
> 
> Hi Steve,
> 
> CC'ing target-devel...
> 
> >
> > Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> > Tested-by: Vasu Dev <vasu.dev@intel.com>
> > ---
> >   drivers/infiniband/ulp/isert/ib_isert.c |   55 ++++++++++++++++++++++++++-----
> >   drivers/infiniband/ulp/isert/ib_isert.h |    1 +
> >   2 files changed, 48 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
> > index 9e7b492..b5cde5d 100644
> > --- a/drivers/infiniband/ulp/isert/ib_isert.c
> > +++ b/drivers/infiniband/ulp/isert/ib_isert.c
> > @@ -165,6 +165,11 @@ isert_create_qp(struct isert_conn *isert_conn,
> >   	attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2);
> >   	isert_conn->max_sge = attr.cap.max_send_sge;
> >
> > +	if (rdma_cap_read_multi_sge(device->ib_device, cma_id->port_num))
> > +		isert_conn->max_read_sge = isert_conn->max_sge;
> > +	else
> > +		isert_conn->max_read_sge = 1;
> > +
> 
> I think it would make sense to change now max_sge to max_write_sge.
> 

Ok.

> And, shouldn't we just use:
> max_wr_sge = max(2, device->dev_attr.max_sge - 2);
> max_rd_sge = device->dev_attr.max_sge_rd;
> 
> Does the Chelsio device reports max_sge_rd != 1 ?
> 

Yes, but I wasn't sure max_sge_rd is really the read target max sge.  I don't see it being set by the mlx* drivers.  Also since we have the new rdma_cap_read_multi_sge() helper, I thought I should use it. :)

> 
> >   	attr.cap.max_recv_sge = 1;
> >   	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
> >   	attr.qp_type = IB_QPT_RC;
> > @@ -348,6 +353,17 @@ out_cq:
> >   	return ret;
> >   }
> >
> > +static int any_port_is_iwarp(struct isert_device *device)
> > +{
> > +	int i;
> > +
> > +	for (i = rdma_start_port(device->ib_device);
> > +	     i <= rdma_end_port(device->ib_device); i++)
> > +		if (rdma_protocol_iwarp(device->ib_device, i))
> > +			return 1;
> > +	return 0;
> > +}
> > +
> 
> This does not look like it belongs here... Can we
> place this in rdma_cm?
>

Ok...that makes sense.
 
> >   static int
> >   isert_create_device_ib_res(struct isert_device *device)
> >   {
> > @@ -383,7 +399,17 @@ isert_create_device_ib_res(struct isert_device *device)
> >   		goto out_cq;
> >   	}
> >
> > -	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE);
> > +	/*
> > +	 * IWARP transports need REMOTE_WRITE for MRs used as the target of
> > +	 * an RDMA_READ.  Since the DMA MR is used for all ports, then if
> > +	 * any port is running IWARP, add REMOTE_WRITE.
> > +	 */
> > +	if (any_port_is_iwarp(device))
> > +		device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
> > +						       IB_ACCESS_REMOTE_WRITE);
> > +	else
> > +		device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE);
> > +
> >   	if (IS_ERR(device->mr)) {
> >   		ret = PTR_ERR(device->mr);
> >   		isert_err("failed to create dma mr, device %p, ret=%d\n",
> > @@ -2375,7 +2401,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
> >   static int
> >   isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
> >   		    struct ib_sge *ib_sge, struct ib_send_wr *send_wr,
> > -		    u32 data_left, u32 offset)
> > +		    u32 data_left, u32 offset, u32 max_sge)
> >   {
> >   	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
> >   	struct scatterlist *sg_start, *tmp_sg;
> > @@ -2386,7 +2412,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
> >
> >   	sg_off = offset / PAGE_SIZE;
> >   	sg_start = &cmd->se_cmd.t_data_sg[sg_off];
> > -	sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge);
> > +	sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, max_sge);
> >   	page_off = offset % PAGE_SIZE;
> >
> >   	send_wr->sg_list = ib_sge;
> > @@ -2430,8 +2456,9 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
> >   	struct isert_data_buf *data = &wr->data;
> >   	struct ib_send_wr *send_wr;
> >   	struct ib_sge *ib_sge;
> > -	u32 offset, data_len, data_left, rdma_write_max, va_offset = 0;
> > +	u32 offset, data_len, data_left, rdma_max_len, va_offset = 0;
> >   	int ret = 0, i, ib_sge_cnt;
> > +	u32 max_sge;
> >
> >   	isert_cmd->tx_desc.isert_cmd = isert_cmd;
> >
> > @@ -2453,7 +2480,12 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
> >   	}
> >   	wr->ib_sge = ib_sge;
> >
> > -	wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
> > +	if (wr->iser_ib_op == ISER_IB_RDMA_WRITE)
> > +		max_sge = isert_conn->max_sge;
> > +	else
> > +		max_sge =  isert_conn->max_read_sge;
> > +
> > +	wr->send_wr_num = DIV_ROUND_UP(data->nents, max_sge);
> >   	wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
> >   				GFP_KERNEL);
> >   	if (!wr->send_wr) {
> > @@ -2463,11 +2495,11 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
> >   	}
> >
> >   	wr->isert_cmd = isert_cmd;
> > -	rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
> >
> > +	rdma_max_len = max_sge * PAGE_SIZE;
> >   	for (i = 0; i < wr->send_wr_num; i++) {
> >   		send_wr = &isert_cmd->rdma_wr.send_wr[i];
> > -		data_len = min(data_left, rdma_write_max);
> > +		data_len = min(data_left, rdma_max_len);
> >
> >   		send_wr->send_flags = 0;
> >   		if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
> > @@ -2489,7 +2521,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
> >   		}
> >
> >   		ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
> > -					send_wr, data_len, offset);
> > +					send_wr, data_len, offset, max_sge);
> >   		ib_sge += ib_sge_cnt;
> >
> >   		offset += data_len;
> > @@ -2618,6 +2650,13 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
> >   	fr_wr.wr.fast_reg.rkey = mr->rkey;
> >   	fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE;
> >
> > +	/*
> > +	 * IWARP transports need REMOTE_WRITE for MRs used as the target of
> > +	 * an RDMA_READ.
> > +	 */
> > +	if (rdma_protocol_iwarp(ib_dev, isert_conn->cm_id->port_num))
> > +		fr_wr.wr.fast_reg.access_flags |= IB_ACCESS_REMOTE_WRITE;
> > +
> >   	if (!wr)
> >   		wr = &fr_wr;
> >   	else
> > diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
> > index 9ec23a7..47cf11b 100644
> > --- a/drivers/infiniband/ulp/isert/ib_isert.h
> > +++ b/drivers/infiniband/ulp/isert/ib_isert.h
> > @@ -153,6 +153,7 @@ struct isert_conn {
> >   	u32			initiator_depth;
> >   	bool			pi_support;
> >   	u32			max_sge;
> > +	u32			max_read_sge;
> >   	char			*login_buf;
> >   	char			*login_req_buf;
> >   	char			*login_rsp_buf;
> >
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe June 25, 2015, 6:25 p.m. UTC | #3
On Thu, Jun 25, 2015 at 10:39:23AM -0500, Steve Wise wrote:
> +	/*
> +	 * IWARP transports need REMOTE_WRITE for MRs used as the target of
> +	 * an RDMA_READ.  Since the DMA MR is used for all ports, then if
> +	 * any port is running IWARP, add REMOTE_WRITE.
> +	 */
> +	if (any_port_is_iwarp(device))

It would be nice to have a new-style cap test for this instead of open
coding iwarp. Similar to rdma_cap_read_multi_sge

I'm confused about the 'any_port_is_iwarp' stuff too, I thought if one
port was iwarp then all ports had to be iwarp?

Even if we move away from that, I would think that some caps must be
the same on all ports, and multi_sge, remote_write, etc would fit into
that limitation.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hefty, Sean June 25, 2015, 6:30 p.m. UTC | #4
> > +	 * IWARP transports need REMOTE_WRITE for MRs used as the target of
> > +	 * an RDMA_READ.  Since the DMA MR is used for all ports, then if
> > +	 * any port is running IWARP, add REMOTE_WRITE.
> > +	 */
> > +	if (any_port_is_iwarp(device))
> 
> It would be nice to have a new-style cap test for this instead of open
> coding iwarp. Similar to rdma_cap_read_multi_sge

This should be pushed down into the devices, or at least within verbs, rather than having ULPs needing to know this oddness.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise June 25, 2015, 6:32 p.m. UTC | #5
> -----Original Message-----
> From: Jason Gunthorpe [mailto:jgunthorpe@obsidianresearch.com]
> Sent: Thursday, June 25, 2015 1:25 PM
> To: Steve Wise
> Cc: linux-rdma@vger.kernel.org; sagig@mellanox.com; orgerlitz@mellanox.com; raid@mellanox.com
> Subject: Re: [PATCH RFC 2/2] RDMA/isert: Support iWARP transport
> 
> On Thu, Jun 25, 2015 at 10:39:23AM -0500, Steve Wise wrote:
> > +	/*
> > +	 * IWARP transports need REMOTE_WRITE for MRs used as the target of
> > +	 * an RDMA_READ.  Since the DMA MR is used for all ports, then if
> > +	 * any port is running IWARP, add REMOTE_WRITE.
> > +	 */
> > +	if (any_port_is_iwarp(device))
> 
> It would be nice to have a new-style cap test for this instead of open
> coding iwarp. Similar to rdma_cap_read_multi_sge
> 

That would be ok with me.  Now for naming this new function:

rdma_cap_read_requires_remote_write_rights()

That's pretty long.  Any other ideas for naming this?


> I'm confused about the 'any_port_is_iwarp' stuff too, I thought if one
> port was iwarp then all ports had to be iwarp?
> 

Currently no device supports iwarp + any other protocol.

> Even if we move away from that, I would think that some caps must be
> the same on all ports, and multi_sge, remote_write, etc would fit into
> that limitation.
> 

I'm happy with making this a device-global capability.  If it becomes a per-port capability, then the code can change later. 

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise June 25, 2015, 6:33 p.m. UTC | #6
> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-owner@vger.kernel.org] On Behalf Of Hefty, Sean
> Sent: Thursday, June 25, 2015 1:30 PM
> To: Jason Gunthorpe; Steve Wise
> Cc: linux-rdma@vger.kernel.org; sagig@mellanox.com; orgerlitz@mellanox.com; raid@mellanox.com
> Subject: RE: [PATCH RFC 2/2] RDMA/isert: Support iWARP transport
> 
> > > +	 * IWARP transports need REMOTE_WRITE for MRs used as the target of
> > > +	 * an RDMA_READ.  Since the DMA MR is used for all ports, then if
> > > +	 * any port is running IWARP, add REMOTE_WRITE.
> > > +	 */
> > > +	if (any_port_is_iwarp(device))
> >
> > It would be nice to have a new-style cap test for this instead of open
> > coding iwarp. Similar to rdma_cap_read_multi_sge
> 
> This should be pushed down into the devices, or at least within verbs, rather than having ULPs needing to know this oddness.

How would you envision doing this?  At the time a MR is registered the device driver doesn't know if the application will be doing
RDMA reads or not on that MR.

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hefty, Sean June 25, 2015, 6:45 p.m. UTC | #7
> How would you envision doing this?  At the time a MR is registered the
> device driver doesn't know if the application will be doing
> RDMA reads or not on that MR.

I was thinking of checking for REMOTE_READ, but that doesn't work on the initiator side.

I guess you could a READ_DEST(SOURCE? TARGET?) flag to indicate that the MR will be used on the initiating side of the RDMA read operation.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe June 25, 2015, 7:12 p.m. UTC | #8
On Thu, Jun 25, 2015 at 06:45:56PM +0000, Hefty, Sean wrote:
> > How would you envision doing this?  At the time a MR is registered the
> > device driver doesn't know if the application will be doing
> > RDMA reads or not on that MR.
> 
> I was thinking of checking for REMOTE_READ, but that doesn't work on the initiator side.
> 
> I guess you could a READ_DEST(SOURCE? TARGET?) flag to indicate that
> the MR will be used on the initiating side of the RDMA read
> operation.

Yes, that does make more sense...

What about moving to something more specific? Encode the allowed verbs
in the access flag?

I don't understand why iWarp HW choose to ignore the verbs spec and
not use IB_ACCESS_LOCAL_WRITE to cover RDMA READ responses...

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hefty, Sean June 25, 2015, 7:20 p.m. UTC | #9
> What about moving to something more specific? Encode the allowed verbs
> in the access flag?

This makes more sense to me.  Something like: SEND, RECV, INIT READ, INIT WRITE, READ TARGET, WRITE TARGET, etc.  We're close to this, but it's not clear, for example, what flags are needed for a receive buffer.  None?  LOCAL_WRITE?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise June 25, 2015, 7:25 p.m. UTC | #10
> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-owner@vger.kernel.org] On Behalf Of Hefty, Sean
> Sent: Thursday, June 25, 2015 2:20 PM
> To: Jason Gunthorpe
> Cc: Steve Wise; linux-rdma@vger.kernel.org; sagig@mellanox.com; orgerlitz@mellanox.com; Roi Dayan
> Subject: RE: [PATCH RFC 2/2] RDMA/isert: Support iWARP transport
> 
> > What about moving to something more specific? Encode the allowed verbs
> > in the access flag?
> 
> This makes more sense to me.  Something like: SEND, RECV, INIT READ, INIT WRITE, READ TARGET, WRITE TARGET, etc.  We're close to
> this, but it's not clear, for example, what flags are needed for a receive buffer.  None?  LOCAL_WRITE?

Make sense to me too.

To stage the changes we could introduce a new function that returns the needed ib_access_flags value given the desired opcodes.
Then have a series that changes all the existing ULPs to make use of this new function.  

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe June 25, 2015, 7:29 p.m. UTC | #11
On Thu, Jun 25, 2015 at 02:25:49PM -0500, Steve Wise wrote:

> To stage the changes we could introduce a new function that returns
> the needed ib_access_flags value given the desired opcodes.  Then
> have a series that changes all the existing ULPs to make use of this
> new function.

I wouldn't be afraid to add a new create_mr entry point that does the
right thing, we can unexport/delete the old one when all kernel users
are gone. Trivially the core can just have a default that translates
based on iwarp/!iwarp for now.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise June 25, 2015, 7:44 p.m. UTC | #12
> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-owner@vger.kernel.org] On Behalf Of Jason Gunthorpe
> Sent: Thursday, June 25, 2015 2:30 PM
> To: Steve Wise
> Cc: 'Hefty, Sean'; linux-rdma@vger.kernel.org; sagig@mellanox.com; orgerlitz@mellanox.com; 'Roi Dayan'
> Subject: Re: [PATCH RFC 2/2] RDMA/isert: Support iWARP transport
> 
> On Thu, Jun 25, 2015 at 02:25:49PM -0500, Steve Wise wrote:
> 
> > To stage the changes we could introduce a new function that returns
> > the needed ib_access_flags value given the desired opcodes.  Then
> > have a series that changes all the existing ULPs to make use of this
> > new function.
> 
> I wouldn't be afraid to add a new create_mr entry point that does the
> right thing, we can unexport/delete the old one when all kernel users
> are gone. Trivially the core can just have a default that translates
> based on iwarp/!iwarp for now.
> 

Ignoring user MRs at this point, I think we would need a new entry point for creating dma mrs ala ib_get_dma_mr().

How defining MR "roles":

/**
 * ib_mr_role - possible roles a MR will be used for
 *
 * This allows a transport independent RDMA application to
 * create MRs that are usable for all the desired roles w/o
 * having to understand which access rights are needed.
 */
enum ib_mr_role {
	IB_MRR_RECV		= 1,
	IB_MRR_SEND		 = (1<<1),
	IB_MRR_READ_SOURCE = (1<<2),
	IB_MRR_READ_SINK	= (1<<3),
	IB_MRR_WRITE_SOURCE = (1<<4),
	IB_MRR_WRITE_SINK	 = (1<<5),
	/* probably more roles */
};

Then new entry point that will call ib_get_dma_mr() with the ib_access_flags required to handle the roles for the given device
protocol.

ib_get_dma_role_mr(struct ib_pd *pd, int mr_role_flags)


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise June 25, 2015, 7:59 p.m. UTC | #13
> 
> I don't understand why iWarp HW choose to ignore the verbs spec and
> not use IB_ACCESS_LOCAL_WRITE to cover RDMA READ responses...
>

The iWARP verbs spec mandates this. 
 
Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg June 27, 2015, 9:10 a.m. UTC | #14
Resending - somehow this didn't make it to
the lists...


On 6/25/2015 7:51 PM, Sagi Grimberg wrote:
> On 6/25/2015 6:39 PM, Steve Wise wrote:
>> Memory regions that are the target of an iWARP RDMA READ RESPONSE need
>> REMOTE_WRITE access rights.  So enable REMOTE_WRITE for iWARP devices.
>>
>> iWARP RDMA READ target sge depth is 1.  So save the max_read_sge in
>> the target device structure and use that when creating RDMA_READ work
>> requests.
>
> Hi Steve,
>
> CC'ing target-devel...
>
>>
>> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
>> Tested-by: Vasu Dev <vasu.dev@intel.com>
>> ---
>>   drivers/infiniband/ulp/isert/ib_isert.c |   55
>> ++++++++++++++++++++++++++-----
>>   drivers/infiniband/ulp/isert/ib_isert.h |    1 +
>>   2 files changed, 48 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/infiniband/ulp/isert/ib_isert.c
>> b/drivers/infiniband/ulp/isert/ib_isert.c
>> index 9e7b492..b5cde5d 100644
>> --- a/drivers/infiniband/ulp/isert/ib_isert.c
>> +++ b/drivers/infiniband/ulp/isert/ib_isert.c
>> @@ -165,6 +165,11 @@ isert_create_qp(struct isert_conn *isert_conn,
>>       attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2);
>>       isert_conn->max_sge = attr.cap.max_send_sge;
>>
>> +    if (rdma_cap_read_multi_sge(device->ib_device, cma_id->port_num))
>> +        isert_conn->max_read_sge = isert_conn->max_sge;
>> +    else
>> +        isert_conn->max_read_sge = 1;
>> +
>
> I think it would make sense to change now max_sge to max_write_sge.
>
> And, shouldn't we just use:
> max_wr_sge = max(2, device->dev_attr.max_sge - 2);
> max_rd_sge = device->dev_attr.max_sge_rd;
>
> Does the Chelsio device reports max_sge_rd != 1 ?
>
>
>>       attr.cap.max_recv_sge = 1;
>>       attr.sq_sig_type = IB_SIGNAL_REQ_WR;
>>       attr.qp_type = IB_QPT_RC;
>> @@ -348,6 +353,17 @@ out_cq:
>>       return ret;
>>   }
>>
>> +static int any_port_is_iwarp(struct isert_device *device)
>> +{
>> +    int i;
>> +
>> +    for (i = rdma_start_port(device->ib_device);
>> +         i <= rdma_end_port(device->ib_device); i++)
>> +        if (rdma_protocol_iwarp(device->ib_device, i))
>> +            return 1;
>> +    return 0;
>> +}
>> +
>
> This does not look like it belongs here... Can we
> place this in rdma_cm?
>
>>   static int
>>   isert_create_device_ib_res(struct isert_device *device)
>>   {
>> @@ -383,7 +399,17 @@ isert_create_device_ib_res(struct isert_device
>> *device)
>>           goto out_cq;
>>       }
>>
>> -    device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE);
>> +    /*
>> +     * IWARP transports need REMOTE_WRITE for MRs used as the target of
>> +     * an RDMA_READ.  Since the DMA MR is used for all ports, then if
>> +     * any port is running IWARP, add REMOTE_WRITE.
>> +     */
>> +    if (any_port_is_iwarp(device))
>> +        device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
>> +                               IB_ACCESS_REMOTE_WRITE);
>> +    else
>> +        device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE);
>> +
>>       if (IS_ERR(device->mr)) {
>>           ret = PTR_ERR(device->mr);
>>           isert_err("failed to create dma mr, device %p, ret=%d\n",
>> @@ -2375,7 +2401,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct
>> iscsi_conn *conn)
>>   static int
>>   isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd
>> *isert_cmd,
>>               struct ib_sge *ib_sge, struct ib_send_wr *send_wr,
>> -            u32 data_left, u32 offset)
>> +            u32 data_left, u32 offset, u32 max_sge)
>>   {
>>       struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
>>       struct scatterlist *sg_start, *tmp_sg;
>> @@ -2386,7 +2412,7 @@ isert_build_rdma_wr(struct isert_conn
>> *isert_conn, struct isert_cmd *isert_cmd,
>>
>>       sg_off = offset / PAGE_SIZE;
>>       sg_start = &cmd->se_cmd.t_data_sg[sg_off];
>> -    sg_nents = min(cmd->se_cmd.t_data_nents - sg_off,
>> isert_conn->max_sge);
>> +    sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, max_sge);
>>       page_off = offset % PAGE_SIZE;
>>
>>       send_wr->sg_list = ib_sge;
>> @@ -2430,8 +2456,9 @@ isert_map_rdma(struct iscsi_conn *conn, struct
>> iscsi_cmd *cmd,
>>       struct isert_data_buf *data = &wr->data;
>>       struct ib_send_wr *send_wr;
>>       struct ib_sge *ib_sge;
>> -    u32 offset, data_len, data_left, rdma_write_max, va_offset = 0;
>> +    u32 offset, data_len, data_left, rdma_max_len, va_offset = 0;
>>       int ret = 0, i, ib_sge_cnt;
>> +    u32 max_sge;
>>
>>       isert_cmd->tx_desc.isert_cmd = isert_cmd;
>>
>> @@ -2453,7 +2480,12 @@ isert_map_rdma(struct iscsi_conn *conn, struct
>> iscsi_cmd *cmd,
>>       }
>>       wr->ib_sge = ib_sge;
>>
>> -    wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
>> +    if (wr->iser_ib_op == ISER_IB_RDMA_WRITE)
>> +        max_sge = isert_conn->max_sge;
>> +    else
>> +        max_sge =  isert_conn->max_read_sge;
>> +
>> +    wr->send_wr_num = DIV_ROUND_UP(data->nents, max_sge);
>>       wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
>>                   GFP_KERNEL);
>>       if (!wr->send_wr) {
>> @@ -2463,11 +2495,11 @@ isert_map_rdma(struct iscsi_conn *conn, struct
>> iscsi_cmd *cmd,
>>       }
>>
>>       wr->isert_cmd = isert_cmd;
>> -    rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
>>
>> +    rdma_max_len = max_sge * PAGE_SIZE;
>>       for (i = 0; i < wr->send_wr_num; i++) {
>>           send_wr = &isert_cmd->rdma_wr.send_wr[i];
>> -        data_len = min(data_left, rdma_write_max);
>> +        data_len = min(data_left, rdma_max_len);
>>
>>           send_wr->send_flags = 0;
>>           if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
>> @@ -2489,7 +2521,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct
>> iscsi_cmd *cmd,
>>           }
>>
>>           ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
>> -                    send_wr, data_len, offset);
>> +                    send_wr, data_len, offset, max_sge);
>>           ib_sge += ib_sge_cnt;
>>
>>           offset += data_len;
>> @@ -2618,6 +2650,13 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
>>       fr_wr.wr.fast_reg.rkey = mr->rkey;
>>       fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE;
>>
>> +    /*
>> +     * IWARP transports need REMOTE_WRITE for MRs used as the target of
>> +     * an RDMA_READ.
>> +     */
>> +    if (rdma_protocol_iwarp(ib_dev, isert_conn->cm_id->port_num))
>> +        fr_wr.wr.fast_reg.access_flags |= IB_ACCESS_REMOTE_WRITE;
>> +
>>       if (!wr)
>>           wr = &fr_wr;
>>       else
>> diff --git a/drivers/infiniband/ulp/isert/ib_isert.h
>> b/drivers/infiniband/ulp/isert/ib_isert.h
>> index 9ec23a7..47cf11b 100644
>> --- a/drivers/infiniband/ulp/isert/ib_isert.h
>> +++ b/drivers/infiniband/ulp/isert/ib_isert.h
>> @@ -153,6 +153,7 @@ struct isert_conn {
>>       u32            initiator_depth;
>>       bool            pi_support;
>>       u32            max_sge;
>> +    u32            max_read_sge;
>>       char            *login_buf;
>>       char            *login_req_buf;
>>       char            *login_rsp_buf;
>>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg June 27, 2015, 9:12 a.m. UTC | #15
On 6/25/2015 8:06 PM, Steve Wise wrote:
>
>
>> -----Original Message-----
>> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-owner@vger.kernel.org] On Behalf Of Sagi Grimberg
>> Sent: Thursday, June 25, 2015 11:51 AM
>> To: Steve Wise; linux-rdma@vger.kernel.org
>> Cc: Or Gerlitz; Roi Dayan; target-devel
>> Subject: Re: [PATCH RFC 2/2] RDMA/isert: Support iWARP transport
>>
>> On 6/25/2015 6:39 PM, Steve Wise wrote:
>>> Memory regions that are the target of an iWARP RDMA READ RESPONSE need
>>> REMOTE_WRITE access rights.  So enable REMOTE_WRITE for iWARP devices.
>>>
>>> iWARP RDMA READ target sge depth is 1.  So save the max_read_sge in
>>> the target device structure and use that when creating RDMA_READ work
>>> requests.
>>
>> Hi Steve,
>>
>> CC'ing target-devel...
>>
>>>
>>> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
>>> Tested-by: Vasu Dev <vasu.dev@intel.com>
>>> ---
>>>    drivers/infiniband/ulp/isert/ib_isert.c |   55 ++++++++++++++++++++++++++-----
>>>    drivers/infiniband/ulp/isert/ib_isert.h |    1 +
>>>    2 files changed, 48 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
>>> index 9e7b492..b5cde5d 100644
>>> --- a/drivers/infiniband/ulp/isert/ib_isert.c
>>> +++ b/drivers/infiniband/ulp/isert/ib_isert.c
>>> @@ -165,6 +165,11 @@ isert_create_qp(struct isert_conn *isert_conn,
>>>    	attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2);
>>>    	isert_conn->max_sge = attr.cap.max_send_sge;
>>>
>>> +	if (rdma_cap_read_multi_sge(device->ib_device, cma_id->port_num))
>>> +		isert_conn->max_read_sge = isert_conn->max_sge;
>>> +	else
>>> +		isert_conn->max_read_sge = 1;
>>> +
>>
>> I think it would make sense to change now max_sge to max_write_sge.
>>
>
> Ok.
>
>> And, shouldn't we just use:
>> max_wr_sge = max(2, device->dev_attr.max_sge - 2);
>> max_rd_sge = device->dev_attr.max_sge_rd;
>>
>> Does the Chelsio device reports max_sge_rd != 1 ?
>>
>
> Yes, but I wasn't sure max_sge_rd is really the read target max sge.

Why not?

> I don't see it being set by the mlx* drivers.

Umm, that's a bug.
Let me send an easy fix for all mlx drivers.

> Also since we have the new rdma_cap_read_multi_sge() helper, I thought I should use it. :)

I think that reading the exact device caps max_sge, max_sge_is better
and more straight forward...

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg June 27, 2015, 9:33 a.m. UTC | #16
On 6/25/2015 10:29 PM, Jason Gunthorpe wrote:
> On Thu, Jun 25, 2015 at 02:25:49PM -0500, Steve Wise wrote:
>
>> To stage the changes we could introduce a new function that returns
>> the needed ib_access_flags value given the desired opcodes.  Then
>> have a series that changes all the existing ULPs to make use of this
>> new function.
>
> I wouldn't be afraid to add a new create_mr entry point that does the
> right thing, we can unexport/delete the old one when all kernel users
> are gone. Trivially the core can just have a default that translates
> based on iwarp/!iwarp for now.
>

Note that we have ib_create_mr() that receives an easily extensible
ib_mr_init_attr (similar to qp creation). I think this is a good
opportunity to unite all the create routines to ib_create_mr.

ib_get_dma_mr will become:
ib_mr_init_attr.type = IB_MR_DMA;
ib_mr_init_attr.mr_access_flags = mr_access_flags;
ib_create_mr(pd, &ib_mr_init_attr);

ib_alloc_fast_reg_mr will become:
ib_mr_init_attr.type = IB_MR_FAST_REG;
ib_create_mr(pd, &ib_mr_init_attr);

ib_reg_phys_mr (if we don't want to kill it by now) will become:
ib_mr_init_attr.type = IB_MR_REG_PHYS;
ib_mr_init_attr.arr = phys_buf_array;
ib_mr_init_attr.max_reg_descriptors = num_phys_buf;
ib_mr_init_attr.mr_access_flags = mr_access_flags;
ib_mr_init_attr.iova_start = iova_start;
ib_create_mr(pd, &ib_mr_init_attr);

The core layer will spread these in the different driver routines
(although it would be nice if the drivers can have a single MR craetion
routine that can handle all MR types (including fail if unsupported).

Thoughts?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe June 29, 2015, 5:46 p.m. UTC | #17
On Sat, Jun 27, 2015 at 12:12:11PM +0300, Sagi Grimberg wrote:

> >Also since we have the new rdma_cap_read_multi_sge() helper, I
> >thought I should use it. :)
> 
> I think that reading the exact device caps max_sge, max_sge_is better
> and more straight forward...

Right, rdma_cap_read_multi_sge was just a stand in wanting something
better to expose asymmetric read/write sge caps.

If iwarp can get what it needs via max_sge_rd then lets drop
rdma_cap_read_multi_sge.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 9e7b492..b5cde5d 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -165,6 +165,11 @@  isert_create_qp(struct isert_conn *isert_conn,
 	attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2);
 	isert_conn->max_sge = attr.cap.max_send_sge;
 
+	if (rdma_cap_read_multi_sge(device->ib_device, cma_id->port_num))
+		isert_conn->max_read_sge = isert_conn->max_sge;
+	else
+		isert_conn->max_read_sge = 1;
+
 	attr.cap.max_recv_sge = 1;
 	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	attr.qp_type = IB_QPT_RC;
@@ -348,6 +353,17 @@  out_cq:
 	return ret;
 }
 
+static int any_port_is_iwarp(struct isert_device *device)
+{
+	int i;
+
+	for (i = rdma_start_port(device->ib_device);
+	     i <= rdma_end_port(device->ib_device); i++)
+		if (rdma_protocol_iwarp(device->ib_device, i))
+			return 1;
+	return 0;
+}
+
 static int
 isert_create_device_ib_res(struct isert_device *device)
 {
@@ -383,7 +399,17 @@  isert_create_device_ib_res(struct isert_device *device)
 		goto out_cq;
 	}
 
-	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE);
+	/*
+	 * IWARP transports need REMOTE_WRITE for MRs used as the target of
+	 * an RDMA_READ.  Since the DMA MR is used for all ports, then if
+	 * any port is running IWARP, add REMOTE_WRITE.
+	 */
+	if (any_port_is_iwarp(device))
+		device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
+						       IB_ACCESS_REMOTE_WRITE);
+	else
+		device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE);
+
 	if (IS_ERR(device->mr)) {
 		ret = PTR_ERR(device->mr);
 		isert_err("failed to create dma mr, device %p, ret=%d\n",
@@ -2375,7 +2401,7 @@  isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 static int
 isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 		    struct ib_sge *ib_sge, struct ib_send_wr *send_wr,
-		    u32 data_left, u32 offset)
+		    u32 data_left, u32 offset, u32 max_sge)
 {
 	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
 	struct scatterlist *sg_start, *tmp_sg;
@@ -2386,7 +2412,7 @@  isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 
 	sg_off = offset / PAGE_SIZE;
 	sg_start = &cmd->se_cmd.t_data_sg[sg_off];
-	sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge);
+	sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, max_sge);
 	page_off = offset % PAGE_SIZE;
 
 	send_wr->sg_list = ib_sge;
@@ -2430,8 +2456,9 @@  isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	struct isert_data_buf *data = &wr->data;
 	struct ib_send_wr *send_wr;
 	struct ib_sge *ib_sge;
-	u32 offset, data_len, data_left, rdma_write_max, va_offset = 0;
+	u32 offset, data_len, data_left, rdma_max_len, va_offset = 0;
 	int ret = 0, i, ib_sge_cnt;
+	u32 max_sge;
 
 	isert_cmd->tx_desc.isert_cmd = isert_cmd;
 
@@ -2453,7 +2480,12 @@  isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	}
 	wr->ib_sge = ib_sge;
 
-	wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
+	if (wr->iser_ib_op == ISER_IB_RDMA_WRITE)
+		max_sge = isert_conn->max_sge;
+	else
+		max_sge =  isert_conn->max_read_sge;
+
+	wr->send_wr_num = DIV_ROUND_UP(data->nents, max_sge);
 	wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
 				GFP_KERNEL);
 	if (!wr->send_wr) {
@@ -2463,11 +2495,11 @@  isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	}
 
 	wr->isert_cmd = isert_cmd;
-	rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
 
+	rdma_max_len = max_sge * PAGE_SIZE;
 	for (i = 0; i < wr->send_wr_num; i++) {
 		send_wr = &isert_cmd->rdma_wr.send_wr[i];
-		data_len = min(data_left, rdma_write_max);
+		data_len = min(data_left, rdma_max_len);
 
 		send_wr->send_flags = 0;
 		if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
@@ -2489,7 +2521,7 @@  isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 		}
 
 		ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
-					send_wr, data_len, offset);
+					send_wr, data_len, offset, max_sge);
 		ib_sge += ib_sge_cnt;
 
 		offset += data_len;
@@ -2618,6 +2650,13 @@  isert_fast_reg_mr(struct isert_conn *isert_conn,
 	fr_wr.wr.fast_reg.rkey = mr->rkey;
 	fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE;
 
+	/*
+	 * IWARP transports need REMOTE_WRITE for MRs used as the target of
+	 * an RDMA_READ.
+	 */
+	if (rdma_protocol_iwarp(ib_dev, isert_conn->cm_id->port_num))
+		fr_wr.wr.fast_reg.access_flags |= IB_ACCESS_REMOTE_WRITE;
+
 	if (!wr)
 		wr = &fr_wr;
 	else
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 9ec23a7..47cf11b 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -153,6 +153,7 @@  struct isert_conn {
 	u32			initiator_depth;
 	bool			pi_support;
 	u32			max_sge;
+	u32			max_read_sge;
 	char			*login_buf;
 	char			*login_req_buf;
 	char			*login_rsp_buf;