Message ID | 20150625153922.13272.41789.stgit@build.ogc.int (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
On 6/25/2015 6:39 PM, Steve Wise wrote: > Memory regions that are the target of an iWARP RDMA READ RESPONSE need > REMOTE_WRITE access rights. So enable REMOTE_WRITE for iWARP devices. > > iWARP RDMA READ target sge depth is 1. So save the max_read_sge in > the target device structure and use that when creating RDMA_READ work > requests. Hi Steve, CC'ing target-devel... > > Signed-off-by: Steve Wise <swise@opengridcomputing.com> > Tested-by: Vasu Dev <vasu.dev@intel.com> > --- > drivers/infiniband/ulp/isert/ib_isert.c | 55 ++++++++++++++++++++++++++----- > drivers/infiniband/ulp/isert/ib_isert.h | 1 + > 2 files changed, 48 insertions(+), 8 deletions(-) > > diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c > index 9e7b492..b5cde5d 100644 > --- a/drivers/infiniband/ulp/isert/ib_isert.c > +++ b/drivers/infiniband/ulp/isert/ib_isert.c > @@ -165,6 +165,11 @@ isert_create_qp(struct isert_conn *isert_conn, > attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2); > isert_conn->max_sge = attr.cap.max_send_sge; > > + if (rdma_cap_read_multi_sge(device->ib_device, cma_id->port_num)) > + isert_conn->max_read_sge = isert_conn->max_sge; > + else > + isert_conn->max_read_sge = 1; > + I think it would make sense to change now max_sge to max_write_sge. And, shouldn't we just use: max_wr_sge = max(2, device->dev_attr.max_sge - 2); max_rd_sge = device->dev_attr.max_sge_rd; Does the Chelsio device reports max_sge_rd != 1 ? > attr.cap.max_recv_sge = 1; > attr.sq_sig_type = IB_SIGNAL_REQ_WR; > attr.qp_type = IB_QPT_RC; > @@ -348,6 +353,17 @@ out_cq: > return ret; > } > > +static int any_port_is_iwarp(struct isert_device *device) > +{ > + int i; > + > + for (i = rdma_start_port(device->ib_device); > + i <= rdma_end_port(device->ib_device); i++) > + if (rdma_protocol_iwarp(device->ib_device, i)) > + return 1; > + return 0; > +} > + This does not look like it belongs here... Can we place this in rdma_cm? > static int > isert_create_device_ib_res(struct isert_device *device) > { > @@ -383,7 +399,17 @@ isert_create_device_ib_res(struct isert_device *device) > goto out_cq; > } > > - device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE); > + /* > + * IWARP transports need REMOTE_WRITE for MRs used as the target of > + * an RDMA_READ. Since the DMA MR is used for all ports, then if > + * any port is running IWARP, add REMOTE_WRITE. > + */ > + if (any_port_is_iwarp(device)) > + device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | > + IB_ACCESS_REMOTE_WRITE); > + else > + device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE); > + > if (IS_ERR(device->mr)) { > ret = PTR_ERR(device->mr); > isert_err("failed to create dma mr, device %p, ret=%d\n", > @@ -2375,7 +2401,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) > static int > isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, > struct ib_sge *ib_sge, struct ib_send_wr *send_wr, > - u32 data_left, u32 offset) > + u32 data_left, u32 offset, u32 max_sge) > { > struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; > struct scatterlist *sg_start, *tmp_sg; > @@ -2386,7 +2412,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, > > sg_off = offset / PAGE_SIZE; > sg_start = &cmd->se_cmd.t_data_sg[sg_off]; > - sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge); > + sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, max_sge); > page_off = offset % PAGE_SIZE; > > send_wr->sg_list = ib_sge; > @@ -2430,8 +2456,9 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, > struct isert_data_buf *data = &wr->data; > struct ib_send_wr *send_wr; > struct ib_sge *ib_sge; > - u32 offset, data_len, data_left, rdma_write_max, va_offset = 0; > + u32 offset, data_len, data_left, rdma_max_len, va_offset = 0; > int ret = 0, i, ib_sge_cnt; > + u32 max_sge; > > isert_cmd->tx_desc.isert_cmd = isert_cmd; > > @@ -2453,7 +2480,12 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, > } > wr->ib_sge = ib_sge; > > - wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge); > + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) > + max_sge = isert_conn->max_sge; > + else > + max_sge = isert_conn->max_read_sge; > + > + wr->send_wr_num = DIV_ROUND_UP(data->nents, max_sge); > wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, > GFP_KERNEL); > if (!wr->send_wr) { > @@ -2463,11 +2495,11 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, > } > > wr->isert_cmd = isert_cmd; > - rdma_write_max = isert_conn->max_sge * PAGE_SIZE; > > + rdma_max_len = max_sge * PAGE_SIZE; > for (i = 0; i < wr->send_wr_num; i++) { > send_wr = &isert_cmd->rdma_wr.send_wr[i]; > - data_len = min(data_left, rdma_write_max); > + data_len = min(data_left, rdma_max_len); > > send_wr->send_flags = 0; > if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { > @@ -2489,7 +2521,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, > } > > ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, > - send_wr, data_len, offset); > + send_wr, data_len, offset, max_sge); > ib_sge += ib_sge_cnt; > > offset += data_len; > @@ -2618,6 +2650,13 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, > fr_wr.wr.fast_reg.rkey = mr->rkey; > fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; > > + /* > + * IWARP transports need REMOTE_WRITE for MRs used as the target of > + * an RDMA_READ. > + */ > + if (rdma_protocol_iwarp(ib_dev, isert_conn->cm_id->port_num)) > + fr_wr.wr.fast_reg.access_flags |= IB_ACCESS_REMOTE_WRITE; > + > if (!wr) > wr = &fr_wr; > else > diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h > index 9ec23a7..47cf11b 100644 > --- a/drivers/infiniband/ulp/isert/ib_isert.h > +++ b/drivers/infiniband/ulp/isert/ib_isert.h > @@ -153,6 +153,7 @@ struct isert_conn { > u32 initiator_depth; > bool pi_support; > u32 max_sge; > + u32 max_read_sge; > char *login_buf; > char *login_req_buf; > char *login_rsp_buf; > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-owner@vger.kernel.org] On Behalf Of Sagi Grimberg > Sent: Thursday, June 25, 2015 11:51 AM > To: Steve Wise; linux-rdma@vger.kernel.org > Cc: Or Gerlitz; Roi Dayan; target-devel > Subject: Re: [PATCH RFC 2/2] RDMA/isert: Support iWARP transport > > On 6/25/2015 6:39 PM, Steve Wise wrote: > > Memory regions that are the target of an iWARP RDMA READ RESPONSE need > > REMOTE_WRITE access rights. So enable REMOTE_WRITE for iWARP devices. > > > > iWARP RDMA READ target sge depth is 1. So save the max_read_sge in > > the target device structure and use that when creating RDMA_READ work > > requests. > > Hi Steve, > > CC'ing target-devel... > > > > > Signed-off-by: Steve Wise <swise@opengridcomputing.com> > > Tested-by: Vasu Dev <vasu.dev@intel.com> > > --- > > drivers/infiniband/ulp/isert/ib_isert.c | 55 ++++++++++++++++++++++++++----- > > drivers/infiniband/ulp/isert/ib_isert.h | 1 + > > 2 files changed, 48 insertions(+), 8 deletions(-) > > > > diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c > > index 9e7b492..b5cde5d 100644 > > --- a/drivers/infiniband/ulp/isert/ib_isert.c > > +++ b/drivers/infiniband/ulp/isert/ib_isert.c > > @@ -165,6 +165,11 @@ isert_create_qp(struct isert_conn *isert_conn, > > attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2); > > isert_conn->max_sge = attr.cap.max_send_sge; > > > > + if (rdma_cap_read_multi_sge(device->ib_device, cma_id->port_num)) > > + isert_conn->max_read_sge = isert_conn->max_sge; > > + else > > + isert_conn->max_read_sge = 1; > > + > > I think it would make sense to change now max_sge to max_write_sge. > Ok. > And, shouldn't we just use: > max_wr_sge = max(2, device->dev_attr.max_sge - 2); > max_rd_sge = device->dev_attr.max_sge_rd; > > Does the Chelsio device reports max_sge_rd != 1 ? > Yes, but I wasn't sure max_sge_rd is really the read target max sge. I don't see it being set by the mlx* drivers. Also since we have the new rdma_cap_read_multi_sge() helper, I thought I should use it. :) > > > attr.cap.max_recv_sge = 1; > > attr.sq_sig_type = IB_SIGNAL_REQ_WR; > > attr.qp_type = IB_QPT_RC; > > @@ -348,6 +353,17 @@ out_cq: > > return ret; > > } > > > > +static int any_port_is_iwarp(struct isert_device *device) > > +{ > > + int i; > > + > > + for (i = rdma_start_port(device->ib_device); > > + i <= rdma_end_port(device->ib_device); i++) > > + if (rdma_protocol_iwarp(device->ib_device, i)) > > + return 1; > > + return 0; > > +} > > + > > This does not look like it belongs here... Can we > place this in rdma_cm? > Ok...that makes sense. > > static int > > isert_create_device_ib_res(struct isert_device *device) > > { > > @@ -383,7 +399,17 @@ isert_create_device_ib_res(struct isert_device *device) > > goto out_cq; > > } > > > > - device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE); > > + /* > > + * IWARP transports need REMOTE_WRITE for MRs used as the target of > > + * an RDMA_READ. Since the DMA MR is used for all ports, then if > > + * any port is running IWARP, add REMOTE_WRITE. > > + */ > > + if (any_port_is_iwarp(device)) > > + device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | > > + IB_ACCESS_REMOTE_WRITE); > > + else > > + device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE); > > + > > if (IS_ERR(device->mr)) { > > ret = PTR_ERR(device->mr); > > isert_err("failed to create dma mr, device %p, ret=%d\n", > > @@ -2375,7 +2401,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) > > static int > > isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, > > struct ib_sge *ib_sge, struct ib_send_wr *send_wr, > > - u32 data_left, u32 offset) > > + u32 data_left, u32 offset, u32 max_sge) > > { > > struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; > > struct scatterlist *sg_start, *tmp_sg; > > @@ -2386,7 +2412,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, > > > > sg_off = offset / PAGE_SIZE; > > sg_start = &cmd->se_cmd.t_data_sg[sg_off]; > > - sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge); > > + sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, max_sge); > > page_off = offset % PAGE_SIZE; > > > > send_wr->sg_list = ib_sge; > > @@ -2430,8 +2456,9 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, > > struct isert_data_buf *data = &wr->data; > > struct ib_send_wr *send_wr; > > struct ib_sge *ib_sge; > > - u32 offset, data_len, data_left, rdma_write_max, va_offset = 0; > > + u32 offset, data_len, data_left, rdma_max_len, va_offset = 0; > > int ret = 0, i, ib_sge_cnt; > > + u32 max_sge; > > > > isert_cmd->tx_desc.isert_cmd = isert_cmd; > > > > @@ -2453,7 +2480,12 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, > > } > > wr->ib_sge = ib_sge; > > > > - wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge); > > + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) > > + max_sge = isert_conn->max_sge; > > + else > > + max_sge = isert_conn->max_read_sge; > > + > > + wr->send_wr_num = DIV_ROUND_UP(data->nents, max_sge); > > wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, > > GFP_KERNEL); > > if (!wr->send_wr) { > > @@ -2463,11 +2495,11 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, > > } > > > > wr->isert_cmd = isert_cmd; > > - rdma_write_max = isert_conn->max_sge * PAGE_SIZE; > > > > + rdma_max_len = max_sge * PAGE_SIZE; > > for (i = 0; i < wr->send_wr_num; i++) { > > send_wr = &isert_cmd->rdma_wr.send_wr[i]; > > - data_len = min(data_left, rdma_write_max); > > + data_len = min(data_left, rdma_max_len); > > > > send_wr->send_flags = 0; > > if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { > > @@ -2489,7 +2521,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, > > } > > > > ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, > > - send_wr, data_len, offset); > > + send_wr, data_len, offset, max_sge); > > ib_sge += ib_sge_cnt; > > > > offset += data_len; > > @@ -2618,6 +2650,13 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, > > fr_wr.wr.fast_reg.rkey = mr->rkey; > > fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; > > > > + /* > > + * IWARP transports need REMOTE_WRITE for MRs used as the target of > > + * an RDMA_READ. > > + */ > > + if (rdma_protocol_iwarp(ib_dev, isert_conn->cm_id->port_num)) > > + fr_wr.wr.fast_reg.access_flags |= IB_ACCESS_REMOTE_WRITE; > > + > > if (!wr) > > wr = &fr_wr; > > else > > diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h > > index 9ec23a7..47cf11b 100644 > > --- a/drivers/infiniband/ulp/isert/ib_isert.h > > +++ b/drivers/infiniband/ulp/isert/ib_isert.h > > @@ -153,6 +153,7 @@ struct isert_conn { > > u32 initiator_depth; > > bool pi_support; > > u32 max_sge; > > + u32 max_read_sge; > > char *login_buf; > > char *login_req_buf; > > char *login_rsp_buf; > > > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Jun 25, 2015 at 10:39:23AM -0500, Steve Wise wrote: > + /* > + * IWARP transports need REMOTE_WRITE for MRs used as the target of > + * an RDMA_READ. Since the DMA MR is used for all ports, then if > + * any port is running IWARP, add REMOTE_WRITE. > + */ > + if (any_port_is_iwarp(device)) It would be nice to have a new-style cap test for this instead of open coding iwarp. Similar to rdma_cap_read_multi_sge I'm confused about the 'any_port_is_iwarp' stuff too, I thought if one port was iwarp then all ports had to be iwarp? Even if we move away from that, I would think that some caps must be the same on all ports, and multi_sge, remote_write, etc would fit into that limitation. Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> > + * IWARP transports need REMOTE_WRITE for MRs used as the target of > > + * an RDMA_READ. Since the DMA MR is used for all ports, then if > > + * any port is running IWARP, add REMOTE_WRITE. > > + */ > > + if (any_port_is_iwarp(device)) > > It would be nice to have a new-style cap test for this instead of open > coding iwarp. Similar to rdma_cap_read_multi_sge This should be pushed down into the devices, or at least within verbs, rather than having ULPs needing to know this oddness. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Jason Gunthorpe [mailto:jgunthorpe@obsidianresearch.com] > Sent: Thursday, June 25, 2015 1:25 PM > To: Steve Wise > Cc: linux-rdma@vger.kernel.org; sagig@mellanox.com; orgerlitz@mellanox.com; raid@mellanox.com > Subject: Re: [PATCH RFC 2/2] RDMA/isert: Support iWARP transport > > On Thu, Jun 25, 2015 at 10:39:23AM -0500, Steve Wise wrote: > > + /* > > + * IWARP transports need REMOTE_WRITE for MRs used as the target of > > + * an RDMA_READ. Since the DMA MR is used for all ports, then if > > + * any port is running IWARP, add REMOTE_WRITE. > > + */ > > + if (any_port_is_iwarp(device)) > > It would be nice to have a new-style cap test for this instead of open > coding iwarp. Similar to rdma_cap_read_multi_sge > That would be ok with me. Now for naming this new function: rdma_cap_read_requires_remote_write_rights() That's pretty long. Any other ideas for naming this? > I'm confused about the 'any_port_is_iwarp' stuff too, I thought if one > port was iwarp then all ports had to be iwarp? > Currently no device supports iwarp + any other protocol. > Even if we move away from that, I would think that some caps must be > the same on all ports, and multi_sge, remote_write, etc would fit into > that limitation. > I'm happy with making this a device-global capability. If it becomes a per-port capability, then the code can change later. Steve. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-owner@vger.kernel.org] On Behalf Of Hefty, Sean > Sent: Thursday, June 25, 2015 1:30 PM > To: Jason Gunthorpe; Steve Wise > Cc: linux-rdma@vger.kernel.org; sagig@mellanox.com; orgerlitz@mellanox.com; raid@mellanox.com > Subject: RE: [PATCH RFC 2/2] RDMA/isert: Support iWARP transport > > > > + * IWARP transports need REMOTE_WRITE for MRs used as the target of > > > + * an RDMA_READ. Since the DMA MR is used for all ports, then if > > > + * any port is running IWARP, add REMOTE_WRITE. > > > + */ > > > + if (any_port_is_iwarp(device)) > > > > It would be nice to have a new-style cap test for this instead of open > > coding iwarp. Similar to rdma_cap_read_multi_sge > > This should be pushed down into the devices, or at least within verbs, rather than having ULPs needing to know this oddness. How would you envision doing this? At the time a MR is registered the device driver doesn't know if the application will be doing RDMA reads or not on that MR. Steve. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> How would you envision doing this? At the time a MR is registered the > device driver doesn't know if the application will be doing > RDMA reads or not on that MR. I was thinking of checking for REMOTE_READ, but that doesn't work on the initiator side. I guess you could a READ_DEST(SOURCE? TARGET?) flag to indicate that the MR will be used on the initiating side of the RDMA read operation. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Jun 25, 2015 at 06:45:56PM +0000, Hefty, Sean wrote: > > How would you envision doing this? At the time a MR is registered the > > device driver doesn't know if the application will be doing > > RDMA reads or not on that MR. > > I was thinking of checking for REMOTE_READ, but that doesn't work on the initiator side. > > I guess you could a READ_DEST(SOURCE? TARGET?) flag to indicate that > the MR will be used on the initiating side of the RDMA read > operation. Yes, that does make more sense... What about moving to something more specific? Encode the allowed verbs in the access flag? I don't understand why iWarp HW choose to ignore the verbs spec and not use IB_ACCESS_LOCAL_WRITE to cover RDMA READ responses... Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> What about moving to something more specific? Encode the allowed verbs > in the access flag? This makes more sense to me. Something like: SEND, RECV, INIT READ, INIT WRITE, READ TARGET, WRITE TARGET, etc. We're close to this, but it's not clear, for example, what flags are needed for a receive buffer. None? LOCAL_WRITE? -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-owner@vger.kernel.org] On Behalf Of Hefty, Sean > Sent: Thursday, June 25, 2015 2:20 PM > To: Jason Gunthorpe > Cc: Steve Wise; linux-rdma@vger.kernel.org; sagig@mellanox.com; orgerlitz@mellanox.com; Roi Dayan > Subject: RE: [PATCH RFC 2/2] RDMA/isert: Support iWARP transport > > > What about moving to something more specific? Encode the allowed verbs > > in the access flag? > > This makes more sense to me. Something like: SEND, RECV, INIT READ, INIT WRITE, READ TARGET, WRITE TARGET, etc. We're close to > this, but it's not clear, for example, what flags are needed for a receive buffer. None? LOCAL_WRITE? Make sense to me too. To stage the changes we could introduce a new function that returns the needed ib_access_flags value given the desired opcodes. Then have a series that changes all the existing ULPs to make use of this new function. Steve. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Jun 25, 2015 at 02:25:49PM -0500, Steve Wise wrote: > To stage the changes we could introduce a new function that returns > the needed ib_access_flags value given the desired opcodes. Then > have a series that changes all the existing ULPs to make use of this > new function. I wouldn't be afraid to add a new create_mr entry point that does the right thing, we can unexport/delete the old one when all kernel users are gone. Trivially the core can just have a default that translates based on iwarp/!iwarp for now. Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-owner@vger.kernel.org] On Behalf Of Jason Gunthorpe > Sent: Thursday, June 25, 2015 2:30 PM > To: Steve Wise > Cc: 'Hefty, Sean'; linux-rdma@vger.kernel.org; sagig@mellanox.com; orgerlitz@mellanox.com; 'Roi Dayan' > Subject: Re: [PATCH RFC 2/2] RDMA/isert: Support iWARP transport > > On Thu, Jun 25, 2015 at 02:25:49PM -0500, Steve Wise wrote: > > > To stage the changes we could introduce a new function that returns > > the needed ib_access_flags value given the desired opcodes. Then > > have a series that changes all the existing ULPs to make use of this > > new function. > > I wouldn't be afraid to add a new create_mr entry point that does the > right thing, we can unexport/delete the old one when all kernel users > are gone. Trivially the core can just have a default that translates > based on iwarp/!iwarp for now. > Ignoring user MRs at this point, I think we would need a new entry point for creating dma mrs ala ib_get_dma_mr(). How defining MR "roles": /** * ib_mr_role - possible roles a MR will be used for * * This allows a transport independent RDMA application to * create MRs that are usable for all the desired roles w/o * having to understand which access rights are needed. */ enum ib_mr_role { IB_MRR_RECV = 1, IB_MRR_SEND = (1<<1), IB_MRR_READ_SOURCE = (1<<2), IB_MRR_READ_SINK = (1<<3), IB_MRR_WRITE_SOURCE = (1<<4), IB_MRR_WRITE_SINK = (1<<5), /* probably more roles */ }; Then new entry point that will call ib_get_dma_mr() with the ib_access_flags required to handle the roles for the given device protocol. ib_get_dma_role_mr(struct ib_pd *pd, int mr_role_flags) -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> > I don't understand why iWarp HW choose to ignore the verbs spec and > not use IB_ACCESS_LOCAL_WRITE to cover RDMA READ responses... > The iWARP verbs spec mandates this. Steve. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Resending - somehow this didn't make it to the lists... On 6/25/2015 7:51 PM, Sagi Grimberg wrote: > On 6/25/2015 6:39 PM, Steve Wise wrote: >> Memory regions that are the target of an iWARP RDMA READ RESPONSE need >> REMOTE_WRITE access rights. So enable REMOTE_WRITE for iWARP devices. >> >> iWARP RDMA READ target sge depth is 1. So save the max_read_sge in >> the target device structure and use that when creating RDMA_READ work >> requests. > > Hi Steve, > > CC'ing target-devel... > >> >> Signed-off-by: Steve Wise <swise@opengridcomputing.com> >> Tested-by: Vasu Dev <vasu.dev@intel.com> >> --- >> drivers/infiniband/ulp/isert/ib_isert.c | 55 >> ++++++++++++++++++++++++++----- >> drivers/infiniband/ulp/isert/ib_isert.h | 1 + >> 2 files changed, 48 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/infiniband/ulp/isert/ib_isert.c >> b/drivers/infiniband/ulp/isert/ib_isert.c >> index 9e7b492..b5cde5d 100644 >> --- a/drivers/infiniband/ulp/isert/ib_isert.c >> +++ b/drivers/infiniband/ulp/isert/ib_isert.c >> @@ -165,6 +165,11 @@ isert_create_qp(struct isert_conn *isert_conn, >> attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2); >> isert_conn->max_sge = attr.cap.max_send_sge; >> >> + if (rdma_cap_read_multi_sge(device->ib_device, cma_id->port_num)) >> + isert_conn->max_read_sge = isert_conn->max_sge; >> + else >> + isert_conn->max_read_sge = 1; >> + > > I think it would make sense to change now max_sge to max_write_sge. > > And, shouldn't we just use: > max_wr_sge = max(2, device->dev_attr.max_sge - 2); > max_rd_sge = device->dev_attr.max_sge_rd; > > Does the Chelsio device reports max_sge_rd != 1 ? > > >> attr.cap.max_recv_sge = 1; >> attr.sq_sig_type = IB_SIGNAL_REQ_WR; >> attr.qp_type = IB_QPT_RC; >> @@ -348,6 +353,17 @@ out_cq: >> return ret; >> } >> >> +static int any_port_is_iwarp(struct isert_device *device) >> +{ >> + int i; >> + >> + for (i = rdma_start_port(device->ib_device); >> + i <= rdma_end_port(device->ib_device); i++) >> + if (rdma_protocol_iwarp(device->ib_device, i)) >> + return 1; >> + return 0; >> +} >> + > > This does not look like it belongs here... Can we > place this in rdma_cm? > >> static int >> isert_create_device_ib_res(struct isert_device *device) >> { >> @@ -383,7 +399,17 @@ isert_create_device_ib_res(struct isert_device >> *device) >> goto out_cq; >> } >> >> - device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE); >> + /* >> + * IWARP transports need REMOTE_WRITE for MRs used as the target of >> + * an RDMA_READ. Since the DMA MR is used for all ports, then if >> + * any port is running IWARP, add REMOTE_WRITE. >> + */ >> + if (any_port_is_iwarp(device)) >> + device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | >> + IB_ACCESS_REMOTE_WRITE); >> + else >> + device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE); >> + >> if (IS_ERR(device->mr)) { >> ret = PTR_ERR(device->mr); >> isert_err("failed to create dma mr, device %p, ret=%d\n", >> @@ -2375,7 +2401,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct >> iscsi_conn *conn) >> static int >> isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd >> *isert_cmd, >> struct ib_sge *ib_sge, struct ib_send_wr *send_wr, >> - u32 data_left, u32 offset) >> + u32 data_left, u32 offset, u32 max_sge) >> { >> struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; >> struct scatterlist *sg_start, *tmp_sg; >> @@ -2386,7 +2412,7 @@ isert_build_rdma_wr(struct isert_conn >> *isert_conn, struct isert_cmd *isert_cmd, >> >> sg_off = offset / PAGE_SIZE; >> sg_start = &cmd->se_cmd.t_data_sg[sg_off]; >> - sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, >> isert_conn->max_sge); >> + sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, max_sge); >> page_off = offset % PAGE_SIZE; >> >> send_wr->sg_list = ib_sge; >> @@ -2430,8 +2456,9 @@ isert_map_rdma(struct iscsi_conn *conn, struct >> iscsi_cmd *cmd, >> struct isert_data_buf *data = &wr->data; >> struct ib_send_wr *send_wr; >> struct ib_sge *ib_sge; >> - u32 offset, data_len, data_left, rdma_write_max, va_offset = 0; >> + u32 offset, data_len, data_left, rdma_max_len, va_offset = 0; >> int ret = 0, i, ib_sge_cnt; >> + u32 max_sge; >> >> isert_cmd->tx_desc.isert_cmd = isert_cmd; >> >> @@ -2453,7 +2480,12 @@ isert_map_rdma(struct iscsi_conn *conn, struct >> iscsi_cmd *cmd, >> } >> wr->ib_sge = ib_sge; >> >> - wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge); >> + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) >> + max_sge = isert_conn->max_sge; >> + else >> + max_sge = isert_conn->max_read_sge; >> + >> + wr->send_wr_num = DIV_ROUND_UP(data->nents, max_sge); >> wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, >> GFP_KERNEL); >> if (!wr->send_wr) { >> @@ -2463,11 +2495,11 @@ isert_map_rdma(struct iscsi_conn *conn, struct >> iscsi_cmd *cmd, >> } >> >> wr->isert_cmd = isert_cmd; >> - rdma_write_max = isert_conn->max_sge * PAGE_SIZE; >> >> + rdma_max_len = max_sge * PAGE_SIZE; >> for (i = 0; i < wr->send_wr_num; i++) { >> send_wr = &isert_cmd->rdma_wr.send_wr[i]; >> - data_len = min(data_left, rdma_write_max); >> + data_len = min(data_left, rdma_max_len); >> >> send_wr->send_flags = 0; >> if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { >> @@ -2489,7 +2521,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct >> iscsi_cmd *cmd, >> } >> >> ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, >> - send_wr, data_len, offset); >> + send_wr, data_len, offset, max_sge); >> ib_sge += ib_sge_cnt; >> >> offset += data_len; >> @@ -2618,6 +2650,13 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, >> fr_wr.wr.fast_reg.rkey = mr->rkey; >> fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; >> >> + /* >> + * IWARP transports need REMOTE_WRITE for MRs used as the target of >> + * an RDMA_READ. >> + */ >> + if (rdma_protocol_iwarp(ib_dev, isert_conn->cm_id->port_num)) >> + fr_wr.wr.fast_reg.access_flags |= IB_ACCESS_REMOTE_WRITE; >> + >> if (!wr) >> wr = &fr_wr; >> else >> diff --git a/drivers/infiniband/ulp/isert/ib_isert.h >> b/drivers/infiniband/ulp/isert/ib_isert.h >> index 9ec23a7..47cf11b 100644 >> --- a/drivers/infiniband/ulp/isert/ib_isert.h >> +++ b/drivers/infiniband/ulp/isert/ib_isert.h >> @@ -153,6 +153,7 @@ struct isert_conn { >> u32 initiator_depth; >> bool pi_support; >> u32 max_sge; >> + u32 max_read_sge; >> char *login_buf; >> char *login_req_buf; >> char *login_rsp_buf; >> > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 6/25/2015 8:06 PM, Steve Wise wrote: > > >> -----Original Message----- >> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-owner@vger.kernel.org] On Behalf Of Sagi Grimberg >> Sent: Thursday, June 25, 2015 11:51 AM >> To: Steve Wise; linux-rdma@vger.kernel.org >> Cc: Or Gerlitz; Roi Dayan; target-devel >> Subject: Re: [PATCH RFC 2/2] RDMA/isert: Support iWARP transport >> >> On 6/25/2015 6:39 PM, Steve Wise wrote: >>> Memory regions that are the target of an iWARP RDMA READ RESPONSE need >>> REMOTE_WRITE access rights. So enable REMOTE_WRITE for iWARP devices. >>> >>> iWARP RDMA READ target sge depth is 1. So save the max_read_sge in >>> the target device structure and use that when creating RDMA_READ work >>> requests. >> >> Hi Steve, >> >> CC'ing target-devel... >> >>> >>> Signed-off-by: Steve Wise <swise@opengridcomputing.com> >>> Tested-by: Vasu Dev <vasu.dev@intel.com> >>> --- >>> drivers/infiniband/ulp/isert/ib_isert.c | 55 ++++++++++++++++++++++++++----- >>> drivers/infiniband/ulp/isert/ib_isert.h | 1 + >>> 2 files changed, 48 insertions(+), 8 deletions(-) >>> >>> diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c >>> index 9e7b492..b5cde5d 100644 >>> --- a/drivers/infiniband/ulp/isert/ib_isert.c >>> +++ b/drivers/infiniband/ulp/isert/ib_isert.c >>> @@ -165,6 +165,11 @@ isert_create_qp(struct isert_conn *isert_conn, >>> attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2); >>> isert_conn->max_sge = attr.cap.max_send_sge; >>> >>> + if (rdma_cap_read_multi_sge(device->ib_device, cma_id->port_num)) >>> + isert_conn->max_read_sge = isert_conn->max_sge; >>> + else >>> + isert_conn->max_read_sge = 1; >>> + >> >> I think it would make sense to change now max_sge to max_write_sge. >> > > Ok. > >> And, shouldn't we just use: >> max_wr_sge = max(2, device->dev_attr.max_sge - 2); >> max_rd_sge = device->dev_attr.max_sge_rd; >> >> Does the Chelsio device reports max_sge_rd != 1 ? >> > > Yes, but I wasn't sure max_sge_rd is really the read target max sge. Why not? > I don't see it being set by the mlx* drivers. Umm, that's a bug. Let me send an easy fix for all mlx drivers. > Also since we have the new rdma_cap_read_multi_sge() helper, I thought I should use it. :) I think that reading the exact device caps max_sge, max_sge_is better and more straight forward... -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 6/25/2015 10:29 PM, Jason Gunthorpe wrote: > On Thu, Jun 25, 2015 at 02:25:49PM -0500, Steve Wise wrote: > >> To stage the changes we could introduce a new function that returns >> the needed ib_access_flags value given the desired opcodes. Then >> have a series that changes all the existing ULPs to make use of this >> new function. > > I wouldn't be afraid to add a new create_mr entry point that does the > right thing, we can unexport/delete the old one when all kernel users > are gone. Trivially the core can just have a default that translates > based on iwarp/!iwarp for now. > Note that we have ib_create_mr() that receives an easily extensible ib_mr_init_attr (similar to qp creation). I think this is a good opportunity to unite all the create routines to ib_create_mr. ib_get_dma_mr will become: ib_mr_init_attr.type = IB_MR_DMA; ib_mr_init_attr.mr_access_flags = mr_access_flags; ib_create_mr(pd, &ib_mr_init_attr); ib_alloc_fast_reg_mr will become: ib_mr_init_attr.type = IB_MR_FAST_REG; ib_create_mr(pd, &ib_mr_init_attr); ib_reg_phys_mr (if we don't want to kill it by now) will become: ib_mr_init_attr.type = IB_MR_REG_PHYS; ib_mr_init_attr.arr = phys_buf_array; ib_mr_init_attr.max_reg_descriptors = num_phys_buf; ib_mr_init_attr.mr_access_flags = mr_access_flags; ib_mr_init_attr.iova_start = iova_start; ib_create_mr(pd, &ib_mr_init_attr); The core layer will spread these in the different driver routines (although it would be nice if the drivers can have a single MR craetion routine that can handle all MR types (including fail if unsupported). Thoughts? -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Jun 27, 2015 at 12:12:11PM +0300, Sagi Grimberg wrote: > >Also since we have the new rdma_cap_read_multi_sge() helper, I > >thought I should use it. :) > > I think that reading the exact device caps max_sge, max_sge_is better > and more straight forward... Right, rdma_cap_read_multi_sge was just a stand in wanting something better to expose asymmetric read/write sge caps. If iwarp can get what it needs via max_sge_rd then lets drop rdma_cap_read_multi_sge. Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 9e7b492..b5cde5d 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -165,6 +165,11 @@ isert_create_qp(struct isert_conn *isert_conn, attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2); isert_conn->max_sge = attr.cap.max_send_sge; + if (rdma_cap_read_multi_sge(device->ib_device, cma_id->port_num)) + isert_conn->max_read_sge = isert_conn->max_sge; + else + isert_conn->max_read_sge = 1; + attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; @@ -348,6 +353,17 @@ out_cq: return ret; } +static int any_port_is_iwarp(struct isert_device *device) +{ + int i; + + for (i = rdma_start_port(device->ib_device); + i <= rdma_end_port(device->ib_device); i++) + if (rdma_protocol_iwarp(device->ib_device, i)) + return 1; + return 0; +} + static int isert_create_device_ib_res(struct isert_device *device) { @@ -383,7 +399,17 @@ isert_create_device_ib_res(struct isert_device *device) goto out_cq; } - device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE); + /* + * IWARP transports need REMOTE_WRITE for MRs used as the target of + * an RDMA_READ. Since the DMA MR is used for all ports, then if + * any port is running IWARP, add REMOTE_WRITE. + */ + if (any_port_is_iwarp(device)) + device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE); + else + device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(device->mr)) { ret = PTR_ERR(device->mr); isert_err("failed to create dma mr, device %p, ret=%d\n", @@ -2375,7 +2401,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) static int isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, struct ib_sge *ib_sge, struct ib_send_wr *send_wr, - u32 data_left, u32 offset) + u32 data_left, u32 offset, u32 max_sge) { struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct scatterlist *sg_start, *tmp_sg; @@ -2386,7 +2412,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, sg_off = offset / PAGE_SIZE; sg_start = &cmd->se_cmd.t_data_sg[sg_off]; - sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge); + sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, max_sge); page_off = offset % PAGE_SIZE; send_wr->sg_list = ib_sge; @@ -2430,8 +2456,9 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_data_buf *data = &wr->data; struct ib_send_wr *send_wr; struct ib_sge *ib_sge; - u32 offset, data_len, data_left, rdma_write_max, va_offset = 0; + u32 offset, data_len, data_left, rdma_max_len, va_offset = 0; int ret = 0, i, ib_sge_cnt; + u32 max_sge; isert_cmd->tx_desc.isert_cmd = isert_cmd; @@ -2453,7 +2480,12 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, } wr->ib_sge = ib_sge; - wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge); + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) + max_sge = isert_conn->max_sge; + else + max_sge = isert_conn->max_read_sge; + + wr->send_wr_num = DIV_ROUND_UP(data->nents, max_sge); wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, GFP_KERNEL); if (!wr->send_wr) { @@ -2463,11 +2495,11 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, } wr->isert_cmd = isert_cmd; - rdma_write_max = isert_conn->max_sge * PAGE_SIZE; + rdma_max_len = max_sge * PAGE_SIZE; for (i = 0; i < wr->send_wr_num; i++) { send_wr = &isert_cmd->rdma_wr.send_wr[i]; - data_len = min(data_left, rdma_write_max); + data_len = min(data_left, rdma_max_len); send_wr->send_flags = 0; if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { @@ -2489,7 +2521,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, } ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, - send_wr, data_len, offset); + send_wr, data_len, offset, max_sge); ib_sge += ib_sge_cnt; offset += data_len; @@ -2618,6 +2650,13 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, fr_wr.wr.fast_reg.rkey = mr->rkey; fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; + /* + * IWARP transports need REMOTE_WRITE for MRs used as the target of + * an RDMA_READ. + */ + if (rdma_protocol_iwarp(ib_dev, isert_conn->cm_id->port_num)) + fr_wr.wr.fast_reg.access_flags |= IB_ACCESS_REMOTE_WRITE; + if (!wr) wr = &fr_wr; else diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 9ec23a7..47cf11b 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -153,6 +153,7 @@ struct isert_conn { u32 initiator_depth; bool pi_support; u32 max_sge; + u32 max_read_sge; char *login_buf; char *login_req_buf; char *login_rsp_buf;