diff mbox

[V6,9/9] isert: Support iWARP transports using FRMRs

Message ID 20150724161904.25617.85015.stgit@build2.ogc.int (mailing list archive)
State New, archived
Headers show

Commit Message

Steve Wise July 24, 2015, 4:19 p.m. UTC
Add new register and unregister functions to be used with devices that
support FRMRs, provide a local dma lkey, yet do not support DIF/PI.

isert_reg_frmr() only needs to use FRMRs for RDMA READ since RDMA WRITE
can be handled entirely with the local dma lkey.  So for RDMA READ,
it calls isert_reg_read_frmr().  Otherwise is uses the lkey map service
isert_map_lkey() for RDMA WRITEs.

isert_reg_read_frmr() will create a linked list of WR triplets of the
form: INV->FRWR->READ.  The number of these triplets is dependent on
the devices fast reg page list length limit.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---

 drivers/infiniband/ulp/isert/ib_isert.c |  224 ++++++++++++++++++++++++++++++-
 1 files changed, 218 insertions(+), 6 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jason Gunthorpe July 24, 2015, 4:57 p.m. UTC | #1
On Fri, Jul 24, 2015 at 11:19:05AM -0500, Steve Wise wrote:
> Add new register and unregister functions to be used with devices that
> support FRMRs, provide a local dma lkey, yet do not support DIF/PI.
> 
> isert_reg_frmr() only needs to use FRMRs for RDMA READ since RDMA WRITE
> can be handled entirely with the local dma lkey.  So for RDMA READ,
> it calls isert_reg_read_frmr().  Otherwise is uses the lkey map service
> isert_map_lkey() for RDMA WRITEs.
> 
> isert_reg_read_frmr() will create a linked list of WR triplets of the
> form: INV->FRWR->READ.  The number of these triplets is dependent on
> the devices fast reg page list length limit.

That ordering seems strange, surely it should be

FRWR->READ->INV

And use IB_WR_RDMA_READ_WITH_INV if possible?

ACCESS_REMOTE rkey's should not be left open across the FROM_DEVICE
DMA flush.

>  	/* assign function handlers */
> -	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
> -	    dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY &&
> -	    dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
> -		device->use_fastreg = 1;
> -		device->reg_rdma_mem = isert_reg_frmr_pi;
> -		device->unreg_rdma_mem = isert_unreg_frmr_pi;
> +	cap_flags = dev_attr->device_cap_flags;
> +	if (cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
> +	    cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
> +		if (cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
> +			device->use_fastreg = 1;
> +			device->reg_rdma_mem = isert_reg_frmr_pi;
> +			device->unreg_rdma_mem = isert_unreg_frmr_pi;
> +		} else {
> +			device->use_fastreg = 1;
> +			device->reg_rdma_mem = isert_reg_frmr;
> +			device->unreg_rdma_mem = isert_unreg_frmr;
> +		}

The use of FRWR for RDMA READ should be iWarp specific, IB shouldn't
pay that overhead. I am expecting to see a cap_rdma_read_rkey or
something in here ?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise July 24, 2015, 6:48 p.m. UTC | #2
> -----Original Message-----
> From: Jason Gunthorpe [mailto:jgunthorpe@obsidianresearch.com]
> Sent: Friday, July 24, 2015 11:57 AM
> To: Steve Wise
> Cc: dledford@redhat.com; infinipath@intel.com; sagig@mellanox.com; ogerlitz@mellanox.com; roid@mellanox.com; linux-
> rdma@vger.kernel.org; eli@mellanox.com; target-devel@vger.kernel.org; linux-nfs@vger.kernel.org; bfields@fieldses.org
> Subject: Re: [PATCH V6 9/9] isert: Support iWARP transports using FRMRs
> 
> On Fri, Jul 24, 2015 at 11:19:05AM -0500, Steve Wise wrote:
> > Add new register and unregister functions to be used with devices that
> > support FRMRs, provide a local dma lkey, yet do not support DIF/PI.
> >
> > isert_reg_frmr() only needs to use FRMRs for RDMA READ since RDMA WRITE
> > can be handled entirely with the local dma lkey.  So for RDMA READ,
> > it calls isert_reg_read_frmr().  Otherwise is uses the lkey map service
> > isert_map_lkey() for RDMA WRITEs.
> >
> > isert_reg_read_frmr() will create a linked list of WR triplets of the
> > form: INV->FRWR->READ.  The number of these triplets is dependent on
> > the devices fast reg page list length limit.
> 
> That ordering seems strange, surely it should be
> 
> FRWR->READ->INV
> 
> And use IB_WR_RDMA_READ_WITH_INV if possible?
> 
> ACCESS_REMOTE rkey's should not be left open across the FROM_DEVICE
> DMA flush.
>

You're correct.  I was thinking to simplify the IO by always invalidating before re-registering.  But it does leave the FRMR
registered and exposes a security hole.  

I'll have to rework this.
 
> >  	/* assign function handlers */
> > -	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
> > -	    dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY &&
> > -	    dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
> > -		device->use_fastreg = 1;
> > -		device->reg_rdma_mem = isert_reg_frmr_pi;
> > -		device->unreg_rdma_mem = isert_unreg_frmr_pi;
> > +	cap_flags = dev_attr->device_cap_flags;
> > +	if (cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
> > +	    cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
> > +		if (cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
> > +			device->use_fastreg = 1;
> > +			device->reg_rdma_mem = isert_reg_frmr_pi;
> > +			device->unreg_rdma_mem = isert_unreg_frmr_pi;
> > +		} else {
> > +			device->use_fastreg = 1;
> > +			device->reg_rdma_mem = isert_reg_frmr;
> > +			device->unreg_rdma_mem = isert_unreg_frmr;
> > +		}
> 
> The use of FRWR for RDMA READ should be iWarp specific, IB shouldn't
> pay that overhead. I am expecting to see a cap_rdma_read_rkey or
> something in here ?
>

Ok.  But cap_rdma_read_rkey() doesn't really describe the requirement.  The requirement is rkey + REMOTE_WRITE.  So it is more like
rdma_cap_read_requires_remote_write() which is ugly and too long (but descriptive)...




--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe July 24, 2015, 7:24 p.m. UTC | #3
On Fri, Jul 24, 2015 at 01:48:09PM -0500, Steve Wise wrote:
> > The use of FRWR for RDMA READ should be iWarp specific, IB shouldn't
> > pay that overhead. I am expecting to see a cap_rdma_read_rkey or
> > something in here ?
> 
> Ok.  But cap_rdma_read_rkey() doesn't really describe the
> requirement.  The requirement is rkey + REMOTE_WRITE.  So it is more
> like rdma_cap_read_requires_remote_write() which is ugly and too
> long (but descriptive)...

I don't care much what name you pick, just jam something like this in
the description

 If set then RDMA_READ must be performed by mapping the local
 buffers through a rkey MR with ACCESS_REMOTE_WRITE enabled.
 The rkey of this MR should be passed in as the sg_lists's lkey for
 IB_WR_RDMA_READ_WITH_INV.

 FRWR should be used to register the buffer in the send queue,
 and the read should be issued using IB_WR_RDMA_READ_WITH_INV (xx
 can we just implicitly rely on this? Are there any iWarp cards that
 support FRWR but not WITH_INV?)

 Finally, only a single SGE can be used with RDMA_READ, all scattering
 must be accomplished with the MR.

This quite dramatically changes what is an allowed scatter for the
transfer, IB can support arbitary unaligned S/G lists, while this is
now forced into gapless page aligned elements.

Your patch takes care of this? And only impacts IB?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise July 24, 2015, 7:57 p.m. UTC | #4
> -----Original Message-----
> From: Jason Gunthorpe [mailto:jgunthorpe@obsidianresearch.com]
> Sent: Friday, July 24, 2015 2:24 PM
> To: Steve Wise
> Cc: dledford@redhat.com; infinipath@intel.com; sagig@mellanox.com; ogerlitz@mellanox.com; roid@mellanox.com; linux-
> rdma@vger.kernel.org; eli@mellanox.com; target-devel@vger.kernel.org; linux-nfs@vger.kernel.org; bfields@fieldses.org
> Subject: Re: [PATCH V6 9/9] isert: Support iWARP transports using FRMRs
> 
> On Fri, Jul 24, 2015 at 01:48:09PM -0500, Steve Wise wrote:
> > > The use of FRWR for RDMA READ should be iWarp specific, IB shouldn't
> > > pay that overhead. I am expecting to see a cap_rdma_read_rkey or
> > > something in here ?
> >
> > Ok.  But cap_rdma_read_rkey() doesn't really describe the
> > requirement.  The requirement is rkey + REMOTE_WRITE.  So it is more
> > like rdma_cap_read_requires_remote_write() which is ugly and too
> > long (but descriptive)...
> 
> I don't care much what name you pick, just jam something like this in
> the description
> 
>  If set then RDMA_READ must be performed by mapping the local
>  buffers through a rkey MR with ACCESS_REMOTE_WRITE enabled.
>  The rkey of this MR should be passed in as the sg_lists's lkey for
>  IB_WR_RDMA_READ_WITH_INV.
> 
>  FRWR should be used to register the buffer in the send queue,
>  and the read should be issued using IB_WR_RDMA_READ_WITH_INV (xx
>  can we just implicitly rely on this? Are there any iWarp cards that
>  support FRWR but not WITH_INV?)
>

No.  And iWARP devices must support READ_WITH_INV from my reading of the iWARP verbs spec.  

I will add all these comments and make use of READ_WITH_INV.

>  Finally, only a single SGE can be used with RDMA_READ, all scattering
>  must be accomplished with the MR.
> 
> This quite dramatically changes what is an allowed scatter for the
> transfer, IB can support arbitary unaligned S/G lists, while this is
> now forced into gapless page aligned elements.
> 
> Your patch takes care of this? And only impacts IB?

Did you mean "only impacts iWARP?"  

Yes the patch takes care of sg->fr page list packing. And it uses the existing isert_map_fr_pagelist() to pack the sg into the
fastreg page list.  This same routine is used by the IB FRMR/PI reg/unreg routines as well.

None of this impacts isert over IB, assuming the suggested change above to only use the new reg/unreg functions for devices that
need the iwarp style of read. IE IB devices will either use the existing FRMR/PI methods if they support FRMR/LOCAL_DMA_LKEY/PI
(mlx5) or they will use the existing DMA_MR methods (all the rest of the IB devices).  IW devices will get my new reg/unreg
methods...

Steve.



--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Wise July 24, 2015, 10:11 p.m. UTC | #5
> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-owner@vger.kernel.org] On Behalf Of Steve Wise
> Sent: Friday, July 24, 2015 2:58 PM
> To: 'Jason Gunthorpe'
> Cc: dledford@redhat.com; infinipath@intel.com; sagig@mellanox.com; ogerlitz@mellanox.com; roid@mellanox.com; linux-
> rdma@vger.kernel.org; eli@mellanox.com; target-devel@vger.kernel.org; linux-nfs@vger.kernel.org; bfields@fieldses.org
> Subject: RE: [PATCH V6 9/9] isert: Support iWARP transports using FRMRs
> 
> 
> 
> > -----Original Message-----
> > From: Jason Gunthorpe [mailto:jgunthorpe@obsidianresearch.com]
> > Sent: Friday, July 24, 2015 2:24 PM
> > To: Steve Wise
> > Cc: dledford@redhat.com; infinipath@intel.com; sagig@mellanox.com; ogerlitz@mellanox.com; roid@mellanox.com; linux-
> > rdma@vger.kernel.org; eli@mellanox.com; target-devel@vger.kernel.org; linux-nfs@vger.kernel.org; bfields@fieldses.org
> > Subject: Re: [PATCH V6 9/9] isert: Support iWARP transports using FRMRs
> >
> > On Fri, Jul 24, 2015 at 01:48:09PM -0500, Steve Wise wrote:
> > > > The use of FRWR for RDMA READ should be iWarp specific, IB shouldn't
> > > > pay that overhead. I am expecting to see a cap_rdma_read_rkey or
> > > > something in here ?
> > >
> > > Ok.  But cap_rdma_read_rkey() doesn't really describe the
> > > requirement.  The requirement is rkey + REMOTE_WRITE.  So it is more
> > > like rdma_cap_read_requires_remote_write() which is ugly and too
> > > long (but descriptive)...
> >
> > I don't care much what name you pick, just jam something like this in
> > the description
> >
> >  If set then RDMA_READ must be performed by mapping the local
> >  buffers through a rkey MR with ACCESS_REMOTE_WRITE enabled.
> >  The rkey of this MR should be passed in as the sg_lists's lkey for
> >  IB_WR_RDMA_READ_WITH_INV.
> >
> >  FRWR should be used to register the buffer in the send queue,
> >  and the read should be issued using IB_WR_RDMA_READ_WITH_INV (xx
> >  can we just implicitly rely on this? Are there any iWarp cards that
> >  support FRWR but not WITH_INV?)
> >
> 
> No.  And iWARP devices must support READ_WITH_INV from my reading of the iWARP verbs spec.
> 
> I will add all these comments and make use of READ_WITH_INV.
> 
> >  Finally, only a single SGE can be used with RDMA_READ, all scattering
> >  must be accomplished with the MR.
> >
> > This quite dramatically changes what is an allowed scatter for the
> > transfer, IB can support arbitary unaligned S/G lists, while this is
> > now forced into gapless page aligned elements.
> >
> > Your patch takes care of this? And only impacts IB?
> 
> Did you mean "only impacts iWARP?"
> 
> Yes the patch takes care of sg->fr page list packing. And it uses the existing isert_map_fr_pagelist() to pack the sg into the
> fastreg page list.  This same routine is used by the IB FRMR/PI reg/unreg routines as well.
> 

By the way, just to be clear: If you use a FRWR, you by definition only have one SGE entry as the result of the registration.  So
regardless of what a device/protocol can do with the destination SGE of an RDMA READ operation, if you use FRWR to register the
destination region, you need only 1 SGE in the RDMA READ WR.

Stevo.

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe July 24, 2015, 10:38 p.m. UTC | #6
On Fri, Jul 24, 2015 at 05:11:04PM -0500, Steve Wise wrote:

> By the way, just to be clear: If you use a FRWR, you by definition
> only have one SGE entry as the result of the registration.  So
> regardless of what a device/protocol can do with the destination SGE
> of an RDMA READ operation, if you use FRWR to register the
> destination region, you need only 1 SGE in the RDMA READ WR.

FRWR's have horrible restrictions on the scatterlist. To realize
arbitrary scatterlists requires multiple MRs, and thus multiple SGE
entries.

The warning is for a reader who might be familiar with IB and think
iWarp simply needs to have a MR wrapped around the memory and assume
gaps/etc can be supported through a s/g list, like IB can.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg July 26, 2015, 10:23 a.m. UTC | #7
On 7/24/2015 10:24 PM, Jason Gunthorpe wrote:
> On Fri, Jul 24, 2015 at 01:48:09PM -0500, Steve Wise wrote:
>>> The use of FRWR for RDMA READ should be iWarp specific, IB shouldn't
>>> pay that overhead. I am expecting to see a cap_rdma_read_rkey or
>>> something in here ?
>>
>> Ok.  But cap_rdma_read_rkey() doesn't really describe the
>> requirement.  The requirement is rkey + REMOTE_WRITE.  So it is more
>> like rdma_cap_read_requires_remote_write() which is ugly and too
>> long (but descriptive)...
>
> I don't care much what name you pick, just jam something like this in
> the description

I think we can just do if (signature || iwarp) use fastreg else
use local_dma_lkey.

>
>   If set then RDMA_READ must be performed by mapping the local
>   buffers through a rkey MR with ACCESS_REMOTE_WRITE enabled.
>   The rkey of this MR should be passed in as the sg_lists's lkey for
>   IB_WR_RDMA_READ_WITH_INV.

I think this would be an incremental patch and not as part of iwarp
support.

Question though, wouldn't it be better to do a single RDMA_READ to say
4 registered keys rather than RDMA_READ_WITH_INV for each?
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe July 27, 2015, 5:07 p.m. UTC | #8
On Sun, Jul 26, 2015 at 01:23:12PM +0300, Sagi Grimberg wrote:

> Question though, wouldn't it be better to do a single RDMA_READ to say
> 4 registered keys rather than RDMA_READ_WITH_INV for each?

RDMA READ is limted to 1 sg in iWarp.

RDMA_READ_WITH_INV and 1 sg is really the only correct way to drive
iWarp.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 47bb790..34b3151 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -57,6 +57,11 @@  isert_unreg_frmr_pi(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
 static int
 isert_reg_frmr_pi(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 		  struct isert_rdma_wr *wr);
+static void
+isert_unreg_frmr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
+static int
+isert_reg_frmr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+	       struct isert_rdma_wr *wr);
 static int
 isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd);
 static int
@@ -368,6 +373,7 @@  static int
 isert_create_device_ib_res(struct isert_device *device)
 {
 	struct ib_device_attr *dev_attr;
+	int cap_flags;
 	int ret;
 
 	dev_attr = &device->dev_attr;
@@ -376,12 +382,18 @@  isert_create_device_ib_res(struct isert_device *device)
 		return ret;
 
 	/* assign function handlers */
-	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
-	    dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY &&
-	    dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
-		device->use_fastreg = 1;
-		device->reg_rdma_mem = isert_reg_frmr_pi;
-		device->unreg_rdma_mem = isert_unreg_frmr_pi;
+	cap_flags = dev_attr->device_cap_flags;
+	if (cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
+	    cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
+		if (cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
+			device->use_fastreg = 1;
+			device->reg_rdma_mem = isert_reg_frmr_pi;
+			device->unreg_rdma_mem = isert_unreg_frmr_pi;
+		} else {
+			device->use_fastreg = 1;
+			device->reg_rdma_mem = isert_reg_frmr;
+			device->unreg_rdma_mem = isert_unreg_frmr;
+		}
 	} else {
 		device->use_fastreg = 0;
 		device->reg_rdma_mem = isert_map_lkey;
@@ -1782,6 +1794,50 @@  isert_unreg_frmr_pi(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
 }
 
 static void
+isert_unreg_frmr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
+{
+	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+
+	if (wr->data.sg) {
+		isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
+		isert_unmap_data_buf(isert_conn, &wr->data);
+	}
+
+	if (wr->send_wr) {
+		int i;
+
+		isert_dbg("Cmd %p free send_wr wr_num %d\n", isert_cmd,
+			  wr->send_wr_num);
+		if (wr->iser_ib_op == ISER_IB_RDMA_READ) {
+			spin_lock_bh(&isert_conn->pool_lock);
+			for (i = 0; i < wr->send_wr_num; i += 3) {
+				struct ib_send_wr *fr_wr;
+				struct fast_reg_descriptor *fr_desc;
+
+				fr_wr = &wr->send_wr[i + 1];
+				fr_desc = (struct fast_reg_descriptor *)
+					  (uintptr_t) fr_wr->wr_id;
+				isert_dbg("Cmd %p free fr_desc %p\n", isert_cmd,
+					  fr_desc);
+				list_add_tail(&fr_desc->list,
+					      &isert_conn->fr_pool);
+			}
+			spin_unlock_bh(&isert_conn->pool_lock);
+		}
+		isert_dbg("Cmd %p free wr->send_wr\n", isert_cmd);
+		kfree(wr->send_wr);
+		wr->send_wr = NULL;
+		wr->send_wr_num = 0;
+	}
+
+	if (wr->ib_sge) {
+		isert_dbg("Cmd %p free ib_sge\n", isert_cmd);
+		kfree(wr->ib_sge);
+		wr->ib_sge = NULL;
+	}
+}
+
+static void
 isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
 {
 	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
@@ -2958,6 +3014,162 @@  unmap_cmd:
 	return ret;
 }
 
+static void
+isert_build_inv_fr_wrs(struct isert_conn *isert_conn,
+		       struct isert_cmd *isert_cmd, struct ib_sge *ib_sge,
+		       struct ib_send_wr *inv_wr, u32 data_len, u32 offset)
+{
+	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
+	struct scatterlist *sg_start;
+	struct isert_device *device = isert_conn->device;
+	struct ib_device *ib_dev = device->ib_device;
+	struct ib_fast_reg_page_list *frpl;
+	struct fast_reg_descriptor *fr_desc;
+	struct ib_send_wr *fr_wr;
+	u32 sg_off, page_off;
+	int sg_nents;
+	int frpl_len;
+	unsigned long flags;
+
+	sg_off = offset / PAGE_SIZE;
+	sg_start = &cmd->se_cmd.t_data_sg[sg_off];
+	sg_nents = min(cmd->se_cmd.t_data_nents - sg_off,
+		   isert_conn->max_frpl_len);
+	page_off = offset % PAGE_SIZE;
+
+	spin_lock_irqsave(&isert_conn->pool_lock, flags);
+	fr_desc = list_first_entry(&isert_conn->fr_pool,
+				   struct fast_reg_descriptor, list);
+	list_del(&fr_desc->list);
+	spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
+	fr_desc->ind &= ~ISERT_DATA_KEY_VALID;
+
+	/* Build the INV WR */
+	isert_inv_rkey(inv_wr, fr_desc->data_mr);
+
+	/* Build the FR WR */
+	frpl = fr_desc->data_frpl;
+	frpl_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents,
+					     &frpl->page_list[0]);
+	fr_wr = inv_wr + 1;
+	memset(fr_wr, 0, sizeof(fr_wr));
+	fr_wr->wr_id = (uintptr_t)fr_desc;
+	fr_wr->opcode = IB_WR_FAST_REG_MR;
+	fr_wr->wr.fast_reg.iova_start = frpl->page_list[0] + page_off;
+	fr_wr->wr.fast_reg.page_list = frpl;
+	fr_wr->wr.fast_reg.page_list_len = frpl_len;
+	fr_wr->wr.fast_reg.page_shift = PAGE_SHIFT;
+	fr_wr->wr.fast_reg.length = data_len;
+	fr_wr->wr.fast_reg.rkey = fr_desc->data_mr->rkey;
+	fr_wr->wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE |
+					  IB_ACCESS_LOCAL_WRITE;
+
+	ib_sge->addr = frpl->page_list[0] + page_off;
+	ib_sge->length = data_len;
+	ib_sge->lkey = fr_desc->data_mr->rkey;
+
+	/* Link up the wrs: inv->fr->read */
+	inv_wr->next = fr_wr;
+	fr_wr->next = fr_wr + 1;
+}
+
+static int
+isert_reg_read_frmr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+		    struct isert_rdma_wr *wr)
+{
+	struct se_cmd *se_cmd = &cmd->se_cmd;
+	struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+	struct isert_conn *isert_conn = conn->context;
+	struct isert_data_buf *data = &wr->data;
+	struct ib_send_wr *send_wr = NULL;
+	struct ib_sge *ib_sge;
+	u32 offset, data_len, data_left, rdma_max_len, va_offset = 0;
+	int ret = 0, i;
+	u32 num_frwrs;
+
+	isert_cmd->tx_desc.isert_cmd = isert_cmd;
+
+	offset = cmd->write_data_done;
+	ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg,
+				 se_cmd->t_data_nents, se_cmd->data_length,
+				 offset, wr->iser_ib_op, &wr->data);
+	if (ret)
+		return ret;
+
+	data_left = data->len;
+	offset = data->offset;
+	num_frwrs = DIV_ROUND_UP(data->nents, isert_conn->max_frpl_len);
+
+	ib_sge = kcalloc(num_frwrs, sizeof(struct ib_sge), GFP_KERNEL);
+	if (!ib_sge) {
+		ret = -ENOMEM;
+		goto unmap_cmd;
+	}
+	wr->ib_sge = ib_sge;
+
+	/* always do INV + FR + READ for now */
+	wr->send_wr_num = 3 * num_frwrs;
+	wr->send_wr = kcalloc(wr->send_wr_num, sizeof(struct ib_send_wr),
+			      GFP_KERNEL);
+	if (!wr->send_wr) {
+		ret = -ENOMEM;
+		goto unmap_cmd;
+	}
+	isert_info("num_frwrs %d send_wr_num %d\n", num_frwrs, wr->send_wr_num);
+
+	wr->isert_cmd = isert_cmd;
+	rdma_max_len = isert_conn->max_frpl_len * PAGE_SIZE;
+
+	for (i = 0; i < wr->send_wr_num; i += 3) {
+
+		/*
+		 * i = INV, i+1 = FR, i+2 = READ
+		 */
+		send_wr = &isert_cmd->rdma_wr.send_wr[i+2];
+		data_len = min(data_left, rdma_max_len);
+		isert_build_inv_fr_wrs(isert_conn, isert_cmd, ib_sge,
+				       send_wr - 2, data_len, offset);
+		send_wr->opcode = IB_WR_RDMA_READ;
+		send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset;
+		send_wr->wr.rdma.rkey = isert_cmd->write_stag;
+		send_wr->sg_list = ib_sge;
+		send_wr->num_sge = 1;
+		if (i + 3 == wr->send_wr_num) {
+			send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
+			send_wr->send_flags = IB_SEND_SIGNALED;
+		} else {
+			send_wr->next = send_wr + 1;
+		}
+
+		ib_sge++;
+		offset += data_len;
+		va_offset += data_len;
+		data_left -= data_len;
+	}
+	isert_info("send_wr->send_flags 0x%x\n", send_wr->send_flags);
+
+	return 0;
+unmap_cmd:
+	isert_unmap_data_buf(isert_conn, data);
+
+	return ret;
+}
+
+static int
+isert_reg_frmr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+	       struct isert_rdma_wr *wr)
+{
+
+	/*
+	 * Use the local dma lkey for Writes since it only requires local
+	 * access flags.  For reads, build up inv+fr+read
+	 * wr chains as needed.
+	 */
+	if (wr->iser_ib_op == ISER_IB_RDMA_WRITE)
+		return isert_map_lkey(conn, cmd, wr);
+	return isert_reg_read_frmr(conn, cmd, wr);
+}
+
 static int
 isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 {