diff mbox

[for-next,10/10] IB/iser: Support the remote invalidation exception

Message ID 1447691861-3796-11-git-send-email-sagig@mellanox.com (mailing list archive)
State Superseded
Headers show

Commit Message

Sagi Grimberg Nov. 16, 2015, 4:37 p.m. UTC
From: Jenny Derzhavetz <jennyf@mellanox.com>

Declare that we support remote invalidation and be able to detect
the invalidated rkey so we won't invalidate it locally. The spec
mandates that we must not rely on the taget remote invalidate our
rkey so we must check it upon a receive (scsi response) completion.

Signed-off-by: Jenny Derzhavetz <jennyf@mellanox.com>
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
---
 drivers/infiniband/ulp/iser/iscsi_iser.h     |  3 +-
 drivers/infiniband/ulp/iser/iser_initiator.c | 55 +++++++++++++++++++++++++++-
 drivers/infiniband/ulp/iser/iser_verbs.c     | 19 +++++++---
 3 files changed, 70 insertions(+), 7 deletions(-)

Comments

Or Gerlitz Nov. 17, 2015, 8:03 a.m. UTC | #1
On 11/16/2015 6:37 PM, Sagi Grimberg wrote:
>   void iser_rcv_completion(struct iser_rx_desc *rx_desc,
> -			 unsigned long rx_xfer_len,
> +			 struct ib_wc *wc,
>   			 struct ib_conn *ib_conn)
>   {
>   	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
> @@ -575,6 +621,7 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
>   	struct iscsi_hdr *hdr;
>   	u64 rx_dma;
>   	int rx_buflen, outstanding, count, err;
> +	unsigned long rx_xfer_len = wc->byte_len;

unrelated small enhancement which has nothing to do with this patch, 
remove it from here
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Nov. 17, 2015, 8:05 a.m. UTC | #2
On 11/16/2015 6:37 PM, Sagi Grimberg wrote:
> +			if (iser_task->dir[ISER_DIR_IN])
> +				reg = &iser_task->rdma_reg[ISER_DIR_IN];
> +			else
> +				reg = &iser_task->rdma_reg[ISER_DIR_OUT];

and what happens with bidirectional commands?!
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Nov. 17, 2015, 8:08 a.m. UTC | #3
On 11/16/2015 6:37 PM, Sagi Grimberg wrote:
> --- a/drivers/infiniband/ulp/iser/iser_verbs.c
> +++ b/drivers/infiniband/ulp/iser/iser_verbs.c
> @@ -847,7 +847,7 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
>   	conn_param.rnr_retry_count     = 6;
>   
>   	memset(&req_hdr, 0, sizeof(req_hdr));
> -	req_hdr.flags = (ISER_ZBVA_NOT_SUP | ISER_SEND_W_INV_NOT_SUP);
> +	req_hdr.flags = ISER_ZBVA_NOT_SUP;

isn't there a property of the **local** device we need to check before 
advertizing that
to the target? to be on the safe side, I would do  that only over 
devices that support
IB_DEVICE_MEM_MGT_EXTENSIONS, as non-local invalidations are part of the 
BMME ext
of IBTA, right?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg Nov. 17, 2015, 9:50 a.m. UTC | #4
On 17/11/2015 10:03, Or Gerlitz wrote:
> On 11/16/2015 6:37 PM, Sagi Grimberg wrote:
>>   void iser_rcv_completion(struct iser_rx_desc *rx_desc,
>> -             unsigned long rx_xfer_len,
>> +             struct ib_wc *wc,
>>                struct ib_conn *ib_conn)
>>   {
>>       struct iser_conn *iser_conn = container_of(ib_conn, struct
>> iser_conn,
>> @@ -575,6 +621,7 @@ void iser_rcv_completion(struct iser_rx_desc
>> *rx_desc,
>>       struct iscsi_hdr *hdr;
>>       u64 rx_dma;
>>       int rx_buflen, outstanding, count, err;
>> +    unsigned long rx_xfer_len = wc->byte_len;
>
> unrelated small enhancement which has nothing to do with this patch,
> remove it from here

It is related, we pass wc to check the invalidate stuff down the road...
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg Nov. 17, 2015, 9:53 a.m. UTC | #5
On 17/11/2015 10:05, Or Gerlitz wrote:
> On 11/16/2015 6:37 PM, Sagi Grimberg wrote:
>> +            if (iser_task->dir[ISER_DIR_IN])
>> +                reg = &iser_task->rdma_reg[ISER_DIR_IN];
>> +            else
>> +                reg = &iser_task->rdma_reg[ISER_DIR_OUT];
>
> and what happens with bidirectional commands?!

It won't work, iSER lacks support for bidirectional commands for as
long as I'm involved with it and until we either decide that we care
enough to implement it both in the target and initiator sides or some
array with iser bidi support shows up I don't know how much its worth
our attention.

Thoughts?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg Nov. 17, 2015, 9:55 a.m. UTC | #6
On 17/11/2015 10:08, Or Gerlitz wrote:
> On 11/16/2015 6:37 PM, Sagi Grimberg wrote:
>> --- a/drivers/infiniband/ulp/iser/iser_verbs.c
>> +++ b/drivers/infiniband/ulp/iser/iser_verbs.c
>> @@ -847,7 +847,7 @@ static void iser_route_handler(struct rdma_cm_id
>> *cma_id)
>>       conn_param.rnr_retry_count     = 6;
>>       memset(&req_hdr, 0, sizeof(req_hdr));
>> -    req_hdr.flags = (ISER_ZBVA_NOT_SUP | ISER_SEND_W_INV_NOT_SUP);
>> +    req_hdr.flags = ISER_ZBVA_NOT_SUP;
>
> isn't there a property of the **local** device we need to check before
> advertizing that
> to the target? to be on the safe side, I would do  that only over
> devices that support
> IB_DEVICE_MEM_MGT_EXTENSIONS, as non-local invalidations are part of the
> BMME ext
> of IBTA, right?

This was dependent on using fastreg (which depends on
IB_DEVICE_MEM_MGT_EXTENSIONS) at some point but it must have got lost
at some point..

Will fix. Thanks!
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Nov. 17, 2015, 10:04 a.m. UTC | #7
On 11/17/2015 11:53 AM, Sagi Grimberg wrote:
> [...] iSER lacks support for bidirectional commands for as long as I'm 
> involved with it 


Why? we don't support and when did we broke it after the initial 2.6.18 
upstreaming of the driver

Also, do we refuse to queuecommand a bidi? where?



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Nov. 17, 2015, 10:15 a.m. UTC | #8
On Tue, Nov 17, 2015 at 12:04:03PM +0200, Or Gerlitz wrote:
> Also, do we refuse to queuecommand a bidi? where?

Or, can you please do the basic research first?  Thanks! (Hint: check
how few drivers support bidi commands, and how it's enabled)
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg Nov. 17, 2015, 10:26 a.m. UTC | #9
> Why? we don't support and when did we broke it after the initial 2.6.18
> upstreaming of the driver
>
> Also, do we refuse to queuecommand a bidi? where?

Or, bidirectional support is not assumed and needs to be actively set
as a request queue flag (see iscsi_sw_tcp_slave_alloc()). AFAICT iser
never exposed support for bidirectional commands.

Did things change from back in 2.6.18 that we ever carried bidi
commands over iser?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Nov. 17, 2015, 4:53 p.m. UTC | #10
On Tue, Nov 17, 2015 at 12:26 PM, Sagi Grimberg
<sagig@dev.mellanox.co.il> wrote:
>
>> Why? we don't support and when did we broke it after the initial 2.6.18
>> upstreaming of the driver
>>
>> Also, do we refuse to queuecommand a bidi? where?
>
>
> Or, bidirectional support is not assumed and needs to be actively set
> as a request queue flag (see iscsi_sw_tcp_slave_alloc()).

> AFAICT iser never exposed support for bidirectional commands.

AFAIR, in the past there weren't explicit means to do that.

What's makes iscsi tcp eligible to support bidi's that we don't have @ iser?

> Did things change from back in 2.6.18 that we ever carried bidi
> commands over iser?

AFAIK, Pete Wyckoff had iser working with BIDI commands for few object
storage projects.

It's been a long time, but I was totally unaware that we don't publish
the whatever means
needed by upper layers.

I copied some 2010 email address of his here...

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg Nov. 18, 2015, 11:38 a.m. UTC | #11
> AFAIR, in the past there weren't explicit means to do that.
>
> What's makes iscsi tcp eligible to support bidi's that we don't have @ iser?

In theory, nothing.
In practice, iser is missing customer demands, iser targets that
support bidi and testing...

If someone cared enough about it then I assume we'd hear about it by
now...
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Nov. 18, 2015, 1:33 p.m. UTC | #12
On Wed, Nov 18, 2015 at 1:38 PM, Sagi Grimberg <sagig@dev.mellanox.co.il> wrote:
>> AFAIR, in the past there weren't explicit means to do that.
>> What's makes iscsi tcp eligible to support bidi's that we don't have @ iser?

> In theory, nothing. In practice, iser is missing customer demands, iser targets that
> support bidi and testing...

> If someone cared enough about it then I assume we'd hear about it by now...

Sagi, it works in TGT and AFAIR with the initiator too.

Looking on this paper of Pete Wyckoff  [1] I see that he says that
few changes to the initiator were needed, not sure which.

Or.


[1] http://pw.padd.com/papers/dalessandro-iser-snapi07.pdf
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Nov. 18, 2015, 1:52 p.m. UTC | #13
On Wed, Nov 18, 2015 at 03:33:01PM +0200, Or Gerlitz wrote:
> Sagi, it works in TGT and AFAIR with the initiator too.
> 
> Looking on this paper of Pete Wyckoff  [1] I see that he says that
> few changes to the initiator were needed, not sure which.

Or, can you please stop it?

Fortunately neither the iSER target or initiator ever support the
nightmare called bidi commands, and I'be happy o kill of that cruft
entirely if we could.

Did you buy shares in a defunct T10 OSD vendor or why do you even care?
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Nov. 18, 2015, 1:58 p.m. UTC | #14
On Wed, Nov 18, 2015 at 3:52 PM, Christoph Hellwig <hch@infradead.org> wrote:

> Fortunately neither the iSER target or initiator ever support the
> nightmare called bidi commands,

I honestly don't know why you call it nightmare nor what make you
make that strong assertion. I checked with Alex N. the TGT iser
transport author and he confirmed to me that bidi's worked on TGT,
as for the Linux upstream initiator, I thought that was the case too, but
I never saw it my eyes, and Pete's paper states he had to change the code,
so on that I can't be sure.

>  and I'be happy o kill of that cruft entirely if we could.

that's beyond the scope of this thread, I guess..
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Nov. 18, 2015, 2:10 p.m. UTC | #15
On Wed, Nov 18, 2015 at 03:58:48PM +0200, Or Gerlitz wrote:
> > Fortunately neither the iSER target or initiator ever support the
> > nightmare called bidi commands,
> 
> I honestly don't know why you call it nightmare nor what make you
> make that strong assertion. 

Beause I actually had to deal with block layer code implementing the
bidi semantics.

> I checked with Alex N. the TGT iser
> transport author and he confirmed to me that bidi's worked on TGT,
> as for the Linux upstream initiator, I thought that was the case too, but
> I never saw it my eyes, and Pete's paper states he had to change the code,
> so on that I can't be sure.

No need to trust words, we have git.  There was absolutely nothing
relating to bidi in either the initial iSER checking nor in the
changelogs since, and neither has the initial BIDI checking touched
iSER.

> that's beyond the scope of this thread, I guess..

As is iSER BIDI suppport.  Let's thank Sagi for doing a very important
performance feature for iSER instead of having this endless discussion
about a pointless fringe feature!
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg Nov. 18, 2015, 2:16 p.m. UTC | #16
> Sagi, it works in TGT and AFAIR with the initiator too.
>
> Looking on this paper of Pete Wyckoff  [1] I see that he says that
> few changes to the initiator were needed, not sure which.

I see. I wasn't aware that TGT supports bidi. However, AFAICT the
initiator support was never fully introduced upstream nor in our mlnx
backports (perhaps in an off-tree implementation). As I see it, bidi
functionality, is broken for a long time (if it was ever supported).

So I'd suggest we (you and me Or) look at bidi separately and try to
find out if someone wants it (not for academic research).

Would you mind if we don't include bidi considerations in this patchset?

Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Nov. 19, 2015, 7:12 a.m. UTC | #17
On 11/18/2015 4:10 PM, Christoph Hellwig wrote:
> There was absolutely nothing relating to bidi in either the initial iSER checking

This is wrong assertion.

Look on the code throughout the iser path done from iser_send_command, 
we allowed the command associated with the
iscsi task to be IN, OUT, both or none, when we do all the dma-mapping, 
memory registrations and such for either of the
needed directions and same on the completion path.

Sagi, do we still do IN, OUT, both or none? if not, where this stopped 
to be supported? how large would be the fix?

Or.


[1] maybe start with 2.6.20,  I guess we improved later... till the 
point where things were potentially started
to be caught down, as I realized now that Sagi wasn't fully aligned on 
that aspect of the driver
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Nov. 19, 2015, 7:16 a.m. UTC | #18
On 11/18/2015 4:16 PM, Sagi Grimberg wrote:
>> Sagi, it works in TGT and AFAIR with the initiator too.
>>
>> Looking on this paper of Pete Wyckoff  [1] I see that he says that
>> few changes to the initiator were needed, not sure which.
>
> I see. I wasn't aware that TGT supports bidi. However, AFAICT the
> initiator support was never fully introduced upstream nor in our mlnx
> backports (perhaps in an off-tree implementation). As I see it, bidi
> functionality, is broken for a long time (if it was ever supported).

Sounds like we weren't communicating enough while reviewing the patches
since you joined as maintainer... lets improve.

> So I'd suggest we (you and me Or) look at bidi separately and try to
> find out if someone wants it (not for academic research).

I didn't follow on the comment, what's wrong with having the upstream 
kernel serve
for academics? features used for academics today might turn to 
production tomorrow.
It's not that we're writing a whole new driver for that, there's one 
piece in our
design/code which is good for that purpose, this is perfectly fine.


> Would you mind if we don't include bidi considerations in this patchset? 

You should not further break it, whatever is still there should remain.  
As for breakages
that were introduced over the last few cycles, we should think that to do.

Or.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg Nov. 19, 2015, 9:15 a.m. UTC | #19
> Sagi, do we still do IN, OUT, both or none? if not, where this stopped
> to be supported? how large would be the fix?

Or, it's hard for me to say where exactly it stopped being supported
because I've never tested it and I'm not convinced it was ever
supported.

I'm exhausted with this discussion so I'll change the tiny condition
and bidi will remain unsupported until we'll decide we want to get it
working.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Nov. 19, 2015, 10:01 a.m. UTC | #20
On Thu, Nov 19, 2015 at 09:12:20AM +0200, Or Gerlitz wrote:
> This is wrong assertion.
> 
> Look on the code throughout the iser path done from iser_send_command, we
> allowed the command associated with the
> iscsi task to be IN, OUT, both or none, when we do all the dma-mapping,
> memory registrations and such for either of the
> needed directions and same on the completion path.

Internal code structure is one thing, ever supporting bidi is another
one.  Without QUEUE_FLAG_BIDI a driver has never supported bidirectional
requests.  And iSER never had that flag set in mainline.  So even if you
spent of time supporting bidi in iSER it never was supported and all
that great support was entirely untested the last 10 years.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Or Gerlitz Nov. 19, 2015, 11:17 a.m. UTC | #21
On Thu, Nov 19, 2015 at 12:01 PM, Christoph Hellwig <hch@infradead.org> wrote:

> Internal code structure is one thing, ever supporting bidi is another
> one.  Without QUEUE_FLAG_BIDI a driver has never supported bidirectional
> requests.  And iSER never had that flag set in mainline.  So even if you
> spent of time supporting bidi in iSER it never was supported and all
> that great support was entirely untested the last 10 years.

Christoph,

To make it clear iser's role is very well defined and strict w.r.t
supporting bidis, we should

on IO submission path

1. dma map the SC properly in, out, both or none
2. memory register what's needed
3. put in the iser header 1,2 or 0 keys

on IO completion path

4. set a pointer to the iscsi response header etc
5. memory unregister what's needed
6. dma unmap properly
7. call up to the iscsi/scsi stack with that pointer

anything else that I missed?

AFAIK steps 1..7 were supported ok in the past, probably now not, we
should fix at some point



Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 096d5234bbea..2e0a24ba18ed 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -519,6 +519,7 @@  struct iser_conn {
 	u32                          num_rx_descs;
 	unsigned short               scsi_sg_tablesize;
 	unsigned int                 scsi_max_sectors;
+	bool			     snd_w_inv;
 };
 
 /**
@@ -603,7 +604,7 @@  int iser_conn_terminate(struct iser_conn *iser_conn);
 void iser_release_work(struct work_struct *work);
 
 void iser_rcv_completion(struct iser_rx_desc *desc,
-			 unsigned long dto_xfer_len,
+			 struct ib_wc *wc,
 			 struct ib_conn *ib_conn);
 
 void iser_snd_completion(struct iser_tx_desc *desc,
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 6a968e350c14..df6a4b70ffc9 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -563,11 +563,57 @@  send_control_error:
 	return err;
 }
 
+static int
+iser_check_remote_inv(struct iser_conn *iser_conn,
+		      struct ib_wc *wc,
+		      struct iscsi_hdr *hdr)
+{
+	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
+		struct iscsi_task *task;
+		struct iscsi_iser_task *iser_task;
+		u32 rkey = wc->ex.invalidate_rkey;
+
+		iser_dbg("conn %p: remote invalidation\n", iser_conn);
+		if (unlikely(!iser_conn->snd_w_inv)) {
+			iser_err("conn %p: unexepected remote invalidation,"
+				 "terminating connection\n", iser_conn);
+			return -EPROTO;
+		}
+
+		task = iscsi_itt_to_ctask(iser_conn->iscsi_conn, hdr->itt);
+		if (likely(task)) {
+			struct iser_mem_reg *reg;
+			struct iser_fr_desc *desc;
+
+			iser_task = task->dd_data;
+			if (iser_task->dir[ISER_DIR_IN])
+				reg = &iser_task->rdma_reg[ISER_DIR_IN];
+			else
+				reg = &iser_task->rdma_reg[ISER_DIR_OUT];
+			desc = reg->mem_h;
+
+			if (likely(rkey == desc->rsc.mr->rkey)) {
+				desc->rsc.mr_valid = 0;
+			} else if (likely(rkey == desc->pi_ctx->sig_mr->rkey)) {
+				desc->pi_ctx->sig_mr_valid = 0;
+			} else {
+				iser_err("invalidation of unknown rkey=0x%x\n", rkey);
+				return -EINVAL;
+			}
+		} else {
+			iser_err("failed to get task for itt=%d\n", hdr->itt);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 /**
  * iser_rcv_dto_completion - recv DTO completion
  */
 void iser_rcv_completion(struct iser_rx_desc *rx_desc,
-			 unsigned long rx_xfer_len,
+			 struct ib_wc *wc,
 			 struct ib_conn *ib_conn)
 {
 	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
@@ -575,6 +621,7 @@  void iser_rcv_completion(struct iser_rx_desc *rx_desc,
 	struct iscsi_hdr *hdr;
 	u64 rx_dma;
 	int rx_buflen, outstanding, count, err;
+	unsigned long rx_xfer_len = wc->byte_len;
 
 	/* differentiate between login to all other PDUs */
 	if ((char *)rx_desc == iser_conn->login_resp_buf) {
@@ -593,6 +640,12 @@  void iser_rcv_completion(struct iser_rx_desc *rx_desc,
 	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
 			hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
 
+	if (iser_check_remote_inv(iser_conn, wc, hdr)) {
+		iscsi_conn_failure(iser_conn->iscsi_conn,
+				   ISCSI_ERR_CONN_FAILED);
+		return;
+	}
+
 	iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
 			rx_xfer_len - ISER_HEADERS_LEN);
 
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 74161a566852..5dd7cbd8e2e2 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -847,7 +847,7 @@  static void iser_route_handler(struct rdma_cm_id *cma_id)
 	conn_param.rnr_retry_count     = 6;
 
 	memset(&req_hdr, 0, sizeof(req_hdr));
-	req_hdr.flags = (ISER_ZBVA_NOT_SUP | ISER_SEND_W_INV_NOT_SUP);
+	req_hdr.flags = ISER_ZBVA_NOT_SUP;
 	conn_param.private_data	= (void *)&req_hdr;
 	conn_param.private_data_len = sizeof(struct iser_cm_hdr);
 
@@ -862,7 +862,8 @@  failure:
 	iser_connect_error(cma_id);
 }
 
-static void iser_connected_handler(struct rdma_cm_id *cma_id)
+static void iser_connected_handler(struct rdma_cm_id *cma_id,
+				   const void *private_data)
 {
 	struct iser_conn *iser_conn;
 	struct ib_qp_attr attr;
@@ -876,6 +877,15 @@  static void iser_connected_handler(struct rdma_cm_id *cma_id)
 	(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
 	iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
 
+	if (private_data) {
+		u8 flags = *(u8 *)private_data;
+
+		iser_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP);
+	}
+
+	iser_info("conn %p: negotiated %s invalidation\n",
+		  iser_conn, iser_conn->snd_w_inv ? "remote" : "local");
+
 	iser_conn->state = ISER_CONN_UP;
 	complete(&iser_conn->up_completion);
 }
@@ -927,7 +937,7 @@  static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
 		iser_route_handler(cma_id);
 		break;
 	case RDMA_CM_EVENT_ESTABLISHED:
-		iser_connected_handler(cma_id);
+		iser_connected_handler(cma_id, event->param.conn.private_data);
 		break;
 	case RDMA_CM_EVENT_ADDR_ERROR:
 	case RDMA_CM_EVENT_ROUTE_ERROR:
@@ -1205,8 +1215,7 @@  static void iser_handle_wc(struct ib_wc *wc)
 	if (likely(wc->status == IB_WC_SUCCESS)) {
 		if (wc->opcode == IB_WC_RECV) {
 			rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
-			iser_rcv_completion(rx_desc, wc->byte_len,
-					    ib_conn);
+			iser_rcv_completion(rx_desc, wc, ib_conn);
 		} else
 		if (wc->opcode == IB_WC_SEND) {
 			tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;