diff mbox series

[v2] xprtrdma: Fix DMA scatter-gather list mapping imbalance

Message ID 158151473332.515306.1111360128438553868.stgit@morisot.1015granger.net (mailing list archive)
State New, archived
Headers show
Series [v2] xprtrdma: Fix DMA scatter-gather list mapping imbalance | expand

Commit Message

Chuck Lever Feb. 12, 2020, 1:43 p.m. UTC
The @nents value that was passed to ib_dma_map_sg() has to be passed
to the matching ib_dma_unmap_sg() call. If ib_dma_map_sg() choses to
concatenate sg entries, it will return a different nents value than
it was passed.

The bug was exposed by recent changes to the AMD IOMMU driver, which
enabled sg entry concatenation.

Looking all the way back to 4143f34e01e9 ("xprtrdma: Port to new
memory registration API") and reviewing other kernel ULPs, it's not
clear that the frwr_map() logic was ever correct for this case.

Reported-by: Andre Tomt <andre@tomt.net>
Suggested-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/events/rpcrdma.h |    6 ++++--
 net/sunrpc/xprtrdma/frwr_ops.c |   13 +++++++------
 2 files changed, 11 insertions(+), 8 deletions(-)

Hi Andre, here's take 2, based on the trace data you sent me.
Please let me know if this one fares any better.

Changes since v1:
- Ensure the correct nents value is passed to ib_map_mr_sg
- Record the mr_nents value in the MR trace points

Comments

Chuck Lever Feb. 12, 2020, 1:48 p.m. UTC | #1
> On Feb 12, 2020, at 8:43 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
> 
> The @nents value that was passed to ib_dma_map_sg() has to be passed
> to the matching ib_dma_unmap_sg() call. If ib_dma_map_sg() choses to
> concatenate sg entries, it will return a different nents value than
> it was passed.
> 
> The bug was exposed by recent changes to the AMD IOMMU driver, which
> enabled sg entry concatenation.
> 
> Looking all the way back to 4143f34e01e9 ("xprtrdma: Port to new
> memory registration API") and reviewing other kernel ULPs, it's not
> clear that the frwr_map() logic was ever correct for this case.
> 
> Reported-by: Andre Tomt <andre@tomt.net>
> Suggested-by: Robin Murphy <robin.murphy@arm.com>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
> include/trace/events/rpcrdma.h |    6 ++++--
> net/sunrpc/xprtrdma/frwr_ops.c |   13 +++++++------
> 2 files changed, 11 insertions(+), 8 deletions(-)
> 
> Hi Andre, here's take 2, based on the trace data you sent me.
> Please let me know if this one fares any better.
> 
> Changes since v1:
> - Ensure the correct nents value is passed to ib_map_mr_sg
> - Record the mr_nents value in the MR trace points
> 
> diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
> index c0e4c93324f5..023c5da45999 100644
> --- a/include/trace/events/rpcrdma.h
> +++ b/include/trace/events/rpcrdma.h
> @@ -275,6 +275,7 @@ DECLARE_EVENT_CLASS(xprtrdma_mr,
> 
> 	TP_STRUCT__entry(
> 		__field(const void *, mr)
> +		__field(unsigned int, nents)
> 		__field(u32, handle)
> 		__field(u32, length)
> 		__field(u64, offset)
> @@ -283,14 +284,15 @@ DECLARE_EVENT_CLASS(xprtrdma_mr,
> 
> 	TP_fast_assign(
> 		__entry->mr = mr;
> +		__entry->nents = mr->mr_nents;
> 		__entry->handle = mr->mr_handle;
> 		__entry->length = mr->mr_length;
> 		__entry->offset = mr->mr_offset;
> 		__entry->dir    = mr->mr_dir;
> 	),
> 
> -	TP_printk("mr=%p %u@0x%016llx:0x%08x (%s)",
> -		__entry->mr, __entry->length,
> +	TP_printk("mr=%p %d %u@0x%016llx:0x%08x (%s)",
> +		__entry->mr, __entry->mr_nents, __entry->length,

This should be:
		__entry->mr, __entry->nents, __entry->length,

Sorry about that.


> 		(unsigned long long)__entry->offset, __entry->handle,
> 		xprtrdma_show_direction(__entry->dir)
> 	)
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 095be887753e..75617646702b 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -288,8 +288,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
> {
> 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> 	struct ib_reg_wr *reg_wr;
> +	int i, n, dma_nents;
> 	struct ib_mr *ibmr;
> -	int i, n;
> 	u8 key;
> 
> 	if (nsegs > ia->ri_max_frwr_depth)
> @@ -313,15 +313,16 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
> 			break;
> 	}
> 	mr->mr_dir = rpcrdma_data_dir(writing);
> +	mr->mr_nents = i;
> 
> -	mr->mr_nents =
> -		ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir);
> -	if (!mr->mr_nents)
> +	dma_nents = ib_dma_map_sg(ia->ri_id->device, mr->mr_sg,
> +				  mr->mr_nents, mr->mr_dir);
> +	if (!dma_nents)
> 		goto out_dmamap_err;
> 
> 	ibmr = mr->frwr.fr_mr;
> -	n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
> -	if (unlikely(n != mr->mr_nents))
> +	n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
> +	if (n != dma_nents)
> 		goto out_mapmr_err;
> 
> 	ibmr->iova &= 0x00000000ffffffff;
> 
> 

--
Chuck Lever
Andre Tomt Feb. 12, 2020, 4:03 p.m. UTC | #2
On 12.02.2020 14:48, Chuck Lever wrote:
>> On Feb 12, 2020, at 8:43 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
>>
>> The @nents value that was passed to ib_dma_map_sg() has to be passed
>> to the matching ib_dma_unmap_sg() call. If ib_dma_map_sg() choses to
>> concatenate sg entries, it will return a different nents value than
>> it was passed.
>>
>> The bug was exposed by recent changes to the AMD IOMMU driver, which
>> enabled sg entry concatenation.
>>
>> Looking all the way back to 4143f34e01e9 ("xprtrdma: Port to new
>> memory registration API") and reviewing other kernel ULPs, it's not
>> clear that the frwr_map() logic was ever correct for this case.
>>
>> Reported-by: Andre Tomt <andre@tomt.net>
>> Suggested-by: Robin Murphy <robin.murphy@arm.com>
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> include/trace/events/rpcrdma.h |    6 ++++--
>> net/sunrpc/xprtrdma/frwr_ops.c |   13 +++++++------
>> 2 files changed, 11 insertions(+), 8 deletions(-)
>>
>> Hi Andre, here's take 2, based on the trace data you sent me.
>> Please let me know if this one fares any better.
>>
>> Changes since v1:
>> - Ensure the correct nents value is passed to ib_map_mr_sg
>> - Record the mr_nents value in the MR trace points
Verified working (with the patch correction) in my environment, with 
some quick testing (mount + some random and bulk I/O)

client, 5.5.3 + patch + amd iommu on = OK
client, 5.5.3 + patch + amd iommu off = OK
client, 5.6-rc1 + patch + amd iommu on = OK

server, 5.5.3 + patch + intel iommu on = OK

Thanks!
Chuck Lever Feb. 12, 2020, 4:05 p.m. UTC | #3
> On Feb 12, 2020, at 11:03 AM, Andre Tomt <andre@tomt.net> wrote:
> 
> On 12.02.2020 14:48, Chuck Lever wrote:
>>> On Feb 12, 2020, at 8:43 AM, Chuck Lever <chuck.lever@oracle.com> wrote:
>>> 
>>> The @nents value that was passed to ib_dma_map_sg() has to be passed
>>> to the matching ib_dma_unmap_sg() call. If ib_dma_map_sg() choses to
>>> concatenate sg entries, it will return a different nents value than
>>> it was passed.
>>> 
>>> The bug was exposed by recent changes to the AMD IOMMU driver, which
>>> enabled sg entry concatenation.
>>> 
>>> Looking all the way back to 4143f34e01e9 ("xprtrdma: Port to new
>>> memory registration API") and reviewing other kernel ULPs, it's not
>>> clear that the frwr_map() logic was ever correct for this case.
>>> 
>>> Reported-by: Andre Tomt <andre@tomt.net>
>>> Suggested-by: Robin Murphy <robin.murphy@arm.com>
>>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>>> ---
>>> include/trace/events/rpcrdma.h |    6 ++++--
>>> net/sunrpc/xprtrdma/frwr_ops.c |   13 +++++++------
>>> 2 files changed, 11 insertions(+), 8 deletions(-)
>>> 
>>> Hi Andre, here's take 2, based on the trace data you sent me.
>>> Please let me know if this one fares any better.
>>> 
>>> Changes since v1:
>>> - Ensure the correct nents value is passed to ib_map_mr_sg
>>> - Record the mr_nents value in the MR trace points
> Verified working (with the patch correction) in my environment, with some quick testing (mount + some random and bulk I/O)
> 
> client, 5.5.3 + patch + amd iommu on = OK
> client, 5.5.3 + patch + amd iommu off = OK
> client, 5.6-rc1 + patch + amd iommu on = OK
> 
> server, 5.5.3 + patch + intel iommu on = OK

Very good! I'll submit the fix through the NFS tree ASAP, and request backport to v5.5 stable.

--
Chuck Lever
diff mbox series

Patch

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index c0e4c93324f5..023c5da45999 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -275,6 +275,7 @@  DECLARE_EVENT_CLASS(xprtrdma_mr,
 
 	TP_STRUCT__entry(
 		__field(const void *, mr)
+		__field(unsigned int, nents)
 		__field(u32, handle)
 		__field(u32, length)
 		__field(u64, offset)
@@ -283,14 +284,15 @@  DECLARE_EVENT_CLASS(xprtrdma_mr,
 
 	TP_fast_assign(
 		__entry->mr = mr;
+		__entry->nents = mr->mr_nents;
 		__entry->handle = mr->mr_handle;
 		__entry->length = mr->mr_length;
 		__entry->offset = mr->mr_offset;
 		__entry->dir    = mr->mr_dir;
 	),
 
-	TP_printk("mr=%p %u@0x%016llx:0x%08x (%s)",
-		__entry->mr, __entry->length,
+	TP_printk("mr=%p %d %u@0x%016llx:0x%08x (%s)",
+		__entry->mr, __entry->mr_nents, __entry->length,
 		(unsigned long long)__entry->offset, __entry->handle,
 		xprtrdma_show_direction(__entry->dir)
 	)
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 095be887753e..75617646702b 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -288,8 +288,8 @@  struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct ib_reg_wr *reg_wr;
+	int i, n, dma_nents;
 	struct ib_mr *ibmr;
-	int i, n;
 	u8 key;
 
 	if (nsegs > ia->ri_max_frwr_depth)
@@ -313,15 +313,16 @@  struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 			break;
 	}
 	mr->mr_dir = rpcrdma_data_dir(writing);
+	mr->mr_nents = i;
 
-	mr->mr_nents =
-		ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir);
-	if (!mr->mr_nents)
+	dma_nents = ib_dma_map_sg(ia->ri_id->device, mr->mr_sg,
+				  mr->mr_nents, mr->mr_dir);
+	if (!dma_nents)
 		goto out_dmamap_err;
 
 	ibmr = mr->frwr.fr_mr;
-	n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
-	if (unlikely(n != mr->mr_nents))
+	n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
+	if (n != dma_nents)
 		goto out_mapmr_err;
 
 	ibmr->iova &= 0x00000000ffffffff;