[for-next,v7,0/6] RDMA/rxe: Replace mr page map with an xarray

Message ID: 20230119235936.19728-1-rpearsonhpe@gmail.com

Message

Bob Pearson Jan. 19, 2023, 11:59 p.m. UTC
This patch series replaces the page map carried in each memory region
with a struct xarray. It is based on a sketch developed by Jason
Gunthorpe. The first five patches are preparation that tries to
cleanly isolate all the mr specific code into rxe_mr.c. The sixth
patch is the actual change.
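
For readers less familiar with the xarray API, here is a minimal,
illustrative sketch of the idea (the struct and helper names below are
made up for the example, not the actual rxe_mr.c code): each MR keeps an
xarray mapping a page index to a struct page pointer, so translating an
iova is just a subtraction, a shift and an xa_load().

#include <linux/xarray.h>
#include <linux/mm.h>

/* Illustrative only -- not the rxe_mr.c structures. */
struct demo_mr {
	struct xarray page_list;	/* page index -> struct page * */
	u64 iova;			/* start address of the region */
	unsigned int page_shift;	/* log2 of the MR page size */
};

/* Look up the page backing @iova and the offset within that page. */
static struct page *demo_iova_to_page(struct demo_mr *mr, u64 iova,
				      unsigned int *page_offset)
{
	unsigned long index = (iova - mr->iova) >> mr->page_shift;

	*page_offset = iova & ((1UL << mr->page_shift) - 1);
	return xa_load(&mr->page_list, index);	/* NULL if not mapped */
}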

v7:
  Link: https://lore.kernel.org/linux-rdma/Y8f53jdDAN0B9qy7@nvidia.com/
  Made changes requested by Jason to return RESPST_ERR_XXX from rxe_mr.c
  to rxe_resp.c.
v6:
  Backed out.
v5:
  Responded to a note from lizhijian@fujitsu.com and restored calls to
  is_pmem_page() which were accidentally dropped in earlier versions.
v4:
  Responded to a comment by Zhu and cleaned up error passing between
  rxe_mr.c and rxe_resp.c.
  Other various cleanups including more careful use of unsigned ints.
  Rebased to current for-next.
v3:
  Fixed an error reported by the kernel test robot.
v2:
  Rebased to 6.2.0-rc1+
  Minor cleanups
  Fixed an error reported by Jason in patch 4/6 (a missing 'if' after an 'else').


Bob Pearson (6):
  RDMA/rxe: Cleanup mr_check_range
  RDMA/rxe: Move rxe_map_mr_sg to rxe_mr.c
  RDMA/rxe: Isolate mr code from atomic_reply()
  RDMA/rxe: Isolate mr code from atomic_write_reply()
  RDMA/rxe: Cleanup page variables in rxe_mr.c
  RDMA/rxe: Replace rxe_map and rxe_phys_buf by xarray

 drivers/infiniband/sw/rxe/rxe.h       |  38 ++
 drivers/infiniband/sw/rxe/rxe_loc.h   |  12 +-
 drivers/infiniband/sw/rxe/rxe_mr.c    | 605 ++++++++++++++------------
 drivers/infiniband/sw/rxe/rxe_resp.c  | 143 ++----
 drivers/infiniband/sw/rxe/rxe_verbs.c |  36 --
 drivers/infiniband/sw/rxe/rxe_verbs.h |  32 +-
 6 files changed, 425 insertions(+), 441 deletions(-)


base-commit: 1ec82317a1daac78c04b0c15af89018ccf9fa2b7

Comments

Jason Gunthorpe Jan. 27, 2023, 4:23 p.m. UTC | #1
On Thu, Jan 19, 2023 at 05:59:31PM -0600, Bob Pearson wrote:
> This patch series replaces the page map carried in each memory region
> with a struct xarray. It is based on a sketch developed by Jason
> Gunthorpe. The first five patches are preparation that tries to
> cleanly isolate all the mr specific code into rxe_mr.c. The sixth
> patch is the actual change.
> 
> v7:
>   Link: https://lore.kernel.org/linux-rdma/Y8f53jdDAN0B9qy7@nvidia.com/
>   Made changes requested by Jason to return RESPST_ERR_XXX from rxe_mr.c
>   to rxe_resp.c.

I took it to for-next, but I made these changes, please check:

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index fe4049330c9f19..c80458634962c6 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -114,7 +114,8 @@ static int rxe_mr_fill_pages_from_sgt(struct rxe_mr *mr, struct sg_table *sgt)
 
 			if (persistent && !is_pmem_page(page)) {
 				rxe_dbg_mr(mr, "Page can't be persistent\n");
-				return -EINVAL;
+				xas_set_err(&xas, -EINVAL);
+				break;
 			}
 
 			xas_store(&xas, page);
@@ -213,7 +214,6 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 iova)
 {
 	struct rxe_mr *mr = to_rmr(ibmr);
 	struct page *page = virt_to_page(iova & mr->page_mask);
-	XA_STATE(xas, &mr->page_list, mr->nbuf);
 	bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);
 	int err;
 
@@ -225,13 +225,7 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 iova)
 	if (unlikely(mr->nbuf == mr->num_buf))
 		return -ENOMEM;
 
-	do {
-		xas_lock(&xas);
-		xas_store(&xas, page);
-		xas_unlock(&xas);
-	} while (xas_nomem(&xas, GFP_KERNEL));
-
-	err = xas_error(&xas);
+	err = xa_err(xa_store(&mr->page_list, mr->nbuf, page, GFP_KERNEL));
 	if (err)
 		return err;
 
@@ -458,10 +452,8 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
 				mr_page_size(mr) - page_offset);
 
 		va = kmap_local_page(page);
-		if (!va)
-			return -EFAULT;
-
 		arch_wb_cache_pmem(va + page_offset, bytes);
+		kunmap_local(va);
 
 		length -= bytes;
 		iova += bytes;
Bob Pearson Jan. 27, 2023, 5:05 p.m. UTC | #2
On 1/27/23 10:23, Jason Gunthorpe wrote:
> On Thu, Jan 19, 2023 at 05:59:31PM -0600, Bob Pearson wrote:
>> This patch series replaces the page map carried in each memory region
>> with a struct xarray. It is based on a sketch developed by Jason
>> Gunthorpe. The first five patches are preparation that tries to
>> cleanly isolate all the mr specific code into rxe_mr.c. The sixth
>> patch is the actual change.
>>
>> v7:
>>   Link: https://lore.kernel.org/linux-rdma/Y8f53jdDAN0B9qy7@nvidia.com/
>>   Made changes requested by Jason to return RESPST_ERR_XXX from rxe_mr.c
>>   to rxe_resp.c.
> 
> I took it to for-next, but I made these changes, please check:
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
> index fe4049330c9f19..c80458634962c6 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mr.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mr.c
> @@ -114,7 +114,8 @@ static int rxe_mr_fill_pages_from_sgt(struct rxe_mr *mr, struct sg_table *sgt)
>  
>  			if (persistent && !is_pmem_page(page)) {
>  				rxe_dbg_mr(mr, "Page can't be persistent\n");
> -				return -EINVAL;
> +				xas_set_err(&xas, -EINVAL);
> +				break;
>  			}
>  
>  			xas_store(&xas, page);
> @@ -213,7 +214,6 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 iova)
>  {
>  	struct rxe_mr *mr = to_rmr(ibmr);
>  	struct page *page = virt_to_page(iova & mr->page_mask);
> -	XA_STATE(xas, &mr->page_list, mr->nbuf);
>  	bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);
>  	int err;
>  
> @@ -225,13 +225,7 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 iova)
>  	if (unlikely(mr->nbuf == mr->num_buf))
>  		return -ENOMEM;
>  
> -	do {
> -		xas_lock(&xas);
> -		xas_store(&xas, page);
> -		xas_unlock(&xas);
> -	} while (xas_nomem(&xas, GFP_KERNEL));
> -
> -	err = xas_error(&xas);
> +	err = xa_err(xa_store(&mr->page_list, mr->nbuf, page, GFP_KERNEL));
>  	if (err)
>  		return err;
>  
> @@ -458,10 +452,8 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
>  				mr_page_size(mr) - page_offset);
>  
>  		va = kmap_local_page(page);
> -		if (!va)
> -			return -EFAULT;
> -
>  		arch_wb_cache_pmem(va + page_offset, bytes);
> +		kunmap_local(va);
>  
>  		length -= bytes;
>  		iova += bytes;

Thanks. I'll check these.

Bob
Bob Pearson Jan. 27, 2023, 7:26 p.m. UTC | #3
On 1/27/23 10:23, Jason Gunthorpe wrote:
> On Thu, Jan 19, 2023 at 05:59:31PM -0600, Bob Pearson wrote:
>> This patch series replaces the page map carried in each memory region
>> with a struct xarray. It is based on a sketch developed by Jason
>> Gunthorpe. The first five patches are preparation that tries to
>> cleanly isolate all the mr specific code into rxe_mr.c. The sixth
>> patch is the actual change.
>>
>> v7:
>>   Link: https://lore.kernel.org/linux-rdma/Y8f53jdDAN0B9qy7@nvidia.com/
>>   Made changes requested by Jason to return RESPST_ERR_XXX from rxe_mr.c
>>   to rxe_resp.c.
> 
> I took it to for-next, but I made these changes, please check:
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
> index fe4049330c9f19..c80458634962c6 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mr.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mr.c
> @@ -114,7 +114,8 @@ static int rxe_mr_fill_pages_from_sgt(struct rxe_mr *mr, struct sg_table *sgt)
>  
>  			if (persistent && !is_pmem_page(page)) {
>  				rxe_dbg_mr(mr, "Page can't be persistent\n");
> -				return -EINVAL;
> +				xas_set_err(&xas, -EINVAL);
> +				break;
>  			}
>  
>  			xas_store(&xas, page);

Looks good. Good catch.
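
To spell out why, for anyone following along: the stores run with the
xarray lock held, so returning directly from inside the loop would leave
the lock taken. Parking the error in the xas and reading it back with
xas_error() after xas_unlock() avoids that. A rough, illustrative sketch
of the pattern, simplified from rxe_mr_fill_pages_from_sgt() with made-up
names:

static int demo_store_pages(struct xarray *xa, struct page **pages,
			    unsigned int npages, bool persistent)
{
	XA_STATE(xas, xa, 0);
	unsigned int i = 0;

	do {
		xas_lock(&xas);
		while (i < npages) {
			/* is_pmem_page() is the helper used in the hunk above */
			if (persistent && !is_pmem_page(pages[i])) {
				/* returning here would skip xas_unlock() */
				xas_set_err(&xas, -EINVAL);
				break;
			}
			xas_store(&xas, pages[i]);
			if (xas_error(&xas))
				break;	/* likely -ENOMEM, retried below */
			xas_next(&xas);
			i++;
		}
		xas_unlock(&xas);
		/* xas_nomem() preallocates and clears -ENOMEM, then we retry */
	} while (xas_nomem(&xas, GFP_KERNEL));

	return xas_error(&xas);
}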

> @@ -213,7 +214,6 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 iova)
>  {
>  	struct rxe_mr *mr = to_rmr(ibmr);
>  	struct page *page = virt_to_page(iova & mr->page_mask);
> -	XA_STATE(xas, &mr->page_list, mr->nbuf);
>  	bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);
>  	int err;
>  
> @@ -225,13 +225,7 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 iova)
>  	if (unlikely(mr->nbuf == mr->num_buf))
>  		return -ENOMEM;
>  
> -	do {
> -		xas_lock(&xas);
> -		xas_store(&xas, page);
> -		xas_unlock(&xas);
> -	} while (xas_nomem(&xas, GFP_KERNEL));
> -
> -	err = xas_error(&xas);
> +	err = xa_err(xa_store(&mr->page_list, mr->nbuf, page, GFP_KERNEL));

Looks good.
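
For a single index there is no need to open-code the retry loop:
xa_store() takes the lock and allocates nodes internally, and on failure
returns an XA_ERROR() pointer that xa_err() turns back into an errno. A
tiny illustrative sketch:

static int demo_set_one_page(struct xarray *xa, unsigned long index,
			     struct page *page)
{
	/* xa_store() returns the previous entry or an XA_ERROR() pointer */
	return xa_err(xa_store(xa, index, page, GFP_KERNEL));
}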

>  	if (err)
>  		return err;
>  
> @@ -458,10 +452,8 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
>  				mr_page_size(mr) - page_offset);
>  
>  		va = kmap_local_page(page);
> -		if (!va)
> -			return -EFAULT;
> -
>  		arch_wb_cache_pmem(va + page_offset, bytes);
> +		kunmap_local(va);
>  
>  		length -= bytes;
>  		iova += bytes;

Looks good. Good catch. I take it kmap_local_page shouldn't fail.
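
For reference, kmap_local_page() always returns a valid mapping, it never
returns NULL, so the removed check was dead code; what does matter is
pairing it with kunmap_local(). An illustrative sketch of the pattern in
the hunk above (made-up function name):

static void demo_flush_pmem_page(struct page *page, unsigned int page_offset,
				 unsigned int bytes)
{
	void *va = kmap_local_page(page);	/* cannot fail */

	/* write the cache lines back so the data reaches persistence */
	arch_wb_cache_pmem(va + page_offset, bytes);

	kunmap_local(va);	/* must pair with kmap_local_page() */
}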

Thanks,

Bob