[v1,05/11] xprtrdma: Allocate Protection Domain in rpcrdma_ep_create()

Message ID 20200221220033.2072.22880.stgit@manet.1015granger.net (mailing list archive)
State New, archived
Series NFS/RDMA client side connection overhaul

Commit Message

Chuck Lever Feb. 21, 2020, 10 p.m. UTC
Make a Protection Domain (PD) a per-connection resource rather than
a per-transport resource. In other words, when the connection
terminates, the PD is destroyed.

Thus there is one less HW resource that remains allocated to a
transport after a connection is closed.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/verbs.c |   26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)
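
In short, the PD now follows the endpoint (connection) lifecycle rather
than the transport lifecycle. A minimal sketch of the new flow, heavily
simplified from the diff below (it assumes xprtrdma's struct rpcrdma_ia
and struct rpcrdma_ep definitions; error unwinding is abbreviated and
does not match the real functions in net/sunrpc/xprtrdma/verbs.c):

static int ep_create_sketch(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
			    struct rdma_cm_id *id)
{
	int rc;

	/* The PD is allocated while setting up this connection's endpoint */
	ia->ri_pd = ib_alloc_pd(id->device, 0);
	if (IS_ERR(ia->ri_pd))
		return PTR_ERR(ia->ri_pd);

	/* The connection's QP is created in that PD */
	rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
	if (rc) {
		ib_dealloc_pd(ia->ri_pd);
		ia->ri_pd = NULL;
	}
	return rc;
}

static void ep_destroy_sketch(struct rpcrdma_ia *ia)
{
	/* Connection teardown now releases the PD as well */
	if (ia->ri_pd)
		ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
}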

Comments

Tom Talpey March 1, 2020, 6:11 p.m. UTC | #1
On 2/21/2020 2:00 PM, Chuck Lever wrote:
> Make a Protection Domain (PD) a per-connection resource rather than
> a per-transport resource. In other words, when the connection
> terminates, the PD is destroyed.
> 
> Thus there is one less HW resource that remains allocated to a
> transport after a connection is closed.

That's a good goal, but there are cases where the upper layer may
want the PD to be shared. For example, in a multichannel configuration,
where RDMA may not be constrained to use a single connection.

How would this approach support such a requirement? Can a PD be
provided to a new connection?

Tom.

> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>   net/sunrpc/xprtrdma/verbs.c |   26 ++++++++++----------------
>   1 file changed, 10 insertions(+), 16 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index f361213a8157..36fe7baea014 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -363,14 +363,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
>   		rc = PTR_ERR(ia->ri_id);
>   		goto out_err;
>   	}
> -
> -	ia->ri_pd = ib_alloc_pd(ia->ri_id->device, 0);
> -	if (IS_ERR(ia->ri_pd)) {
> -		rc = PTR_ERR(ia->ri_pd);
> -		pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
> -		goto out_err;
> -	}
> -
>   	return 0;
>   
>   out_err:
> @@ -403,9 +395,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
>   
>   	rpcrdma_ep_destroy(r_xprt);
>   
> -	ib_dealloc_pd(ia->ri_pd);
> -	ia->ri_pd = NULL;
> -
>   	/* Allow waiters to continue */
>   	complete(&ia->ri_remove_done);
>   
> @@ -423,11 +412,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
>   	if (ia->ri_id && !IS_ERR(ia->ri_id))
>   		rdma_destroy_id(ia->ri_id);
>   	ia->ri_id = NULL;
> -
> -	/* If the pd is still busy, xprtrdma missed freeing a resource */
> -	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
> -		ib_dealloc_pd(ia->ri_pd);
> -	ia->ri_pd = NULL;
>   }
>   
>   static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt,
> @@ -514,6 +498,12 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt,
>   	ep->rep_remote_cma.flow_control = 0;
>   	ep->rep_remote_cma.rnr_retry_count = 0;
>   
> +	ia->ri_pd = ib_alloc_pd(id->device, 0);
> +	if (IS_ERR(ia->ri_pd)) {
> +		rc = PTR_ERR(ia->ri_pd);
> +		goto out_destroy;
> +	}
> +
>   	rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
>   	if (rc)
>   		goto out_destroy;
> @@ -540,6 +530,10 @@ static void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
>   	if (ep->rep_attr.send_cq)
>   		ib_free_cq(ep->rep_attr.send_cq);
>   	ep->rep_attr.send_cq = NULL;
> +
> +	if (ia->ri_pd)
> +		ib_dealloc_pd(ia->ri_pd);
> +	ia->ri_pd = NULL;
>   }
>   
>   /* Re-establish a connection after a device removal event.
> 
> 
>
Chuck Lever March 1, 2020, 6:29 p.m. UTC | #2
Hi Tom-

Thanks for your careful reading of the patch series!


> On Mar 1, 2020, at 1:11 PM, Tom Talpey <tom@talpey.com> wrote:
> 
> On 2/21/2020 2:00 PM, Chuck Lever wrote:
>> Make a Protection Domain (PD) a per-connection resource rather than
>> a per-transport resource. In other words, when the connection
>> terminates, the PD is destroyed.
>> Thus there is one less HW resource that remains allocated to a
>> transport after a connection is closed.
> 
> That's a good goal, but there are cases where the upper layer may
> want the PD to be shared. For example, in a multichannel configuration,
> where RDMA may not be constrained to use a single connection.

I see two reasons why PD sharing won't be needed for the Linux
client implementation of RPC/RDMA:

- The plan for Linux NFS/RDMA is to manage multiple connections
  in the NFS client layer, not at the RPC transport layer.

- We don't intend to retransmit an RPC that was registered via
  one connection on a different connection; a retransmitted RPC
  is re-marshaled from scratch before each transmit.

The purpose of destroying the PD at disconnect is to enable a
clean device removal model: basically, disconnect all connections
through that device, and we're guaranteed to have no more pinned
HW resources. AFAICT, that is the model also used in other kernel
ULPs.
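
Roughly, with this patch the teardown on removal boils down to the
following (a sketch only; the helper name and exact ordering are
illustrative, not the actual xprtrdma code):

static void device_removal_sketch(struct rpcrdma_ia *ia,
				  struct rpcrdma_ep *ep)
{
	/* Disconnecting releases everything pinned on the removed
	 * device: the QP, both CQs, and (with this patch) the PD. */
	if (ia->ri_id && ia->ri_id->qp)
		rdma_destroy_qp(ia->ri_id);
	if (ep->rep_attr.recv_cq)
		ib_free_cq(ep->rep_attr.recv_cq);
	ep->rep_attr.recv_cq = NULL;
	if (ep->rep_attr.send_cq)
		ib_free_cq(ep->rep_attr.send_cq);
	ep->rep_attr.send_cq = NULL;
	if (ia->ri_pd)
		ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
}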


> How would this approach support such a requirement?

As above, the Linux NFS client would create additional transports,
each with their own single connection (and PD).


> Can a PD be provided to a new connection?

The sequence of API events is that an ID and PD are created first,
then a QP is created with the ID and PD, then the connection is
established.
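
A bare-bones sketch of that ordering using the RDMA-CM/verbs calls
involved (example_connect and example_cm_handler are placeholders; CM
event handling, route resolution, and CQ setup in the qp_init_attr are
elided, and in real code ib_alloc_pd() cannot run until address/route
resolution has completed and id->device is set):

#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>

static int example_cm_handler(struct rdma_cm_id *id,
			      struct rdma_cm_event *event)
{
	/* Placeholder: a real handler dispatches on event->event */
	return 0;
}

static int example_connect(struct net *net, struct sockaddr *dst,
			   struct ib_qp_init_attr *attr,
			   struct rdma_conn_param *param)
{
	struct rdma_cm_id *id;
	struct ib_pd *pd;
	int rc;

	/* 1. Create the connection ID ... */
	id = rdma_create_id(net, example_cm_handler, NULL,
			    RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id))
		return PTR_ERR(id);

	/* ... and bind it to a device by resolving the destination
	 * (waiting for the CM callbacks is omitted here). */
	rc = rdma_resolve_addr(id, NULL, dst, 5000);
	if (rc)
		goto out_id;

	/* 2. Allocate the PD on the resolved device. */
	pd = ib_alloc_pd(id->device, 0);
	if (IS_ERR(pd)) {
		rc = PTR_ERR(pd);
		goto out_id;
	}

	/* 3. Create the QP with both the ID and the PD. */
	rc = rdma_create_qp(id, pd, attr);
	if (rc)
		goto out_pd;

	/* 4. Only then establish the connection. */
	rc = rdma_connect(id, param);
	if (rc == 0)
		return 0;

	rdma_destroy_qp(id);
out_pd:
	ib_dealloc_pd(pd);
out_id:
	rdma_destroy_id(id);
	return rc;
}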

But I might not have understood your question.




> Tom.
> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>>  net/sunrpc/xprtrdma/verbs.c |   26 ++++++++++----------------
>>  1 file changed, 10 insertions(+), 16 deletions(-)
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index f361213a8157..36fe7baea014 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -363,14 +363,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
>>  		rc = PTR_ERR(ia->ri_id);
>>  		goto out_err;
>>  	}
>> -
>> -	ia->ri_pd = ib_alloc_pd(ia->ri_id->device, 0);
>> -	if (IS_ERR(ia->ri_pd)) {
>> -		rc = PTR_ERR(ia->ri_pd);
>> -		pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
>> -		goto out_err;
>> -	}
>> -
>>  	return 0;
>>    out_err:
>> @@ -403,9 +395,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
>>    	rpcrdma_ep_destroy(r_xprt);
>>  -	ib_dealloc_pd(ia->ri_pd);
>> -	ia->ri_pd = NULL;
>> -
>>  	/* Allow waiters to continue */
>>  	complete(&ia->ri_remove_done);
>>  @@ -423,11 +412,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
>>  	if (ia->ri_id && !IS_ERR(ia->ri_id))
>>  		rdma_destroy_id(ia->ri_id);
>>  	ia->ri_id = NULL;
>> -
>> -	/* If the pd is still busy, xprtrdma missed freeing a resource */
>> -	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
>> -		ib_dealloc_pd(ia->ri_pd);
>> -	ia->ri_pd = NULL;
>>  }
>>    static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt,
>> @@ -514,6 +498,12 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt,
>>  	ep->rep_remote_cma.flow_control = 0;
>>  	ep->rep_remote_cma.rnr_retry_count = 0;
>>  +	ia->ri_pd = ib_alloc_pd(id->device, 0);
>> +	if (IS_ERR(ia->ri_pd)) {
>> +		rc = PTR_ERR(ia->ri_pd);
>> +		goto out_destroy;
>> +	}
>> +
>>  	rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
>>  	if (rc)
>>  		goto out_destroy;
>> @@ -540,6 +530,10 @@ static void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
>>  	if (ep->rep_attr.send_cq)
>>  		ib_free_cq(ep->rep_attr.send_cq);
>>  	ep->rep_attr.send_cq = NULL;
>> +
>> +	if (ia->ri_pd)
>> +		ib_dealloc_pd(ia->ri_pd);
>> +	ia->ri_pd = NULL;
>>  }
>>    /* Re-establish a connection after a device removal event.

--
Chuck Lever
Tom Talpey March 1, 2020, 6:38 p.m. UTC | #3
On 3/1/2020 10:29 AM, Chuck Lever wrote:
> Hi Tom-
> 
> Thanks for your careful reading of the patch series!
> 
> 
>> On Mar 1, 2020, at 1:11 PM, Tom Talpey <tom@talpey.com> wrote:
>>
>> On 2/21/2020 2:00 PM, Chuck Lever wrote:
>>> Make a Protection Domain (PD) a per-connection resource rather than
>>> a per-transport resource. In other words, when the connection
>>> terminates, the PD is destroyed.
>>> Thus there is one less HW resource that remains allocated to a
>>> transport after a connection is closed.
>>
>> That's a good goal, but there are cases where the upper layer may
>> want the PD to be shared. For example, in a multichannel configuration,
>> where RDMA may not be constrained to use a single connection.
> 
> I see two reasons why PD sharing won't be needed for the Linux
> client implementation of RPC/RDMA:
> 
> - The plan for Linux NFS/RDMA is to manage multiple connections
>    in the NFS client layer, not at the RPC transport layer.
> 
> - We don't intend to retransmit an RPC that was registered via
>    one connection on a different connection; a retransmitted RPC
>    is re-marshaled from scratch before each transmit.
> 
> The purpose of destroying the PD at disconnect is to enable a
> clean device removal model: basically, disconnect all connections
> through that device, and we're guaranteed to have no more pinned
> HW resources. AFAICT, that is the model also used in other kernel
> ULPs.

True, and revoking the PD ensures that no further remote access can
occur; that is, it's a fully secure approach.

>> How would this approach support such a requirement?
> 
> As above, the Linux NFS client would create additional transports,
> each with their own single connection (and PD).
> 
> 
>> Can a PD be provided to a new connection?
> 
> The sequence of API events is that an ID and PD are created first,
> then a QP is created with the ID and PD, then the connection is
> established.

Yes, I'm aware, and that was the question: if PD sharing were desired,
could the PD be passed to QP creation instead of being allocated
inline?

If this isn't needed now, then it's fine to leave it out. But I think
it's worth considering that it may be desirable in future.
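
Something along these lines, purely hypothetical (none of these
parameters or helpers exist in xprtrdma today; the caller would own the
shared PD, and the endpoint would only allocate its own when none is
supplied):

#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>

static int example_ep_create_with_pd(struct rdma_cm_id *id,
				     struct ib_qp_init_attr *attr,
				     struct ib_pd *shared_pd,
				     struct ib_pd **pd_out)
{
	struct ib_pd *pd = shared_pd;
	int rc;

	if (!pd) {
		/* No shared PD supplied: fall back to a per-connection
		 * PD, as the patch does today. */
		pd = ib_alloc_pd(id->device, 0);
		if (IS_ERR(pd))
			return PTR_ERR(pd);
	}

	rc = rdma_create_qp(id, pd, attr);
	if (rc) {
		if (!shared_pd)
			ib_dealloc_pd(pd);
		return rc;
	}

	*pd_out = pd;
	return 0;
}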

Tom.

>>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>>> ---
>>>   net/sunrpc/xprtrdma/verbs.c |   26 ++++++++++----------------
>>>   1 file changed, 10 insertions(+), 16 deletions(-)
>>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>>> index f361213a8157..36fe7baea014 100644
>>> --- a/net/sunrpc/xprtrdma/verbs.c
>>> +++ b/net/sunrpc/xprtrdma/verbs.c
>>> @@ -363,14 +363,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
>>>   		rc = PTR_ERR(ia->ri_id);
>>>   		goto out_err;
>>>   	}
>>> -
>>> -	ia->ri_pd = ib_alloc_pd(ia->ri_id->device, 0);
>>> -	if (IS_ERR(ia->ri_pd)) {
>>> -		rc = PTR_ERR(ia->ri_pd);
>>> -		pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
>>> -		goto out_err;
>>> -	}
>>> -
>>>   	return 0;
>>>     out_err:
>>> @@ -403,9 +395,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
>>>     	rpcrdma_ep_destroy(r_xprt);
>>>   -	ib_dealloc_pd(ia->ri_pd);
>>> -	ia->ri_pd = NULL;
>>> -
>>>   	/* Allow waiters to continue */
>>>   	complete(&ia->ri_remove_done);
>>>   @@ -423,11 +412,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
>>>   	if (ia->ri_id && !IS_ERR(ia->ri_id))
>>>   		rdma_destroy_id(ia->ri_id);
>>>   	ia->ri_id = NULL;
>>> -
>>> -	/* If the pd is still busy, xprtrdma missed freeing a resource */
>>> -	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
>>> -		ib_dealloc_pd(ia->ri_pd);
>>> -	ia->ri_pd = NULL;
>>>   }
>>>     static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt,
>>> @@ -514,6 +498,12 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt,
>>>   	ep->rep_remote_cma.flow_control = 0;
>>>   	ep->rep_remote_cma.rnr_retry_count = 0;
>>>   +	ia->ri_pd = ib_alloc_pd(id->device, 0);
>>> +	if (IS_ERR(ia->ri_pd)) {
>>> +		rc = PTR_ERR(ia->ri_pd);
>>> +		goto out_destroy;
>>> +	}
>>> +
>>>   	rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
>>>   	if (rc)
>>>   		goto out_destroy;
>>> @@ -540,6 +530,10 @@ static void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
>>>   	if (ep->rep_attr.send_cq)
>>>   		ib_free_cq(ep->rep_attr.send_cq);
>>>   	ep->rep_attr.send_cq = NULL;
>>> +
>>> +	if (ia->ri_pd)
>>> +		ib_dealloc_pd(ia->ri_pd);
>>> +	ia->ri_pd = NULL;
>>>   }
>>>     /* Re-establish a connection after a device removal event.
> 
> --
> Chuck Lever
> 
> 
> 
> 
>

Patch

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index f361213a8157..36fe7baea014 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -363,14 +363,6 @@  static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
 		rc = PTR_ERR(ia->ri_id);
 		goto out_err;
 	}
-
-	ia->ri_pd = ib_alloc_pd(ia->ri_id->device, 0);
-	if (IS_ERR(ia->ri_pd)) {
-		rc = PTR_ERR(ia->ri_pd);
-		pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
-		goto out_err;
-	}
-
 	return 0;
 
 out_err:
@@ -403,9 +395,6 @@  static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
 
 	rpcrdma_ep_destroy(r_xprt);
 
-	ib_dealloc_pd(ia->ri_pd);
-	ia->ri_pd = NULL;
-
 	/* Allow waiters to continue */
 	complete(&ia->ri_remove_done);
 
@@ -423,11 +412,6 @@  static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt,
 	if (ia->ri_id && !IS_ERR(ia->ri_id))
 		rdma_destroy_id(ia->ri_id);
 	ia->ri_id = NULL;
-
-	/* If the pd is still busy, xprtrdma missed freeing a resource */
-	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
-		ib_dealloc_pd(ia->ri_pd);
-	ia->ri_pd = NULL;
 }
 
 static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt,
@@ -514,6 +498,12 @@  static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt,
 	ep->rep_remote_cma.flow_control = 0;
 	ep->rep_remote_cma.rnr_retry_count = 0;
 
+	ia->ri_pd = ib_alloc_pd(id->device, 0);
+	if (IS_ERR(ia->ri_pd)) {
+		rc = PTR_ERR(ia->ri_pd);
+		goto out_destroy;
+	}
+
 	rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
 	if (rc)
 		goto out_destroy;
@@ -540,6 +530,10 @@  static void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
 	if (ep->rep_attr.send_cq)
 		ib_free_cq(ep->rep_attr.send_cq);
 	ep->rep_attr.send_cq = NULL;
+
+	if (ia->ri_pd)
+		ib_dealloc_pd(ia->ri_pd);
+	ia->ri_pd = NULL;
 }
 
 /* Re-establish a connection after a device removal event.