diff mbox series

[v3,07/26] xprtrdma: Improve locking around rpcrdma_rep destruction

Message ID 161885534259.38598.17355600080366820390.stgit@manet.1015granger.net (mailing list archive)
State Not Applicable
Headers show
Series NFS/RDMA client patches for next | expand

Commit Message

Chuck Lever III April 19, 2021, 6:02 p.m. UTC
Currently rpcrdma_reps_destroy() assumes that, at transport
tear-down, the content of the rb_free_reps list is the same as the
content of the rb_all_reps list. Although that is usually true,
using the rb_all_reps list should be more reliable because of
the way it's managed. And, rpcrdma_reps_unmap() uses rb_all_reps;
these two functions should both traverse the "all" list.

Ensure that all rpcrdma_reps are always destroyed whether they are
on the rep free list or not.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/xprtrdma/verbs.c |   31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

Comments

Trond Myklebust April 23, 2021, 9:06 p.m. UTC | #1
On Mon, 2021-04-19 at 14:02 -0400, Chuck Lever wrote:
> Currently rpcrdma_reps_destroy() assumes that, at transport
> tear-down, the content of the rb_free_reps list is the same as the
> content of the rb_all_reps list. Although that is usually true,
> using the rb_all_reps list should be more reliable because of
> the way it's managed. And, rpcrdma_reps_unmap() uses rb_all_reps;
> these two functions should both traverse the "all" list.
> 
> Ensure that all rpcrdma_reps are always destroyed whether they are
> on the rep free list or not.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  net/sunrpc/xprtrdma/verbs.c |   31 ++++++++++++++++++++++++-------
>  1 file changed, 24 insertions(+), 7 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/verbs.c
> b/net/sunrpc/xprtrdma/verbs.c
> index 1b599a623eea..482fdc9e25c2 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1007,16 +1007,23 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct
> rpcrdma_xprt *r_xprt,
>         return NULL;
>  }
>  
> -/* No locking needed here. This function is invoked only by the
> - * Receive completion handler, or during transport shutdown.
> - */
> -static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
> +static void rpcrdma_rep_destroy_locked(struct rpcrdma_rep *rep)

The name here is extremely confusing. As far as I can tell, this isn't
called with any lock?

>  {
> -       list_del(&rep->rr_all);
>         rpcrdma_regbuf_free(rep->rr_rdmabuf);
>         kfree(rep);
>  }
>  
> +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
> +{
> +       struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
> +
> +       spin_lock(&buf->rb_lock);
> +       list_del(&rep->rr_all);
> +       spin_unlock(&buf->rb_lock);
> +
> +       rpcrdma_rep_destroy_locked(rep);
> +}
> +
>  static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct
> rpcrdma_buffer *buf)
>  {
>         struct llist_node *node;
> @@ -1049,8 +1056,18 @@ static void rpcrdma_reps_destroy(struct
> rpcrdma_buffer *buf)
>  {
>         struct rpcrdma_rep *rep;
>  
> -       while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
> -               rpcrdma_rep_destroy(rep);
> +       spin_lock(&buf->rb_lock);
> +       while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
> +                                              struct rpcrdma_rep,
> +                                              rr_all)) != NULL) {
> +               list_del(&rep->rr_all);
> +               spin_unlock(&buf->rb_lock);
> +
> +               rpcrdma_rep_destroy_locked(rep);
> +
> +               spin_lock(&buf->rb_lock);
> +       }
> +       spin_unlock(&buf->rb_lock);
>  }
>  
>  /**
> 
>
Chuck Lever III April 24, 2021, 5:39 p.m. UTC | #2
> On Apr 23, 2021, at 5:06 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Mon, 2021-04-19 at 14:02 -0400, Chuck Lever wrote:
>> Currently rpcrdma_reps_destroy() assumes that, at transport
>> tear-down, the content of the rb_free_reps list is the same as the
>> content of the rb_all_reps list. Although that is usually true,
>> using the rb_all_reps list should be more reliable because of
>> the way it's managed. And, rpcrdma_reps_unmap() uses rb_all_reps;
>> these two functions should both traverse the "all" list.
>> 
>> Ensure that all rpcrdma_reps are always destroyed whether they are
>> on the rep free list or not.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>>  net/sunrpc/xprtrdma/verbs.c |   31 ++++++++++++++++++++++++-------
>>  1 file changed, 24 insertions(+), 7 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/verbs.c
>> b/net/sunrpc/xprtrdma/verbs.c
>> index 1b599a623eea..482fdc9e25c2 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -1007,16 +1007,23 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct
>> rpcrdma_xprt *r_xprt,
>>         return NULL;
>>  }
>>  
>> -/* No locking needed here. This function is invoked only by the
>> - * Receive completion handler, or during transport shutdown.
>> - */
>> -static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
>> +static void rpcrdma_rep_destroy_locked(struct rpcrdma_rep *rep)
> 
> The name here is extremely confusing. As far as I can tell, this isn't
> called with any lock?

Fair enough.

I renamed it rpcrdma_rep_free() and it doesn't seem to have
any consequences for downstream commits in this series.

You could do a global edit, I can resend you this patch with
the change, or I can post a v4 of this series. Let me know
your preference.


>>  {
>> -       list_del(&rep->rr_all);
>>         rpcrdma_regbuf_free(rep->rr_rdmabuf);
>>         kfree(rep);
>>  }
>>  
>> +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
>> +{
>> +       struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
>> +
>> +       spin_lock(&buf->rb_lock);
>> +       list_del(&rep->rr_all);
>> +       spin_unlock(&buf->rb_lock);
>> +
>> +       rpcrdma_rep_destroy_locked(rep);
>> +}
>> +
>>  static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct
>> rpcrdma_buffer *buf)
>>  {
>>         struct llist_node *node;
>> @@ -1049,8 +1056,18 @@ static void rpcrdma_reps_destroy(struct
>> rpcrdma_buffer *buf)
>>  {
>>         struct rpcrdma_rep *rep;
>>  
>> -       while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
>> -               rpcrdma_rep_destroy(rep);
>> +       spin_lock(&buf->rb_lock);
>> +       while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
>> +                                              struct rpcrdma_rep,
>> +                                              rr_all)) != NULL) {
>> +               list_del(&rep->rr_all);
>> +               spin_unlock(&buf->rb_lock);
>> +
>> +               rpcrdma_rep_destroy_locked(rep);
>> +
>> +               spin_lock(&buf->rb_lock);
>> +       }
>> +       spin_unlock(&buf->rb_lock);
>>  }
>>  
>>  /**
>> 
>> 
> 
> -- 
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com

--
Chuck Lever
Trond Myklebust April 24, 2021, 5:59 p.m. UTC | #3
On Sat, 2021-04-24 at 17:39 +0000, Chuck Lever III wrote:
> 
> 
> > On Apr 23, 2021, at 5:06 PM, Trond Myklebust < 
> > trondmy@hammerspace.com> wrote:
> > 
> > On Mon, 2021-04-19 at 14:02 -0400, Chuck Lever wrote:
> > > Currently rpcrdma_reps_destroy() assumes that, at transport
> > > tear-down, the content of the rb_free_reps list is the same as
> > > the
> > > content of the rb_all_reps list. Although that is usually true,
> > > using the rb_all_reps list should be more reliable because of
> > > the way it's managed. And, rpcrdma_reps_unmap() uses rb_all_reps;
> > > these two functions should both traverse the "all" list.
> > > 
> > > Ensure that all rpcrdma_reps are always destroyed whether they
> > > are
> > > on the rep free list or not.
> > > 
> > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> > > ---
> > >  net/sunrpc/xprtrdma/verbs.c |   31 ++++++++++++++++++++++++-----
> > > --
> > >  1 file changed, 24 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/net/sunrpc/xprtrdma/verbs.c
> > > b/net/sunrpc/xprtrdma/verbs.c
> > > index 1b599a623eea..482fdc9e25c2 100644
> > > --- a/net/sunrpc/xprtrdma/verbs.c
> > > +++ b/net/sunrpc/xprtrdma/verbs.c
> > > @@ -1007,16 +1007,23 @@ struct rpcrdma_rep
> > > *rpcrdma_rep_create(struct
> > > rpcrdma_xprt *r_xprt,
> > >         return NULL;
> > >  }
> > >  
> > > -/* No locking needed here. This function is invoked only by the
> > > - * Receive completion handler, or during transport shutdown.
> > > - */
> > > -static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
> > > +static void rpcrdma_rep_destroy_locked(struct rpcrdma_rep *rep)
> > 
> > The name here is extremely confusing. As far as I can tell, this
> > isn't
> > called with any lock?
> 
> Fair enough.
> 
> I renamed it rpcrdma_rep_free() and it doesn't seem to have
> any consequences for downstream commits in this series.

Sounds good.

> 
> You could do a global edit, I can resend you this patch with
> the change, or I can post a v4 of this series. Let me know
> your preference.
> 

Can you please just resend this patch with the update, unless there are
repercussions for the other patches? (in which case a v4 would be
welcome)

> 
> > >  {
> > > -       list_del(&rep->rr_all);
> > >         rpcrdma_regbuf_free(rep->rr_rdmabuf);
> > >         kfree(rep);
> > >  }
> > >  
> > > +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
> > > +{
> > > +       struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
> > > +
> > > +       spin_lock(&buf->rb_lock);
> > > +       list_del(&rep->rr_all);
> > > +       spin_unlock(&buf->rb_lock);
> > > +
> > > +       rpcrdma_rep_destroy_locked(rep);
> > > +}
> > > +
> > >  static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct
> > > rpcrdma_buffer *buf)
> > >  {
> > >         struct llist_node *node;
> > > @@ -1049,8 +1056,18 @@ static void rpcrdma_reps_destroy(struct
> > > rpcrdma_buffer *buf)
> > >  {
> > >         struct rpcrdma_rep *rep;
> > >  
> > > -       while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
> > > -               rpcrdma_rep_destroy(rep);
> > > +       spin_lock(&buf->rb_lock);
> > > +       while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
> > > +                                              struct
> > > rpcrdma_rep,
> > > +                                              rr_all)) != NULL)
> > > {
> > > +               list_del(&rep->rr_all);
> > > +               spin_unlock(&buf->rb_lock);
> > > +
> > > +               rpcrdma_rep_destroy_locked(rep);
> > > +
> > > +               spin_lock(&buf->rb_lock);
> > > +       }
> > > +       spin_unlock(&buf->rb_lock);
> > >  }
> > >  
> > >  /**
> > > 
> > > 
> > 
> > -- 
> > Trond Myklebust
> > Linux NFS client maintainer, Hammerspace
> > trond.myklebust@hammerspace.com
> 
> --
> Chuck Lever
> 
> 
>
diff mbox series

Patch

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 1b599a623eea..482fdc9e25c2 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1007,16 +1007,23 @@  struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
 	return NULL;
 }
 
-/* No locking needed here. This function is invoked only by the
- * Receive completion handler, or during transport shutdown.
- */
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+static void rpcrdma_rep_destroy_locked(struct rpcrdma_rep *rep)
 {
-	list_del(&rep->rr_all);
 	rpcrdma_regbuf_free(rep->rr_rdmabuf);
 	kfree(rep);
 }
 
+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+{
+	struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
+
+	spin_lock(&buf->rb_lock);
+	list_del(&rep->rr_all);
+	spin_unlock(&buf->rb_lock);
+
+	rpcrdma_rep_destroy_locked(rep);
+}
+
 static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
 {
 	struct llist_node *node;
@@ -1049,8 +1056,18 @@  static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
 {
 	struct rpcrdma_rep *rep;
 
-	while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
-		rpcrdma_rep_destroy(rep);
+	spin_lock(&buf->rb_lock);
+	while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
+					       struct rpcrdma_rep,
+					       rr_all)) != NULL) {
+		list_del(&rep->rr_all);
+		spin_unlock(&buf->rb_lock);
+
+		rpcrdma_rep_destroy_locked(rep);
+
+		spin_lock(&buf->rb_lock);
+	}
+	spin_unlock(&buf->rb_lock);
 }
 
 /**