[1/2] NFS: Add sequence_privileged_ops for nfs4_proc_sequence()

Message ID 1352745355-31157-1-git-send-email-bjschuma@netapp.com (mailing list archive)
State New, archived

Commit Message

Bryan Schumaker Nov. 12, 2012, 6:35 p.m. UTC
From: Bryan Schumaker <bjschuma@netapp.com>

During recovery the NFS4_SESSION_DRAINING flag might be set on the
client structure.  This can cause lease renewal to abort when
nfs41_setup_sequence sees that we are doing recovery.  As a result, the
client never recovers and all activity with the NFS server halts.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
---
 fs/nfs/nfs4proc.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)
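
For context on the deadlock described above: nfs41_setup_sequence() refuses to let ordinary RPCs start while the session is draining. A minimal sketch of that gate follows; it is a paraphrase for illustration only, and the helper rpc_task_has_priority() plus the exact locking are recalled from memory rather than quoted from the tree, so treat those details as assumptions:

	/* Illustrative sketch, simplified.  While the session is draining,
	 * non-privileged tasks sleep on the slot table waitqueue; only
	 * tasks marked RPC_PRIORITY_PRIVILEGED may still proceed. */
	static int nfs41_setup_sequence_gate(struct nfs4_session *session,
					     struct rpc_task *task)
	{
		struct nfs4_slot_table *tbl = &session->fc_slot_table;

		spin_lock(&tbl->slot_tbl_lock);
		if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
		    !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
			/* Sleep until nfs4_end_drain_session() wakes the queue */
			rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
			spin_unlock(&tbl->slot_tbl_lock);
			return -EAGAIN;
		}
		spin_unlock(&tbl->slot_tbl_lock);
		/* ...normal slot allocation and rpc_call_start(task) follow... */
		return 0;
	}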

Comments

William A. (Andy) Adamson Nov. 12, 2012, 8:22 p.m. UTC | #1
On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
> From: Bryan Schumaker <bjschuma@netapp.com>
>
> During recovery the NFS4_SESSION_DRAINING flag might be set on the
> client structure.  This can cause lease renewal to abort when
> nfs41_setup_sequence sees that we are doing recovery.  As a result, the
> client never recovers and all activity with the NFS server halts.


When does this happen? Say the session is draining, and an RPC returns
from one of the compounds such as nfs_open, nfs_locku etc whose
rpc_call_done routine calls renew_lease after freeing its slot.  Like
all calls on a draining session, the renew_lease sleeps on the session
slot_tbl_waitq - is this what you mean by "causes lease renewal to
abort"? How does this cause the client to never recover?

The only other call to renew_lease is from the state manager itself
which runs one function at a time, and will not call renew_lease until
the recovery is over....

What am I missing....?
-->Andy


>
> Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
> ---
>  fs/nfs/nfs4proc.c | 21 +++++++++++++++++----
>  1 file changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 5eec442..537181c 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -6138,13 +6138,26 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
>         rpc_call_start(task);
>  }
>
> +static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data)
> +{
> +       rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
> +       nfs41_sequence_prepare(task, data);
> +}
> +
>  static const struct rpc_call_ops nfs41_sequence_ops = {
>         .rpc_call_done = nfs41_sequence_call_done,
>         .rpc_call_prepare = nfs41_sequence_prepare,
>         .rpc_release = nfs41_sequence_release,
>  };
>
> -static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
> +static const struct rpc_call_ops nfs41_sequence_privileged_ops = {
> +       .rpc_call_done = nfs41_sequence_call_done,
> +       .rpc_call_prepare = nfs41_sequence_prepare_privileged,
> +       .rpc_release = nfs41_sequence_release,
> +};
> +
> +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred,
> +                                            const struct rpc_call_ops *seq_ops)
>  {
>         struct nfs4_sequence_data *calldata;
>         struct rpc_message msg = {
> @@ -6154,7 +6167,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
>         struct rpc_task_setup task_setup_data = {
>                 .rpc_client = clp->cl_rpcclient,
>                 .rpc_message = &msg,
> -               .callback_ops = &nfs41_sequence_ops,
> +               .callback_ops = seq_ops,
>                 .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
>         };
>
> @@ -6181,7 +6194,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
>
>         if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
>                 return 0;
> -       task = _nfs41_proc_sequence(clp, cred);
> +       task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops);
>         if (IS_ERR(task))
>                 ret = PTR_ERR(task);
>         else
> @@ -6195,7 +6208,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
>         struct rpc_task *task;
>         int ret;
>
> -       task = _nfs41_proc_sequence(clp, cred);
> +       task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops);
>         if (IS_ERR(task)) {
>                 ret = PTR_ERR(task);
>                 goto out;
> --
> 1.8.0
>
Bryan Schumaker Nov. 12, 2012, 8:27 p.m. UTC | #2
On 11/12/2012 03:22 PM, Andy Adamson wrote:
> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
>> From: Bryan Schumaker <bjschuma@netapp.com>
>>
>> During recovery the NFS4_SESSION_DRAINING flag might be set on the
>> client structure.  This can cause lease renewal to abort when
>> nfs41_setup_sequence sees that we are doing recovery.  As a result, the
>> client never recovers and all activity with the NFS server halts.
> 
> 
> When does this happen? Say the session is draining, and an RPC returns
> from one of the compounds such as nfs_open, nfs_locku etc whose
> rpc_call_done routine calls renew_lease after freeing it's slot.  Like
> all calls on a draining session, the renew_lease sleeps on the session
> slot_tbl_waitq - is this what you mean by "causes lease renewal to
> abort"? How does this cause the client to never recover?

I'm not sure exactly what causes it, but I was able to hit this fairly reliably when mounting a server to 4 or 5 mountpoints on a client and then running xfstests against each mountpoint.  At some point during the tests the client gets a cb_path_down sequence error, tries to recover but hangs after the bind connection to session.

> 
> The only other call to renew_lease is from the state manager itself
> which runs one function at a time, and will not call renew_lease until
> the recovery is over....

It's this call that isn't completing.  nfs4_begin_drain_session() was called as part of bind connection to session, but there was never a call to nfs4_end_drain_session() by the time renew_lease was called so the client doesn't know that recovery is over.
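
For reference, the two helpers mentioned here pair up roughly as in the sketch below. This is a simplified paraphrase of the fs/nfs/nfs4state.c helpers, with the slot-draining wait and the backchannel details elided:

	/* Simplified paraphrase; the real helpers also wait for in-flight
	 * requests to free their slots. */
	static void nfs4_begin_drain_session(struct nfs_client *clp)
	{
		struct nfs4_session *ses = clp->cl_session;

		/* Stop new SEQUENCE setup; nfs41_setup_sequence() will now
		 * put non-privileged tasks to sleep on the slot_tbl_waitq. */
		set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
		/* ...wait for outstanding requests to free their slots... */
	}

	static void nfs4_end_drain_session(struct nfs_client *clp)
	{
		struct nfs4_session *ses = clp->cl_session;

		/* Recovery is over: let normal traffic resume. */
		clear_bit(NFS4_SESSION_DRAINING, &ses->session_state);
		/* ...wake tasks sleeping on the slot_tbl_waitq... */
	}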

> 
> What am I missing....?
> -->Andy
> 
> 
>>
>> Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
>> ---
>>  fs/nfs/nfs4proc.c | 21 +++++++++++++++++----
>>  1 file changed, 17 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index 5eec442..537181c 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -6138,13 +6138,26 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
>>         rpc_call_start(task);
>>  }
>>
>> +static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data)
>> +{
>> +       rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
>> +       nfs41_sequence_prepare(task, data);
>> +}
>> +
>>  static const struct rpc_call_ops nfs41_sequence_ops = {
>>         .rpc_call_done = nfs41_sequence_call_done,
>>         .rpc_call_prepare = nfs41_sequence_prepare,
>>         .rpc_release = nfs41_sequence_release,
>>  };
>>
>> -static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
>> +static const struct rpc_call_ops nfs41_sequence_privileged_ops = {
>> +       .rpc_call_done = nfs41_sequence_call_done,
>> +       .rpc_call_prepare = nfs41_sequence_prepare_privileged,
>> +       .rpc_release = nfs41_sequence_release,
>> +};
>> +
>> +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred,
>> +                                            const struct rpc_call_ops *seq_ops)
>>  {
>>         struct nfs4_sequence_data *calldata;
>>         struct rpc_message msg = {
>> @@ -6154,7 +6167,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
>>         struct rpc_task_setup task_setup_data = {
>>                 .rpc_client = clp->cl_rpcclient,
>>                 .rpc_message = &msg,
>> -               .callback_ops = &nfs41_sequence_ops,
>> +               .callback_ops = seq_ops,
>>                 .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
>>         };
>>
>> @@ -6181,7 +6194,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
>>
>>         if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
>>                 return 0;
>> -       task = _nfs41_proc_sequence(clp, cred);
>> +       task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops);
>>         if (IS_ERR(task))
>>                 ret = PTR_ERR(task);
>>         else
>> @@ -6195,7 +6208,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
>>         struct rpc_task *task;
>>         int ret;
>>
>> -       task = _nfs41_proc_sequence(clp, cred);
>> +       task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops);
>>         if (IS_ERR(task)) {
>>                 ret = PTR_ERR(task);
>>                 goto out;
>> --
>> 1.8.0
>>
Trond Myklebust Nov. 12, 2012, 8:29 p.m. UTC | #3
On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:
> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
> > From: Bryan Schumaker <bjschuma@netapp.com>
> >
> > During recovery the NFS4_SESSION_DRAINING flag might be set on the
> > client structure.  This can cause lease renewal to abort when

Not lease renewal. It is lease verification (i.e. checking that we have
a valid lease and session) that will deadlock.

> > nfs41_setup_sequence sees that we are doing recovery.  As a result, the
> > client never recovers and all activity with the NFS server halts.
>
>
> When does this happen? Say the session is draining, and an RPC returns
> from one of the compounds such as nfs_open, nfs_locku etc whose
> rpc_call_done routine calls renew_lease after freeing it's slot.  Like
> all calls on a draining session, the renew_lease sleeps on the session
> slot_tbl_waitq - is this what you mean by "causes lease renewal to
> abort"? How does this cause the client to never recover?
>
> The only other call to renew_lease is from the state manager itself
> which runs one function at a time, and will not call renew_lease until
> the recovery is over....
>
> What am I missing....?

nfs4_check_lease() is run exclusively by the state manager thread in
order to check that the clientid (and session id on NFSv4.1) are valid.
It will deadlock the state manager thread if NFS4_SESSION_DRAINING is
already set.
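
Spelled out, the deadlocking path looks like this (simplified call chain; the indirection through the state-renewal ops table is paraphrased from memory):

	nfs4_state_manager()                   /* state manager thread */
	  nfs4_check_lease()
	    ops->renew_lease()                 /* == nfs4_proc_sequence() on v4.1 */
	      _nfs41_proc_sequence()           /* queues the async SEQUENCE task */
	      rpc_wait_for_completion_task()   /* state manager blocks here */

	rpciod:
	  nfs41_sequence_prepare()
	    nfs41_setup_sequence()             /* NFS4_SESSION_DRAINING is set, so
	                                        * the task sleeps on slot_tbl_waitq */

The thread that would eventually clear NFS4_SESSION_DRAINING is the state manager itself, which is now blocked waiting for the SEQUENCE task to complete: a self-deadlock.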

> -->Andy
>
>
> >
> > Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
> > ---
> >  fs/nfs/nfs4proc.c | 21 +++++++++++++++++----
> >  1 file changed, 17 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index 5eec442..537181c 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -6138,13 +6138,26 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
> >         rpc_call_start(task);
> >  }
> >
> > +static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data)
> > +{
> > +       rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
> > +       nfs41_sequence_prepare(task, data);
> > +}
> > +
> >  static const struct rpc_call_ops nfs41_sequence_ops = {
> >         .rpc_call_done = nfs41_sequence_call_done,
> >         .rpc_call_prepare = nfs41_sequence_prepare,
> >         .rpc_release = nfs41_sequence_release,
> >  };
> >
> > -static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
> > +static const struct rpc_call_ops nfs41_sequence_privileged_ops = {
> > +       .rpc_call_done = nfs41_sequence_call_done,
> > +       .rpc_call_prepare = nfs41_sequence_prepare_privileged,
> > +       .rpc_release = nfs41_sequence_release,
> > +};
> > +
> > +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred,
> > +                                            const struct rpc_call_ops *seq_ops)
> >  {
> >         struct nfs4_sequence_data *calldata;
> >         struct rpc_message msg = {
> > @@ -6154,7 +6167,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
> >         struct rpc_task_setup task_setup_data = {
> >                 .rpc_client = clp->cl_rpcclient,
> >                 .rpc_message = &msg,
> > -               .callback_ops = &nfs41_sequence_ops,
> > +               .callback_ops = seq_ops,
> >                 .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
> >         };
> >
> > @@ -6181,7 +6194,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
> >
> >         if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
> >                 return 0;
> > -       task = _nfs41_proc_sequence(clp, cred);
> > +       task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops);
> >         if (IS_ERR(task))
> >                 ret = PTR_ERR(task);
> >         else
> > @@ -6195,7 +6208,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
> >         struct rpc_task *task;
> >         int ret;
> >
> > -       task = _nfs41_proc_sequence(clp, cred);
> > +       task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops);
> >         if (IS_ERR(task)) {
> >                 ret = PTR_ERR(task);
> >                 goto out;
> > --
> > 1.8.0

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com
William A. (Andy) Adamson Nov. 12, 2012, 8:49 p.m. UTC | #4
On Mon, Nov 12, 2012 at 3:29 PM, Myklebust, Trond
<Trond.Myklebust@netapp.com> wrote:
> On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:
>> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
>> > From: Bryan Schumaker <bjschuma@netapp.com>
>> >
>> > During recovery the NFS4_SESSION_DRAINING flag might be set on the
>> > client structure.  This can cause lease renewal to abort when
>
> Not lease renewal. It is lease verification (i.e. checking that we have
> a valid lease and session) that will deadlock.
>
>> > nfs41_setup_sequence sees that we are doing recovery.  As a result, the
>> > client never recovers and all activity with the NFS server halts.
>>
>>
>> When does this happen? Say the session is draining, and an RPC returns
>> from one of the compounds such as nfs_open, nfs_locku etc whose
>> rpc_call_done routine calls renew_lease after freeing it's slot.  Like
>> all calls on a draining session, the renew_lease sleeps on the session
>> slot_tbl_waitq - is this what you mean by "causes lease renewal to
>> abort"? How does this cause the client to never recover?
>>
>> The only other call to renew_lease is from the state manager itself
>> which runs one function at a time, and will not call renew_lease until
>> the recovery is over....
>>
>> What am I missing....?
>
> nfs4_check_lease() is run exclusively by the state manager thread in
> order to check that the clientid (and session id on NFSv4.1) are valid.
> It will deadlock the state manager thread if NFS4_SESSION_DRAINING is
> already set.

OK. NFS4_SESSION_DRAINING is also set exclusively by the state manager
thread. Why is the state manager told to check the lease when it's
also draining a session?

No matter what the answer, please update the patch description to
better explain the problem being solved.

-->Andy

>
>> -->Andy
>>
>>
>> >
>> > Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
>> > ---
>> >  fs/nfs/nfs4proc.c | 21 +++++++++++++++++----
>> >  1 file changed, 17 insertions(+), 4 deletions(-)
>> >
>> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> > index 5eec442..537181c 100644
>> > --- a/fs/nfs/nfs4proc.c
>> > +++ b/fs/nfs/nfs4proc.c
>> > @@ -6138,13 +6138,26 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
>> >         rpc_call_start(task);
>> >  }
>> >
>> > +static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data)
>> > +{
>> > +       rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
>> > +       nfs41_sequence_prepare(task, data);
>> > +}
>> > +
>> >  static const struct rpc_call_ops nfs41_sequence_ops = {
>> >         .rpc_call_done = nfs41_sequence_call_done,
>> >         .rpc_call_prepare = nfs41_sequence_prepare,
>> >         .rpc_release = nfs41_sequence_release,
>> >  };
>> >
>> > -static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
>> > +static const struct rpc_call_ops nfs41_sequence_privileged_ops = {
>> > +       .rpc_call_done = nfs41_sequence_call_done,
>> > +       .rpc_call_prepare = nfs41_sequence_prepare_privileged,
>> > +       .rpc_release = nfs41_sequence_release,
>> > +};
>> > +
>> > +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred,
>> > +                                            const struct rpc_call_ops *seq_ops)
>> >  {
>> >         struct nfs4_sequence_data *calldata;
>> >         struct rpc_message msg = {
>> > @@ -6154,7 +6167,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
>> >         struct rpc_task_setup task_setup_data = {
>> >                 .rpc_client = clp->cl_rpcclient,
>> >                 .rpc_message = &msg,
>> > -               .callback_ops = &nfs41_sequence_ops,
>> > +               .callback_ops = seq_ops,
>> >                 .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
>> >         };
>> >
>> > @@ -6181,7 +6194,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
>> >
>> >         if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
>> >                 return 0;
>> > -       task = _nfs41_proc_sequence(clp, cred);
>> > +       task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops);
>> >         if (IS_ERR(task))
>> >                 ret = PTR_ERR(task);
>> >         else
>> > @@ -6195,7 +6208,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
>> >         struct rpc_task *task;
>> >         int ret;
>> >
>> > -       task = _nfs41_proc_sequence(clp, cred);
>> > +       task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops);
>> >         if (IS_ERR(task)) {
>> >                 ret = PTR_ERR(task);
>> >                 goto out;
>> > --
>> > 1.8.0
>> >
>
> --
> Trond Myklebust
> Linux NFS client maintainer
>
> NetApp
> Trond.Myklebust@netapp.com
> www.netapp.com
Bryan Schumaker Nov. 12, 2012, 8:51 p.m. UTC | #5
On 11/12/2012 03:49 PM, Andy Adamson wrote:
> On Mon, Nov 12, 2012 at 3:29 PM, Myklebust, Trond
> <Trond.Myklebust@netapp.com> wrote:
>> On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:
>>> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
>>>> From: Bryan Schumaker <bjschuma@netapp.com>
>>>>
>>>> During recovery the NFS4_SESSION_DRAINING flag might be set on the
>>>> client structure.  This can cause lease renewal to abort when
>>
>> Not lease renewal. It is lease verification (i.e. checking that we have
>> a valid lease and session) that will deadlock.
>>
>>>> nfs41_setup_sequence sees that we are doing recovery.  As a result, the
>>>> client never recovers and all activity with the NFS server halts.
>>>
>>>
>>> When does this happen? Say the session is draining, and an RPC returns
>>> from one of the compounds such as nfs_open, nfs_locku etc whose
>>> rpc_call_done routine calls renew_lease after freeing it's slot.  Like
>>> all calls on a draining session, the renew_lease sleeps on the session
>>> slot_tbl_waitq - is this what you mean by "causes lease renewal to
>>> abort"? How does this cause the client to never recover?
>>>
>>> The only other call to renew_lease is from the state manager itself
>>> which runs one function at a time, and will not call renew_lease until
>>> the recovery is over....
>>>
>>> What am I missing....?
>>
>> nfs4_check_lease() is run exclusively by the state manager thread in
>> order to check that the clientid (and session id on NFSv4.1) are valid.
>> It will deadlock the state manager thread if NFS4_SESSION_DRAINING is
>> already set.
> 
> OK. NFS4_SESSION_DRAINING is also set exclusively by the state manager
> thread. Why is the state manager told to check the lease when it's
> also draining a session?
> 
> No matter what the answer, please update the patch description to
> better explain the problem being solved.

Yeah, I was just thinking about doing that.  Give me a few minutes...

- Bryan

> 
> -->Andy
> 
>>
>>> -->Andy
>>>
>>>
>>>>
>>>> Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
>>>> ---
>>>>  fs/nfs/nfs4proc.c | 21 +++++++++++++++++----
>>>>  1 file changed, 17 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>>>> index 5eec442..537181c 100644
>>>> --- a/fs/nfs/nfs4proc.c
>>>> +++ b/fs/nfs/nfs4proc.c
>>>> @@ -6138,13 +6138,26 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
>>>>         rpc_call_start(task);
>>>>  }
>>>>
>>>> +static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data)
>>>> +{
>>>> +       rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
>>>> +       nfs41_sequence_prepare(task, data);
>>>> +}
>>>> +
>>>>  static const struct rpc_call_ops nfs41_sequence_ops = {
>>>>         .rpc_call_done = nfs41_sequence_call_done,
>>>>         .rpc_call_prepare = nfs41_sequence_prepare,
>>>>         .rpc_release = nfs41_sequence_release,
>>>>  };
>>>>
>>>> -static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
>>>> +static const struct rpc_call_ops nfs41_sequence_privileged_ops = {
>>>> +       .rpc_call_done = nfs41_sequence_call_done,
>>>> +       .rpc_call_prepare = nfs41_sequence_prepare_privileged,
>>>> +       .rpc_release = nfs41_sequence_release,
>>>> +};
>>>> +
>>>> +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred,
>>>> +                                            const struct rpc_call_ops *seq_ops)
>>>>  {
>>>>         struct nfs4_sequence_data *calldata;
>>>>         struct rpc_message msg = {
>>>> @@ -6154,7 +6167,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
>>>>         struct rpc_task_setup task_setup_data = {
>>>>                 .rpc_client = clp->cl_rpcclient,
>>>>                 .rpc_message = &msg,
>>>> -               .callback_ops = &nfs41_sequence_ops,
>>>> +               .callback_ops = seq_ops,
>>>>                 .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
>>>>         };
>>>>
>>>> @@ -6181,7 +6194,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
>>>>
>>>>         if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
>>>>                 return 0;
>>>> -       task = _nfs41_proc_sequence(clp, cred);
>>>> +       task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops);
>>>>         if (IS_ERR(task))
>>>>                 ret = PTR_ERR(task);
>>>>         else
>>>> @@ -6195,7 +6208,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
>>>>         struct rpc_task *task;
>>>>         int ret;
>>>>
>>>> -       task = _nfs41_proc_sequence(clp, cred);
>>>> +       task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops);
>>>>         if (IS_ERR(task)) {
>>>>                 ret = PTR_ERR(task);
>>>>                 goto out;
>>>> --
>>>> 1.8.0
>>>>
>>
>> --
>> Trond Myklebust
>> Linux NFS client maintainer
>>
>> NetApp
>> Trond.Myklebust@netapp.com
>> www.netapp.com
Trond Myklebust Nov. 12, 2012, 8:54 p.m. UTC | #6
On Mon, 2012-11-12 at 15:49 -0500, Andy Adamson wrote:
> On Mon, Nov 12, 2012 at 3:29 PM, Myklebust, Trond
> <Trond.Myklebust@netapp.com> wrote:
> > On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:
> >> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
> >> > From: Bryan Schumaker <bjschuma@netapp.com>
> >> >
> >> > During recovery the NFS4_SESSION_DRAINING flag might be set on the
> >> > client structure.  This can cause lease renewal to abort when
> >
> > Not lease renewal. It is lease verification (i.e. checking that we have
> > a valid lease and session) that will deadlock.
> >
> >> > nfs41_setup_sequence sees that we are doing recovery.  As a result, the
> >> > client never recovers and all activity with the NFS server halts.
> >>
> >>
> >> When does this happen? Say the session is draining, and an RPC returns
> >> from one of the compounds such as nfs_open, nfs_locku etc whose
> >> rpc_call_done routine calls renew_lease after freeing it's slot.  Like
> >> all calls on a draining session, the renew_lease sleeps on the session
> >> slot_tbl_waitq - is this what you mean by "causes lease renewal to
> >> abort"? How does this cause the client to never recover?
> >>
> >> The only other call to renew_lease is from the state manager itself
> >> which runs one function at a time, and will not call renew_lease until
> >> the recovery is over....
> >>
> >> What am I missing....?
> >
> > nfs4_check_lease() is run exclusively by the state manager thread in
> > order to check that the clientid (and session id on NFSv4.1) are valid.
> > It will deadlock the state manager thread if NFS4_SESSION_DRAINING is
> > already set.
>
> OK. NFS4_SESSION_DRAINING is also set exclusively by the state manager
> thread. Why is the state manager told to check the lease when it's
> also draining a session?

NFS4_SESSION_DRAINING does not just mean that the session is being
drained; it remains set until open and lock state recovery is completed.

IOW: if something happens during state recovery that requires us to
check the lease (e.g. something returns NFS4ERR_EXPIRED) then the
current code will deadlock.

> No matter what the answer, please update the patch description to
> better explain the problem being solved.

ACK. I agree that the changelog entry can be improved.

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com
Bryan Schumaker Nov. 12, 2012, 9:02 p.m. UTC | #7
On 11/12/2012 03:54 PM, Myklebust, Trond wrote:
> On Mon, 2012-11-12 at 15:49 -0500, Andy Adamson wrote:
>> On Mon, Nov 12, 2012 at 3:29 PM, Myklebust, Trond
>> <Trond.Myklebust@netapp.com> wrote:
>>> On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:
>>>> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
>>>>> From: Bryan Schumaker <bjschuma@netapp.com>
>>>>>
>>>>> During recovery the NFS4_SESSION_DRAINING flag might be set on the
>>>>> client structure.  This can cause lease renewal to abort when
>>>
>>> Not lease renewal. It is lease verification (i.e. checking that we have
>>> a valid lease and session) that will deadlock.
>>>
>>>>> nfs41_setup_sequence sees that we are doing recovery.  As a result, the
>>>>> client never recovers and all activity with the NFS server halts.
>>>>
>>>>
>>>> When does this happen? Say the session is draining, and an RPC returns
>>>> from one of the compounds such as nfs_open, nfs_locku etc whose
>>>> rpc_call_done routine calls renew_lease after freeing it's slot.  Like
>>>> all calls on a draining session, the renew_lease sleeps on the session
>>>> slot_tbl_waitq - is this what you mean by "causes lease renewal to
>>>> abort"? How does this cause the client to never recover?
>>>>
>>>> The only other call to renew_lease is from the state manager itself
>>>> which runs one function at a time, and will not call renew_lease until
>>>> the recovery is over....
>>>>
>>>> What am I missing....?
>>>
>>> nfs4_check_lease() is run exclusively by the state manager thread in
>>> order to check that the clientid (and session id on NFSv4.1) are valid.
>>> It will deadlock the state manager thread if NFS4_SESSION_DRAINING is
>>> already set.
>>
>> OK. NFS4_SESSION_DRAINING is also set exclusively by the state manager
>> thread. Why is the state manager told to check the lease when it's
>> also draining a session?
> 
> NFS4_SESSION_DRAINING does not just mean that the session is being
> drained; it remains set until open and lock state recovery is completed.
> 
> IOW: if something happens during state recovery that requires us to
> check the lease (e.g. something returns NFS4ERR_EXPIRED) then the
> current code will deadlock.
> 
>> No matter what the answer, please update the patch description to
>> better explain the problem being solved.
> 
> ACK. I agree that the changelog entry can be improved.
> 

Does this read any better (and should I resend the patch)?

NFS: Add sequence_privileged_ops for nfs4_proc_sequence()

If I mount an NFS v4.1 server to a single client multiple times and then
run xfstests over each mountpoint I usually get the client into a state
where recovery deadlocks.  The server informs the client of a
cb_path_down sequence error, the client then does a
bind_connection_to_session and checks the status of the lease.

I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING
flag on the client, but this flag is never unset before
nfs4_check_lease() reaches nfs4_proc_sequence().  This causes the client
to deadlock, halting all NFS activity to the server.


William A. (Andy) Adamson Nov. 12, 2012, 9:08 p.m. UTC | #8
On Mon, Nov 12, 2012 at 3:51 PM, Bryan Schumaker <bjschuma@netapp.com> wrote:
> On 11/12/2012 03:49 PM, Andy Adamson wrote:
>> On Mon, Nov 12, 2012 at 3:29 PM, Myklebust, Trond
>> <Trond.Myklebust@netapp.com> wrote:
>>> On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:
>>>> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
>>>>> From: Bryan Schumaker <bjschuma@netapp.com>
>>>>>
>>>>> During recovery the NFS4_SESSION_DRAINING flag might be set on the
>>>>> client structure.  This can cause lease renewal to abort when
>>>
>>> Not lease renewal. It is lease verification (i.e. checking that we have
>>> a valid lease and session) that will deadlock.
>>>
>>>>> nfs41_setup_sequence sees that we are doing recovery.  As a result, the
>>>>> client never recovers and all activity with the NFS server halts.
>>>>
>>>>
>>>> When does this happen? Say the session is draining, and an RPC returns
>>>> from one of the compounds such as nfs_open, nfs_locku etc whose
>>>> rpc_call_done routine calls renew_lease after freeing it's slot.  Like
>>>> all calls on a draining session, the renew_lease sleeps on the session
>>>> slot_tbl_waitq - is this what you mean by "causes lease renewal to
>>>> abort"? How does this cause the client to never recover?
>>>>
>>>> The only other call to renew_lease is from the state manager itself
>>>> which runs one function at a time, and will not call renew_lease until
>>>> the recovery is over....
>>>>
>>>> What am I missing....?
>>>
>>> nfs4_check_lease() is run exclusively by the state manager thread in
>>> order to check that the clientid (and session id on NFSv4.1) are valid.
>>> It will deadlock the state manager thread if NFS4_SESSION_DRAINING is
>>> already set.
>>
>> OK. NFS4_SESSION_DRAINING is also set exclusively by the state manager
>> thread. Why is the state manager told to check the lease when it's
>> also draining a session?

Here is the call in the state manager.

                if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
                                &clp->cl_state)) {
                        section = "bind conn to session";
                        status = nfs4_bind_conn_to_session(clp);
                        if (status < 0)
                                goto out_error;
                        continue;
                }

nfs4_bind_conn_to_session calls nfs4_begin_drain_session. The error
case is fine - nfs4_end_drain_session is called in out_error.

But the continue when nfs4_bind_conn_to_session succeeds can send the
state manager to process other flags (such as NFS4CLNT_CHECK_LEASE)
without a call to nfs4_end_drain_session.

That looks like a bug to me. The same can occur with
NFS4CLNT_RECALL_SLOT. Why not just call nfs4_end_drain_session prior
to the continue, or perhaps remove the continue and hit the
nfs4_end_drain_session call further down in the state manager loop?
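
The first variant, sketched against the snippet above (hypothetical, not a posted patch), would just be:

		if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
				&clp->cl_state)) {
			section = "bind conn to session";
			status = nfs4_bind_conn_to_session(clp);
			if (status < 0)
				goto out_error;
			nfs4_end_drain_session(clp);	/* proposed addition */
			continue;
		}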

-->Andy

>>
>> No matter what the answer, please update the patch description to
>> better explain the problem being solved.
>
> Yeah, I was just thinking about doing that.  Give me a few minutes...


>
> - Bryan
>
>>
>> -->Andy
>>
>>>
>>>> -->Andy
>>>>
>>>>
>>>>>
>>>>> Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
>>>>> ---
>>>>>  fs/nfs/nfs4proc.c | 21 +++++++++++++++++----
>>>>>  1 file changed, 17 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>>>>> index 5eec442..537181c 100644
>>>>> --- a/fs/nfs/nfs4proc.c
>>>>> +++ b/fs/nfs/nfs4proc.c
>>>>> @@ -6138,13 +6138,26 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
>>>>>         rpc_call_start(task);
>>>>>  }
>>>>>
>>>>> +static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data)
>>>>> +{
>>>>> +       rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
>>>>> +       nfs41_sequence_prepare(task, data);
>>>>> +}
>>>>> +
>>>>>  static const struct rpc_call_ops nfs41_sequence_ops = {
>>>>>         .rpc_call_done = nfs41_sequence_call_done,
>>>>>         .rpc_call_prepare = nfs41_sequence_prepare,
>>>>>         .rpc_release = nfs41_sequence_release,
>>>>>  };
>>>>>
>>>>> -static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
>>>>> +static const struct rpc_call_ops nfs41_sequence_privileged_ops = {
>>>>> +       .rpc_call_done = nfs41_sequence_call_done,
>>>>> +       .rpc_call_prepare = nfs41_sequence_prepare_privileged,
>>>>> +       .rpc_release = nfs41_sequence_release,
>>>>> +};
>>>>> +
>>>>> +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred,
>>>>> +                                            const struct rpc_call_ops *seq_ops)
>>>>>  {
>>>>>         struct nfs4_sequence_data *calldata;
>>>>>         struct rpc_message msg = {
>>>>> @@ -6154,7 +6167,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
>>>>>         struct rpc_task_setup task_setup_data = {
>>>>>                 .rpc_client = clp->cl_rpcclient,
>>>>>                 .rpc_message = &msg,
>>>>> -               .callback_ops = &nfs41_sequence_ops,
>>>>> +               .callback_ops = seq_ops,
>>>>>                 .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
>>>>>         };
>>>>>
>>>>> @@ -6181,7 +6194,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
>>>>>
>>>>>         if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
>>>>>                 return 0;
>>>>> -       task = _nfs41_proc_sequence(clp, cred);
>>>>> +       task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops);
>>>>>         if (IS_ERR(task))
>>>>>                 ret = PTR_ERR(task);
>>>>>         else
>>>>> @@ -6195,7 +6208,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
>>>>>         struct rpc_task *task;
>>>>>         int ret;
>>>>>
>>>>> -       task = _nfs41_proc_sequence(clp, cred);
>>>>> +       task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops);
>>>>>         if (IS_ERR(task)) {
>>>>>                 ret = PTR_ERR(task);
>>>>>                 goto out;
>>>>> --
>>>>> 1.8.0
>>>>>
>>>
>>> --
>>> Trond Myklebust
>>> Linux NFS client maintainer
>>>
>>> NetApp
>>> Trond.Myklebust@netapp.com
>>> www.netapp.com
Trond Myklebust Nov. 12, 2012, 9:10 p.m. UTC | #9
On Mon, 2012-11-12 at 16:02 -0500, Bryan Schumaker wrote:
> On 11/12/2012 03:54 PM, Myklebust, Trond wrote:
> > On Mon, 2012-11-12 at 15:49 -0500, Andy Adamson wrote:
> >> On Mon, Nov 12, 2012 at 3:29 PM, Myklebust, Trond
> >> <Trond.Myklebust@netapp.com> wrote:
> >>> On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:
> >>>> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
> >>>>> From: Bryan Schumaker <bjschuma@netapp.com>
> >>>>>
> >>>>> During recovery the NFS4_SESSION_DRAINING flag might be set on the
> >>>>> client structure.  This can cause lease renewal to abort when
> >>>
> >>> Not lease renewal. It is lease verification (i.e. checking that we have
> >>> a valid lease and session) that will deadlock.
> >>>
> >>>>> nfs41_setup_sequence sees that we are doing recovery.  As a result, the
> >>>>> client never recovers and all activity with the NFS server halts.
> >>>>
> >>>>
> >>>> When does this happen? Say the session is draining, and an RPC returns
> >>>> from one of the compounds such as nfs_open, nfs_locku etc whose
> >>>> rpc_call_done routine calls renew_lease after freeing it's slot.  Like
> >>>> all calls on a draining session, the renew_lease sleeps on the session
> >>>> slot_tbl_waitq - is this what you mean by "causes lease renewal to
> >>>> abort"? How does this cause the client to never recover?
> >>>>
> >>>> The only other call to renew_lease is from the state manager itself
> >>>> which runs one function at a time, and will not call renew_lease until
> >>>> the recovery is over....
> >>>>
> >>>> What am I missing....?
> >>>
> >>> nfs4_check_lease() is run exclusively by the state manager thread in
> >>> order to check that the clientid (and session id on NFSv4.1) are valid.
> >>> It will deadlock the state manager thread if NFS4_SESSION_DRAINING is
> >>> already set.
> >>
> >> OK. NFS4_SESSION_DRAINING is also set exclusively by the state manager
> >> thread. Why is the state manager told to check the lease when it's
> >> also draining a session?
> >
> > NFS4_SESSION_DRAINING does not just mean that the session is being
> > drained; it remains set until open and lock state recovery is completed.
> >
> > IOW: if something happens during state recovery that requires us to
> > check the lease (e.g. something returns NFS4ERR_EXPIRED) then the
> > current code will deadlock.
> >
> >> No matter what the answer, please update the patch description to
> >> better explain the problem being solved.
> >
> > ACK. I agree that the changelog entry can be improved.
> >
>
> Does this read any better (and should I resend the patch)?
>
> NFS: Add sequence_priviliged_ops for nfs4_proc_sequence()
>
> If I mount an NFS v4.1 server to a single client multiple times and then
> run xfstests over each mountpoint I usually get the client into a state
> where recovery deadlocks.  The server informs the client of a
> cb_path_down sequence error, the client then does a
> bind_connection_to_session and checks the status of the lease.

Why are you getting the cb_path_down? Is that a result of a fault
injection, or is it a genuine error?

While cb_path_down is a valid error, and we do want the recovery to work
correctly, I would expect it to be rare since it implies that we lost
the TCP connection to the server for some reason. Finding out why it
happens is therefore worth doing.

> I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING
> flag on the client, but this flag is never unset before
> nfs4_check_lease() reaches nfs4_proc_sequence().  This causes the client
> to deadlock, halting all NFS activity to the server.

Otherwise, the text is more or less OK. The one thing that I'm missing
is a statement about why nfs4_proc_sequence() needs to be a privileged
operation.

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com
Trond Myklebust Nov. 12, 2012, 9:24 p.m. UTC | #10
On Mon, 2012-11-12 at 16:08 -0500, Andy Adamson wrote:
> On Mon, Nov 12, 2012 at 3:51 PM, Bryan Schumaker <bjschuma@netapp.com> wrote:
> > On 11/12/2012 03:49 PM, Andy Adamson wrote:
> >> On Mon, Nov 12, 2012 at 3:29 PM, Myklebust, Trond
> >> <Trond.Myklebust@netapp.com> wrote:
> >>> On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:
> >>>> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
> >>>>> From: Bryan Schumaker <bjschuma@netapp.com>
> >>>>>
> >>>>> During recovery the NFS4_SESSION_DRAINING flag might be set on the
> >>>>> client structure.  This can cause lease renewal to abort when
> >>>
> >>> Not lease renewal. It is lease verification (i.e. checking that we have
> >>> a valid lease and session) that will deadlock.
> >>>
> >>>>> nfs41_setup_sequence sees that we are doing recovery.  As a result, the
> >>>>> client never recovers and all activity with the NFS server halts.
> >>>>
> >>>>
> >>>> When does this happen? Say the session is draining, and an RPC returns
> >>>> from one of the compounds such as nfs_open, nfs_locku etc whose
> >>>> rpc_call_done routine calls renew_lease after freeing it's slot.  Like
> >>>> all calls on a draining session, the renew_lease sleeps on the session
> >>>> slot_tbl_waitq - is this what you mean by "causes lease renewal to
> >>>> abort"? How does this cause the client to never recover?
> >>>>
> >>>> The only other call to renew_lease is from the state manager itself
> >>>> which runs one function at a time, and will not call renew_lease until
> >>>> the recovery is over....
> >>>>
> >>>> What am I missing....?
> >>>
> >>> nfs4_check_lease() is run exclusively by the state manager thread in
> >>> order to check that the clientid (and session id on NFSv4.1) are valid.
> >>> It will deadlock the state manager thread if NFS4_SESSION_DRAINING is
> >>> already set.
> >>
> >> OK. NFS4_SESSION_DRAINING is also set exclusively by the state manager
> >> thread. Why is the state manager told to check the lease when it's
> >> also draining a session?
>
> Here is the call in the state manager.
>
>                 if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
>                                 &clp->cl_state)) {
>                         section = "bind conn to session";
>                         status = nfs4_bind_conn_to_session(clp);
>                         if (status < 0)
>                                 goto out_error;
>                         continue;
>                 }
>
> nfs4_bind_conn_to_session calls nfs4_begin_drain_session. The error
> case is fine - nfs4_end_drain_session is called in out_error.
>
> But the continue when nfs4_bind_conn_to_session succeeds can send the
> state manager to process other flags (such as NFS4CLNT_CHECK_LEASE)
> without a call to nfs4_end_drain_session.
>
> That looks like a bug to me. The same can occur with
> NFS4CLNT_RECALL_SLOT.

So what?

>  Why not just call nfs4_end_drain_session prior
> to the continue, or perhaps remove the continue and hit the
> nfs4_end_drain_session call further down in the state manager loop?

Calling nfs4_end_drain_session in order to allow unprivileged operations
to proceed, when we're not even sure that we still have a lease, makes
no sense. The whole point here is to block those operations until the
state manager thread has recovered the lease, session, open stateids and
lock stateids.

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com
Bryan Schumaker Nov. 12, 2012, 9:26 p.m. UTC | #11
On 11/12/2012 04:10 PM, Myklebust, Trond wrote:
> On Mon, 2012-11-12 at 16:02 -0500, Bryan Schumaker wrote:
>> On 11/12/2012 03:54 PM, Myklebust, Trond wrote:
>>> On Mon, 2012-11-12 at 15:49 -0500, Andy Adamson wrote:
>>>> On Mon, Nov 12, 2012 at 3:29 PM, Myklebust, Trond
>>>> <Trond.Myklebust@netapp.com> wrote:
>>>>> On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:
>>>>>> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
>>>>>>> From: Bryan Schumaker <bjschuma@netapp.com>
>>>>>>>
>>>>>>> During recovery the NFS4_SESSION_DRAINING flag might be set on the
>>>>>>> client structure.  This can cause lease renewal to abort when
>>>>>
>>>>> Not lease renewal. It is lease verification (i.e. checking that we have
>>>>> a valid lease and session) that will deadlock.
>>>>>
>>>>>>> nfs41_setup_sequence sees that we are doing recovery.  As a result, the
>>>>>>> client never recovers and all activity with the NFS server halts.
>>>>>>
>>>>>>
>>>>>> When does this happen? Say the session is draining, and an RPC returns
>>>>>> from one of the compounds such as nfs_open, nfs_locku etc whose
>>>>>> rpc_call_done routine calls renew_lease after freeing it's slot.  Like
>>>>>> all calls on a draining session, the renew_lease sleeps on the session
>>>>>> slot_tbl_waitq - is this what you mean by "causes lease renewal to
>>>>>> abort"? How does this cause the client to never recover?
>>>>>>
>>>>>> The only other call to renew_lease is from the state manager itself
>>>>>> which runs one function at a time, and will not call renew_lease until
>>>>>> the recovery is over....
>>>>>>
>>>>>> What am I missing....?
>>>>>
>>>>> nfs4_check_lease() is run exclusively by the state manager thread in
>>>>> order to check that the clientid (and session id on NFSv4.1) are valid.
>>>>> It will deadlock the state manager thread if NFS4_SESSION_DRAINING is
>>>>> already set.
>>>>
>>>> OK. NFS4_SESSION_DRAINING is also set exclusively by the state manager
>>>> thread. Why is the state manager told to check the lease when it's
>>>> also draining a session?
>>>
>>> NFS4_SESSION_DRAINING does not just mean that the session is being
>>> drained; it remains set until open and lock state recovery is completed.
>>>
>>> IOW: if something happens during state recovery that requires us to
>>> check the lease (e.g. something returns NFS4ERR_EXPIRED) then the
>>> current code will deadlock.
>>>
>>>> No matter what the answer, please update the patch description to
>>>> better explain the problem being solved.
>>>
>>> ACK. I agree that the changelog entry can be improved.
>>>
>>
>> Does this read any better (and should I resend the patch)?
>>
>> NFS: Add sequence_priviliged_ops for nfs4_proc_sequence()
>>
>> If I mount an NFS v4.1 server to a single client multiple times and then
>> run xfstests over each mountpoint I usually get the client into a state
>> where recovery deadlocks.  The server informs the client of a
>> cb_path_down sequence error, the client then does a
>> bind_connection_to_session and checks the status of the lease.
> 
> Why are you getting the cb_path_down? Is that a result of a fault
> injection, or is it a genuine error?
> 
> While cb_path_down is a valid error, and we do want the recovery to work
> correctly, I would expect it to be rare since it implies that we lost
> the TCP connection to the server for some reason. Finding out why it
> happens is therefore worth doing.

I didn't get it with fault injection, it just happened by itself somewhere during testing.  My attempts at capturing it with wireshark usually slow down my system as wireshark tries to display 1,000,000+ packets... I'll try again, though.

> 
>> I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING
>> flag on the client, but this flag is never unset before
>> nfs4_check_lease() reaches nfs4_proc_sequence().  This causes the client
>> to deadlock, halting all NFS activity to the server.
> 
> Otherwise, the text is more or less OK. The one thing that I'm missing
> is a statement about why nfs4_proc_sequence() needs to be a privileged
> operation.
> 

Here is the new last paragraph, I just added the sentence at the end:

I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING
flag on the client, but this flag is never unset before
nfs4_check_lease() reaches nfs4_proc_sequence().  This causes the client
to deadlock, halting all NFS activity to the server.  This patch changes
nfs4_proc_sequence() to run in privileged mode to bypass the
NFS4_SESSION_DRAINING check and continue with recovery.


Trond Myklebust Nov. 12, 2012, 9:37 p.m. UTC | #12
On Mon, 2012-11-12 at 16:26 -0500, Bryan Schumaker wrote:
> On 11/12/2012 04:10 PM, Myklebust, Trond wrote:

> > On Mon, 2012-11-12 at 16:02 -0500, Bryan Schumaker wrote:

> >> On 11/12/2012 03:54 PM, Myklebust, Trond wrote:

> >>> On Mon, 2012-11-12 at 15:49 -0500, Andy Adamson wrote:

> >>>> On Mon, Nov 12, 2012 at 3:29 PM, Myklebust, Trond

> >>>> <Trond.Myklebust@netapp.com> wrote:

> >>>>> On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:

> >>>>>> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:

> >>>>>>> From: Bryan Schumaker <bjschuma@netapp.com>

> >>>>>>>

> >>>>>>> During recovery the NFS4_SESSION_DRAINING flag might be set on the

> >>>>>>> client structure.  This can cause lease renewal to abort when

> >>>>>

> >>>>> Not lease renewal. It is lease verification (i.e. checking that we have

> >>>>> a valid lease and session) that will deadlock.

> >>>>>

> >>>>>>> nfs41_setup_sequence sees that we are doing recovery.  As a result, the

> >>>>>>> client never recovers and all activity with the NFS server halts.

> >>>>>>

> >>>>>>

> >>>>>> When does this happen? Say the session is draining, and an RPC returns

> >>>>>> from one of the compounds such as nfs_open, nfs_locku etc whose

> >>>>>> rpc_call_done routine calls renew_lease after freeing it's slot.  Like

> >>>>>> all calls on a draining session, the renew_lease sleeps on the session

> >>>>>> slot_tbl_waitq - is this what you mean by "causes lease renewal to

> >>>>>> abort"? How does this cause the client to never recover?

> >>>>>>

> >>>>>> The only other call to renew_lease is from the state manager itself

> >>>>>> which runs one function at a time, and will not call renew_lease until

> >>>>>> the recovery is over....

> >>>>>>

> >>>>>> What am I missing....?

> >>>>>

> >>>>> nfs4_check_lease() is run exclusively by the state manager thread in

> >>>>> order to check that the clientid (and session id on NFSv4.1) are valid.

> >>>>> It will deadlock the state manager thread if NFS4_SESSION_DRAINING is

> >>>>> already set.

> >>>>

> >>>> OK. NFS4_SESSION_DRAINING is also set exclusively by the state manager

> >>>> thread. Why is the state manager told to check the lease when it's

> >>>> also draining a session?

> >>>

> >>> NFS4_SESSION_DRAINING does not just mean that the session is being

> >>> drained; it remains set until open and lock state recovery is completed.

> >>>

> >>> IOW: if something happens during state recovery that requires us to

> >>> check the lease (e.g. something returns NFS4ERR_EXPIRED) then the

> >>> current code will deadlock.

> >>>

> >>>> No matter what the answer, please update the patch description to

> >>>> better explain the problem being solved.

> >>>

> >>> ACK. I agree that the changelog entry can be improved.

> >>>

> >>

> >> Does this read any better (and should I resend the patch)?

> >>

> >> NFS: Add sequence_priviliged_ops for nfs4_proc_sequence()

> >>

> >> If I mount an NFS v4.1 server to a single client multiple times and then

> >> run xfstests over each mountpoint I usually get the client into a state

> >> where recovery deadlocks.  The server informs the client of a

> >> cb_path_down sequence error, the client then does a

> >> bind_connection_to_session and checks the status of the lease.

> > 

> > Why are you getting the cb_path_down? Is that a result of a fault

> > injection, or is it a genuine error?

> > 

> > While cb_path_down is a valid error, and we do want the recovery to work

> > correctly, I would expect it to be rare since it implies that we lost

> > the TCP connection to the server for some reason. Finding out why it

> > happens is therefore worth doing.

> 

> I didn't get it with fault injection, it just happened by itself somewhere during testing.  My attempts at capturing it with wireshark usually slow down my system as wireshark tries to display 1,000,000+ packets... I'll try again, though.


See if you can just turn on the two dprintks() in xs_tcp_state_change(),
and then add a printk() trigger to the
nfs41_handle_sequence_flag_errors() to see if you can catch a
correlation between a TCP state change and the path_down issue.

> > 

> >> I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING

> >> flag on the client, but this flag is never unset before

> >> nfs4_check_lease() reaches nfs4_proc_sequence().  This causes the client

> >> to deadlock, halting all NFS activity to the server.

> > 

> > Otherwise, the text is more or less OK. The one thing that I'm missing

> > is a statement about why nfs4_proc_sequence() needs to be a privileged

> > operation.

> > 

> 

> Here is the new last paragraph, I just added the sentence at the end:

> 

> I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING

> flag on the client, but this flag is never unset before

> nfs4_check_lease() reaches nfs4_proc_sequence().  This causes the client

> to deadlock, halting all NFS activity to the server.  This patch changes

> nfs4_proc_sequence() to run in privileged mode to bypass the

> NFS4_SESSION_DRAINING check and continue with recovery.


Thanks. You might want to add a note to the fact that
nfs4_proc_sequence() is always run from the state recovery thread, and
therefore it is correct to run it as a privileged operation.
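
For context, the drain check in nfs41_setup_sequence() that a privileged
task gets past looks roughly like the sketch below. This is a simplified
reconstruction based on the v3.7-era client; the rpc_task_has_priority()
helper and the locking details are assumptions, not quoted source.

/* Sketch (not literal kernel code): how NFS4_SESSION_DRAINING blocks an
 * ordinary SEQUENCE task inside nfs41_setup_sequence().  A task that is
 * not RPC_PRIORITY_PRIVILEGED is put to sleep on the session's
 * slot_tbl_waitq until recovery clears the flag.  If that task *is* the
 * state manager's own lease check, nothing ever clears the flag: the
 * deadlock described above.
 */
if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
    !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
	/* Ordinary tasks wait here until the state manager finishes */
	rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
	spin_unlock(&tbl->slot_tbl_lock);
	return -EAGAIN;
}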

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com
Bryan Schumaker Nov. 12, 2012, 9:41 p.m. UTC | #13
On 11/12/2012 04:37 PM, Myklebust, Trond wrote:
> On Mon, 2012-11-12 at 16:26 -0500, Bryan Schumaker wrote:
>> On 11/12/2012 04:10 PM, Myklebust, Trond wrote:
>>> On Mon, 2012-11-12 at 16:02 -0500, Bryan Schumaker wrote:
>>>> On 11/12/2012 03:54 PM, Myklebust, Trond wrote:
>>>>> On Mon, 2012-11-12 at 15:49 -0500, Andy Adamson wrote:
>>>>>> On Mon, Nov 12, 2012 at 3:29 PM, Myklebust, Trond
>>>>>> <Trond.Myklebust@netapp.com> wrote:
>>>>>>> On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:
>>>>>>>> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
>>>>>>>>> From: Bryan Schumaker <bjschuma@netapp.com>
>>>>>>>>>
>>>>>>>>> During recovery the NFS4_SESSION_DRAINING flag might be set on the
>>>>>>>>> client structure.  This can cause lease renewal to abort when
>>>>>>>
>>>>>>> Not lease renewal. It is lease verification (i.e. checking that we have
>>>>>>> a valid lease and session) that will deadlock.
>>>>>>>
>>>>>>>>> nfs41_setup_sequence sees that we are doing recovery.  As a result, the
>>>>>>>>> client never recovers and all activity with the NFS server halts.
>>>>>>>>
>>>>>>>>
>>>>>>>> When does this happen? Say the session is draining, and an RPC returns
>>>>>>>> from one of the compounds such as nfs_open, nfs_locku etc whose
>>>>>>>> rpc_call_done routine calls renew_lease after freeing its slot.  Like
>>>>>>>> all calls on a draining session, the renew_lease sleeps on the session
>>>>>>>> slot_tbl_waitq - is this what you mean by "causes lease renewal to
>>>>>>>> abort"? How does this cause the client to never recover?
>>>>>>>>
>>>>>>>> The only other call to renew_lease is from the state manager itself
>>>>>>>> which runs one function at a time, and will not call renew_lease until
>>>>>>>> the recovery is over....
>>>>>>>>
>>>>>>>> What am I missing....?
>>>>>>>
>>>>>>> nfs4_check_lease() is run exclusively by the state manager thread in
>>>>>>> order to check that the clientid (and session id on NFSv4.1) are valid.
>>>>>>> It will deadlock the state manager thread if NFS4_SESSION_DRAINING is
>>>>>>> already set.
>>>>>>
>>>>>> OK. NFS4_SESSION_DRAINING is also set exclusively by the state manager
>>>>>> thread. Why is the state manager told to check the lease when it's
>>>>>> also draining a session?
>>>>>
>>>>> NFS4_SESSION_DRAINING does not just mean that the session is being
>>>>> drained; it remains set until open and lock state recovery is completed.
>>>>>
>>>>> IOW: if something happens during state recovery that requires us to
>>>>> check the lease (e.g. something returns NFS4ERR_EXPIRED) then the
>>>>> current code will deadlock.
>>>>>
>>>>>> No matter what the answer, please update the patch description to
>>>>>> better explain the problem being solved.
>>>>>
>>>>> ACK. I agree that the changelog entry can be improved.
>>>>>
>>>>
>>>> Does this read any better (and should I resend the patch)?
>>>>
>>>> NFS: Add sequence_priviliged_ops for nfs4_proc_sequence()
>>>>
>>>> If I mount an NFS v4.1 server to a single client multiple times and then
>>>> run xfstests over each mountpoint I usually get the client into a state
>>>> where recovery deadlocks.  The server informs the client of a
>>>> cb_path_down sequence error, the client then does a
>>>> bind_connection_to_session and checks the status of the lease.
>>>
>>> Why are you getting the cb_path_down? Is that a result of a fault
>>> injection, or is it a genuine error?
>>>
>>> While cb_path_down is a valid error, and we do want the recovery to work
>>> correctly, I would expect it to be rare since it implies that we lost
>>> the TCP connection to the server for some reason. Finding out why it
>>> happens is therefore worth doing.
>>
>> I didn't get it with fault injection, it just happened by itself somewhere during testing.  My attempts at capturing it with wireshark usually slow down my system as wireshark tries to display 1,000,000+ packets... I'll try again, though.
> 
> See if you can just turn on the two dprintks() in xs_tcp_state_change(),
> and then add a printk() trigger to the
> nfs41_handle_sequence_flag_errors() to see if you can catch a
> correlation between a TCP state change and the path_down issue.

Sounds simple enough.  I'll do that now...
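
Concretely, the trigger might look something like the sketch below — an
illustrative debugging aid only, not part of any patch. The function
signature follows the v3.7-era client, and the two dprintks() in
xs_tcp_state_change() are gated on the RPC transport debug flag, which
can be enabled at runtime (e.g. rpcdebug -m rpc -s trans).

/* Illustrative debugging hack only: announce every sequence flag error
 * so it can be lined up against the xs_tcp_state_change() dprintks in
 * the system log.
 */
static void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
{
	printk(KERN_INFO "NFS: sequence flag errors 0x%x from server %s\n",
	       flags, clp->cl_hostname);
	/* ... existing flag handling continues unchanged below ... */
}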

> 
>>>
>>>> I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING
>>>> flag on the client, but this flag is never unset before
>>>> nfs4_check_lease() reaches nfs4_proc_sequence().  This causes the client
>>>> to deadlock, halting all NFS activity to the server.
>>>
>>> Otherwise, the text is more or less OK. The one thing that I'm missing
>>> is a statement about why nfs4_proc_sequence() needs to be a privileged
>>> operation.
>>>
>>
>> Here is the new last paragraph, I just added the sentence at the end:
>>
>> I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING
>> flag on the client, but this flag is never unset before
>> nfs4_check_lease() reaches nfs4_proc_sequence().  This causes the client
>> to deadlock, halting all NFS activity to the server.  This patch changes
>> nfs4_proc_sequence() to run in privileged mode to bypass the
>> NFS4_SESSION_DRAINING check and continue with recovery.
> 
> Thanks. You might want to add a note to the fact that
> nfs4_proc_sequence() is always run from the state recovery thread, and
> therefore it is correct to run it as a privileged operation.
> 

Ok.  Do you want a new patch?  Just the commit text?

Trond Myklebust Nov. 12, 2012, 9:44 p.m. UTC | #14
On Mon, 2012-11-12 at 16:41 -0500, Bryan Schumaker wrote:
> On 11/12/2012 04:37 PM, Myklebust, Trond wrote:
> > On Mon, 2012-11-12 at 16:26 -0500, Bryan Schumaker wrote:
> >> On 11/12/2012 04:10 PM, Myklebust, Trond wrote:
> >>> On Mon, 2012-11-12 at 16:02 -0500, Bryan Schumaker wrote:
> >>>> On 11/12/2012 03:54 PM, Myklebust, Trond wrote:
> >>>>> On Mon, 2012-11-12 at 15:49 -0500, Andy Adamson wrote:
> >>>>>> On Mon, Nov 12, 2012 at 3:29 PM, Myklebust, Trond
> >>>>>> <Trond.Myklebust@netapp.com> wrote:
> >>>>>>> On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:
> >>>>>>>> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
> >>>>>>>>> From: Bryan Schumaker <bjschuma@netapp.com>
> >>>>>>>>>
> >>>>>>>>> During recovery the NFS4_SESSION_DRAINING flag might be set on the
> >>>>>>>>> client structure.  This can cause lease renewal to abort when
> >>>>>>>
> >>>>>>> Not lease renewal. It is lease verification (i.e. checking that we have
> >>>>>>> a valid lease and session) that will deadlock.
> >>>>>>>
> >>>>>>>>> nfs41_setup_sequence sees that we are doing recovery.  As a result, the
> >>>>>>>>> client never recovers and all activity with the NFS server halts.
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> When does this happen? Say the session is draining, and an RPC returns
> >>>>>>>> from one of the compounds such as nfs_open, nfs_locku etc whose
> >>>>>>>> rpc_call_done routine calls renew_lease after freeing its slot.  Like
> >>>>>>>> all calls on a draining session, the renew_lease sleeps on the session
> >>>>>>>> slot_tbl_waitq - is this what you mean by "causes lease renewal to
> >>>>>>>> abort"? How does this cause the client to never recover?
> >>>>>>>>
> >>>>>>>> The only other call to renew_lease is from the state manager itself
> >>>>>>>> which runs one function at a time, and will not call renew_lease until
> >>>>>>>> the recovery is over....
> >>>>>>>>
> >>>>>>>> What am I missing....?
> >>>>>>>
> >>>>>>> nfs4_check_lease() is run exclusively by the state manager thread in
> >>>>>>> order to check that the clientid (and session id on NFSv4.1) are valid.
> >>>>>>> It will deadlock the state manager thread if NFS4_SESSION_DRAINING is
> >>>>>>> already set.
> >>>>>>
> >>>>>> OK. NFS4_SESSION_DRAINING is also set exclusively by the state manager
> >>>>>> thread. Why is the state manager told to check the lease when it's
> >>>>>> also draining a session?
> >>>>>
> >>>>> NFS4_SESSION_DRAINING does not just mean that the session is being
> >>>>> drained; it remains set until open and lock state recovery is completed.
> >>>>>
> >>>>> IOW: if something happens during state recovery that requires us to
> >>>>> check the lease (e.g. something returns NFS4ERR_EXPIRED) then the
> >>>>> current code will deadlock.
> >>>>>
> >>>>>> No matter what the answer, please update the patch description to
> >>>>>> better explain the problem being solved.
> >>>>>
> >>>>> ACK. I agree that the changelog entry can be improved.
> >>>>>
> >>>>
> >>>> Does this read any better (and should I resend the patch)?
> >>>>
> >>>> NFS: Add sequence_priviliged_ops for nfs4_proc_sequence()
> >>>>
> >>>> If I mount an NFS v4.1 server to a single client multiple times and then
> >>>> run xfstests over each mountpoint I usually get the client into a state
> >>>> where recovery deadlocks.  The server informs the client of a
> >>>> cb_path_down sequence error, the client then does a
> >>>> bind_connection_to_session and checks the status of the lease.
> >>>
> >>> Why are you getting the cb_path_down? Is that a result of a fault
> >>> injection, or is it a genuine error?
> >>>
> >>> While cb_path_down is a valid error, and we do want the recovery to work
> >>> correctly, I would expect it to be rare since it implies that we lost
> >>> the TCP connection to the server for some reason. Finding out why it
> >>> happens is therefore worth doing.
> >>
> >> I didn't get it with fault injection, it just happened by itself somewhere during testing.  My attempts at capturing it with wireshark usually slow down my system as wireshark tries to display 1,000,000+ packets... I'll try again, though.
> >
> > See if you can just turn on the two dprintks() in xs_tcp_state_change(),
> > and then add a printk() trigger to the
> > nfs41_handle_sequence_flag_errors() to see if you can catch a
> > correlation between a TCP state change and the path_down issue.
>
> Sounds simple enough.  I'll do that now...
>
> >
> >>>
> >>>> I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING
> >>>> flag on the client, but this flag is never unset before
> >>>> nfs4_check_lease() reaches nfs4_proc_sequence().  This causes the client
> >>>> to deadlock, halting all NFS activity to the server.
> >>>
> >>> Otherwise, the text is more or less OK. The one thing that I'm missing
> >>> is a statement about why nfs4_proc_sequence() needs to be a privileged
> >>> operation.
> >>>
> >>
> >> Here is the new last paragraph, I just added the sentence at the end:
> >>
> >> I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING
> >> flag on the client, but this flag is never unset before
> >> nfs4_check_lease() reaches nfs4_proc_sequence().  This causes the client
> >> to deadlock, halting all NFS activity to the server.  This patch changes
> >> nfs4_proc_sequence() to run in privileged mode to bypass the
> >> NFS4_SESSION_DRAINING check and continue with recovery.
> >
> > Thanks. You might want to add a note to the fact that
> > nfs4_proc_sequence() is always run from the state recovery thread, and
> > therefore it is correct to run it as a privileged operation.
> >
>
> Ok.  Do you want a new patch?  Just the commit text?
>

Just send the whole patch, please.

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com
Bryan Schumaker Nov. 13, 2012, 2:47 p.m. UTC | #15
On 11/12/2012 04:37 PM, Myklebust, Trond wrote:
> On Mon, 2012-11-12 at 16:26 -0500, Bryan Schumaker wrote:
>> On 11/12/2012 04:10 PM, Myklebust, Trond wrote:
>>> On Mon, 2012-11-12 at 16:02 -0500, Bryan Schumaker wrote:
>>>> On 11/12/2012 03:54 PM, Myklebust, Trond wrote:
>>>>> On Mon, 2012-11-12 at 15:49 -0500, Andy Adamson wrote:
>>>>>> On Mon, Nov 12, 2012 at 3:29 PM, Myklebust, Trond
>>>>>> <Trond.Myklebust@netapp.com> wrote:
>>>>>>> On Mon, 2012-11-12 at 15:22 -0500, Andy Adamson wrote:
>>>>>>>> On Mon, Nov 12, 2012 at 1:35 PM,  <bjschuma@netapp.com> wrote:
>>>>>>>>> From: Bryan Schumaker <bjschuma@netapp.com>
>>>>>>>>>
>>>>>>>>> During recovery the NFS4_SESSION_DRAINING flag might be set on the
>>>>>>>>> client structure.  This can cause lease renewal to abort when
>>>>>>>
>>>>>>> Not lease renewal. It is lease verification (i.e. checking that we have
>>>>>>> a valid lease and session) that will deadlock.
>>>>>>>
>>>>>>>>> nfs41_setup_sequence sees that we are doing recovery.  As a result, the
>>>>>>>>> client never recovers and all activity with the NFS server halts.
>>>>>>>>
>>>>>>>>
>>>>>>>> When does this happen? Say the session is draining, and an RPC returns
>>>>>>>> from one of the compounds such as nfs_open, nfs_locku etc whose
>>>>>>>> rpc_call_done routine calls renew_lease after freeing its slot.  Like
>>>>>>>> all calls on a draining session, the renew_lease sleeps on the session
>>>>>>>> slot_tbl_waitq - is this what you mean by "causes lease renewal to
>>>>>>>> abort"? How does this cause the client to never recover?
>>>>>>>>
>>>>>>>> The only other call to renew_lease is from the state manager itself
>>>>>>>> which runs one function at a time, and will not call renew_lease until
>>>>>>>> the recovery is over....
>>>>>>>>
>>>>>>>> What am I missing....?
>>>>>>>
>>>>>>> nfs4_check_lease() is run exclusively by the state manager thread in
>>>>>>> order to check that the clientid (and session id on NFSv4.1) are valid.
>>>>>>> It will deadlock the state manager thread if NFS4_SESSION_DRAINING is
>>>>>>> already set.
>>>>>>
>>>>>> OK. NFS4_SESSION_DRAINING is also set exclusively by the state manager
>>>>>> thread. Why is the state manager told to check the lease when it's
>>>>>> also draining a session?
>>>>>
>>>>> NFS4_SESSION_DRAINING does not just mean that the session is being
>>>>> drained; it remains set until open and lock state recovery is completed.
>>>>>
>>>>> IOW: if something happens during state recovery that requires us to
>>>>> check the lease (e.g. something returns NFS4ERR_EXPIRED) then the
>>>>> current code will deadlock.
>>>>>
>>>>>> No matter what the answer, please update the patch description to
>>>>>> better explain the problem being solved.
>>>>>
>>>>> ACK. I agree that the changelog entry can be improved.
>>>>>
>>>>
>>>> Does this read any better (and should I resend the patch)?
>>>>
>>>> NFS: Add sequence_priviliged_ops for nfs4_proc_sequence()
>>>>
>>>> If I mount an NFS v4.1 server to a single client multiple times and then
>>>> run xfstests over each mountpoint I usually get the client into a state
>>>> where recovery deadlocks.  The server informs the client of a
>>>> cb_path_down sequence error, the client then does a
>>>> bind_connection_to_session and checks the status of the lease.
>>>
>>> Why are you getting the cb_path_down? Is that a result of a fault
>>> injection, or is it a genuine error?
>>>
>>> While cb_path_down is a valid error, and we do want the recovery to work
>>> correctly, I would expect it to be rare since it implies that we lost
>>> the TCP connection to the server for some reason. Finding out why it
>>> happens is therefore worth doing.
>>
>> I didn't get it with fault injection, it just happened by itself somewhere during testing.  My attempts at capturing it with wireshark usually slow down my system as wireshark tries to display 1,000,000+ packets... I'll try again, though.
> 
> See if you can just turn on the two dprintks() in xs_tcp_state_change(),
> and then add a printk() trigger to the
> nfs41_handle_sequence_flag_errors() to see if you can catch a
> correlation between a TCP state change and the path_down issue.

I saw this in the logs, but the printk I added to nfs41_handle_sequence_flag_errors() wasn't hit.  Maybe that was something hit by waiting read / write requests that were stacked up?  I know that a few read_prepare() and write_prepare() requests were stacked up... but I don't remember seeing the "Callback slot table overflowed" message last week.

[  287.951307] Callback slot table overflowed
[  287.951540] RPC:       xs_tcp_state_change client ffff88003d7cc000...
[  287.951542] RPC:       state 4 conn 1 dead 0 zapped 1 sk_shutdown 2
[  287.951550] RPC: Could not send backchannel reply error: -32
[  287.951746] RPC:       xs_tcp_state_change client ffff88003d7cc000...
[  287.951748] RPC:       state 5 conn 0 dead 0 zapped 1 sk_shutdown 2
[  287.951759] RPC:       xs_tcp_state_change client ffff88003d7cc000...
[  287.951761] RPC:       state 7 conn 0 dead 0 zapped 1 sk_shutdown 3
[  287.951762] RPC:       xs_tcp_state_change client ffff88003d7cc000...
[  287.951763] RPC:       state 7 conn 0 dead 0 zapped 1 sk_shutdown 3
[  287.951978] RPC:       xs_tcp_state_change client ffff88003d7cc000...
[  287.951981] RPC:       state 1 conn 0 dead 0 zapped 1 sk_shutdown 0
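
Decoding the trace: the state numbers printed by xs_tcp_state_change()
are the kernel's TCP socket states, and error -32 is EPIPE, so the log
reads as an orderly teardown followed by a reconnect:

/* TCP socket states relevant to the trace above (values from mainline
 * include/net/tcp_states.h):
 *
 *   state 4 = TCP_FIN_WAIT1   orderly shutdown begins
 *   state 5 = TCP_FIN_WAIT2   our FIN has been acked
 *   state 7 = TCP_CLOSE       connection fully closed
 *   state 1 = TCP_ESTABLISHED a fresh connection comes up
 *
 * i.e. the old transport tore down (the backchannel reply failing with
 * -EPIPE during the shutdown) and the client then reconnected.
 */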

- Bryan


> 
>>>
>>>> I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING
>>>> flag on the client, but this flag is never unset before
>>>> nfs4_check_lease() reaches nfs4_proc_sequence().  This causes the client
>>>> to deadlock, halting all NFS activity to the server.
>>>
>>> Otherwise, the text is more or less OK. The one thing that I'm missing
>>> is a statement about why nfs4_proc_sequence() needs to be a privileged
>>> operation.
>>>
>>
>> Here is the new last paragraph, I just added the sentence at the end:
>>
>> I found that bind_connection_to_session sets the NFS4_SESSION_DRAINING
>> flag on the client, but this flag is never unset before
>> nfs4_check_lease() reaches nfs4_proc_sequence().  This causes the client
>> to deadlock, halting all NFS activity to the server.  This patch changes
>> nfs4_proc_sequence() to run in privileged mode to bypass the
>> NFS4_SESSION_DRAINING check and continue with recovery.
> 
> Thanks. You might want to add a note to the fact that
> nfs4_proc_sequence() is always run from the state recovery thread, and
> therefore it is correct to run it as a privileged operation.
> 

diff mbox

Patch

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5eec442..537181c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6138,13 +6138,26 @@  static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
 	rpc_call_start(task);
 }
 
+static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data)
+{
+	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+	nfs41_sequence_prepare(task, data);
+}
+
 static const struct rpc_call_ops nfs41_sequence_ops = {
 	.rpc_call_done = nfs41_sequence_call_done,
 	.rpc_call_prepare = nfs41_sequence_prepare,
 	.rpc_release = nfs41_sequence_release,
 };
 
-static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+static const struct rpc_call_ops nfs41_sequence_privileged_ops = {
+	.rpc_call_done = nfs41_sequence_call_done,
+	.rpc_call_prepare = nfs41_sequence_prepare_privileged,
+	.rpc_release = nfs41_sequence_release,
+};
+
+static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred,
+					     const struct rpc_call_ops *seq_ops)
 {
 	struct nfs4_sequence_data *calldata;
 	struct rpc_message msg = {
@@ -6154,7 +6167,7 @@  static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = clp->cl_rpcclient,
 		.rpc_message = &msg,
-		.callback_ops = &nfs41_sequence_ops,
+		.callback_ops = seq_ops,
 		.flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
 	};
 
@@ -6181,7 +6194,7 @@  static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
 
 	if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
 		return 0;
-	task = _nfs41_proc_sequence(clp, cred);
+	task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops);
 	if (IS_ERR(task))
 		ret = PTR_ERR(task);
 	else
@@ -6195,7 +6208,7 @@  static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
 	struct rpc_task *task;
 	int ret;
 
-	task = _nfs41_proc_sequence(clp, cred);
+	task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops);
 	if (IS_ERR(task)) {
 		ret = PTR_ERR(task);
 		goto out;
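
Taken together with the discussion above, the recovery path this patch
unblocks can be sketched as follows. Function names come from the patch
and the thread; the renew_lease wiring is inferred from the discussion,
not quoted source.

/* Sketch of the recovery path (not literal kernel code):
 *
 *   state manager thread
 *     nfs4_check_lease()
 *       -> nfs4_proc_sequence()                   v4.1 lease check
 *          -> _nfs41_proc_sequence(clp, cred,
 *                                  &nfs41_sequence_privileged_ops)
 *             -> nfs41_sequence_prepare_privileged()
 *                -> rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED)
 *                -> nfs41_setup_sequence() now admits the task even
 *                   while NFS4_SESSION_DRAINING is set
 *
 * Periodic renewal via nfs41_proc_async_sequence() keeps the ordinary
 * nfs41_sequence_ops and therefore still waits out a draining session.
 */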