diff mbox series

[1/1] NFSv4.1: fix lone sequence transport assignment

Message ID 20200417151540.22111-1-olga.kornievskaia@gmail.com (mailing list archive)
State New, archived
Headers show
Series [1/1] NFSv4.1: fix lone sequence transport assignment | expand

Commit Message

Olga Kornievskaia April 17, 2020, 3:15 p.m. UTC
When nconnect is used, SEQUENCE operation currently isn't bound to
a particular transport. The problem is created on an idle mount,
where SEQUENCE is the only operation being sent and opened TPC
connections are slowly being close from the lack of use. If SEQUENCE
is not assigned to the main connection, the main connection can
be closed and with that so is the back channel bound to that
connection.

Since the only way client handles callback_path down is by sending
BIND_CONN_TO_SESSION requesting to bind both backchannel and fore
channel on the connection that was left going, but that connection
was already bound to only forechannel. According to the spec, it's
not allowed to change channel binding after they are done.

The fix is to make sure that a lone SEQUENCE always goes on the
main connection, keeping backchannel alive.

Fixes: 5a0c257f8 ("NFS: send state management on a single connection")
Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfs/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Comments

Trond Myklebust April 17, 2020, 3:31 p.m. UTC | #1
Hi Olga,

On Fri, 2020-04-17 at 11:15 -0400, Olga Kornievskaia wrote:
> When nconnect is used, SEQUENCE operation currently isn't bound to
> a particular transport. The problem is created on an idle mount,
> where SEQUENCE is the only operation being sent and opened TPC
> connections are slowly being close from the lack of use. If SEQUENCE
> is not assigned to the main connection, the main connection can
> be closed and with that so is the back channel bound to that
> connection.
> 
> Since the only way client handles callback_path down is by sending
> BIND_CONN_TO_SESSION requesting to bind both backchannel and fore
> channel on the connection that was left going, but that connection
> was already bound to only forechannel. According to the spec, it's
> not allowed to change channel binding after they are done.
> 
> The fix is to make sure that a lone SEQUENCE always goes on the
> main connection, keeping backchannel alive.
> 
> Fixes: 5a0c257f8 ("NFS: send state management on a single
> connection")
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>  fs/nfs/nfs4proc.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 99e9f2e..461f85d 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -8857,7 +8857,7 @@ static struct rpc_task
> *_nfs41_proc_sequence(struct nfs_client *clp,
>  		.rpc_client = clp->cl_rpcclient,
>  		.rpc_message = &msg,
>  		.callback_ops = &nfs41_sequence_ops,
> -		.flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> +		.flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT |
> RPC_TASK_NO_ROUND_ROBIN,
>  	};
>  	struct rpc_task *ret;
>  

This works only in the case where the client is only sending SEQUENCE
instructions. There are other cases where it could be sending out other
operations that also renew the lease, but is doing it very
infrequently. Won't that also run into the same problem?

Is the fundamental problem here that we're not handling the
SEQ4_STATUS_CB_PATH_DOWN / SEQ4_STATUS_CB_PATH_DOWN_SESSION flags
correctly or is there something else going on?
Olga Kornievskaia April 17, 2020, 3:43 p.m. UTC | #2
Hi Trond,

On Fri, Apr 17, 2020 at 11:31 AM Trond Myklebust
<trondmy@hammerspace.com> wrote:
>
> Hi Olga,
>
> On Fri, 2020-04-17 at 11:15 -0400, Olga Kornievskaia wrote:
> > When nconnect is used, SEQUENCE operation currently isn't bound to
> > a particular transport. The problem is created on an idle mount,
> > where SEQUENCE is the only operation being sent and opened TPC
> > connections are slowly being close from the lack of use. If SEQUENCE
> > is not assigned to the main connection, the main connection can
> > be closed and with that so is the back channel bound to that
> > connection.
> >
> > Since the only way client handles callback_path down is by sending
> > BIND_CONN_TO_SESSION requesting to bind both backchannel and fore
> > channel on the connection that was left going, but that connection
> > was already bound to only forechannel. According to the spec, it's
> > not allowed to change channel binding after they are done.
> >
> > The fix is to make sure that a lone SEQUENCE always goes on the
> > main connection, keeping backchannel alive.
> >
> > Fixes: 5a0c257f8 ("NFS: send state management on a single
> > connection")
> > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > ---
> >  fs/nfs/nfs4proc.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index 99e9f2e..461f85d 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -8857,7 +8857,7 @@ static struct rpc_task
> > *_nfs41_proc_sequence(struct nfs_client *clp,
> >               .rpc_client = clp->cl_rpcclient,
> >               .rpc_message = &msg,
> >               .callback_ops = &nfs41_sequence_ops,
> > -             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> > +             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT |
> > RPC_TASK_NO_ROUND_ROBIN,
> >       };
> >       struct rpc_task *ret;
> >
>
> This works only in the case where the client is only sending SEQUENCE
> instructions. There are other cases where it could be sending out other
> operations that also renew the lease, but is doing it very
> infrequently. Won't that also run into the same problem?

Hm... I see so main channel can still go idle and close, when
infrequent operations are happening on the other connections before
round-robin-ing to the main connection....

> Is the fundamental problem here that we're not handling the
> SEQ4_STATUS_CB_PATH_DOWN / SEQ4_STATUS_CB_PATH_DOWN_SESSION flags
> correctly or is there something else going on?

Yes the client doesn't recover properly. But the fix wasn't trivial to
me (so I thought my patch was enough but I see it's not). Say client
shuts down the main connection because it was idle. Now whatever
operations goes on a different connection is going to get callback
down. The only way the client can create a new backchannel (according
to the spec) is if it creates a brand new connection and sends
BIND_CONN_TO_SESSION there (all existing connections are already bound
to fore channel and according to the spec you can't modify the
existing binding). But then we'd need to make sure that it's the first
one in the list of connections we iterate thru (as i think 1st marks
the main connection?) as the other operations that supposed to only go
on main connection need to know which connection to pick.

The reason it's not seen against linux is because it doesn't follow
the spec is doesn't reject attempts to bind a backchannel to an
already existing connection that was only bound for fore channel.

>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
>
>
Trond Myklebust April 17, 2020, 4:20 p.m. UTC | #3
On Fri, 2020-04-17 at 11:43 -0400, Olga Kornievskaia wrote:
> Hi Trond,
> 
> On Fri, Apr 17, 2020 at 11:31 AM Trond Myklebust
> <trondmy@hammerspace.com> wrote:
> > Hi Olga,
> > 
> > On Fri, 2020-04-17 at 11:15 -0400, Olga Kornievskaia wrote:
> > > When nconnect is used, SEQUENCE operation currently isn't bound
> > > to
> > > a particular transport. The problem is created on an idle mount,
> > > where SEQUENCE is the only operation being sent and opened TPC
> > > connections are slowly being close from the lack of use. If
> > > SEQUENCE
> > > is not assigned to the main connection, the main connection can
> > > be closed and with that so is the back channel bound to that
> > > connection.
> > > 
> > > Since the only way client handles callback_path down is by
> > > sending
> > > BIND_CONN_TO_SESSION requesting to bind both backchannel and fore
> > > channel on the connection that was left going, but that
> > > connection
> > > was already bound to only forechannel. According to the spec,
> > > it's
> > > not allowed to change channel binding after they are done.
> > > 
> > > The fix is to make sure that a lone SEQUENCE always goes on the
> > > main connection, keeping backchannel alive.
> > > 
> > > Fixes: 5a0c257f8 ("NFS: send state management on a single
> > > connection")
> > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > ---
> > >  fs/nfs/nfs4proc.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > index 99e9f2e..461f85d 100644
> > > --- a/fs/nfs/nfs4proc.c
> > > +++ b/fs/nfs/nfs4proc.c
> > > @@ -8857,7 +8857,7 @@ static struct rpc_task
> > > *_nfs41_proc_sequence(struct nfs_client *clp,
> > >               .rpc_client = clp->cl_rpcclient,
> > >               .rpc_message = &msg,
> > >               .callback_ops = &nfs41_sequence_ops,
> > > -             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> > > +             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT |
> > > RPC_TASK_NO_ROUND_ROBIN,
> > >       };
> > >       struct rpc_task *ret;
> > > 
> > 
> > This works only in the case where the client is only sending
> > SEQUENCE
> > instructions. There are other cases where it could be sending out
> > other
> > operations that also renew the lease, but is doing it very
> > infrequently. Won't that also run into the same problem?
> 
> Hm... I see so main channel can still go idle and close, when
> infrequent operations are happening on the other connections before
> round-robin-ing to the main connection....
> 
> > Is the fundamental problem here that we're not handling the
> > SEQ4_STATUS_CB_PATH_DOWN / SEQ4_STATUS_CB_PATH_DOWN_SESSION flags
> > correctly or is there something else going on?
> 
> Yes the client doesn't recover properly. But the fix wasn't trivial
> to
> me (so I thought my patch was enough but I see it's not). Say client
> shuts down the main connection because it was idle. Now whatever
> operations goes on a different connection is going to get callback
> down. The only way the client can create a new backchannel (according
> to the spec) is if it creates a brand new connection and sends
> BIND_CONN_TO_SESSION there (all existing connections are already
> bound
> to fore channel and according to the spec you can't modify the
> existing binding). But then we'd need to make sure that it's the
> first
> one in the list of connections we iterate thru (as i think 1st marks
> the main connection?) as the other operations that supposed to only
> go
> on main connection need to know which connection to pick.
> 
> The reason it's not seen against linux is because it doesn't follow
> the spec is doesn't reject attempts to bind a backchannel to an
> already existing connection that was only bound for fore channel.
> 

Oh, I see. So the server is replying NFS4ERR_INVAL in order to let the
client know that it is trying to change the channel bindings for that
connection.
Hmm... Is there any reason why we can't just add a handler to
nfs4_bind_one_conn_to_session_done() that intercepts NFS4ERR_INVAL, and
disconnects the xprt before retrying?
We should probably add a wrapper to xprt_force_disconnect() in
include/linux/sunrpc/clnt.h. Something like the following?


static inline void rpc_task_close_connection(struct rpc_task *task)
{
	if (task->tk_xprt)
		xprt_force_disconnect(task->tk_xprt);
}
Olga Kornievskaia April 17, 2020, 4:46 p.m. UTC | #4
On Fri, Apr 17, 2020 at 12:20 PM Trond Myklebust
<trondmy@hammerspace.com> wrote:
>
> On Fri, 2020-04-17 at 11:43 -0400, Olga Kornievskaia wrote:
> > Hi Trond,
> >
> > On Fri, Apr 17, 2020 at 11:31 AM Trond Myklebust
> > <trondmy@hammerspace.com> wrote:
> > > Hi Olga,
> > >
> > > On Fri, 2020-04-17 at 11:15 -0400, Olga Kornievskaia wrote:
> > > > When nconnect is used, SEQUENCE operation currently isn't bound
> > > > to
> > > > a particular transport. The problem is created on an idle mount,
> > > > where SEQUENCE is the only operation being sent and opened TPC
> > > > connections are slowly being close from the lack of use. If
> > > > SEQUENCE
> > > > is not assigned to the main connection, the main connection can
> > > > be closed and with that so is the back channel bound to that
> > > > connection.
> > > >
> > > > Since the only way client handles callback_path down is by
> > > > sending
> > > > BIND_CONN_TO_SESSION requesting to bind both backchannel and fore
> > > > channel on the connection that was left going, but that
> > > > connection
> > > > was already bound to only forechannel. According to the spec,
> > > > it's
> > > > not allowed to change channel binding after they are done.
> > > >
> > > > The fix is to make sure that a lone SEQUENCE always goes on the
> > > > main connection, keeping backchannel alive.
> > > >
> > > > Fixes: 5a0c257f8 ("NFS: send state management on a single
> > > > connection")
> > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > ---
> > > >  fs/nfs/nfs4proc.c | 2 +-
> > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > >
> > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > index 99e9f2e..461f85d 100644
> > > > --- a/fs/nfs/nfs4proc.c
> > > > +++ b/fs/nfs/nfs4proc.c
> > > > @@ -8857,7 +8857,7 @@ static struct rpc_task
> > > > *_nfs41_proc_sequence(struct nfs_client *clp,
> > > >               .rpc_client = clp->cl_rpcclient,
> > > >               .rpc_message = &msg,
> > > >               .callback_ops = &nfs41_sequence_ops,
> > > > -             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> > > > +             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT |
> > > > RPC_TASK_NO_ROUND_ROBIN,
> > > >       };
> > > >       struct rpc_task *ret;
> > > >
> > >
> > > This works only in the case where the client is only sending
> > > SEQUENCE
> > > instructions. There are other cases where it could be sending out
> > > other
> > > operations that also renew the lease, but is doing it very
> > > infrequently. Won't that also run into the same problem?
> >
> > Hm... I see so main channel can still go idle and close, when
> > infrequent operations are happening on the other connections before
> > round-robin-ing to the main connection....
> >
> > > Is the fundamental problem here that we're not handling the
> > > SEQ4_STATUS_CB_PATH_DOWN / SEQ4_STATUS_CB_PATH_DOWN_SESSION flags
> > > correctly or is there something else going on?
> >
> > Yes the client doesn't recover properly. But the fix wasn't trivial
> > to
> > me (so I thought my patch was enough but I see it's not). Say client
> > shuts down the main connection because it was idle. Now whatever
> > operations goes on a different connection is going to get callback
> > down. The only way the client can create a new backchannel (according
> > to the spec) is if it creates a brand new connection and sends
> > BIND_CONN_TO_SESSION there (all existing connections are already
> > bound
> > to fore channel and according to the spec you can't modify the
> > existing binding). But then we'd need to make sure that it's the
> > first
> > one in the list of connections we iterate thru (as i think 1st marks
> > the main connection?) as the other operations that supposed to only
> > go
> > on main connection need to know which connection to pick.
> >
> > The reason it's not seen against linux is because it doesn't follow
> > the spec is doesn't reject attempts to bind a backchannel to an
> > already existing connection that was only bound for fore channel.
> >
>
> Oh, I see. So the server is replying NFS4ERR_INVAL in order to let the
> client know that it is trying to change the channel bindings for that
> connection.

Well server isn't failing because client is asking for FORE_OR_BOTH
and it's a choice so server is returning FORE. I'm not sure we can ask
the server to fail the request with ERR_INVAL.... (rather I can ask
but ) rather can we expect the server to do that always?

> Hmm... Is there any reason why we can't just add a handler to
> nfs4_bind_one_conn_to_session_done() that intercepts NFS4ERR_INVAL, and
> disconnects the xprt before retrying?
> We should probably add a wrapper to xprt_force_disconnect() in
> include/linux/sunrpc/clnt.h. Something like the following?
>
>
> static inline void rpc_task_close_connection(struct rpc_task *task)
> {
>         if (task->tk_xprt)
>                 xprt_force_disconnect(task->tk_xprt);
> }
>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
>
>
Trond Myklebust April 17, 2020, 4:53 p.m. UTC | #5
On Fri, 2020-04-17 at 12:46 -0400, Olga Kornievskaia wrote:
> On Fri, Apr 17, 2020 at 12:20 PM Trond Myklebust
> <trondmy@hammerspace.com> wrote:
> > On Fri, 2020-04-17 at 11:43 -0400, Olga Kornievskaia wrote:
> > > Hi Trond,
> > > 
> > > On Fri, Apr 17, 2020 at 11:31 AM Trond Myklebust
> > > <trondmy@hammerspace.com> wrote:
> > > > Hi Olga,
> > > > 
> > > > On Fri, 2020-04-17 at 11:15 -0400, Olga Kornievskaia wrote:
> > > > > When nconnect is used, SEQUENCE operation currently isn't
> > > > > bound
> > > > > to
> > > > > a particular transport. The problem is created on an idle
> > > > > mount,
> > > > > where SEQUENCE is the only operation being sent and opened
> > > > > TPC
> > > > > connections are slowly being close from the lack of use. If
> > > > > SEQUENCE
> > > > > is not assigned to the main connection, the main connection
> > > > > can
> > > > > be closed and with that so is the back channel bound to that
> > > > > connection.
> > > > > 
> > > > > Since the only way client handles callback_path down is by
> > > > > sending
> > > > > BIND_CONN_TO_SESSION requesting to bind both backchannel and
> > > > > fore
> > > > > channel on the connection that was left going, but that
> > > > > connection
> > > > > was already bound to only forechannel. According to the spec,
> > > > > it's
> > > > > not allowed to change channel binding after they are done.
> > > > > 
> > > > > The fix is to make sure that a lone SEQUENCE always goes on
> > > > > the
> > > > > main connection, keeping backchannel alive.
> > > > > 
> > > > > Fixes: 5a0c257f8 ("NFS: send state management on a single
> > > > > connection")
> > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > ---
> > > > >  fs/nfs/nfs4proc.c | 2 +-
> > > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > > index 99e9f2e..461f85d 100644
> > > > > --- a/fs/nfs/nfs4proc.c
> > > > > +++ b/fs/nfs/nfs4proc.c
> > > > > @@ -8857,7 +8857,7 @@ static struct rpc_task
> > > > > *_nfs41_proc_sequence(struct nfs_client *clp,
> > > > >               .rpc_client = clp->cl_rpcclient,
> > > > >               .rpc_message = &msg,
> > > > >               .callback_ops = &nfs41_sequence_ops,
> > > > > -             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> > > > > +             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT |
> > > > > RPC_TASK_NO_ROUND_ROBIN,
> > > > >       };
> > > > >       struct rpc_task *ret;
> > > > > 
> > > > 
> > > > This works only in the case where the client is only sending
> > > > SEQUENCE
> > > > instructions. There are other cases where it could be sending
> > > > out
> > > > other
> > > > operations that also renew the lease, but is doing it very
> > > > infrequently. Won't that also run into the same problem?
> > > 
> > > Hm... I see so main channel can still go idle and close, when
> > > infrequent operations are happening on the other connections
> > > before
> > > round-robin-ing to the main connection....
> > > 
> > > > Is the fundamental problem here that we're not handling the
> > > > SEQ4_STATUS_CB_PATH_DOWN / SEQ4_STATUS_CB_PATH_DOWN_SESSION
> > > > flags
> > > > correctly or is there something else going on?
> > > 
> > > Yes the client doesn't recover properly. But the fix wasn't
> > > trivial
> > > to
> > > me (so I thought my patch was enough but I see it's not). Say
> > > client
> > > shuts down the main connection because it was idle. Now whatever
> > > operations goes on a different connection is going to get
> > > callback
> > > down. The only way the client can create a new backchannel
> > > (according
> > > to the spec) is if it creates a brand new connection and sends
> > > BIND_CONN_TO_SESSION there (all existing connections are already
> > > bound
> > > to fore channel and according to the spec you can't modify the
> > > existing binding). But then we'd need to make sure that it's the
> > > first
> > > one in the list of connections we iterate thru (as i think 1st
> > > marks
> > > the main connection?) as the other operations that supposed to
> > > only
> > > go
> > > on main connection need to know which connection to pick.
> > > 
> > > The reason it's not seen against linux is because it doesn't
> > > follow
> > > the spec is doesn't reject attempts to bind a backchannel to an
> > > already existing connection that was only bound for fore channel.
> > > 
> > 
> > Oh, I see. So the server is replying NFS4ERR_INVAL in order to let
> > the
> > client know that it is trying to change the channel bindings for
> > that
> > connection.
> 
> Well server isn't failing because client is asking for FORE_OR_BOTH
> and it's a choice so server is returning FORE. I'm not sure we can
> ask
> the server to fail the request with ERR_INVAL.... (rather I can ask
> but ) rather can we expect the server to do that always?

In RFC5661, Section 18.34.3 I found the following normative text:

   Invoking BIND_CONN_TO_SESSION on a connection already associated with
   the specified session has no effect, and the server MUST respond with
   NFS4_OK, unless the client is demanding changes to the set of
   channels the connection is associated with.  If so, the server MUST
   return NFS4ERR_INVAL.


IOW: it sounds like your server isn't following the spec either. 
Olga Kornievskaia April 17, 2020, 6:08 p.m. UTC | #6
On Fri, Apr 17, 2020 at 12:53 PM Trond Myklebust
<trondmy@hammerspace.com> wrote:
>
> On Fri, 2020-04-17 at 12:46 -0400, Olga Kornievskaia wrote:
> > On Fri, Apr 17, 2020 at 12:20 PM Trond Myklebust
> > <trondmy@hammerspace.com> wrote:
> > > On Fri, 2020-04-17 at 11:43 -0400, Olga Kornievskaia wrote:
> > > > Hi Trond,
> > > >
> > > > On Fri, Apr 17, 2020 at 11:31 AM Trond Myklebust
> > > > <trondmy@hammerspace.com> wrote:
> > > > > Hi Olga,
> > > > >
> > > > > On Fri, 2020-04-17 at 11:15 -0400, Olga Kornievskaia wrote:
> > > > > > When nconnect is used, SEQUENCE operation currently isn't
> > > > > > bound
> > > > > > to
> > > > > > a particular transport. The problem is created on an idle
> > > > > > mount,
> > > > > > where SEQUENCE is the only operation being sent and opened
> > > > > > TPC
> > > > > > connections are slowly being close from the lack of use. If
> > > > > > SEQUENCE
> > > > > > is not assigned to the main connection, the main connection
> > > > > > can
> > > > > > be closed and with that so is the back channel bound to that
> > > > > > connection.
> > > > > >
> > > > > > Since the only way client handles callback_path down is by
> > > > > > sending
> > > > > > BIND_CONN_TO_SESSION requesting to bind both backchannel and
> > > > > > fore
> > > > > > channel on the connection that was left going, but that
> > > > > > connection
> > > > > > was already bound to only forechannel. According to the spec,
> > > > > > it's
> > > > > > not allowed to change channel binding after they are done.
> > > > > >
> > > > > > The fix is to make sure that a lone SEQUENCE always goes on
> > > > > > the
> > > > > > main connection, keeping backchannel alive.
> > > > > >
> > > > > > Fixes: 5a0c257f8 ("NFS: send state management on a single
> > > > > > connection")
> > > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > > ---
> > > > > >  fs/nfs/nfs4proc.c | 2 +-
> > > > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > > > >
> > > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > > > index 99e9f2e..461f85d 100644
> > > > > > --- a/fs/nfs/nfs4proc.c
> > > > > > +++ b/fs/nfs/nfs4proc.c
> > > > > > @@ -8857,7 +8857,7 @@ static struct rpc_task
> > > > > > *_nfs41_proc_sequence(struct nfs_client *clp,
> > > > > >               .rpc_client = clp->cl_rpcclient,
> > > > > >               .rpc_message = &msg,
> > > > > >               .callback_ops = &nfs41_sequence_ops,
> > > > > > -             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> > > > > > +             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT |
> > > > > > RPC_TASK_NO_ROUND_ROBIN,
> > > > > >       };
> > > > > >       struct rpc_task *ret;
> > > > > >
> > > > >
> > > > > This works only in the case where the client is only sending
> > > > > SEQUENCE
> > > > > instructions. There are other cases where it could be sending
> > > > > out
> > > > > other
> > > > > operations that also renew the lease, but is doing it very
> > > > > infrequently. Won't that also run into the same problem?
> > > >
> > > > Hm... I see so main channel can still go idle and close, when
> > > > infrequent operations are happening on the other connections
> > > > before
> > > > round-robin-ing to the main connection....
> > > >
> > > > > Is the fundamental problem here that we're not handling the
> > > > > SEQ4_STATUS_CB_PATH_DOWN / SEQ4_STATUS_CB_PATH_DOWN_SESSION
> > > > > flags
> > > > > correctly or is there something else going on?
> > > >
> > > > Yes the client doesn't recover properly. But the fix wasn't
> > > > trivial
> > > > to
> > > > me (so I thought my patch was enough but I see it's not). Say
> > > > client
> > > > shuts down the main connection because it was idle. Now whatever
> > > > operations goes on a different connection is going to get
> > > > callback
> > > > down. The only way the client can create a new backchannel
> > > > (according
> > > > to the spec) is if it creates a brand new connection and sends
> > > > BIND_CONN_TO_SESSION there (all existing connections are already
> > > > bound
> > > > to fore channel and according to the spec you can't modify the
> > > > existing binding). But then we'd need to make sure that it's the
> > > > first
> > > > one in the list of connections we iterate thru (as i think 1st
> > > > marks
> > > > the main connection?) as the other operations that supposed to
> > > > only
> > > > go
> > > > on main connection need to know which connection to pick.
> > > >
> > > > The reason it's not seen against linux is because it doesn't
> > > > follow
> > > > the spec is doesn't reject attempts to bind a backchannel to an
> > > > already existing connection that was only bound for fore channel.
> > > >
> > >
> > > Oh, I see. So the server is replying NFS4ERR_INVAL in order to let
> > > the
> > > client know that it is trying to change the channel bindings for
> > > that
> > > connection.
> >
> > Well server isn't failing because client is asking for FORE_OR_BOTH
> > and it's a choice so server is returning FORE. I'm not sure we can
> > ask
> > the server to fail the request with ERR_INVAL.... (rather I can ask
> > but ) rather can we expect the server to do that always?
>
> In RFC5661, Section 18.34.3 I found the following normative text:
>
>    Invoking BIND_CONN_TO_SESSION on a connection already associated with
>    the specified session has no effect, and the server MUST respond with
>    NFS4_OK, unless the client is demanding changes to the set of
>    channels the connection is associated with.  If so, the server MUST
>    return NFS4ERR_INVAL.
>
>
> IOW: it sounds like your server isn't following the spec either.

Thank you thank you! I missed that. Ok so we'll assume it does and I
will try to come up with what you are suggesting...

Do we need to do reorder the connection list? After we reset the
connection, it will be on some random connection on the list of
connections. But for the NO_ROBIN connections we pick the first
connection in the list (which will not be the backchannel connection).
Does it matter?

>
> > > Hmm... Is there any reason why we can't just add a handler to
> > > nfs4_bind_one_conn_to_session_done() that intercepts NFS4ERR_INVAL,
> > > and
> > > disconnects the xprt before retrying?
> > > We should probably add a wrapper to xprt_force_disconnect() in
> > > include/linux/sunrpc/clnt.h. Something like the following?
> > >
> > >
> > > static inline void rpc_task_close_connection(struct rpc_task *task)
> > > {
> > >         if (task->tk_xprt)
> > >                 xprt_force_disconnect(task->tk_xprt);
> > > }
> > >
>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
>
>
Trond Myklebust April 17, 2020, 6:41 p.m. UTC | #7
On Fri, 2020-04-17 at 14:08 -0400, Olga Kornievskaia wrote:
> On Fri, Apr 17, 2020 at 12:53 PM Trond Myklebust
> <trondmy@hammerspace.com> wrote:
> > On Fri, 2020-04-17 at 12:46 -0400, Olga Kornievskaia wrote:
> > > On Fri, Apr 17, 2020 at 12:20 PM Trond Myklebust
> > > <trondmy@hammerspace.com> wrote:
> > > > On Fri, 2020-04-17 at 11:43 -0400, Olga Kornievskaia wrote:
> > > > > Hi Trond,
> > > > > 
> > > > > On Fri, Apr 17, 2020 at 11:31 AM Trond Myklebust
> > > > > <trondmy@hammerspace.com> wrote:
> > > > > > Hi Olga,
> > > > > > 
> > > > > > On Fri, 2020-04-17 at 11:15 -0400, Olga Kornievskaia wrote:
> > > > > > > When nconnect is used, SEQUENCE operation currently isn't
> > > > > > > bound
> > > > > > > to
> > > > > > > a particular transport. The problem is created on an idle
> > > > > > > mount,
> > > > > > > where SEQUENCE is the only operation being sent and
> > > > > > > opened
> > > > > > > TPC
> > > > > > > connections are slowly being close from the lack of use.
> > > > > > > If
> > > > > > > SEQUENCE
> > > > > > > is not assigned to the main connection, the main
> > > > > > > connection
> > > > > > > can
> > > > > > > be closed and with that so is the back channel bound to
> > > > > > > that
> > > > > > > connection.
> > > > > > > 
> > > > > > > Since the only way client handles callback_path down is
> > > > > > > by
> > > > > > > sending
> > > > > > > BIND_CONN_TO_SESSION requesting to bind both backchannel
> > > > > > > and
> > > > > > > fore
> > > > > > > channel on the connection that was left going, but that
> > > > > > > connection
> > > > > > > was already bound to only forechannel. According to the
> > > > > > > spec,
> > > > > > > it's
> > > > > > > not allowed to change channel binding after they are
> > > > > > > done.
> > > > > > > 
> > > > > > > The fix is to make sure that a lone SEQUENCE always goes
> > > > > > > on
> > > > > > > the
> > > > > > > main connection, keeping backchannel alive.
> > > > > > > 
> > > > > > > Fixes: 5a0c257f8 ("NFS: send state management on a single
> > > > > > > connection")
> > > > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > ---
> > > > > > >  fs/nfs/nfs4proc.c | 2 +-
> > > > > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > > > > > 
> > > > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > > > > index 99e9f2e..461f85d 100644
> > > > > > > --- a/fs/nfs/nfs4proc.c
> > > > > > > +++ b/fs/nfs/nfs4proc.c
> > > > > > > @@ -8857,7 +8857,7 @@ static struct rpc_task
> > > > > > > *_nfs41_proc_sequence(struct nfs_client *clp,
> > > > > > >               .rpc_client = clp->cl_rpcclient,
> > > > > > >               .rpc_message = &msg,
> > > > > > >               .callback_ops = &nfs41_sequence_ops,
> > > > > > > -             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> > > > > > > +             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT
> > > > > > > |
> > > > > > > RPC_TASK_NO_ROUND_ROBIN,
> > > > > > >       };
> > > > > > >       struct rpc_task *ret;
> > > > > > > 
> > > > > > 
> > > > > > This works only in the case where the client is only
> > > > > > sending
> > > > > > SEQUENCE
> > > > > > instructions. There are other cases where it could be
> > > > > > sending
> > > > > > out
> > > > > > other
> > > > > > operations that also renew the lease, but is doing it very
> > > > > > infrequently. Won't that also run into the same problem?
> > > > > 
> > > > > Hm... I see so main channel can still go idle and close, when
> > > > > infrequent operations are happening on the other connections
> > > > > before
> > > > > round-robin-ing to the main connection....
> > > > > 
> > > > > > Is the fundamental problem here that we're not handling the
> > > > > > SEQ4_STATUS_CB_PATH_DOWN / SEQ4_STATUS_CB_PATH_DOWN_SESSION
> > > > > > flags
> > > > > > correctly or is there something else going on?
> > > > > 
> > > > > Yes the client doesn't recover properly. But the fix wasn't
> > > > > trivial
> > > > > to
> > > > > me (so I thought my patch was enough but I see it's not). Say
> > > > > client
> > > > > shuts down the main connection because it was idle. Now
> > > > > whatever
> > > > > operations goes on a different connection is going to get
> > > > > callback
> > > > > down. The only way the client can create a new backchannel
> > > > > (according
> > > > > to the spec) is if it creates a brand new connection and
> > > > > sends
> > > > > BIND_CONN_TO_SESSION there (all existing connections are
> > > > > already
> > > > > bound
> > > > > to fore channel and according to the spec you can't modify
> > > > > the
> > > > > existing binding). But then we'd need to make sure that it's
> > > > > the
> > > > > first
> > > > > one in the list of connections we iterate thru (as i think
> > > > > 1st
> > > > > marks
> > > > > the main connection?) as the other operations that supposed
> > > > > to
> > > > > only
> > > > > go
> > > > > on main connection need to know which connection to pick.
> > > > > 
> > > > > The reason it's not seen against linux is because it doesn't
> > > > > follow
> > > > > the spec is doesn't reject attempts to bind a backchannel to
> > > > > an
> > > > > already existing connection that was only bound for fore
> > > > > channel.
> > > > > 
> > > > 
> > > > Oh, I see. So the server is replying NFS4ERR_INVAL in order to
> > > > let
> > > > the
> > > > client know that it is trying to change the channel bindings
> > > > for
> > > > that
> > > > connection.
> > > 
> > > Well server isn't failing because client is asking for
> > > FORE_OR_BOTH
> > > and it's a choice so server is returning FORE. I'm not sure we
> > > can
> > > ask
> > > the server to fail the request with ERR_INVAL.... (rather I can
> > > ask
> > > but ) rather can we expect the server to do that always?
> > 
> > In RFC5661, Section 18.34.3 I found the following normative text:
> > 
> >    Invoking BIND_CONN_TO_SESSION on a connection already associated
> > with
> >    the specified session has no effect, and the server MUST respond
> > with
> >    NFS4_OK, unless the client is demanding changes to the set of
> >    channels the connection is associated with.  If so, the server
> > MUST
> >    return NFS4ERR_INVAL.
> > 
> > 
> > IOW: it sounds like your server isn't following the spec either.
> 
> Thank you thank you! I missed that. Ok so we'll assume it does and I
> will try to come up with what you are suggesting...
> 
> Do we need to do reorder the connection list? After we reset the
> connection, it will be on some random connection on the list of
> connections. But for the NO_ROBIN connections we pick the first
> connection in the list (which will not be the backchannel
> connection).
> Does it matter?

That's why I suggested the rpc_task_close_connection() wrapper below.
It should ensure that we close the same connection that got the
NFS4ERR_INVAL, and then when we restart the RPC call (using
rpc_restart_call_prepare()) it will just automatically create a new
connection on that same struct xprt...

> 
> > > > Hmm... Is there any reason why we can't just add a handler to
> > > > nfs4_bind_one_conn_to_session_done() that intercepts
> > > > NFS4ERR_INVAL,
> > > > and
> > > > disconnects the xprt before retrying?
> > > > We should probably add a wrapper to xprt_force_disconnect() in
> > > > include/linux/sunrpc/clnt.h. Something like the following?
> > > > 
> > > > 
> > > > static inline void rpc_task_close_connection(struct rpc_task
> > > > *task)
> > > > {
> > > >         if (task->tk_xprt)
> > > >                 xprt_force_disconnect(task->tk_xprt);
> > > > }
> > > > 
> >
Olga Kornievskaia April 17, 2020, 7:06 p.m. UTC | #8
On Fri, Apr 17, 2020 at 2:41 PM Trond Myklebust <trondmy@hammerspace.com> wrote:
>
> On Fri, 2020-04-17 at 14:08 -0400, Olga Kornievskaia wrote:
> > On Fri, Apr 17, 2020 at 12:53 PM Trond Myklebust
> > <trondmy@hammerspace.com> wrote:
> > > On Fri, 2020-04-17 at 12:46 -0400, Olga Kornievskaia wrote:
> > > > On Fri, Apr 17, 2020 at 12:20 PM Trond Myklebust
> > > > <trondmy@hammerspace.com> wrote:
> > > > > On Fri, 2020-04-17 at 11:43 -0400, Olga Kornievskaia wrote:
> > > > > > Hi Trond,
> > > > > >
> > > > > > On Fri, Apr 17, 2020 at 11:31 AM Trond Myklebust
> > > > > > <trondmy@hammerspace.com> wrote:
> > > > > > > Hi Olga,
> > > > > > >
> > > > > > > On Fri, 2020-04-17 at 11:15 -0400, Olga Kornievskaia wrote:
> > > > > > > > When nconnect is used, SEQUENCE operation currently isn't
> > > > > > > > bound
> > > > > > > > to
> > > > > > > > a particular transport. The problem is created on an idle
> > > > > > > > mount,
> > > > > > > > where SEQUENCE is the only operation being sent and
> > > > > > > > opened
> > > > > > > > TPC
> > > > > > > > connections are slowly being close from the lack of use.
> > > > > > > > If
> > > > > > > > SEQUENCE
> > > > > > > > is not assigned to the main connection, the main
> > > > > > > > connection
> > > > > > > > can
> > > > > > > > be closed and with that so is the back channel bound to
> > > > > > > > that
> > > > > > > > connection.
> > > > > > > >
> > > > > > > > Since the only way client handles callback_path down is
> > > > > > > > by
> > > > > > > > sending
> > > > > > > > BIND_CONN_TO_SESSION requesting to bind both backchannel
> > > > > > > > and
> > > > > > > > fore
> > > > > > > > channel on the connection that was left going, but that
> > > > > > > > connection
> > > > > > > > was already bound to only forechannel. According to the
> > > > > > > > spec,
> > > > > > > > it's
> > > > > > > > not allowed to change channel binding after they are
> > > > > > > > done.
> > > > > > > >
> > > > > > > > The fix is to make sure that a lone SEQUENCE always goes
> > > > > > > > on
> > > > > > > > the
> > > > > > > > main connection, keeping backchannel alive.
> > > > > > > >
> > > > > > > > Fixes: 5a0c257f8 ("NFS: send state management on a single
> > > > > > > > connection")
> > > > > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > > ---
> > > > > > > >  fs/nfs/nfs4proc.c | 2 +-
> > > > > > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > > > > > >
> > > > > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > > > > > index 99e9f2e..461f85d 100644
> > > > > > > > --- a/fs/nfs/nfs4proc.c
> > > > > > > > +++ b/fs/nfs/nfs4proc.c
> > > > > > > > @@ -8857,7 +8857,7 @@ static struct rpc_task
> > > > > > > > *_nfs41_proc_sequence(struct nfs_client *clp,
> > > > > > > >               .rpc_client = clp->cl_rpcclient,
> > > > > > > >               .rpc_message = &msg,
> > > > > > > >               .callback_ops = &nfs41_sequence_ops,
> > > > > > > > -             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> > > > > > > > +             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT
> > > > > > > > |
> > > > > > > > RPC_TASK_NO_ROUND_ROBIN,
> > > > > > > >       };
> > > > > > > >       struct rpc_task *ret;
> > > > > > > >
> > > > > > >
> > > > > > > This works only in the case where the client is only
> > > > > > > sending
> > > > > > > SEQUENCE
> > > > > > > instructions. There are other cases where it could be
> > > > > > > sending
> > > > > > > out
> > > > > > > other
> > > > > > > operations that also renew the lease, but is doing it very
> > > > > > > infrequently. Won't that also run into the same problem?
> > > > > >
> > > > > > Hm... I see so main channel can still go idle and close, when
> > > > > > infrequent operations are happening on the other connections
> > > > > > before
> > > > > > round-robin-ing to the main connection....
> > > > > >
> > > > > > > Is the fundamental problem here that we're not handling the
> > > > > > > SEQ4_STATUS_CB_PATH_DOWN / SEQ4_STATUS_CB_PATH_DOWN_SESSION
> > > > > > > flags
> > > > > > > correctly or is there something else going on?
> > > > > >
> > > > > > Yes the client doesn't recover properly. But the fix wasn't
> > > > > > trivial
> > > > > > to
> > > > > > me (so I thought my patch was enough but I see it's not). Say
> > > > > > client
> > > > > > shuts down the main connection because it was idle. Now
> > > > > > whatever
> > > > > > operations goes on a different connection is going to get
> > > > > > callback
> > > > > > down. The only way the client can create a new backchannel
> > > > > > (according
> > > > > > to the spec) is if it creates a brand new connection and
> > > > > > sends
> > > > > > BIND_CONN_TO_SESSION there (all existing connections are
> > > > > > already
> > > > > > bound
> > > > > > to fore channel and according to the spec you can't modify
> > > > > > the
> > > > > > existing binding). But then we'd need to make sure that it's
> > > > > > the
> > > > > > first
> > > > > > one in the list of connections we iterate thru (as i think
> > > > > > 1st
> > > > > > marks
> > > > > > the main connection?) as the other operations that supposed
> > > > > > to
> > > > > > only
> > > > > > go
> > > > > > on main connection need to know which connection to pick.
> > > > > >
> > > > > > The reason it's not seen against linux is because it doesn't
> > > > > > follow
> > > > > > the spec is doesn't reject attempts to bind a backchannel to
> > > > > > an
> > > > > > already existing connection that was only bound for fore
> > > > > > channel.
> > > > > >
> > > > >
> > > > > Oh, I see. So the server is replying NFS4ERR_INVAL in order to
> > > > > let
> > > > > the
> > > > > client know that it is trying to change the channel bindings
> > > > > for
> > > > > that
> > > > > connection.
> > > >
> > > > Well server isn't failing because client is asking for
> > > > FORE_OR_BOTH
> > > > and it's a choice so server is returning FORE. I'm not sure we
> > > > can
> > > > ask
> > > > the server to fail the request with ERR_INVAL.... (rather I can
> > > > ask
> > > > but ) rather can we expect the server to do that always?
> > >
> > > In RFC5661, Section 18.34.3 I found the following normative text:
> > >
> > >    Invoking BIND_CONN_TO_SESSION on a connection already associated
> > > with
> > >    the specified session has no effect, and the server MUST respond
> > > with
> > >    NFS4_OK, unless the client is demanding changes to the set of
> > >    channels the connection is associated with.  If so, the server
> > > MUST
> > >    return NFS4ERR_INVAL.
> > >
> > >
> > > IOW: it sounds like your server isn't following the spec either.
> >
> > Thank you thank you! I missed that. Ok so we'll assume it does and I
> > will try to come up with what you are suggesting...
> >
> > Do we need to do reorder the connection list? After we reset the
> > connection, it will be on some random connection on the list of
> > connections. But for the NO_ROBIN connections we pick the first
> > connection in the list (which will not be the backchannel
> > connection).
> > Does it matter?
>
> That's why I suggested the rpc_task_close_connection() wrapper below.
> It should ensure that we close the same connection that got the
> NFS4ERR_INVAL, and then when we restart the RPC call (using
> rpc_restart_call_prepare()) it will just automatically create a new
> connection on that same struct xprt...

Ok let me try with an example.
Start with:
connection1 (first one on the list) (also the backchannel connection)
connection2
connection3

1. connection1 and 2 go idle and are stopped. Now backchannel is down.
2. connection3 sends a SEQUENCE (or whatever op) and gets callback_path down
3. client send BIND_CONN_TO_SESSION on connection3. gets ERR_INVAL.
Client resets the connection3 and sends new BIND_CONN_TO_SESSION and
now connection3 is our backchannel and fore channel. But connection3
is not the first one the list of transports
4. state management operations will pick connection1 out of the list
(but it's not a backchannel connection).

Is that a problem?

>
> >
> > > > > Hmm... Is there any reason why we can't just add a handler to
> > > > > nfs4_bind_one_conn_to_session_done() that intercepts
> > > > > NFS4ERR_INVAL,
> > > > > and
> > > > > disconnects the xprt before retrying?
> > > > > We should probably add a wrapper to xprt_force_disconnect() in
> > > > > include/linux/sunrpc/clnt.h. Something like the following?
> > > > >
> > > > >
> > > > > static inline void rpc_task_close_connection(struct rpc_task
> > > > > *task)
> > > > > {
> > > > >         if (task->tk_xprt)
> > > > >                 xprt_force_disconnect(task->tk_xprt);
> > > > > }
> > > > >
> > >
>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
>
>
Trond Myklebust April 17, 2020, 7:24 p.m. UTC | #9
On Fri, 2020-04-17 at 15:06 -0400, Olga Kornievskaia wrote:
> On Fri, Apr 17, 2020 at 2:41 PM Trond Myklebust <
> trondmy@hammerspace.com> wrote:
> > On Fri, 2020-04-17 at 14:08 -0400, Olga Kornievskaia wrote:
> > > On Fri, Apr 17, 2020 at 12:53 PM Trond Myklebust
> > > <trondmy@hammerspace.com> wrote:
> > > > On Fri, 2020-04-17 at 12:46 -0400, Olga Kornievskaia wrote:
> > > > > On Fri, Apr 17, 2020 at 12:20 PM Trond Myklebust
> > > > > <trondmy@hammerspace.com> wrote:
> > > > > > On Fri, 2020-04-17 at 11:43 -0400, Olga Kornievskaia wrote:
> > > > > > > Hi Trond,
> > > > > > > 
> > > > > > > On Fri, Apr 17, 2020 at 11:31 AM Trond Myklebust
> > > > > > > <trondmy@hammerspace.com> wrote:
> > > > > > > > Hi Olga,
> > > > > > > > 
> > > > > > > > On Fri, 2020-04-17 at 11:15 -0400, Olga Kornievskaia
> > > > > > > > wrote:
> > > > > > > > > When nconnect is used, SEQUENCE operation currently
> > > > > > > > > isn't
> > > > > > > > > bound
> > > > > > > > > to
> > > > > > > > > a particular transport. The problem is created on an
> > > > > > > > > idle
> > > > > > > > > mount,
> > > > > > > > > where SEQUENCE is the only operation being sent and
> > > > > > > > > opened
> > > > > > > > > TPC
> > > > > > > > > connections are slowly being close from the lack of
> > > > > > > > > use.
> > > > > > > > > If
> > > > > > > > > SEQUENCE
> > > > > > > > > is not assigned to the main connection, the main
> > > > > > > > > connection
> > > > > > > > > can
> > > > > > > > > be closed and with that so is the back channel bound
> > > > > > > > > to
> > > > > > > > > that
> > > > > > > > > connection.
> > > > > > > > > 
> > > > > > > > > Since the only way client handles callback_path down
> > > > > > > > > is
> > > > > > > > > by
> > > > > > > > > sending
> > > > > > > > > BIND_CONN_TO_SESSION requesting to bind both
> > > > > > > > > backchannel
> > > > > > > > > and
> > > > > > > > > fore
> > > > > > > > > channel on the connection that was left going, but
> > > > > > > > > that
> > > > > > > > > connection
> > > > > > > > > was already bound to only forechannel. According to
> > > > > > > > > the
> > > > > > > > > spec,
> > > > > > > > > it's
> > > > > > > > > not allowed to change channel binding after they are
> > > > > > > > > done.
> > > > > > > > > 
> > > > > > > > > The fix is to make sure that a lone SEQUENCE always
> > > > > > > > > goes
> > > > > > > > > on
> > > > > > > > > the
> > > > > > > > > main connection, keeping backchannel alive.
> > > > > > > > > 
> > > > > > > > > Fixes: 5a0c257f8 ("NFS: send state management on a
> > > > > > > > > single
> > > > > > > > > connection")
> > > > > > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > > > ---
> > > > > > > > >  fs/nfs/nfs4proc.c | 2 +-
> > > > > > > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > > > > > > index 99e9f2e..461f85d 100644
> > > > > > > > > --- a/fs/nfs/nfs4proc.c
> > > > > > > > > +++ b/fs/nfs/nfs4proc.c
> > > > > > > > > @@ -8857,7 +8857,7 @@ static struct rpc_task
> > > > > > > > > *_nfs41_proc_sequence(struct nfs_client *clp,
> > > > > > > > >               .rpc_client = clp->cl_rpcclient,
> > > > > > > > >               .rpc_message = &msg,
> > > > > > > > >               .callback_ops = &nfs41_sequence_ops,
> > > > > > > > > -             .flags = RPC_TASK_ASYNC |
> > > > > > > > > RPC_TASK_TIMEOUT,
> > > > > > > > > +             .flags = RPC_TASK_ASYNC |
> > > > > > > > > RPC_TASK_TIMEOUT
> > > > > > > > > RPC_TASK_NO_ROUND_ROBIN,
> > > > > > > > >       };
> > > > > > > > >       struct rpc_task *ret;
> > > > > > > > > 
> > > > > > > > 
> > > > > > > > This works only in the case where the client is only
> > > > > > > > sending
> > > > > > > > SEQUENCE
> > > > > > > > instructions. There are other cases where it could be
> > > > > > > > sending
> > > > > > > > out
> > > > > > > > other
> > > > > > > > operations that also renew the lease, but is doing it
> > > > > > > > very
> > > > > > > > infrequently. Won't that also run into the same
> > > > > > > > problem?
> > > > > > > 
> > > > > > > Hm... I see so main channel can still go idle and close,
> > > > > > > when
> > > > > > > infrequent operations are happening on the other
> > > > > > > connections
> > > > > > > before
> > > > > > > round-robin-ing to the main connection....
> > > > > > > 
> > > > > > > > Is the fundamental problem here that we're not handling
> > > > > > > > the
> > > > > > > > SEQ4_STATUS_CB_PATH_DOWN /
> > > > > > > > SEQ4_STATUS_CB_PATH_DOWN_SESSION
> > > > > > > > flags
> > > > > > > > correctly or is there something else going on?
> > > > > > > 
> > > > > > > Yes the client doesn't recover properly. But the fix
> > > > > > > wasn't
> > > > > > > trivial
> > > > > > > to
> > > > > > > me (so I thought my patch was enough but I see it's not).
> > > > > > > Say
> > > > > > > client
> > > > > > > shuts down the main connection because it was idle. Now
> > > > > > > whatever
> > > > > > > operations goes on a different connection is going to get
> > > > > > > callback
> > > > > > > down. The only way the client can create a new
> > > > > > > backchannel
> > > > > > > (according
> > > > > > > to the spec) is if it creates a brand new connection and
> > > > > > > sends
> > > > > > > BIND_CONN_TO_SESSION there (all existing connections are
> > > > > > > already
> > > > > > > bound
> > > > > > > to fore channel and according to the spec you can't
> > > > > > > modify
> > > > > > > the
> > > > > > > existing binding). But then we'd need to make sure that
> > > > > > > it's
> > > > > > > the
> > > > > > > first
> > > > > > > one in the list of connections we iterate thru (as i
> > > > > > > think
> > > > > > > 1st
> > > > > > > marks
> > > > > > > the main connection?) as the other operations that
> > > > > > > supposed
> > > > > > > to
> > > > > > > only
> > > > > > > go
> > > > > > > on main connection need to know which connection to pick.
> > > > > > > 
> > > > > > > The reason it's not seen against linux is because it
> > > > > > > doesn't
> > > > > > > follow
> > > > > > > the spec is doesn't reject attempts to bind a backchannel
> > > > > > > to
> > > > > > > an
> > > > > > > already existing connection that was only bound for fore
> > > > > > > channel.
> > > > > > > 
> > > > > > 
> > > > > > Oh, I see. So the server is replying NFS4ERR_INVAL in order
> > > > > > to
> > > > > > let
> > > > > > the
> > > > > > client know that it is trying to change the channel
> > > > > > bindings
> > > > > > for
> > > > > > that
> > > > > > connection.
> > > > > 
> > > > > Well server isn't failing because client is asking for
> > > > > FORE_OR_BOTH
> > > > > and it's a choice so server is returning FORE. I'm not sure
> > > > > we
> > > > > can
> > > > > ask
> > > > > the server to fail the request with ERR_INVAL.... (rather I
> > > > > can
> > > > > ask
> > > > > but ) rather can we expect the server to do that always?
> > > > 
> > > > In RFC5661, Section 18.34.3 I found the following normative
> > > > text:
> > > > 
> > > >    Invoking BIND_CONN_TO_SESSION on a connection already
> > > > associated
> > > > with
> > > >    the specified session has no effect, and the server MUST
> > > > respond
> > > > with
> > > >    NFS4_OK, unless the client is demanding changes to the set
> > > > of
> > > >    channels the connection is associated with.  If so, the
> > > > server
> > > > MUST
> > > >    return NFS4ERR_INVAL.
> > > > 
> > > > 
> > > > IOW: it sounds like your server isn't following the spec
> > > > either.
> > > 
> > > Thank you thank you! I missed that. Ok so we'll assume it does
> > > and I
> > > will try to come up with what you are suggesting...
> > > 
> > > Do we need to do reorder the connection list? After we reset the
> > > connection, it will be on some random connection on the list of
> > > connections. But for the NO_ROBIN connections we pick the first
> > > connection in the list (which will not be the backchannel
> > > connection).
> > > Does it matter?
> > 
> > That's why I suggested the rpc_task_close_connection() wrapper
> > below.
> > It should ensure that we close the same connection that got the
> > NFS4ERR_INVAL, and then when we restart the RPC call (using
> > rpc_restart_call_prepare()) it will just automatically create a new
> > connection on that same struct xprt...
> 
> Ok let me try with an example.
> Start with:
> connection1 (first one on the list) (also the backchannel connection)
> connection2
> connection3
> 
> 1. connection1 and 2 go idle and are stopped. Now backchannel is
> down.
> 2. connection3 sends a SEQUENCE (or whatever op) and gets
> callback_path down
> 3. client send BIND_CONN_TO_SESSION on connection3. gets ERR_INVAL.
> Client resets the connection3 and sends new BIND_CONN_TO_SESSION and
> now connection3 is our backchannel and fore channel. But connection3
> is not the first one the list of transports
> 4. state management operations will pick connection1 out of the list
> (but it's not a backchannel connection).
> 
> Is that a problem?
> 

Right now it shouldn't be a problem because
nfs4_proc_bind_conn_to_session() actually iterates through all the
connections and sends a BIND_CONN_TO_SESSION on each one.

It only sets NFS4_CDFC4_FORE_OR_BOTH if the session flags indicate a
back channel is required, and if this is the first connection in the
list (i.e. if xprt == clnt->cl_xprt).

So as far as I can tell, we should actually already be OK. The client
should consistently be setting the same flags on the same struct xprts
every time there is a new connection.
As far as I can tell, we should therefore actually never be seeing any
NFS4ERR_INVAL cases but only be hitting the case that the spec allows.
That said, it can't harm to have a handler for it.

> > > > > > Hmm... Is there any reason why we can't just add a handler
> > > > > > to
> > > > > > nfs4_bind_one_conn_to_session_done() that intercepts
> > > > > > NFS4ERR_INVAL,
> > > > > > and
> > > > > > disconnects the xprt before retrying?
> > > > > > We should probably add a wrapper to xprt_force_disconnect()
> > > > > > in
> > > > > > include/linux/sunrpc/clnt.h. Something like the following?
> > > > > > 
> > > > > > 
> > > > > > static inline void rpc_task_close_connection(struct
> > > > > > rpc_task
> > > > > > *task)
> > > > > > {
> > > > > >         if (task->tk_xprt)
> > > > > >                 xprt_force_disconnect(task->tk_xprt);
> > > > > > }
> > > > > >
Trond Myklebust April 20, 2020, 2:11 p.m. UTC | #10
On Mon, 2020-04-20 at 09:58 -0400, Olga Kornievskaia wrote:
> 
> 
> On Fri, Apr 17, 2020 at 12:53 PM Trond Myklebust <
> trondmy@hammerspace.com> wrote:
> > On Fri, 2020-04-17 at 12:46 -0400, Olga Kornievskaia wrote:
> > > On Fri, Apr 17, 2020 at 12:20 PM Trond Myklebust
> > > <trondmy@hammerspace.com> wrote:
> > > > On Fri, 2020-04-17 at 11:43 -0400, Olga Kornievskaia wrote:
> > > > > Hi Trond,
> > > > > 
> > > > > On Fri, Apr 17, 2020 at 11:31 AM Trond Myklebust
> > > > > <trondmy@hammerspace.com> wrote:
> > > > > > Hi Olga,
> > > > > > 
> > > > > > On Fri, 2020-04-17 at 11:15 -0400, Olga Kornievskaia wrote:
> > > > > > > When nconnect is used, SEQUENCE operation currently isn't
> > > > > > > bound
> > > > > > > to
> > > > > > > a particular transport. The problem is created on an idle
> > > > > > > mount,
> > > > > > > where SEQUENCE is the only operation being sent and
> > opened
> > > > > > > TPC
> > > > > > > connections are slowly being close from the lack of use.
> > If
> > > > > > > SEQUENCE
> > > > > > > is not assigned to the main connection, the main
> > connection
> > > > > > > can
> > > > > > > be closed and with that so is the back channel bound to
> > that
> > > > > > > connection.
> > > > > > > 
> > > > > > > Since the only way client handles callback_path down is
> > by
> > > > > > > sending
> > > > > > > BIND_CONN_TO_SESSION requesting to bind both backchannel
> > and
> > > > > > > fore
> > > > > > > channel on the connection that was left going, but that
> > > > > > > connection
> > > > > > > was already bound to only forechannel. According to the
> > spec,
> > > > > > > it's
> > > > > > > not allowed to change channel binding after they are
> > done.
> > > > > > > 
> > > > > > > The fix is to make sure that a lone SEQUENCE always goes
> > on
> > > > > > > the
> > > > > > > main connection, keeping backchannel alive.
> > > > > > > 
> > > > > > > Fixes: 5a0c257f8 ("NFS: send state management on a single
> > > > > > > connection")
> > > > > > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > > > > > > ---
> > > > > > >  fs/nfs/nfs4proc.c | 2 +-
> > > > > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > > > > > 
> > > > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > > > > index 99e9f2e..461f85d 100644
> > > > > > > --- a/fs/nfs/nfs4proc.c
> > > > > > > +++ b/fs/nfs/nfs4proc.c
> > > > > > > @@ -8857,7 +8857,7 @@ static struct rpc_task
> > > > > > > *_nfs41_proc_sequence(struct nfs_client *clp,
> > > > > > >               .rpc_client = clp->cl_rpcclient,
> > > > > > >               .rpc_message = &msg,
> > > > > > >               .callback_ops = &nfs41_sequence_ops,
> > > > > > > -             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> > > > > > > +             .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT
> > |
> > > > > > > RPC_TASK_NO_ROUND_ROBIN,
> > > > > > >       };
> > > > > > >       struct rpc_task *ret;
> > > > > > > 
> > > > > > 
> > > > > > This works only in the case where the client is only
> > sending
> > > > > > SEQUENCE
> > > > > > instructions. There are other cases where it could be
> > sending
> > > > > > out
> > > > > > other
> > > > > > operations that also renew the lease, but is doing it very
> > > > > > infrequently. Won't that also run into the same problem?
> > > > > 
> > > > > Hm... I see so main channel can still go idle and close, when
> > > > > infrequent operations are happening on the other connections
> > > > > before
> > > > > round-robin-ing to the main connection....
> > > > > 
> > > > > > Is the fundamental problem here that we're not handling the
> > > > > > SEQ4_STATUS_CB_PATH_DOWN / SEQ4_STATUS_CB_PATH_DOWN_SESSION
> > > > > > flags
> > > > > > correctly or is there something else going on?
> > > > > 
> > > > > Yes the client doesn't recover properly. But the fix wasn't
> > > > > trivial
> > > > > to
> > > > > me (so I thought my patch was enough but I see it's not). Say
> > > > > client
> > > > > shuts down the main connection because it was idle. Now
> > whatever
> > > > > operations goes on a different connection is going to get
> > > > > callback
> > > > > down. The only way the client can create a new backchannel
> > > > > (according
> > > > > to the spec) is if it creates a brand new connection and
> > sends
> > > > > BIND_CONN_TO_SESSION there (all existing connections are
> > already
> > > > > bound
> > > > > to fore channel and according to the spec you can't modify
> > the
> > > > > existing binding). But then we'd need to make sure that it's
> > the
> > > > > first
> > > > > one in the list of connections we iterate thru (as i think
> > 1st
> > > > > marks
> > > > > the main connection?) as the other operations that supposed
> > to
> > > > > only
> > > > > go
> > > > > on main connection need to know which connection to pick.
> > > > > 
> > > > > The reason it's not seen against linux is because it doesn't
> > > > > follow
> > > > > the spec is doesn't reject attempts to bind a backchannel to
> > an
> > > > > already existing connection that was only bound for fore
> > channel.
> > > > > 
> > > > 
> > > > Oh, I see. So the server is replying NFS4ERR_INVAL in order to
> > let
> > > > the
> > > > client know that it is trying to change the channel bindings
> > for
> > > > that
> > > > connection.
> > > 
> > > Well server isn't failing because client is asking for
> > FORE_OR_BOTH
> > > and it's a choice so server is returning FORE. I'm not sure we
> > can
> > > ask
> > > the server to fail the request with ERR_INVAL.... (rather I can
> > ask
> > > but ) rather can we expect the server to do that always?
> > 
> > In RFC5661, Section 18.34.3 I found the following normative text:
> > 
> >    Invoking BIND_CONN_TO_SESSION on a connection already associated
> > with
> >    the specified session has no effect, and the server MUST respond
> > with
> >    NFS4_OK, unless the client is demanding changes to the set of
> >    channels the connection is associated with.  If so, the server
> > MUST
> >    return NFS4ERR_INVAL.
> > 
> > 
> > IOW: it sounds like your server isn't following the spec either. 
Trond Myklebust April 20, 2020, 7:02 p.m. UTC | #11
On Mon, 2020-04-20 at 10:59 -0400, Olga Kornievskaia wrote:
> 
> 
> On Mon, Apr 20, 2020 at 10:53 AM Olga Kornievskaia <
> olga.kornievskaia@gmail.com> wrote:
> > 
> > Yes we are consistent in requesting to same connection to with the
> > same channel binding, but we don't send BIND_CONN_TO_SESSION as the
> > first thing on the "main" connection (ie connection that cared the
> > CREATE_SESSION and was bound to fore and back channel by default).
> > When that connection is reset, the first thing that happens is the
> > client re-sends the operation that was not replied to. That has a
> > SEQUENCE and by the rule the server binds that connection to the
> > fore channel only (and sets the callback being down). We then send
> > BIND_CONN_TO_SESSION and request FORE_OR_BOTH where this has
> > already been bound to FORE only.
> > 
> 
> 
> How about this: before we send BIND_CONN_TO_SESSION with
> fore_or_both, we somehow always reset the connection (maybe you were
> suggestion that already and i wasn't following).

No. I didn't realise that we were being automatically set to just the
fore channel. However as I said earlier, the spec says that the server
MUST reply with NFS4ERR_INVAL in this case. It is not allowed to just
return NFS4_OK and silently set the wrong channel binding.

On the client we should probably do something to track whether or not
the backchannel has been lost due to connection breakage. We probably
need to allow the client to check the xprt->connect_cookie to find out
if the connection broke.

> i don't think this is going to the list as i'm getting auto
> rejections emails but i don't know how to fix it.

You need to turn off HTML mail.
Olga Kornievskaia April 20, 2020, 7:35 p.m. UTC | #12
On Mon, Apr 20, 2020 at 3:02 PM Trond Myklebust <trondmy@hammerspace.com> wrote:
>
> On Mon, 2020-04-20 at 10:59 -0400, Olga Kornievskaia wrote:
> >
> >
> > On Mon, Apr 20, 2020 at 10:53 AM Olga Kornievskaia <
> > olga.kornievskaia@gmail.com> wrote:
> > >
> > > Yes we are consistent in requesting to same connection to with the
> > > same channel binding, but we don't send BIND_CONN_TO_SESSION as the
> > > first thing on the "main" connection (ie connection that cared the
> > > CREATE_SESSION and was bound to fore and back channel by default).
> > > When that connection is reset, the first thing that happens is the
> > > client re-sends the operation that was not replied to. That has a
> > > SEQUENCE and by the rule the server binds that connection to the
> > > fore channel only (and sets the callback being down). We then send
> > > BIND_CONN_TO_SESSION and request FORE_OR_BOTH where this has
> > > already been bound to FORE only.
> > >
> >
> >
> > How about this: before we send BIND_CONN_TO_SESSION with
> > fore_or_both, we somehow always reset the connection (maybe you were
> > suggestion that already and i wasn't following).
>
> No. I didn't realise that we were being automatically set to just the
> fore channel. However as I said earlier, the spec says that the server
> MUST reply with NFS4ERR_INVAL in this case. It is not allowed to just
> return NFS4_OK and silently set the wrong channel binding.

How's this:
client sends BIND_CONN_TO_SESSION with FORE_OR_BOTH, server select
FORE channel. connection breaks before the reply gets to the client.
Client resends. Is the server suppose to now fail with ERR_INVAL?

There actually is such a thing between demand and request. FORE and
BACK are demands and FORE_OR_BOTH and BACK_OR_BOTH are requests. The
spec writes only about demands and not the requests. I believe that's
intentional because BIND_CONN_TO_SESSIOn doesn't have a sequence and
not protected by reply session semantics.

> On the client we should probably do something to track whether or not
> the backchannel has been lost due to connection breakage. We probably
> need to allow the client to check the xprt->connect_cookie to find out
> if the connection broke.
>
> > i don't think this is going to the list as i'm getting auto
> > rejections emails but i don't know how to fix it.
>
> You need to turn off HTML mail.

hm.. google tells me it's plain text mode for the message... so i'm not sure.

>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
>
>
Trond Myklebust April 20, 2020, 9:20 p.m. UTC | #13
On Mon, 2020-04-20 at 15:35 -0400, Olga Kornievskaia wrote:
> On Mon, Apr 20, 2020 at 3:02 PM Trond Myklebust <
> trondmy@hammerspace.com> wrote:
> > On Mon, 2020-04-20 at 10:59 -0400, Olga Kornievskaia wrote:
> > > 
> > > On Mon, Apr 20, 2020 at 10:53 AM Olga Kornievskaia <
> > > olga.kornievskaia@gmail.com> wrote:
> > > > Yes we are consistent in requesting to same connection to with
> > > > the
> > > > same channel binding, but we don't send BIND_CONN_TO_SESSION as
> > > > the
> > > > first thing on the "main" connection (ie connection that cared
> > > > the
> > > > CREATE_SESSION and was bound to fore and back channel by
> > > > default).
> > > > When that connection is reset, the first thing that happens is
> > > > the
> > > > client re-sends the operation that was not replied to. That has
> > > > a
> > > > SEQUENCE and by the rule the server binds that connection to
> > > > the
> > > > fore channel only (and sets the callback being down). We then
> > > > send
> > > > BIND_CONN_TO_SESSION and request FORE_OR_BOTH where this has
> > > > already been bound to FORE only.
> > > > 
> > > 
> > > How about this: before we send BIND_CONN_TO_SESSION with
> > > fore_or_both, we somehow always reset the connection (maybe you
> > > were
> > > suggestion that already and i wasn't following).
> > 
> > No. I didn't realise that we were being automatically set to just
> > the
> > fore channel. However as I said earlier, the spec says that the
> > server
> > MUST reply with NFS4ERR_INVAL in this case. It is not allowed to
> > just
> > return NFS4_OK and silently set the wrong channel binding.
> 
> How's this:
> client sends BIND_CONN_TO_SESSION with FORE_OR_BOTH, server select
> FORE channel. connection breaks before the reply gets to the client.
> Client resends. Is the server suppose to now fail with ERR_INVAL?
> 
> There actually is such a thing between demand and request. FORE and
> BACK are demands and FORE_OR_BOTH and BACK_OR_BOTH are requests. The
> spec writes only about demands and not the requests. I believe that's
> intentional because BIND_CONN_TO_SESSIOn doesn't have a sequence and
> not protected by reply session semantics.

OK. However if we take that interpretation, then the question is why
would the server downgrade our FORE_OR_BOTH to FORE and what is the
point of the client even retrying at all in that case?

The server can always reject the client's back channel creation
attempt, but doing so has consequences: it means there is no way to
hand out delegations or layouts. So I'm confused by the concept of a
server that sets SEQ4_STATUS_CB_PATH_DOWN or
SEQ4_STATUS_CB_PATH_DOWN_SESSION, but then doesn't allow the client to
set a back channel.
Olga Kornievskaia April 21, 2020, 7:47 p.m. UTC | #14
On Mon, Apr 20, 2020 at 5:20 PM Trond Myklebust <trondmy@hammerspace.com> wrote:
>
> On Mon, 2020-04-20 at 15:35 -0400, Olga Kornievskaia wrote:
> > On Mon, Apr 20, 2020 at 3:02 PM Trond Myklebust <
> > trondmy@hammerspace.com> wrote:
> > > On Mon, 2020-04-20 at 10:59 -0400, Olga Kornievskaia wrote:
> > > >
> > > > On Mon, Apr 20, 2020 at 10:53 AM Olga Kornievskaia <
> > > > olga.kornievskaia@gmail.com> wrote:
> > > > > Yes we are consistent in requesting to same connection to with
> > > > > the
> > > > > same channel binding, but we don't send BIND_CONN_TO_SESSION as
> > > > > the
> > > > > first thing on the "main" connection (ie connection that cared
> > > > > the
> > > > > CREATE_SESSION and was bound to fore and back channel by
> > > > > default).
> > > > > When that connection is reset, the first thing that happens is
> > > > > the
> > > > > client re-sends the operation that was not replied to. That has
> > > > > a
> > > > > SEQUENCE and by the rule the server binds that connection to
> > > > > the
> > > > > fore channel only (and sets the callback being down). We then
> > > > > send
> > > > > BIND_CONN_TO_SESSION and request FORE_OR_BOTH where this has
> > > > > already been bound to FORE only.
> > > > >
> > > >
> > > > How about this: before we send BIND_CONN_TO_SESSION with
> > > > fore_or_both, we somehow always reset the connection (maybe you
> > > > were
> > > > suggestion that already and i wasn't following).
> > >
> > > No. I didn't realise that we were being automatically set to just
> > > the
> > > fore channel. However as I said earlier, the spec says that the
> > > server
> > > MUST reply with NFS4ERR_INVAL in this case. It is not allowed to
> > > just
> > > return NFS4_OK and silently set the wrong channel binding.
> >
> > How's this:
> > client sends BIND_CONN_TO_SESSION with FORE_OR_BOTH, server select
> > FORE channel. connection breaks before the reply gets to the client.
> > Client resends. Is the server suppose to now fail with ERR_INVAL?
> >
> > There actually is such a thing between demand and request. FORE and
> > BACK are demands and FORE_OR_BOTH and BACK_OR_BOTH are requests. The
> > spec writes only about demands and not the requests. I believe that's
> > intentional because BIND_CONN_TO_SESSIOn doesn't have a sequence and
> > not protected by reply session semantics.
>
> OK. However if we take that interpretation, then the question is why
> would the server downgrade our FORE_OR_BOTH to FORE and what is the
> point of the client even retrying at all in that case?

As far as I can tell, the client behaves improperly. It shouldn't have
sent an operation on a new connection before sending
BIND_CONN_TO_SESSION.

> The server can always reject the client's back channel creation
> attempt, but doing so has consequences: it means there is no way to
> hand out delegations or layouts. So I'm confused by the concept of a
> server that sets SEQ4_STATUS_CB_PATH_DOWN or
> SEQ4_STATUS_CB_PATH_DOWN_SESSION, but then doesn't allow the client to
> set a back channel.

Because we can't guarantee that BIND_CONN_TO_SESSION happens as the
first operation, I think the solution is for the transport that will
do FORE_OR_BOTH request, the client must reset the connection first?
Would that be acceptable?


>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
>
>
Trond Myklebust April 21, 2020, 9:20 p.m. UTC | #15
On Tue, 2020-04-21 at 15:47 -0400, Olga Kornievskaia wrote:
> On Mon, Apr 20, 2020 at 5:20 PM Trond Myklebust <
> trondmy@hammerspace.com> wrote:
> > On Mon, 2020-04-20 at 15:35 -0400, Olga Kornievskaia wrote:
> > > On Mon, Apr 20, 2020 at 3:02 PM Trond Myklebust <
> > > trondmy@hammerspace.com> wrote:
> > > > On Mon, 2020-04-20 at 10:59 -0400, Olga Kornievskaia wrote:
> > > > > On Mon, Apr 20, 2020 at 10:53 AM Olga Kornievskaia <
> > > > > olga.kornievskaia@gmail.com> wrote:
> > > > > > Yes we are consistent in requesting to same connection to
> > > > > > with
> > > > > > the
> > > > > > same channel binding, but we don't send
> > > > > > BIND_CONN_TO_SESSION as
> > > > > > the
> > > > > > first thing on the "main" connection (ie connection that
> > > > > > cared
> > > > > > the
> > > > > > CREATE_SESSION and was bound to fore and back channel by
> > > > > > default).
> > > > > > When that connection is reset, the first thing that happens
> > > > > > is
> > > > > > the
> > > > > > client re-sends the operation that was not replied to. That
> > > > > > has
> > > > > > a
> > > > > > SEQUENCE and by the rule the server binds that connection
> > > > > > to
> > > > > > the
> > > > > > fore channel only (and sets the callback being down). We
> > > > > > then
> > > > > > send
> > > > > > BIND_CONN_TO_SESSION and request FORE_OR_BOTH where this
> > > > > > has
> > > > > > already been bound to FORE only.
> > > > > > 
> > > > > 
> > > > > How about this: before we send BIND_CONN_TO_SESSION with
> > > > > fore_or_both, we somehow always reset the connection (maybe
> > > > > you
> > > > > were
> > > > > suggestion that already and i wasn't following).
> > > > 
> > > > No. I didn't realise that we were being automatically set to
> > > > just
> > > > the
> > > > fore channel. However as I said earlier, the spec says that the
> > > > server
> > > > MUST reply with NFS4ERR_INVAL in this case. It is not allowed
> > > > to
> > > > just
> > > > return NFS4_OK and silently set the wrong channel binding.
> > > 
> > > How's this:
> > > client sends BIND_CONN_TO_SESSION with FORE_OR_BOTH, server
> > > select
> > > FORE channel. connection breaks before the reply gets to the
> > > client.
> > > Client resends. Is the server suppose to now fail with ERR_INVAL?
> > > 
> > > There actually is such a thing between demand and request. FORE
> > > and
> > > BACK are demands and FORE_OR_BOTH and BACK_OR_BOTH are requests.
> > > The
> > > spec writes only about demands and not the requests. I believe
> > > that's
> > > intentional because BIND_CONN_TO_SESSIOn doesn't have a sequence
> > > and
> > > not protected by reply session semantics.
> > 
> > OK. However if we take that interpretation, then the question is
> > why
> > would the server downgrade our FORE_OR_BOTH to FORE and what is the
> > point of the client even retrying at all in that case?
> 
> As far as I can tell, the client behaves improperly. It shouldn't
> have
> sent an operation on a new connection before sending
> BIND_CONN_TO_SESSION.
> 
> > The server can always reject the client's back channel creation
> > attempt, but doing so has consequences: it means there is no way to
> > hand out delegations or layouts. So I'm confused by the concept of
> > a
> > server that sets SEQ4_STATUS_CB_PATH_DOWN or
> > SEQ4_STATUS_CB_PATH_DOWN_SESSION, but then doesn't allow the client
> > to
> > set a back channel.
> 
> Because we can't guarantee that BIND_CONN_TO_SESSION happens as the
> first operation, I think the solution is for the transport that will
> do FORE_OR_BOTH request, the client must reset the connection first?
> Would that be acceptable?
> 

It sounds like we have no choice. However in that case, we should not
try to recover in the case where the attempt to bind the backchannel
fails.
Olga Kornievskaia April 23, 2020, 9:33 p.m. UTC | #16
On Tue, Apr 21, 2020 at 5:20 PM Trond Myklebust <trondmy@hammerspace.com> wrote:
>
> On Tue, 2020-04-21 at 15:47 -0400, Olga Kornievskaia wrote:
> > On Mon, Apr 20, 2020 at 5:20 PM Trond Myklebust <
> > trondmy@hammerspace.com> wrote:
> > > On Mon, 2020-04-20 at 15:35 -0400, Olga Kornievskaia wrote:
> > > > On Mon, Apr 20, 2020 at 3:02 PM Trond Myklebust <
> > > > trondmy@hammerspace.com> wrote:
> > > > > On Mon, 2020-04-20 at 10:59 -0400, Olga Kornievskaia wrote:
> > > > > > On Mon, Apr 20, 2020 at 10:53 AM Olga Kornievskaia <
> > > > > > olga.kornievskaia@gmail.com> wrote:
> > > > > > > Yes we are consistent in requesting to same connection to
> > > > > > > with
> > > > > > > the
> > > > > > > same channel binding, but we don't send
> > > > > > > BIND_CONN_TO_SESSION as
> > > > > > > the
> > > > > > > first thing on the "main" connection (ie connection that
> > > > > > > cared
> > > > > > > the
> > > > > > > CREATE_SESSION and was bound to fore and back channel by
> > > > > > > default).
> > > > > > > When that connection is reset, the first thing that happens
> > > > > > > is
> > > > > > > the
> > > > > > > client re-sends the operation that was not replied to. That
> > > > > > > has
> > > > > > > a
> > > > > > > SEQUENCE and by the rule the server binds that connection
> > > > > > > to
> > > > > > > the
> > > > > > > fore channel only (and sets the callback being down). We
> > > > > > > then
> > > > > > > send
> > > > > > > BIND_CONN_TO_SESSION and request FORE_OR_BOTH where this
> > > > > > > has
> > > > > > > already been bound to FORE only.
> > > > > > >
> > > > > >
> > > > > > How about this: before we send BIND_CONN_TO_SESSION with
> > > > > > fore_or_both, we somehow always reset the connection (maybe
> > > > > > you
> > > > > > were
> > > > > > suggestion that already and i wasn't following).
> > > > >
> > > > > No. I didn't realise that we were being automatically set to
> > > > > just
> > > > > the
> > > > > fore channel. However as I said earlier, the spec says that the
> > > > > server
> > > > > MUST reply with NFS4ERR_INVAL in this case. It is not allowed
> > > > > to
> > > > > just
> > > > > return NFS4_OK and silently set the wrong channel binding.
> > > >
> > > > How's this:
> > > > client sends BIND_CONN_TO_SESSION with FORE_OR_BOTH, server
> > > > select
> > > > FORE channel. connection breaks before the reply gets to the
> > > > client.
> > > > Client resends. Is the server suppose to now fail with ERR_INVAL?
> > > >
> > > > There actually is such a thing between demand and request. FORE
> > > > and
> > > > BACK are demands and FORE_OR_BOTH and BACK_OR_BOTH are requests.
> > > > The
> > > > spec writes only about demands and not the requests. I believe
> > > > that's
> > > > intentional because BIND_CONN_TO_SESSIOn doesn't have a sequence
> > > > and
> > > > not protected by reply session semantics.
> > >
> > > OK. However if we take that interpretation, then the question is
> > > why
> > > would the server downgrade our FORE_OR_BOTH to FORE and what is the
> > > point of the client even retrying at all in that case?
> >
> > As far as I can tell, the client behaves improperly. It shouldn't
> > have
> > sent an operation on a new connection before sending
> > BIND_CONN_TO_SESSION.
> >
> > > The server can always reject the client's back channel creation
> > > attempt, but doing so has consequences: it means there is no way to
> > > hand out delegations or layouts. So I'm confused by the concept of
> > > a
> > > server that sets SEQ4_STATUS_CB_PATH_DOWN or
> > > SEQ4_STATUS_CB_PATH_DOWN_SESSION, but then doesn't allow the client
> > > to
> > > set a back channel.
> >
> > Because we can't guarantee that BIND_CONN_TO_SESSION happens as the
> > first operation, I think the solution is for the transport that will
> > do FORE_OR_BOTH request, the client must reset the connection first?
> > Would that be acceptable?
> >
>
> It sounds like we have no choice. However in that case, we should not
> try to recover in the case where the attempt to bind the backchannel
> fails.

I can't get the client to reset the connection. Is calling
xprt_force_disconnect() suppose to do that? If so, then that doesn't
happen. I have tried calling into xprt->ops->close(xprt) but that
leads to badness as well... Any suggestions of how to get connection
reset? Is xs_reset_transport() it? It's hard to figure out how all
these functions are different when their names imply connection
resetting...

Thank you


>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
>
>
Trond Myklebust April 23, 2020, 9:57 p.m. UTC | #17
On Thu, 2020-04-23 at 17:33 -0400, Olga Kornievskaia wrote:
> On Tue, Apr 21, 2020 at 5:20 PM Trond Myklebust <
> trondmy@hammerspace.com> wrote:
> > On Tue, 2020-04-21 at 15:47 -0400, Olga Kornievskaia wrote:
> > > On Mon, Apr 20, 2020 at 5:20 PM Trond Myklebust <
> > > trondmy@hammerspace.com> wrote:
> > > > On Mon, 2020-04-20 at 15:35 -0400, Olga Kornievskaia wrote:
> > > > > On Mon, Apr 20, 2020 at 3:02 PM Trond Myklebust <
> > > > > trondmy@hammerspace.com> wrote:
> > > > > > On Mon, 2020-04-20 at 10:59 -0400, Olga Kornievskaia wrote:
> > > > > > > On Mon, Apr 20, 2020 at 10:53 AM Olga Kornievskaia <
> > > > > > > olga.kornievskaia@gmail.com> wrote:
> > > > > > > > Yes we are consistent in requesting to same connection
> > > > > > > > to
> > > > > > > > with
> > > > > > > > the
> > > > > > > > same channel binding, but we don't send
> > > > > > > > BIND_CONN_TO_SESSION as
> > > > > > > > the
> > > > > > > > first thing on the "main" connection (ie connection
> > > > > > > > that
> > > > > > > > cared
> > > > > > > > the
> > > > > > > > CREATE_SESSION and was bound to fore and back channel
> > > > > > > > by
> > > > > > > > default).
> > > > > > > > When that connection is reset, the first thing that
> > > > > > > > happens
> > > > > > > > is
> > > > > > > > the
> > > > > > > > client re-sends the operation that was not replied to.
> > > > > > > > That
> > > > > > > > has
> > > > > > > > a
> > > > > > > > SEQUENCE and by the rule the server binds that
> > > > > > > > connection
> > > > > > > > to
> > > > > > > > the
> > > > > > > > fore channel only (and sets the callback being down).
> > > > > > > > We
> > > > > > > > then
> > > > > > > > send
> > > > > > > > BIND_CONN_TO_SESSION and request FORE_OR_BOTH where
> > > > > > > > this
> > > > > > > > has
> > > > > > > > already been bound to FORE only.
> > > > > > > > 
> > > > > > > 
> > > > > > > How about this: before we send BIND_CONN_TO_SESSION with
> > > > > > > fore_or_both, we somehow always reset the connection
> > > > > > > (maybe
> > > > > > > you
> > > > > > > were
> > > > > > > suggestion that already and i wasn't following).
> > > > > > 
> > > > > > No. I didn't realise that we were being automatically set
> > > > > > to
> > > > > > just
> > > > > > the
> > > > > > fore channel. However as I said earlier, the spec says that
> > > > > > the
> > > > > > server
> > > > > > MUST reply with NFS4ERR_INVAL in this case. It is not
> > > > > > allowed
> > > > > > to
> > > > > > just
> > > > > > return NFS4_OK and silently set the wrong channel binding.
> > > > > 
> > > > > How's this:
> > > > > client sends BIND_CONN_TO_SESSION with FORE_OR_BOTH, server
> > > > > select
> > > > > FORE channel. connection breaks before the reply gets to the
> > > > > client.
> > > > > Client resends. Is the server suppose to now fail with
> > > > > ERR_INVAL?
> > > > > 
> > > > > There actually is such a thing between demand and request.
> > > > > FORE
> > > > > and
> > > > > BACK are demands and FORE_OR_BOTH and BACK_OR_BOTH are
> > > > > requests.
> > > > > The
> > > > > spec writes only about demands and not the requests. I
> > > > > believe
> > > > > that's
> > > > > intentional because BIND_CONN_TO_SESSIOn doesn't have a
> > > > > sequence
> > > > > and
> > > > > not protected by reply session semantics.
> > > > 
> > > > OK. However if we take that interpretation, then the question
> > > > is
> > > > why
> > > > would the server downgrade our FORE_OR_BOTH to FORE and what is
> > > > the
> > > > point of the client even retrying at all in that case?
> > > 
> > > As far as I can tell, the client behaves improperly. It shouldn't
> > > have
> > > sent an operation on a new connection before sending
> > > BIND_CONN_TO_SESSION.
> > > 
> > > > The server can always reject the client's back channel creation
> > > > attempt, but doing so has consequences: it means there is no
> > > > way to
> > > > hand out delegations or layouts. So I'm confused by the concept
> > > > of
> > > > a
> > > > server that sets SEQ4_STATUS_CB_PATH_DOWN or
> > > > SEQ4_STATUS_CB_PATH_DOWN_SESSION, but then doesn't allow the
> > > > client
> > > > to
> > > > set a back channel.
> > > 
> > > Because we can't guarantee that BIND_CONN_TO_SESSION happens as
> > > the
> > > first operation, I think the solution is for the transport that
> > > will
> > > do FORE_OR_BOTH request, the client must reset the connection
> > > first?
> > > Would that be acceptable?
> > > 
> > 
> > It sounds like we have no choice. However in that case, we should
> > not
> > try to recover in the case where the attempt to bind the
> > backchannel
> > fails.
> 
> I can't get the client to reset the connection. Is calling
> xprt_force_disconnect() suppose to do that? If so, then that doesn't
> happen. I have tried calling into xprt->ops->close(xprt) but that
> leads to badness as well... Any suggestions of how to get connection
> reset? Is xs_reset_transport() it? It's hard to figure out how all
> these functions are different when their names imply connection
> resetting...

xprt_force_disconnect() should be the right function.

Note, though, that the function is asynchronous (it queues up the call
to xprt_autoclose() on the xprtiod workqueue), so it won't necessarily
immediately show the connection as being broken. However it should
ensure that no further requests are transmitted on that xprt until
after the connection breaks.
J. Bruce Fields April 27, 2020, 10:07 p.m. UTC | #18
On Fri, Apr 17, 2020 at 04:53:32PM +0000, Trond Myklebust wrote:
> In RFC5661, Section 18.34.3 I found the following normative text:
> 
>    Invoking BIND_CONN_TO_SESSION on a connection already associated with
>    the specified session has no effect, and the server MUST respond with
>    NFS4_OK, unless the client is demanding changes to the set of
>    channels the connection is associated with.  If so, the server MUST
>    return NFS4ERR_INVAL.

Hm, I should fix that.

(Totally untested.)

--b.

commit cd66e66c4ce7
Author: J. Bruce Fields <bfields@redhat.com>
Date:   Mon Apr 27 17:59:01 2020 -0400

    nfsd: handle repeated BIND_CONN_TO_SESSION
    
    If the client attempts BIND_CONN_TO_SESSION on an already bound
    connection, it should be either a no-op or an error.
    
    Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a50c045fe7c5..73f821f65ce8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3483,6 +3483,45 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp,
 	return nfs_ok;
 }
 
+static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
+{
+	struct nfsd4_conn *c;
+
+	list_for_each_entry(c, &s->se_conns, cn_persession) {
+		if (c->cn_xprt == xpt) {
+			return c;
+		}
+	}
+	return NULL;
+}
+
+static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst,
+				struct nfsd4_session *session, u32 req)
+{
+	struct nfs4_client *clp = session->se_client;
+	struct svc_xprt *xpt = rqst->rq_xprt;
+	struct nfsd4_conn *c;
+	__be32 status;
+
+	/* Following the last paragraph of RFC 5661 Section 18.34.3: */
+	spin_lock(&clp->cl_lock);
+	c = __nfsd4_find_conn(xpt, session);
+	if (!c)
+		status = nfserr_noent;
+	else if (req == c->cn_flags)
+		status = nfs_ok;
+	else if (req == NFS4_CDFC4_FORE_OR_BOTH &&
+				c->cn_flags != NFS4_CDFC4_BACK)
+		status = nfs_ok;
+	else if (req == NFS4_CDFC4_BACK_OR_BOTH &&
+				c->cn_flags != NFS4_CDFC4_FORE)
+		status = nfs_ok;
+	else
+		status = nfserr_inval;
+	spin_unlock(&clp->cl_lock);
+	return status;
+}
+
 __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
 		     struct nfsd4_compound_state *cstate,
 		     union nfsd4_op_u *u)
@@ -3504,6 +3543,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
 	status = nfserr_wrong_cred;
 	if (!nfsd4_mach_creds_match(session->se_client, rqstp))
 		goto out;
+	status = nfsd4_match_existing_connection(rqstp, session, bcts->dir);
+	if (status == nfs_ok || status == nfserr_inval)
+		goto out;
 	status = nfsd4_map_bcts_dir(&bcts->dir);
 	if (status)
 		goto out;
@@ -3569,18 +3611,6 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,
 	return status;
 }
 
-static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
-{
-	struct nfsd4_conn *c;
-
-	list_for_each_entry(c, &s->se_conns, cn_persession) {
-		if (c->cn_xprt == xpt) {
-			return c;
-		}
-	}
-	return NULL;
-}
-
 static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
 {
 	struct nfs4_client *clp = ses->se_client;
diff mbox series

Patch

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 99e9f2e..461f85d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8857,7 +8857,7 @@  static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
 		.rpc_client = clp->cl_rpcclient,
 		.rpc_message = &msg,
 		.callback_ops = &nfs41_sequence_ops,
-		.flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
+		.flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN,
 	};
 	struct rpc_task *ret;