diff mbox series

[RFC,1/4] NFS: Unset RPC_TASK_NO_RETRANS_TIMEOUT for async lease renewal

Message ID 162627781762.1294.17862468684529354297.stgit@manet.1015granger.net (mailing list archive)
State New, archived
Headers show
Series Ensure RPC_TASK_NORTO is disabled for select operations | expand

Commit Message

Chuck Lever III July 14, 2021, 3:50 p.m. UTC
In some rare failure modes, the server is actually reading the
transport, but then just dropping the requests on the floor.
TCP_USER_TIMEOUT cannot detect that case.

Prevent such a stuck server from pinning client resources
indefinitely by ensuring that async lease renewal requests can time
out even if the connection is still operational.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/nfs4proc.c |    9 +++++++++
 1 file changed, 9 insertions(+)

Comments

Trond Myklebust July 14, 2021, 4 p.m. UTC | #1
On Wed, 2021-07-14 at 11:50 -0400, Chuck Lever wrote:
> In some rare failure modes, the server is actually reading the
> transport, but then just dropping the requests on the floor.
> TCP_USER_TIMEOUT cannot detect that case.
> 
> Prevent such a stuck server from pinning client resources
> indefinitely by ensuring that async lease renewal requests can time
> out even if the connection is still operational.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfs/nfs4proc.c |    9 +++++++++
>  1 file changed, 9 insertions(+)
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index e1214bb6b7ee..346217f6a00b 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -5612,6 +5612,12 @@ struct nfs4_renewdata {
>   * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it
> is a special
>   * standalone procedure for queueing an asynchronous RENEW.
>   */
> +static void nfs4_renew_prepare(struct rpc_task *task, void
> *calldata)
> +{
> +       task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
> +       rpc_call_start(task);
> +}
> +
>  static void nfs4_renew_release(void *calldata)
>  {
>         struct nfs4_renewdata *data = calldata;
> @@ -5650,6 +5656,7 @@ static void nfs4_renew_done(struct rpc_task
> *task, void *calldata)
>  }
>  
>  static const struct rpc_call_ops nfs4_renew_ops = {
> +       .rpc_call_prepare = nfs4_renew_prepare,
>         .rpc_call_done = nfs4_renew_done,
>         .rpc_release = nfs4_renew_release,
>  };
> @@ -9219,6 +9226,8 @@ static void nfs41_sequence_prepare(struct
> rpc_task *task, void *data)
>         struct nfs4_sequence_args *args;
>         struct nfs4_sequence_res *res;
>  
> +       task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
> +
>         args = task->tk_msg.rpc_argp;
>         res = task->tk_msg.rpc_resp;
>  
> 
> 

This isn't necessary. The server isn't allowed to drop these calls on
the floor.
Chuck Lever III July 14, 2021, 5:53 p.m. UTC | #2
> On Jul 14, 2021, at 12:00 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Wed, 2021-07-14 at 11:50 -0400, Chuck Lever wrote:
>> In some rare failure modes, the server is actually reading the
>> transport, but then just dropping the requests on the floor.
>> TCP_USER_TIMEOUT cannot detect that case.
>> 
>> Prevent such a stuck server from pinning client resources
>> indefinitely by ensuring that async lease renewal requests can time
>> out even if the connection is still operational.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>>  fs/nfs/nfs4proc.c |    9 +++++++++
>>  1 file changed, 9 insertions(+)
>> 
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index e1214bb6b7ee..346217f6a00b 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -5612,6 +5612,12 @@ struct nfs4_renewdata {
>>   * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it
>> is a special
>>   * standalone procedure for queueing an asynchronous RENEW.
>>   */
>> +static void nfs4_renew_prepare(struct rpc_task *task, void
>> *calldata)
>> +{
>> +       task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
>> +       rpc_call_start(task);
>> +}
>> +
>>  static void nfs4_renew_release(void *calldata)
>>  {
>>         struct nfs4_renewdata *data = calldata;
>> @@ -5650,6 +5656,7 @@ static void nfs4_renew_done(struct rpc_task
>> *task, void *calldata)
>>  }
>>  
>>  static const struct rpc_call_ops nfs4_renew_ops = {
>> +       .rpc_call_prepare = nfs4_renew_prepare,
>>         .rpc_call_done = nfs4_renew_done,
>>         .rpc_release = nfs4_renew_release,
>>  };
>> @@ -9219,6 +9226,8 @@ static void nfs41_sequence_prepare(struct
>> rpc_task *task, void *data)
>>         struct nfs4_sequence_args *args;
>>         struct nfs4_sequence_res *res;
>>  
>> +       task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
>> +
>>         args = task->tk_msg.rpc_argp;
>>         res = task->tk_msg.rpc_resp;
> 
> This isn't necessary. The server isn't allowed to drop these calls on
> the floor.

Again, a server bug, a misconfiguration, a dependence on an
inoperative network service (like DNS), a weird crash, or even
a malicious server can pin client resources. This is plainly a
denial of service. Clients cannot depend on "the server is not
allowed to" if they are to be considered secure.

I'm not suggesting that the Linux client should make a heroic
effort to operate normally when a server behaves like this. I
_am_ suggesting that the client should protect itself and its
users by not pinning its own resources when a server is
behaving bizarrely.

I can drop 1/4 & 2/4 for the moment, but I don't agree that
3/4 & 4/4 alone are adequate to resolve the denial of service.

----

Now with regard to the spec requirement, I believe it might
be under-specified. It's at least problematic.

This is from RFC 8881 Section 2.9.2, and is referring in
particular to non-NULL NFSv4 operations:

> A replier MUST NOT silently drop a request, even if the request is a retry. (The silent drop behavior of RPCSEC_GSS [4] does not apply because this behavior happens at the RPCSEC_GSS layer, a lower layer in the request processing.) Instead, the replier SHOULD return an appropriate error (see Section 2.10.6.1), or it MAY disconnect the connection.


It states that the server cannot drop a request, but the text
does /not/ literally mandate that the only signal for a lost
request is connection loss -- that part is only a MAY.

Further the use of SHOULD suggests that a session error is the
preferred mechanism for signaling such a loss. The use of MAY
indicates that connection termination is permissible but not
preferred.

Moreover, the text is not careful to state that these two options
are exhaustive. It leaves open the possibility for other server
responses in this situation. (I recognize that might not have
been the intent of the authors, but that's certainly how it
reads a decade later).

Let's not even get into the carve-out for RPCSEC_GSS silent drops.

Thus I don't think we can lean on the current spec to ensure that
a server will always signal the loss of a reply by disconnecting,
especially in cases where there is no session. Further down in
ss2.9.2, we have this one-sentence paragraph:

> In addition, as described in Section 2.10.6.2, while a session is active, the NFSv4.1 requester MUST NOT stop waiting for a reply.


So what about when there is no active session? For example, what
about EXCHANGE_ID, CREATE_SESSION, BIND_CONN_TO_SESSION, and
DESTROY_CLIENTID ? I would argue that a high quality client
implementation will not wait indefinitely for the completion of
these operations.


--
Chuck Lever
Trond Myklebust July 14, 2021, 6:26 p.m. UTC | #3
On Wed, 2021-07-14 at 17:53 +0000, Chuck Lever III wrote:
> 
> 
> > On Jul 14, 2021, at 12:00 PM, Trond Myklebust
> > <trondmy@hammerspace.com> wrote:
> > 
> > On Wed, 2021-07-14 at 11:50 -0400, Chuck Lever wrote:
> > > In some rare failure modes, the server is actually reading the
> > > transport, but then just dropping the requests on the floor.
> > > TCP_USER_TIMEOUT cannot detect that case.
> > > 
> > > Prevent such a stuck server from pinning client resources
> > > indefinitely by ensuring that async lease renewal requests can
> > > time
> > > out even if the connection is still operational.
> > > 
> > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> > > ---
> > >  fs/nfs/nfs4proc.c |    9 +++++++++
> > >  1 file changed, 9 insertions(+)
> > > 
> > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > index e1214bb6b7ee..346217f6a00b 100644
> > > --- a/fs/nfs/nfs4proc.c
> > > +++ b/fs/nfs/nfs4proc.c
> > > @@ -5612,6 +5612,12 @@ struct nfs4_renewdata {
> > >   * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops;
> > > it
> > > is a special
> > >   * standalone procedure for queueing an asynchronous RENEW.
> > >   */
> > > +static void nfs4_renew_prepare(struct rpc_task *task, void
> > > *calldata)
> > > +{
> > > +       task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
> > > +       rpc_call_start(task);
> > > +}
> > > +
> > >  static void nfs4_renew_release(void *calldata)
> > >  {
> > >         struct nfs4_renewdata *data = calldata;
> > > @@ -5650,6 +5656,7 @@ static void nfs4_renew_done(struct rpc_task
> > > *task, void *calldata)
> > >  }
> > >  
> > >  static const struct rpc_call_ops nfs4_renew_ops = {
> > > +       .rpc_call_prepare = nfs4_renew_prepare,
> > >         .rpc_call_done = nfs4_renew_done,
> > >         .rpc_release = nfs4_renew_release,
> > >  };
> > > @@ -9219,6 +9226,8 @@ static void nfs41_sequence_prepare(struct
> > > rpc_task *task, void *data)
> > >         struct nfs4_sequence_args *args;
> > >         struct nfs4_sequence_res *res;
> > >  
> > > +       task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
> > > +
> > >         args = task->tk_msg.rpc_argp;
> > >         res = task->tk_msg.rpc_resp;
> > 
> > This isn't necessary. The server isn't allowed to drop these calls
> > on
> > the floor.
> 
> Again, a server bug, a misconfiguration, a dependence on an
> inoperative network service (like DNS), a weird crash, or even
> a malicious server can pin client resources. This is plainly a
> denial of service. Clients cannot depend on "the server is not
> allowed to" if they are to be considered secure.
> 
> I'm not suggesting that the Linux client should make a heroic
> effort to operate normally when a server behaves like this. I
> _am_ suggesting that the client should protect itself and its
> users by not pinning its own resources when a server is
> behaving bizarrely.
> 
> I can drop 1/4 & 2/4 for the moment, but I don't agree that
> 3/4 & 4/4 alone are adequate to resolve the denial of service.
> 
> ----
> 
> Now with regard to the spec requirement, I believe it might
> be under-specified. It's at least problematic.
> 
> This is from RFC 8881 Section 2.9.2, and is referring in
> particular to non-NULL NFSv4 operations:
> 
> > A replier MUST NOT silently drop a request, even if the request is
> > a retry. (The silent drop behavior of RPCSEC_GSS [4] does not apply
> > because this behavior happens at the RPCSEC_GSS layer, a lower
> > layer in the request processing.) Instead, the replier SHOULD
> > return an appropriate error (see Section 2.10.6.1), or it MAY
> > disconnect the connection.
> 
> 
> It states that the server cannot drop a request, but the text
> does /not/ literally mandate that the only signal for a lost
> request is connection loss -- that part is only a MAY.

That's irrelevant. The normative behaviour is that requests MUST NOT be
dropped. Since no options are listed other than replying or dropping
the connection, I'm not seeing how else the server can meet the
requirement. The SHOULD and MAY are just there to emphasise which is
the preferred behaviour.

> 
> Further the use of SHOULD suggests that a session error is the
> preferred mechanism for signaling such a loss. The use of MAY
> indicates that connection termination is permissible but not
> preferred.
> 
> Moreover, the text is not careful to state that these two options
> are exhaustive. It leaves open the possibility for other server
> responses in this situation. (I recognize that might not have
> been the intent of the authors, but that's certainly how it
> reads a decade later).
> 
> Let's not even get into the carve-out for RPCSEC_GSS silent drops.
> 
> Thus I don't think we can lean on the current spec to ensure that
> a server will always signal the loss of a reply by disconnecting,
> especially in cases where there is no session. Further down in
> ss2.9.2, we have this one-sentence paragraph:
> 
> > In addition, as described in Section 2.10.6.2, while a session is
> > active, the NFSv4.1 requester MUST NOT stop waiting for a reply.
> 
> 
> So what about when there is no active session? For example, what
> about EXCHANGE_ID, CREATE_SESSION, BIND_CONN_TO_SESSION, and
> DESTROY_CLIENTID ? I would argue that a high quality client
> implementation will not wait indefinitely for the completion of
> these operations.
> 

Sorry, but no! We're NOT working under the assumption that servers are
broken. We fix what is necessary in order to make the client spec-
adherent. That is all.

If the server is up, and is reading the socket, then we assume it is a
NFSv4 server, and we assume that it is not broken. If it starts getting
picky about which operations is will or will not reply to, then it is
by definition broken because it is in violation of the normative
behaviour for non-NULL NFSv4 operations.
diff mbox series

Patch

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e1214bb6b7ee..346217f6a00b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5612,6 +5612,12 @@  struct nfs4_renewdata {
  * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special
  * standalone procedure for queueing an asynchronous RENEW.
  */
+static void nfs4_renew_prepare(struct rpc_task *task, void *calldata)
+{
+	task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
+	rpc_call_start(task);
+}
+
 static void nfs4_renew_release(void *calldata)
 {
 	struct nfs4_renewdata *data = calldata;
@@ -5650,6 +5656,7 @@  static void nfs4_renew_done(struct rpc_task *task, void *calldata)
 }
 
 static const struct rpc_call_ops nfs4_renew_ops = {
+	.rpc_call_prepare = nfs4_renew_prepare,
 	.rpc_call_done = nfs4_renew_done,
 	.rpc_release = nfs4_renew_release,
 };
@@ -9219,6 +9226,8 @@  static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
 	struct nfs4_sequence_args *args;
 	struct nfs4_sequence_res *res;
 
+	task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
+
 	args = task->tk_msg.rpc_argp;
 	res = task->tk_msg.rpc_resp;