diff mbox series

NFS: Restore -EIO as error to return when "umount -f" aborts request.

Message ID 170909199843.24797.6320949640369986924@noble.neil.brown.name (mailing list archive)
State New
Headers show
Series NFS: Restore -EIO as error to return when "umount -f" aborts request. | expand

Commit Message

NeilBrown Feb. 28, 2024, 3:46 a.m. UTC
When "umount -f" is used to abort all outstanding requests on an NFS
mount, some pending systemcalls can be expected to return an error.
Currently this error is ERESTARTSYS which should never be exposed to
applications (it should only be returned due to a signal).

Prior to Linux v5.2 EIO would be returned in these cases, which it is
more likely that applications will handle.

This patch restores that behaviour so EIO is returned.

Reported-and-tested-by: Zhitao Li <zhitao.li@smartx.com>
Closes: https://lore.kernel.org/linux-nfs/CAPKjjnrYvzH8hEk9boaBt-fETX3VD2cjjN-Z6iNgwZpHqYUjWw@mail.gmail.com/
Fixes: ae67bd3821bb ("SUNRPC: Fix up task signalling")
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/sunrpc/sched.h | 2 +-
 net/sunrpc/clnt.c            | 2 +-
 net/sunrpc/sched.c           | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

Comments

Trond Myklebust Feb. 28, 2024, 9:16 p.m. UTC | #1
On Wed, 2024-02-28 at 14:46 +1100, NeilBrown wrote:
> 
> When "umount -f" is used to abort all outstanding requests on an NFS
> mount, some pending systemcalls can be expected to return an error.
> Currently this error is ERESTARTSYS which should never be exposed to
> applications (it should only be returned due to a signal).
> 
> Prior to Linux v5.2 EIO would be returned in these cases, which it is
> more likely that applications will handle.
> 
> This patch restores that behaviour so EIO is returned.
> 
> Reported-and-tested-by: Zhitao Li <zhitao.li@smartx.com>
> Closes:
> https://lore.kernel.org/linux-nfs/CAPKjjnrYvzH8hEk9boaBt-fETX3VD2cjjN-Z6iNgwZpHqYUjWw@mail.gmail.com/
> Fixes: ae67bd3821bb ("SUNRPC: Fix up task signalling")
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  include/linux/sunrpc/sched.h | 2 +-
>  net/sunrpc/clnt.c            | 2 +-
>  net/sunrpc/sched.c           | 6 +++---
>  3 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/sunrpc/sched.h
> b/include/linux/sunrpc/sched.h
> index 2d61987b3545..ed3a116efd5d 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -222,7 +222,7 @@ void		rpc_put_task(struct rpc_task
> *);
>  void		rpc_put_task_async(struct rpc_task *);
>  bool		rpc_task_set_rpc_status(struct rpc_task *task, int
> rpc_status);
>  void		rpc_task_try_cancel(struct rpc_task *task, int
> error);
> -void		rpc_signal_task(struct rpc_task *);
> +void		rpc_signal_task(struct rpc_task *, int);
>  void		rpc_exit_task(struct rpc_task *);
>  void		rpc_exit(struct rpc_task *, int);
>  void		rpc_release_calldata(const struct rpc_call_ops *,
> void *);
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index cda0935a68c9..cdbdfae13030 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -895,7 +895,7 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
>  	trace_rpc_clnt_killall(clnt);
>  	spin_lock(&clnt->cl_lock);
>  	list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
> -		rpc_signal_task(rovr);
> +		rpc_signal_task(rovr, -EIO);

rpc_killall_tasks() is called by rpc_shutdown_client(). It is not
obvious to me that EIO is an appropriate return value in all the cases
where that is being called.

If we're going to replace ERESTARTSYS, then why would we not want to go
for EINTR?

>  	spin_unlock(&clnt->cl_lock);
>  }
>  EXPORT_SYMBOL_GPL(rpc_killall_tasks);
> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> index 6debf4fd42d4..e4f36fe16808 100644
> --- a/net/sunrpc/sched.c
> +++ b/net/sunrpc/sched.c
> @@ -852,14 +852,14 @@ void rpc_exit_task(struct rpc_task *task)
>  	}
>  }
>  
> -void rpc_signal_task(struct rpc_task *task)
> +void rpc_signal_task(struct rpc_task *task, int err)
>  {
>  	struct rpc_wait_queue *queue;
>  
>  	if (!RPC_IS_ACTIVATED(task))
>  		return;
>  
> -	if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
> +	if (!rpc_task_set_rpc_status(task, err))
>  		return;
>  	trace_rpc_task_signalled(task, task->tk_action);
>  	set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
> @@ -992,7 +992,7 @@ static void __rpc_execute(struct rpc_task *task)
>  			 * clean up after sleeping on some queue, we
> don't
>  			 * break the loop here, but go around once
> more.
>  			 */
> -			rpc_signal_task(task);
> +			rpc_signal_task(task, -ERESTARTSYS);
>  		}
>  		trace_rpc_task_sync_wake(task, task->tk_action);
>  	}
NeilBrown March 4, 2024, 4:50 a.m. UTC | #2
On Thu, 29 Feb 2024, Trond Myklebust wrote:
> On Wed, 2024-02-28 at 14:46 +1100, NeilBrown wrote:
> > 
> > When "umount -f" is used to abort all outstanding requests on an NFS
> > mount, some pending systemcalls can be expected to return an error.
> > Currently this error is ERESTARTSYS which should never be exposed to
> > applications (it should only be returned due to a signal).
> > 
> > Prior to Linux v5.2 EIO would be returned in these cases, which it is
> > more likely that applications will handle.
> > 
> > This patch restores that behaviour so EIO is returned.
> > 
> > Reported-and-tested-by: Zhitao Li <zhitao.li@smartx.com>
> > Closes:
> > https://lore.kernel.org/linux-nfs/CAPKjjnrYvzH8hEk9boaBt-fETX3VD2cjjN-Z6iNgwZpHqYUjWw@mail.gmail.com/
> > Fixes: ae67bd3821bb ("SUNRPC: Fix up task signalling")
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  include/linux/sunrpc/sched.h | 2 +-
> >  net/sunrpc/clnt.c            | 2 +-
> >  net/sunrpc/sched.c           | 6 +++---
> >  3 files changed, 5 insertions(+), 5 deletions(-)
> > 
> > diff --git a/include/linux/sunrpc/sched.h
> > b/include/linux/sunrpc/sched.h
> > index 2d61987b3545..ed3a116efd5d 100644
> > --- a/include/linux/sunrpc/sched.h
> > +++ b/include/linux/sunrpc/sched.h
> > @@ -222,7 +222,7 @@ void		rpc_put_task(struct rpc_task
> > *);
> >  void		rpc_put_task_async(struct rpc_task *);
> >  bool		rpc_task_set_rpc_status(struct rpc_task *task, int
> > rpc_status);
> >  void		rpc_task_try_cancel(struct rpc_task *task, int
> > error);
> > -void		rpc_signal_task(struct rpc_task *);
> > +void		rpc_signal_task(struct rpc_task *, int);
> >  void		rpc_exit_task(struct rpc_task *);
> >  void		rpc_exit(struct rpc_task *, int);
> >  void		rpc_release_calldata(const struct rpc_call_ops *,
> > void *);
> > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > index cda0935a68c9..cdbdfae13030 100644
> > --- a/net/sunrpc/clnt.c
> > +++ b/net/sunrpc/clnt.c
> > @@ -895,7 +895,7 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
> >  	trace_rpc_clnt_killall(clnt);
> >  	spin_lock(&clnt->cl_lock);
> >  	list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
> > -		rpc_signal_task(rovr);
> > +		rpc_signal_task(rovr, -EIO);
> 
> rpc_killall_tasks() is called by rpc_shutdown_client(). It is not
> obvious to me that EIO is an appropriate return value in all the cases
> where that is being called.
> 
> If we're going to replace ERESTARTSYS, then why would we not want to go
> for EINTR?

I have no particular opinion on which error code is correct, though I am
confident that ERESTARTSYS is not.  I do see the attraction of EINTR,
though "man 3 errno" suggests that is appropriate when a signal is
received, which is not the case here.

With this patch I was primarily reverting what appeared to be an
unintentional change in a previous patch.  Such a reversion can then be
used in -stable kernels if that seems appropriate.

If this exercise suggests to you that a different error code might be
more appropriate, then I think any such change should be in a separate
patch which only goes to mainline.

Thanks,
NeilBrown


> 
> >  	spin_unlock(&clnt->cl_lock);
> >  }
> >  EXPORT_SYMBOL_GPL(rpc_killall_tasks);
> > diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> > index 6debf4fd42d4..e4f36fe16808 100644
> > --- a/net/sunrpc/sched.c
> > +++ b/net/sunrpc/sched.c
> > @@ -852,14 +852,14 @@ void rpc_exit_task(struct rpc_task *task)
> >  	}
> >  }
> >  
> > -void rpc_signal_task(struct rpc_task *task)
> > +void rpc_signal_task(struct rpc_task *task, int err)
> >  {
> >  	struct rpc_wait_queue *queue;
> >  
> >  	if (!RPC_IS_ACTIVATED(task))
> >  		return;
> >  
> > -	if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
> > +	if (!rpc_task_set_rpc_status(task, err))
> >  		return;
> >  	trace_rpc_task_signalled(task, task->tk_action);
> >  	set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
> > @@ -992,7 +992,7 @@ static void __rpc_execute(struct rpc_task *task)
> >  			 * clean up after sleeping on some queue, we
> > don't
> >  			 * break the loop here, but go around once
> > more.
> >  			 */
> > -			rpc_signal_task(task);
> > +			rpc_signal_task(task, -ERESTARTSYS);
> >  		}
> >  		trace_rpc_task_sync_wake(task, task->tk_action);
> >  	}
> 
> -- 
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
> 
> 
>
Jeffrey Layton March 4, 2024, 1:35 p.m. UTC | #3
On Wed, 2024-02-28 at 14:46 +1100, NeilBrown wrote:
> When "umount -f" is used to abort all outstanding requests on an NFS
> mount, some pending systemcalls can be expected to return an error.
> Currently this error is ERESTARTSYS which should never be exposed to
> applications (it should only be returned due to a signal).
> 
> Prior to Linux v5.2 EIO would be returned in these cases, which it is
> more likely that applications will handle.
> 
> This patch restores that behaviour so EIO is returned.
> 
> Reported-and-tested-by: Zhitao Li <zhitao.li@smartx.com>
> Closes: https://lore.kernel.org/linux-nfs/CAPKjjnrYvzH8hEk9boaBt-fETX3VD2cjjN-Z6iNgwZpHqYUjWw@mail.gmail.com/
> Fixes: ae67bd3821bb ("SUNRPC: Fix up task signalling")
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  include/linux/sunrpc/sched.h | 2 +-
>  net/sunrpc/clnt.c            | 2 +-
>  net/sunrpc/sched.c           | 6 +++---
>  3 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
> index 2d61987b3545..ed3a116efd5d 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -222,7 +222,7 @@ void		rpc_put_task(struct rpc_task *);
>  void		rpc_put_task_async(struct rpc_task *);
>  bool		rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status);
>  void		rpc_task_try_cancel(struct rpc_task *task, int error);
> -void		rpc_signal_task(struct rpc_task *);
> +void		rpc_signal_task(struct rpc_task *, int);
>  void		rpc_exit_task(struct rpc_task *);
>  void		rpc_exit(struct rpc_task *, int);
>  void		rpc_release_calldata(const struct rpc_call_ops *, void *);
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index cda0935a68c9..cdbdfae13030 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -895,7 +895,7 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
>  	trace_rpc_clnt_killall(clnt);
>  	spin_lock(&clnt->cl_lock);
>  	list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
> -		rpc_signal_task(rovr);
> +		rpc_signal_task(rovr, -EIO);
>  	spin_unlock(&clnt->cl_lock);
>  }
>  EXPORT_SYMBOL_GPL(rpc_killall_tasks);
> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> index 6debf4fd42d4..e4f36fe16808 100644
> --- a/net/sunrpc/sched.c
> +++ b/net/sunrpc/sched.c
> @@ -852,14 +852,14 @@ void rpc_exit_task(struct rpc_task *task)
>  	}
>  }
>  
> -void rpc_signal_task(struct rpc_task *task)
> +void rpc_signal_task(struct rpc_task *task, int err)
>  {
>  	struct rpc_wait_queue *queue;
>  
>  	if (!RPC_IS_ACTIVATED(task))
>  		return;
>  
> -	if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
> +	if (!rpc_task_set_rpc_status(task, err))
>  		return;
>  	trace_rpc_task_signalled(task, task->tk_action);
>  	set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
> @@ -992,7 +992,7 @@ static void __rpc_execute(struct rpc_task *task)
>  			 * clean up after sleeping on some queue, we don't
>  			 * break the loop here, but go around once more.
>  			 */
> -			rpc_signal_task(task);
> +			rpc_signal_task(task, -ERESTARTSYS);
>  		}
>  		trace_rpc_task_sync_wake(task, task->tk_action);
>  	}

This seems like the right thing to do.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
diff mbox series

Patch

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 2d61987b3545..ed3a116efd5d 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -222,7 +222,7 @@  void		rpc_put_task(struct rpc_task *);
 void		rpc_put_task_async(struct rpc_task *);
 bool		rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status);
 void		rpc_task_try_cancel(struct rpc_task *task, int error);
-void		rpc_signal_task(struct rpc_task *);
+void		rpc_signal_task(struct rpc_task *, int);
 void		rpc_exit_task(struct rpc_task *);
 void		rpc_exit(struct rpc_task *, int);
 void		rpc_release_calldata(const struct rpc_call_ops *, void *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cda0935a68c9..cdbdfae13030 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -895,7 +895,7 @@  void rpc_killall_tasks(struct rpc_clnt *clnt)
 	trace_rpc_clnt_killall(clnt);
 	spin_lock(&clnt->cl_lock);
 	list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
-		rpc_signal_task(rovr);
+		rpc_signal_task(rovr, -EIO);
 	spin_unlock(&clnt->cl_lock);
 }
 EXPORT_SYMBOL_GPL(rpc_killall_tasks);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6debf4fd42d4..e4f36fe16808 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -852,14 +852,14 @@  void rpc_exit_task(struct rpc_task *task)
 	}
 }
 
-void rpc_signal_task(struct rpc_task *task)
+void rpc_signal_task(struct rpc_task *task, int err)
 {
 	struct rpc_wait_queue *queue;
 
 	if (!RPC_IS_ACTIVATED(task))
 		return;
 
-	if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
+	if (!rpc_task_set_rpc_status(task, err))
 		return;
 	trace_rpc_task_signalled(task, task->tk_action);
 	set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
@@ -992,7 +992,7 @@  static void __rpc_execute(struct rpc_task *task)
 			 * clean up after sleeping on some queue, we don't
 			 * break the loop here, but go around once more.
 			 */
-			rpc_signal_task(task);
+			rpc_signal_task(task, -ERESTARTSYS);
 		}
 		trace_rpc_task_sync_wake(task, task->tk_action);
 	}