diff mbox series

pNFS: Fix a hang in nfs4_evict_inode()

Message ID 20231008182019.12842-1-trondmy@kernel.org (mailing list archive)
State New, archived
Headers show
Series pNFS: Fix a hang in nfs4_evict_inode() | expand

Commit Message

Trond Myklebust Oct. 8, 2023, 6:20 p.m. UTC
From: Trond Myklebust <trond.myklebust@hammerspace.com>

We are not allowed to call pnfs_mark_matching_lsegs_return() without
also holding a reference to the layout header, since doing so could lead
to the reference count going to zero when we call
pnfs_layout_remove_lseg(). This again can lead to a hang when we get to
nfs4_evict_inode() and are unable to clear the layout pointer.

pnfs_layout_return_unused_byserver() is guilty of this behaviour, and
has been seen to trigger the refcount warning prior to a hang.

Fixes: b6d49ecd1081 ("NFSv4: Fix a pNFS layout related use-after-free race when freeing the inode")
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/pnfs.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

Comments

Benjamin Coddington Nov. 1, 2023, 1:20 p.m. UTC | #1
Hi Trond,

On 8 Oct 2023, at 14:20, trondmy@kernel.org wrote:

> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>
> We are not allowed to call pnfs_mark_matching_lsegs_return() without
> also holding a reference to the layout header, since doing so could lead
> to the reference count going to zero when we call
> pnfs_layout_remove_lseg(). This again can lead to a hang when we get to
> nfs4_evict_inode() and are unable to clear the layout pointer.
>
> pnfs_layout_return_unused_byserver() is guilty of this behaviour, and
> has been seen to trigger the refcount warning prior to a hang.
>
> Fixes: b6d49ecd1081 ("NFSv4: Fix a pNFS layout related use-after-free race when freeing the inode")
> Cc: stable@vger.kernel.org
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  fs/nfs/pnfs.c | 33 +++++++++++++++++++++++----------
>  1 file changed, 23 insertions(+), 10 deletions(-)
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 63904a372b2f..21a365357629 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -2638,31 +2638,44 @@ pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
>  	return mode == 0;
>  }
>
> -static int
> -pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data)
> +static int pnfs_layout_return_unused_byserver(struct nfs_server *server,
> +					      void *data)
>  {
>  	const struct pnfs_layout_range *range = data;
> +	const struct cred *cred;
>  	struct pnfs_layout_hdr *lo;
>  	struct inode *inode;
> +	nfs4_stateid stateid;
> +	enum pnfs_iomode iomode;
> +
>  restart:
>  	rcu_read_lock();
>  	list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
> -		if (!pnfs_layout_can_be_returned(lo) ||
> +		inode = lo->plh_inode;
> +		if (!inode || !pnfs_layout_can_be_returned(lo) ||
>  		    test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
>  			continue;
> -		inode = lo->plh_inode;
>  		spin_lock(&inode->i_lock);
> -		if (!pnfs_should_return_unused_layout(lo, range)) {
> +		if (!lo->plh_inode ||
> +		    !pnfs_should_return_unused_layout(lo, range)) {
>  			spin_unlock(&inode->i_lock);
>  			continue;
>  		}
> +		pnfs_get_layout_hdr(lo);

We're getting a crash in writeback with nfs_inode.layout == NULL.

We haven't bisected it to this patch yet, but I think this change exposes
the case where the pnfs_layout_hdr refcount has already gone to zero, yet we
can still find the header here on server->layouts, so the
pnfs_get_layout_hdr() call above bumps the refcount incorrectly.

Plausible? We can send a fix, or test one.

Ben
diff mbox series

Patch

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 63904a372b2f..21a365357629 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2638,31 +2638,44 @@  pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
 	return mode == 0;
 }
 
-static int
-pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data)
+static int pnfs_layout_return_unused_byserver(struct nfs_server *server,
+					      void *data)
 {
 	const struct pnfs_layout_range *range = data;
+	const struct cred *cred;
 	struct pnfs_layout_hdr *lo;
 	struct inode *inode;
+	nfs4_stateid stateid;
+	enum pnfs_iomode iomode;
+
 restart:
 	rcu_read_lock();
 	list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
-		if (!pnfs_layout_can_be_returned(lo) ||
+		inode = lo->plh_inode;
+		if (!inode || !pnfs_layout_can_be_returned(lo) ||
 		    test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
 			continue;
-		inode = lo->plh_inode;
 		spin_lock(&inode->i_lock);
-		if (!pnfs_should_return_unused_layout(lo, range)) {
+		if (!lo->plh_inode ||
+		    !pnfs_should_return_unused_layout(lo, range)) {
 			spin_unlock(&inode->i_lock);
 			continue;
 		}
+		pnfs_get_layout_hdr(lo);
+		pnfs_set_plh_return_info(lo, range->iomode, 0);
+		if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
+						    range, 0) != 0 ||
+		    !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) {
+			spin_unlock(&inode->i_lock);
+			rcu_read_unlock();
+			pnfs_put_layout_hdr(lo);
+			cond_resched();
+			goto restart;
+		}
 		spin_unlock(&inode->i_lock);
-		inode = pnfs_grab_inode_layout_hdr(lo);
-		if (!inode)
-			continue;
 		rcu_read_unlock();
-		pnfs_mark_layout_for_return(inode, range);
-		iput(inode);
+		pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+		pnfs_put_layout_hdr(lo);
 		cond_resched();
 		goto restart;
 	}