diff mbox

[v3,1/1] PNFS fix dangling DS mount

Message ID 20170630195201.95597-1-kolga@netapp.com (mailing list archive)
State New, archived
Headers show

Commit Message

Olga Kornievskaia June 30, 2017, 7:52 p.m. UTC
There is a regression by commit 8d40b0f14846 ("NFS filelayout:call
GETDEVICEINFO after pnfs_layout_process completes"). It leaves the
DS mount dangling.

Previously, filelayout_alloc_sec() would call filelayout_check_layout()
which would call nfs4_find_get_deviceid which ups the count on the
device_id. It's only called once and it's matched by the
filelayout_free_lseg() that calls nfs4_fl_put_deviceid().

After that patch, each read/write ends up calling nfs4_find_get_deviceid
and there is no balance for that. Instead, do nfs4_fl_put_deviceid()
in the filelayout's .pg_cleanup and remove it from filelayout_free_lseg.

But we still need a reference to hold over the lifetime of the segment.
For every new lseg that's created we need to take a reference on deviceid
that uses it. It will be released in the "free_lseg" routine.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfs/filelayout/filelayout.c         | 21 +++++++++++++++++----
 fs/nfs/flexfilelayout/flexfilelayout.c |  9 ++++++---
 fs/nfs/pnfs.c                          | 13 ++++++++++---
 fs/nfs/pnfs.h                          |  3 ++-
 4 files changed, 35 insertions(+), 11 deletions(-)

Comments

Steve Dickson July 5, 2017, 1:52 p.m. UTC | #1
On 06/30/2017 03:52 PM, Olga Kornievskaia wrote:
> There is a regression by commit 8d40b0f14846 ("NFS filelayout:call
> GETDEVICEINFO after pnfs_layout_process completes"). It leaves the
> DS mount dangling.
> 
> Previously, filelayout_alloc_sec() would call filelayout_check_layout()
> which would call nfs4_find_get_deviceid which ups the count on the
> device_id. It's only called once and it's matched by the
> filelayout_free_lseg() that calls nfs4_fl_put_deviceid().
> 
> After that patch, each read/write ends up calling nfs4_find_get_deviceid
> and there is no balance for that. Instead, do nfs4_fl_put_deviceid()
> in the filelayout's .pg_cleanup and remove it from filelayout_free_lseg.
> 
> But we still need a reference to hold over the lifetime of the segment.
> For every new lseg that's created we need to take a reference on deviceid
> that uses it. It will be released in the "free_lseg" routine.
> 
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Tested-by: Steve Dickson <steved@redhat.com>

steved.

> ---
>  fs/nfs/filelayout/filelayout.c         | 21 +++++++++++++++++----
>  fs/nfs/flexfilelayout/flexfilelayout.c |  9 ++++++---
>  fs/nfs/pnfs.c                          | 13 ++++++++++---
>  fs/nfs/pnfs.h                          |  3 ++-
>  4 files changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
> index 1cf85d6..86d694e 100644
> --- a/fs/nfs/filelayout/filelayout.c
> +++ b/fs/nfs/filelayout/filelayout.c
> @@ -909,9 +909,10 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
>  	struct pnfs_layout_hdr *lo;
>  	struct nfs4_filelayout_segment *fl;
>  	int status;
> +	bool new_layout = false;
>  
>  	lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
> -				  gfp_flags);
> +				  gfp_flags, &new_layout);
>  	if (!lseg)
>  		lseg = ERR_PTR(-ENOMEM);
>  	if (IS_ERR(lseg))
> @@ -924,7 +925,8 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
>  	if (status) {
>  		pnfs_put_lseg(lseg);
>  		lseg = ERR_PTR(status);
> -	}
> +	} else if (new_layout)
> +		nfs4_get_deviceid(&fl->dsaddr->id_node);
>  out:
>  	return lseg;
>  }
> @@ -991,18 +993,29 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
>  	nfs_pageio_reset_write_mds(pgio);
>  }
>  
> +static void filelayout_pg_cleanup(struct nfs_pageio_descriptor *desc)
> +{
> +	if (desc->pg_lseg) {
> +		struct nfs4_filelayout_segment *fl =
> +			FILELAYOUT_LSEG(desc->pg_lseg);
> +
> +		nfs4_fl_put_deviceid(fl->dsaddr);
> +	}
> +	pnfs_generic_pg_cleanup(desc);
> +}
> +
>  static const struct nfs_pageio_ops filelayout_pg_read_ops = {
>  	.pg_init = filelayout_pg_init_read,
>  	.pg_test = filelayout_pg_test,
>  	.pg_doio = pnfs_generic_pg_readpages,
> -	.pg_cleanup = pnfs_generic_pg_cleanup,
> +	.pg_cleanup = filelayout_pg_cleanup,
>  };
>  
>  static const struct nfs_pageio_ops filelayout_pg_write_ops = {
>  	.pg_init = filelayout_pg_init_write,
>  	.pg_test = filelayout_pg_test,
>  	.pg_doio = pnfs_generic_pg_writepages,
> -	.pg_cleanup = pnfs_generic_pg_cleanup,
> +	.pg_cleanup = filelayout_pg_cleanup,
>  };
>  
>  static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
> diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
> index 23542dc..53a4a19 100644
> --- a/fs/nfs/flexfilelayout/flexfilelayout.c
> +++ b/fs/nfs/flexfilelayout/flexfilelayout.c
> @@ -820,7 +820,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
>  					   NFS4_MAX_UINT64,
>  					   IOMODE_READ,
>  					   strict_iomode,
> -					   GFP_KERNEL);
> +					   GFP_KERNEL,
> +					   NULL);
>  	if (IS_ERR(pgio->pg_lseg)) {
>  		pgio->pg_error = PTR_ERR(pgio->pg_lseg);
>  		pgio->pg_lseg = NULL;
> @@ -904,7 +905,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
>  						   NFS4_MAX_UINT64,
>  						   IOMODE_RW,
>  						   false,
> -						   GFP_NOFS);
> +						   GFP_NOFS,
> +						   NULL);
>  		if (IS_ERR(pgio->pg_lseg)) {
>  			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
>  			pgio->pg_lseg = NULL;
> @@ -960,7 +962,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
>  						   NFS4_MAX_UINT64,
>  						   IOMODE_RW,
>  						   false,
> -						   GFP_NOFS);
> +						   GFP_NOFS,
> +						   NULL);
>  		if (IS_ERR(pgio->pg_lseg)) {
>  			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
>  			pgio->pg_lseg = NULL;
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index c383d09..fb011c6 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1698,7 +1698,8 @@ struct pnfs_layout_segment *
>  		   u64 count,
>  		   enum pnfs_iomode iomode,
>  		   bool strict_iomode,
> -		   gfp_t gfp_flags)
> +		   gfp_t gfp_flags,
> +		   bool *new)
>  {
>  	struct pnfs_layout_range arg = {
>  		.iomode = iomode,
> @@ -1764,8 +1765,12 @@ struct pnfs_layout_segment *
>  	if (lseg) {
>  		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
>  				PNFS_UPDATE_LAYOUT_FOUND_CACHED);
> +		if (new)
> +			*new = false;
>  		goto out_unlock;
>  	}
> +	if (new)
> +		*new = true;
>  
>  	if (!nfs4_valid_open_stateid(ctx->state)) {
>  		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
> @@ -2126,7 +2131,8 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
>  						   rd_size,
>  						   IOMODE_READ,
>  						   false,
> -						   GFP_KERNEL);
> +						   GFP_KERNEL,
> +						   NULL);
>  		if (IS_ERR(pgio->pg_lseg)) {
>  			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
>  			pgio->pg_lseg = NULL;
> @@ -2153,7 +2159,8 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
>  						   wb_size,
>  						   IOMODE_RW,
>  						   false,
> -						   GFP_NOFS);
> +						   GFP_NOFS,
> +						   NULL);
>  		if (IS_ERR(pgio->pg_lseg)) {
>  			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
>  			pgio->pg_lseg = NULL;
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 99731e3..978fab0 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -291,7 +291,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
>  					       u64 count,
>  					       enum pnfs_iomode iomode,
>  					       bool strict_iomode,
> -					       gfp_t gfp_flags);
> +					       gfp_t gfp_flags,
> +					       bool *new);
>  void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
>  		const nfs4_stateid *arg_stateid,
>  		const struct pnfs_layout_range *range,
>
Trond Myklebust July 20, 2017, 7:56 p.m. UTC | #2
Hi Olga,

Apologies for missing this patch. It was hiding in my 'linux-fsdevel'
mailbox, so I didn't recognise it as a NFS patch.

On Fri, 2017-06-30 at 15:52 -0400, Olga Kornievskaia wrote:
> There is a regression by commit 8d40b0f14846 ("NFS filelayout:call

> GETDEVICEINFO after pnfs_layout_process completes"). It leaves the

> DS mount dangling.

> 

> Previously, filelayout_alloc_sec() would call

> filelayout_check_layout()

> which would call nfs4_find_get_deviceid which ups the count on the

> device_id. It's only called once and it's matched by the

> filelayout_free_lseg() that calls nfs4_fl_put_deviceid().

> 

> After that patch, each read/write ends up calling

> nfs4_find_get_deviceid

> and there is no balance for that. Instead, do nfs4_fl_put_deviceid()

> in the filelayout's .pg_cleanup and remove it from

> filelayout_free_lseg.

> 

> But we still need a reference to hold over the lifetime of the

> segment.

> For every new lseg that's created we need to take a reference on

> deviceid

> that uses it. It will be released in the "free_lseg" routine.


This is what I'm not understanding. If you have a reference in the
layout segment, then why do you need to call nfs4_find_get_deviceid()
in the read/write code?

Isn't it sufficient to change the "pg_init" calls to check whether or
not the struct nfs4_filelayout_segment has set a value for dsaddr (that
needs to be done with care to avoid races - cmpxchg() is your friend),
and then rely on that reference being set for the remainder of the
layout segment lifetime?


-- 
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@primarydata.com
diff mbox

Patch

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 1cf85d6..86d694e 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -909,9 +909,10 @@  static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
 	struct pnfs_layout_hdr *lo;
 	struct nfs4_filelayout_segment *fl;
 	int status;
+	bool new_layout = false;
 
 	lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
-				  gfp_flags);
+				  gfp_flags, &new_layout);
 	if (!lseg)
 		lseg = ERR_PTR(-ENOMEM);
 	if (IS_ERR(lseg))
@@ -924,7 +925,8 @@  static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
 	if (status) {
 		pnfs_put_lseg(lseg);
 		lseg = ERR_PTR(status);
-	}
+	} else if (new_layout)
+		nfs4_get_deviceid(&fl->dsaddr->id_node);
 out:
 	return lseg;
 }
@@ -991,18 +993,29 @@  static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
 	nfs_pageio_reset_write_mds(pgio);
 }
 
+static void filelayout_pg_cleanup(struct nfs_pageio_descriptor *desc)
+{
+	if (desc->pg_lseg) {
+		struct nfs4_filelayout_segment *fl =
+			FILELAYOUT_LSEG(desc->pg_lseg);
+
+		nfs4_fl_put_deviceid(fl->dsaddr);
+	}
+	pnfs_generic_pg_cleanup(desc);
+}
+
 static const struct nfs_pageio_ops filelayout_pg_read_ops = {
 	.pg_init = filelayout_pg_init_read,
 	.pg_test = filelayout_pg_test,
 	.pg_doio = pnfs_generic_pg_readpages,
-	.pg_cleanup = pnfs_generic_pg_cleanup,
+	.pg_cleanup = filelayout_pg_cleanup,
 };
 
 static const struct nfs_pageio_ops filelayout_pg_write_ops = {
 	.pg_init = filelayout_pg_init_write,
 	.pg_test = filelayout_pg_test,
 	.pg_doio = pnfs_generic_pg_writepages,
-	.pg_cleanup = pnfs_generic_pg_cleanup,
+	.pg_cleanup = filelayout_pg_cleanup,
 };
 
 static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 23542dc..53a4a19 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -820,7 +820,8 @@  static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
 					   NFS4_MAX_UINT64,
 					   IOMODE_READ,
 					   strict_iomode,
-					   GFP_KERNEL);
+					   GFP_KERNEL,
+					   NULL);
 	if (IS_ERR(pgio->pg_lseg)) {
 		pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 		pgio->pg_lseg = NULL;
@@ -904,7 +905,8 @@  static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
 						   NFS4_MAX_UINT64,
 						   IOMODE_RW,
 						   false,
-						   GFP_NOFS);
+						   GFP_NOFS,
+						   NULL);
 		if (IS_ERR(pgio->pg_lseg)) {
 			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 			pgio->pg_lseg = NULL;
@@ -960,7 +962,8 @@  static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
 						   NFS4_MAX_UINT64,
 						   IOMODE_RW,
 						   false,
-						   GFP_NOFS);
+						   GFP_NOFS,
+						   NULL);
 		if (IS_ERR(pgio->pg_lseg)) {
 			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 			pgio->pg_lseg = NULL;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c383d09..fb011c6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1698,7 +1698,8 @@  struct pnfs_layout_segment *
 		   u64 count,
 		   enum pnfs_iomode iomode,
 		   bool strict_iomode,
-		   gfp_t gfp_flags)
+		   gfp_t gfp_flags,
+		   bool *new)
 {
 	struct pnfs_layout_range arg = {
 		.iomode = iomode,
@@ -1764,8 +1765,12 @@  struct pnfs_layout_segment *
 	if (lseg) {
 		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
 				PNFS_UPDATE_LAYOUT_FOUND_CACHED);
+		if (new)
+			*new = false;
 		goto out_unlock;
 	}
+	if (new)
+		*new = true;
 
 	if (!nfs4_valid_open_stateid(ctx->state)) {
 		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
@@ -2126,7 +2131,8 @@  void pnfs_error_mark_layout_for_return(struct inode *inode,
 						   rd_size,
 						   IOMODE_READ,
 						   false,
-						   GFP_KERNEL);
+						   GFP_KERNEL,
+						   NULL);
 		if (IS_ERR(pgio->pg_lseg)) {
 			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 			pgio->pg_lseg = NULL;
@@ -2153,7 +2159,8 @@  void pnfs_error_mark_layout_for_return(struct inode *inode,
 						   wb_size,
 						   IOMODE_RW,
 						   false,
-						   GFP_NOFS);
+						   GFP_NOFS,
+						   NULL);
 		if (IS_ERR(pgio->pg_lseg)) {
 			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
 			pgio->pg_lseg = NULL;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 99731e3..978fab0 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -291,7 +291,8 @@  struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
 					       u64 count,
 					       enum pnfs_iomode iomode,
 					       bool strict_iomode,
-					       gfp_t gfp_flags);
+					       gfp_t gfp_flags,
+					       bool *new);
 void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
 		const nfs4_stateid *arg_stateid,
 		const struct pnfs_layout_range *range,