Message ID | 20170630195201.95597-1-kolga@netapp.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 06/30/2017 03:52 PM, Olga Kornievskaia wrote: > There is a regression by commit 8d40b0f14846 ("NFS filelayout:call > GETDEVICEINFO after pnfs_layout_process completes"). It leaves the > DS mount dangling. > > Previously, filelayout_alloc_sec() would call filelayout_check_layout() > which would call nfs4_find_get_deviceid which ups the count on the > device_id. It's only called once and it's matched by the > filelayout_free_lseg() that calls nfs4_fl_put_deviceid(). > > After that patch, each read/write ends up calling nfs4_find_get_deviceid > and there is no balance for that. Instead, do nfs4_fl_put_deviceid() > in the filelayout's .pg_cleanup and remove it from filelayout_free_lseg. > > But we still need a reference to hold over the lifetime of the segment. > For every new lseg that's created we need to take a reference on deviceid > that uses it. It will be released in the "free_lseg" routine. > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com> Tested-by: Steve Dickson <steved@redhat.com> steved. > --- > fs/nfs/filelayout/filelayout.c | 21 +++++++++++++++++---- > fs/nfs/flexfilelayout/flexfilelayout.c | 9 ++++++--- > fs/nfs/pnfs.c | 13 ++++++++++--- > fs/nfs/pnfs.h | 3 ++- > 4 files changed, 35 insertions(+), 11 deletions(-) > > diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c > index 1cf85d6..86d694e 100644 > --- a/fs/nfs/filelayout/filelayout.c > +++ b/fs/nfs/filelayout/filelayout.c > @@ -909,9 +909,10 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) > struct pnfs_layout_hdr *lo; > struct nfs4_filelayout_segment *fl; > int status; > + bool new_layout = false; > > lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode, > - gfp_flags); > + gfp_flags, &new_layout); > if (!lseg) > lseg = ERR_PTR(-ENOMEM); > if (IS_ERR(lseg)) > @@ -924,7 +925,8 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) > if (status) { > pnfs_put_lseg(lseg); > lseg = ERR_PTR(status); > - } > + } else if (new_layout) > + nfs4_get_deviceid(&fl->dsaddr->id_node); > out: > return lseg; > } > @@ -991,18 +993,29 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) > nfs_pageio_reset_write_mds(pgio); > } > > +static void filelayout_pg_cleanup(struct nfs_pageio_descriptor *desc) > +{ > + if (desc->pg_lseg) { > + struct nfs4_filelayout_segment *fl = > + FILELAYOUT_LSEG(desc->pg_lseg); > + > + nfs4_fl_put_deviceid(fl->dsaddr); > + } > + pnfs_generic_pg_cleanup(desc); > +} > + > static const struct nfs_pageio_ops filelayout_pg_read_ops = { > .pg_init = filelayout_pg_init_read, > .pg_test = filelayout_pg_test, > .pg_doio = pnfs_generic_pg_readpages, > - .pg_cleanup = pnfs_generic_pg_cleanup, > + .pg_cleanup = filelayout_pg_cleanup, > }; > > static const struct nfs_pageio_ops filelayout_pg_write_ops = { > .pg_init = filelayout_pg_init_write, > .pg_test = filelayout_pg_test, > .pg_doio = pnfs_generic_pg_writepages, > - .pg_cleanup = pnfs_generic_pg_cleanup, > + .pg_cleanup = filelayout_pg_cleanup, > }; > > static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) > diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c > index 23542dc..53a4a19 100644 > --- a/fs/nfs/flexfilelayout/flexfilelayout.c > +++ b/fs/nfs/flexfilelayout/flexfilelayout.c > @@ -820,7 +820,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout) > NFS4_MAX_UINT64, > IOMODE_READ, > strict_iomode, > - GFP_KERNEL); > + GFP_KERNEL, > + NULL); > if (IS_ERR(pgio->pg_lseg)) { > pgio->pg_error = PTR_ERR(pgio->pg_lseg); > pgio->pg_lseg = NULL; > @@ -904,7 +905,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout) > NFS4_MAX_UINT64, > IOMODE_RW, > false, > - GFP_NOFS); > + GFP_NOFS, > + NULL); > if (IS_ERR(pgio->pg_lseg)) { > pgio->pg_error = PTR_ERR(pgio->pg_lseg); > pgio->pg_lseg = NULL; > @@ -960,7 +962,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout) > NFS4_MAX_UINT64, > IOMODE_RW, > false, > - GFP_NOFS); > + GFP_NOFS, > + NULL); > if (IS_ERR(pgio->pg_lseg)) { > pgio->pg_error = PTR_ERR(pgio->pg_lseg); > pgio->pg_lseg = NULL; > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c > index c383d09..fb011c6 100644 > --- a/fs/nfs/pnfs.c > +++ b/fs/nfs/pnfs.c > @@ -1698,7 +1698,8 @@ struct pnfs_layout_segment * > u64 count, > enum pnfs_iomode iomode, > bool strict_iomode, > - gfp_t gfp_flags) > + gfp_t gfp_flags, > + bool *new) > { > struct pnfs_layout_range arg = { > .iomode = iomode, > @@ -1764,8 +1765,12 @@ struct pnfs_layout_segment * > if (lseg) { > trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, > PNFS_UPDATE_LAYOUT_FOUND_CACHED); > + if (new) > + *new = false; > goto out_unlock; > } > + if (new) > + *new = true; > > if (!nfs4_valid_open_stateid(ctx->state)) { > trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, > @@ -2126,7 +2131,8 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, > rd_size, > IOMODE_READ, > false, > - GFP_KERNEL); > + GFP_KERNEL, > + NULL); > if (IS_ERR(pgio->pg_lseg)) { > pgio->pg_error = PTR_ERR(pgio->pg_lseg); > pgio->pg_lseg = NULL; > @@ -2153,7 +2159,8 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, > wb_size, > IOMODE_RW, > false, > - GFP_NOFS); > + GFP_NOFS, > + NULL); > if (IS_ERR(pgio->pg_lseg)) { > pgio->pg_error = PTR_ERR(pgio->pg_lseg); > pgio->pg_lseg = NULL; > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h > index 99731e3..978fab0 100644 > --- a/fs/nfs/pnfs.h > +++ b/fs/nfs/pnfs.h > @@ -291,7 +291,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, > u64 count, > enum pnfs_iomode iomode, > bool strict_iomode, > - gfp_t gfp_flags); > + gfp_t gfp_flags, > + bool *new); > void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, > const nfs4_stateid *arg_stateid, > const struct pnfs_layout_range *range, > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Olga, Apologies for missing this patch. It was hiding in my 'linux-fsdevel' mailbox, so I didn't recognise it as a NFS patch. On Fri, 2017-06-30 at 15:52 -0400, Olga Kornievskaia wrote: > There is a regression by commit 8d40b0f14846 ("NFS filelayout:call > GETDEVICEINFO after pnfs_layout_process completes"). It leaves the > DS mount dangling. > > Previously, filelayout_alloc_sec() would call > filelayout_check_layout() > which would call nfs4_find_get_deviceid which ups the count on the > device_id. It's only called once and it's matched by the > filelayout_free_lseg() that calls nfs4_fl_put_deviceid(). > > After that patch, each read/write ends up calling > nfs4_find_get_deviceid > and there is no balance for that. Instead, do nfs4_fl_put_deviceid() > in the filelayout's .pg_cleanup and remove it from > filelayout_free_lseg. > > But we still need a reference to hold over the lifetime of the > segment. > For every new lseg that's created we need to take a reference on > deviceid > that uses it. It will be released in the "free_lseg" routine. This is what I'm not understanding. If you have a reference in the layout segment, then why do you need to call nfs4_find_get_deviceid() in the read/write code? Isn't it sufficient to change the "pg_init" calls to check whether or not the struct nfs4_filelayout_segment has set a value for dsaddr (that needs to be done with care to avoid races - cmpxchg() is your friend), and then rely on that reference being set for the remainder of the layout segment lifetime? -- Trond Myklebust Linux NFS client maintainer, PrimaryData trond.myklebust@primarydata.com
> On Jul 20, 2017, at 3:56 PM, Trond Myklebust <trondmy@primarydata.com> wrote: > > Hi Olga, > > Apologies for missing this patch. It was hiding in my 'linux-fsdevel' > mailbox, so I didn't recognise it as a NFS patch. > Yeah after I mailed it out I realized I cc-ed fsdevel incorrectly. > On Fri, 2017-06-30 at 15:52 -0400, Olga Kornievskaia wrote: >> There is a regression by commit 8d40b0f14846 ("NFS filelayout:call >> GETDEVICEINFO after pnfs_layout_process completes"). It leaves the >> DS mount dangling. >> >> Previously, filelayout_alloc_sec() would call >> filelayout_check_layout() >> which would call nfs4_find_get_deviceid which ups the count on the >> device_id. It's only called once and it's matched by the >> filelayout_free_lseg() that calls nfs4_fl_put_deviceid(). >> >> After that patch, each read/write ends up calling >> nfs4_find_get_deviceid >> and there is no balance for that. Instead, do nfs4_fl_put_deviceid() >> in the filelayout's .pg_cleanup and remove it from >> filelayout_free_lseg. >> >> But we still need a reference to hold over the lifetime of the >> segment. >> For every new lseg that's created we need to take a reference on >> deviceid >> that uses it. It will be released in the "free_lseg" routine. > > This is what I'm not understanding. If you have a reference in the > layout segment, then why do you need to call nfs4_find_get_deviceid() > in the read/write code? I think I’m probably misunderstanding the question. It sounds to me that you asking me for why the commit 8d40b0f14846 was done the way it was done (I’d would say it was done as per your suggestion). I would say the call to nfs4_find_get_deviceid() has always been in the read/write code. It was a part of the pnfs_update_layout() but the commit 8d40b0f14846 moved it out of it (so that the layoutget was complete) and then the call to the getdeviceinfo would be done. > Isn't it sufficient to change the "pg_init" calls to check whether or > not the struct nfs4_filelayout_segment has set a value for dsaddr (that > needs to be done with care to avoid races - cmpxchg() is your friend), > and then rely on that reference being set for the remainder of the > layout segment lifetime? Are you suggesting to change when getdeviceinfo is suppose to be called? > > > -- > Trond Myklebust > Linux NFS client maintainer, PrimaryData > trond.myklebust@primarydata.com -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 1cf85d6..86d694e 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -909,9 +909,10 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) struct pnfs_layout_hdr *lo; struct nfs4_filelayout_segment *fl; int status; + bool new_layout = false; lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode, - gfp_flags); + gfp_flags, &new_layout); if (!lseg) lseg = ERR_PTR(-ENOMEM); if (IS_ERR(lseg)) @@ -924,7 +925,8 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) if (status) { pnfs_put_lseg(lseg); lseg = ERR_PTR(status); - } + } else if (new_layout) + nfs4_get_deviceid(&fl->dsaddr->id_node); out: return lseg; } @@ -991,18 +993,29 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) nfs_pageio_reset_write_mds(pgio); } +static void filelayout_pg_cleanup(struct nfs_pageio_descriptor *desc) +{ + if (desc->pg_lseg) { + struct nfs4_filelayout_segment *fl = + FILELAYOUT_LSEG(desc->pg_lseg); + + nfs4_fl_put_deviceid(fl->dsaddr); + } + pnfs_generic_pg_cleanup(desc); +} + static const struct nfs_pageio_ops filelayout_pg_read_ops = { .pg_init = filelayout_pg_init_read, .pg_test = filelayout_pg_test, .pg_doio = pnfs_generic_pg_readpages, - .pg_cleanup = pnfs_generic_pg_cleanup, + .pg_cleanup = filelayout_pg_cleanup, }; static const struct nfs_pageio_ops filelayout_pg_write_ops = { .pg_init = filelayout_pg_init_write, .pg_test = filelayout_pg_test, .pg_doio = pnfs_generic_pg_writepages, - .pg_cleanup = pnfs_generic_pg_cleanup, + .pg_cleanup = filelayout_pg_cleanup, }; static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 23542dc..53a4a19 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -820,7 +820,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout) NFS4_MAX_UINT64, IOMODE_READ, strict_iomode, - GFP_KERNEL); + GFP_KERNEL, + NULL); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -904,7 +905,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout) NFS4_MAX_UINT64, IOMODE_RW, false, - GFP_NOFS); + GFP_NOFS, + NULL); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -960,7 +962,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout) NFS4_MAX_UINT64, IOMODE_RW, false, - GFP_NOFS); + GFP_NOFS, + NULL); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c383d09..fb011c6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1698,7 +1698,8 @@ struct pnfs_layout_segment * u64 count, enum pnfs_iomode iomode, bool strict_iomode, - gfp_t gfp_flags) + gfp_t gfp_flags, + bool *new) { struct pnfs_layout_range arg = { .iomode = iomode, @@ -1764,8 +1765,12 @@ struct pnfs_layout_segment * if (lseg) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_FOUND_CACHED); + if (new) + *new = false; goto out_unlock; } + if (new) + *new = true; if (!nfs4_valid_open_stateid(ctx->state)) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, @@ -2126,7 +2131,8 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, rd_size, IOMODE_READ, false, - GFP_KERNEL); + GFP_KERNEL, + NULL); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -2153,7 +2159,8 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, wb_size, IOMODE_RW, false, - GFP_NOFS); + GFP_NOFS, + NULL); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 99731e3..978fab0 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -291,7 +291,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, u64 count, enum pnfs_iomode iomode, bool strict_iomode, - gfp_t gfp_flags); + gfp_t gfp_flags, + bool *new); void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, const nfs4_stateid *arg_stateid, const struct pnfs_layout_range *range,
There is a regression by commit 8d40b0f14846 ("NFS filelayout:call GETDEVICEINFO after pnfs_layout_process completes"). It leaves the DS mount dangling. Previously, filelayout_alloc_sec() would call filelayout_check_layout() which would call nfs4_find_get_deviceid which ups the count on the device_id. It's only called once and it's matched by the filelayout_free_lseg() that calls nfs4_fl_put_deviceid(). After that patch, each read/write ends up calling nfs4_find_get_deviceid and there is no balance for that. Instead, do nfs4_fl_put_deviceid() in the filelayout's .pg_cleanup and remove it from filelayout_free_lseg. But we still need a reference to hold over the lifetime of the segment. For every new lseg that's created we need to take a reference on deviceid that uses it. It will be released in the "free_lseg" routine. Signed-off-by: Olga Kornievskaia <kolga@netapp.com> --- fs/nfs/filelayout/filelayout.c | 21 +++++++++++++++++---- fs/nfs/flexfilelayout/flexfilelayout.c | 9 ++++++--- fs/nfs/pnfs.c | 13 ++++++++++--- fs/nfs/pnfs.h | 3 ++- 4 files changed, 35 insertions(+), 11 deletions(-)