Message ID | 20240304224714.10370-3-neilb@suse.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | nfsd: fix deadlock in move_to_close_lru() | expand |
On Tue, Mar 05, 2024 at 09:45:22AM +1100, NeilBrown wrote: > Currently find_openstateowner_str looks are done both in > nfsd4_process_open1() and alloc_init_open_stateowner() - the latter > possibly being a surprise based on its name. > > It would be easier to follow, and more conformant to common patterns, if > the lookup was all in the one place. > > So replace alloc_init_open_stateowner() with > find_or_alloc_open_stateowner() and use the latter in > nfsd4_process_open1() without any calls to find_openstateowner_str(). > > This means all finds are find_openstateowner_str_locked() and > find_openstateowner_str() is no longer needed. So discard > find_openstateowner_str() and rename find_openstateowner_str_locked() to > find_openstateowner_str(). > > Signed-off-by: NeilBrown <neilb@suse.de> > --- > fs/nfsd/nfs4state.c | 93 +++++++++++++++++++-------------------------- > 1 file changed, 40 insertions(+), 53 deletions(-) > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index b9b3a2601675..0c1058e8cc4b 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -541,7 +541,7 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner) > } > > static struct nfs4_openowner * > -find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, > +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, > struct nfs4_client *clp) > { > struct nfs4_stateowner *so; > @@ -558,18 +558,6 @@ find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, > return NULL; > } > > -static struct nfs4_openowner * > -find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, > - struct nfs4_client *clp) > -{ > - struct nfs4_openowner *oo; > - > - spin_lock(&clp->cl_lock); > - oo = find_openstateowner_str_locked(hashval, open, clp); > - spin_unlock(&clp->cl_lock); > - return oo; > -} > - > static inline u32 > opaque_hashval(const void *ptr, int nbytes) > { > @@ -4855,34 +4843,46 @@ nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) > } > > static struct nfs4_openowner * > -alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, > - struct nfsd4_compound_state *cstate) > +find_or_alloc_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, > + struct nfsd4_compound_state *cstate) > { > struct nfs4_client *clp = cstate->clp; > - struct nfs4_openowner *oo, *ret; > + struct nfs4_openowner *oo, *new = NULL; > > - oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); > - if (!oo) > - return NULL; > - oo->oo_owner.so_ops = &openowner_ops; > - oo->oo_owner.so_is_open_owner = 1; > - oo->oo_owner.so_seqid = open->op_seqid; > - oo->oo_flags = 0; > - if (nfsd4_has_session(cstate)) > - oo->oo_flags |= NFS4_OO_CONFIRMED; > - oo->oo_time = 0; > - oo->oo_last_closed_stid = NULL; > - INIT_LIST_HEAD(&oo->oo_close_lru); > - spin_lock(&clp->cl_lock); > - ret = find_openstateowner_str_locked(strhashval, open, clp); > - if (ret == NULL) { > - hash_openowner(oo, clp, strhashval); > - ret = oo; > - } else > - nfs4_free_stateowner(&oo->oo_owner); > + while (1) { > + spin_lock(&clp->cl_lock); > + oo = find_openstateowner_str(strhashval, open, clp); > + if (oo && !(oo->oo_flags & NFS4_OO_CONFIRMED)) { > + /* Replace unconfirmed owners without checking for replay. */ > + release_openowner(oo); > + oo = NULL; > + } > + if (oo) { > + spin_unlock(&clp->cl_lock); > + if (new) > + nfs4_free_stateowner(&new->oo_owner); > + return oo; > + } > + if (new) { > + hash_openowner(new, clp, strhashval); > + spin_unlock(&clp->cl_lock); > + return new; > + } > + spin_unlock(&clp->cl_lock); > > - spin_unlock(&clp->cl_lock); > - return ret; > + new = alloc_stateowner(openowner_slab, &open->op_owner, clp); > + if (!new) > + return NULL; > + new->oo_owner.so_ops = &openowner_ops; > + new->oo_owner.so_is_open_owner = 1; > + new->oo_owner.so_seqid = open->op_seqid; > + new->oo_flags = 0; > + if (nfsd4_has_session(cstate)) > + new->oo_flags |= NFS4_OO_CONFIRMED; > + new->oo_time = 0; > + new->oo_last_closed_stid = NULL; > + INIT_LIST_HEAD(&new->oo_close_lru); > + } > } > > static struct nfs4_ol_stateid * > @@ -5331,28 +5331,15 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, > clp = cstate->clp; > > strhashval = ownerstr_hashval(&open->op_owner); > - oo = find_openstateowner_str(strhashval, open, clp); > + oo = find_or_alloc_open_stateowner(strhashval, open, cstate); > open->op_openowner = oo; > if (!oo) > - goto new_owner; > - if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { > - /* Replace unconfirmed owners without checking for replay. */ > - release_openowner(oo); > - open->op_openowner = NULL; > - goto new_owner; > - } > + return nfserr_jukebox; > nfsd4_cstate_assign_replay(cstate, &oo->oo_owner); > status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); > if (status) > return status; > - goto alloc_stateid; > -new_owner: > - oo = alloc_init_open_stateowner(strhashval, open, cstate); > - if (oo == NULL) > - return nfserr_jukebox; > - open->op_openowner = oo; > - nfsd4_cstate_assign_replay(cstate, &oo->oo_owner); > -alloc_stateid: > + > open->op_stp = nfs4_alloc_open_stateid(clp); > if (!open->op_stp) > return nfserr_jukebox; > -- > 2.43.0 > While running the NFSv4.0 pynfs tests with KASAN and lock debugging enabled, I get a number of CPU soft lockup warnings on the test server and then it wedges hard. I bisected to this patch. Because we are just a day shy of the v6.9 merge window, I'm going to drop these four patches for v6.9. We can try them again for v6.10.
On Sun, 10 Mar 2024, Chuck Lever wrote: > On Tue, Mar 05, 2024 at 09:45:22AM +1100, NeilBrown wrote: > > Currently find_openstateowner_str looks are done both in > > nfsd4_process_open1() and alloc_init_open_stateowner() - the latter > > possibly being a surprise based on its name. > > > > It would be easier to follow, and more conformant to common patterns, if > > the lookup was all in the one place. > > > > So replace alloc_init_open_stateowner() with > > find_or_alloc_open_stateowner() and use the latter in > > nfsd4_process_open1() without any calls to find_openstateowner_str(). > > > > This means all finds are find_openstateowner_str_locked() and > > find_openstateowner_str() is no longer needed. So discard > > find_openstateowner_str() and rename find_openstateowner_str_locked() to > > find_openstateowner_str(). > > > > Signed-off-by: NeilBrown <neilb@suse.de> > > --- > > fs/nfsd/nfs4state.c | 93 +++++++++++++++++++-------------------------- > > 1 file changed, 40 insertions(+), 53 deletions(-) > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > > index b9b3a2601675..0c1058e8cc4b 100644 > > --- a/fs/nfsd/nfs4state.c > > +++ b/fs/nfsd/nfs4state.c > > @@ -541,7 +541,7 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner) > > } > > > > static struct nfs4_openowner * > > -find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, > > +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, > > struct nfs4_client *clp) > > { > > struct nfs4_stateowner *so; > > @@ -558,18 +558,6 @@ find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, > > return NULL; > > } > > > > -static struct nfs4_openowner * > > -find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, > > - struct nfs4_client *clp) > > -{ > > - struct nfs4_openowner *oo; > > - > > - spin_lock(&clp->cl_lock); > > - oo = find_openstateowner_str_locked(hashval, open, clp); > > - spin_unlock(&clp->cl_lock); > > - return oo; > > -} > > - > > static inline u32 > > opaque_hashval(const void *ptr, int nbytes) > > { > > @@ -4855,34 +4843,46 @@ nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) > > } > > > > static struct nfs4_openowner * > > -alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, > > - struct nfsd4_compound_state *cstate) > > +find_or_alloc_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, > > + struct nfsd4_compound_state *cstate) > > { > > struct nfs4_client *clp = cstate->clp; > > - struct nfs4_openowner *oo, *ret; > > + struct nfs4_openowner *oo, *new = NULL; > > > > - oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); > > - if (!oo) > > - return NULL; > > - oo->oo_owner.so_ops = &openowner_ops; > > - oo->oo_owner.so_is_open_owner = 1; > > - oo->oo_owner.so_seqid = open->op_seqid; > > - oo->oo_flags = 0; > > - if (nfsd4_has_session(cstate)) > > - oo->oo_flags |= NFS4_OO_CONFIRMED; > > - oo->oo_time = 0; > > - oo->oo_last_closed_stid = NULL; > > - INIT_LIST_HEAD(&oo->oo_close_lru); > > - spin_lock(&clp->cl_lock); > > - ret = find_openstateowner_str_locked(strhashval, open, clp); > > - if (ret == NULL) { > > - hash_openowner(oo, clp, strhashval); > > - ret = oo; > > - } else > > - nfs4_free_stateowner(&oo->oo_owner); > > + while (1) { > > + spin_lock(&clp->cl_lock); > > + oo = find_openstateowner_str(strhashval, open, clp); > > + if (oo && !(oo->oo_flags & NFS4_OO_CONFIRMED)) { > > + /* Replace unconfirmed owners without checking for replay. */ > > + release_openowner(oo); > > + oo = NULL; > > + } > > + if (oo) { > > + spin_unlock(&clp->cl_lock); > > + if (new) > > + nfs4_free_stateowner(&new->oo_owner); > > + return oo; > > + } > > + if (new) { > > + hash_openowner(new, clp, strhashval); > > + spin_unlock(&clp->cl_lock); > > + return new; > > + } > > + spin_unlock(&clp->cl_lock); > > > > - spin_unlock(&clp->cl_lock); > > - return ret; > > + new = alloc_stateowner(openowner_slab, &open->op_owner, clp); > > + if (!new) > > + return NULL; > > + new->oo_owner.so_ops = &openowner_ops; > > + new->oo_owner.so_is_open_owner = 1; > > + new->oo_owner.so_seqid = open->op_seqid; > > + new->oo_flags = 0; > > + if (nfsd4_has_session(cstate)) > > + new->oo_flags |= NFS4_OO_CONFIRMED; > > + new->oo_time = 0; > > + new->oo_last_closed_stid = NULL; > > + INIT_LIST_HEAD(&new->oo_close_lru); > > + } > > } > > > > static struct nfs4_ol_stateid * > > @@ -5331,28 +5331,15 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, > > clp = cstate->clp; > > > > strhashval = ownerstr_hashval(&open->op_owner); > > - oo = find_openstateowner_str(strhashval, open, clp); > > + oo = find_or_alloc_open_stateowner(strhashval, open, cstate); > > open->op_openowner = oo; > > if (!oo) > > - goto new_owner; > > - if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { > > - /* Replace unconfirmed owners without checking for replay. */ > > - release_openowner(oo); > > - open->op_openowner = NULL; > > - goto new_owner; > > - } > > + return nfserr_jukebox; > > nfsd4_cstate_assign_replay(cstate, &oo->oo_owner); > > status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); > > if (status) > > return status; > > - goto alloc_stateid; > > -new_owner: > > - oo = alloc_init_open_stateowner(strhashval, open, cstate); > > - if (oo == NULL) > > - return nfserr_jukebox; > > - open->op_openowner = oo; > > - nfsd4_cstate_assign_replay(cstate, &oo->oo_owner); > > -alloc_stateid: > > + > > open->op_stp = nfs4_alloc_open_stateid(clp); > > if (!open->op_stp) > > return nfserr_jukebox; > > -- > > 2.43.0 > > > > While running the NFSv4.0 pynfs tests with KASAN and lock debugging > enabled, I get a number of CPU soft lockup warnings on the test > server and then it wedges hard. I bisected to this patch. Almost certainly this is the problem Dan Carpenter found and reported. > > Because we are just a day shy of the v6.9 merge window, I'm going to > drop these four patches for v6.9. We can try them again for v6.10. I'll post a revised version of this patch - it shouldn't affect the subsequent patches (though I can resend them to if/when you want). Up to you when it lands of course. NeilBrown
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b9b3a2601675..0c1058e8cc4b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -541,7 +541,7 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner) } static struct nfs4_openowner * -find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, struct nfs4_client *clp) { struct nfs4_stateowner *so; @@ -558,18 +558,6 @@ find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, return NULL; } -static struct nfs4_openowner * -find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, - struct nfs4_client *clp) -{ - struct nfs4_openowner *oo; - - spin_lock(&clp->cl_lock); - oo = find_openstateowner_str_locked(hashval, open, clp); - spin_unlock(&clp->cl_lock); - return oo; -} - static inline u32 opaque_hashval(const void *ptr, int nbytes) { @@ -4855,34 +4843,46 @@ nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) } static struct nfs4_openowner * -alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, - struct nfsd4_compound_state *cstate) +find_or_alloc_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, + struct nfsd4_compound_state *cstate) { struct nfs4_client *clp = cstate->clp; - struct nfs4_openowner *oo, *ret; + struct nfs4_openowner *oo, *new = NULL; - oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); - if (!oo) - return NULL; - oo->oo_owner.so_ops = &openowner_ops; - oo->oo_owner.so_is_open_owner = 1; - oo->oo_owner.so_seqid = open->op_seqid; - oo->oo_flags = 0; - if (nfsd4_has_session(cstate)) - oo->oo_flags |= NFS4_OO_CONFIRMED; - oo->oo_time = 0; - oo->oo_last_closed_stid = NULL; - INIT_LIST_HEAD(&oo->oo_close_lru); - spin_lock(&clp->cl_lock); - ret = find_openstateowner_str_locked(strhashval, open, clp); - if (ret == NULL) { - hash_openowner(oo, clp, strhashval); - ret = oo; - } else - nfs4_free_stateowner(&oo->oo_owner); + while (1) { + spin_lock(&clp->cl_lock); + oo = find_openstateowner_str(strhashval, open, clp); + if (oo && !(oo->oo_flags & NFS4_OO_CONFIRMED)) { + /* Replace unconfirmed owners without checking for replay. */ + release_openowner(oo); + oo = NULL; + } + if (oo) { + spin_unlock(&clp->cl_lock); + if (new) + nfs4_free_stateowner(&new->oo_owner); + return oo; + } + if (new) { + hash_openowner(new, clp, strhashval); + spin_unlock(&clp->cl_lock); + return new; + } + spin_unlock(&clp->cl_lock); - spin_unlock(&clp->cl_lock); - return ret; + new = alloc_stateowner(openowner_slab, &open->op_owner, clp); + if (!new) + return NULL; + new->oo_owner.so_ops = &openowner_ops; + new->oo_owner.so_is_open_owner = 1; + new->oo_owner.so_seqid = open->op_seqid; + new->oo_flags = 0; + if (nfsd4_has_session(cstate)) + new->oo_flags |= NFS4_OO_CONFIRMED; + new->oo_time = 0; + new->oo_last_closed_stid = NULL; + INIT_LIST_HEAD(&new->oo_close_lru); + } } static struct nfs4_ol_stateid * @@ -5331,28 +5331,15 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, clp = cstate->clp; strhashval = ownerstr_hashval(&open->op_owner); - oo = find_openstateowner_str(strhashval, open, clp); + oo = find_or_alloc_open_stateowner(strhashval, open, cstate); open->op_openowner = oo; if (!oo) - goto new_owner; - if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { - /* Replace unconfirmed owners without checking for replay. */ - release_openowner(oo); - open->op_openowner = NULL; - goto new_owner; - } + return nfserr_jukebox; nfsd4_cstate_assign_replay(cstate, &oo->oo_owner); status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); if (status) return status; - goto alloc_stateid; -new_owner: - oo = alloc_init_open_stateowner(strhashval, open, cstate); - if (oo == NULL) - return nfserr_jukebox; - open->op_openowner = oo; - nfsd4_cstate_assign_replay(cstate, &oo->oo_owner); -alloc_stateid: + open->op_stp = nfs4_alloc_open_stateid(clp); if (!open->op_stp) return nfserr_jukebox;
Currently find_openstateowner_str looks are done both in nfsd4_process_open1() and alloc_init_open_stateowner() - the latter possibly being a surprise based on its name. It would be easier to follow, and more conformant to common patterns, if the lookup was all in the one place. So replace alloc_init_open_stateowner() with find_or_alloc_open_stateowner() and use the latter in nfsd4_process_open1() without any calls to find_openstateowner_str(). This means all finds are find_openstateowner_str_locked() and find_openstateowner_str() is no longer needed. So discard find_openstateowner_str() and rename find_openstateowner_str_locked() to find_openstateowner_str(). Signed-off-by: NeilBrown <neilb@suse.de> --- fs/nfsd/nfs4state.c | 93 +++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 53 deletions(-)