diff mbox series

[RFC] NFSD: Bump the ref count on nf_inode

Message ID 165720933938.1180.14325183467695610136.stgit@klimt.1015granger.net (mailing list archive)
State New, archived
Headers show
Series [RFC] NFSD: Bump the ref count on nf_inode | expand

Commit Message

Chuck Lever July 7, 2022, 3:58 p.m. UTC
The documenting comment for struct nf_file states:

/*
 * A representation of a file that has been opened by knfsd. These are hashed
 * in the hashtable by inode pointer value. Note that this object doesn't
 * hold a reference to the inode by itself, so the nf_inode pointer should
 * never be dereferenced, only used for comparison.
 */

However, nfsd_file_mark_find_or_create() does dereference the pointer stored
in this field.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/filecache.c |    3 +++
 fs/nfsd/filecache.h |    4 +---
 2 files changed, 4 insertions(+), 3 deletions(-)

Hi Jeff-

I'm still testing this one, but I'm wondering what you think of it.
I did hit a KASAN splat that might be related, but it's not 100%
reproducible.

Comments

Jeff Layton July 7, 2022, 4:55 p.m. UTC | #1
On Thu, 2022-07-07 at 11:58 -0400, Chuck Lever wrote:
> The documenting comment for struct nf_file states:
> 
> /*
>  * A representation of a file that has been opened by knfsd. These are hashed
>  * in the hashtable by inode pointer value. Note that this object doesn't
>  * hold a reference to the inode by itself, so the nf_inode pointer should
>  * never be dereferenced, only used for comparison.
>  */
> 
> However, nfsd_file_mark_find_or_create() does dereference the pointer stored
> in this field.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/filecache.c |    3 +++
>  fs/nfsd/filecache.h |    4 +---
>  2 files changed, 4 insertions(+), 3 deletions(-)
> 
> Hi Jeff-
> 
> I'm still testing this one, but I'm wondering what you think of it.
> I did hit a KASAN splat that might be related, but it's not 100%
> reproducible.
> 

My first thought is "what the hell was I thinking, tracking an inode
field without holding a reference to it?"

But now that I look, the nf_inode value only gets dereferenced in one
place -- nfs4_show_superblock, and I think that's a bug. The comments
over struct nfsd_file say:

"Note that this object doesn't hold a reference to the inode by itself,
so the nf_inode pointer should never be dereferenced, only used for
comparison."

We should probably annotate nf_inode better. __attribute__((noderef))
maybe? It would also be good to make nfs4_show_superblock use a
different way to get the sb.

In any case, this is unlikely to fix anything unless the crash happened
in nfs4_show_superblock.


> 
> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> index 9cb2d590c036..7b43bb427a53 100644
> --- a/fs/nfsd/filecache.c
> +++ b/fs/nfsd/filecache.c
> @@ -180,6 +180,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
>  		nf->nf_cred = get_current_cred();
>  		nf->nf_net = net;
>  		nf->nf_flags = 0;
> +		ihold(inode);
>  		nf->nf_inode = inode;
>  		nf->nf_hashval = hashval;
>  		refcount_set(&nf->nf_ref, 1);
> @@ -210,6 +211,7 @@ nfsd_file_free(struct nfsd_file *nf)
>  		fput(nf->nf_file);
>  		flush = true;
>  	}
> +	iput(nf->nf_inode);
>  	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
>  	return flush;
>  }
> @@ -940,6 +942,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>  	if (nf == NULL)
>  		goto open_file;
>  	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
> +	iput(new->nf_inode);
>  	nfsd_file_slab_free(&new->nf_rcu);
>  
>  wait_for_construction:
> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
> index 1da0c79a5580..01fbf6e88cce 100644
> --- a/fs/nfsd/filecache.h
> +++ b/fs/nfsd/filecache.h
> @@ -24,9 +24,7 @@ struct nfsd_file_mark {
>  
>  /*
>   * A representation of a file that has been opened by knfsd. These are hashed
> - * in the hashtable by inode pointer value. Note that this object doesn't
> - * hold a reference to the inode by itself, so the nf_inode pointer should
> - * never be dereferenced, only used for comparison.
> + * in the hashtable by inode pointer value.
>   */
>  struct nfsd_file {
>  	struct hlist_node	nf_node;
> 
>
Chuck Lever July 7, 2022, 4:58 p.m. UTC | #2
> On Jul 7, 2022, at 12:55 PM, Jeff Layton <jlayton@redhat.com> wrote:
> 
> On Thu, 2022-07-07 at 11:58 -0400, Chuck Lever wrote:
>> The documenting comment for struct nf_file states:
>> 
>> /*
>> * A representation of a file that has been opened by knfsd. These are hashed
>> * in the hashtable by inode pointer value. Note that this object doesn't
>> * hold a reference to the inode by itself, so the nf_inode pointer should
>> * never be dereferenced, only used for comparison.
>> */
>> 
>> However, nfsd_file_mark_find_or_create() does dereference the pointer stored
>> in this field.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> fs/nfsd/filecache.c | 3 +++
>> fs/nfsd/filecache.h | 4 +---
>> 2 files changed, 4 insertions(+), 3 deletions(-)
>> 
>> Hi Jeff-
>> 
>> I'm still testing this one, but I'm wondering what you think of it.
>> I did hit a KASAN splat that might be related, but it's not 100%
>> reproducible.
>> 
> 
> My first thought is "what the hell was I thinking, tracking an inode
> field without holding a reference to it?"
> 
> But now that I look, the nf_inode value only gets dereferenced in one
> place -- nfs4_show_superblock, and I think that's a bug. The comments
> over struct nfsd_file say:
> 
> "Note that this object doesn't hold a reference to the inode by itself,
> so the nf_inode pointer should never be dereferenced, only used for
> comparison."
> 
> We should probably annotate nf_inode better. __attribute__((noderef))
> maybe? It would also be good to make nfs4_show_superblock use a
> different way to get the sb.
> 
> In any case, this is unlikely to fix anything unless the crash happened
> in nfs4_show_superblock.

Thanks for the look. I will treat this as a clean-up, then, and see
what can be done about nfsd_file_mark_find_or_create() and
nfs4_show_superblock().


>> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
>> index 9cb2d590c036..7b43bb427a53 100644
>> --- a/fs/nfsd/filecache.c
>> +++ b/fs/nfsd/filecache.c
>> @@ -180,6 +180,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
>> 		nf->nf_cred = get_current_cred();
>> 		nf->nf_net = net;
>> 		nf->nf_flags = 0;
>> +		ihold(inode);
>> 		nf->nf_inode = inode;
>> 		nf->nf_hashval = hashval;
>> 		refcount_set(&nf->nf_ref, 1);
>> @@ -210,6 +211,7 @@ nfsd_file_free(struct nfsd_file *nf)
>> 		fput(nf->nf_file);
>> 		flush = true;
>> 	}
>> +	iput(nf->nf_inode);
>> 	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
>> 	return flush;
>> }
>> @@ -940,6 +942,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> 	if (nf == NULL)
>> 		goto open_file;
>> 	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
>> +	iput(new->nf_inode);
>> 	nfsd_file_slab_free(&new->nf_rcu);
>> 
>> wait_for_construction:
>> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
>> index 1da0c79a5580..01fbf6e88cce 100644
>> --- a/fs/nfsd/filecache.h
>> +++ b/fs/nfsd/filecache.h
>> @@ -24,9 +24,7 @@ struct nfsd_file_mark {
>> 
>> /*
>> * A representation of a file that has been opened by knfsd. These are hashed
>> - * in the hashtable by inode pointer value. Note that this object doesn't
>> - * hold a reference to the inode by itself, so the nf_inode pointer should
>> - * never be dereferenced, only used for comparison.
>> + * in the hashtable by inode pointer value.
>> */
>> struct nfsd_file {
>> 	struct hlist_node	nf_node;
>> 
>> 
> 
> -- 
> Jeff Layton <jlayton@redhat.com>

--
Chuck Lever
Jeff Layton July 7, 2022, 4:59 p.m. UTC | #3
On Thu, 2022-07-07 at 12:55 -0400, Jeff Layton wrote:
> On Thu, 2022-07-07 at 11:58 -0400, Chuck Lever wrote:
> > The documenting comment for struct nf_file states:
> > 
> > /*
> >  * A representation of a file that has been opened by knfsd. These are hashed
> >  * in the hashtable by inode pointer value. Note that this object doesn't
> >  * hold a reference to the inode by itself, so the nf_inode pointer should
> >  * never be dereferenced, only used for comparison.
> >  */
> > 
> > However, nfsd_file_mark_find_or_create() does dereference the pointer stored
> > in this field.
> > 
> > Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> > ---
> >  fs/nfsd/filecache.c |    3 +++
> >  fs/nfsd/filecache.h |    4 +---
> >  2 files changed, 4 insertions(+), 3 deletions(-)
> > 
> > Hi Jeff-
> > 
> > I'm still testing this one, but I'm wondering what you think of it.
> > I did hit a KASAN splat that might be related, but it's not 100%
> > reproducible.
> > 
> 
> My first thought is "what the hell was I thinking, tracking an inode
> field without holding a reference to it?"
> 
> But now that I look, the nf_inode value only gets dereferenced in one
> place -- nfs4_show_superblock, and I think that's a bug. The comments
> over struct nfsd_file say:
> 
> "Note that this object doesn't hold a reference to the inode by itself,
> so the nf_inode pointer should never be dereferenced, only used for
> comparison."
> 
> We should probably annotate nf_inode better. __attribute__((noderef))
> maybe? It would also be good to make nfs4_show_superblock use a
> different way to get the sb.
> 
> In any case, this is unlikely to fix anything unless the crash happened
> in nfs4_show_superblock.
> 
> 

One other spot. We also dereference it in nfsd_file_mark_find_or_create,
but I think that specific instance is OK. We know that we still hold a
reference to the inode at that point since it comes from fhp->fh_dentry,
so we shouldn't need to worry about it disappearing out from under us.

What did the crash look like?

> > 
> > diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> > index 9cb2d590c036..7b43bb427a53 100644
> > --- a/fs/nfsd/filecache.c
> > +++ b/fs/nfsd/filecache.c
> > @@ -180,6 +180,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
> >  		nf->nf_cred = get_current_cred();
> >  		nf->nf_net = net;
> >  		nf->nf_flags = 0;
> > +		ihold(inode);
> >  		nf->nf_inode = inode;
> >  		nf->nf_hashval = hashval;
> >  		refcount_set(&nf->nf_ref, 1);
> > @@ -210,6 +211,7 @@ nfsd_file_free(struct nfsd_file *nf)
> >  		fput(nf->nf_file);
> >  		flush = true;
> >  	}
> > +	iput(nf->nf_inode);
> >  	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
> >  	return flush;
> >  }
> > @@ -940,6 +942,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> >  	if (nf == NULL)
> >  		goto open_file;
> >  	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
> > +	iput(new->nf_inode);
> >  	nfsd_file_slab_free(&new->nf_rcu);
> >  
> >  wait_for_construction:
> > diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
> > index 1da0c79a5580..01fbf6e88cce 100644
> > --- a/fs/nfsd/filecache.h
> > +++ b/fs/nfsd/filecache.h
> > @@ -24,9 +24,7 @@ struct nfsd_file_mark {
> >  
> >  /*
> >   * A representation of a file that has been opened by knfsd. These are hashed
> > - * in the hashtable by inode pointer value. Note that this object doesn't
> > - * hold a reference to the inode by itself, so the nf_inode pointer should
> > - * never be dereferenced, only used for comparison.
> > + * in the hashtable by inode pointer value.
> >   */
> >  struct nfsd_file {
> >  	struct hlist_node	nf_node;
> > 
> > 
>
Chuck Lever July 7, 2022, 5:01 p.m. UTC | #4
> On Jul 7, 2022, at 12:59 PM, Jeff Layton <jlayton@redhat.com> wrote:
> 
> On Thu, 2022-07-07 at 12:55 -0400, Jeff Layton wrote:
>> On Thu, 2022-07-07 at 11:58 -0400, Chuck Lever wrote:
>>> The documenting comment for struct nf_file states:
>>> 
>>> /*
>>> * A representation of a file that has been opened by knfsd. These are hashed
>>> * in the hashtable by inode pointer value. Note that this object doesn't
>>> * hold a reference to the inode by itself, so the nf_inode pointer should
>>> * never be dereferenced, only used for comparison.
>>> */
>>> 
>>> However, nfsd_file_mark_find_or_create() does dereference the pointer stored
>>> in this field.
>>> 
>>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>>> ---
>>> fs/nfsd/filecache.c | 3 +++
>>> fs/nfsd/filecache.h | 4 +---
>>> 2 files changed, 4 insertions(+), 3 deletions(-)
>>> 
>>> Hi Jeff-
>>> 
>>> I'm still testing this one, but I'm wondering what you think of it.
>>> I did hit a KASAN splat that might be related, but it's not 100%
>>> reproducible.
>>> 
>> 
>> My first thought is "what the hell was I thinking, tracking an inode
>> field without holding a reference to it?"
>> 
>> But now that I look, the nf_inode value only gets dereferenced in one
>> place -- nfs4_show_superblock, and I think that's a bug. The comments
>> over struct nfsd_file say:
>> 
>> "Note that this object doesn't hold a reference to the inode by itself,
>> so the nf_inode pointer should never be dereferenced, only used for
>> comparison."
>> 
>> We should probably annotate nf_inode better. __attribute__((noderef))
>> maybe? It would also be good to make nfs4_show_superblock use a
>> different way to get the sb.
>> 
>> In any case, this is unlikely to fix anything unless the crash happened
>> in nfs4_show_superblock.
>> 
>> 
> 
> One other spot. We also dereference it in nfsd_file_mark_find_or_create,
> but I think that specific instance is OK. We know that we still hold a
> reference to the inode at that point since it comes from fhp->fh_dentry,
> so we shouldn't need to worry about it disappearing out from under us.

Needs some annotation. I would prefer not to get that pointer from
nf_inode, then. As your comment says: compare only, never deref.


> What did the crash look like?

Jul 06 11:19:32 klimt.1015granger.net kernel: BUG: KASAN: use-after-free in nfsd_file_obj_cmpfn+0x26b/0x49a [nfsd]
Jul 06 11:19:32 klimt.1015granger.net kernel: Read of size 4 at addr ffff888180623e1c by task nfsd/1003
Jul 06 11:19:32 klimt.1015granger.net kernel: 
Jul 06 11:19:32 klimt.1015granger.net kernel: CPU: 3 PID: 1003 Comm: nfsd Not tainted 5.19.0-rc5-00037-g17ba024b204f #3522
Jul 06 11:19:32 klimt.1015granger.net kernel: Hardware name: Supermicro Super Server/X10SRL-F, BIOS 3.3 10/28/2020
Jul 06 11:19:32 klimt.1015granger.net kernel: Call Trace:
Jul 06 11:19:32 klimt.1015granger.net kernel:  <TASK>
Jul 06 11:19:32 klimt.1015granger.net kernel:  dump_stack_lvl+0x56/0x7c
Jul 06 11:19:32 klimt.1015granger.net kernel:  print_report+0x101/0x4bb
Jul 06 11:19:32 klimt.1015granger.net kernel:  kasan_report+0x9f/0xbf
Jul 06 11:19:32 klimt.1015granger.net kernel:  nfsd_file_obj_cmpfn+0x26b/0x49a [nfsd]
Jul 06 11:19:32 klimt.1015granger.net kernel:  rhashtable_lookup.constprop.0+0x143/0x1ca [nfsd]
Jul 06 11:19:32 klimt.1015granger.net kernel:  nfsd_file_do_acquire+0x20b/0x1189 [nfsd]
Jul 06 11:19:32 klimt.1015granger.net kernel:  nfsd_write+0x138/0x255 [nfsd]
Jul 06 11:19:32 klimt.1015granger.net kernel:  nfsd3_proc_write+0x37e/0x3fc [nfsd]
Jul 06 11:19:32 klimt.1015granger.net kernel:  nfsd_dispatch+0x5ed/0x7d0 [nfsd]
Jul 06 11:19:32 klimt.1015granger.net kernel:  svc_process_common+0x8e9/0xefe [sunrpc]
Jul 06 11:19:32 klimt.1015granger.net kernel:  svc_process+0x34d/0x378 [sunrpc]
Jul 06 11:19:32 klimt.1015granger.net kernel:  nfsd+0x26b/0x34c [nfsd]
Jul 06 11:19:32 klimt.1015granger.net kernel:  kthread+0x249/0x258
Jul 06 11:19:32 klimt.1015granger.net kernel:  ret_from_fork+0x22/0x30
Jul 06 11:19:32 klimt.1015granger.net kernel:  </TASK>

The freed object was actually not an inode, so it's a red herring.
Still chasing it.


>>> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
>>> index 9cb2d590c036..7b43bb427a53 100644
>>> --- a/fs/nfsd/filecache.c
>>> +++ b/fs/nfsd/filecache.c
>>> @@ -180,6 +180,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
>>> 		nf->nf_cred = get_current_cred();
>>> 		nf->nf_net = net;
>>> 		nf->nf_flags = 0;
>>> +		ihold(inode);
>>> 		nf->nf_inode = inode;
>>> 		nf->nf_hashval = hashval;
>>> 		refcount_set(&nf->nf_ref, 1);
>>> @@ -210,6 +211,7 @@ nfsd_file_free(struct nfsd_file *nf)
>>> 		fput(nf->nf_file);
>>> 		flush = true;
>>> 	}
>>> +	iput(nf->nf_inode);
>>> 	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
>>> 	return flush;
>>> }
>>> @@ -940,6 +942,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>> 	if (nf == NULL)
>>> 		goto open_file;
>>> 	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
>>> +	iput(new->nf_inode);
>>> 	nfsd_file_slab_free(&new->nf_rcu);
>>> 
>>> wait_for_construction:
>>> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
>>> index 1da0c79a5580..01fbf6e88cce 100644
>>> --- a/fs/nfsd/filecache.h
>>> +++ b/fs/nfsd/filecache.h
>>> @@ -24,9 +24,7 @@ struct nfsd_file_mark {
>>> 
>>> /*
>>> * A representation of a file that has been opened by knfsd. These are hashed
>>> - * in the hashtable by inode pointer value. Note that this object doesn't
>>> - * hold a reference to the inode by itself, so the nf_inode pointer should
>>> - * never be dereferenced, only used for comparison.
>>> + * in the hashtable by inode pointer value.
>>> */
>>> struct nfsd_file {
>>> 	struct hlist_node	nf_node;
>>> 
>>> 
>> 
> 
> -- 
> Jeff Layton <jlayton@redhat.com>

--
Chuck Lever
Jeff Layton July 7, 2022, 5:11 p.m. UTC | #5
On Thu, 2022-07-07 at 16:58 +0000, Chuck Lever III wrote:
> 
> > On Jul 7, 2022, at 12:55 PM, Jeff Layton <jlayton@redhat.com> wrote:
> > 
> > On Thu, 2022-07-07 at 11:58 -0400, Chuck Lever wrote:
> > > The documenting comment for struct nf_file states:
> > > 
> > > /*
> > > * A representation of a file that has been opened by knfsd. These are hashed
> > > * in the hashtable by inode pointer value. Note that this object doesn't
> > > * hold a reference to the inode by itself, so the nf_inode pointer should
> > > * never be dereferenced, only used for comparison.
> > > */
> > > 
> > > However, nfsd_file_mark_find_or_create() does dereference the pointer stored
> > > in this field.
> > > 
> > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> > > ---
> > > fs/nfsd/filecache.c | 3 +++
> > > fs/nfsd/filecache.h | 4 +---
> > > 2 files changed, 4 insertions(+), 3 deletions(-)
> > > 
> > > Hi Jeff-
> > > 
> > > I'm still testing this one, but I'm wondering what you think of it.
> > > I did hit a KASAN splat that might be related, but it's not 100%
> > > reproducible.
> > > 
> > 
> > My first thought is "what the hell was I thinking, tracking an inode
> > field without holding a reference to it?"
> > 
> > But now that I look, the nf_inode value only gets dereferenced in one
> > place -- nfs4_show_superblock, and I think that's a bug. The comments
> > over struct nfsd_file say:
> > 
> > "Note that this object doesn't hold a reference to the inode by itself,
> > so the nf_inode pointer should never be dereferenced, only used for
> > comparison."
> > 
> > We should probably annotate nf_inode better. __attribute__((noderef))
> > maybe? It would also be good to make nfs4_show_superblock use a
> > different way to get the sb.
> > 
> > In any case, this is unlikely to fix anything unless the crash happened
> > in nfs4_show_superblock.
> 
> Thanks for the look. I will treat this as a clean-up, then, and see
> what can be done about nfsd_file_mark_find_or_create() and
> nfs4_show_superblock().
> 
> 

I don't see a real need to hold a separate inode reference in the
nfsd_file as you should have one already by virtue of the open file
itself. It probably won't hurt anything to hold one though if you decide
that's safer.

> > > diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> > > index 9cb2d590c036..7b43bb427a53 100644
> > > --- a/fs/nfsd/filecache.c
> > > +++ b/fs/nfsd/filecache.c
> > > @@ -180,6 +180,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
> > > 		nf->nf_cred = get_current_cred();
> > > 		nf->nf_net = net;
> > > 		nf->nf_flags = 0;
> > > +		ihold(inode);
> > > 		nf->nf_inode = inode;
> > > 		nf->nf_hashval = hashval;
> > > 		refcount_set(&nf->nf_ref, 1);
> > > @@ -210,6 +211,7 @@ nfsd_file_free(struct nfsd_file *nf)
> > > 		fput(nf->nf_file);
> > > 		flush = true;
> > > 	}
> > > +	iput(nf->nf_inode);
> > > 	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
> > > 	return flush;
> > > }
> > > @@ -940,6 +942,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> > > 	if (nf == NULL)
> > > 		goto open_file;
> > > 	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
> > > +	iput(new->nf_inode);
> > > 	nfsd_file_slab_free(&new->nf_rcu);
> > > 
> > > wait_for_construction:
> > > diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
> > > index 1da0c79a5580..01fbf6e88cce 100644
> > > --- a/fs/nfsd/filecache.h
> > > +++ b/fs/nfsd/filecache.h
> > > @@ -24,9 +24,7 @@ struct nfsd_file_mark {
> > > 
> > > /*
> > > * A representation of a file that has been opened by knfsd. These are hashed
> > > - * in the hashtable by inode pointer value. Note that this object doesn't
> > > - * hold a reference to the inode by itself, so the nf_inode pointer should
> > > - * never be dereferenced, only used for comparison.
> > > + * in the hashtable by inode pointer value.
> > > */
> > > struct nfsd_file {
> > > 	struct hlist_node	nf_node;
> > > 
> > > 
> > 
> > -- 
> > Jeff Layton <jlayton@redhat.com>
> 
> --
> Chuck Lever
> 
> 
>
Jeff Layton July 7, 2022, 5:12 p.m. UTC | #6
On Thu, 2022-07-07 at 17:01 +0000, Chuck Lever III wrote:
> 
> > On Jul 7, 2022, at 12:59 PM, Jeff Layton <jlayton@redhat.com> wrote:
> > 
> > On Thu, 2022-07-07 at 12:55 -0400, Jeff Layton wrote:
> > > On Thu, 2022-07-07 at 11:58 -0400, Chuck Lever wrote:
> > > > The documenting comment for struct nf_file states:
> > > > 
> > > > /*
> > > > * A representation of a file that has been opened by knfsd. These are hashed
> > > > * in the hashtable by inode pointer value. Note that this object doesn't
> > > > * hold a reference to the inode by itself, so the nf_inode pointer should
> > > > * never be dereferenced, only used for comparison.
> > > > */
> > > > 
> > > > However, nfsd_file_mark_find_or_create() does dereference the pointer stored
> > > > in this field.
> > > > 
> > > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> > > > ---
> > > > fs/nfsd/filecache.c | 3 +++
> > > > fs/nfsd/filecache.h | 4 +---
> > > > 2 files changed, 4 insertions(+), 3 deletions(-)
> > > > 
> > > > Hi Jeff-
> > > > 
> > > > I'm still testing this one, but I'm wondering what you think of it.
> > > > I did hit a KASAN splat that might be related, but it's not 100%
> > > > reproducible.
> > > > 
> > > 
> > > My first thought is "what the hell was I thinking, tracking an inode
> > > field without holding a reference to it?"
> > > 
> > > But now that I look, the nf_inode value only gets dereferenced in one
> > > place -- nfs4_show_superblock, and I think that's a bug. The comments
> > > over struct nfsd_file say:
> > > 
> > > "Note that this object doesn't hold a reference to the inode by itself,
> > > so the nf_inode pointer should never be dereferenced, only used for
> > > comparison."
> > > 
> > > We should probably annotate nf_inode better. __attribute__((noderef))
> > > maybe? It would also be good to make nfs4_show_superblock use a
> > > different way to get the sb.
> > > 
> > > In any case, this is unlikely to fix anything unless the crash happened
> > > in nfs4_show_superblock.
> > > 
> > > 
> > 
> > One other spot. We also dereference it in nfsd_file_mark_find_or_create,
> > but I think that specific instance is OK. We know that we still hold a
> > reference to the inode at that point since it comes from fhp->fh_dentry,
> > so we shouldn't need to worry about it disappearing out from under us.
> 
> Needs some annotation. I would prefer not to get that pointer from
> nf_inode, then. As your comment says: compare only, never deref.
> 
> 

Yeah, maybe we should pass in the inode as a separate parameter to that
function?

> > What did the crash look like?
> 
> Jul 06 11:19:32 klimt.1015granger.net kernel: BUG: KASAN: use-after-free in nfsd_file_obj_cmpfn+0x26b/0x49a [nfsd]
> Jul 06 11:19:32 klimt.1015granger.net kernel: Read of size 4 at addr ffff888180623e1c by task nfsd/1003
> Jul 06 11:19:32 klimt.1015granger.net kernel: 
> Jul 06 11:19:32 klimt.1015granger.net kernel: CPU: 3 PID: 1003 Comm: nfsd Not tainted 5.19.0-rc5-00037-g17ba024b204f #3522
> Jul 06 11:19:32 klimt.1015granger.net kernel: Hardware name: Supermicro Super Server/X10SRL-F, BIOS 3.3 10/28/2020
> Jul 06 11:19:32 klimt.1015granger.net kernel: Call Trace:
> Jul 06 11:19:32 klimt.1015granger.net kernel:  <TASK>
> Jul 06 11:19:32 klimt.1015granger.net kernel:  dump_stack_lvl+0x56/0x7c
> Jul 06 11:19:32 klimt.1015granger.net kernel:  print_report+0x101/0x4bb
> Jul 06 11:19:32 klimt.1015granger.net kernel:  kasan_report+0x9f/0xbf
> Jul 06 11:19:32 klimt.1015granger.net kernel:  nfsd_file_obj_cmpfn+0x26b/0x49a [nfsd]
> Jul 06 11:19:32 klimt.1015granger.net kernel:  rhashtable_lookup.constprop.0+0x143/0x1ca [nfsd]
> Jul 06 11:19:32 klimt.1015granger.net kernel:  nfsd_file_do_acquire+0x20b/0x1189 [nfsd]
> Jul 06 11:19:32 klimt.1015granger.net kernel:  nfsd_write+0x138/0x255 [nfsd]
> Jul 06 11:19:32 klimt.1015granger.net kernel:  nfsd3_proc_write+0x37e/0x3fc [nfsd]
> Jul 06 11:19:32 klimt.1015granger.net kernel:  nfsd_dispatch+0x5ed/0x7d0 [nfsd]
> Jul 06 11:19:32 klimt.1015granger.net kernel:  svc_process_common+0x8e9/0xefe [sunrpc]
> Jul 06 11:19:32 klimt.1015granger.net kernel:  svc_process+0x34d/0x378 [sunrpc]
> Jul 06 11:19:32 klimt.1015granger.net kernel:  nfsd+0x26b/0x34c [nfsd]
> Jul 06 11:19:32 klimt.1015granger.net kernel:  kthread+0x249/0x258
> Jul 06 11:19:32 klimt.1015granger.net kernel:  ret_from_fork+0x22/0x30
> Jul 06 11:19:32 klimt.1015granger.net kernel:  </TASK>
> 
> The freed object was actually not an inode, so it's a red herring.
> Still chasing it.
> 

ACK

> 
> > > > diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> > > > index 9cb2d590c036..7b43bb427a53 100644
> > > > --- a/fs/nfsd/filecache.c
> > > > +++ b/fs/nfsd/filecache.c
> > > > @@ -180,6 +180,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
> > > > 		nf->nf_cred = get_current_cred();
> > > > 		nf->nf_net = net;
> > > > 		nf->nf_flags = 0;
> > > > +		ihold(inode);
> > > > 		nf->nf_inode = inode;
> > > > 		nf->nf_hashval = hashval;
> > > > 		refcount_set(&nf->nf_ref, 1);
> > > > @@ -210,6 +211,7 @@ nfsd_file_free(struct nfsd_file *nf)
> > > > 		fput(nf->nf_file);
> > > > 		flush = true;
> > > > 	}
> > > > +	iput(nf->nf_inode);
> > > > 	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
> > > > 	return flush;
> > > > }
> > > > @@ -940,6 +942,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> > > > 	if (nf == NULL)
> > > > 		goto open_file;
> > > > 	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
> > > > +	iput(new->nf_inode);
> > > > 	nfsd_file_slab_free(&new->nf_rcu);
> > > > 
> > > > wait_for_construction:
> > > > diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
> > > > index 1da0c79a5580..01fbf6e88cce 100644
> > > > --- a/fs/nfsd/filecache.h
> > > > +++ b/fs/nfsd/filecache.h
> > > > @@ -24,9 +24,7 @@ struct nfsd_file_mark {
> > > > 
> > > > /*
> > > > * A representation of a file that has been opened by knfsd. These are hashed
> > > > - * in the hashtable by inode pointer value. Note that this object doesn't
> > > > - * hold a reference to the inode by itself, so the nf_inode pointer should
> > > > - * never be dereferenced, only used for comparison.
> > > > + * in the hashtable by inode pointer value.
> > > > */
> > > > struct nfsd_file {
> > > > 	struct hlist_node	nf_node;
> > > > 
> > > > 
> > > 
> > 
> > -- 
> > Jeff Layton <jlayton@redhat.com>
> 
> --
> Chuck Lever
> 
> 
>
Chuck Lever July 7, 2022, 5:13 p.m. UTC | #7
> On Jul 7, 2022, at 1:12 PM, Jeff Layton <jlayton@redhat.com> wrote:
> 
> On Thu, 2022-07-07 at 17:01 +0000, Chuck Lever III wrote:
>> 
>>> On Jul 7, 2022, at 12:59 PM, Jeff Layton <jlayton@redhat.com> wrote:
>>> 
>>> One other spot. We also dereference it in nfsd_file_mark_find_or_create,
>>> but I think that specific instance is OK. We know that we still hold a
>>> reference to the inode at that point since it comes from fhp->fh_dentry,
>>> so we shouldn't need to worry about it disappearing out from under us.
>> 
>> Needs some annotation. I would prefer not to get that pointer from
>> nf_inode, then. As your comment says: compare only, never deref.
>> 
>> 
> 
> Yeah, maybe we should pass in the inode as a separate parameter to that
> function?

That's what I ended up doing, that seems most clear.


--
Chuck Lever
Chuck Lever July 7, 2022, 5:25 p.m. UTC | #8
> On Jul 7, 2022, at 12:55 PM, Jeff Layton <jlayton@redhat.com> wrote:
> 
> On Thu, 2022-07-07 at 11:58 -0400, Chuck Lever wrote:
>> The documenting comment for struct nf_file states:
>> 
>> /*
>> * A representation of a file that has been opened by knfsd. These are hashed
>> * in the hashtable by inode pointer value. Note that this object doesn't
>> * hold a reference to the inode by itself, so the nf_inode pointer should
>> * never be dereferenced, only used for comparison.
>> */
>> 
>> However, nfsd_file_mark_find_or_create() does dereference the pointer stored
>> in this field.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> fs/nfsd/filecache.c | 3 +++
>> fs/nfsd/filecache.h | 4 +---
>> 2 files changed, 4 insertions(+), 3 deletions(-)
>> 
>> Hi Jeff-
>> 
>> I'm still testing this one, but I'm wondering what you think of it.
>> I did hit a KASAN splat that might be related, but it's not 100%
>> reproducible.
>> 
> 
> My first thought is "what the hell was I thinking, tracking an inode
> field without holding a reference to it?"
> 
> But now that I look, the nf_inode value only gets dereferenced in one
> place -- nfs4_show_superblock, and I think that's a bug. The comments
> over struct nfsd_file say:
> 
> "Note that this object doesn't hold a reference to the inode by itself,
> so the nf_inode pointer should never be dereferenced, only used for
> comparison."
> 
> We should probably annotate nf_inode better. __attribute__((noderef))
> maybe? It would also be good to make nfs4_show_superblock use a
> different way to get the sb.

How about f->nf_file->f_inode ?


--
Chuck Lever
Jeff Layton July 7, 2022, 5:30 p.m. UTC | #9
On Thu, 2022-07-07 at 17:25 +0000, Chuck Lever III wrote:
> 
> > On Jul 7, 2022, at 12:55 PM, Jeff Layton <jlayton@redhat.com> wrote:
> > 
> > On Thu, 2022-07-07 at 11:58 -0400, Chuck Lever wrote:
> > > The documenting comment for struct nf_file states:
> > > 
> > > /*
> > > * A representation of a file that has been opened by knfsd. These are hashed
> > > * in the hashtable by inode pointer value. Note that this object doesn't
> > > * hold a reference to the inode by itself, so the nf_inode pointer should
> > > * never be dereferenced, only used for comparison.
> > > */
> > > 
> > > However, nfsd_file_mark_find_or_create() does dereference the pointer stored
> > > in this field.
> > > 
> > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> > > ---
> > > fs/nfsd/filecache.c | 3 +++
> > > fs/nfsd/filecache.h | 4 +---
> > > 2 files changed, 4 insertions(+), 3 deletions(-)
> > > 
> > > Hi Jeff-
> > > 
> > > I'm still testing this one, but I'm wondering what you think of it.
> > > I did hit a KASAN splat that might be related, but it's not 100%
> > > reproducible.
> > > 
> > 
> > My first thought is "what the hell was I thinking, tracking an inode
> > field without holding a reference to it?"
> > 
> > But now that I look, the nf_inode value only gets dereferenced in one
> > place -- nfs4_show_superblock, and I think that's a bug. The comments
> > over struct nfsd_file say:
> > 
> > "Note that this object doesn't hold a reference to the inode by itself,
> > so the nf_inode pointer should never be dereferenced, only used for
> > comparison."
> > 
> > We should probably annotate nf_inode better. __attribute__((noderef))
> > maybe? It would also be good to make nfs4_show_superblock use a
> > different way to get the sb.
> 
> How about f->nf_file->f_inode ?
> 
> 

I'd probably prefer:

    file_inode(f->nf_file)

...and I don't think there is a potential crash here either.
nfs4_show_superblock is called while holding the cl_lock. I don't think
the inode can disappear out from under you with that.
diff mbox series

Patch

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 9cb2d590c036..7b43bb427a53 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -180,6 +180,7 @@  nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
 		nf->nf_cred = get_current_cred();
 		nf->nf_net = net;
 		nf->nf_flags = 0;
+		ihold(inode);
 		nf->nf_inode = inode;
 		nf->nf_hashval = hashval;
 		refcount_set(&nf->nf_ref, 1);
@@ -210,6 +211,7 @@  nfsd_file_free(struct nfsd_file *nf)
 		fput(nf->nf_file);
 		flush = true;
 	}
+	iput(nf->nf_inode);
 	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
 	return flush;
 }
@@ -940,6 +942,7 @@  nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (nf == NULL)
 		goto open_file;
 	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+	iput(new->nf_inode);
 	nfsd_file_slab_free(&new->nf_rcu);
 
 wait_for_construction:
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 1da0c79a5580..01fbf6e88cce 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -24,9 +24,7 @@  struct nfsd_file_mark {
 
 /*
  * A representation of a file that has been opened by knfsd. These are hashed
- * in the hashtable by inode pointer value. Note that this object doesn't
- * hold a reference to the inode by itself, so the nf_inode pointer should
- * never be dereferenced, only used for comparison.
+ * in the hashtable by inode pointer value.
  */
 struct nfsd_file {
 	struct hlist_node	nf_node;