diff mbox series

[v2] NFS: Revalidate once when holding a delegation

Message ID bcb5ffd399c4434730e6d100a5b7cae5e207244e.1580225161.git.bcodding@redhat.com (mailing list archive)
State New, archived
Headers show
Series [v2] NFS: Revalidate once when holding a delegation | expand

Commit Message

Benjamin Coddington Jan. 28, 2020, 3:26 p.m. UTC
If we skip lookup revalidataion while holding a delegation, we might miss
that the file has changed directories on the server.  This can happen if
the file is moved in between the client caching a dentry and obtaining a
delegation.  That can be reproduced on a single system with this bash:

mkdir -p /exports/dir{1,2}
exportfs -o rw localhost:/exports
mount -t nfs -ov4.1,noac localhost:/exports /mnt/localhost

touch /exports/dir1/fubar

cat /mnt/localhost/dir1/fubar
mv /mnt/localhost/dir{1,2}/fubar

mv /exports/dir{2,1}/fubar

cat /mnt/localhost/dir1/fubar
mv /mnt/localhost/dir{1,2}/fubar

In this example, the final `mv` will stat source and destination and find
they are the same dentry.

To fix this without giving up the increased lookup performance that holding
a delegation provides, let's revalidate the dentry only once after
obtaining a delegation by placing a magic value in the dentry's verifier.

Suggested-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
---
 fs/nfs/dir.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

Comments

Trond Myklebust Jan. 28, 2020, 3:43 p.m. UTC | #1
On Tue, 2020-01-28 at 10:26 -0500, Benjamin Coddington wrote:
> If we skip lookup revalidataion while holding a delegation, we might
> miss
> that the file has changed directories on the server.  This can happen
> if
> the file is moved in between the client caching a dentry and
> obtaining a
> delegation.  That can be reproduced on a single system with this
> bash:
> 
> mkdir -p /exports/dir{1,2}
> exportfs -o rw localhost:/exports
> mount -t nfs -ov4.1,noac localhost:/exports /mnt/localhost
> 
> touch /exports/dir1/fubar
> 
> cat /mnt/localhost/dir1/fubar
> mv /mnt/localhost/dir{1,2}/fubar
> 
> mv /exports/dir{2,1}/fubar
> 
> cat /mnt/localhost/dir1/fubar
> mv /mnt/localhost/dir{1,2}/fubar
> 
> In this example, the final `mv` will stat source and destination and
> find
> they are the same dentry.
> 
> To fix this without giving up the increased lookup performance that
> holding
> a delegation provides, let's revalidate the dentry only once after
> obtaining a delegation by placing a magic value in the dentry's
> verifier.
> 
> Suggested-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> ---
>  fs/nfs/dir.c | 22 ++++++++++++++++++++--
>  1 file changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index e180033e35cf..e9d07dcd6d6f 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -949,6 +949,7 @@ static int nfs_fsync_dir(struct file *filp,
> loff_t start, loff_t end,
>  	return 0;
>  }
>  
> +#define NFS_DELEGATION_VERF 0xfeeddeaf
>  /**
>   * nfs_force_lookup_revalidate - Mark the directory as having
> changed
>   * @dir: pointer to directory inode
> @@ -962,6 +963,8 @@ static int nfs_fsync_dir(struct file *filp,
> loff_t start, loff_t end,
>  void nfs_force_lookup_revalidate(struct inode *dir)
>  {
>  	NFS_I(dir)->cache_change_attribute++;
> +	if (NFS_I(dir)->cache_change_attribute == NFS_DELEGATION_VERF)
> +		NFS_I(dir)->cache_change_attribute++;
>  }
>  EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
>  
> @@ -1133,6 +1136,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir,
> struct dentry *dentry,
>  	struct nfs_fh *fhandle;
>  	struct nfs_fattr *fattr;
>  	struct nfs4_label *label;
> +	unsigned long verifier;
>  	int ret;
>  
>  	ret = -ENOMEM;
> @@ -1154,6 +1158,11 @@ nfs_lookup_revalidate_dentry(struct inode
> *dir, struct dentry *dentry,
>  	if (nfs_refresh_inode(inode, fattr) < 0)
>  		goto out;
>  
> +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> +		verifier = NFS_DELEGATION_VERF;
> +	else
> +		verifier = nfs_save_change_attribute(dir);
> +
>  	nfs_setsecurity(inode, fattr, label);
>  	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
>  
> @@ -1167,6 +1176,15 @@ nfs_lookup_revalidate_dentry(struct inode
> *dir, struct dentry *dentry,
>  	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
>  }
>  
> +static int nfs_delegation_matches_dentry(struct inode *dir,
> +			struct dentry *dentry, struct inode *inode)
> +{
> +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ) &&
> +		dentry->d_time == NFS_DELEGATION_VERF)
> +		return 1;
> +	return 0;
> +}
> +
>  /*
>   * This is called every time the dcache has a lookup hit,
>   * and we should check whether we can really trust that
> @@ -1197,7 +1215,7 @@ nfs_do_lookup_revalidate(struct inode *dir,
> struct dentry *dentry,
>  		goto out_bad;
>  	}
>  
> -	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> +	if (nfs_delegation_matches_dentry(dir, dentry, inode))
>  		return nfs_lookup_revalidate_delegated(dir, dentry,
> inode);
>  
>  	/* Force a full look up iff the parent directory has changed */
> @@ -1635,7 +1653,7 @@ nfs4_do_lookup_revalidate(struct inode *dir,
> struct dentry *dentry,
>  	if (inode == NULL)
>  		goto full_reval;
>  
> -	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> +	if (nfs_delegation_matches_dentry(dir, dentry, inode))
>  		return nfs_lookup_revalidate_delegated(dir, dentry,
> inode);
>  
>  	/* NFS only supports OPEN on regular files */

Thanks Ben!

Reviewed-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Trond Myklebust Jan. 28, 2020, 3:56 p.m. UTC | #2
On Tue, 2020-01-28 at 10:26 -0500, Benjamin Coddington wrote:
> If we skip lookup revalidataion while holding a delegation, we might
> miss
> that the file has changed directories on the server.  This can happen
> if
> the file is moved in between the client caching a dentry and
> obtaining a
> delegation.  That can be reproduced on a single system with this
> bash:
> 
> mkdir -p /exports/dir{1,2}
> exportfs -o rw localhost:/exports
> mount -t nfs -ov4.1,noac localhost:/exports /mnt/localhost
> 
> touch /exports/dir1/fubar
> 
> cat /mnt/localhost/dir1/fubar
> mv /mnt/localhost/dir{1,2}/fubar
> 
> mv /exports/dir{2,1}/fubar
> 
> cat /mnt/localhost/dir1/fubar
> mv /mnt/localhost/dir{1,2}/fubar
> 
> In this example, the final `mv` will stat source and destination and
> find
> they are the same dentry.
> 
> To fix this without giving up the increased lookup performance that
> holding
> a delegation provides, let's revalidate the dentry only once after
> obtaining a delegation by placing a magic value in the dentry's
> verifier.
> 
> Suggested-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> ---
>  fs/nfs/dir.c | 22 ++++++++++++++++++++--
>  1 file changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index e180033e35cf..e9d07dcd6d6f 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -949,6 +949,7 @@ static int nfs_fsync_dir(struct file *filp,
> loff_t start, loff_t end,
>  	return 0;
>  }
>  
> +#define NFS_DELEGATION_VERF 0xfeeddeaf
>  /**
>   * nfs_force_lookup_revalidate - Mark the directory as having
> changed
>   * @dir: pointer to directory inode
> @@ -962,6 +963,8 @@ static int nfs_fsync_dir(struct file *filp,
> loff_t start, loff_t end,
>  void nfs_force_lookup_revalidate(struct inode *dir)
>  {
>  	NFS_I(dir)->cache_change_attribute++;
> +	if (NFS_I(dir)->cache_change_attribute == NFS_DELEGATION_VERF)
> +		NFS_I(dir)->cache_change_attribute++;

Actually, I think a slight modification to this can might be
beneficial. If we change to the following:

	if (unlikely(NFS_I(dir)->cache_change_attribute == NFS_DELEGATION_VERF - 1))
		NFS_I(dir)->cache_change_attribute = NFS_DELEGATION_VERF + 1;
 	else
		NFS_I(dir)->cache_change_attribute++;

then that should ensure those readers of cache_change_attribute that do
not use locking will also never see the value NFS_DELEGATION_VERF
(assuming that gcc doesn't optimise the above to something weird).

>  }
>  EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
>  
> @@ -1133,6 +1136,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir,
> struct dentry *dentry,
>  	struct nfs_fh *fhandle;
>  	struct nfs_fattr *fattr;
>  	struct nfs4_label *label;
> +	unsigned long verifier;
>  	int ret;
>  
>  	ret = -ENOMEM;
> @@ -1154,6 +1158,11 @@ nfs_lookup_revalidate_dentry(struct inode
> *dir, struct dentry *dentry,
>  	if (nfs_refresh_inode(inode, fattr) < 0)
>  		goto out;
>  
> +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> +		verifier = NFS_DELEGATION_VERF;
> +	else
> +		verifier = nfs_save_change_attribute(dir);
> +
>  	nfs_setsecurity(inode, fattr, label);
>  	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
>  
> @@ -1167,6 +1176,15 @@ nfs_lookup_revalidate_dentry(struct inode
> *dir, struct dentry *dentry,
>  	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
>  }
>  
> +static int nfs_delegation_matches_dentry(struct inode *dir,
> +			struct dentry *dentry, struct inode *inode)
> +{
> +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ) &&
> +		dentry->d_time == NFS_DELEGATION_VERF)
> +		return 1;
> +	return 0;
> +}
> +
>  /*
>   * This is called every time the dcache has a lookup hit,
>   * and we should check whether we can really trust that
> @@ -1197,7 +1215,7 @@ nfs_do_lookup_revalidate(struct inode *dir,
> struct dentry *dentry,
>  		goto out_bad;
>  	}
>  
> -	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> +	if (nfs_delegation_matches_dentry(dir, dentry, inode))
>  		return nfs_lookup_revalidate_delegated(dir, dentry,
> inode);
>  
>  	/* Force a full look up iff the parent directory has changed */
> @@ -1635,7 +1653,7 @@ nfs4_do_lookup_revalidate(struct inode *dir,
> struct dentry *dentry,
>  	if (inode == NULL)
>  		goto full_reval;
>  
> -	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> +	if (nfs_delegation_matches_dentry(dir, dentry, inode))
>  		return nfs_lookup_revalidate_delegated(dir, dentry,
> inode);
>  
>  	/* NFS only supports OPEN on regular files */
Trond Myklebust Jan. 28, 2020, 8:21 p.m. UTC | #3
On Tue, 2020-01-28 at 10:56 -0500, Trond Myklebust wrote:
> On Tue, 2020-01-28 at 10:26 -0500, Benjamin Coddington wrote:
> > If we skip lookup revalidataion while holding a delegation, we
> > might
> > miss
> > that the file has changed directories on the server.  This can
> > happen
> > if
> > the file is moved in between the client caching a dentry and
> > obtaining a
> > delegation.  That can be reproduced on a single system with this
> > bash:
> > 
> > mkdir -p /exports/dir{1,2}
> > exportfs -o rw localhost:/exports
> > mount -t nfs -ov4.1,noac localhost:/exports /mnt/localhost
> > 
> > touch /exports/dir1/fubar
> > 
> > cat /mnt/localhost/dir1/fubar
> > mv /mnt/localhost/dir{1,2}/fubar
> > 
> > mv /exports/dir{2,1}/fubar
> > 
> > cat /mnt/localhost/dir1/fubar
> > mv /mnt/localhost/dir{1,2}/fubar
> > 
> > In this example, the final `mv` will stat source and destination
> > and
> > find
> > they are the same dentry.
> > 
> > To fix this without giving up the increased lookup performance that
> > holding
> > a delegation provides, let's revalidate the dentry only once after
> > obtaining a delegation by placing a magic value in the dentry's
> > verifier.
> > 
> > Suggested-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> > ---
> >  fs/nfs/dir.c | 22 ++++++++++++++++++++--
> >  1 file changed, 20 insertions(+), 2 deletions(-)
> > 
> > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > index e180033e35cf..e9d07dcd6d6f 100644
> > --- a/fs/nfs/dir.c
> > +++ b/fs/nfs/dir.c
> > @@ -949,6 +949,7 @@ static int nfs_fsync_dir(struct file *filp,
> > loff_t start, loff_t end,
> >  	return 0;
> >  }
> >  
> > +#define NFS_DELEGATION_VERF 0xfeeddeaf
> >  /**
> >   * nfs_force_lookup_revalidate - Mark the directory as having
> > changed
> >   * @dir: pointer to directory inode
> > @@ -962,6 +963,8 @@ static int nfs_fsync_dir(struct file *filp,
> > loff_t start, loff_t end,
> >  void nfs_force_lookup_revalidate(struct inode *dir)
> >  {
> >  	NFS_I(dir)->cache_change_attribute++;
> > +	if (NFS_I(dir)->cache_change_attribute == NFS_DELEGATION_VERF)
> > +		NFS_I(dir)->cache_change_attribute++;
> 
> Actually, I think a slight modification to this can might be
> beneficial. If we change to the following:
> 
> 	if (unlikely(NFS_I(dir)->cache_change_attribute ==
> NFS_DELEGATION_VERF - 1))
> 		NFS_I(dir)->cache_change_attribute =
> NFS_DELEGATION_VERF + 1;
>  	else
> 		NFS_I(dir)->cache_change_attribute++;
> 

...actually, it would be nice to clean that up too using a declaration
of 'struct nfs_inode *nfsi = NFS_I(dir));'

> then that should ensure those readers of cache_change_attribute that
> do
> not use locking will also never see the value NFS_DELEGATION_VERF
> (assuming that gcc doesn't optimise the above to something weird).
> 
> >  }
> >  EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
> >  
> > @@ -1133,6 +1136,7 @@ nfs_lookup_revalidate_dentry(struct inode
> > *dir,
> > struct dentry *dentry,
> >  	struct nfs_fh *fhandle;
> >  	struct nfs_fattr *fattr;
> >  	struct nfs4_label *label;
> > +	unsigned long verifier;
> >  	int ret;
> >  
> >  	ret = -ENOMEM;
> > @@ -1154,6 +1158,11 @@ nfs_lookup_revalidate_dentry(struct inode
> > *dir, struct dentry *dentry,
> >  	if (nfs_refresh_inode(inode, fattr) < 0)
> >  		goto out;
> >  
> > +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> > +		verifier = NFS_DELEGATION_VERF;
> > +	else
> > +		verifier = nfs_save_change_attribute(dir);
> > +
> >  	nfs_setsecurity(inode, fattr, label);
> >  	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));

Oops! When reviewing, I missed this. Shouldn't the above be changed to
nfs_set_verifier(dentry, verifier) ?

> >  
> > @@ -1167,6 +1176,15 @@ nfs_lookup_revalidate_dentry(struct inode
> > *dir, struct dentry *dentry,
> >  	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
> >  }
> >  
> > +static int nfs_delegation_matches_dentry(struct inode *dir,
> > +			struct dentry *dentry, struct inode *inode)
> > +{
> > +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ) &&
> > +		dentry->d_time == NFS_DELEGATION_VERF)
> > +		return 1;
> > +	return 0;
> > +}
> > +
> >  /*
> >   * This is called every time the dcache has a lookup hit,
> >   * and we should check whether we can really trust that
> > @@ -1197,7 +1215,7 @@ nfs_do_lookup_revalidate(struct inode *dir,
> > struct dentry *dentry,
> >  		goto out_bad;
> >  	}
> >  
> > -	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> > +	if (nfs_delegation_matches_dentry(dir, dentry, inode))
> >  		return nfs_lookup_revalidate_delegated(dir, dentry,
> > inode);
> >  
> >  	/* Force a full look up iff the parent directory has changed */
> > @@ -1635,7 +1653,7 @@ nfs4_do_lookup_revalidate(struct inode *dir,
> > struct dentry *dentry,
> >  	if (inode == NULL)
> >  		goto full_reval;
> >  
> > -	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> > +	if (nfs_delegation_matches_dentry(dir, dentry, inode))
> >  		return nfs_lookup_revalidate_delegated(dir, dentry,
> > inode);
> >  
> >  	/* NFS only supports OPEN on regular files */
Trond Myklebust Jan. 28, 2020, 9:20 p.m. UTC | #4
On Tue, 2020-01-28 at 15:21 -0500, Trond Myklebust wrote:
> On Tue, 2020-01-28 at 10:56 -0500, Trond Myklebust wrote:
> > On Tue, 2020-01-28 at 10:26 -0500, Benjamin Coddington wrote:
> > > If we skip lookup revalidataion while holding a delegation, we
> > > might
> > > miss
> > > that the file has changed directories on the server.  This can
> > > happen
> > > if
> > > the file is moved in between the client caching a dentry and
> > > obtaining a
> > > delegation.  That can be reproduced on a single system with this
> > > bash:
> > > 
> > > mkdir -p /exports/dir{1,2}
> > > exportfs -o rw localhost:/exports
> > > mount -t nfs -ov4.1,noac localhost:/exports /mnt/localhost
> > > 
> > > touch /exports/dir1/fubar
> > > 
> > > cat /mnt/localhost/dir1/fubar
> > > mv /mnt/localhost/dir{1,2}/fubar
> > > 
> > > mv /exports/dir{2,1}/fubar
> > > 
> > > cat /mnt/localhost/dir1/fubar
> > > mv /mnt/localhost/dir{1,2}/fubar
> > > 
> > > In this example, the final `mv` will stat source and destination
> > > and
> > > find
> > > they are the same dentry.
> > > 
> > > To fix this without giving up the increased lookup performance
> > > that
> > > holding
> > > a delegation provides, let's revalidate the dentry only once
> > > after
> > > obtaining a delegation by placing a magic value in the dentry's
> > > verifier.
> > > 
> > > Suggested-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > > Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> > > ---
> > >  fs/nfs/dir.c | 22 ++++++++++++++++++++--
> > >  1 file changed, 20 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > > index e180033e35cf..e9d07dcd6d6f 100644
> > > --- a/fs/nfs/dir.c
> > > +++ b/fs/nfs/dir.c
> > > @@ -949,6 +949,7 @@ static int nfs_fsync_dir(struct file *filp,
> > > loff_t start, loff_t end,
> > >  	return 0;
> > >  }
> > >  
> > > +#define NFS_DELEGATION_VERF 0xfeeddeaf
> > >  /**
> > >   * nfs_force_lookup_revalidate - Mark the directory as having
> > > changed
> > >   * @dir: pointer to directory inode
> > > @@ -962,6 +963,8 @@ static int nfs_fsync_dir(struct file *filp,
> > > loff_t start, loff_t end,
> > >  void nfs_force_lookup_revalidate(struct inode *dir)
> > >  {
> > >  	NFS_I(dir)->cache_change_attribute++;
> > > +	if (NFS_I(dir)->cache_change_attribute == NFS_DELEGATION_VERF)
> > > +		NFS_I(dir)->cache_change_attribute++;
> > 
> > Actually, I think a slight modification to this can might be
> > beneficial. If we change to the following:
> > 
> > 	if (unlikely(NFS_I(dir)->cache_change_attribute ==
> > NFS_DELEGATION_VERF - 1))
> > 		NFS_I(dir)->cache_change_attribute =
> > NFS_DELEGATION_VERF + 1;
> >  	else
> > 		NFS_I(dir)->cache_change_attribute++;
> > 
> 
> ...actually, it would be nice to clean that up too using a
> declaration
> of 'struct nfs_inode *nfsi = NFS_I(dir));'
> 
> > then that should ensure those readers of cache_change_attribute
> > that
> > do
> > not use locking will also never see the value NFS_DELEGATION_VERF
> > (assuming that gcc doesn't optimise the above to something weird).
> > 
> > >  }
> > >  EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
> > >  
> > > @@ -1133,6 +1136,7 @@ nfs_lookup_revalidate_dentry(struct inode
> > > *dir,
> > > struct dentry *dentry,
> > >  	struct nfs_fh *fhandle;
> > >  	struct nfs_fattr *fattr;
> > >  	struct nfs4_label *label;
> > > +	unsigned long verifier;
> > >  	int ret;
> > >  
> > >  	ret = -ENOMEM;
> > > @@ -1154,6 +1158,11 @@ nfs_lookup_revalidate_dentry(struct inode
> > > *dir, struct dentry *dentry,
> > >  	if (nfs_refresh_inode(inode, fattr) < 0)
> > >  		goto out;
> > >  
> > > +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> > > +		verifier = NFS_DELEGATION_VERF;
> > > +	else
> > > +		verifier = nfs_save_change_attribute(dir);
> > > +
> > >  	nfs_setsecurity(inode, fattr, label);
> > >  	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
> 
> Oops! When reviewing, I missed this. Shouldn't the above be changed
> to
> nfs_set_verifier(dentry, verifier) ?

...and on a similar vein: nfs_lookup_revalidate_delegated() needs to
change so as to not reset the verifier...

Sorry for not catching that one either.

> 
> > >  
> > > @@ -1167,6 +1176,15 @@ nfs_lookup_revalidate_dentry(struct inode
> > > *dir, struct dentry *dentry,
> > >  	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
> > >  }
> > >  
> > > +static int nfs_delegation_matches_dentry(struct inode *dir,
> > > +			struct dentry *dentry, struct inode *inode)
> > > +{
> > > +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ) &&
> > > +		dentry->d_time == NFS_DELEGATION_VERF)
> > > +		return 1;
> > > +	return 0;
> > > +}
> > > +
> > >  /*
> > >   * This is called every time the dcache has a lookup hit,
> > >   * and we should check whether we can really trust that
> > > @@ -1197,7 +1215,7 @@ nfs_do_lookup_revalidate(struct inode *dir,
> > > struct dentry *dentry,
> > >  		goto out_bad;
> > >  	}
> > >  
> > > -	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> > > +	if (nfs_delegation_matches_dentry(dir, dentry, inode))
> > >  		return nfs_lookup_revalidate_delegated(dir, dentry,
> > > inode);
> > >  
> > >  	/* Force a full look up iff the parent directory has changed */
> > > @@ -1635,7 +1653,7 @@ nfs4_do_lookup_revalidate(struct inode
> > > *dir,
> > > struct dentry *dentry,
> > >  	if (inode == NULL)
> > >  		goto full_reval;
> > >  
> > > -	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> > > +	if (nfs_delegation_matches_dentry(dir, dentry, inode))
> > >  		return nfs_lookup_revalidate_delegated(dir, dentry,
> > > inode);
> > >  
> > >  	/* NFS only supports OPEN on regular files */
Trond Myklebust Jan. 28, 2020, 9:30 p.m. UTC | #5
On Tue, 2020-01-28 at 16:20 -0500, Trond Myklebust wrote:
> On Tue, 2020-01-28 at 15:21 -0500, Trond Myklebust wrote:
> > On Tue, 2020-01-28 at 10:56 -0500, Trond Myklebust wrote:
> > > On Tue, 2020-01-28 at 10:26 -0500, Benjamin Coddington wrote:
> > > > If we skip lookup revalidataion while holding a delegation, we
> > > > might
> > > > miss
> > > > that the file has changed directories on the server.  This can
> > > > happen
> > > > if
> > > > the file is moved in between the client caching a dentry and
> > > > obtaining a
> > > > delegation.  That can be reproduced on a single system with
> > > > this
> > > > bash:
> > > > 
> > > > mkdir -p /exports/dir{1,2}
> > > > exportfs -o rw localhost:/exports
> > > > mount -t nfs -ov4.1,noac localhost:/exports /mnt/localhost
> > > > 
> > > > touch /exports/dir1/fubar
> > > > 
> > > > cat /mnt/localhost/dir1/fubar
> > > > mv /mnt/localhost/dir{1,2}/fubar
> > > > 
> > > > mv /exports/dir{2,1}/fubar
> > > > 
> > > > cat /mnt/localhost/dir1/fubar
> > > > mv /mnt/localhost/dir{1,2}/fubar
> > > > 
> > > > In this example, the final `mv` will stat source and
> > > > destination
> > > > and
> > > > find
> > > > they are the same dentry.
> > > > 
> > > > To fix this without giving up the increased lookup performance
> > > > that
> > > > holding
> > > > a delegation provides, let's revalidate the dentry only once
> > > > after
> > > > obtaining a delegation by placing a magic value in the dentry's
> > > > verifier.
> > > > 
> > > > Suggested-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > > > Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> > > > ---
> > > >  fs/nfs/dir.c | 22 ++++++++++++++++++++--
> > > >  1 file changed, 20 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > > > index e180033e35cf..e9d07dcd6d6f 100644
> > > > --- a/fs/nfs/dir.c
> > > > +++ b/fs/nfs/dir.c
> > > > @@ -949,6 +949,7 @@ static int nfs_fsync_dir(struct file *filp,
> > > > loff_t start, loff_t end,
> > > >  	return 0;
> > > >  }
> > > >  
> > > > +#define NFS_DELEGATION_VERF 0xfeeddeaf
> > > >  /**
> > > >   * nfs_force_lookup_revalidate - Mark the directory as having
> > > > changed
> > > >   * @dir: pointer to directory inode
> > > > @@ -962,6 +963,8 @@ static int nfs_fsync_dir(struct file *filp,
> > > > loff_t start, loff_t end,
> > > >  void nfs_force_lookup_revalidate(struct inode *dir)
> > > >  {
> > > >  	NFS_I(dir)->cache_change_attribute++;
> > > > +	if (NFS_I(dir)->cache_change_attribute ==
> > > > NFS_DELEGATION_VERF)
> > > > +		NFS_I(dir)->cache_change_attribute++;
> > > 
> > > Actually, I think a slight modification to this can might be
> > > beneficial. If we change to the following:
> > > 
> > > 	if (unlikely(NFS_I(dir)->cache_change_attribute ==
> > > NFS_DELEGATION_VERF - 1))
> > > 		NFS_I(dir)->cache_change_attribute =
> > > NFS_DELEGATION_VERF + 1;
> > >  	else
> > > 		NFS_I(dir)->cache_change_attribute++;
> > > 
> > 
> > ...actually, it would be nice to clean that up too using a
> > declaration
> > of 'struct nfs_inode *nfsi = NFS_I(dir));'
> > 
> > > then that should ensure those readers of cache_change_attribute
> > > that
> > > do
> > > not use locking will also never see the value NFS_DELEGATION_VERF
> > > (assuming that gcc doesn't optimise the above to something
> > > weird).
> > > 
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
> > > >  
> > > > @@ -1133,6 +1136,7 @@ nfs_lookup_revalidate_dentry(struct inode
> > > > *dir,
> > > > struct dentry *dentry,
> > > >  	struct nfs_fh *fhandle;
> > > >  	struct nfs_fattr *fattr;
> > > >  	struct nfs4_label *label;
> > > > +	unsigned long verifier;
> > > >  	int ret;
> > > >  
> > > >  	ret = -ENOMEM;
> > > > @@ -1154,6 +1158,11 @@ nfs_lookup_revalidate_dentry(struct
> > > > inode
> > > > *dir, struct dentry *dentry,
> > > >  	if (nfs_refresh_inode(inode, fattr) < 0)
> > > >  		goto out;
> > > >  
> > > > +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> > > > +		verifier = NFS_DELEGATION_VERF;
> > > > +	else
> > > > +		verifier = nfs_save_change_attribute(dir);
> > > > +
> > > >  	nfs_setsecurity(inode, fattr, label);
> > > >  	nfs_set_verifier(dentry,
> > > > nfs_save_change_attribute(dir));
> > 
> > Oops! When reviewing, I missed this. Shouldn't the above be changed
> > to
> > nfs_set_verifier(dentry, verifier) ?
> 
> ...and on a similar vein: nfs_lookup_revalidate_delegated() needs to
> change so as to not reset the verifier...
> 
> Sorry for not catching that one either.

Not my day...

nfs_prime_dcache() will clobber the verifier too in the nfs_same_file()
case. That one also needs to set NFS_DELEGATION_VERF if there is a
delegation.

Perhaps add a helper function for that +
nfs_lookup_revalidate_dentry()?

> 
> > > >  
> > > > @@ -1167,6 +1176,15 @@ nfs_lookup_revalidate_dentry(struct
> > > > inode
> > > > *dir, struct dentry *dentry,
> > > >  	return nfs_lookup_revalidate_done(dir, dentry, inode,
> > > > ret);
> > > >  }
> > > >  
> > > > +static int nfs_delegation_matches_dentry(struct inode *dir,
> > > > +			struct dentry *dentry, struct inode
> > > > *inode)
> > > > +{
> > > > +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)
> > > > &&
> > > > +		dentry->d_time == NFS_DELEGATION_VERF)
> > > > +		return 1;
> > > > +	return 0;
> > > > +}
> > > > +
> > > >  /*
> > > >   * This is called every time the dcache has a lookup hit,
> > > >   * and we should check whether we can really trust that
> > > > @@ -1197,7 +1215,7 @@ nfs_do_lookup_revalidate(struct inode
> > > > *dir,
> > > > struct dentry *dentry,
> > > >  		goto out_bad;
> > > >  	}
> > > >  
> > > > -	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> > > > +	if (nfs_delegation_matches_dentry(dir, dentry, inode))
> > > >  		return nfs_lookup_revalidate_delegated(dir,
> > > > dentry,
> > > > inode);
> > > >  
> > > >  	/* Force a full look up iff the parent directory has
> > > > changed */
> > > > @@ -1635,7 +1653,7 @@ nfs4_do_lookup_revalidate(struct inode
> > > > *dir,
> > > > struct dentry *dentry,
> > > >  	if (inode == NULL)
> > > >  		goto full_reval;
> > > >  
> > > > -	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> > > > +	if (nfs_delegation_matches_dentry(dir, dentry, inode))
> > > >  		return nfs_lookup_revalidate_delegated(dir,
> > > > dentry,
> > > > inode);
> > > >  
> > > >  	/* NFS only supports OPEN on regular files */
Trond Myklebust Jan. 28, 2020, 9:46 p.m. UTC | #6
On Tue, 2020-01-28 at 21:30 +0000, Trond Myklebust wrote:
> On Tue, 2020-01-28 at 16:20 -0500, Trond Myklebust wrote:
> > On Tue, 2020-01-28 at 15:21 -0500, Trond Myklebust wrote:
> > > On Tue, 2020-01-28 at 10:56 -0500, Trond Myklebust wrote:
> > > > On Tue, 2020-01-28 at 10:26 -0500, Benjamin Coddington wrote:
> > > > > If we skip lookup revalidataion while holding a delegation,
> > > > > we
> > > > > might
> > > > > miss
> > > > > that the file has changed directories on the server.  This
> > > > > can
> > > > > happen
> > > > > if
> > > > > the file is moved in between the client caching a dentry and
> > > > > obtaining a
> > > > > delegation.  That can be reproduced on a single system with
> > > > > this
> > > > > bash:
> > > > > 
> > > > > mkdir -p /exports/dir{1,2}
> > > > > exportfs -o rw localhost:/exports
> > > > > mount -t nfs -ov4.1,noac localhost:/exports /mnt/localhost
> > > > > 
> > > > > touch /exports/dir1/fubar
> > > > > 
> > > > > cat /mnt/localhost/dir1/fubar
> > > > > mv /mnt/localhost/dir{1,2}/fubar
> > > > > 
> > > > > mv /exports/dir{2,1}/fubar
> > > > > 
> > > > > cat /mnt/localhost/dir1/fubar
> > > > > mv /mnt/localhost/dir{1,2}/fubar
> > > > > 
> > > > > In this example, the final `mv` will stat source and
> > > > > destination
> > > > > and
> > > > > find
> > > > > they are the same dentry.
> > > > > 
> > > > > To fix this without giving up the increased lookup
> > > > > performance
> > > > > that
> > > > > holding
> > > > > a delegation provides, let's revalidate the dentry only once
> > > > > after
> > > > > obtaining a delegation by placing a magic value in the
> > > > > dentry's
> > > > > verifier.
> > > > > 
> > > > > Suggested-by: Trond Myklebust <
> > > > > trond.myklebust@hammerspace.com>
> > > > > Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> > > > > ---
> > > > >  fs/nfs/dir.c | 22 ++++++++++++++++++++--
> > > > >  1 file changed, 20 insertions(+), 2 deletions(-)
> > > > > 
> > > > > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > > > > index e180033e35cf..e9d07dcd6d6f 100644
> > > > > --- a/fs/nfs/dir.c
> > > > > +++ b/fs/nfs/dir.c
> > > > > @@ -949,6 +949,7 @@ static int nfs_fsync_dir(struct file
> > > > > *filp,
> > > > > loff_t start, loff_t end,
> > > > >  	return 0;
> > > > >  }
> > > > >  
> > > > > +#define NFS_DELEGATION_VERF 0xfeeddeaf
> > > > >  /**
> > > > >   * nfs_force_lookup_revalidate - Mark the directory as
> > > > > having
> > > > > changed
> > > > >   * @dir: pointer to directory inode
> > > > > @@ -962,6 +963,8 @@ static int nfs_fsync_dir(struct file
> > > > > *filp,
> > > > > loff_t start, loff_t end,
> > > > >  void nfs_force_lookup_revalidate(struct inode *dir)
> > > > >  {
> > > > >  	NFS_I(dir)->cache_change_attribute++;
> > > > > +	if (NFS_I(dir)->cache_change_attribute ==
> > > > > NFS_DELEGATION_VERF)
> > > > > +		NFS_I(dir)->cache_change_attribute++;
> > > > 
> > > > Actually, I think a slight modification to this can might be
> > > > beneficial. If we change to the following:
> > > > 
> > > > 	if (unlikely(NFS_I(dir)->cache_change_attribute ==
> > > > NFS_DELEGATION_VERF - 1))
> > > > 		NFS_I(dir)->cache_change_attribute =
> > > > NFS_DELEGATION_VERF + 1;
> > > >  	else
> > > > 		NFS_I(dir)->cache_change_attribute++;
> > > > 
> > > 
> > > ...actually, it would be nice to clean that up too using a
> > > declaration
> > > of 'struct nfs_inode *nfsi = NFS_I(dir));'
> > > 
> > > > then that should ensure those readers of cache_change_attribute
> > > > that
> > > > do
> > > > not use locking will also never see the value
> > > > NFS_DELEGATION_VERF
> > > > (assuming that gcc doesn't optimise the above to something
> > > > weird).
> > > > 
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
> > > > >  
> > > > > @@ -1133,6 +1136,7 @@ nfs_lookup_revalidate_dentry(struct
> > > > > inode
> > > > > *dir,
> > > > > struct dentry *dentry,
> > > > >  	struct nfs_fh *fhandle;
> > > > >  	struct nfs_fattr *fattr;
> > > > >  	struct nfs4_label *label;
> > > > > +	unsigned long verifier;
> > > > >  	int ret;
> > > > >  
> > > > >  	ret = -ENOMEM;
> > > > > @@ -1154,6 +1158,11 @@ nfs_lookup_revalidate_dentry(struct
> > > > > inode
> > > > > *dir, struct dentry *dentry,
> > > > >  	if (nfs_refresh_inode(inode, fattr) < 0)
> > > > >  		goto out;
> > > > >  
> > > > > +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> > > > > +		verifier = NFS_DELEGATION_VERF;
> > > > > +	else
> > > > > +		verifier = nfs_save_change_attribute(dir);
> > > > > +
> > > > >  	nfs_setsecurity(inode, fattr, label);
> > > > >  	nfs_set_verifier(dentry,
> > > > > nfs_save_change_attribute(dir));
> > > 
> > > Oops! When reviewing, I missed this. Shouldn't the above be
> > > changed
> > > to
> > > nfs_set_verifier(dentry, verifier) ?
> > 
> > ...and on a similar vein: nfs_lookup_revalidate_delegated() needs
> > to
> > change so as to not reset the verifier...
> > 
> > Sorry for not catching that one either.
> 
> Not my day...
> 
> nfs_prime_dcache() will clobber the verifier too in the
> nfs_same_file()
> case. That one also needs to set NFS_DELEGATION_VERF if there is a
> delegation.
> 
> Perhaps add a helper function for that +
> nfs_lookup_revalidate_dentry()?

....and finally, we should remove the call to nfs_set_verifier() from
nfs4_file_open(). Aside from being incorrect in the case where we used
an open-by-filehandle, that case is taken care of in the preceding
dentry revalidation.
Benjamin Coddington Jan. 29, 2020, 2:18 p.m. UTC | #7
On 28 Jan 2020, at 16:46, Trond Myklebust wrote:

> On Tue, 2020-01-28 at 21:30 +0000, Trond Myklebust wrote:
>> On Tue, 2020-01-28 at 16:20 -0500, Trond Myklebust wrote:
>>> On Tue, 2020-01-28 at 15:21 -0500, Trond Myklebust wrote:
>>>> On Tue, 2020-01-28 at 10:56 -0500, Trond Myklebust wrote:
>>>>> On Tue, 2020-01-28 at 10:26 -0500, Benjamin Coddington wrote:
...
>>>>>> +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
>>>>>> +		verifier = NFS_DELEGATION_VERF;
>>>>>> +	else
>>>>>> +		verifier = nfs_save_change_attribute(dir);
>>>>>> +
>>>>>>  	nfs_setsecurity(inode, fattr, label);
>>>>>>  	nfs_set_verifier(dentry,
>>>>>> nfs_save_change_attribute(dir));
>>>>
>>>> Oops! When reviewing, I missed this. Shouldn't the above be
>>>> changed
>>>> to
>>>> nfs_set_verifier(dentry, verifier) ?

Ugh, yep.

>>> ...and on a similar vein: nfs_lookup_revalidate_delegated() needs
>>> to
>>> change so as to not reset the verifier...
>>>
>>> Sorry for not catching that one either.
>>
>> Not my day...
>>
>> nfs_prime_dcache() will clobber the verifier too in the
>> nfs_same_file()
>> case. That one also needs to set NFS_DELEGATION_VERF if there is a
>> delegation.
>>
>> Perhaps add a helper function for that +
>> nfs_lookup_revalidate_dentry()?
>
> ....and finally, we should remove the call to nfs_set_verifier() from
> nfs4_file_open(). Aside from being incorrect in the case where we used
> an open-by-filehandle, that case is taken care of in the preceding
> dentry revalidation.

Ok, I'll get these done.  This doesn't make the revalidation code
any simpler. I am impressed that you can spot these problems just doing
review.  I do wish we could use d_fsdata, seems like exactly the kind
of thing we need it for, but is it worth it to do another allocation every
time we need a dentry.  I wonder if we're going to end up having more
cases like this, or want to have more private information per-dentry.

Right now d_fsdata holds the devicename for IS_ROOT, and caches the
appropriate info for a delayed removal of silly-renamed files.

Problem is that we don't drop the delegation until after caching the
silly-rename data, and then we're still doing the same sorts of things
trying to figure out what data is in which dentry.

Maybe Al would be willing to reserve some of the top of d_flags for
filesystems to use privately?

Al, can we have such?  NFS already has DCACHE_NFSFS_RENAMED, that could move
up above the core d_flags?

Ben
Trond Myklebust Jan. 29, 2020, 3:04 p.m. UTC | #8
On Wed, 2020-01-29 at 09:18 -0500, Benjamin Coddington wrote:
> On 28 Jan 2020, at 16:46, Trond Myklebust wrote:
> 
> > On Tue, 2020-01-28 at 21:30 +0000, Trond Myklebust wrote:
> > > On Tue, 2020-01-28 at 16:20 -0500, Trond Myklebust wrote:
> > > > On Tue, 2020-01-28 at 15:21 -0500, Trond Myklebust wrote:
> > > > > On Tue, 2020-01-28 at 10:56 -0500, Trond Myklebust wrote:
> > > > > > On Tue, 2020-01-28 at 10:26 -0500, Benjamin Coddington
> > > > > > wrote:
> ...
> > > > > > > +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
> > > > > > > +		verifier = NFS_DELEGATION_VERF;
> > > > > > > +	else
> > > > > > > +		verifier = nfs_save_change_attribute(dir);
> > > > > > > +
> > > > > > >  	nfs_setsecurity(inode, fattr, label);
> > > > > > >  	nfs_set_verifier(dentry,
> > > > > > > nfs_save_change_attribute(dir));
> > > > > 
> > > > > Oops! When reviewing, I missed this. Shouldn't the above be
> > > > > changed
> > > > > to
> > > > > nfs_set_verifier(dentry, verifier) ?
> 
> Ugh, yep.
> 
> > > > ...and on a similar vein: nfs_lookup_revalidate_delegated()
> > > > needs
> > > > to
> > > > change so as to not reset the verifier...
> > > > 
> > > > Sorry for not catching that one either.
> > > 
> > > Not my day...
> > > 
> > > nfs_prime_dcache() will clobber the verifier too in the
> > > nfs_same_file()
> > > case. That one also needs to set NFS_DELEGATION_VERF if there is
> > > a
> > > delegation.
> > > 
> > > Perhaps add a helper function for that +
> > > nfs_lookup_revalidate_dentry()?
> > 
> > ....and finally, we should remove the call to nfs_set_verifier()
> > from
> > nfs4_file_open(). Aside from being incorrect in the case where we
> > used
> > an open-by-filehandle, that case is taken care of in the preceding
> > dentry revalidation.
> 
> Ok, I'll get these done.  This doesn't make the revalidation code
> any simpler. I am impressed that you can spot these problems just
> doing
> review.  I do wish we could use d_fsdata, seems like exactly the kind
> of thing we need it for, but is it worth it to do another allocation
> every
> time we need a dentry.  I wonder if we're going to end up having more
> cases like this, or want to have more private information per-dentry.
> 
> Right now d_fsdata holds the devicename for IS_ROOT, and caches the
> appropriate info for a delayed removal of silly-renamed files.
> 
> Problem is that we don't drop the delegation until after caching the
> silly-rename data, and then we're still doing the same sorts of
> things
> trying to figure out what data is in which dentry.
> 
> Maybe Al would be willing to reserve some of the top of d_flags for
> filesystems to use privately?
> 
> Al, can we have such?  NFS already has DCACHE_NFSFS_RENAMED, that
> could move
> up above the core d_flags?

I did look into this a few weeks ago, and it seemed to me that we're
already low on free bits in d_flags.

An alternative might just be to reserve a whole bit in d_time as being
the 'delegated dentry revalidated' bit. e.g. reserve bit 'BITS_PER_LONG
- 1' (or just bit '0') for that purpose.

We would then want to change nfs_set_verifier() to set the verifier in
the remaining bits, and have it set the delegated dentry revalidated
bit depending on whether there is a delegation for the inode at the
time of revalidation.
We may also want to do the same setting of the delegation bit in
nfs_verify_change_attribute() when there is a successful match of the
remaining verifier.

Finally, we should have nfs_mark_delegation_revoked() and
nfs_start_delegation_return_locked() clear that delegated dentry
revalidated bit, perhaps by iterating over the inode->i_dentry list?

Note that this has the advantage that since the actual verifier part of
dentry->d_time can be kept up to date while we hold the delegation, we
don't have to worry about the dentry getting revalidated for no reason
after the delegation is returned.
Benjamin Coddington Jan. 29, 2020, 3:36 p.m. UTC | #9
On 29 Jan 2020, at 10:04, Trond Myklebust wrote:

> On Wed, 2020-01-29 at 09:18 -0500, Benjamin Coddington wrote:
>> On 28 Jan 2020, at 16:46, Trond Myklebust wrote:
>>
>>> On Tue, 2020-01-28 at 21:30 +0000, Trond Myklebust wrote:
>>>> On Tue, 2020-01-28 at 16:20 -0500, Trond Myklebust wrote:
>>>>> On Tue, 2020-01-28 at 15:21 -0500, Trond Myklebust wrote:
>>>>>> On Tue, 2020-01-28 at 10:56 -0500, Trond Myklebust wrote:
>>>>>>> On Tue, 2020-01-28 at 10:26 -0500, Benjamin Coddington
>>>>>>> wrote:
>> ...
>>>>>>>> +	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
>>>>>>>> +		verifier = NFS_DELEGATION_VERF;
>>>>>>>> +	else
>>>>>>>> +		verifier = nfs_save_change_attribute(dir);
>>>>>>>> +
>>>>>>>>  	nfs_setsecurity(inode, fattr, label);
>>>>>>>>  	nfs_set_verifier(dentry,
>>>>>>>> nfs_save_change_attribute(dir));
>>>>>>
>>>>>> Oops! When reviewing, I missed this. Shouldn't the above be
>>>>>> changed
>>>>>> to
>>>>>> nfs_set_verifier(dentry, verifier) ?
>>
>> Ugh, yep.
>>
>>>>> ...and on a similar vein: nfs_lookup_revalidate_delegated()
>>>>> needs
>>>>> to
>>>>> change so as to not reset the verifier...
>>>>>
>>>>> Sorry for not catching that one either.
>>>>
>>>> Not my day...
>>>>
>>>> nfs_prime_dcache() will clobber the verifier too in the
>>>> nfs_same_file()
>>>> case. That one also needs to set NFS_DELEGATION_VERF if there is
>>>> a
>>>> delegation.
>>>>
>>>> Perhaps add a helper function for that +
>>>> nfs_lookup_revalidate_dentry()?
>>>
>>> ....and finally, we should remove the call to nfs_set_verifier()
>>> from
>>> nfs4_file_open(). Aside from being incorrect in the case where we
>>> used
>>> an open-by-filehandle, that case is taken care of in the preceding
>>> dentry revalidation.
>>
>> Ok, I'll get these done.  This doesn't make the revalidation code
>> any simpler. I am impressed that you can spot these problems just
>> doing
>> review.  I do wish we could use d_fsdata, seems like exactly the kind
>> of thing we need it for, but is it worth it to do another allocation
>> every
>> time we need a dentry.  I wonder if we're going to end up having more
>> cases like this, or want to have more private information per-dentry.
>>
>> Right now d_fsdata holds the devicename for IS_ROOT, and caches the
>> appropriate info for a delayed removal of silly-renamed files.
>>
>> Problem is that we don't drop the delegation until after caching the
>> silly-rename data, and then we're still doing the same sorts of
>> things
>> trying to figure out what data is in which dentry.
>>
>> Maybe Al would be willing to reserve some of the top of d_flags for
>> filesystems to use privately?
>>
>> Al, can we have such?  NFS already has DCACHE_NFSFS_RENAMED, that
>> could move
>> up above the core d_flags?
>
> I did look into this a few weeks ago, and it seemed to me that we're
> already low on free bits in d_flags.
>
> An alternative might just be to reserve a whole bit in d_time as being
> the 'delegated dentry revalidated' bit. e.g. reserve bit 'BITS_PER_LONG
> - 1' (or just bit '0') for that purpose.

I did look at doing this already for the purposes of making
nfs_force_dir_revalidate() faster than having an if statement in it, and the
neat thing is that the if statement is always faster than any bit shifting
work I came up with - however I was playing with most-significant bits.

Maybe using the least-significant would be faster, then we just do:

nfs_force_lookup_revalidate() {
	nfsi->cache_change_attribute += 2;
}

But, again - what happens when we need another bit?  Is it OK to keep
chopping d_time in half?

What about coming up with a struct for d_fsdata that unionizes the
devicename and the unlink data, and holds our private flags?  The case I am
trying to fix is so rare that we could just do the allocation when it
occurs, and that way we don't have to modify too much of the revalidation
code just to handle it.  It also would give us more flags and private dentry
data if we need it later.

Another option - allow filesystems to do d_alloc, then we could just account
for the space we might need privately.  NFS would have larger dentries, but
VFS could probably drop d_time.  Maybe too big a burden to grow another api
for VFS.

I think I like the "allocate it if you need it" option for d_fsdata best so
far.

Ben

> We would then want to change nfs_set_verifier() to set the verifier in
> the remaining bits, and have it set the delegated dentry revalidated
> bit depending on whether there is a delegation for the inode at the
> time of revalidation.
> We may also want to do the same setting of the delegation bit in
> nfs_verify_change_attribute() when there is a successful match of the
> remaining verifier.
>
> Finally, we should have nfs_mark_delegation_revoked() and
> nfs_start_delegation_return_locked() clear that delegated dentry
> revalidated bit, perhaps by iterating over the inode->i_dentry list?
>
> Note that this has the advantage that since the actual verifier part of
> dentry->d_time can be kept up to date while we hold the delegation, we
> don't have to worry about the dentry getting revalidated for no reason
> after the delegation is returned.
>
> -- 
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> trond.myklebust@hammerspace.com
Trond Myklebust Jan. 29, 2020, 3:52 p.m. UTC | #10
On Wed, 2020-01-29 at 10:36 -0500, Benjamin Coddington wrote:
> On 29 Jan 2020, at 10:04, Trond Myklebust wrote:
> 
> > On Wed, 2020-01-29 at 09:18 -0500, Benjamin Coddington wrote:
> > > On 28 Jan 2020, at 16:46, Trond Myklebust wrote:
> > > 
> > > > On Tue, 2020-01-28 at 21:30 +0000, Trond Myklebust wrote:
> > > > > On Tue, 2020-01-28 at 16:20 -0500, Trond Myklebust wrote:
> > > > > > On Tue, 2020-01-28 at 15:21 -0500, Trond Myklebust wrote:
> > > > > > > On Tue, 2020-01-28 at 10:56 -0500, Trond Myklebust wrote:
> > > > > > > > On Tue, 2020-01-28 at 10:26 -0500, Benjamin Coddington
> > > > > > > > wrote:
> > > ...
> > > > > > > > > +	if (NFS_PROTO(dir)->have_delegation(inode,
> > > > > > > > > FMODE_READ))
> > > > > > > > > +		verifier = NFS_DELEGATION_VERF;
> > > > > > > > > +	else
> > > > > > > > > +		verifier =
> > > > > > > > > nfs_save_change_attribute(dir);
> > > > > > > > > +
> > > > > > > > >  	nfs_setsecurity(inode, fattr, label);
> > > > > > > > >  	nfs_set_verifier(dentry,
> > > > > > > > > nfs_save_change_attribute(dir));
> > > > > > > 
> > > > > > > Oops! When reviewing, I missed this. Shouldn't the above
> > > > > > > be
> > > > > > > changed
> > > > > > > to
> > > > > > > nfs_set_verifier(dentry, verifier) ?
> > > 
> > > Ugh, yep.
> > > 
> > > > > > ...and on a similar vein: nfs_lookup_revalidate_delegated()
> > > > > > needs
> > > > > > to
> > > > > > change so as to not reset the verifier...
> > > > > > 
> > > > > > Sorry for not catching that one either.
> > > > > 
> > > > > Not my day...
> > > > > 
> > > > > nfs_prime_dcache() will clobber the verifier too in the
> > > > > nfs_same_file()
> > > > > case. That one also needs to set NFS_DELEGATION_VERF if there
> > > > > is
> > > > > a
> > > > > delegation.
> > > > > 
> > > > > Perhaps add a helper function for that +
> > > > > nfs_lookup_revalidate_dentry()?
> > > > 
> > > > ....and finally, we should remove the call to
> > > > nfs_set_verifier()
> > > > from
> > > > nfs4_file_open(). Aside from being incorrect in the case where
> > > > we
> > > > used
> > > > an open-by-filehandle, that case is taken care of in the
> > > > preceding
> > > > dentry revalidation.
> > > 
> > > Ok, I'll get these done.  This doesn't make the revalidation code
> > > any simpler. I am impressed that you can spot these problems just
> > > doing
> > > review.  I do wish we could use d_fsdata, seems like exactly the
> > > kind
> > > of thing we need it for, but is it worth it to do another
> > > allocation
> > > every
> > > time we need a dentry.  I wonder if we're going to end up having
> > > more
> > > cases like this, or want to have more private information per-
> > > dentry.
> > > 
> > > Right now d_fsdata holds the devicename for IS_ROOT, and caches
> > > the
> > > appropriate info for a delayed removal of silly-renamed files.
> > > 
> > > Problem is that we don't drop the delegation until after caching
> > > the
> > > silly-rename data, and then we're still doing the same sorts of
> > > things
> > > trying to figure out what data is in which dentry.
> > > 
> > > Maybe Al would be willing to reserve some of the top of d_flags
> > > for
> > > filesystems to use privately?
> > > 
> > > Al, can we have such?  NFS already has DCACHE_NFSFS_RENAMED, that
> > > could move
> > > up above the core d_flags?
> > 
> > I did look into this a few weeks ago, and it seemed to me that
> > we're
> > already low on free bits in d_flags.
> > 
> > An alternative might just be to reserve a whole bit in d_time as
> > being
> > the 'delegated dentry revalidated' bit. e.g. reserve bit
> > 'BITS_PER_LONG
> > - 1' (or just bit '0') for that purpose.
> 
> I did look at doing this already for the purposes of making
> nfs_force_dir_revalidate() faster than having an if statement in it,
> and the
> neat thing is that the if statement is always faster than any bit
> shifting
> work I came up with - however I was playing with most-significant
> bits.
> 
> Maybe using the least-significant would be faster, then we just do:
> 
> nfs_force_lookup_revalidate() {
> 	nfsi->cache_change_attribute += 2;
> }

Either that, or just do ' << 1' in nfs_set_verifier(). Either way
avoids the conditional.

> 
> But, again - what happens when we need another bit?  Is it OK to keep
> chopping d_time in half?

It shouldn't be a problem for 64-bit architectures. It's going to take
quite a while to overflow a 2^63 valued field.

Even for 32-bit, I'd say we're unlikely to see 2 billion updates of the
directory without a single revalidation of the dentry. It would have to
be a very peculiar 32-bit client with some serious high-end networking
hardware...

> What about coming up with a struct for d_fsdata that unionizes the
> devicename and the unlink data, and holds our private flags?  The
> case I am
> trying to fix is so rare that we could just do the allocation when it
> occurs, and that way we don't have to modify too much of the
> revalidation
> code just to handle it.  It also would give us more flags and private
> dentry
> data if we need it later.

I'm not sure this case warrants it. Maybe later if we find we need more
flags and are worried about the 32-bit arch case.

I'd also dispute that revalidating a delegation is a rare event. The
Hammerspace server will usually prefer to hand out a delegation when
possible on file open, so those cases happen very frequently. Having to
allocate a d_fsdata is likely to be a measurable overhead.

> Another option - allow filesystems to do d_alloc, then we could just
> account
> for the space we might need privately.  NFS would have larger
> dentries, but
> VFS could probably drop d_time.  Maybe too big a burden to grow
> another api
> for VFS.
> 
> I think I like the "allocate it if you need it" option for d_fsdata
> best so
> far.

Again, I'd argue that is overkill for this particular problem, but yes,
it would be a more efficient solution than extending d_fsdata.

> Ben
> 
> > We would then want to change nfs_set_verifier() to set the verifier
> > in
> > the remaining bits, and have it set the delegated dentry
> > revalidated
> > bit depending on whether there is a delegation for the inode at the
> > time of revalidation.
> > We may also want to do the same setting of the delegation bit in
> > nfs_verify_change_attribute() when there is a successful match of
> > the
> > remaining verifier.
> > 
> > Finally, we should have nfs_mark_delegation_revoked() and
> > nfs_start_delegation_return_locked() clear that delegated dentry
> > revalidated bit, perhaps by iterating over the inode->i_dentry
> > list?
> > 
> > Note that this has the advantage that since the actual verifier
> > part of
> > dentry->d_time can be kept up to date while we hold the delegation,
> > we
> > don't have to worry about the dentry getting revalidated for no
> > reason
> > after the delegation is returned.
> > 
> > -- 
> > Trond Myklebust
> > Linux NFS client maintainer, Hammerspace
> > trond.myklebust@hammerspace.com
diff mbox series

Patch

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e180033e35cf..e9d07dcd6d6f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -949,6 +949,7 @@  static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
 	return 0;
 }
 
+#define NFS_DELEGATION_VERF 0xfeeddeaf
 /**
  * nfs_force_lookup_revalidate - Mark the directory as having changed
  * @dir: pointer to directory inode
@@ -962,6 +963,8 @@  static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
 void nfs_force_lookup_revalidate(struct inode *dir)
 {
 	NFS_I(dir)->cache_change_attribute++;
+	if (NFS_I(dir)->cache_change_attribute == NFS_DELEGATION_VERF)
+		NFS_I(dir)->cache_change_attribute++;
 }
 EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
 
@@ -1133,6 +1136,7 @@  nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
 	struct nfs_fh *fhandle;
 	struct nfs_fattr *fattr;
 	struct nfs4_label *label;
+	unsigned long verifier;
 	int ret;
 
 	ret = -ENOMEM;
@@ -1154,6 +1158,11 @@  nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
 	if (nfs_refresh_inode(inode, fattr) < 0)
 		goto out;
 
+	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
+		verifier = NFS_DELEGATION_VERF;
+	else
+		verifier = nfs_save_change_attribute(dir);
+
 	nfs_setsecurity(inode, fattr, label);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 
@@ -1167,6 +1176,15 @@  nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
 	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
 }
 
+static int nfs_delegation_matches_dentry(struct inode *dir,
+			struct dentry *dentry, struct inode *inode)
+{
+	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ) &&
+		dentry->d_time == NFS_DELEGATION_VERF)
+		return 1;
+	return 0;
+}
+
 /*
  * This is called every time the dcache has a lookup hit,
  * and we should check whether we can really trust that
@@ -1197,7 +1215,7 @@  nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
 		goto out_bad;
 	}
 
-	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
+	if (nfs_delegation_matches_dentry(dir, dentry, inode))
 		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
 
 	/* Force a full look up iff the parent directory has changed */
@@ -1635,7 +1653,7 @@  nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
 	if (inode == NULL)
 		goto full_reval;
 
-	if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
+	if (nfs_delegation_matches_dentry(dir, dentry, inode))
 		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
 
 	/* NFS only supports OPEN on regular files */