Message ID | 1623682098-13236-4-git-send-email-bfields@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | reexport lock fixes | expand |
On Mon, 2021-06-14 at 10:48 -0400, J. Bruce Fields wrote: > From: "J. Bruce Fields" <bfields@redhat.com> > > In the reexport case, nfsd is currently passing along locks with the > reclaim bit set. The client sends a new lock request, which is > granted > if there's currently no conflict--even if it's possible a conflicting > lock could have been briefly held in the interim. > > We don't currently have any way to safely grant reclaim, so for now > let's just deny them all. > > I'm doing this by passing the reclaim bit to nfs and letting it fail > the > call, with the idea that eventually the client might be able to do > something more forgiving here. > > Signed-off-by: J. Bruce Fields <bfields@redhat.com> > --- > fs/nfs/file.c | 3 +++ > fs/nfsd/nfs4state.c | 3 +++ > fs/nfsd/nfsproc.c | 1 + > include/linux/fs.h | 1 + > 4 files changed, 8 insertions(+) > > diff --git a/fs/nfs/file.c b/fs/nfs/file.c > index 1fef107961bc..35a29b440e3e 100644 > --- a/fs/nfs/file.c > +++ b/fs/nfs/file.c > @@ -806,6 +806,9 @@ int nfs_lock(struct file *filp, int cmd, struct > file_lock *fl) > > nfs_inc_stats(inode, NFSIOS_VFSLOCK); > > + if (fl->fl_flags & FL_RECLAIM) > + return -NFSERR_NO_GRACE; NACK. nfs_lock() is required to return a POSIX error. I know that right now, nfsd is the only thing setting FL_RECLAIM, but we can't guarantee that will always be the case. > + > /* No mandatory locks over NFS */ > if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) > goto out_err; > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index 00d98bbab2a6..3ef42c0d5d38 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -6903,6 +6903,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct > nfsd4_compound_state *cstate, > if (!locks_in_grace(net) && lock->lk_reclaim) > goto out; > > + if (lock->lk_reclaim) > + fl_flags |= FL_RECLAIM; > + > fp = lock_stp->st_stid.sc_file; > switch (lock->lk_type) { > case NFS4_READW_LT: > diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c > index 60d7c59e7935..80c430c37ab7 100644 > --- a/fs/nfsd/nfsproc.c > +++ b/fs/nfsd/nfsproc.c > @@ -881,6 +881,7 @@ nfserrno (int errno) > { nfserr_serverfault, -ENFILE }, > { nfserr_io, -EUCLEAN }, > { nfserr_perm, -ENOKEY }, > + { nfserr_no_grace, -NFSERR_NO_GRACE}, > }; > int i; > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index c3c88fdb9b2a..9be479999109 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -997,6 +997,7 @@ static inline struct file *get_file(struct file > *f) > #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ > #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ > #define FL_LAYOUT 2048 /* outstanding pNFS layout */ > +#define FL_RECLAIM 4096 /* reclaiming from a reboot server */ > > #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) >
On Mon, Jun 14, 2021 at 02:56:55PM +0000, Trond Myklebust wrote: > On Mon, 2021-06-14 at 10:48 -0400, J. Bruce Fields wrote: > > From: "J. Bruce Fields" <bfields@redhat.com> > > > > In the reexport case, nfsd is currently passing along locks with the > > reclaim bit set. The client sends a new lock request, which is > > granted > > if there's currently no conflict--even if it's possible a conflicting > > lock could have been briefly held in the interim. > > > > We don't currently have any way to safely grant reclaim, so for now > > let's just deny them all. > > > > I'm doing this by passing the reclaim bit to nfs and letting it fail > > the > > call, with the idea that eventually the client might be able to do > > something more forgiving here. > > > > Signed-off-by: J. Bruce Fields <bfields@redhat.com> > > --- > > fs/nfs/file.c | 3 +++ > > fs/nfsd/nfs4state.c | 3 +++ > > fs/nfsd/nfsproc.c | 1 + > > include/linux/fs.h | 1 + > > 4 files changed, 8 insertions(+) > > > > diff --git a/fs/nfs/file.c b/fs/nfs/file.c > > index 1fef107961bc..35a29b440e3e 100644 > > --- a/fs/nfs/file.c > > +++ b/fs/nfs/file.c > > @@ -806,6 +806,9 @@ int nfs_lock(struct file *filp, int cmd, struct > > file_lock *fl) > > > > nfs_inc_stats(inode, NFSIOS_VFSLOCK); > > > > + if (fl->fl_flags & FL_RECLAIM) > > + return -NFSERR_NO_GRACE; > > NACK. nfs_lock() is required to return a POSIX error. I know that right > now, nfsd is the only thing setting FL_RECLAIM, but we can't guarantee > that will always be the case. Setting FL_RECLAIM tells the filesystem that you're prepared to handle NFSERR_NO_GRACE. I'm not seeing the risk. --b. > > /* No mandatory locks over NFS */ > > if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) > > goto out_err; > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > > index 00d98bbab2a6..3ef42c0d5d38 100644 > > --- a/fs/nfsd/nfs4state.c > > +++ b/fs/nfsd/nfs4state.c > > @@ -6903,6 +6903,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct > > nfsd4_compound_state *cstate, > > if (!locks_in_grace(net) && lock->lk_reclaim) > > goto out; > > > > + if (lock->lk_reclaim) > > + fl_flags |= FL_RECLAIM; > > + > > fp = lock_stp->st_stid.sc_file; > > switch (lock->lk_type) { > > case NFS4_READW_LT: > > diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c > > index 60d7c59e7935..80c430c37ab7 100644 > > --- a/fs/nfsd/nfsproc.c > > +++ b/fs/nfsd/nfsproc.c > > @@ -881,6 +881,7 @@ nfserrno (int errno) > > { nfserr_serverfault, -ENFILE }, > > { nfserr_io, -EUCLEAN }, > > { nfserr_perm, -ENOKEY }, > > + { nfserr_no_grace, -NFSERR_NO_GRACE}, > > }; > > int i; > > > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > index c3c88fdb9b2a..9be479999109 100644 > > --- a/include/linux/fs.h > > +++ b/include/linux/fs.h > > @@ -997,6 +997,7 @@ static inline struct file *get_file(struct file > > *f) > > #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ > > #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ > > #define FL_LAYOUT 2048 /* outstanding pNFS layout */ > > +#define FL_RECLAIM 4096 /* reclaiming from a reboot server */ > > > > #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) > > > > -- > Trond Myklebust > Linux NFS client maintainer, Hammerspace > trond.myklebust@hammerspace.com > >
On Mon, 2021-06-14 at 15:34 -0400, J. Bruce Fields wrote: > On Mon, Jun 14, 2021 at 02:56:55PM +0000, Trond Myklebust wrote: > > On Mon, 2021-06-14 at 10:48 -0400, J. Bruce Fields wrote: > > > From: "J. Bruce Fields" <bfields@redhat.com> > > > > > > In the reexport case, nfsd is currently passing along locks with > > > the > > > reclaim bit set. The client sends a new lock request, which is > > > granted > > > if there's currently no conflict--even if it's possible a > > > conflicting > > > lock could have been briefly held in the interim. > > > > > > We don't currently have any way to safely grant reclaim, so for > > > now > > > let's just deny them all. > > > > > > I'm doing this by passing the reclaim bit to nfs and letting it > > > fail > > > the > > > call, with the idea that eventually the client might be able to > > > do > > > something more forgiving here. > > > > > > Signed-off-by: J. Bruce Fields <bfields@redhat.com> > > > --- > > > fs/nfs/file.c | 3 +++ > > > fs/nfsd/nfs4state.c | 3 +++ > > > fs/nfsd/nfsproc.c | 1 + > > > include/linux/fs.h | 1 + > > > 4 files changed, 8 insertions(+) > > > > > > diff --git a/fs/nfs/file.c b/fs/nfs/file.c > > > index 1fef107961bc..35a29b440e3e 100644 > > > --- a/fs/nfs/file.c > > > +++ b/fs/nfs/file.c > > > @@ -806,6 +806,9 @@ int nfs_lock(struct file *filp, int cmd, > > > struct > > > file_lock *fl) > > > > > > nfs_inc_stats(inode, NFSIOS_VFSLOCK); > > > > > > + if (fl->fl_flags & FL_RECLAIM) > > > + return -NFSERR_NO_GRACE; > > > > NACK. nfs_lock() is required to return a POSIX error. I know that > > right > > now, nfsd is the only thing setting FL_RECLAIM, but we can't > > guarantee > > that will always be the case. > > Setting FL_RECLAIM tells the filesystem that you're prepared to > handle > NFSERR_NO_GRACE. I'm not seeing the risk. You are using a function that is exposed to the VFS. On error, that function is expected to return a value that is a Linux error between -1 and -4095. I suggest adding an error value ENOGRACE to include/linux/errno.h.
On Mon, Jun 14, 2021 at 07:53:52PM +0000, Trond Myklebust wrote: > On Mon, 2021-06-14 at 15:34 -0400, J. Bruce Fields wrote: > > On Mon, Jun 14, 2021 at 02:56:55PM +0000, Trond Myklebust wrote: > > > On Mon, 2021-06-14 at 10:48 -0400, J. Bruce Fields wrote: > > > > From: "J. Bruce Fields" <bfields@redhat.com> > > > > > > > > In the reexport case, nfsd is currently passing along locks with > > > > the > > > > reclaim bit set. The client sends a new lock request, which is > > > > granted > > > > if there's currently no conflict--even if it's possible a > > > > conflicting > > > > lock could have been briefly held in the interim. > > > > > > > > We don't currently have any way to safely grant reclaim, so for > > > > now > > > > let's just deny them all. > > > > > > > > I'm doing this by passing the reclaim bit to nfs and letting it > > > > fail > > > > the > > > > call, with the idea that eventually the client might be able to > > > > do > > > > something more forgiving here. > > > > > > > > Signed-off-by: J. Bruce Fields <bfields@redhat.com> > > > > --- > > > > fs/nfs/file.c | 3 +++ > > > > fs/nfsd/nfs4state.c | 3 +++ > > > > fs/nfsd/nfsproc.c | 1 + > > > > include/linux/fs.h | 1 + > > > > 4 files changed, 8 insertions(+) > > > > > > > > diff --git a/fs/nfs/file.c b/fs/nfs/file.c > > > > index 1fef107961bc..35a29b440e3e 100644 > > > > --- a/fs/nfs/file.c > > > > +++ b/fs/nfs/file.c > > > > @@ -806,6 +806,9 @@ int nfs_lock(struct file *filp, int cmd, > > > > struct > > > > file_lock *fl) > > > > > > > > nfs_inc_stats(inode, NFSIOS_VFSLOCK); > > > > > > > > + if (fl->fl_flags & FL_RECLAIM) > > > > + return -NFSERR_NO_GRACE; > > > > > > NACK. nfs_lock() is required to return a POSIX error. I know that > > > right > > > now, nfsd is the only thing setting FL_RECLAIM, but we can't > > > guarantee > > > that will always be the case. > > > > Setting FL_RECLAIM tells the filesystem that you're prepared to > > handle > > NFSERR_NO_GRACE. I'm not seeing the risk. > > You are using a function that is exposed to the VFS. On error, that > function is expected to return a value that is a Linux error between -1 > and -4095. Or 1, actually (FILE_LOCK_DEFERRED). > I suggest adding an error value ENOGRACE to include/linux/errno.h. I can live with that, but I'm still curious what exactly you're worried about. --b.
On Mon, 2021-06-14 at 16:03 -0400, bfields@fieldses.org wrote: > On Mon, Jun 14, 2021 at 07:53:52PM +0000, Trond Myklebust wrote: > > On Mon, 2021-06-14 at 15:34 -0400, J. Bruce Fields wrote: > > > On Mon, Jun 14, 2021 at 02:56:55PM +0000, Trond Myklebust wrote: > > > > On Mon, 2021-06-14 at 10:48 -0400, J. Bruce Fields wrote: > > > > > From: "J. Bruce Fields" <bfields@redhat.com> > > > > > > > > > > In the reexport case, nfsd is currently passing along locks > > > > > with > > > > > the > > > > > reclaim bit set. The client sends a new lock request, which > > > > > is > > > > > granted > > > > > if there's currently no conflict--even if it's possible a > > > > > conflicting > > > > > lock could have been briefly held in the interim. > > > > > > > > > > We don't currently have any way to safely grant reclaim, so > > > > > for > > > > > now > > > > > let's just deny them all. > > > > > > > > > > I'm doing this by passing the reclaim bit to nfs and letting > > > > > it > > > > > fail > > > > > the > > > > > call, with the idea that eventually the client might be able > > > > > to > > > > > do > > > > > something more forgiving here. > > > > > > > > > > Signed-off-by: J. Bruce Fields <bfields@redhat.com> > > > > > --- > > > > > fs/nfs/file.c | 3 +++ > > > > > fs/nfsd/nfs4state.c | 3 +++ > > > > > fs/nfsd/nfsproc.c | 1 + > > > > > include/linux/fs.h | 1 + > > > > > 4 files changed, 8 insertions(+) > > > > > > > > > > diff --git a/fs/nfs/file.c b/fs/nfs/file.c > > > > > index 1fef107961bc..35a29b440e3e 100644 > > > > > --- a/fs/nfs/file.c > > > > > +++ b/fs/nfs/file.c > > > > > @@ -806,6 +806,9 @@ int nfs_lock(struct file *filp, int cmd, > > > > > struct > > > > > file_lock *fl) > > > > > > > > > > nfs_inc_stats(inode, NFSIOS_VFSLOCK); > > > > > > > > > > + if (fl->fl_flags & FL_RECLAIM) > > > > > + return -NFSERR_NO_GRACE; > > > > > > > > NACK. nfs_lock() is required to return a POSIX error. I know > > > > that > > > > right > > > > now, nfsd is the only thing setting FL_RECLAIM, but we can't > > > > guarantee > > > > that will always be the case. > > > > > > Setting FL_RECLAIM tells the filesystem that you're prepared to > > > handle > > > NFSERR_NO_GRACE. I'm not seeing the risk. > > > > You are using a function that is exposed to the VFS. On error, that > > function is expected to return a value that is a Linux error > > between -1 > > and -4095. > > Or 1, actually (FILE_LOCK_DEFERRED). > > > I suggest adding an error value ENOGRACE to include/linux/errno.h. > > I can live with that, but I'm still curious what exactly you're > worried > about. > I want to avoid the kind of issues we've be met with earlier when mixing types just because they happened to be integer valued. We introduced the mixing of POSIX/Linux and NFS errors in the NFS client back in the 1990s, and that was a mistake that we're still paying for. For instance, the value ERR_PTR(-NFSERR_NO_GRACE) will be happily declared as a valid pointer by the IS_ERR() test, and every so often we find an Oops around that issue because someone used the return value from a function that they thought was POSIX/Linux error valued, because it usually is returning POSIX errors.
On Mon, Jun 14, 2021 at 09:03:35PM +0000, Trond Myklebust wrote: > I want to avoid the kind of issues we've be met with earlier when > mixing types just because they happened to be integer valued. > > We introduced the mixing of POSIX/Linux and NFS errors in the NFS > client back in the 1990s, and that was a mistake that we're still > paying for. For instance, the value ERR_PTR(-NFSERR_NO_GRACE) will be > happily declared as a valid pointer by the IS_ERR() test, and every so > often we find an Oops around that issue because someone used the return > value from a function that they thought was POSIX/Linux error valued, > because it usually is returning POSIX errors. I did this, by the way, but also ran across a couple more bugs in testing. At this point I've got connectathon locking tests passing on a re-export--I need to do a little more cleanup and then I'll repost. --b.
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1fef107961bc..35a29b440e3e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -806,6 +806,9 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) nfs_inc_stats(inode, NFSIOS_VFSLOCK); + if (fl->fl_flags & FL_RECLAIM) + return -NFSERR_NO_GRACE; + /* No mandatory locks over NFS */ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) goto out_err; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 00d98bbab2a6..3ef42c0d5d38 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6903,6 +6903,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!locks_in_grace(net) && lock->lk_reclaim) goto out; + if (lock->lk_reclaim) + fl_flags |= FL_RECLAIM; + fp = lock_stp->st_stid.sc_file; switch (lock->lk_type) { case NFS4_READW_LT: diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 60d7c59e7935..80c430c37ab7 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -881,6 +881,7 @@ nfserrno (int errno) { nfserr_serverfault, -ENFILE }, { nfserr_io, -EUCLEAN }, { nfserr_perm, -ENOKEY }, + { nfserr_no_grace, -NFSERR_NO_GRACE}, }; int i; diff --git a/include/linux/fs.h b/include/linux/fs.h index c3c88fdb9b2a..9be479999109 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -997,6 +997,7 @@ static inline struct file *get_file(struct file *f) #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ #define FL_LAYOUT 2048 /* outstanding pNFS layout */ +#define FL_RECLAIM 4096 /* reclaiming from a reboot server */ #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)