Message ID | 1739475438-5640-3-git-send-email-dai.ngo@oracle.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Chuck Lever |
Headers | show |
Series | NFSD: offer write delegation for OPEN with OPEN4_SHARE_ACCESS only | expand |
On Thu, 2025-02-13 at 11:37 -0800, Dai Ngo wrote: > Allow read using write delegation stateid granted on OPENs with > OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE > implementation may unavoidably do (e.g., due to buffer cache > constraints). > > When this condition is detected in nfsd4_encode_read the access > mode FMODE_READ is temporarily added to the file's f_mode and is > removed when the read is done. > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > --- > fs/nfsd/nfs4proc.c | 15 ++++++++++++++- > fs/nfsd/nfs4xdr.c | 8 ++++++++ > fs/nfsd/xdr4.h | 1 + > 3 files changed, 23 insertions(+), 1 deletion(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index f6e06c779d09..be43627bbf78 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -973,7 +973,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > /* check stateid */ > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > &read->rd_stateid, RD_STATE, > - &read->rd_nf, NULL); > + &read->rd_nf, &read->rd_wd_stid); > + /* > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > + * delegation stateid used for read. Its refcount is decremented > + * by nfsd4_read_release when read is done. 
> + */ > + if (!status && read->rd_wd_stid && > + (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || > + delegstateid(read->rd_wd_stid)->dl_type != NFS4_OPEN_DELEGATE_WRITE)) { > + nfs4_put_stid(read->rd_wd_stid); > + read->rd_wd_stid = NULL; > + } > > read->rd_rqstp = rqstp; > read->rd_fhp = &cstate->current_fh; > @@ -984,6 +995,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > static void > nfsd4_read_release(union nfsd4_op_u *u) > { > + if (u->read.rd_wd_stid) > + nfs4_put_stid(u->read.rd_wd_stid); > if (u->read.rd_nf) > nfsd_file_put(u->read.rd_nf); > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index e67420729ecd..3996678bab3f 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -4498,6 +4498,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > unsigned long maxcount; > __be32 wire_data[2]; > struct file *file; > + bool wronly = false; > > if (nfserr) > return nfserr; > @@ -4515,10 +4516,17 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > maxcount = min_t(unsigned long, read->rd_length, > (xdr->buf->buflen - xdr->buf->len)); > > + if (!(file->f_mode & FMODE_READ) && read->rd_wd_stid) { > + /* allow READ using write delegation stateid */ > + wronly = true; > + file->f_mode |= FMODE_READ; > + } Is that really OK? Can we just upgrade the f_mode like that? Also, what happens with more exotic exported filesystems like NFS? For example, if I'm reexporting NFS, the backend NFS server may not allow you to do a READ operation using a OPEN4_SHARE_ACCESS_WRITE only stateid. Won't this break in that case? 
> if (file->f_op->splice_read && splice_ok) > nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); > else > nfserr = nfsd4_encode_readv(resp, read, file, maxcount); > + if (wronly) > + file->f_mode &= ~FMODE_READ; > if (nfserr) { > xdr_truncate_encode(xdr, eof_offset); > return nfserr; > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index c26ba86dbdfd..2f053beed899 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -426,6 +426,7 @@ struct nfsd4_read { > struct svc_rqst *rd_rqstp; /* response */ > struct svc_fh *rd_fhp; /* response */ > u32 rd_eof; /* response */ > + struct nfs4_stid *rd_wd_stid; /* internal */ > }; > > struct nfsd4_readdir {
On Thu, 2025-02-13 at 16:07 -0500, Jeff Layton wrote: > On Thu, 2025-02-13 at 11:37 -0800, Dai Ngo wrote: > > Allow read using write delegation stateid granted on OPENs with > > OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE > > implementation may unavoidably do (e.g., due to buffer cache > > constraints). > > > > When this condition is detected in nfsd4_encode_read the access > > mode FMODE_READ is temporarily added to the file's f_mode and is > > removed when the read is done. > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > > --- > > fs/nfsd/nfs4proc.c | 15 ++++++++++++++- > > fs/nfsd/nfs4xdr.c | 8 ++++++++ > > fs/nfsd/xdr4.h | 1 + > > 3 files changed, 23 insertions(+), 1 deletion(-) > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > index f6e06c779d09..be43627bbf78 100644 > > --- a/fs/nfsd/nfs4proc.c > > +++ b/fs/nfsd/nfs4proc.c > > @@ -973,7 +973,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > /* check stateid */ > > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > > &read->rd_stateid, RD_STATE, > > - &read->rd_nf, NULL); > > + &read->rd_nf, &read->rd_wd_stid); > > + /* > > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > > + * delegation stateid used for read. Its refcount is decremented > > + * by nfsd4_read_release when read is done. 
> > + */ > > + if (!status && read->rd_wd_stid && > > + (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || > > + delegstateid(read->rd_wd_stid)->dl_type != NFS4_OPEN_DELEGATE_WRITE)) { > > + nfs4_put_stid(read->rd_wd_stid); > > + read->rd_wd_stid = NULL; > > + } > > > > read->rd_rqstp = rqstp; > > read->rd_fhp = &cstate->current_fh; > > @@ -984,6 +995,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > static void > > nfsd4_read_release(union nfsd4_op_u *u) > > { > > + if (u->read.rd_wd_stid) > > + nfs4_put_stid(u->read.rd_wd_stid); > > if (u->read.rd_nf) > > nfsd_file_put(u->read.rd_nf); > > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > index e67420729ecd..3996678bab3f 100644 > > --- a/fs/nfsd/nfs4xdr.c > > +++ b/fs/nfsd/nfs4xdr.c > > @@ -4498,6 +4498,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > unsigned long maxcount; > > __be32 wire_data[2]; > > struct file *file; > > + bool wronly = false; > > > > if (nfserr) > > return nfserr; > > @@ -4515,10 +4516,17 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > maxcount = min_t(unsigned long, read->rd_length, > > (xdr->buf->buflen - xdr->buf->len)); > > > > + if (!(file->f_mode & FMODE_READ) && read->rd_wd_stid) { > > + /* allow READ using write delegation stateid */ > > + wronly = true; > > + file->f_mode |= FMODE_READ; > > + } > > Is that really OK? Can we just upgrade the f_mode like that? > > Also, what happens with more exotic exported filesystems like NFS? > > For example, if I'm reexporting NFS, the backend NFS server may not > allow you to do a READ operation using a OPEN4_SHARE_ACCESS_WRITE only > stateid. Won't this break in that case? > Hmm...bad example since we don't allow delegations on reexported NFS these days. Reexporting Ceph or SMB might be a better example. They'll likely both have problems if you try to issue a read on the result from a O_WRONLY open. 
I think you will probably need to rework the way nfs4_file's track their struct files. IOW, when the client does a OPEN4_SHARE_ACCESS_WRITE-only open, you need to get a struct file that is FMODE_READ|FMODE_WRITE to hang off the delegation. But, you'll also need to fix up the accounting for the share/deny mode locking to ignore that you _actually_ have it open for read too in that case. Smoke and mirrors... > > if (file->f_op->splice_read && splice_ok) > > nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); > > else > > nfserr = nfsd4_encode_readv(resp, read, file, maxcount); > > + if (wronly) > > + file->f_mode &= ~FMODE_READ; > > if (nfserr) { > > xdr_truncate_encode(xdr, eof_offset); > > return nfserr; > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > > index c26ba86dbdfd..2f053beed899 100644 > > --- a/fs/nfsd/xdr4.h > > +++ b/fs/nfsd/xdr4.h > > @@ -426,6 +426,7 @@ struct nfsd4_read { > > struct svc_rqst *rd_rqstp; /* response */ > > struct svc_fh *rd_fhp; /* response */ > > u32 rd_eof; /* response */ > > + struct nfs4_stid *rd_wd_stid; /* internal */ > > }; > > > > struct nfsd4_readdir { >
On 2/13/25 6:29 PM, Jeff Layton wrote: > On Thu, 2025-02-13 at 16:07 -0500, Jeff Layton wrote: >> On Thu, 2025-02-13 at 11:37 -0800, Dai Ngo wrote: >>> Allow read using write delegation stateid granted on OPENs with >>> OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE >>> implementation may unavoidably do (e.g., due to buffer cache >>> constraints). >>> >>> When this condition is detected in nfsd4_encode_read the access >>> mode FMODE_READ is temporarily added to the file's f_mode and is >>> removed when the read is done. >>> >>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >>> --- >>> fs/nfsd/nfs4proc.c | 15 ++++++++++++++- >>> fs/nfsd/nfs4xdr.c | 8 ++++++++ >>> fs/nfsd/xdr4.h | 1 + >>> 3 files changed, 23 insertions(+), 1 deletion(-) >>> >>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >>> index f6e06c779d09..be43627bbf78 100644 >>> --- a/fs/nfsd/nfs4proc.c >>> +++ b/fs/nfsd/nfs4proc.c >>> @@ -973,7 +973,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >>> /* check stateid */ >>> status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, >>> &read->rd_stateid, RD_STATE, >>> - &read->rd_nf, NULL); >>> + &read->rd_nf, &read->rd_wd_stid); >>> + /* >>> + * rd_wd_stid is needed for nfsd4_encode_read to allow write >>> + * delegation stateid used for read. Its refcount is decremented >>> + * by nfsd4_read_release when read is done. 
>>> + */ >>> + if (!status && read->rd_wd_stid && >>> + (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || >>> + delegstateid(read->rd_wd_stid)->dl_type != NFS4_OPEN_DELEGATE_WRITE)) { >>> + nfs4_put_stid(read->rd_wd_stid); >>> + read->rd_wd_stid = NULL; >>> + } >>> >>> read->rd_rqstp = rqstp; >>> read->rd_fhp = &cstate->current_fh; >>> @@ -984,6 +995,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >>> static void >>> nfsd4_read_release(union nfsd4_op_u *u) >>> { >>> + if (u->read.rd_wd_stid) >>> + nfs4_put_stid(u->read.rd_wd_stid); >>> if (u->read.rd_nf) >>> nfsd_file_put(u->read.rd_nf); >>> trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, >>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >>> index e67420729ecd..3996678bab3f 100644 >>> --- a/fs/nfsd/nfs4xdr.c >>> +++ b/fs/nfsd/nfs4xdr.c >>> @@ -4498,6 +4498,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, >>> unsigned long maxcount; >>> __be32 wire_data[2]; >>> struct file *file; >>> + bool wronly = false; >>> >>> if (nfserr) >>> return nfserr; >>> @@ -4515,10 +4516,17 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, >>> maxcount = min_t(unsigned long, read->rd_length, >>> (xdr->buf->buflen - xdr->buf->len)); >>> >>> + if (!(file->f_mode & FMODE_READ) && read->rd_wd_stid) { >>> + /* allow READ using write delegation stateid */ >>> + wronly = true; >>> + file->f_mode |= FMODE_READ; >>> + } >> >> Is that really OK? Can we just upgrade the f_mode like that? >> >> Also, what happens with more exotic exported filesystems like NFS? >> >> For example, if I'm reexporting NFS, the backend NFS server may not >> allow you to do a READ operation using a OPEN4_SHARE_ACCESS_WRITE only >> stateid. Won't this break in that case? >> > > Hmm...bad example since we don't allow delegations on reexported NFS > these days. Reexporting Ceph or SMB might be a better example. 
They'll > likely both have problems if you try to issue a read on the result from > a O_WRONLY open. I think you will probably need to rework the way > nfs4_file's track their struct files. > > IOW, when the client does a OPEN4_SHARE_ACCESS_WRITE-only open, you > need to get a struct file that is FMODE_READ|FMODE_WRITE to hang off > the delegation. But, you'll also need to fix up the accounting for the > share/deny mode locking to ignore that you _actually_ have it open for > read too in that case. For the record, I agree with Jeff's suggested approach. > Smoke and mirrors... > >>> if (file->f_op->splice_read && splice_ok) >>> nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); >>> else >>> nfserr = nfsd4_encode_readv(resp, read, file, maxcount); >>> + if (wronly) >>> + file->f_mode &= ~FMODE_READ; >>> if (nfserr) { >>> xdr_truncate_encode(xdr, eof_offset); >>> return nfserr; >>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h >>> index c26ba86dbdfd..2f053beed899 100644 >>> --- a/fs/nfsd/xdr4.h >>> +++ b/fs/nfsd/xdr4.h >>> @@ -426,6 +426,7 @@ struct nfsd4_read { >>> struct svc_rqst *rd_rqstp; /* response */ >>> struct svc_fh *rd_fhp; /* response */ >>> u32 rd_eof; /* response */ >>> + struct nfs4_stid *rd_wd_stid; /* internal */ >>> }; >>> >>> struct nfsd4_readdir { >> >
On 2/14/25 6:26 AM, Chuck Lever wrote: > On 2/13/25 6:29 PM, Jeff Layton wrote: >> On Thu, 2025-02-13 at 16:07 -0500, Jeff Layton wrote: >>> On Thu, 2025-02-13 at 11:37 -0800, Dai Ngo wrote: >>>> Allow read using write delegation stateid granted on OPENs with >>>> OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE >>>> implementation may unavoidably do (e.g., due to buffer cache >>>> constraints). >>>> >>>> When this condition is detected in nfsd4_encode_read the access >>>> mode FMODE_READ is temporarily added to the file's f_mode and is >>>> removed when the read is done. >>>> >>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com> >>>> --- >>>> fs/nfsd/nfs4proc.c | 15 ++++++++++++++- >>>> fs/nfsd/nfs4xdr.c | 8 ++++++++ >>>> fs/nfsd/xdr4.h | 1 + >>>> 3 files changed, 23 insertions(+), 1 deletion(-) >>>> >>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >>>> index f6e06c779d09..be43627bbf78 100644 >>>> --- a/fs/nfsd/nfs4proc.c >>>> +++ b/fs/nfsd/nfs4proc.c >>>> @@ -973,7 +973,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >>>> /* check stateid */ >>>> status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, >>>> &read->rd_stateid, RD_STATE, >>>> - &read->rd_nf, NULL); >>>> + &read->rd_nf, &read->rd_wd_stid); >>>> + /* >>>> + * rd_wd_stid is needed for nfsd4_encode_read to allow write >>>> + * delegation stateid used for read. Its refcount is decremented >>>> + * by nfsd4_read_release when read is done. 
>>>> + */ >>>> + if (!status && read->rd_wd_stid && >>>> + (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || >>>> + delegstateid(read->rd_wd_stid)->dl_type != NFS4_OPEN_DELEGATE_WRITE)) { >>>> + nfs4_put_stid(read->rd_wd_stid); >>>> + read->rd_wd_stid = NULL; >>>> + } >>>> >>>> read->rd_rqstp = rqstp; >>>> read->rd_fhp = &cstate->current_fh; >>>> @@ -984,6 +995,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >>>> static void >>>> nfsd4_read_release(union nfsd4_op_u *u) >>>> { >>>> + if (u->read.rd_wd_stid) >>>> + nfs4_put_stid(u->read.rd_wd_stid); >>>> if (u->read.rd_nf) >>>> nfsd_file_put(u->read.rd_nf); >>>> trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, >>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c >>>> index e67420729ecd..3996678bab3f 100644 >>>> --- a/fs/nfsd/nfs4xdr.c >>>> +++ b/fs/nfsd/nfs4xdr.c >>>> @@ -4498,6 +4498,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, >>>> unsigned long maxcount; >>>> __be32 wire_data[2]; >>>> struct file *file; >>>> + bool wronly = false; >>>> >>>> if (nfserr) >>>> return nfserr; >>>> @@ -4515,10 +4516,17 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, >>>> maxcount = min_t(unsigned long, read->rd_length, >>>> (xdr->buf->buflen - xdr->buf->len)); >>>> >>>> + if (!(file->f_mode & FMODE_READ) && read->rd_wd_stid) { >>>> + /* allow READ using write delegation stateid */ >>>> + wronly = true; >>>> + file->f_mode |= FMODE_READ; >>>> + } >>> Is that really OK? Can we just upgrade the f_mode like that? It seems too simple but it works. I tested with pynfs, nfstest and git test, also with reexported NFS share. >>> >>> Also, what happens with more exotic exported filesystems like NFS? >>> >>> For example, if I'm reexporting NFS, the backend NFS server may not >>> allow you to do a READ operation using a OPEN4_SHARE_ACCESS_WRITE only >>> stateid. Won't this break in that case? 
>>> >> Hmm...bad example since we don't allow delegations on reexported NFS >> these days. As of 6.14-rc1 the NFSD grants delegations on reexported NFS shares as long as the server where the shares reside grants delegations. And this seems to work properly; delegations are recalled when expected. >> Reexporting Ceph or SMB might be a better example. They'll >> likely both have problems if you try to issue a read on the result from >> a O_WRONLY open. I think you will probably need to rework the way >> nfs4_file's track their struct files. >> >> IOW, when the client does a OPEN4_SHARE_ACCESS_WRITE-only open, you >> need to get a struct file that is FMODE_READ|FMODE_WRITE to hang off >> the delegation. There won't be any existing struct file with FMODE_READ|FMODE_WRITE when nfs4_set_delegation is called if the client opens the file with access mode OPEN4_SHARE_ACCESS_WRITE. We could create a new one, but then we would have two struct files for the same nfs4_file, which seems problematic. >> But, you'll also need to fix up the accounting for the >> share/deny mode locking to ignore that you _actually_ have it open for >> read too in that case. If I understand you correctly, you suggest that we upgrade the file access mode to FMODE_READ|FMODE_WRITE permanently if the client opens the file with OPEN4_SHARE_ACCESS_WRITE only. That works too but we have to remove the FMODE_READ from the struct file if the delegation is recalled. -Dai > For the record, I agree with Jeff's suggested approach. > > >> Smoke and mirrors... 
>> >>>> if (file->f_op->splice_read && splice_ok) >>>> nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); >>>> else >>>> nfserr = nfsd4_encode_readv(resp, read, file, maxcount); >>>> + if (wronly) >>>> + file->f_mode &= ~FMODE_READ; >>>> if (nfserr) { >>>> xdr_truncate_encode(xdr, eof_offset); >>>> return nfserr; >>>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h >>>> index c26ba86dbdfd..2f053beed899 100644 >>>> --- a/fs/nfsd/xdr4.h >>>> +++ b/fs/nfsd/xdr4.h >>>> @@ -426,6 +426,7 @@ struct nfsd4_read { >>>> struct svc_rqst *rd_rqstp; /* response */ >>>> struct svc_fh *rd_fhp; /* response */ >>>> u32 rd_eof; /* response */ >>>> + struct nfs4_stid *rd_wd_stid; /* internal */ >>>> }; >>>> >>>> struct nfsd4_readdir { >
On Fri, 2025-02-14 at 10:24 -0800, Dai Ngo wrote: > On 2/14/25 6:26 AM, Chuck Lever wrote: > > On 2/13/25 6:29 PM, Jeff Layton wrote: > > > On Thu, 2025-02-13 at 16:07 -0500, Jeff Layton wrote: > > > > On Thu, 2025-02-13 at 11:37 -0800, Dai Ngo wrote: > > > > > Allow read using write delegation stateid granted on OPENs with > > > > > OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE > > > > > implementation may unavoidably do (e.g., due to buffer cache > > > > > constraints). > > > > > > > > > > When this condition is detected in nfsd4_encode_read the access > > > > > mode FMODE_READ is temporarily added to the file's f_mode and is > > > > > removed when the read is done. > > > > > > > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com> > > > > > --- > > > > > fs/nfsd/nfs4proc.c | 15 ++++++++++++++- > > > > > fs/nfsd/nfs4xdr.c | 8 ++++++++ > > > > > fs/nfsd/xdr4.h | 1 + > > > > > 3 files changed, 23 insertions(+), 1 deletion(-) > > > > > > > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > > > > index f6e06c779d09..be43627bbf78 100644 > > > > > --- a/fs/nfsd/nfs4proc.c > > > > > +++ b/fs/nfsd/nfs4proc.c > > > > > @@ -973,7 +973,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > > > > /* check stateid */ > > > > > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > > > > > &read->rd_stateid, RD_STATE, > > > > > - &read->rd_nf, NULL); > > > > > + &read->rd_nf, &read->rd_wd_stid); > > > > > + /* > > > > > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > > > > > + * delegation stateid used for read. Its refcount is decremented > > > > > + * by nfsd4_read_release when read is done. 
> > > > > + */ > > > > > + if (!status && read->rd_wd_stid && > > > > > + (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || > > > > > + delegstateid(read->rd_wd_stid)->dl_type != NFS4_OPEN_DELEGATE_WRITE)) { > > > > > + nfs4_put_stid(read->rd_wd_stid); > > > > > + read->rd_wd_stid = NULL; > > > > > + } > > > > > > > > > > read->rd_rqstp = rqstp; > > > > > read->rd_fhp = &cstate->current_fh; > > > > > @@ -984,6 +995,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > > > > static void > > > > > nfsd4_read_release(union nfsd4_op_u *u) > > > > > { > > > > > + if (u->read.rd_wd_stid) > > > > > + nfs4_put_stid(u->read.rd_wd_stid); > > > > > if (u->read.rd_nf) > > > > > nfsd_file_put(u->read.rd_nf); > > > > > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > > > > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > > > > index e67420729ecd..3996678bab3f 100644 > > > > > --- a/fs/nfsd/nfs4xdr.c > > > > > +++ b/fs/nfsd/nfs4xdr.c > > > > > @@ -4498,6 +4498,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > > > > unsigned long maxcount; > > > > > __be32 wire_data[2]; > > > > > struct file *file; > > > > > + bool wronly = false; > > > > > > > > > > if (nfserr) > > > > > return nfserr; > > > > > @@ -4515,10 +4516,17 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > > > > maxcount = min_t(unsigned long, read->rd_length, > > > > > (xdr->buf->buflen - xdr->buf->len)); > > > > > > > > > > + if (!(file->f_mode & FMODE_READ) && read->rd_wd_stid) { > > > > > + /* allow READ using write delegation stateid */ > > > > > + wronly = true; > > > > > + file->f_mode |= FMODE_READ; > > > > > + } > > > > Is that really OK? Can we just upgrade the f_mode like that? > > It seems too simple but it works. I tested with pynfs, nfstest and > git test, also with reexported NFS share. > I don't think it's that simple. Some filesystems will have problems here. 
There has been talk for years about allowing fcntl(F_SETFL, ...) to change the file access mode, but that still has never materialized. > > > > > > > > Also, what happens with more exotic exported filesystems like NFS? > > > > > > > > For example, if I'm reexporting NFS, the backend NFS server may not > > > > allow you to do a READ operation using a OPEN4_SHARE_ACCESS_WRITE only > > > > stateid. Won't this break in that case? > > > > > > > Hmm...bad example since we don't allow delegations on reexported NFS > > > these days. > > As of 6.14-rc1 the NFSD grants delegations on reexported NFS shares as > long as the server where the shares reside grants delegations. And this > seems to work properly; delegations are recalled when expected. > Ahh, I was thinking of this patch in Chuck's nfsd-testing branch: commit 2d7501a673a5d855a941409e6003a0b2afbbe149 Author: Mike Snitzer <snitzer@kernel.org> Date: Mon Feb 10 11:25:53 2025 -0500 nfsd: disallow file locking and delegations for NFSv4 reexport We do not and cannot support file locking with NFS reexport over NFSv4.x for the same reason we don't do it for NFSv3: NFS reexport server reboot cannot allow clients to recover locks because the source NFS server has not rebooted, and so it is not in grace. Since the source NFS server is not in grace, it cannot offer any guarantees that the file won't have been changed between the locks getting lost and any attempt to recover/reclaim them. The same applies to delegations and any associated locks, so disallow them too. Clients are no longer allowed to get file locks or delegations from a reexport server, any attempts will fail with operation not supported. Update the "Reboot recovery" section accordingly in Documentation/filesystems/nfs/reexport.rst Signed-off-by: Mike Snitzer <snitzer@kernel.org> Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> > > > Reexporting Ceph or SMB might be a better example. 
They'll > > > likely both have problems if you try to issue a read on the result from > > > a O_WRONLY open. I think you will probably need to rework the way > > > nfs4_file's track their struct files. > > > > > > IOW, when the client does a OPEN4_SHARE_ACCESS_WRITE-only open, you > > > need to get a struct file that is FMODE_READ|FMODE_WRITE to hang off > > > the delegation. > > There won't be any existing struct file with FMODE_READ|FMODE_WRITE when > nfs4_set_delegation is called if the client opens the file with access > mode OPEN4_SHARE_ACCESS_WRITE. Unless we create a new one which means now > we have 2 struct file's for the same nfs4_file, it seems like problematic. > > > > But, you'll also need to fix up the accounting for the > > > share/deny mode locking to ignore that you _actually_ have it open for > > > read too in that case. > > If I understand you correctly, you suggest that we upgrade the file access > mode to FMODE_READ|FMODE_WRITE permanently if the client opens the file with > OPEN4_SHARE_ACCESS_WRITE only. That works too but we have to remove the > FMODE_READ from the struct file if the delegation is recalled. > > I don't see a problem with leaving the backend file open FMODE_READ|FMODE_WRITE in that case. You can just stop allowing reads on it at the nfsd layer. > > > For the record, I agree with Jeff's suggested approach. > > > > > > > Smoke and mirrors... 
> > > > > > > > if (file->f_op->splice_read && splice_ok) > > > > > nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); > > > > > else > > > > > nfserr = nfsd4_encode_readv(resp, read, file, maxcount); > > > > > + if (wronly) > > > > > + file->f_mode &= ~FMODE_READ; > > > > > if (nfserr) { > > > > > xdr_truncate_encode(xdr, eof_offset); > > > > > return nfserr; > > > > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > > > > > index c26ba86dbdfd..2f053beed899 100644 > > > > > --- a/fs/nfsd/xdr4.h > > > > > +++ b/fs/nfsd/xdr4.h > > > > > @@ -426,6 +426,7 @@ struct nfsd4_read { > > > > > struct svc_rqst *rd_rqstp; /* response */ > > > > > struct svc_fh *rd_fhp; /* response */ > > > > > u32 rd_eof; /* response */ > > > > > + struct nfs4_stid *rd_wd_stid; /* internal */ > > > > > }; > > > > > > > > > > struct nfsd4_readdir { > >
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f6e06c779d09..be43627bbf78 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -973,7 +973,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &read->rd_stateid, RD_STATE, - &read->rd_nf, NULL); + &read->rd_nf, &read->rd_wd_stid); + /* + * rd_wd_stid is needed for nfsd4_encode_read to allow write + * delegation stateid used for read. Its refcount is decremented + * by nfsd4_read_release when read is done. + */ + if (!status && read->rd_wd_stid && + (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || + delegstateid(read->rd_wd_stid)->dl_type != NFS4_OPEN_DELEGATE_WRITE)) { + nfs4_put_stid(read->rd_wd_stid); + read->rd_wd_stid = NULL; + } read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; @@ -984,6 +995,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, static void nfsd4_read_release(union nfsd4_op_u *u) { + if (u->read.rd_wd_stid) + nfs4_put_stid(u->read.rd_wd_stid); if (u->read.rd_nf) nfsd_file_put(u->read.rd_nf); trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e67420729ecd..3996678bab3f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4498,6 +4498,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, unsigned long maxcount; __be32 wire_data[2]; struct file *file; + bool wronly = false; if (nfserr) return nfserr; @@ -4515,10 +4516,17 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, maxcount = min_t(unsigned long, read->rd_length, (xdr->buf->buflen - xdr->buf->len)); + if (!(file->f_mode & FMODE_READ) && read->rd_wd_stid) { + /* allow READ using write delegation stateid */ + wronly = true; + file->f_mode |= FMODE_READ; + } if (file->f_op->splice_read && splice_ok) nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else nfserr = 
nfsd4_encode_readv(resp, read, file, maxcount); + if (wronly) + file->f_mode &= ~FMODE_READ; if (nfserr) { xdr_truncate_encode(xdr, eof_offset); return nfserr; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index c26ba86dbdfd..2f053beed899 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -426,6 +426,7 @@ struct nfsd4_read { struct svc_rqst *rd_rqstp; /* response */ struct svc_fh *rd_fhp; /* response */ u32 rd_eof; /* response */ + struct nfs4_stid *rd_wd_stid; /* internal */ }; struct nfsd4_readdir {
Allow read using a write delegation stateid granted on OPENs with OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE implementation may unavoidably do reads (e.g., due to buffer cache constraints). When this condition is detected in nfsd4_encode_read the access mode FMODE_READ is temporarily added to the file's f_mode and is removed when the read is done. Signed-off-by: Dai Ngo <dai.ngo@oracle.com> --- fs/nfsd/nfs4proc.c | 15 ++++++++++++++- fs/nfsd/nfs4xdr.c | 8 ++++++++ fs/nfsd/xdr4.h | 1 + 3 files changed, 23 insertions(+), 1 deletion(-)