diff mbox series

[2/2] NFSD: allow client to use write delegation stateid for READ

Message ID 1739475438-5640-3-git-send-email-dai.ngo@oracle.com (mailing list archive)
State Changes Requested
Delegated to: Chuck Lever
Headers show
Series NFSD: offer write delegation for OPEN with OPEN4_SHARE_ACCESS only | expand

Commit Message

Dai Ngo Feb. 13, 2025, 7:37 p.m. UTC
Allow read using write delegation stateid granted on OPENs with
OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE
implementation may unavoidably do reads (e.g., due to buffer cache
constraints).

When this condition is detected in nfsd4_encode_read the access
mode FMODE_READ is temporarily added to the file's f_mode and is
removed when the read is done.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4proc.c | 15 ++++++++++++++-
 fs/nfsd/nfs4xdr.c  |  8 ++++++++
 fs/nfsd/xdr4.h     |  1 +
 3 files changed, 23 insertions(+), 1 deletion(-)

Comments

Jeff Layton Feb. 13, 2025, 9:07 p.m. UTC | #1
On Thu, 2025-02-13 at 11:37 -0800, Dai Ngo wrote:
> Allow read using write delegation stateid granted on OPENs with
> OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE
> implementation may unavoidably do (e.g., due to buffer cache
> constraints).
> 
> When this condition is detected in nfsd4_encode_read the access
> mode FMODE_READ is temporarily added to the file's f_mode and is
> removed when the read is done.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  fs/nfsd/nfs4proc.c | 15 ++++++++++++++-
>  fs/nfsd/nfs4xdr.c  |  8 ++++++++
>  fs/nfsd/xdr4.h     |  1 +
>  3 files changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index f6e06c779d09..be43627bbf78 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -973,7 +973,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	/* check stateid */
>  	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>  					&read->rd_stateid, RD_STATE,
> -					&read->rd_nf, NULL);
> +					&read->rd_nf, &read->rd_wd_stid);
> +	/*
> +	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
> +	 * delegation stateid used for read. Its refcount is decremented
> +	 * by nfsd4_read_release when read is done.
> +	 */
> +	if (!status && read->rd_wd_stid &&
> +		(read->rd_wd_stid->sc_type != SC_TYPE_DELEG ||
> +		delegstateid(read->rd_wd_stid)->dl_type != NFS4_OPEN_DELEGATE_WRITE)) {
> +		nfs4_put_stid(read->rd_wd_stid);
> +		read->rd_wd_stid = NULL;
> +	}
>  
>  	read->rd_rqstp = rqstp;
>  	read->rd_fhp = &cstate->current_fh;
> @@ -984,6 +995,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  static void
>  nfsd4_read_release(union nfsd4_op_u *u)
>  {
> +	if (u->read.rd_wd_stid)
> +		nfs4_put_stid(u->read.rd_wd_stid);
>  	if (u->read.rd_nf)
>  		nfsd_file_put(u->read.rd_nf);
>  	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index e67420729ecd..3996678bab3f 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -4498,6 +4498,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	unsigned long maxcount;
>  	__be32 wire_data[2];
>  	struct file *file;
> +	bool wronly = false;
>  
>  	if (nfserr)
>  		return nfserr;
> @@ -4515,10 +4516,17 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	maxcount = min_t(unsigned long, read->rd_length,
>  			 (xdr->buf->buflen - xdr->buf->len));
>  
> +	if (!(file->f_mode & FMODE_READ) && read->rd_wd_stid) {
> +		/* allow READ using write delegation stateid */
> +		wronly = true;
> +		file->f_mode |= FMODE_READ;
> +	}

Is that really OK? Can we just upgrade the f_mode like that?

Also, what happens with more exotic exported filesystems like NFS? 

For example, if I'm reexporting NFS, the backend NFS server may not
allow you to do a READ operation using an OPEN4_SHARE_ACCESS_WRITE-only
stateid. Won't this break in that case?

>  	if (file->f_op->splice_read && splice_ok)
>  		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
>  	else
>  		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
> +	if (wronly)
> +		file->f_mode &= ~FMODE_READ;
>  	if (nfserr) {
>  		xdr_truncate_encode(xdr, eof_offset);
>  		return nfserr;
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index c26ba86dbdfd..2f053beed899 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -426,6 +426,7 @@ struct nfsd4_read {
>  	struct svc_rqst		*rd_rqstp;          /* response */
>  	struct svc_fh		*rd_fhp;            /* response */
>  	u32			rd_eof;             /* response */
> +	struct nfs4_stid	*rd_wd_stid;        /* internal */
>  };
>  
>  struct nfsd4_readdir {
Jeff Layton Feb. 13, 2025, 11:29 p.m. UTC | #2
On Thu, 2025-02-13 at 16:07 -0500, Jeff Layton wrote:
> On Thu, 2025-02-13 at 11:37 -0800, Dai Ngo wrote:
> > Allow read using write delegation stateid granted on OPENs with
> > OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE
> > implementation may unavoidably do (e.g., due to buffer cache
> > constraints).
> > 
> > When this condition is detected in nfsd4_encode_read the access
> > mode FMODE_READ is temporarily added to the file's f_mode and is
> > removed when the read is done.
> > 
> > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > ---
> >  fs/nfsd/nfs4proc.c | 15 ++++++++++++++-
> >  fs/nfsd/nfs4xdr.c  |  8 ++++++++
> >  fs/nfsd/xdr4.h     |  1 +
> >  3 files changed, 23 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index f6e06c779d09..be43627bbf78 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -973,7 +973,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  	/* check stateid */
> >  	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> >  					&read->rd_stateid, RD_STATE,
> > -					&read->rd_nf, NULL);
> > +					&read->rd_nf, &read->rd_wd_stid);
> > +	/*
> > +	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
> > +	 * delegation stateid used for read. Its refcount is decremented
> > +	 * by nfsd4_read_release when read is done.
> > +	 */
> > +	if (!status && read->rd_wd_stid &&
> > +		(read->rd_wd_stid->sc_type != SC_TYPE_DELEG ||
> > +		delegstateid(read->rd_wd_stid)->dl_type != NFS4_OPEN_DELEGATE_WRITE)) {
> > +		nfs4_put_stid(read->rd_wd_stid);
> > +		read->rd_wd_stid = NULL;
> > +	}
> >  
> >  	read->rd_rqstp = rqstp;
> >  	read->rd_fhp = &cstate->current_fh;
> > @@ -984,6 +995,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  static void
> >  nfsd4_read_release(union nfsd4_op_u *u)
> >  {
> > +	if (u->read.rd_wd_stid)
> > +		nfs4_put_stid(u->read.rd_wd_stid);
> >  	if (u->read.rd_nf)
> >  		nfsd_file_put(u->read.rd_nf);
> >  	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index e67420729ecd..3996678bab3f 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -4498,6 +4498,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
> >  	unsigned long maxcount;
> >  	__be32 wire_data[2];
> >  	struct file *file;
> > +	bool wronly = false;
> >  
> >  	if (nfserr)
> >  		return nfserr;
> > @@ -4515,10 +4516,17 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
> >  	maxcount = min_t(unsigned long, read->rd_length,
> >  			 (xdr->buf->buflen - xdr->buf->len));
> >  
> > +	if (!(file->f_mode & FMODE_READ) && read->rd_wd_stid) {
> > +		/* allow READ using write delegation stateid */
> > +		wronly = true;
> > +		file->f_mode |= FMODE_READ;
> > +	}
> 
> Is that really OK? Can we just upgrade the f_mode like that?
> 
> Also, what happens with more exotic exported filesystems like NFS? 
> 
> For example, if I'm reexporting NFS, the backend NFS server may not
> allow you to do a READ operation using a OPEN4_SHARE_ACCESS_WRITE only
> stateid. Won't this break in that case?
> 

Hmm...bad example since we don't allow delegations on reexported NFS
these days. Reexporting Ceph or SMB might be a better example. They'll
likely both have problems if you try to issue a read on the result from
an O_WRONLY open. I think you will probably need to rework the way
nfs4_files track their struct files.

IOW, when the client does an OPEN4_SHARE_ACCESS_WRITE-only open, you
need to get a struct file that is FMODE_READ|FMODE_WRITE to hang off
the delegation. But, you'll also need to fix up the accounting for the
share/deny mode locking to ignore that you _actually_ have it open for
read too in that case.

Smoke and mirrors...

> >  	if (file->f_op->splice_read && splice_ok)
> >  		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
> >  	else
> >  		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
> > +	if (wronly)
> > +		file->f_mode &= ~FMODE_READ;
> >  	if (nfserr) {
> >  		xdr_truncate_encode(xdr, eof_offset);
> >  		return nfserr;
> > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > index c26ba86dbdfd..2f053beed899 100644
> > --- a/fs/nfsd/xdr4.h
> > +++ b/fs/nfsd/xdr4.h
> > @@ -426,6 +426,7 @@ struct nfsd4_read {
> >  	struct svc_rqst		*rd_rqstp;          /* response */
> >  	struct svc_fh		*rd_fhp;            /* response */
> >  	u32			rd_eof;             /* response */
> > +	struct nfs4_stid	*rd_wd_stid;        /* internal */
> >  };
> >  
> >  struct nfsd4_readdir {
>
Chuck Lever Feb. 14, 2025, 2:26 p.m. UTC | #3
On 2/13/25 6:29 PM, Jeff Layton wrote:
> On Thu, 2025-02-13 at 16:07 -0500, Jeff Layton wrote:
>> On Thu, 2025-02-13 at 11:37 -0800, Dai Ngo wrote:
>>> Allow read using write delegation stateid granted on OPENs with
>>> OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE
>>> implementation may unavoidably do (e.g., due to buffer cache
>>> constraints).
>>>
>>> When this condition is detected in nfsd4_encode_read the access
>>> mode FMODE_READ is temporarily added to the file's f_mode and is
>>> removed when the read is done.
>>>
>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>> ---
>>>  fs/nfsd/nfs4proc.c | 15 ++++++++++++++-
>>>  fs/nfsd/nfs4xdr.c  |  8 ++++++++
>>>  fs/nfsd/xdr4.h     |  1 +
>>>  3 files changed, 23 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>>> index f6e06c779d09..be43627bbf78 100644
>>> --- a/fs/nfsd/nfs4proc.c
>>> +++ b/fs/nfsd/nfs4proc.c
>>> @@ -973,7 +973,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>  	/* check stateid */
>>>  	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>>>  					&read->rd_stateid, RD_STATE,
>>> -					&read->rd_nf, NULL);
>>> +					&read->rd_nf, &read->rd_wd_stid);
>>> +	/*
>>> +	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
>>> +	 * delegation stateid used for read. Its refcount is decremented
>>> +	 * by nfsd4_read_release when read is done.
>>> +	 */
>>> +	if (!status && read->rd_wd_stid &&
>>> +		(read->rd_wd_stid->sc_type != SC_TYPE_DELEG ||
>>> +		delegstateid(read->rd_wd_stid)->dl_type != NFS4_OPEN_DELEGATE_WRITE)) {
>>> +		nfs4_put_stid(read->rd_wd_stid);
>>> +		read->rd_wd_stid = NULL;
>>> +	}
>>>  
>>>  	read->rd_rqstp = rqstp;
>>>  	read->rd_fhp = &cstate->current_fh;
>>> @@ -984,6 +995,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>  static void
>>>  nfsd4_read_release(union nfsd4_op_u *u)
>>>  {
>>> +	if (u->read.rd_wd_stid)
>>> +		nfs4_put_stid(u->read.rd_wd_stid);
>>>  	if (u->read.rd_nf)
>>>  		nfsd_file_put(u->read.rd_nf);
>>>  	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>> index e67420729ecd..3996678bab3f 100644
>>> --- a/fs/nfsd/nfs4xdr.c
>>> +++ b/fs/nfsd/nfs4xdr.c
>>> @@ -4498,6 +4498,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
>>>  	unsigned long maxcount;
>>>  	__be32 wire_data[2];
>>>  	struct file *file;
>>> +	bool wronly = false;
>>>  
>>>  	if (nfserr)
>>>  		return nfserr;
>>> @@ -4515,10 +4516,17 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
>>>  	maxcount = min_t(unsigned long, read->rd_length,
>>>  			 (xdr->buf->buflen - xdr->buf->len));
>>>  
>>> +	if (!(file->f_mode & FMODE_READ) && read->rd_wd_stid) {
>>> +		/* allow READ using write delegation stateid */
>>> +		wronly = true;
>>> +		file->f_mode |= FMODE_READ;
>>> +	}
>>
>> Is that really OK? Can we just upgrade the f_mode like that?
>>
>> Also, what happens with more exotic exported filesystems like NFS? 
>>
>> For example, if I'm reexporting NFS, the backend NFS server may not
>> allow you to do a READ operation using a OPEN4_SHARE_ACCESS_WRITE only
>> stateid. Won't this break in that case?
>>
> 
> Hmm...bad example since we don't allow delegations on reexported NFS
> these days. Reexporting Ceph or SMB might be a better example. They'll
> likely both have problems if you try to issue a read on the result from
> a O_WRONLY open. I think you will probably need to rework the way
> nfs4_file's track their struct files.
> 
> IOW, when the client does a OPEN4_SHARE_ACCESS_WRITE-only open, you
> need to get a struct file that is FMODE_READ|FMODE_WRITE to hang off
> the delegation. But, you'll also need to fix up the accounting for the
> share/deny mode locking to ignore that you _actually_ have it open for
> read too in that case.

For the record, I agree with Jeff's suggested approach.


> Smoke and mirrors...
> 
>>>  	if (file->f_op->splice_read && splice_ok)
>>>  		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
>>>  	else
>>>  		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
>>> +	if (wronly)
>>> +		file->f_mode &= ~FMODE_READ;
>>>  	if (nfserr) {
>>>  		xdr_truncate_encode(xdr, eof_offset);
>>>  		return nfserr;
>>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>>> index c26ba86dbdfd..2f053beed899 100644
>>> --- a/fs/nfsd/xdr4.h
>>> +++ b/fs/nfsd/xdr4.h
>>> @@ -426,6 +426,7 @@ struct nfsd4_read {
>>>  	struct svc_rqst		*rd_rqstp;          /* response */
>>>  	struct svc_fh		*rd_fhp;            /* response */
>>>  	u32			rd_eof;             /* response */
>>> +	struct nfs4_stid	*rd_wd_stid;        /* internal */
>>>  };
>>>  
>>>  struct nfsd4_readdir {
>>
>
Dai Ngo Feb. 14, 2025, 6:24 p.m. UTC | #4
On 2/14/25 6:26 AM, Chuck Lever wrote:
> On 2/13/25 6:29 PM, Jeff Layton wrote:
>> On Thu, 2025-02-13 at 16:07 -0500, Jeff Layton wrote:
>>> On Thu, 2025-02-13 at 11:37 -0800, Dai Ngo wrote:
>>>> Allow read using write delegation stateid granted on OPENs with
>>>> OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE
>>>> implementation may unavoidably do (e.g., due to buffer cache
>>>> constraints).
>>>>
>>>> When this condition is detected in nfsd4_encode_read the access
>>>> mode FMODE_READ is temporarily added to the file's f_mode and is
>>>> removed when the read is done.
>>>>
>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>> ---
>>>>   fs/nfsd/nfs4proc.c | 15 ++++++++++++++-
>>>>   fs/nfsd/nfs4xdr.c  |  8 ++++++++
>>>>   fs/nfsd/xdr4.h     |  1 +
>>>>   3 files changed, 23 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>>>> index f6e06c779d09..be43627bbf78 100644
>>>> --- a/fs/nfsd/nfs4proc.c
>>>> +++ b/fs/nfsd/nfs4proc.c
>>>> @@ -973,7 +973,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>>   	/* check stateid */
>>>>   	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>>>>   					&read->rd_stateid, RD_STATE,
>>>> -					&read->rd_nf, NULL);
>>>> +					&read->rd_nf, &read->rd_wd_stid);
>>>> +	/*
>>>> +	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
>>>> +	 * delegation stateid used for read. Its refcount is decremented
>>>> +	 * by nfsd4_read_release when read is done.
>>>> +	 */
>>>> +	if (!status && read->rd_wd_stid &&
>>>> +		(read->rd_wd_stid->sc_type != SC_TYPE_DELEG ||
>>>> +		delegstateid(read->rd_wd_stid)->dl_type != NFS4_OPEN_DELEGATE_WRITE)) {
>>>> +		nfs4_put_stid(read->rd_wd_stid);
>>>> +		read->rd_wd_stid = NULL;
>>>> +	}
>>>>   
>>>>   	read->rd_rqstp = rqstp;
>>>>   	read->rd_fhp = &cstate->current_fh;
>>>> @@ -984,6 +995,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>>   static void
>>>>   nfsd4_read_release(union nfsd4_op_u *u)
>>>>   {
>>>> +	if (u->read.rd_wd_stid)
>>>> +		nfs4_put_stid(u->read.rd_wd_stid);
>>>>   	if (u->read.rd_nf)
>>>>   		nfsd_file_put(u->read.rd_nf);
>>>>   	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
>>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>>> index e67420729ecd..3996678bab3f 100644
>>>> --- a/fs/nfsd/nfs4xdr.c
>>>> +++ b/fs/nfsd/nfs4xdr.c
>>>> @@ -4498,6 +4498,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
>>>>   	unsigned long maxcount;
>>>>   	__be32 wire_data[2];
>>>>   	struct file *file;
>>>> +	bool wronly = false;
>>>>   
>>>>   	if (nfserr)
>>>>   		return nfserr;
>>>> @@ -4515,10 +4516,17 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
>>>>   	maxcount = min_t(unsigned long, read->rd_length,
>>>>   			 (xdr->buf->buflen - xdr->buf->len));
>>>>   
>>>> +	if (!(file->f_mode & FMODE_READ) && read->rd_wd_stid) {
>>>> +		/* allow READ using write delegation stateid */
>>>> +		wronly = true;
>>>> +		file->f_mode |= FMODE_READ;
>>>> +	}
>>> Is that really OK? Can we just upgrade the f_mode like that?

It seems too simple but it works. I tested with pynfs, nfstest and
git test, also with reexported NFS share.

>>>
>>> Also, what happens with more exotic exported filesystems like NFS?
>>>
>>> For example, if I'm reexporting NFS, the backend NFS server may not
>>> allow you to do a READ operation using a OPEN4_SHARE_ACCESS_WRITE only
>>> stateid. Won't this break in that case?
>>>
>> Hmm...bad example since we don't allow delegations on reexported NFS
>> these days.

As of 6.14-rc1 the NFSD grants delegations on reexported NFS shares as
long as the server where the shares reside grants delegations. And this
seems to work properly; delegations are recalled when expected.

>>   Reexporting Ceph or SMB might be a better example. They'll
>> likely both have problems if you try to issue a read on the result from
>> a O_WRONLY open. I think you will probably need to rework the way
>> nfs4_file's track their struct files.
>>
>> IOW, when the client does a OPEN4_SHARE_ACCESS_WRITE-only open, you
>> need to get a struct file that is FMODE_READ|FMODE_WRITE to hang off
>> the delegation.

There won't be any existing struct file with FMODE_READ|FMODE_WRITE when
nfs4_set_delegation is called if the client opens the file with access
mode OPEN4_SHARE_ACCESS_WRITE, unless we create a new one. That would mean
we have 2 struct files for the same nfs4_file, which seems problematic.

>>   But, you'll also need to fix up the accounting for the
>> share/deny mode locking to ignore that you _actually_ have it open for
>> read too in that case.

If I understand you correctly, you suggest that we upgrade the file access
mode to FMODE_READ|FMODE_WRITE permanently if the client opens the file with
OPEN4_SHARE_ACCESS_WRITE only. That works too but we have to remove the
FMODE_READ from the struct file if the delegation is recalled.

-Dai

> For the record, I agree with Jeff's suggested approach.
>
>
>> Smoke and mirrors...
>>
>>>>   	if (file->f_op->splice_read && splice_ok)
>>>>   		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
>>>>   	else
>>>>   		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
>>>> +	if (wronly)
>>>> +		file->f_mode &= ~FMODE_READ;
>>>>   	if (nfserr) {
>>>>   		xdr_truncate_encode(xdr, eof_offset);
>>>>   		return nfserr;
>>>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>>>> index c26ba86dbdfd..2f053beed899 100644
>>>> --- a/fs/nfsd/xdr4.h
>>>> +++ b/fs/nfsd/xdr4.h
>>>> @@ -426,6 +426,7 @@ struct nfsd4_read {
>>>>   	struct svc_rqst		*rd_rqstp;          /* response */
>>>>   	struct svc_fh		*rd_fhp;            /* response */
>>>>   	u32			rd_eof;             /* response */
>>>> +	struct nfs4_stid	*rd_wd_stid;        /* internal */
>>>>   };
>>>>   
>>>>   struct nfsd4_readdir {
>
Jeff Layton Feb. 14, 2025, 7:19 p.m. UTC | #5
On Fri, 2025-02-14 at 10:24 -0800, Dai Ngo wrote:
> On 2/14/25 6:26 AM, Chuck Lever wrote:
> > On 2/13/25 6:29 PM, Jeff Layton wrote:
> > > On Thu, 2025-02-13 at 16:07 -0500, Jeff Layton wrote:
> > > > On Thu, 2025-02-13 at 11:37 -0800, Dai Ngo wrote:
> > > > > Allow read using write delegation stateid granted on OPENs with
> > > > > OPEN4_SHARE_ACCESS_WRITE only, to accommodate clients whose WRITE
> > > > > implementation may unavoidably do (e.g., due to buffer cache
> > > > > constraints).
> > > > > 
> > > > > When this condition is detected in nfsd4_encode_read the access
> > > > > mode FMODE_READ is temporarily added to the file's f_mode and is
> > > > > removed when the read is done.
> > > > > 
> > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > > > ---
> > > > >   fs/nfsd/nfs4proc.c | 15 ++++++++++++++-
> > > > >   fs/nfsd/nfs4xdr.c  |  8 ++++++++
> > > > >   fs/nfsd/xdr4.h     |  1 +
> > > > >   3 files changed, 23 insertions(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > > > index f6e06c779d09..be43627bbf78 100644
> > > > > --- a/fs/nfsd/nfs4proc.c
> > > > > +++ b/fs/nfsd/nfs4proc.c
> > > > > @@ -973,7 +973,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > > > >   	/* check stateid */
> > > > >   	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> > > > >   					&read->rd_stateid, RD_STATE,
> > > > > -					&read->rd_nf, NULL);
> > > > > +					&read->rd_nf, &read->rd_wd_stid);
> > > > > +	/*
> > > > > +	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
> > > > > +	 * delegation stateid used for read. Its refcount is decremented
> > > > > +	 * by nfsd4_read_release when read is done.
> > > > > +	 */
> > > > > +	if (!status && read->rd_wd_stid &&
> > > > > +		(read->rd_wd_stid->sc_type != SC_TYPE_DELEG ||
> > > > > +		delegstateid(read->rd_wd_stid)->dl_type != NFS4_OPEN_DELEGATE_WRITE)) {
> > > > > +		nfs4_put_stid(read->rd_wd_stid);
> > > > > +		read->rd_wd_stid = NULL;
> > > > > +	}
> > > > >   
> > > > >   	read->rd_rqstp = rqstp;
> > > > >   	read->rd_fhp = &cstate->current_fh;
> > > > > @@ -984,6 +995,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > > > >   static void
> > > > >   nfsd4_read_release(union nfsd4_op_u *u)
> > > > >   {
> > > > > +	if (u->read.rd_wd_stid)
> > > > > +		nfs4_put_stid(u->read.rd_wd_stid);
> > > > >   	if (u->read.rd_nf)
> > > > >   		nfsd_file_put(u->read.rd_nf);
> > > > >   	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
> > > > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > > > > index e67420729ecd..3996678bab3f 100644
> > > > > --- a/fs/nfsd/nfs4xdr.c
> > > > > +++ b/fs/nfsd/nfs4xdr.c
> > > > > @@ -4498,6 +4498,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
> > > > >   	unsigned long maxcount;
> > > > >   	__be32 wire_data[2];
> > > > >   	struct file *file;
> > > > > +	bool wronly = false;
> > > > >   
> > > > >   	if (nfserr)
> > > > >   		return nfserr;
> > > > > @@ -4515,10 +4516,17 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
> > > > >   	maxcount = min_t(unsigned long, read->rd_length,
> > > > >   			 (xdr->buf->buflen - xdr->buf->len));
> > > > >   
> > > > > +	if (!(file->f_mode & FMODE_READ) && read->rd_wd_stid) {
> > > > > +		/* allow READ using write delegation stateid */
> > > > > +		wronly = true;
> > > > > +		file->f_mode |= FMODE_READ;
> > > > > +	}
> > > > Is that really OK? Can we just upgrade the f_mode like that?
> 
> It seems too simple but it works. I tested with pynfs, nfstest and
> git test, also with reexported NFS share.
> 

I don't think it's that simple. Some filesystems will have problems
here. There has been talk for years about allowing fcntl(F_SETFL, ...)
to change the file access mode, but that still has never materialized.

 
> > > > 
> > > > Also, what happens with more exotic exported filesystems like NFS?
> > > > 
> > > > For example, if I'm reexporting NFS, the backend NFS server may not
> > > > allow you to do a READ operation using a OPEN4_SHARE_ACCESS_WRITE only
> > > > stateid. Won't this break in that case?
> > > > 
> > > Hmm...bad example since we don't allow delegations on reexported NFS
> > > these days.
> 
> As of 6.14-rc1 the NFSD grants delegations on reexported NFS shares as
> long as the server where the shares reside grants delegations. And this
> seems to work properly; delegations are recalled when expected.
> 

Ahh, I was thinking of this patch in Chuck's nfsd-testing branch:

commit 2d7501a673a5d855a941409e6003a0b2afbbe149
Author: Mike Snitzer <snitzer@kernel.org>
Date:   Mon Feb 10 11:25:53 2025 -0500

    nfsd: disallow file locking and delegations for NFSv4 reexport
    
    We do not and cannot support file locking with NFS reexport over
    NFSv4.x for the same reason we don't do it for NFSv3: NFS reexport
    server reboot cannot allow clients to recover locks because the source
    NFS server has not rebooted, and so it is not in grace.  Since the
    source NFS server is not in grace, it cannot offer any guarantees that
    the file won't have been changed between the locks getting lost and
    any attempt to recover/reclaim them.  The same applies to delegations
    and any associated locks, so disallow them too.
    
    Clients are no longer allowed to get file locks or delegations from a
    reexport server, any attempts will fail with operation not supported.
    
    Update the "Reboot recovery" section accordingly in
    Documentation/filesystems/nfs/reexport.rst
    
    Signed-off-by: Mike Snitzer <snitzer@kernel.org>
    Reviewed-by: Jeff Layton <jlayton@kernel.org>
    Signed-off-by: Chuck Lever <chuck.lever@oracle.com>


> > >   Reexporting Ceph or SMB might be a better example. They'll
> > > likely both have problems if you try to issue a read on the result from
> > > a O_WRONLY open. I think you will probably need to rework the way
> > > nfs4_file's track their struct files.
> > > 
> > > IOW, when the client does a OPEN4_SHARE_ACCESS_WRITE-only open, you
> > > need to get a struct file that is FMODE_READ|FMODE_WRITE to hang off
> > > the delegation.
> 
> There won't be any existing struct file with FMODE_READ|FMODE_WRITE when
> nfs4_set_delegation is called if the client opens the file with access
> mode OPEN4_SHARE_ACCESS_WRITE. Unless we create a new one which means now
> we have 2 struct file's for the same nfs4_file, it seems like problematic.
> 
> > >   But, you'll also need to fix up the accounting for the
> > > share/deny mode locking to ignore that you _actually_ have it open for
> > > read too in that case.
> 
> If I understand you correctly, you suggest that we upgrade the file access
> mode to FMODE_READ|FMODE_WRITE permanently if the client opens the file with
> OPEN4_SHARE_ACCESS_WRITE only. That works too but we have to remove the
> FMODE_READ from the struct file if the delegation is recalled.
> 
> 

I don't see a problem with leaving the backend file open
FMODE_READ|FMODE_WRITE in that case. You can just stop allowing reads
on it at the nfsd layer.

> 
> > For the record, I agree with Jeff's suggested approach.
> > 
> > 
> > > Smoke and mirrors...
> > > 
> > > > >   	if (file->f_op->splice_read && splice_ok)
> > > > >   		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
> > > > >   	else
> > > > >   		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
> > > > > +	if (wronly)
> > > > > +		file->f_mode &= ~FMODE_READ;
> > > > >   	if (nfserr) {
> > > > >   		xdr_truncate_encode(xdr, eof_offset);
> > > > >   		return nfserr;
> > > > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > > > > index c26ba86dbdfd..2f053beed899 100644
> > > > > --- a/fs/nfsd/xdr4.h
> > > > > +++ b/fs/nfsd/xdr4.h
> > > > > @@ -426,6 +426,7 @@ struct nfsd4_read {
> > > > >   	struct svc_rqst		*rd_rqstp;          /* response */
> > > > >   	struct svc_fh		*rd_fhp;            /* response */
> > > > >   	u32			rd_eof;             /* response */
> > > > > +	struct nfs4_stid	*rd_wd_stid;        /* internal */
> > > > >   };
> > > > >   
> > > > >   struct nfsd4_readdir {
> >
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index f6e06c779d09..be43627bbf78 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -973,7 +973,18 @@  nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	/* check stateid */
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					&read->rd_stateid, RD_STATE,
-					&read->rd_nf, NULL);
+					&read->rd_nf, &read->rd_wd_stid);
+	/*
+	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
+	 * delegation stateid used for read. Its refcount is decremented
+	 * by nfsd4_read_release when read is done.
+	 */
+	if (!status && read->rd_wd_stid &&
+		(read->rd_wd_stid->sc_type != SC_TYPE_DELEG ||
+		delegstateid(read->rd_wd_stid)->dl_type != NFS4_OPEN_DELEGATE_WRITE)) {
+		nfs4_put_stid(read->rd_wd_stid);
+		read->rd_wd_stid = NULL;
+	}
 
 	read->rd_rqstp = rqstp;
 	read->rd_fhp = &cstate->current_fh;
@@ -984,6 +995,8 @@  nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 static void
 nfsd4_read_release(union nfsd4_op_u *u)
 {
+	if (u->read.rd_wd_stid)
+		nfs4_put_stid(u->read.rd_wd_stid);
 	if (u->read.rd_nf)
 		nfsd_file_put(u->read.rd_nf);
 	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e67420729ecd..3996678bab3f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4498,6 +4498,7 @@  nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	unsigned long maxcount;
 	__be32 wire_data[2];
 	struct file *file;
+	bool wronly = false;
 
 	if (nfserr)
 		return nfserr;
@@ -4515,10 +4516,17 @@  nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	maxcount = min_t(unsigned long, read->rd_length,
 			 (xdr->buf->buflen - xdr->buf->len));
 
+	if (!(file->f_mode & FMODE_READ) && read->rd_wd_stid) {
+		/* allow READ using write delegation stateid */
+		wronly = true;
+		file->f_mode |= FMODE_READ;
+	}
 	if (file->f_op->splice_read && splice_ok)
 		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
 	else
 		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
+	if (wronly)
+		file->f_mode &= ~FMODE_READ;
 	if (nfserr) {
 		xdr_truncate_encode(xdr, eof_offset);
 		return nfserr;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index c26ba86dbdfd..2f053beed899 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -426,6 +426,7 @@  struct nfsd4_read {
 	struct svc_rqst		*rd_rqstp;          /* response */
 	struct svc_fh		*rd_fhp;            /* response */
 	u32			rd_eof;             /* response */
+	struct nfs4_stid	*rd_wd_stid;        /* internal */
 };
 
 struct nfsd4_readdir {