diff mbox series

[v7,2/4] NFSD: allow client to use write delegation stateid for READ

Message ID 1688089960-24568-3-git-send-email-dai.ngo@oracle.com (mailing list archive)
State New, archived
Headers show
Series NFSD: add support for NFSv4.1+ write delegation | expand

Commit Message

Dai Ngo June 30, 2023, 1:52 a.m. UTC
Allow NFSv4 client to use write delegation stateid for READ operation.
Per RFC 8881 section 9.1.2. Use of the Stateid and Locking.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4proc.c | 16 ++++++++++++++--
 fs/nfsd/nfs4xdr.c  |  9 +++++++++
 fs/nfsd/xdr4.h     |  2 ++
 3 files changed, 25 insertions(+), 2 deletions(-)

Comments

Jeff Layton Aug. 11, 2023, 5:22 p.m. UTC | #1
On Thu, 2023-06-29 at 18:52 -0700, Dai Ngo wrote:
> Allow NFSv4 client to use write delegation stateid for READ operation.
> Per RFC 8881 section 9.1.2. Use of the Stateid and Locking.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  fs/nfsd/nfs4proc.c | 16 ++++++++++++++--
>  fs/nfsd/nfs4xdr.c  |  9 +++++++++
>  fs/nfsd/xdr4.h     |  2 ++
>  3 files changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 5ae670807449..3fa66cb38780 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -942,8 +942,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	/* check stateid */
>  	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>  					&read->rd_stateid, RD_STATE,
> -					&read->rd_nf, NULL);
> -
> +					&read->rd_nf, &read->rd_wd_stid);

I think this patch is causing breakage with pynfs. WRT3 sends a READ
operation with the zero-stateid. On earlier kernels, this works, but a
linux-next kernel rejects this with BAD_STATEID.

nfs4_preprocess_stateid_op seems to assume that if cstid is set, then
it's a copy operation and anonymous stateids aren't allowed. Maybe that
test should be something besides checking cstid == NULL?

> +	/*
> +	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
> +	 * delegation stateid used for read. Its refcount is decremented
> +	 * by nfsd4_read_release when read is done.
> +	 */
> +	if (!status && (read->rd_wd_stid->sc_type != NFS4_DELEG_STID ||
> +			delegstateid(read->rd_wd_stid)->dl_type !=
> +			NFS4_OPEN_DELEGATE_WRITE)) {
> +		nfs4_put_stid(read->rd_wd_stid);
> +		read->rd_wd_stid = NULL;
> +	}
>  	read->rd_rqstp = rqstp;
>  	read->rd_fhp = &cstate->current_fh;
>  	return status;
> @@ -953,6 +963,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  static void
>  nfsd4_read_release(union nfsd4_op_u *u)
>  {
> +	if (u->read.rd_wd_stid)
> +		nfs4_put_stid(u->read.rd_wd_stid);
>  	if (u->read.rd_nf)
>  		nfsd_file_put(u->read.rd_nf);
>  	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 76db2fe29624..e0640b31d041 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -4120,6 +4120,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	struct file *file;
>  	int starting_len = xdr->buf->len;
>  	__be32 *p;
> +	fmode_t o_fmode = 0;
>  
>  	if (nfserr)
>  		return nfserr;
> @@ -4139,10 +4140,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	maxcount = min_t(unsigned long, read->rd_length,
>  			 (xdr->buf->buflen - xdr->buf->len));
>  
> +	if (read->rd_wd_stid) {
> +		/* allow READ using write delegation stateid */
> +		o_fmode = file->f_mode;
> +		file->f_mode |= FMODE_READ;
> +	}
>  	if (file->f_op->splice_read && splice_ok)
>  		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
>  	else
>  		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
> +	if (o_fmode)
> +		file->f_mode = o_fmode;
> +
>  	if (nfserr) {
>  		xdr_truncate_encode(xdr, starting_len);
>  		return nfserr;
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 510978e602da..3ccc40f9274a 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -307,6 +307,8 @@ struct nfsd4_read {
>  	struct svc_rqst		*rd_rqstp;          /* response */
>  	struct svc_fh		*rd_fhp;            /* response */
>  	u32			rd_eof;             /* response */
> +
> +	struct nfs4_stid	*rd_wd_stid;		/* internal */
>  };
>  
>  struct nfsd4_readdir {
Dai Ngo Aug. 11, 2023, 5:47 p.m. UTC | #2
Hi Jeff,

I have not looked at this carefully, but since now we only grant
write delegation for OPEN with both read and write access then
perhaps this patch is no longer needed?

-Dai

On 8/11/23 10:22 AM, Jeff Layton wrote:
> On Thu, 2023-06-29 at 18:52 -0700, Dai Ngo wrote:
>> Allow NFSv4 client to use write delegation stateid for READ operation.
>> Per RFC 8881 section 9.1.2. Use of the Stateid and Locking.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   fs/nfsd/nfs4proc.c | 16 ++++++++++++++--
>>   fs/nfsd/nfs4xdr.c  |  9 +++++++++
>>   fs/nfsd/xdr4.h     |  2 ++
>>   3 files changed, 25 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index 5ae670807449..3fa66cb38780 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -942,8 +942,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>   	/* check stateid */
>>   	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>>   					&read->rd_stateid, RD_STATE,
>> -					&read->rd_nf, NULL);
>> -
>> +					&read->rd_nf, &read->rd_wd_stid);
> I think this patch is causing breakage with pynfs. WRT3 sends a READ
> operation with the zero-stateid. On earlier kernels, this works, but a
> linux-next kernel rejects this with BAD_STATEID.
>
> nfs4_preprocess_stateid_op seems to assume that if cstid is set, then
> it's a copy operation and anonymous stateids aren't allowed. Maybe that
> test should be something besides checking cstid == NULL?
>
>> +	/*
>> +	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
>> +	 * delegation stateid used for read. Its refcount is decremented
>> +	 * by nfsd4_read_release when read is done.
>> +	 */
>> +	if (!status && (read->rd_wd_stid->sc_type != NFS4_DELEG_STID ||
>> +			delegstateid(read->rd_wd_stid)->dl_type !=
>> +			NFS4_OPEN_DELEGATE_WRITE)) {
>> +		nfs4_put_stid(read->rd_wd_stid);
>> +		read->rd_wd_stid = NULL;
>> +	}
>>   	read->rd_rqstp = rqstp;
>>   	read->rd_fhp = &cstate->current_fh;
>>   	return status;
>> @@ -953,6 +963,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>   static void
>>   nfsd4_read_release(union nfsd4_op_u *u)
>>   {
>> +	if (u->read.rd_wd_stid)
>> +		nfs4_put_stid(u->read.rd_wd_stid);
>>   	if (u->read.rd_nf)
>>   		nfsd_file_put(u->read.rd_nf);
>>   	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>> index 76db2fe29624..e0640b31d041 100644
>> --- a/fs/nfsd/nfs4xdr.c
>> +++ b/fs/nfsd/nfs4xdr.c
>> @@ -4120,6 +4120,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
>>   	struct file *file;
>>   	int starting_len = xdr->buf->len;
>>   	__be32 *p;
>> +	fmode_t o_fmode = 0;
>>   
>>   	if (nfserr)
>>   		return nfserr;
>> @@ -4139,10 +4140,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
>>   	maxcount = min_t(unsigned long, read->rd_length,
>>   			 (xdr->buf->buflen - xdr->buf->len));
>>   
>> +	if (read->rd_wd_stid) {
>> +		/* allow READ using write delegation stateid */
>> +		o_fmode = file->f_mode;
>> +		file->f_mode |= FMODE_READ;
>> +	}
>>   	if (file->f_op->splice_read && splice_ok)
>>   		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
>>   	else
>>   		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
>> +	if (o_fmode)
>> +		file->f_mode = o_fmode;
>> +
>>   	if (nfserr) {
>>   		xdr_truncate_encode(xdr, starting_len);
>>   		return nfserr;
>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>> index 510978e602da..3ccc40f9274a 100644
>> --- a/fs/nfsd/xdr4.h
>> +++ b/fs/nfsd/xdr4.h
>> @@ -307,6 +307,8 @@ struct nfsd4_read {
>>   	struct svc_rqst		*rd_rqstp;          /* response */
>>   	struct svc_fh		*rd_fhp;            /* response */
>>   	u32			rd_eof;             /* response */
>> +
>> +	struct nfs4_stid	*rd_wd_stid;		/* internal */
>>   };
>>   
>>   struct nfsd4_readdir {
Jeff Layton Aug. 11, 2023, 7:06 p.m. UTC | #3
Yep. Reverting that patch seemed to fix the problem. Chuck, mind just
dropping this patch from nfsd-next?

Thanks,
Jeff


On Fri, 2023-08-11 at 10:47 -0700, dai.ngo@oracle.com wrote:
> Hi Jeff,
> 
> I have not looked at this carefully, but since now we only grant
> write delegation for OPEN with both read and write access then
> perhaps this patch is no longer needed?
> 
> -Dai
> 
> On 8/11/23 10:22 AM, Jeff Layton wrote:
> > On Thu, 2023-06-29 at 18:52 -0700, Dai Ngo wrote:
> > > Allow NFSv4 client to use write delegation stateid for READ operation.
> > > Per RFC 8881 section 9.1.2. Use of the Stateid and Locking.
> > > 
> > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > ---
> > >   fs/nfsd/nfs4proc.c | 16 ++++++++++++++--
> > >   fs/nfsd/nfs4xdr.c  |  9 +++++++++
> > >   fs/nfsd/xdr4.h     |  2 ++
> > >   3 files changed, 25 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > index 5ae670807449..3fa66cb38780 100644
> > > --- a/fs/nfsd/nfs4proc.c
> > > +++ b/fs/nfsd/nfs4proc.c
> > > @@ -942,8 +942,18 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > >   	/* check stateid */
> > >   	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> > >   					&read->rd_stateid, RD_STATE,
> > > -					&read->rd_nf, NULL);
> > > -
> > > +					&read->rd_nf, &read->rd_wd_stid);
> > I think this patch is causing breakage with pynfs. WRT3 sends a READ
> > operation with the zero-stateid. On earlier kernels, this works, but a
> > linux-next kernel rejects this with BAD_STATEID.
> > 
> > nfs4_preprocess_stateid_op seems to assume that if cstid is set, then
> > it's a copy operation and anonymous stateids aren't allowed. Maybe that
> > test should be something besides checking cstid == NULL?
> > 
> > > +	/*
> > > +	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
> > > +	 * delegation stateid used for read. Its refcount is decremented
> > > +	 * by nfsd4_read_release when read is done.
> > > +	 */
> > > +	if (!status && (read->rd_wd_stid->sc_type != NFS4_DELEG_STID ||
> > > +			delegstateid(read->rd_wd_stid)->dl_type !=
> > > +			NFS4_OPEN_DELEGATE_WRITE)) {
> > > +		nfs4_put_stid(read->rd_wd_stid);
> > > +		read->rd_wd_stid = NULL;
> > > +	}
> > >   	read->rd_rqstp = rqstp;
> > >   	read->rd_fhp = &cstate->current_fh;
> > >   	return status;
> > > @@ -953,6 +963,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > >   static void
> > >   nfsd4_read_release(union nfsd4_op_u *u)
> > >   {
> > > +	if (u->read.rd_wd_stid)
> > > +		nfs4_put_stid(u->read.rd_wd_stid);
> > >   	if (u->read.rd_nf)
> > >   		nfsd_file_put(u->read.rd_nf);
> > >   	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
> > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > > index 76db2fe29624..e0640b31d041 100644
> > > --- a/fs/nfsd/nfs4xdr.c
> > > +++ b/fs/nfsd/nfs4xdr.c
> > > @@ -4120,6 +4120,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
> > >   	struct file *file;
> > >   	int starting_len = xdr->buf->len;
> > >   	__be32 *p;
> > > +	fmode_t o_fmode = 0;
> > >   
> > >   	if (nfserr)
> > >   		return nfserr;
> > > @@ -4139,10 +4140,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
> > >   	maxcount = min_t(unsigned long, read->rd_length,
> > >   			 (xdr->buf->buflen - xdr->buf->len));
> > >   
> > > +	if (read->rd_wd_stid) {
> > > +		/* allow READ using write delegation stateid */
> > > +		o_fmode = file->f_mode;
> > > +		file->f_mode |= FMODE_READ;
> > > +	}
> > >   	if (file->f_op->splice_read && splice_ok)
> > >   		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
> > >   	else
> > >   		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
> > > +	if (o_fmode)
> > > +		file->f_mode = o_fmode;
> > > +
> > >   	if (nfserr) {
> > >   		xdr_truncate_encode(xdr, starting_len);
> > >   		return nfserr;
> > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > > index 510978e602da..3ccc40f9274a 100644
> > > --- a/fs/nfsd/xdr4.h
> > > +++ b/fs/nfsd/xdr4.h
> > > @@ -307,6 +307,8 @@ struct nfsd4_read {
> > >   	struct svc_rqst		*rd_rqstp;          /* response */
> > >   	struct svc_fh		*rd_fhp;            /* response */
> > >   	u32			rd_eof;             /* response */
> > > +
> > > +	struct nfs4_stid	*rd_wd_stid;		/* internal */
> > >   };
> > >   
> > >   struct nfsd4_readdir {
Chuck Lever Aug. 11, 2023, 7:08 p.m. UTC | #4
> On Aug 11, 2023, at 3:06 PM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> Yep. Reverting that patch seemed to fix the problem. Chuck, mind just
> dropping this patch from nfsd-next?

I can, but let's make sure that doesn't break anything else first.
Send me any test results, and I'll run some tests here too.


--
Chuck Lever
Jeff Layton Aug. 11, 2023, 8:33 p.m. UTC | #5
On Fri, 2023-08-11 at 19:08 +0000, Chuck Lever III wrote:
> 
> > On Aug 11, 2023, at 3:06 PM, Jeff Layton <jlayton@kernel.org> wrote:
> > 
> > Yep. Reverting that patch seemed to fix the problem. Chuck, mind just
> > dropping this patch from nfsd-next?
> 
> I can, but let's make sure that doesn't break anything else first.
> Send me any test results, and I'll run some tests here too.
> 
> 

Sure, results attached from several runs. These are running "all" tests
in pynfs for both v4.0 and v4.1. The kernels are:

6.5.0-rc5-00050-gb1e667caad15: your nfsd-next branch as of today
6.5.0-rc5+: gb1e667caad15, but with this patch reverted

In summary:

[jlayton@tleilax pynfs-results]$ grep '"failures":' *
6.4.0-v4.0.json:    "failures": 0,
6.4.0-v4.1.json:    "failures": 0,
6.5.0-rc5-00050-gb1e667caad15-v4.0.json:    "failures": 17,
6.5.0-rc5-00050-gb1e667caad15-v4.1.json:    "failures": 1,
6.5.0-rc5-v4.0.json:    "failures": 3,
6.5.0-rc5+-v4.0.json:    "failures": 0,
6.5.0-rc5-v4.1.json:    "failures": 0,
6.5.0-rc5+-v4.1.json:    "failures": 0,

So, reverting this patch makes things look good in nfsd-next.

The more worrisome problem is that 6.5.0-rc5-v4.0.json shows 3
regressions in current mainline. It looks like some sort of data
corruption at first glance.

I'm still looking at that one.
Chuck Lever Aug. 12, 2023, 3:29 a.m. UTC | #6
> On Aug 11, 2023, at 4:33 PM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> On Fri, 2023-08-11 at 19:08 +0000, Chuck Lever III wrote:
>> 
>>> On Aug 11, 2023, at 3:06 PM, Jeff Layton <jlayton@kernel.org> wrote:
>>> 
>>> Yep. Reverting that patch seemed to fix the problem. Chuck, mind just
>>> dropping this patch from nfsd-next?
>> 
>> I can, but let's make sure that doesn't break anything else first.
>> Send me any test results, and I'll run some tests here too.
> 
> Sure, results attached from several runs. These are running "all" tests
> in pynfs for both v4.0 and v4.1. The kernels are:
> 
> 6.5.0-rc5-00050-gb1e667caad15: your nfsd-next branch as of today
> 6.5.0-rc5+: gb1e667caad15, but with this patch reverted

I haven't found any problems either, so I've dropped the patch
and refreshed nfsd-next and topic-sunrpc-thread-scheduling.


--
Chuck Lever
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 5ae670807449..3fa66cb38780 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -942,8 +942,18 @@  nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	/* check stateid */
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					&read->rd_stateid, RD_STATE,
-					&read->rd_nf, NULL);
-
+					&read->rd_nf, &read->rd_wd_stid);
+	/*
+	 * rd_wd_stid is needed for nfsd4_encode_read to allow write
+	 * delegation stateid used for read. Its refcount is decremented
+	 * by nfsd4_read_release when read is done.
+	 */
+	if (!status && (read->rd_wd_stid->sc_type != NFS4_DELEG_STID ||
+			delegstateid(read->rd_wd_stid)->dl_type !=
+			NFS4_OPEN_DELEGATE_WRITE)) {
+		nfs4_put_stid(read->rd_wd_stid);
+		read->rd_wd_stid = NULL;
+	}
 	read->rd_rqstp = rqstp;
 	read->rd_fhp = &cstate->current_fh;
 	return status;
@@ -953,6 +963,8 @@  nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 static void
 nfsd4_read_release(union nfsd4_op_u *u)
 {
+	if (u->read.rd_wd_stid)
+		nfs4_put_stid(u->read.rd_wd_stid);
 	if (u->read.rd_nf)
 		nfsd_file_put(u->read.rd_nf);
 	trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 76db2fe29624..e0640b31d041 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4120,6 +4120,7 @@  nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	struct file *file;
 	int starting_len = xdr->buf->len;
 	__be32 *p;
+	fmode_t o_fmode = 0;
 
 	if (nfserr)
 		return nfserr;
@@ -4139,10 +4140,18 @@  nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	maxcount = min_t(unsigned long, read->rd_length,
 			 (xdr->buf->buflen - xdr->buf->len));
 
+	if (read->rd_wd_stid) {
+		/* allow READ using write delegation stateid */
+		o_fmode = file->f_mode;
+		file->f_mode |= FMODE_READ;
+	}
 	if (file->f_op->splice_read && splice_ok)
 		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
 	else
 		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
+	if (o_fmode)
+		file->f_mode = o_fmode;
+
 	if (nfserr) {
 		xdr_truncate_encode(xdr, starting_len);
 		return nfserr;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 510978e602da..3ccc40f9274a 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -307,6 +307,8 @@  struct nfsd4_read {
 	struct svc_rqst		*rd_rqstp;          /* response */
 	struct svc_fh		*rd_fhp;            /* response */
 	u32			rd_eof;             /* response */
+
+	struct nfs4_stid	*rd_wd_stid;		/* internal */
 };
 
 struct nfsd4_readdir {