diff mbox series

[v5,2/3] NFSD: handle GETATTR conflict with write delegation

Message ID 1684799560-31663-3-git-send-email-dai.ngo@oracle.com (mailing list archive)
State New, archived
Headers show
Series NFSD: add support for NFSv4 write delegation | expand

Commit Message

Dai Ngo May 22, 2023, 11:52 p.m. UTC
If the GETATTR request on a file that has write delegation in effect
and the request attributes include the change info and size attribute
then the write delegation is recalled and NFS4ERR_DELAY is returned
for the GETATTR.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4state.c | 37 +++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs4xdr.c   |  5 +++++
 fs/nfsd/state.h     |  3 +++
 3 files changed, 45 insertions(+)

Comments

Chuck Lever May 23, 2023, 2:43 a.m. UTC | #1
On Mon, May 22, 2023 at 04:52:39PM -0700, Dai Ngo wrote:
> If the GETATTR request on a file that has write delegation in effect
> and the request attributes include the change info and size attribute
> then the write delegation is recalled and NFS4ERR_DELAY is returned
> for the GETATTR.

Isn't this yet another case where the server should send the
CB_RECALL and wait for it briefly, before resorting to
NFS4ERR_DELAY?


> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  fs/nfsd/nfs4state.c | 37 +++++++++++++++++++++++++++++++++++++
>  fs/nfsd/nfs4xdr.c   |  5 +++++
>  fs/nfsd/state.h     |  3 +++
>  3 files changed, 45 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index b90b74a5e66e..ea9cd781db5f 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -8353,3 +8353,40 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
>  {
>  	get_stateid(cstate, &u->write.wr_stateid);
>  }
> +
> +__be32
> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)

As a globally-visible function, this needs a documenting comment, and
please use "nfsd4_" rather than "nfs4_".


> +{
> +	struct file_lock_context *ctx;
> +	struct file_lock *fl;
> +	struct nfs4_delegation *dp;
> +
> +	ctx = locks_inode_context(inode);
> +	if (!ctx)
> +		return 0;
> +	spin_lock(&ctx->flc_lock);
> +	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
> +		if (fl->fl_flags == FL_LAYOUT ||
> +				fl->fl_lmops != &nfsd_lease_mng_ops)
> +			continue;
> +		if (fl->fl_type == F_WRLCK) {
> +			dp = fl->fl_owner;
> +			/*
> +			 * increment the sc_count to prevent the delegation to
> +			 * be freed while sending the CB_RECALL. The refcount is
> +			 * decremented by nfs4_put_stid in nfsd4_cb_recall_release
> +			 * after the request was sent.
> +			 */
> +			if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker) ||
> +					!refcount_inc_not_zero(&dp->dl_stid.sc_count)) {
> +				spin_unlock(&ctx->flc_lock);
> +				return 0;
> +			}
> +			spin_unlock(&ctx->flc_lock);
> +			return nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> +		}
> +		break;
> +	}
> +	spin_unlock(&ctx->flc_lock);
> +	return 0;
> +}
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 76db2fe29624..ed09b575afac 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2966,6 +2966,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>  		if (status)
>  			goto out;
>  	}
> +	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> +		status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
> +		if (status)
> +			goto out;
> +	}
>  
>  	err = vfs_getattr(&path, &stat,
>  			  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index d49d3060ed4f..64727a39f0db 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -732,4 +732,7 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
>  	cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
>  	return clp->cl_state == NFSD4_EXPIRABLE;
>  }
> +
> +extern __be32 nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp,
> +				struct inode *inode);
>  #endif   /* NFSD4_STATE_H */
> -- 
> 2.9.5
>
Dai Ngo May 23, 2023, 6:02 a.m. UTC | #2
On 5/22/23 7:43 PM, Chuck Lever wrote:
> On Mon, May 22, 2023 at 04:52:39PM -0700, Dai Ngo wrote:
>> If the GETATTR request on a file that has write delegation in effect
>> and the request attributes include the change info and size attribute
>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>> for the GETATTR.
> Isn't this yet another case where the server should send the
> CB_RECALL and wait for it briefly, before resorting to
> NFS4ERR_DELAY?

Think about this more, I don't think we even need to recall the
delegation at all. The Linux client does not flush the dirty file
data before returning the delegation so the GETATTR still get stale
attributes at the server. And the spec is not explicitly requires
the delegation to be recalled.

If we want to provide the client with more accurate file attributes
then we need to use the CB_GETATTR to get the up-to-date change info
and file size from the client. I think we agreed to defer this for later.

>
>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   fs/nfsd/nfs4state.c | 37 +++++++++++++++++++++++++++++++++++++
>>   fs/nfsd/nfs4xdr.c   |  5 +++++
>>   fs/nfsd/state.h     |  3 +++
>>   3 files changed, 45 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index b90b74a5e66e..ea9cd781db5f 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -8353,3 +8353,40 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
>>   {
>>   	get_stateid(cstate, &u->write.wr_stateid);
>>   }
>> +
>> +__be32
>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)

need to change this function name to nfsd4_deleg_getattr_conflict.

> As a globally-visible function, this needs a documenting comment, and
> please use "nfsd4_" rather than "nfs4_".

will fix, if we decide to do the recall.

-Dai

>
>
>> +{
>> +	struct file_lock_context *ctx;
>> +	struct file_lock *fl;
>> +	struct nfs4_delegation *dp;
>> +
>> +	ctx = locks_inode_context(inode);
>> +	if (!ctx)
>> +		return 0;
>> +	spin_lock(&ctx->flc_lock);
>> +	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
>> +		if (fl->fl_flags == FL_LAYOUT ||
>> +				fl->fl_lmops != &nfsd_lease_mng_ops)
>> +			continue;
>> +		if (fl->fl_type == F_WRLCK) {
>> +			dp = fl->fl_owner;
>> +			/*
>> +			 * increment the sc_count to prevent the delegation to
>> +			 * be freed while sending the CB_RECALL. The refcount is
>> +			 * decremented by nfs4_put_stid in nfsd4_cb_recall_release
>> +			 * after the request was sent.
>> +			 */
>> +			if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker) ||
>> +					!refcount_inc_not_zero(&dp->dl_stid.sc_count)) {
>> +				spin_unlock(&ctx->flc_lock);
>> +				return 0;
>> +			}
>> +			spin_unlock(&ctx->flc_lock);
>> +			return nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>> +		}
>> +		break;
>> +	}
>> +	spin_unlock(&ctx->flc_lock);
>> +	return 0;
>> +}
>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>> index 76db2fe29624..ed09b575afac 100644
>> --- a/fs/nfsd/nfs4xdr.c
>> +++ b/fs/nfsd/nfs4xdr.c
>> @@ -2966,6 +2966,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>>   		if (status)
>>   			goto out;
>>   	}
>> +	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>> +		status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
>> +		if (status)
>> +			goto out;
>> +	}
>>   
>>   	err = vfs_getattr(&path, &stat,
>>   			  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index d49d3060ed4f..64727a39f0db 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -732,4 +732,7 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
>>   	cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
>>   	return clp->cl_state == NFSD4_EXPIRABLE;
>>   }
>> +
>> +extern __be32 nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp,
>> +				struct inode *inode);
>>   #endif   /* NFSD4_STATE_H */
>> -- 
>> 2.9.5
>>
Jeff Layton May 24, 2023, 3:07 p.m. UTC | #3
On Mon, 2023-05-22 at 16:52 -0700, Dai Ngo wrote:
> If the GETATTR request on a file that has write delegation in effect
> and the request attributes include the change info and size attribute
> then the write delegation is recalled and NFS4ERR_DELAY is returned
> for the GETATTR.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  fs/nfsd/nfs4state.c | 37 +++++++++++++++++++++++++++++++++++++
>  fs/nfsd/nfs4xdr.c   |  5 +++++
>  fs/nfsd/state.h     |  3 +++
>  3 files changed, 45 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index b90b74a5e66e..ea9cd781db5f 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -8353,3 +8353,40 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
>  {
>  	get_stateid(cstate, &u->write.wr_stateid);
>  }
> +
> +__be32
> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
> +{
> +	struct file_lock_context *ctx;
> +	struct file_lock *fl;
> +	struct nfs4_delegation *dp;
> +
> +	ctx = locks_inode_context(inode);
> +	if (!ctx)
> +		return 0;
> +	spin_lock(&ctx->flc_lock);
> +	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
> +		if (fl->fl_flags == FL_LAYOUT ||
> +				fl->fl_lmops != &nfsd_lease_mng_ops)
> +			continue;
> +		if (fl->fl_type == F_WRLCK) {
> +			dp = fl->fl_owner;
> +			/*
> +			 * increment the sc_count to prevent the delegation to
> +			 * be freed while sending the CB_RECALL. The refcount is
> +			 * decremented by nfs4_put_stid in nfsd4_cb_recall_release
> +			 * after the request was sent.
> +			 */
> +			if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker) ||
> +					!refcount_inc_not_zero(&dp->dl_stid.sc_count)) {

I still don't get why you're incrementing the refcount of this stateid.
At this point, you know that this stateid is owned by a different client
altogether,  and breaking its lease doesn't require a reference to the
stateid.

I think this will cause a refcount leak.

> +				spin_unlock(&ctx->flc_lock);
> +				return 0;
> +			}
> +			spin_unlock(&ctx->flc_lock);
> +			return nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> +		}
> +		break;
> +	}
> +	spin_unlock(&ctx->flc_lock);
> +	return 0;
> +}
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 76db2fe29624..ed09b575afac 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2966,6 +2966,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>  		if (status)
>  			goto out;
>  	}
> +	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> +		status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
> +		if (status)
> +			goto out;
> +	}
>  
>  	err = vfs_getattr(&path, &stat,
>  			  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index d49d3060ed4f..64727a39f0db 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -732,4 +732,7 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
>  	cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
>  	return clp->cl_state == NFSD4_EXPIRABLE;
>  }
> +
> +extern __be32 nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp,
> +				struct inode *inode);
>  #endif   /* NFSD4_STATE_H */
Dai Ngo May 24, 2023, 5:49 p.m. UTC | #4
On 5/24/23 8:07 AM, Jeff Layton wrote:
> On Mon, 2023-05-22 at 16:52 -0700, Dai Ngo wrote:
>> If the GETATTR request on a file that has write delegation in effect
>> and the request attributes include the change info and size attribute
>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>> for the GETATTR.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   fs/nfsd/nfs4state.c | 37 +++++++++++++++++++++++++++++++++++++
>>   fs/nfsd/nfs4xdr.c   |  5 +++++
>>   fs/nfsd/state.h     |  3 +++
>>   3 files changed, 45 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index b90b74a5e66e..ea9cd781db5f 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -8353,3 +8353,40 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
>>   {
>>   	get_stateid(cstate, &u->write.wr_stateid);
>>   }
>> +
>> +__be32
>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
>> +{
>> +	struct file_lock_context *ctx;
>> +	struct file_lock *fl;
>> +	struct nfs4_delegation *dp;
>> +
>> +	ctx = locks_inode_context(inode);
>> +	if (!ctx)
>> +		return 0;
>> +	spin_lock(&ctx->flc_lock);
>> +	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
>> +		if (fl->fl_flags == FL_LAYOUT ||
>> +				fl->fl_lmops != &nfsd_lease_mng_ops)
>> +			continue;
>> +		if (fl->fl_type == F_WRLCK) {
>> +			dp = fl->fl_owner;
>> +			/*
>> +			 * increment the sc_count to prevent the delegation to
>> +			 * be freed while sending the CB_RECALL. The refcount is
>> +			 * decremented by nfs4_put_stid in nfsd4_cb_recall_release
>> +			 * after the request was sent.
>> +			 */
>> +			if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker) ||
>> +					!refcount_inc_not_zero(&dp->dl_stid.sc_count)) {
> I still don't get why you're incrementing the refcount of this stateid.
> At this point, you know that this stateid is owned by a different client
> altogether,  and breaking its lease doesn't require a reference to the
> stateid.

You're right, the intention was to make sure the delegation does not go
away when the recall is being sent. However, this was already done in
nfsd_break_one_deleg where the sc_count is incremented. Incrementing the
sc_count refcount would be needed here if we do the CB_GETATTR. I'll remove
this in next version.

But should we drop the this patch altogether? since there is no value in
recall the write delegation when there is an GETATTR from another client
as I mentioned in the previous email.

-Dai

>
> I think this will cause a refcount leak.
>
>> +				spin_unlock(&ctx->flc_lock);
>> +				return 0;
>> +			}
>> +			spin_unlock(&ctx->flc_lock);
>> +			return nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>> +		}
>> +		break;
>> +	}
>> +	spin_unlock(&ctx->flc_lock);
>> +	return 0;
>> +}
>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>> index 76db2fe29624..ed09b575afac 100644
>> --- a/fs/nfsd/nfs4xdr.c
>> +++ b/fs/nfsd/nfs4xdr.c
>> @@ -2966,6 +2966,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>>   		if (status)
>>   			goto out;
>>   	}
>> +	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>> +		status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
>> +		if (status)
>> +			goto out;
>> +	}
>>   
>>   	err = vfs_getattr(&path, &stat,
>>   			  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index d49d3060ed4f..64727a39f0db 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -732,4 +732,7 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
>>   	cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
>>   	return clp->cl_state == NFSD4_EXPIRABLE;
>>   }
>> +
>> +extern __be32 nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp,
>> +				struct inode *inode);
>>   #endif   /* NFSD4_STATE_H */
Chuck Lever May 24, 2023, 5:51 p.m. UTC | #5
> On May 23, 2023, at 2:02 AM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> 
> On 5/22/23 7:43 PM, Chuck Lever wrote:
>> On Mon, May 22, 2023 at 04:52:39PM -0700, Dai Ngo wrote:
>>> If the GETATTR request on a file that has write delegation in effect
>>> and the request attributes include the change info and size attribute
>>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>>> for the GETATTR.
>> Isn't this yet another case where the server should send the
>> CB_RECALL and wait for it briefly, before resorting to
>> NFS4ERR_DELAY?
> 
> Think about this more, I don't think we even need to recall the
> delegation at all. The Linux client does not flush the dirty file
> data before returning the delegation so the GETATTR still get stale
> attributes at the server. And the spec is not explicitly requires
> the delegation to be recalled.

I'm trying to get some feedback from other server implementers
about this. I'm willing to consider dropping this patch for now.


--
Chuck Lever
Chuck Lever May 25, 2023, 5:21 p.m. UTC | #6
> On May 24, 2023, at 1:49 PM, Dai Ngo <dai.ngo@oracle.com> wrote:
> 
> 
> On 5/24/23 8:07 AM, Jeff Layton wrote:
>> On Mon, 2023-05-22 at 16:52 -0700, Dai Ngo wrote:
>>> If the GETATTR request on a file that has write delegation in effect
>>> and the request attributes include the change info and size attribute
>>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>>> for the GETATTR.
>>> 
>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>> ---
>>>  fs/nfsd/nfs4state.c | 37 +++++++++++++++++++++++++++++++++++++
>>>  fs/nfsd/nfs4xdr.c   |  5 +++++
>>>  fs/nfsd/state.h     |  3 +++
>>>  3 files changed, 45 insertions(+)
>>> 
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index b90b74a5e66e..ea9cd781db5f 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -8353,3 +8353,40 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
>>>  {
>>>   get_stateid(cstate, &u->write.wr_stateid);
>>>  }
>>> +
>>> +__be32
>>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
>>> +{
>>> + struct file_lock_context *ctx;
>>> + struct file_lock *fl;
>>> + struct nfs4_delegation *dp;
>>> +
>>> + ctx = locks_inode_context(inode);
>>> + if (!ctx)
>>> + return 0;
>>> + spin_lock(&ctx->flc_lock);
>>> + list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
>>> + if (fl->fl_flags == FL_LAYOUT ||
>>> + fl->fl_lmops != &nfsd_lease_mng_ops)
>>> + continue;
>>> + if (fl->fl_type == F_WRLCK) {
>>> + dp = fl->fl_owner;
>>> + /*
>>> +  * increment the sc_count to prevent the delegation to
>>> +  * be freed while sending the CB_RECALL. The refcount is
>>> +  * decremented by nfs4_put_stid in nfsd4_cb_recall_release
>>> +  * after the request was sent.
>>> +  */
>>> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker) ||
>>> + !refcount_inc_not_zero(&dp->dl_stid.sc_count)) {
>> I still don't get why you're incrementing the refcount of this stateid.
>> At this point, you know that this stateid is owned by a different client
>> altogether,  and breaking its lease doesn't require a reference to the
>> stateid.
> 
> You're right, the intention was to make sure the delegation does not go
> away when the recall is being sent. However, this was already done in
> nfsd_break_one_deleg where the sc_count is incremented. Incrementing the
> sc_count refcount would be needed here if we do the CB_GETATTR. I'll remove
> this in next version.
> 
> But should we drop the this patch altogether? since there is no value in
> recall the write delegation when there is an GETATTR from another client
> as I mentioned in the previous email.

a. Neither Solaris nor OnTAP do a CB_RECALL or a CB_GETATTR in this case

b. RFC 8881 Section 18.7.4 states that a server MUST NOT proceed with a
   GETATTR response unless it has done one of those callbacks

So we have two examples of non-compliant server implementations that
don't seem to have consequences for not following that MUST.

It doesn't seem unreasonable to recall in this scenario, but it's
unknown what kind of performance impact that will have.

Can you send an updated version of this patch, rebased on nfsd-next
(which now has the other two applied), plus a patch to add a counter
for this type of conflict?


--
Chuck Lever
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b90b74a5e66e..ea9cd781db5f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -8353,3 +8353,40 @@  nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
 {
 	get_stateid(cstate, &u->write.wr_stateid);
 }
+
+__be32
+nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
+{
+	struct file_lock_context *ctx;
+	struct file_lock *fl;
+	struct nfs4_delegation *dp;
+
+	ctx = locks_inode_context(inode);
+	if (!ctx)
+		return 0;
+	spin_lock(&ctx->flc_lock);
+	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+		if (fl->fl_flags == FL_LAYOUT ||
+				fl->fl_lmops != &nfsd_lease_mng_ops)
+			continue;
+		if (fl->fl_type == F_WRLCK) {
+			dp = fl->fl_owner;
+			/*
+			 * increment the sc_count to prevent the delegation to
+			 * be freed while sending the CB_RECALL. The refcount is
+			 * decremented by nfs4_put_stid in nfsd4_cb_recall_release
+			 * after the request was sent.
+			 */
+			if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker) ||
+					!refcount_inc_not_zero(&dp->dl_stid.sc_count)) {
+				spin_unlock(&ctx->flc_lock);
+				return 0;
+			}
+			spin_unlock(&ctx->flc_lock);
+			return nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+		}
+		break;
+	}
+	spin_unlock(&ctx->flc_lock);
+	return 0;
+}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 76db2fe29624..ed09b575afac 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2966,6 +2966,11 @@  nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
 		if (status)
 			goto out;
 	}
+	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+		status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
+		if (status)
+			goto out;
+	}
 
 	err = vfs_getattr(&path, &stat,
 			  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d49d3060ed4f..64727a39f0db 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -732,4 +732,7 @@  static inline bool try_to_expire_client(struct nfs4_client *clp)
 	cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
 	return clp->cl_state == NFSD4_EXPIRABLE;
 }
+
+extern __be32 nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp,
+				struct inode *inode);
 #endif   /* NFSD4_STATE_H */