diff mbox series

[v2,2/3] NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY

Message ID 165953745991.1658.5781306176717145818.stgit@manet.1015granger.net (mailing list archive)
State New, archived
Headers show
Series Wait for DELEGRETURN before returning NFS4ERR_DELAY | expand

Commit Message

Chuck Lever Aug. 3, 2022, 2:37 p.m. UTC
nfsd_setattr() can kick off a CB_RECALL (via
notify_change() -> break_lease()) if a delegation is present. Before
returning NFS4ERR_DELAY, give the client holding that delegation a
chance to return it, and then retry nfsd_setattr() once.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4proc.c  |   18 +++++++++++++++---
 fs/nfsd/nfs4state.c |   17 +++++++++++++++++
 fs/nfsd/nfsd.h      |    1 +
 fs/nfsd/trace.h     |   19 +++++++++++++++++++
 fs/nfsd/xdr4.h      |    2 ++
 5 files changed, 54 insertions(+), 3 deletions(-)

Comments

Jeffrey Layton Aug. 3, 2022, 7:47 p.m. UTC | #1
On Wed, 2022-08-03 at 10:37 -0400, Chuck Lever wrote:
> nfsd_setattr() can kick off a CB_RECALL (via
> notify_change() -> break_lease()) if a delegation is present. Before
> returning NFS4ERR_DELAY, give the client holding that delegation a
> chance to return it and then retry the nfsd_setattr() again, once.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/nfs4proc.c  |   18 +++++++++++++++---
>  fs/nfsd/nfs4state.c |   17 +++++++++++++++++
>  fs/nfsd/nfsd.h      |    1 +
>  fs/nfsd/trace.h     |   19 +++++++++++++++++++
>  fs/nfsd/xdr4.h      |    2 ++
>  5 files changed, 54 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 42bfe0d769ec..62a267bb2ce5 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1142,7 +1142,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  {
>  	struct nfsd4_setattr *setattr = &u->setattr;
>  	__be32 status = nfs_ok;
> -	int err;
> +	int err, retries;
>  
>  	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
>  		status = nfs4_preprocess_stateid_op(rqstp, cstate,
> @@ -1173,8 +1173,20 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  				&setattr->sa_label);
>  	if (status)
>  		goto out;
> -	status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
> -				0, (time64_t)0);
> +
> +	retries = 1;
> +	do {
> +		status = nfsd_setattr(rqstp, &cstate->current_fh,
> +				      &setattr->sa_iattr, 0, (time64_t)0);
> +		if (status != nfserr_jukebox)
> +			break;
> +		if (!retries--)
> +			break;
> +
> +		fh_clear_pre_post_attrs(&cstate->current_fh);
> +		nfsd4_wait_for_delegreturn(rqstp, &cstate->current_fh);
> +	} while (1);
> +
>  out:
>  	fh_drop_write(&cstate->current_fh);
>  	return status;
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0cf5a4bb36df..e3ac89d4a859 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4689,6 +4689,23 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
>  	return ret;
>  }
>  
> +/**
> + * nfsd4_wait_for_delegreturn - wait for delegations to be returned
> + * @rqstp: the RPC transaction being executed
> + * @fhp: filehandle of file being waited for
> + *
> + * A better approach would wait for the DELEGRETURN operation, and
> + * retry just as soon as it was done.
> + *
> + * The timeout prevents deadlock if all nfsd threads happen to be
> + * tied up waiting for returning delegations.
> + */
> +void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, struct svc_fh *fhp)
> +{
> +	trace_nfsd_delegreturn_wait(rqstp, fhp);
> +	msleep(NFSD_DELEGRETURN_TIMEOUT);

Like you mentioned in the cover letter, this is pretty nasty.

You could use wait_var_event_timeout here on the inode, paired with a
wake_up_var when a delegation is returned.

For the condition, you could use something like this:

    !inode->i_flctx || list_empty(&inode->i_flctx->flc_lease)

Maybe even a lockless check similar to the one in break_deleg()?

> +}
> +
>  static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
>  {
>  	struct nfs4_delegation *dp = cb_to_delegation(cb);
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 9a8b09afc173..0b800a154828 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -341,6 +341,7 @@ void		nfsd_lockd_shutdown(void);
>  
>  #define NFSD_LAUNDROMAT_MINTIMEOUT      1   /* seconds */
>  #define	NFSD_COURTESY_CLIENT_TIMEOUT	(24 * 60 * 60)	/* seconds */
> +#define NFSD_DELEGRETURN_TIMEOUT	(30)	/* milliseconds */
>  
>  /*
>   * The following attributes are currently not supported by the NFSv4 server:
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index 8c3d5f88072f..dd2654cac132 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -443,6 +443,25 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
>  #include "filecache.h"
>  #include "vfs.h"
>  
> +TRACE_EVENT(nfsd_delegreturn_wait,
> +	TP_PROTO(
> +		const struct svc_rqst *rqstp,
> +		const struct svc_fh *fhp
> +	),
> +	TP_ARGS(rqstp, fhp),
> +	TP_STRUCT__entry(
> +		__field(u32, xid)
> +		__field(u32, fh_hash)
> +	),
> +	TP_fast_assign(
> +		__entry->xid = be32_to_cpu(rqstp->rq_xid);
> +		__entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
> +	),
> +	TP_printk("xid=0x%08x fh_hash=0x%08x",
> +		  __entry->xid, __entry->fh_hash
> +	)
> +);
> +
>  DECLARE_EVENT_CLASS(nfsd_stateid_class,
>  	TP_PROTO(stateid_t *stp),
>  	TP_ARGS(stp),
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 7b744011f2d3..5b9213076e95 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -788,6 +788,8 @@ extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_st
>  		union nfsd4_op_u *u);
>  __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *,
>  		union nfsd4_op_u *u);
> +extern void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp,
> +		struct svc_fh *fhp);
>  extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
>  		struct nfsd4_open *open, struct nfsd_net *nn);
>  extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
> 
>
Chuck Lever Aug. 3, 2022, 8:27 p.m. UTC | #2
> On Aug 3, 2022, at 3:47 PM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> On Wed, 2022-08-03 at 10:37 -0400, Chuck Lever wrote:
>> nfsd_setattr() can kick off a CB_RECALL (via
>> notify_change() -> break_lease()) if a delegation is present. Before
>> returning NFS4ERR_DELAY, give the client holding that delegation a
>> chance to return it and then retry the nfsd_setattr() again, once.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> fs/nfsd/nfs4proc.c  |   18 +++++++++++++++---
>> fs/nfsd/nfs4state.c |   17 +++++++++++++++++
>> fs/nfsd/nfsd.h      |    1 +
>> fs/nfsd/trace.h     |   19 +++++++++++++++++++
>> fs/nfsd/xdr4.h      |    2 ++
>> 5 files changed, 54 insertions(+), 3 deletions(-)
>> 
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index 42bfe0d769ec..62a267bb2ce5 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -1142,7 +1142,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>> {
>> 	struct nfsd4_setattr *setattr = &u->setattr;
>> 	__be32 status = nfs_ok;
>> -	int err;
>> +	int err, retries;
>> 
>> 	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
>> 		status = nfs4_preprocess_stateid_op(rqstp, cstate,
>> @@ -1173,8 +1173,20 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>> 				&setattr->sa_label);
>> 	if (status)
>> 		goto out;
>> -	status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
>> -				0, (time64_t)0);
>> +
>> +	retries = 1;
>> +	do {
>> +		status = nfsd_setattr(rqstp, &cstate->current_fh,
>> +				      &setattr->sa_iattr, 0, (time64_t)0);
>> +		if (status != nfserr_jukebox)
>> +			break;
>> +		if (!retries--)
>> +			break;
>> +
>> +		fh_clear_pre_post_attrs(&cstate->current_fh);
>> +		nfsd4_wait_for_delegreturn(rqstp, &cstate->current_fh);
>> +	} while (1);
>> +
>> out:
>> 	fh_drop_write(&cstate->current_fh);
>> 	return status;
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 0cf5a4bb36df..e3ac89d4a859 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -4689,6 +4689,23 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
>> 	return ret;
>> }
>> 
>> +/**
>> + * nfsd4_wait_for_delegreturn - wait for delegations to be returned
>> + * @rqstp: the RPC transaction being executed
>> + * @fhp: filehandle of file being waited for
>> + *
>> + * A better approach would wait for the DELEGRETURN operation, and
>> + * retry just as soon as it was done.
>> + *
>> + * The timeout prevents deadlock if all nfsd threads happen to be
>> + * tied up waiting for returning delegations.
>> + */
>> +void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, struct svc_fh *fhp)
>> +{
>> +	trace_nfsd_delegreturn_wait(rqstp, fhp);
>> +	msleep(NFSD_DELEGRETURN_TIMEOUT);
> 
> Like you mentioned in the cover letter, this is pretty nasty.

Right, it's proof-of-concept stuff.


> You could use wait_var_event_timeout here on the inode, paired with a
> wake_up_var when a delegation is returned.

I was looking for an NFSD-specific data structure to add a
completion to, but yeah, I guess the inode itself could work.
I'll have a look at that for the next version of this series.
Thanks for the suggestion!


> For the condition, you could use something like this:
> 
>    !inode->i_flctx || list_empty(&inode->i_flctx->flc_lease)
> 
> Maybe even a similar lockless check as the one in break_deleg?
> 
>> +}
>> +
>> static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
>> {
>> 	struct nfs4_delegation *dp = cb_to_delegation(cb);
>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
>> index 9a8b09afc173..0b800a154828 100644
>> --- a/fs/nfsd/nfsd.h
>> +++ b/fs/nfsd/nfsd.h
>> @@ -341,6 +341,7 @@ void		nfsd_lockd_shutdown(void);
>> 
>> #define NFSD_LAUNDROMAT_MINTIMEOUT      1   /* seconds */
>> #define	NFSD_COURTESY_CLIENT_TIMEOUT	(24 * 60 * 60)	/* seconds */
>> +#define NFSD_DELEGRETURN_TIMEOUT	(30)	/* milliseconds */
>> 
>> /*
>>  * The following attributes are currently not supported by the NFSv4 server:
>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>> index 8c3d5f88072f..dd2654cac132 100644
>> --- a/fs/nfsd/trace.h
>> +++ b/fs/nfsd/trace.h
>> @@ -443,6 +443,25 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
>> #include "filecache.h"
>> #include "vfs.h"
>> 
>> +TRACE_EVENT(nfsd_delegreturn_wait,
>> +	TP_PROTO(
>> +		const struct svc_rqst *rqstp,
>> +		const struct svc_fh *fhp
>> +	),
>> +	TP_ARGS(rqstp, fhp),
>> +	TP_STRUCT__entry(
>> +		__field(u32, xid)
>> +		__field(u32, fh_hash)
>> +	),
>> +	TP_fast_assign(
>> +		__entry->xid = be32_to_cpu(rqstp->rq_xid);
>> +		__entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
>> +	),
>> +	TP_printk("xid=0x%08x fh_hash=0x%08x",
>> +		  __entry->xid, __entry->fh_hash
>> +	)
>> +);
>> +
>> DECLARE_EVENT_CLASS(nfsd_stateid_class,
>> 	TP_PROTO(stateid_t *stp),
>> 	TP_ARGS(stp),
>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>> index 7b744011f2d3..5b9213076e95 100644
>> --- a/fs/nfsd/xdr4.h
>> +++ b/fs/nfsd/xdr4.h
>> @@ -788,6 +788,8 @@ extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_st
>> 		union nfsd4_op_u *u);
>> __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *,
>> 		union nfsd4_op_u *u);
>> +extern void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp,
>> +		struct svc_fh *fhp);
>> extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
>> 		struct nfsd4_open *open, struct nfsd_net *nn);
>> extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
>> 
>> 
> 
> -- 
> Jeff Layton <jlayton@kernel.org>

--
Chuck Lever
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 42bfe0d769ec..62a267bb2ce5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1142,7 +1142,7 @@  nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	struct nfsd4_setattr *setattr = &u->setattr;
 	__be32 status = nfs_ok;
-	int err;
+	int err, retries;
 
 	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
 		status = nfs4_preprocess_stateid_op(rqstp, cstate,
@@ -1173,8 +1173,20 @@  nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 				&setattr->sa_label);
 	if (status)
 		goto out;
-	status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
-				0, (time64_t)0);
+
+	retries = 1;
+	do {
+		status = nfsd_setattr(rqstp, &cstate->current_fh,
+				      &setattr->sa_iattr, 0, (time64_t)0);
+		if (status != nfserr_jukebox)
+			break;
+		if (!retries--)
+			break;
+
+		fh_clear_pre_post_attrs(&cstate->current_fh);
+		nfsd4_wait_for_delegreturn(rqstp, &cstate->current_fh);
+	} while (1);
+
 out:
 	fh_drop_write(&cstate->current_fh);
 	return status;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0cf5a4bb36df..e3ac89d4a859 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4689,6 +4689,23 @@  nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 	return ret;
 }
 
+/**
+ * nfsd4_wait_for_delegreturn - wait for delegations to be returned
+ * @rqstp: the RPC transaction being executed
+ * @fhp: filehandle of file being waited for
+ *
+ * A better approach would wait for the DELEGRETURN operation, and
+ * retry just as soon as it was done.
+ *
+ * The timeout prevents deadlock if all nfsd threads happen to be
+ * tied up waiting for returning delegations.
+ */
+void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, struct svc_fh *fhp)
+{
+	trace_nfsd_delegreturn_wait(rqstp, fhp);
+	msleep(NFSD_DELEGRETURN_TIMEOUT);
+}
+
 static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
 {
 	struct nfs4_delegation *dp = cb_to_delegation(cb);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 9a8b09afc173..0b800a154828 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -341,6 +341,7 @@  void		nfsd_lockd_shutdown(void);
 
 #define NFSD_LAUNDROMAT_MINTIMEOUT      1   /* seconds */
 #define	NFSD_COURTESY_CLIENT_TIMEOUT	(24 * 60 * 60)	/* seconds */
+#define NFSD_DELEGRETURN_TIMEOUT	(30)	/* milliseconds */
 
 /*
  * The following attributes are currently not supported by the NFSv4 server:
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 8c3d5f88072f..dd2654cac132 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -443,6 +443,25 @@  DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
 #include "filecache.h"
 #include "vfs.h"
 
+TRACE_EVENT(nfsd_delegreturn_wait,
+	TP_PROTO(
+		const struct svc_rqst *rqstp,
+		const struct svc_fh *fhp
+	),
+	TP_ARGS(rqstp, fhp),
+	TP_STRUCT__entry(
+		__field(u32, xid)
+		__field(u32, fh_hash)
+	),
+	TP_fast_assign(
+		__entry->xid = be32_to_cpu(rqstp->rq_xid);
+		__entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+	),
+	TP_printk("xid=0x%08x fh_hash=0x%08x",
+		  __entry->xid, __entry->fh_hash
+	)
+);
+
 DECLARE_EVENT_CLASS(nfsd_stateid_class,
 	TP_PROTO(stateid_t *stp),
 	TP_ARGS(stp),
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 7b744011f2d3..5b9213076e95 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -788,6 +788,8 @@  extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_st
 		union nfsd4_op_u *u);
 __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *,
 		union nfsd4_op_u *u);
+extern void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp,
+		struct svc_fh *fhp);
 extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
 		struct nfsd4_open *open, struct nfsd_net *nn);
 extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,