[v2,1/2] NFSD: add support for sending CB_RECALL_ANY

Message ID	1666462150-11736-2-git-send-email-dai.ngo@oracle.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-nfs-owner@kernel.org> From: Dai Ngo <dai.ngo@oracle.com> To: chuck.lever@oracle.com, jlayton@kernel.org Cc: linux-nfs@vger.kernel.org Subject: [PATCH v2 1/2] NFSD: add support for sending CB_RECALL_ANY Date: Sat, 22 Oct 2022 11:09:09 -0700 Message-Id: <1666462150-11736-2-git-send-email-dai.ngo@oracle.com> In-Reply-To: <1666462150-11736-1-git-send-email-dai.ngo@oracle.com> References: <1666462150-11736-1-git-send-email-dai.ngo@oracle.com> Precedence: bulk
Series	add support for CB_RECALL_ANY and the delegation shrinker \| expand [v2,0/2] add support for CB_RECALL_ANY and the delegation shrinker [v2,1/2] NFSD: add support for sending CB_RECALL_ANY [v2,2/2] NFSD: add delegation shrinker to react to low memory condition

Dai Ngo Oct. 22, 2022, 6:09 p.m. UTC

There is only one nfsd4_callback, cl_recall_any, added for each
nfs4_client. Access to it must be serialized. For now it's done
by the cl_recall_any_busy flag since it's used only by the
delegation shrinker. If there is another consumer of CB_RECALL_ANY
then a spinlock must be used.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs4state.c    | 27 +++++++++++++++++++++
 fs/nfsd/state.h        |  8 +++++++
 fs/nfsd/xdr4cb.h       |  6 +++++
 4 files changed, 105 insertions(+)

Jeff Layton Oct. 24, 2022, 12:16 p.m. UTC | #1

On Sat, 2022-10-22 at 11:09 -0700, Dai Ngo wrote:
> There is only one nfsd4_callback, cl_recall_any, added for each
> nfs4_client. Access to it must be serialized. For now it's done
> by the cl_recall_any_busy flag since it's used only by the
> delegation shrinker. If there is another consumer of CB_RECALL_ANY
> then a spinlock must be used.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/nfsd/nfs4state.c    | 27 +++++++++++++++++++++
>  fs/nfsd/state.h        |  8 +++++++
>  fs/nfsd/xdr4cb.h       |  6 +++++
>  4 files changed, 105 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> index f0e69edf5f0f..03587e1397f4 100644
> --- a/fs/nfsd/nfs4callback.c
> +++ b/fs/nfsd/nfs4callback.c
> @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
>  }
>  
>  /*
> + * CB_RECALLANY4args
> + *
> + *	struct CB_RECALLANY4args {
> + *		uint32_t	craa_objects_to_keep;
> + *		bitmap4		craa_type_mask;
> + *	};
> + */
> +static void
> +encode_cb_recallany4args(struct xdr_stream *xdr,
> +			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
> +{
> +	__be32 *p;
> +
> +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
> +	p = xdr_reserve_space(xdr, 4);
> +	*p++ = xdr_zero;	/* craa_objects_to_keep */
> +	p = xdr_reserve_space(xdr, 8);
> +	*p++ = cpu_to_be32(1);
> +	*p++ = cpu_to_be32(bmval);
> +	hdr->nops++;
> +}
> +
> +/*
>   * CB_SEQUENCE4args
>   *
>   *	struct CB_SEQUENCE4args {
> @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
>  	encode_cb_nops(&hdr);
>  }
>  
> +/*
> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> + */
> +static void
> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
> +		struct xdr_stream *xdr, const void *data)
> +{
> +	const struct nfsd4_callback *cb = data;
> +	struct nfs4_cb_compound_hdr hdr = {
> +		.ident = cb->cb_clp->cl_cb_ident,
> +		.minorversion = cb->cb_clp->cl_minorversion,
> +	};
> +
> +	encode_cb_compound4args(xdr, &hdr);
> +	encode_cb_sequence4args(xdr, cb, &hdr);
> +	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
> +	encode_cb_nops(&hdr);
> +}
>  
>  /*
>   * NFSv4.0 and NFSv4.1 XDR decode functions
> @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
>  	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
>  }
>  
> +/*
> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> + */
> +static int
> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
> +				  struct xdr_stream *xdr,
> +				  void *data)
> +{
> +	struct nfsd4_callback *cb = data;
> +	struct nfs4_cb_compound_hdr hdr;
> +	int status;
> +
> +	status = decode_cb_compound4res(xdr, &hdr);
> +	if (unlikely(status))
> +		return status;
> +	status = decode_cb_sequence4res(xdr, cb);
> +	if (unlikely(status || cb->cb_seq_status))
> +		return status;
> +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
> +	return status;
> +}
> +
>  #ifdef CONFIG_NFSD_PNFS
>  /*
>   * CB_LAYOUTRECALL4args
> @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
>  #endif
>  	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
>  	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
> +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
>  };
>  
>  static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 4e718500a00c..c60c937dece6 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2854,6 +2854,31 @@ static const struct tree_descr client_files[] = {
>  	[3] = {""},
>  };
>  
> +static int
> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> +			struct rpc_task *task)
> +{
> +	switch (task->tk_status) {
> +	case -NFS4ERR_DELAY:
> +		rpc_delay(task, 2 * HZ);
> +		return 0;
> +	default:
> +		return 1;
> +	}
> +}
> +
> +static void
> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> +{
> +	cb->cb_clp->cl_recall_any_busy = false;
> +	atomic_dec(&cb->cb_clp->cl_rpc_users);
> +}


This series probably ought to be one big patch. The problem is that
you're adding a bunch of code to do CB_RECALL_ANY, but there is no way
to call it without patch #2.

That makes it hard to judge whether there could be races and locking
issues around the handling of cb_recall_any_busy, in particular. From
patch #2, it looks like cb_recall_any_busy is protected by the
nn->client_lock, but I don't think ->release is called with that held.

Also, cl_rpc_users is a refcount (though we don't necessarily free the
object when it goes to zero). I think you need to call
put_client_renew_locked here instead of just decrementing the counter.

> +
> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> +	.done		= nfsd4_cb_recall_any_done,
> +	.release	= nfsd4_cb_recall_any_release,
> +};
> +
>  static struct nfs4_client *create_client(struct xdr_netobj name,
>  		struct svc_rqst *rqstp, nfs4_verifier *verf)
>  {
> @@ -2891,6 +2916,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
>  		free_client(clp);
>  		return NULL;
>  	}
> +	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
> +			NFSPROC4_CLNT_CB_RECALL_ANY);
>  	return clp;
>  }
>  
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index e2daef3cc003..49ca06169642 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -411,6 +411,10 @@ struct nfs4_client {
>  
>  	unsigned int		cl_state;
>  	atomic_t		cl_delegs_in_recall;
> +
> +	bool			cl_recall_any_busy;
> +	uint32_t		cl_recall_any_bm;
> +	struct nfsd4_callback	cl_recall_any;
>  };
>  
>  /* struct nfs4_client_reset
> @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
>  	NFSPROC4_CLNT_CB_OFFLOAD,
>  	NFSPROC4_CLNT_CB_SEQUENCE,
>  	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
> +	NFSPROC4_CLNT_CB_RECALL_ANY,
>  };
>  
> +#define RCA4_TYPE_MASK_RDATA_DLG	0
> +#define RCA4_TYPE_MASK_WDATA_DLG	1
> +
>  /* Returns true iff a is later than b: */
>  static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
>  {
> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
> index 547cf07cf4e0..0d39af1b00a0 100644
> --- a/fs/nfsd/xdr4cb.h
> +++ b/fs/nfsd/xdr4cb.h
> @@ -48,3 +48,9 @@
>  #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
>  					cb_sequence_dec_sz +            \
>  					op_dec_sz)
> +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
> +					cb_sequence_enc_sz +            \
> +					1 + 1 + 1)
> +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
> +					cb_sequence_dec_sz +            \
> +					op_dec_sz)

Chuck Lever Oct. 24, 2022, 4:56 p.m. UTC | #2

> On Oct 24, 2022, at 8:16 AM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> On Sat, 2022-10-22 at 11:09 -0700, Dai Ngo wrote:
>> There is only one nfsd4_callback, cl_recall_any, added for each
>> nfs4_client. Access to it must be serialized. For now it's done
>> by the cl_recall_any_busy flag since it's used only by the
>> delegation shrinker. If there is another consumer of CB_RECALL_ANY
>> then a spinlock must be used.
>> 
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>> fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
>> fs/nfsd/nfs4state.c    | 27 +++++++++++++++++++++
>> fs/nfsd/state.h        |  8 +++++++
>> fs/nfsd/xdr4cb.h       |  6 +++++
>> 4 files changed, 105 insertions(+)
>> 
>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
>> index f0e69edf5f0f..03587e1397f4 100644
>> --- a/fs/nfsd/nfs4callback.c
>> +++ b/fs/nfsd/nfs4callback.c
>> @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
>> }
>> 
>> /*
>> + * CB_RECALLANY4args
>> + *
>> + *	struct CB_RECALLANY4args {
>> + *		uint32_t	craa_objects_to_keep;
>> + *		bitmap4		craa_type_mask;
>> + *	};
>> + */
>> +static void
>> +encode_cb_recallany4args(struct xdr_stream *xdr,
>> +			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
>> +{
>> +	__be32 *p;
>> +
>> +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
>> +	p = xdr_reserve_space(xdr, 4);
>> +	*p++ = xdr_zero;	/* craa_objects_to_keep */
>> +	p = xdr_reserve_space(xdr, 8);
>> +	*p++ = cpu_to_be32(1);
>> +	*p++ = cpu_to_be32(bmval);
>> +	hdr->nops++;
>> +}
>> +
>> +/*
>>  * CB_SEQUENCE4args
>>  *
>>  *	struct CB_SEQUENCE4args {
>> @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
>> 	encode_cb_nops(&hdr);
>> }
>> 
>> +/*
>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>> + */
>> +static void
>> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
>> +		struct xdr_stream *xdr, const void *data)
>> +{
>> +	const struct nfsd4_callback *cb = data;
>> +	struct nfs4_cb_compound_hdr hdr = {
>> +		.ident = cb->cb_clp->cl_cb_ident,
>> +		.minorversion = cb->cb_clp->cl_minorversion,
>> +	};
>> +
>> +	encode_cb_compound4args(xdr, &hdr);
>> +	encode_cb_sequence4args(xdr, cb, &hdr);
>> +	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
>> +	encode_cb_nops(&hdr);
>> +}
>> 
>> /*
>>  * NFSv4.0 and NFSv4.1 XDR decode functions
>> @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
>> 	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
>> }
>> 
>> +/*
>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>> + */
>> +static int
>> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
>> +				  struct xdr_stream *xdr,
>> +				  void *data)
>> +{
>> +	struct nfsd4_callback *cb = data;
>> +	struct nfs4_cb_compound_hdr hdr;
>> +	int status;
>> +
>> +	status = decode_cb_compound4res(xdr, &hdr);
>> +	if (unlikely(status))
>> +		return status;
>> +	status = decode_cb_sequence4res(xdr, cb);
>> +	if (unlikely(status || cb->cb_seq_status))
>> +		return status;
>> +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
>> +	return status;
>> +}
>> +
>> #ifdef CONFIG_NFSD_PNFS
>> /*
>>  * CB_LAYOUTRECALL4args
>> @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
>> #endif
>> 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
>> 	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
>> +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
>> };
>> 
>> static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 4e718500a00c..c60c937dece6 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2854,6 +2854,31 @@ static const struct tree_descr client_files[] = {
>> 	[3] = {""},
>> };
>> 
>> +static int
>> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>> +			struct rpc_task *task)
>> +{
>> +	switch (task->tk_status) {
>> +	case -NFS4ERR_DELAY:
>> +		rpc_delay(task, 2 * HZ);
>> +		return 0;
>> +	default:
>> +		return 1;
>> +	}
>> +}
>> +
>> +static void
>> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>> +{
>> +	cb->cb_clp->cl_recall_any_busy = false;
>> +	atomic_dec(&cb->cb_clp->cl_rpc_users);
>> +}
> 
> 
> This series probably ought to be one big patch. The problem is that
> you're adding a bunch of code to do CB_RECALL_ANY, but there is no way
> to call it without patch #2.

True enough, but there are other potential uses of CB_RECALL_ANY
(and I think Dai is about to add one or two to this series).

I haven't had a chance to look through these yet. I don't mind it
staying split for review, for now. When we get closer to merge,
we can consider squashing if there's still a problem.


> That makes it hard to judge whether there could be races and locking
> issues around the handling of cb_recall_any_busy, in particular. From
> patch #2, it looks like cb_recall_any_busy is protected by the
> nn->client_lock, but I don't think ->release is called with that held.

Then maybe the patches aren't split properly... if there are things
in the second patch that fix issues in the first, then those can be
moved as appropriate.

Adding the new XDR pieces in a separate patch makes sense to me, but
the other parts might be moved to 2/2, for instance.


> Also, cl_rpc_users is a refcount (though we don't necessarily free the
> object when it goes to zero). I think you need to call
> put_client_renew_locked here instead of just decrementing the counter.
> 
>> +
>> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
>> +	.done		= nfsd4_cb_recall_any_done,
>> +	.release	= nfsd4_cb_recall_any_release,
>> +};
>> +
>> static struct nfs4_client *create_client(struct xdr_netobj name,
>> 		struct svc_rqst *rqstp, nfs4_verifier *verf)
>> {
>> @@ -2891,6 +2916,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
>> 		free_client(clp);
>> 		return NULL;
>> 	}
>> +	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
>> +			NFSPROC4_CLNT_CB_RECALL_ANY);
>> 	return clp;
>> }
>> 
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index e2daef3cc003..49ca06169642 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -411,6 +411,10 @@ struct nfs4_client {
>> 
>> 	unsigned int		cl_state;
>> 	atomic_t		cl_delegs_in_recall;
>> +
>> +	bool			cl_recall_any_busy;
>> +	uint32_t		cl_recall_any_bm;
>> +	struct nfsd4_callback	cl_recall_any;
>> };
>> 
>> /* struct nfs4_client_reset
>> @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
>> 	NFSPROC4_CLNT_CB_OFFLOAD,
>> 	NFSPROC4_CLNT_CB_SEQUENCE,
>> 	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
>> +	NFSPROC4_CLNT_CB_RECALL_ANY,
>> };
>> 
>> +#define RCA4_TYPE_MASK_RDATA_DLG	0
>> +#define RCA4_TYPE_MASK_WDATA_DLG	1
>> +
>> /* Returns true iff a is later than b: */
>> static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
>> {
>> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
>> index 547cf07cf4e0..0d39af1b00a0 100644
>> --- a/fs/nfsd/xdr4cb.h
>> +++ b/fs/nfsd/xdr4cb.h
>> @@ -48,3 +48,9 @@
>> #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
>> 					cb_sequence_dec_sz +            \
>> 					op_dec_sz)
>> +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
>> +					cb_sequence_enc_sz +            \
>> +					1 + 1 + 1)
>> +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
>> +					cb_sequence_dec_sz +            \
>> +					op_dec_sz)
> 
> -- 
> Jeff Layton <jlayton@kernel.org>

--
Chuck Lever

Dai Ngo Oct. 24, 2022, 7:44 p.m. UTC | #3

On 10/24/22 5:16 AM, Jeff Layton wrote:
> On Sat, 2022-10-22 at 11:09 -0700, Dai Ngo wrote:
>> There is only one nfsd4_callback, cl_recall_any, added for each
>> nfs4_client. Access to it must be serialized. For now it's done
>> by the cl_recall_any_busy flag since it's used only by the
>> delegation shrinker. If there is another consumer of CB_RECALL_ANY
>> then a spinlock must be used.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>   fs/nfsd/nfs4state.c    | 27 +++++++++++++++++++++
>>   fs/nfsd/state.h        |  8 +++++++
>>   fs/nfsd/xdr4cb.h       |  6 +++++
>>   4 files changed, 105 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
>> index f0e69edf5f0f..03587e1397f4 100644
>> --- a/fs/nfsd/nfs4callback.c
>> +++ b/fs/nfsd/nfs4callback.c
>> @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
>>   }
>>   
>>   /*
>> + * CB_RECALLANY4args
>> + *
>> + *	struct CB_RECALLANY4args {
>> + *		uint32_t	craa_objects_to_keep;
>> + *		bitmap4		craa_type_mask;
>> + *	};
>> + */
>> +static void
>> +encode_cb_recallany4args(struct xdr_stream *xdr,
>> +			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
>> +{
>> +	__be32 *p;
>> +
>> +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
>> +	p = xdr_reserve_space(xdr, 4);
>> +	*p++ = xdr_zero;	/* craa_objects_to_keep */
>> +	p = xdr_reserve_space(xdr, 8);
>> +	*p++ = cpu_to_be32(1);
>> +	*p++ = cpu_to_be32(bmval);
>> +	hdr->nops++;
>> +}
>> +
>> +/*
>>    * CB_SEQUENCE4args
>>    *
>>    *	struct CB_SEQUENCE4args {
>> @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
>>   	encode_cb_nops(&hdr);
>>   }
>>   
>> +/*
>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>> + */
>> +static void
>> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
>> +		struct xdr_stream *xdr, const void *data)
>> +{
>> +	const struct nfsd4_callback *cb = data;
>> +	struct nfs4_cb_compound_hdr hdr = {
>> +		.ident = cb->cb_clp->cl_cb_ident,
>> +		.minorversion = cb->cb_clp->cl_minorversion,
>> +	};
>> +
>> +	encode_cb_compound4args(xdr, &hdr);
>> +	encode_cb_sequence4args(xdr, cb, &hdr);
>> +	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
>> +	encode_cb_nops(&hdr);
>> +}
>>   
>>   /*
>>    * NFSv4.0 and NFSv4.1 XDR decode functions
>> @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
>>   	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
>>   }
>>   
>> +/*
>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>> + */
>> +static int
>> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
>> +				  struct xdr_stream *xdr,
>> +				  void *data)
>> +{
>> +	struct nfsd4_callback *cb = data;
>> +	struct nfs4_cb_compound_hdr hdr;
>> +	int status;
>> +
>> +	status = decode_cb_compound4res(xdr, &hdr);
>> +	if (unlikely(status))
>> +		return status;
>> +	status = decode_cb_sequence4res(xdr, cb);
>> +	if (unlikely(status || cb->cb_seq_status))
>> +		return status;
>> +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
>> +	return status;
>> +}
>> +
>>   #ifdef CONFIG_NFSD_PNFS
>>   /*
>>    * CB_LAYOUTRECALL4args
>> @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
>>   #endif
>>   	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
>>   	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
>> +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
>>   };
>>   
>>   static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 4e718500a00c..c60c937dece6 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2854,6 +2854,31 @@ static const struct tree_descr client_files[] = {
>>   	[3] = {""},
>>   };
>>   
>> +static int
>> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>> +			struct rpc_task *task)
>> +{
>> +	switch (task->tk_status) {
>> +	case -NFS4ERR_DELAY:
>> +		rpc_delay(task, 2 * HZ);
>> +		return 0;
>> +	default:
>> +		return 1;
>> +	}
>> +}
>> +
>> +static void
>> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>> +{
>> +	cb->cb_clp->cl_recall_any_busy = false;
>> +	atomic_dec(&cb->cb_clp->cl_rpc_users);
>> +}
>
> This series probably ought to be one big patch. The problem is that
> you're adding a bunch of code to do CB_RECALL_ANY, but there is no way
> to call it without patch #2.

The reason I separated these patches is that the 1st patch, adding support
CB_RECALL_ANY can be called for other purposes than just for delegation such
as to recall pnfs layouts, or when the max number of delegation is reached.
So that was why I did not combined this patches. However, I understand your
concern about not being able to test individual patch. So as Chuck suggested,
perhaps we can leave these as separate patches for easier review and when it's
finalized we can decide to combine them in to one big patch.  BTW, I plan to
add a third patch to this series to send CB_RECALL_ANY to clients when the
max number of delegations is reached.

>
> That makes it hard to judge whether there could be races and locking
> issues around the handling of cb_recall_any_busy, in particular. From
> patch #2, it looks like cb_recall_any_busy is protected by the
> nn->client_lock, but I don't think ->release is called with that held.

I don't intended to use the nn->client_lock, I think the scope of this
lock is too big for what's needed to serialize access to struct nfsd4_callback.
As I mentioned in the cover email, since the cb_recall_any_busy is only
used by the deleg_reaper we do not need a lock to protect this flag.
But if there is another of consumer, other than deleg_reaper, of this
nfsd4_callback then we can add a simple spinlock for it.

My question is do you think we need to add the spinlock now instead of
delaying it until there is real need?

>
> Also, cl_rpc_users is a refcount (though we don't necessarily free the
> object when it goes to zero). I think you need to call
> put_client_renew_locked here instead of just decrementing the counter.

Since put_client_renew_locked() also renews the client lease, I don't
think it's right nfsd4_cb_recall_any_release to renew the lease because
because this is a callback so the client is not actually sending any
request that causes the lease to renewed, and nfsd4_cb_recall_any_release
is also alled even if the client is completely dead and did not reply, or
reply with some errors.

-Dai

>
>> +
>> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
>> +	.done		= nfsd4_cb_recall_any_done,
>> +	.release	= nfsd4_cb_recall_any_release,
>> +};
>> +
>>   static struct nfs4_client *create_client(struct xdr_netobj name,
>>   		struct svc_rqst *rqstp, nfs4_verifier *verf)
>>   {
>> @@ -2891,6 +2916,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
>>   		free_client(clp);
>>   		return NULL;
>>   	}
>> +	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
>> +			NFSPROC4_CLNT_CB_RECALL_ANY);
>>   	return clp;
>>   }
>>   
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index e2daef3cc003..49ca06169642 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -411,6 +411,10 @@ struct nfs4_client {
>>   
>>   	unsigned int		cl_state;
>>   	atomic_t		cl_delegs_in_recall;
>> +
>> +	bool			cl_recall_any_busy;
>> +	uint32_t		cl_recall_any_bm;
>> +	struct nfsd4_callback	cl_recall_any;
>>   };
>>   
>>   /* struct nfs4_client_reset
>> @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
>>   	NFSPROC4_CLNT_CB_OFFLOAD,
>>   	NFSPROC4_CLNT_CB_SEQUENCE,
>>   	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
>> +	NFSPROC4_CLNT_CB_RECALL_ANY,
>>   };
>>   
>> +#define RCA4_TYPE_MASK_RDATA_DLG	0
>> +#define RCA4_TYPE_MASK_WDATA_DLG	1
>> +
>>   /* Returns true iff a is later than b: */
>>   static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
>>   {
>> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
>> index 547cf07cf4e0..0d39af1b00a0 100644
>> --- a/fs/nfsd/xdr4cb.h
>> +++ b/fs/nfsd/xdr4cb.h
>> @@ -48,3 +48,9 @@
>>   #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
>>   					cb_sequence_dec_sz +            \
>>   					op_dec_sz)
>> +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
>> +					cb_sequence_enc_sz +            \
>> +					1 + 1 + 1)
>> +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
>> +					cb_sequence_dec_sz +            \
>> +					op_dec_sz)

Jeff Layton Oct. 24, 2022, 9:09 p.m. UTC | #4

On Mon, 2022-10-24 at 12:44 -0700, dai.ngo@oracle.com wrote:
> On 10/24/22 5:16 AM, Jeff Layton wrote:
> > On Sat, 2022-10-22 at 11:09 -0700, Dai Ngo wrote:
> > > There is only one nfsd4_callback, cl_recall_any, added for each
> > > nfs4_client. Access to it must be serialized. For now it's done
> > > by the cl_recall_any_busy flag since it's used only by the
> > > delegation shrinker. If there is another consumer of CB_RECALL_ANY
> > > then a spinlock must be used.
> > > 
> > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > ---
> > >   fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
> > >   fs/nfsd/nfs4state.c    | 27 +++++++++++++++++++++
> > >   fs/nfsd/state.h        |  8 +++++++
> > >   fs/nfsd/xdr4cb.h       |  6 +++++
> > >   4 files changed, 105 insertions(+)
> > > 
> > > diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> > > index f0e69edf5f0f..03587e1397f4 100644
> > > --- a/fs/nfsd/nfs4callback.c
> > > +++ b/fs/nfsd/nfs4callback.c
> > > @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
> > >   }
> > >   
> > >   /*
> > > + * CB_RECALLANY4args
> > > + *
> > > + *	struct CB_RECALLANY4args {
> > > + *		uint32_t	craa_objects_to_keep;
> > > + *		bitmap4		craa_type_mask;
> > > + *	};
> > > + */
> > > +static void
> > > +encode_cb_recallany4args(struct xdr_stream *xdr,
> > > +			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
> > > +{
> > > +	__be32 *p;
> > > +
> > > +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
> > > +	p = xdr_reserve_space(xdr, 4);
> > > +	*p++ = xdr_zero;	/* craa_objects_to_keep */
> > > +	p = xdr_reserve_space(xdr, 8);
> > > +	*p++ = cpu_to_be32(1);
> > > +	*p++ = cpu_to_be32(bmval);
> > > +	hdr->nops++;
> > > +}
> > > +
> > > +/*
> > >    * CB_SEQUENCE4args
> > >    *
> > >    *	struct CB_SEQUENCE4args {
> > > @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
> > >   	encode_cb_nops(&hdr);
> > >   }
> > >   
> > > +/*
> > > + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> > > + */
> > > +static void
> > > +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
> > > +		struct xdr_stream *xdr, const void *data)
> > > +{
> > > +	const struct nfsd4_callback *cb = data;
> > > +	struct nfs4_cb_compound_hdr hdr = {
> > > +		.ident = cb->cb_clp->cl_cb_ident,
> > > +		.minorversion = cb->cb_clp->cl_minorversion,
> > > +	};
> > > +
> > > +	encode_cb_compound4args(xdr, &hdr);
> > > +	encode_cb_sequence4args(xdr, cb, &hdr);
> > > +	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
> > > +	encode_cb_nops(&hdr);
> > > +}
> > >   
> > >   /*
> > >    * NFSv4.0 and NFSv4.1 XDR decode functions
> > > @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
> > >   	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
> > >   }
> > >   
> > > +/*
> > > + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> > > + */
> > > +static int
> > > +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
> > > +				  struct xdr_stream *xdr,
> > > +				  void *data)
> > > +{
> > > +	struct nfsd4_callback *cb = data;
> > > +	struct nfs4_cb_compound_hdr hdr;
> > > +	int status;
> > > +
> > > +	status = decode_cb_compound4res(xdr, &hdr);
> > > +	if (unlikely(status))
> > > +		return status;
> > > +	status = decode_cb_sequence4res(xdr, cb);
> > > +	if (unlikely(status || cb->cb_seq_status))
> > > +		return status;
> > > +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
> > > +	return status;
> > > +}
> > > +
> > >   #ifdef CONFIG_NFSD_PNFS
> > >   /*
> > >    * CB_LAYOUTRECALL4args
> > > @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
> > >   #endif
> > >   	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
> > >   	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
> > > +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
> > >   };
> > >   
> > >   static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index 4e718500a00c..c60c937dece6 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -2854,6 +2854,31 @@ static const struct tree_descr client_files[] = {
> > >   	[3] = {""},
> > >   };
> > >   
> > > +static int
> > > +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> > > +			struct rpc_task *task)
> > > +{
> > > +	switch (task->tk_status) {
> > > +	case -NFS4ERR_DELAY:
> > > +		rpc_delay(task, 2 * HZ);
> > > +		return 0;
> > > +	default:
> > > +		return 1;
> > > +	}
> > > +}
> > > +
> > > +static void
> > > +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> > > +{
> > > +	cb->cb_clp->cl_recall_any_busy = false;
> > > +	atomic_dec(&cb->cb_clp->cl_rpc_users);
> > > +}
> > 
> > This series probably ought to be one big patch. The problem is that
> > you're adding a bunch of code to do CB_RECALL_ANY, but there is no way
> > to call it without patch #2.
> 
> The reason I separated these patches is that the 1st patch, adding support
> CB_RECALL_ANY can be called for other purposes than just for delegation such
> as to recall pnfs layouts, or when the max number of delegation is reached.
> So that was why I did not combined this patches. However, I understand your
> concern about not being able to test individual patch. So as Chuck suggested,
> perhaps we can leave these as separate patches for easier review and when it's
> finalized we can decide to combine them in to one big patch.  BTW, I plan to
> add a third patch to this series to send CB_RECALL_ANY to clients when the
> max number of delegations is reached.
> 

I think we should get this bit sorted out first, but that sounds
reasonable eventually.

> > 
> > That makes it hard to judge whether there could be races and locking
> > issues around the handling of cb_recall_any_busy, in particular. From
> > patch #2, it looks like cb_recall_any_busy is protected by the
> > nn->client_lock, but I don't think ->release is called with that held.
> 
> I don't intended to use the nn->client_lock, I think the scope of this
> lock is too big for what's needed to serialize access to struct nfsd4_callback.
> As I mentioned in the cover email, since the cb_recall_any_busy is only
> used by the deleg_reaper we do not need a lock to protect this flag.
> But if there is another of consumer, other than deleg_reaper, of this
> nfsd4_callback then we can add a simple spinlock for it.
> 
> My question is do you think we need to add the spinlock now instead of
> delaying it until there is real need?
> 

I don't see the need for a dedicated spinlock here. You said above that
there is only one of these per client, so you could use the
client->cl_lock.

But...I don't see the problem with doing just using the nn->client_lock
here. It's not like we're likely to be calling this that often, and if
we do then the contention for the nn->client_lock is probably the least
of our worries.

Honestly, do we need this boolean at all? The only place it's checked is
in deleg_reaper. Why not just try to submit the work and if it's already
queued, let it fail?

> > 
> > Also, cl_rpc_users is a refcount (though we don't necessarily free the
> > object when it goes to zero). I think you need to call
> > put_client_renew_locked here instead of just decrementing the counter.
> 
> Since put_client_renew_locked() also renews the client lease, I don't
> think it's right nfsd4_cb_recall_any_release to renew the lease because
> because this is a callback so the client is not actually sending any
> request that causes the lease to renewed, and nfsd4_cb_recall_any_release
> is also alled even if the client is completely dead and did not reply, or
> reply with some errors.
> 

What happens when this atomic_inc makes the cl_rpc_count go to zero?
What actually triggers the cleanup activities in put_client_renew /
put_client_renew_locked in that situation?

> 
> > 
> > > +
> > > +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> > > +	.done		= nfsd4_cb_recall_any_done,
> > > +	.release	= nfsd4_cb_recall_any_release,
> > > +};
> > > +
> > >   static struct nfs4_client *create_client(struct xdr_netobj name,
> > >   		struct svc_rqst *rqstp, nfs4_verifier *verf)
> > >   {
> > > @@ -2891,6 +2916,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> > >   		free_client(clp);
> > >   		return NULL;
> > >   	}
> > > +	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
> > > +			NFSPROC4_CLNT_CB_RECALL_ANY);
> > >   	return clp;
> > >   }
> > >   
> > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > > index e2daef3cc003..49ca06169642 100644
> > > --- a/fs/nfsd/state.h
> > > +++ b/fs/nfsd/state.h
> > > @@ -411,6 +411,10 @@ struct nfs4_client {
> > >   
> > >   	unsigned int		cl_state;
> > >   	atomic_t		cl_delegs_in_recall;
> > > +
> > > +	bool			cl_recall_any_busy;
> > > +	uint32_t		cl_recall_any_bm;
> > > +	struct nfsd4_callback	cl_recall_any;
> > >   };
> > >   
> > >   /* struct nfs4_client_reset
> > > @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
> > >   	NFSPROC4_CLNT_CB_OFFLOAD,
> > >   	NFSPROC4_CLNT_CB_SEQUENCE,
> > >   	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
> > > +	NFSPROC4_CLNT_CB_RECALL_ANY,
> > >   };
> > >   
> > > +#define RCA4_TYPE_MASK_RDATA_DLG	0
> > > +#define RCA4_TYPE_MASK_WDATA_DLG	1
> > > +
> > >   /* Returns true iff a is later than b: */
> > >   static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> > >   {
> > > diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
> > > index 547cf07cf4e0..0d39af1b00a0 100644
> > > --- a/fs/nfsd/xdr4cb.h
> > > +++ b/fs/nfsd/xdr4cb.h
> > > @@ -48,3 +48,9 @@
> > >   #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
> > >   					cb_sequence_dec_sz +            \
> > >   					op_dec_sz)
> > > +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
> > > +					cb_sequence_enc_sz +            \
> > > +					1 + 1 + 1)
> > > +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
> > > +					cb_sequence_dec_sz +            \
> > > +					op_dec_sz)

Dai Ngo Oct. 25, 2022, 1:30 a.m. UTC | #5

On 10/24/22 2:09 PM, Jeff Layton wrote:
> On Mon, 2022-10-24 at 12:44 -0700, dai.ngo@oracle.com wrote:
>> On 10/24/22 5:16 AM, Jeff Layton wrote:
>>> On Sat, 2022-10-22 at 11:09 -0700, Dai Ngo wrote:
>>>> There is only one nfsd4_callback, cl_recall_any, added for each
>>>> nfs4_client. Access to it must be serialized. For now it's done
>>>> by the cl_recall_any_busy flag since it's used only by the
>>>> delegation shrinker. If there is another consumer of CB_RECALL_ANY
>>>> then a spinlock must be used.
>>>>
>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>> ---
>>>>    fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    fs/nfsd/nfs4state.c    | 27 +++++++++++++++++++++
>>>>    fs/nfsd/state.h        |  8 +++++++
>>>>    fs/nfsd/xdr4cb.h       |  6 +++++
>>>>    4 files changed, 105 insertions(+)
>>>>
>>>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
>>>> index f0e69edf5f0f..03587e1397f4 100644
>>>> --- a/fs/nfsd/nfs4callback.c
>>>> +++ b/fs/nfsd/nfs4callback.c
>>>> @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
>>>>    }
>>>>    
>>>>    /*
>>>> + * CB_RECALLANY4args
>>>> + *
>>>> + *	struct CB_RECALLANY4args {
>>>> + *		uint32_t	craa_objects_to_keep;
>>>> + *		bitmap4		craa_type_mask;
>>>> + *	};
>>>> + */
>>>> +static void
>>>> +encode_cb_recallany4args(struct xdr_stream *xdr,
>>>> +			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
>>>> +{
>>>> +	__be32 *p;
>>>> +
>>>> +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
>>>> +	p = xdr_reserve_space(xdr, 4);
>>>> +	*p++ = xdr_zero;	/* craa_objects_to_keep */
>>>> +	p = xdr_reserve_space(xdr, 8);
>>>> +	*p++ = cpu_to_be32(1);
>>>> +	*p++ = cpu_to_be32(bmval);
>>>> +	hdr->nops++;
>>>> +}
>>>> +
>>>> +/*
>>>>     * CB_SEQUENCE4args
>>>>     *
>>>>     *	struct CB_SEQUENCE4args {
>>>> @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
>>>>    	encode_cb_nops(&hdr);
>>>>    }
>>>>    
>>>> +/*
>>>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>>>> + */
>>>> +static void
>>>> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
>>>> +		struct xdr_stream *xdr, const void *data)
>>>> +{
>>>> +	const struct nfsd4_callback *cb = data;
>>>> +	struct nfs4_cb_compound_hdr hdr = {
>>>> +		.ident = cb->cb_clp->cl_cb_ident,
>>>> +		.minorversion = cb->cb_clp->cl_minorversion,
>>>> +	};
>>>> +
>>>> +	encode_cb_compound4args(xdr, &hdr);
>>>> +	encode_cb_sequence4args(xdr, cb, &hdr);
>>>> +	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
>>>> +	encode_cb_nops(&hdr);
>>>> +}
>>>>    
>>>>    /*
>>>>     * NFSv4.0 and NFSv4.1 XDR decode functions
>>>> @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
>>>>    	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
>>>>    }
>>>>    
>>>> +/*
>>>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>>>> + */
>>>> +static int
>>>> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
>>>> +				  struct xdr_stream *xdr,
>>>> +				  void *data)
>>>> +{
>>>> +	struct nfsd4_callback *cb = data;
>>>> +	struct nfs4_cb_compound_hdr hdr;
>>>> +	int status;
>>>> +
>>>> +	status = decode_cb_compound4res(xdr, &hdr);
>>>> +	if (unlikely(status))
>>>> +		return status;
>>>> +	status = decode_cb_sequence4res(xdr, cb);
>>>> +	if (unlikely(status || cb->cb_seq_status))
>>>> +		return status;
>>>> +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
>>>> +	return status;
>>>> +}
>>>> +
>>>>    #ifdef CONFIG_NFSD_PNFS
>>>>    /*
>>>>     * CB_LAYOUTRECALL4args
>>>> @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
>>>>    #endif
>>>>    	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
>>>>    	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
>>>> +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
>>>>    };
>>>>    
>>>>    static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>> index 4e718500a00c..c60c937dece6 100644
>>>> --- a/fs/nfsd/nfs4state.c
>>>> +++ b/fs/nfsd/nfs4state.c
>>>> @@ -2854,6 +2854,31 @@ static const struct tree_descr client_files[] = {
>>>>    	[3] = {""},
>>>>    };
>>>>    
>>>> +static int
>>>> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>>>> +			struct rpc_task *task)
>>>> +{
>>>> +	switch (task->tk_status) {
>>>> +	case -NFS4ERR_DELAY:
>>>> +		rpc_delay(task, 2 * HZ);
>>>> +		return 0;
>>>> +	default:
>>>> +		return 1;
>>>> +	}
>>>> +}
>>>> +
>>>> +static void
>>>> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>>>> +{
>>>> +	cb->cb_clp->cl_recall_any_busy = false;
>>>> +	atomic_dec(&cb->cb_clp->cl_rpc_users);
>>>> +}
>>> This series probably ought to be one big patch. The problem is that
>>> you're adding a bunch of code to do CB_RECALL_ANY, but there is no way
>>> to call it without patch #2.
>> The reason I separated these patches is that the 1st patch, adding support
>> CB_RECALL_ANY can be called for other purposes than just for delegation such
>> as to recall pnfs layouts, or when the max number of delegation is reached.
>> So that was why I did not combined this patches. However, I understand your
>> concern about not being able to test individual patch. So as Chuck suggested,
>> perhaps we can leave these as separate patches for easier review and when it's
>> finalized we can decide to combine them in to one big patch.  BTW, I plan to
>> add a third patch to this series to send CB_RECALL_ANY to clients when the
>> max number of delegations is reached.
>>
> I think we should get this bit sorted out first,

ok

>   but that sounds
> reasonable eventually.
>
>>> That makes it hard to judge whether there could be races and locking
>>> issues around the handling of cb_recall_any_busy, in particular. From
>>> patch #2, it looks like cb_recall_any_busy is protected by the
>>> nn->client_lock, but I don't think ->release is called with that held.
>> I don't intended to use the nn->client_lock, I think the scope of this
>> lock is too big for what's needed to serialize access to struct nfsd4_callback.
>> As I mentioned in the cover email, since the cb_recall_any_busy is only
>> used by the deleg_reaper we do not need a lock to protect this flag.
>> But if there is another of consumer, other than deleg_reaper, of this
>> nfsd4_callback then we can add a simple spinlock for it.
>>
>> My question is do you think we need to add the spinlock now instead of
>> delaying it until there is real need?
>>
> I don't see the need for a dedicated spinlock here. You said above that
> there is only one of these per client, so you could use the
> client->cl_lock.
>
> But...I don't see the problem with doing just using the nn->client_lock
> here. It's not like we're likely to be calling this that often, and if
> we do then the contention for the nn->client_lock is probably the least
> of our worries.

If the contention on nn->client_lock is not a concern then I just
leave the patch to use the nn->client_lock as it current does.

>
> Honestly, do we need this boolean at all? The only place it's checked is
> in deleg_reaper. Why not just try to submit the work and if it's already
> queued, let it fail?

There is nothing in the existing code to prevent the nfs4_callback from
being used again before the current CB_RECALL_ANY request completes. This
resulted in se_cb_seq_nr becomes out of sync with the client and server
starts getting NFS4ERR_SEQ_MISORDERED then eventually NFS4ERR_BADSESSION
from the client.

nfsd4_recall_file_layout has similar usage of nfs4_callback and it uses
the ls_lock to make sure the current request is done before allowing new
one to proceed.

>
>>> Also, cl_rpc_users is a refcount (though we don't necessarily free the
>>> object when it goes to zero). I think you need to call
>>> put_client_renew_locked here instead of just decrementing the counter.
>> Since put_client_renew_locked() also renews the client lease, I don't
>> think it's right nfsd4_cb_recall_any_release to renew the lease because
>> because this is a callback so the client is not actually sending any
>> request that causes the lease to renewed, and nfsd4_cb_recall_any_release
>> is also alled even if the client is completely dead and did not reply, or
>> reply with some errors.
>>
> What happens when this atomic_inc makes the cl_rpc_count go to zero?

Do you mean atomic_dec of cl_rpc_users?

> What actually triggers the cleanup activities in put_client_renew /
> put_client_renew_locked in that situation?

maybe I'm missing something, but I don't see any client cleanup
in put_client_renew/put_client_renew_locked other than renewing
the lease?

-Dai

>
>>>> +
>>>> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
>>>> +	.done		= nfsd4_cb_recall_any_done,
>>>> +	.release	= nfsd4_cb_recall_any_release,
>>>> +};
>>>> +
>>>>    static struct nfs4_client *create_client(struct xdr_netobj name,
>>>>    		struct svc_rqst *rqstp, nfs4_verifier *verf)
>>>>    {
>>>> @@ -2891,6 +2916,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
>>>>    		free_client(clp);
>>>>    		return NULL;
>>>>    	}
>>>> +	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
>>>> +			NFSPROC4_CLNT_CB_RECALL_ANY);
>>>>    	return clp;
>>>>    }
>>>>    
>>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>>>> index e2daef3cc003..49ca06169642 100644
>>>> --- a/fs/nfsd/state.h
>>>> +++ b/fs/nfsd/state.h
>>>> @@ -411,6 +411,10 @@ struct nfs4_client {
>>>>    
>>>>    	unsigned int		cl_state;
>>>>    	atomic_t		cl_delegs_in_recall;
>>>> +
>>>> +	bool			cl_recall_any_busy;
>>>> +	uint32_t		cl_recall_any_bm;
>>>> +	struct nfsd4_callback	cl_recall_any;
>>>>    };
>>>>    
>>>>    /* struct nfs4_client_reset
>>>> @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
>>>>    	NFSPROC4_CLNT_CB_OFFLOAD,
>>>>    	NFSPROC4_CLNT_CB_SEQUENCE,
>>>>    	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
>>>> +	NFSPROC4_CLNT_CB_RECALL_ANY,
>>>>    };
>>>>    
>>>> +#define RCA4_TYPE_MASK_RDATA_DLG	0
>>>> +#define RCA4_TYPE_MASK_WDATA_DLG	1
>>>> +
>>>>    /* Returns true iff a is later than b: */
>>>>    static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
>>>>    {
>>>> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
>>>> index 547cf07cf4e0..0d39af1b00a0 100644
>>>> --- a/fs/nfsd/xdr4cb.h
>>>> +++ b/fs/nfsd/xdr4cb.h
>>>> @@ -48,3 +48,9 @@
>>>>    #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
>>>>    					cb_sequence_dec_sz +            \
>>>>    					op_dec_sz)
>>>> +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
>>>> +					cb_sequence_enc_sz +            \
>>>> +					1 + 1 + 1)
>>>> +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
>>>> +					cb_sequence_dec_sz +            \
>>>> +					op_dec_sz)

Jeff Layton Oct. 25, 2022, 9:33 a.m. UTC | #6

On Mon, 2022-10-24 at 18:30 -0700, dai.ngo@oracle.com wrote:
> On 10/24/22 2:09 PM, Jeff Layton wrote:
> > On Mon, 2022-10-24 at 12:44 -0700, dai.ngo@oracle.com wrote:
> > > On 10/24/22 5:16 AM, Jeff Layton wrote:
> > > > On Sat, 2022-10-22 at 11:09 -0700, Dai Ngo wrote:
> > > > > There is only one nfsd4_callback, cl_recall_any, added for each
> > > > > nfs4_client. Access to it must be serialized. For now it's done
> > > > > by the cl_recall_any_busy flag since it's used only by the
> > > > > delegation shrinker. If there is another consumer of CB_RECALL_ANY
> > > > > then a spinlock must be used.
> > > > > 
> > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > > > ---
> > > > >    fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >    fs/nfsd/nfs4state.c    | 27 +++++++++++++++++++++
> > > > >    fs/nfsd/state.h        |  8 +++++++
> > > > >    fs/nfsd/xdr4cb.h       |  6 +++++
> > > > >    4 files changed, 105 insertions(+)
> > > > > 
> > > > > diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> > > > > index f0e69edf5f0f..03587e1397f4 100644
> > > > > --- a/fs/nfsd/nfs4callback.c
> > > > > +++ b/fs/nfsd/nfs4callback.c
> > > > > @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
> > > > >    }
> > > > >    
> > > > >    /*
> > > > > + * CB_RECALLANY4args
> > > > > + *
> > > > > + *	struct CB_RECALLANY4args {
> > > > > + *		uint32_t	craa_objects_to_keep;
> > > > > + *		bitmap4		craa_type_mask;
> > > > > + *	};
> > > > > + */
> > > > > +static void
> > > > > +encode_cb_recallany4args(struct xdr_stream *xdr,
> > > > > +			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
> > > > > +{
> > > > > +	__be32 *p;
> > > > > +
> > > > > +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
> > > > > +	p = xdr_reserve_space(xdr, 4);
> > > > > +	*p++ = xdr_zero;	/* craa_objects_to_keep */
> > > > > +	p = xdr_reserve_space(xdr, 8);
> > > > > +	*p++ = cpu_to_be32(1);
> > > > > +	*p++ = cpu_to_be32(bmval);
> > > > > +	hdr->nops++;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > >     * CB_SEQUENCE4args
> > > > >     *
> > > > >     *	struct CB_SEQUENCE4args {
> > > > > @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
> > > > >    	encode_cb_nops(&hdr);
> > > > >    }
> > > > >    
> > > > > +/*
> > > > > + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> > > > > + */
> > > > > +static void
> > > > > +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
> > > > > +		struct xdr_stream *xdr, const void *data)
> > > > > +{
> > > > > +	const struct nfsd4_callback *cb = data;
> > > > > +	struct nfs4_cb_compound_hdr hdr = {
> > > > > +		.ident = cb->cb_clp->cl_cb_ident,
> > > > > +		.minorversion = cb->cb_clp->cl_minorversion,
> > > > > +	};
> > > > > +
> > > > > +	encode_cb_compound4args(xdr, &hdr);
> > > > > +	encode_cb_sequence4args(xdr, cb, &hdr);
> > > > > +	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
> > > > > +	encode_cb_nops(&hdr);
> > > > > +}
> > > > >    
> > > > >    /*
> > > > >     * NFSv4.0 and NFSv4.1 XDR decode functions
> > > > > @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
> > > > >    	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
> > > > >    }
> > > > >    
> > > > > +/*
> > > > > + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> > > > > + */
> > > > > +static int
> > > > > +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
> > > > > +				  struct xdr_stream *xdr,
> > > > > +				  void *data)
> > > > > +{
> > > > > +	struct nfsd4_callback *cb = data;
> > > > > +	struct nfs4_cb_compound_hdr hdr;
> > > > > +	int status;
> > > > > +
> > > > > +	status = decode_cb_compound4res(xdr, &hdr);
> > > > > +	if (unlikely(status))
> > > > > +		return status;
> > > > > +	status = decode_cb_sequence4res(xdr, cb);
> > > > > +	if (unlikely(status || cb->cb_seq_status))
> > > > > +		return status;
> > > > > +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
> > > > > +	return status;
> > > > > +}
> > > > > +
> > > > >    #ifdef CONFIG_NFSD_PNFS
> > > > >    /*
> > > > >     * CB_LAYOUTRECALL4args
> > > > > @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
> > > > >    #endif
> > > > >    	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
> > > > >    	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
> > > > > +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
> > > > >    };
> > > > >    
> > > > >    static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
> > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > > index 4e718500a00c..c60c937dece6 100644
> > > > > --- a/fs/nfsd/nfs4state.c
> > > > > +++ b/fs/nfsd/nfs4state.c
> > > > > @@ -2854,6 +2854,31 @@ static const struct tree_descr client_files[] = {
> > > > >    	[3] = {""},
> > > > >    };
> > > > >    
> > > > > +static int
> > > > > +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> > > > > +			struct rpc_task *task)
> > > > > +{
> > > > > +	switch (task->tk_status) {
> > > > > +	case -NFS4ERR_DELAY:
> > > > > +		rpc_delay(task, 2 * HZ);
> > > > > +		return 0;
> > > > > +	default:
> > > > > +		return 1;
> > > > > +	}
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> > > > > +{
> > > > > +	cb->cb_clp->cl_recall_any_busy = false;
> > > > > +	atomic_dec(&cb->cb_clp->cl_rpc_users);
> > > > > +}
> > > > This series probably ought to be one big patch. The problem is that
> > > > you're adding a bunch of code to do CB_RECALL_ANY, but there is no way
> > > > to call it without patch #2.
> > > The reason I separated these patches is that the 1st patch, adding support
> > > CB_RECALL_ANY can be called for other purposes than just for delegation such
> > > as to recall pnfs layouts, or when the max number of delegation is reached.
> > > So that was why I did not combined this patches. However, I understand your
> > > concern about not being able to test individual patch. So as Chuck suggested,
> > > perhaps we can leave these as separate patches for easier review and when it's
> > > finalized we can decide to combine them in to one big patch.  BTW, I plan to
> > > add a third patch to this series to send CB_RECALL_ANY to clients when the
> > > max number of delegations is reached.
> > > 
> > I think we should get this bit sorted out first,
> 
> ok
> 
> >   but that sounds
> > reasonable eventually.
> > 
> > > > That makes it hard to judge whether there could be races and locking
> > > > issues around the handling of cb_recall_any_busy, in particular. From
> > > > patch #2, it looks like cb_recall_any_busy is protected by the
> > > > nn->client_lock, but I don't think ->release is called with that held.
> > > I don't intended to use the nn->client_lock, I think the scope of this
> > > lock is too big for what's needed to serialize access to struct nfsd4_callback.
> > > As I mentioned in the cover email, since the cb_recall_any_busy is only
> > > used by the deleg_reaper we do not need a lock to protect this flag.
> > > But if there is another of consumer, other than deleg_reaper, of this
> > > nfsd4_callback then we can add a simple spinlock for it.
> > > 
> > > My question is do you think we need to add the spinlock now instead of
> > > delaying it until there is real need?
> > > 
> > I don't see the need for a dedicated spinlock here. You said above that
> > there is only one of these per client, so you could use the
> > client->cl_lock.
> > 
> > But...I don't see the problem with doing just using the nn->client_lock
> > here. It's not like we're likely to be calling this that often, and if
> > we do then the contention for the nn->client_lock is probably the least
> > of our worries.
> 
> If the contention on nn->client_lock is not a concern then I just
> leave the patch to use the nn->client_lock as it current does.
> 

Except you aren't taking the client_lock in ->release. That's what needs
to be added if you want to keep this boolean.

> > 
> > Honestly, do we need this boolean at all? The only place it's checked is
> > in deleg_reaper. Why not just try to submit the work and if it's already
> > queued, let it fail?
> 
> There is nothing in the existing code to prevent the nfs4_callback from
> being used again before the current CB_RECALL_ANY request completes. This
> resulted in se_cb_seq_nr becomes out of sync with the client and server
> starts getting NFS4ERR_SEQ_MISORDERED then eventually NFS4ERR_BADSESSION
> from the client.
> 
> nfsd4_recall_file_layout has similar usage of nfs4_callback and it uses
> the ls_lock to make sure the current request is done before allowing new
> one to proceed.
> 

That's a little different. The ls_lock protects ls_recalled, which is
set when a recall is issued. We only want to issue a recall for a
delegation once and that's what ensures that it's only issued once.

CB_RECALL_ANY could be called more than once per client. We don't need
to ensure "exactly once" semantics there. All of the callbacks are run
out of workqueues.

If a workqueue job is already scheduled then queue_work will return
false when called. Could you just do away with this boolean and rely on
the return code from queue_work to ensure that it doesn't get scheduled
too often?

> > 
> > > > Also, cl_rpc_users is a refcount (though we don't necessarily free the
> > > > object when it goes to zero). I think you need to call
> > > > put_client_renew_locked here instead of just decrementing the counter.
> > > Since put_client_renew_locked() also renews the client lease, I don't
> > > think it's right nfsd4_cb_recall_any_release to renew the lease because
> > > because this is a callback so the client is not actually sending any
> > > request that causes the lease to renewed, and nfsd4_cb_recall_any_release
> > > is also alled even if the client is completely dead and did not reply, or
> > > reply with some errors.
> > > 
> > What happens when this atomic_inc makes the cl_rpc_count go to zero?
> 
> Do you mean atomic_dec of cl_rpc_users?
> 

Yes, sorry.

> > What actually triggers the cleanup activities in put_client_renew /
> > put_client_renew_locked in that situation?
> 
> maybe I'm missing something, but I don't see any client cleanup
> in put_client_renew/put_client_renew_locked other than renewing
> the lease?
> 
> 

        if (!is_client_expired(clp))
                renew_client_locked(clp);
        else                                         
                wake_up_all(&expiry_wq);


...unless the client has already expired, in which case you need to wake
up the waitqueue. My guess is that if the atomic_dec you're calling here
goes to zero then any tasks on the expiry_wq will hang indefinitely.

> > 
> > > > > +
> > > > > +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> > > > > +	.done		= nfsd4_cb_recall_any_done,
> > > > > +	.release	= nfsd4_cb_recall_any_release,
> > > > > +};
> > > > > +
> > > > >    static struct nfs4_client *create_client(struct xdr_netobj name,
> > > > >    		struct svc_rqst *rqstp, nfs4_verifier *verf)
> > > > >    {
> > > > > @@ -2891,6 +2916,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> > > > >    		free_client(clp);
> > > > >    		return NULL;
> > > > >    	}
> > > > > +	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
> > > > > +			NFSPROC4_CLNT_CB_RECALL_ANY);
> > > > >    	return clp;
> > > > >    }
> > > > >    
> > > > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > > > > index e2daef3cc003..49ca06169642 100644
> > > > > --- a/fs/nfsd/state.h
> > > > > +++ b/fs/nfsd/state.h
> > > > > @@ -411,6 +411,10 @@ struct nfs4_client {
> > > > >    
> > > > >    	unsigned int		cl_state;
> > > > >    	atomic_t		cl_delegs_in_recall;
> > > > > +
> > > > > +	bool			cl_recall_any_busy;
> > > > > +	uint32_t		cl_recall_any_bm;
> > > > > +	struct nfsd4_callback	cl_recall_any;
> > > > >    };
> > > > >    
> > > > >    /* struct nfs4_client_reset
> > > > > @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
> > > > >    	NFSPROC4_CLNT_CB_OFFLOAD,
> > > > >    	NFSPROC4_CLNT_CB_SEQUENCE,
> > > > >    	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
> > > > > +	NFSPROC4_CLNT_CB_RECALL_ANY,
> > > > >    };
> > > > >    
> > > > > +#define RCA4_TYPE_MASK_RDATA_DLG	0
> > > > > +#define RCA4_TYPE_MASK_WDATA_DLG	1
> > > > > +
> > > > >    /* Returns true iff a is later than b: */
> > > > >    static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> > > > >    {
> > > > > diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
> > > > > index 547cf07cf4e0..0d39af1b00a0 100644
> > > > > --- a/fs/nfsd/xdr4cb.h
> > > > > +++ b/fs/nfsd/xdr4cb.h
> > > > > @@ -48,3 +48,9 @@
> > > > >    #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
> > > > >    					cb_sequence_dec_sz +            \
> > > > >    					op_dec_sz)
> > > > > +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
> > > > > +					cb_sequence_enc_sz +            \
> > > > > +					1 + 1 + 1)
> > > > > +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
> > > > > +					cb_sequence_dec_sz +            \
> > > > > +					op_dec_sz)

Jeff Layton Oct. 25, 2022, 9:37 a.m. UTC | #7

On Tue, 2022-10-25 at 05:33 -0400, Jeff Layton wrote:
> On Mon, 2022-10-24 at 18:30 -0700, dai.ngo@oracle.com wrote:
> > On 10/24/22 2:09 PM, Jeff Layton wrote:
> > > On Mon, 2022-10-24 at 12:44 -0700, dai.ngo@oracle.com wrote:
> > > > On 10/24/22 5:16 AM, Jeff Layton wrote:
> > > > > On Sat, 2022-10-22 at 11:09 -0700, Dai Ngo wrote:
> > > > > > There is only one nfsd4_callback, cl_recall_any, added for each
> > > > > > nfs4_client. Access to it must be serialized. For now it's done
> > > > > > by the cl_recall_any_busy flag since it's used only by the
> > > > > > delegation shrinker. If there is another consumer of CB_RECALL_ANY
> > > > > > then a spinlock must be used.
> > > > > > 
> > > > > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > > > > ---
> > > > > >    fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > > >    fs/nfsd/nfs4state.c    | 27 +++++++++++++++++++++
> > > > > >    fs/nfsd/state.h        |  8 +++++++
> > > > > >    fs/nfsd/xdr4cb.h       |  6 +++++
> > > > > >    4 files changed, 105 insertions(+)
> > > > > > 
> > > > > > diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> > > > > > index f0e69edf5f0f..03587e1397f4 100644
> > > > > > --- a/fs/nfsd/nfs4callback.c
> > > > > > +++ b/fs/nfsd/nfs4callback.c
> > > > > > @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
> > > > > >    }
> > > > > >    
> > > > > >    /*
> > > > > > + * CB_RECALLANY4args
> > > > > > + *
> > > > > > + *	struct CB_RECALLANY4args {
> > > > > > + *		uint32_t	craa_objects_to_keep;
> > > > > > + *		bitmap4		craa_type_mask;
> > > > > > + *	};
> > > > > > + */
> > > > > > +static void
> > > > > > +encode_cb_recallany4args(struct xdr_stream *xdr,
> > > > > > +			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
> > > > > > +{
> > > > > > +	__be32 *p;
> > > > > > +
> > > > > > +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
> > > > > > +	p = xdr_reserve_space(xdr, 4);
> > > > > > +	*p++ = xdr_zero;	/* craa_objects_to_keep */
> > > > > > +	p = xdr_reserve_space(xdr, 8);
> > > > > > +	*p++ = cpu_to_be32(1);
> > > > > > +	*p++ = cpu_to_be32(bmval);
> > > > > > +	hdr->nops++;
> > > > > > +}
> > > > > > +
> > > > > > +/*
> > > > > >     * CB_SEQUENCE4args
> > > > > >     *
> > > > > >     *	struct CB_SEQUENCE4args {
> > > > > > @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
> > > > > >    	encode_cb_nops(&hdr);
> > > > > >    }
> > > > > >    
> > > > > > +/*
> > > > > > + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> > > > > > + */
> > > > > > +static void
> > > > > > +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
> > > > > > +		struct xdr_stream *xdr, const void *data)
> > > > > > +{
> > > > > > +	const struct nfsd4_callback *cb = data;
> > > > > > +	struct nfs4_cb_compound_hdr hdr = {
> > > > > > +		.ident = cb->cb_clp->cl_cb_ident,
> > > > > > +		.minorversion = cb->cb_clp->cl_minorversion,
> > > > > > +	};
> > > > > > +
> > > > > > +	encode_cb_compound4args(xdr, &hdr);
> > > > > > +	encode_cb_sequence4args(xdr, cb, &hdr);
> > > > > > +	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
> > > > > > +	encode_cb_nops(&hdr);
> > > > > > +}
> > > > > >    
> > > > > >    /*
> > > > > >     * NFSv4.0 and NFSv4.1 XDR decode functions
> > > > > > @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
> > > > > >    	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
> > > > > >    }
> > > > > >    
> > > > > > +/*
> > > > > > + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> > > > > > + */
> > > > > > +static int
> > > > > > +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
> > > > > > +				  struct xdr_stream *xdr,
> > > > > > +				  void *data)
> > > > > > +{
> > > > > > +	struct nfsd4_callback *cb = data;
> > > > > > +	struct nfs4_cb_compound_hdr hdr;
> > > > > > +	int status;
> > > > > > +
> > > > > > +	status = decode_cb_compound4res(xdr, &hdr);
> > > > > > +	if (unlikely(status))
> > > > > > +		return status;
> > > > > > +	status = decode_cb_sequence4res(xdr, cb);
> > > > > > +	if (unlikely(status || cb->cb_seq_status))
> > > > > > +		return status;
> > > > > > +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
> > > > > > +	return status;
> > > > > > +}
> > > > > > +
> > > > > >    #ifdef CONFIG_NFSD_PNFS
> > > > > >    /*
> > > > > >     * CB_LAYOUTRECALL4args
> > > > > > @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
> > > > > >    #endif
> > > > > >    	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
> > > > > >    	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
> > > > > > +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
> > > > > >    };
> > > > > >    
> > > > > >    static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
> > > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > > > index 4e718500a00c..c60c937dece6 100644
> > > > > > --- a/fs/nfsd/nfs4state.c
> > > > > > +++ b/fs/nfsd/nfs4state.c
> > > > > > @@ -2854,6 +2854,31 @@ static const struct tree_descr client_files[] = {
> > > > > >    	[3] = {""},
> > > > > >    };
> > > > > >    
> > > > > > +static int
> > > > > > +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> > > > > > +			struct rpc_task *task)
> > > > > > +{
> > > > > > +	switch (task->tk_status) {
> > > > > > +	case -NFS4ERR_DELAY:
> > > > > > +		rpc_delay(task, 2 * HZ);
> > > > > > +		return 0;
> > > > > > +	default:
> > > > > > +		return 1;
> > > > > > +	}
> > > > > > +}
> > > > > > +
> > > > > > +static void
> > > > > > +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> > > > > > +{
> > > > > > +	cb->cb_clp->cl_recall_any_busy = false;
> > > > > > +	atomic_dec(&cb->cb_clp->cl_rpc_users);
> > > > > > +}
> > > > > This series probably ought to be one big patch. The problem is that
> > > > > you're adding a bunch of code to do CB_RECALL_ANY, but there is no way
> > > > > to call it without patch #2.
> > > > The reason I separated these patches is that the 1st patch, adding support
> > > > CB_RECALL_ANY can be called for other purposes than just for delegation such
> > > > as to recall pnfs layouts, or when the max number of delegation is reached.
> > > > So that was why I did not combined this patches. However, I understand your
> > > > concern about not being able to test individual patch. So as Chuck suggested,
> > > > perhaps we can leave these as separate patches for easier review and when it's
> > > > finalized we can decide to combine them in to one big patch.  BTW, I plan to
> > > > add a third patch to this series to send CB_RECALL_ANY to clients when the
> > > > max number of delegations is reached.
> > > > 
> > > I think we should get this bit sorted out first,
> > 
> > ok
> > 
> > >   but that sounds
> > > reasonable eventually.
> > > 
> > > > > That makes it hard to judge whether there could be races and locking
> > > > > issues around the handling of cb_recall_any_busy, in particular. From
> > > > > patch #2, it looks like cb_recall_any_busy is protected by the
> > > > > nn->client_lock, but I don't think ->release is called with that held.
> > > > I don't intended to use the nn->client_lock, I think the scope of this
> > > > lock is too big for what's needed to serialize access to struct nfsd4_callback.
> > > > As I mentioned in the cover email, since the cb_recall_any_busy is only
> > > > used by the deleg_reaper we do not need a lock to protect this flag.
> > > > But if there is another of consumer, other than deleg_reaper, of this
> > > > nfsd4_callback then we can add a simple spinlock for it.
> > > > 
> > > > My question is do you think we need to add the spinlock now instead of
> > > > delaying it until there is real need?
> > > > 
> > > I don't see the need for a dedicated spinlock here. You said above that
> > > there is only one of these per client, so you could use the
> > > client->cl_lock.
> > > 
> > > But...I don't see the problem with doing just using the nn->client_lock
> > > here. It's not like we're likely to be calling this that often, and if
> > > we do then the contention for the nn->client_lock is probably the least
> > > of our worries.
> > 
> > If the contention on nn->client_lock is not a concern then I just
> > leave the patch to use the nn->client_lock as it current does.
> > 
> 
> Except you aren't taking the client_lock in ->release. That's what needs
> to be added if you want to keep this boolean.
> 
> > > 
> > > Honestly, do we need this boolean at all? The only place it's checked is
> > > in deleg_reaper. Why not just try to submit the work and if it's already
> > > queued, let it fail?
> > 
> > There is nothing in the existing code to prevent the nfs4_callback from
> > being used again before the current CB_RECALL_ANY request completes. This
> > resulted in se_cb_seq_nr becomes out of sync with the client and server
> > starts getting NFS4ERR_SEQ_MISORDERED then eventually NFS4ERR_BADSESSION
> > from the client.
> > 
> > nfsd4_recall_file_layout has similar usage of nfs4_callback and it uses
> > the ls_lock to make sure the current request is done before allowing new
> > one to proceed.
> > 
> 
> That's a little different. The ls_lock protects ls_recalled, which is
> set when a recall is issued. We only want to issue a recall for a
> delegation once and that's what ensures that it's only issued once.
> 
> CB_RECALL_ANY could be called more than once per client. We don't need
> to ensure "exactly once" semantics there. All of the callbacks are run
> out of workqueues.
> 
> If a workqueue job is already scheduled then queue_work will return
> false when called. Could you just do away with this boolean and rely on
> the return code from queue_work to ensure that it doesn't get scheduled
> too often?
> 
> > > 
> > > > > Also, cl_rpc_users is a refcount (though we don't necessarily free the
> > > > > object when it goes to zero). I think you need to call
> > > > > put_client_renew_locked here instead of just decrementing the counter.
> > > > Since put_client_renew_locked() also renews the client lease, I don't
> > > > think it's right nfsd4_cb_recall_any_release to renew the lease because
> > > > because this is a callback so the client is not actually sending any
> > > > request that causes the lease to renewed, and nfsd4_cb_recall_any_release
> > > > is also alled even if the client is completely dead and did not reply, or
> > > > reply with some errors.
> > > > 
> > > What happens when this atomic_inc makes the cl_rpc_count go to zero?
> > 
> > Do you mean atomic_dec of cl_rpc_users?
> > 
> 
> Yes, sorry.
> 
> > > What actually triggers the cleanup activities in put_client_renew /
> > > put_client_renew_locked in that situation?
> > 
> > maybe I'm missing something, but I don't see any client cleanup
> > in put_client_renew/put_client_renew_locked other than renewing
> > the lease?
> > 
> > 
> 
>         if (!is_client_expired(clp))
>                 renew_client_locked(clp);
>         else                                         
>                 wake_up_all(&expiry_wq);
> 
> 
> ...unless the client has already expired, in which case you need to wake
> up the waitqueue. My guess is that if the atomic_dec you're calling here
> goes to zero then any tasks on the expiry_wq will hang indefinitely.
> 

I'm not sure you need to take a reference to the client here at all. Can
the client go away while a callback job is still running? You may be
able to assume that it will stick around for the life of the callback
(though you should verify this before assuming it).

> > > 
> > > > > > +
> > > > > > +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> > > > > > +	.done		= nfsd4_cb_recall_any_done,
> > > > > > +	.release	= nfsd4_cb_recall_any_release,
> > > > > > +};
> > > > > > +
> > > > > >    static struct nfs4_client *create_client(struct xdr_netobj name,
> > > > > >    		struct svc_rqst *rqstp, nfs4_verifier *verf)
> > > > > >    {
> > > > > > @@ -2891,6 +2916,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> > > > > >    		free_client(clp);
> > > > > >    		return NULL;
> > > > > >    	}
> > > > > > +	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
> > > > > > +			NFSPROC4_CLNT_CB_RECALL_ANY);
> > > > > >    	return clp;
> > > > > >    }
> > > > > >    
> > > > > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > > > > > index e2daef3cc003..49ca06169642 100644
> > > > > > --- a/fs/nfsd/state.h
> > > > > > +++ b/fs/nfsd/state.h
> > > > > > @@ -411,6 +411,10 @@ struct nfs4_client {
> > > > > >    
> > > > > >    	unsigned int		cl_state;
> > > > > >    	atomic_t		cl_delegs_in_recall;
> > > > > > +
> > > > > > +	bool			cl_recall_any_busy;
> > > > > > +	uint32_t		cl_recall_any_bm;
> > > > > > +	struct nfsd4_callback	cl_recall_any;
> > > > > >    };
> > > > > >    
> > > > > >    /* struct nfs4_client_reset
> > > > > > @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
> > > > > >    	NFSPROC4_CLNT_CB_OFFLOAD,
> > > > > >    	NFSPROC4_CLNT_CB_SEQUENCE,
> > > > > >    	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
> > > > > > +	NFSPROC4_CLNT_CB_RECALL_ANY,
> > > > > >    };
> > > > > >    
> > > > > > +#define RCA4_TYPE_MASK_RDATA_DLG	0
> > > > > > +#define RCA4_TYPE_MASK_WDATA_DLG	1
> > > > > > +
> > > > > >    /* Returns true iff a is later than b: */
> > > > > >    static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> > > > > >    {
> > > > > > diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
> > > > > > index 547cf07cf4e0..0d39af1b00a0 100644
> > > > > > --- a/fs/nfsd/xdr4cb.h
> > > > > > +++ b/fs/nfsd/xdr4cb.h
> > > > > > @@ -48,3 +48,9 @@
> > > > > >    #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
> > > > > >    					cb_sequence_dec_sz +            \
> > > > > >    					op_dec_sz)
> > > > > > +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
> > > > > > +					cb_sequence_enc_sz +            \
> > > > > > +					1 + 1 + 1)
> > > > > > +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
> > > > > > +					cb_sequence_dec_sz +            \
> > > > > > +					op_dec_sz)
>

Dai Ngo Oct. 25, 2022, 11:35 p.m. UTC | #8

On 10/25/22 2:33 AM, Jeff Layton wrote:
> On Mon, 2022-10-24 at 18:30 -0700, dai.ngo@oracle.com wrote:
>> On 10/24/22 2:09 PM, Jeff Layton wrote:
>>> On Mon, 2022-10-24 at 12:44 -0700, dai.ngo@oracle.com wrote:
>>>> On 10/24/22 5:16 AM, Jeff Layton wrote:
>>>>> On Sat, 2022-10-22 at 11:09 -0700, Dai Ngo wrote:
>>>>>> There is only one nfsd4_callback, cl_recall_any, added for each
>>>>>> nfs4_client. Access to it must be serialized. For now it's done
>>>>>> by the cl_recall_any_busy flag since it's used only by the
>>>>>> delegation shrinker. If there is another consumer of CB_RECALL_ANY
>>>>>> then a spinlock must be used.
>>>>>>
>>>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>>>> ---
>>>>>>     fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>     fs/nfsd/nfs4state.c    | 27 +++++++++++++++++++++
>>>>>>     fs/nfsd/state.h        |  8 +++++++
>>>>>>     fs/nfsd/xdr4cb.h       |  6 +++++
>>>>>>     4 files changed, 105 insertions(+)
>>>>>>
>>>>>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
>>>>>> index f0e69edf5f0f..03587e1397f4 100644
>>>>>> --- a/fs/nfsd/nfs4callback.c
>>>>>> +++ b/fs/nfsd/nfs4callback.c
>>>>>> @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
>>>>>>     }
>>>>>>     
>>>>>>     /*
>>>>>> + * CB_RECALLANY4args
>>>>>> + *
>>>>>> + *	struct CB_RECALLANY4args {
>>>>>> + *		uint32_t	craa_objects_to_keep;
>>>>>> + *		bitmap4		craa_type_mask;
>>>>>> + *	};
>>>>>> + */
>>>>>> +static void
>>>>>> +encode_cb_recallany4args(struct xdr_stream *xdr,
>>>>>> +			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
>>>>>> +{
>>>>>> +	__be32 *p;
>>>>>> +
>>>>>> +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
>>>>>> +	p = xdr_reserve_space(xdr, 4);
>>>>>> +	*p++ = xdr_zero;	/* craa_objects_to_keep */
>>>>>> +	p = xdr_reserve_space(xdr, 8);
>>>>>> +	*p++ = cpu_to_be32(1);
>>>>>> +	*p++ = cpu_to_be32(bmval);
>>>>>> +	hdr->nops++;
>>>>>> +}
>>>>>> +
>>>>>> +/*
>>>>>>      * CB_SEQUENCE4args
>>>>>>      *
>>>>>>      *	struct CB_SEQUENCE4args {
>>>>>> @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
>>>>>>     	encode_cb_nops(&hdr);
>>>>>>     }
>>>>>>     
>>>>>> +/*
>>>>>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>>>>>> + */
>>>>>> +static void
>>>>>> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
>>>>>> +		struct xdr_stream *xdr, const void *data)
>>>>>> +{
>>>>>> +	const struct nfsd4_callback *cb = data;
>>>>>> +	struct nfs4_cb_compound_hdr hdr = {
>>>>>> +		.ident = cb->cb_clp->cl_cb_ident,
>>>>>> +		.minorversion = cb->cb_clp->cl_minorversion,
>>>>>> +	};
>>>>>> +
>>>>>> +	encode_cb_compound4args(xdr, &hdr);
>>>>>> +	encode_cb_sequence4args(xdr, cb, &hdr);
>>>>>> +	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
>>>>>> +	encode_cb_nops(&hdr);
>>>>>> +}
>>>>>>     
>>>>>>     /*
>>>>>>      * NFSv4.0 and NFSv4.1 XDR decode functions
>>>>>> @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
>>>>>>     	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
>>>>>>     }
>>>>>>     
>>>>>> +/*
>>>>>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>>>>>> + */
>>>>>> +static int
>>>>>> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
>>>>>> +				  struct xdr_stream *xdr,
>>>>>> +				  void *data)
>>>>>> +{
>>>>>> +	struct nfsd4_callback *cb = data;
>>>>>> +	struct nfs4_cb_compound_hdr hdr;
>>>>>> +	int status;
>>>>>> +
>>>>>> +	status = decode_cb_compound4res(xdr, &hdr);
>>>>>> +	if (unlikely(status))
>>>>>> +		return status;
>>>>>> +	status = decode_cb_sequence4res(xdr, cb);
>>>>>> +	if (unlikely(status || cb->cb_seq_status))
>>>>>> +		return status;
>>>>>> +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
>>>>>> +	return status;
>>>>>> +}
>>>>>> +
>>>>>>     #ifdef CONFIG_NFSD_PNFS
>>>>>>     /*
>>>>>>      * CB_LAYOUTRECALL4args
>>>>>> @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
>>>>>>     #endif
>>>>>>     	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
>>>>>>     	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
>>>>>> +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
>>>>>>     };
>>>>>>     
>>>>>>     static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
>>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>>>> index 4e718500a00c..c60c937dece6 100644
>>>>>> --- a/fs/nfsd/nfs4state.c
>>>>>> +++ b/fs/nfsd/nfs4state.c
>>>>>> @@ -2854,6 +2854,31 @@ static const struct tree_descr client_files[] = {
>>>>>>     	[3] = {""},
>>>>>>     };
>>>>>>     
>>>>>> +static int
>>>>>> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>>>>>> +			struct rpc_task *task)
>>>>>> +{
>>>>>> +	switch (task->tk_status) {
>>>>>> +	case -NFS4ERR_DELAY:
>>>>>> +		rpc_delay(task, 2 * HZ);
>>>>>> +		return 0;
>>>>>> +	default:
>>>>>> +		return 1;
>>>>>> +	}
>>>>>> +}
>>>>>> +
>>>>>> +static void
>>>>>> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>>>>>> +{
>>>>>> +	cb->cb_clp->cl_recall_any_busy = false;
>>>>>> +	atomic_dec(&cb->cb_clp->cl_rpc_users);
>>>>>> +}
>>>>> This series probably ought to be one big patch. The problem is that
>>>>> you're adding a bunch of code to do CB_RECALL_ANY, but there is no way
>>>>> to call it without patch #2.
>>>> The reason I separated these patches is that the 1st patch, adding support
>>>> CB_RECALL_ANY can be called for other purposes than just for delegation such
>>>> as to recall pnfs layouts, or when the max number of delegation is reached.
>>>> So that was why I did not combined this patches. However, I understand your
>>>> concern about not being able to test individual patch. So as Chuck suggested,
>>>> perhaps we can leave these as separate patches for easier review and when it's
>>>> finalized we can decide to combine them in to one big patch.  BTW, I plan to
>>>> add a third patch to this series to send CB_RECALL_ANY to clients when the
>>>> max number of delegations is reached.
>>>>
>>> I think we should get this bit sorted out first,
>> ok
>>
>>>    but that sounds
>>> reasonable eventually.
>>>
>>>>> That makes it hard to judge whether there could be races and locking
>>>>> issues around the handling of cb_recall_any_busy, in particular. From
>>>>> patch #2, it looks like cb_recall_any_busy is protected by the
>>>>> nn->client_lock, but I don't think ->release is called with that held.
>>>> I don't intended to use the nn->client_lock, I think the scope of this
>>>> lock is too big for what's needed to serialize access to struct nfsd4_callback.
>>>> As I mentioned in the cover email, since the cb_recall_any_busy is only
>>>> used by the deleg_reaper we do not need a lock to protect this flag.
>>>> But if there is another of consumer, other than deleg_reaper, of this
>>>> nfsd4_callback then we can add a simple spinlock for it.
>>>>
>>>> My question is do you think we need to add the spinlock now instead of
>>>> delaying it until there is real need?
>>>>
>>> I don't see the need for a dedicated spinlock here. You said above that
>>> there is only one of these per client, so you could use the
>>> client->cl_lock.
>>>
>>> But...I don't see the problem with doing just using the nn->client_lock
>>> here. It's not like we're likely to be calling this that often, and if
>>> we do then the contention for the nn->client_lock is probably the least
>>> of our worries.
>> If the contention on nn->client_lock is not a concern then I just
>> leave the patch to use the nn->client_lock as it current does.
>>
> Except you aren't taking the client_lock in ->release. That's what needs
> to be added if you want to keep this boolean.

ok, will do, see below.

>
>>> Honestly, do we need this boolean at all? The only place it's checked is
>>> in deleg_reaper. Why not just try to submit the work and if it's already
>>> queued, let it fail?
>> There is nothing in the existing code to prevent the nfs4_callback from
>> being used again before the current CB_RECALL_ANY request completes. This
>> resulted in se_cb_seq_nr becomes out of sync with the client and server
>> starts getting NFS4ERR_SEQ_MISORDERED then eventually NFS4ERR_BADSESSION
>> from the client.
>>
>> nfsd4_recall_file_layout has similar usage of nfs4_callback and it uses
>> the ls_lock to make sure the current request is done before allowing new
>> one to proceed.
>>
> That's a little different. The ls_lock protects ls_recalled, which is
> set when a recall is issued. We only want to issue a recall for a
> delegation once and that's what ensures that it's only issued once.
>
> CB_RECALL_ANY could be called more than once per client. We don't need
> to ensure "exactly once" semantics there. All of the callbacks are run
> out of workqueues.
>
> If a workqueue job is already scheduled then queue_work will return
> false when called. Could you just do away with this boolean and rely on
> the return code from queue_work to ensure that it doesn't get scheduled
> too often?

I think once a job is executed by nfsd4_run_cb_work and while the server
has not replied yet, nfsd4_queue_cb can be called successfully with the
same 'cb_work' resulting in this request being sent with the same se_cb_seq_nr
as in the previous request (se_cb_seq_nr is incremented when the reply is
received). This causes the client to return NFS4ERR_RETRY_UNCACHED_REP to
the second request which is OK.

However, in my stress testing, eventually the client starts replying with
NFS4ERR_SEQ_MISORDERED. I think the reason for the NFS4ERR_SEQ_MISORDERED
errors is because the 'se_cb_seq_nr' on the server is out of sync.

>
>>>>> Also, cl_rpc_users is a refcount (though we don't necessarily free the
>>>>> object when it goes to zero). I think you need to call
>>>>> put_client_renew_locked here instead of just decrementing the counter.
>>>> Since put_client_renew_locked() also renews the client lease, I don't
>>>> think it's right nfsd4_cb_recall_any_release to renew the lease because
>>>> because this is a callback so the client is not actually sending any
>>>> request that causes the lease to renewed, and nfsd4_cb_recall_any_release
>>>> is also alled even if the client is completely dead and did not reply, or
>>>> reply with some errors.
>>>>
>>> What happens when this atomic_inc makes the cl_rpc_count go to zero?
>> Do you mean atomic_dec of cl_rpc_users?
>>
> Yes, sorry.
>
>>> What actually triggers the cleanup activities in put_client_renew /
>>> put_client_renew_locked in that situation?
>> maybe I'm missing something, but I don't see any client cleanup
>> in put_client_renew/put_client_renew_locked other than renewing
>> the lease?
>>
>>
>          if (!is_client_expired(clp))
>                  renew_client_locked(clp);
>          else
>                  wake_up_all(&expiry_wq);
>
>
> ...unless the client has already expired, in which case you need to wake
> up the waitqueue. My guess is that if the atomic_dec you're calling here
> goes to zero then any tasks on the expiry_wq will hang indefinitely.

I see, I will make the change to call put_client_renew to decrement
cl_rpc_users.

Thanks,
-Dai

>
>>>>>> +
>>>>>> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
>>>>>> +	.done		= nfsd4_cb_recall_any_done,
>>>>>> +	.release	= nfsd4_cb_recall_any_release,
>>>>>> +};
>>>>>> +
>>>>>>     static struct nfs4_client *create_client(struct xdr_netobj name,
>>>>>>     		struct svc_rqst *rqstp, nfs4_verifier *verf)
>>>>>>     {
>>>>>> @@ -2891,6 +2916,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
>>>>>>     		free_client(clp);
>>>>>>     		return NULL;
>>>>>>     	}
>>>>>> +	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
>>>>>> +			NFSPROC4_CLNT_CB_RECALL_ANY);
>>>>>>     	return clp;
>>>>>>     }
>>>>>>     
>>>>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>>>>>> index e2daef3cc003..49ca06169642 100644
>>>>>> --- a/fs/nfsd/state.h
>>>>>> +++ b/fs/nfsd/state.h
>>>>>> @@ -411,6 +411,10 @@ struct nfs4_client {
>>>>>>     
>>>>>>     	unsigned int		cl_state;
>>>>>>     	atomic_t		cl_delegs_in_recall;
>>>>>> +
>>>>>> +	bool			cl_recall_any_busy;
>>>>>> +	uint32_t		cl_recall_any_bm;
>>>>>> +	struct nfsd4_callback	cl_recall_any;
>>>>>>     };
>>>>>>     
>>>>>>     /* struct nfs4_client_reset
>>>>>> @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
>>>>>>     	NFSPROC4_CLNT_CB_OFFLOAD,
>>>>>>     	NFSPROC4_CLNT_CB_SEQUENCE,
>>>>>>     	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
>>>>>> +	NFSPROC4_CLNT_CB_RECALL_ANY,
>>>>>>     };
>>>>>>     
>>>>>> +#define RCA4_TYPE_MASK_RDATA_DLG	0
>>>>>> +#define RCA4_TYPE_MASK_WDATA_DLG	1
>>>>>> +
>>>>>>     /* Returns true iff a is later than b: */
>>>>>>     static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
>>>>>>     {
>>>>>> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
>>>>>> index 547cf07cf4e0..0d39af1b00a0 100644
>>>>>> --- a/fs/nfsd/xdr4cb.h
>>>>>> +++ b/fs/nfsd/xdr4cb.h
>>>>>> @@ -48,3 +48,9 @@
>>>>>>     #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
>>>>>>     					cb_sequence_dec_sz +            \
>>>>>>     					op_dec_sz)
>>>>>> +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
>>>>>> +					cb_sequence_enc_sz +            \
>>>>>> +					1 + 1 + 1)
>>>>>> +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
>>>>>> +					cb_sequence_dec_sz +            \
>>>>>> +					op_dec_sz)

Dai Ngo Oct. 25, 2022, 11:39 p.m. UTC | #9

On 10/25/22 2:37 AM, Jeff Layton wrote:
> On Tue, 2022-10-25 at 05:33 -0400, Jeff Layton wrote:
>> On Mon, 2022-10-24 at 18:30 -0700, dai.ngo@oracle.com wrote:
>>> On 10/24/22 2:09 PM, Jeff Layton wrote:
>>>> On Mon, 2022-10-24 at 12:44 -0700, dai.ngo@oracle.com wrote:
>>>>> On 10/24/22 5:16 AM, Jeff Layton wrote:
>>>>>> On Sat, 2022-10-22 at 11:09 -0700, Dai Ngo wrote:
>>>>>>> There is only one nfsd4_callback, cl_recall_any, added for each
>>>>>>> nfs4_client. Access to it must be serialized. For now it's done
>>>>>>> by the cl_recall_any_busy flag since it's used only by the
>>>>>>> delegation shrinker. If there is another consumer of CB_RECALL_ANY
>>>>>>> then a spinlock must be used.
>>>>>>>
>>>>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>>>>> ---
>>>>>>>     fs/nfsd/nfs4callback.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>>     fs/nfsd/nfs4state.c    | 27 +++++++++++++++++++++
>>>>>>>     fs/nfsd/state.h        |  8 +++++++
>>>>>>>     fs/nfsd/xdr4cb.h       |  6 +++++
>>>>>>>     4 files changed, 105 insertions(+)
>>>>>>>
>>>>>>> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
>>>>>>> index f0e69edf5f0f..03587e1397f4 100644
>>>>>>> --- a/fs/nfsd/nfs4callback.c
>>>>>>> +++ b/fs/nfsd/nfs4callback.c
>>>>>>> @@ -329,6 +329,29 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
>>>>>>>     }
>>>>>>>     
>>>>>>>     /*
>>>>>>> + * CB_RECALLANY4args
>>>>>>> + *
>>>>>>> + *	struct CB_RECALLANY4args {
>>>>>>> + *		uint32_t	craa_objects_to_keep;
>>>>>>> + *		bitmap4		craa_type_mask;
>>>>>>> + *	};
>>>>>>> + */
>>>>>>> +static void
>>>>>>> +encode_cb_recallany4args(struct xdr_stream *xdr,
>>>>>>> +			struct nfs4_cb_compound_hdr *hdr, uint32_t bmval)
>>>>>>> +{
>>>>>>> +	__be32 *p;
>>>>>>> +
>>>>>>> +	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
>>>>>>> +	p = xdr_reserve_space(xdr, 4);
>>>>>>> +	*p++ = xdr_zero;	/* craa_objects_to_keep */
>>>>>>> +	p = xdr_reserve_space(xdr, 8);
>>>>>>> +	*p++ = cpu_to_be32(1);
>>>>>>> +	*p++ = cpu_to_be32(bmval);
>>>>>>> +	hdr->nops++;
>>>>>>> +}
>>>>>>> +
>>>>>>> +/*
>>>>>>>      * CB_SEQUENCE4args
>>>>>>>      *
>>>>>>>      *	struct CB_SEQUENCE4args {
>>>>>>> @@ -482,6 +505,24 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
>>>>>>>     	encode_cb_nops(&hdr);
>>>>>>>     }
>>>>>>>     
>>>>>>> +/*
>>>>>>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>>>>>>> + */
>>>>>>> +static void
>>>>>>> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
>>>>>>> +		struct xdr_stream *xdr, const void *data)
>>>>>>> +{
>>>>>>> +	const struct nfsd4_callback *cb = data;
>>>>>>> +	struct nfs4_cb_compound_hdr hdr = {
>>>>>>> +		.ident = cb->cb_clp->cl_cb_ident,
>>>>>>> +		.minorversion = cb->cb_clp->cl_minorversion,
>>>>>>> +	};
>>>>>>> +
>>>>>>> +	encode_cb_compound4args(xdr, &hdr);
>>>>>>> +	encode_cb_sequence4args(xdr, cb, &hdr);
>>>>>>> +	encode_cb_recallany4args(xdr, &hdr, cb->cb_clp->cl_recall_any_bm);
>>>>>>> +	encode_cb_nops(&hdr);
>>>>>>> +}
>>>>>>>     
>>>>>>>     /*
>>>>>>>      * NFSv4.0 and NFSv4.1 XDR decode functions
>>>>>>> @@ -520,6 +561,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
>>>>>>>     	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
>>>>>>>     }
>>>>>>>     
>>>>>>> +/*
>>>>>>> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
>>>>>>> + */
>>>>>>> +static int
>>>>>>> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
>>>>>>> +				  struct xdr_stream *xdr,
>>>>>>> +				  void *data)
>>>>>>> +{
>>>>>>> +	struct nfsd4_callback *cb = data;
>>>>>>> +	struct nfs4_cb_compound_hdr hdr;
>>>>>>> +	int status;
>>>>>>> +
>>>>>>> +	status = decode_cb_compound4res(xdr, &hdr);
>>>>>>> +	if (unlikely(status))
>>>>>>> +		return status;
>>>>>>> +	status = decode_cb_sequence4res(xdr, cb);
>>>>>>> +	if (unlikely(status || cb->cb_seq_status))
>>>>>>> +		return status;
>>>>>>> +	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
>>>>>>> +	return status;
>>>>>>> +}
>>>>>>> +
>>>>>>>     #ifdef CONFIG_NFSD_PNFS
>>>>>>>     /*
>>>>>>>      * CB_LAYOUTRECALL4args
>>>>>>> @@ -783,6 +846,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
>>>>>>>     #endif
>>>>>>>     	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
>>>>>>>     	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
>>>>>>> +	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
>>>>>>>     };
>>>>>>>     
>>>>>>>     static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
>>>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>>>>> index 4e718500a00c..c60c937dece6 100644
>>>>>>> --- a/fs/nfsd/nfs4state.c
>>>>>>> +++ b/fs/nfsd/nfs4state.c
>>>>>>> @@ -2854,6 +2854,31 @@ static const struct tree_descr client_files[] = {
>>>>>>>     	[3] = {""},
>>>>>>>     };
>>>>>>>     
>>>>>>> +static int
>>>>>>> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>>>>>>> +			struct rpc_task *task)
>>>>>>> +{
>>>>>>> +	switch (task->tk_status) {
>>>>>>> +	case -NFS4ERR_DELAY:
>>>>>>> +		rpc_delay(task, 2 * HZ);
>>>>>>> +		return 0;
>>>>>>> +	default:
>>>>>>> +		return 1;
>>>>>>> +	}
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void
>>>>>>> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
>>>>>>> +{
>>>>>>> +	cb->cb_clp->cl_recall_any_busy = false;
>>>>>>> +	atomic_dec(&cb->cb_clp->cl_rpc_users);
>>>>>>> +}
>>>>>> This series probably ought to be one big patch. The problem is that
>>>>>> you're adding a bunch of code to do CB_RECALL_ANY, but there is no way
>>>>>> to call it without patch #2.
>>>>> The reason I separated these patches is that the 1st patch, adding support
>>>>> CB_RECALL_ANY can be called for other purposes than just for delegation such
>>>>> as to recall pnfs layouts, or when the max number of delegation is reached.
>>>>> So that was why I did not combined this patches. However, I understand your
>>>>> concern about not being able to test individual patch. So as Chuck suggested,
>>>>> perhaps we can leave these as separate patches for easier review and when it's
>>>>> finalized we can decide to combine them in to one big patch.  BTW, I plan to
>>>>> add a third patch to this series to send CB_RECALL_ANY to clients when the
>>>>> max number of delegations is reached.
>>>>>
>>>> I think we should get this bit sorted out first,
>>> ok
>>>
>>>>    but that sounds
>>>> reasonable eventually.
>>>>
>>>>>> That makes it hard to judge whether there could be races and locking
>>>>>> issues around the handling of cb_recall_any_busy, in particular. From
>>>>>> patch #2, it looks like cb_recall_any_busy is protected by the
>>>>>> nn->client_lock, but I don't think ->release is called with that held.
>>>>> I don't intended to use the nn->client_lock, I think the scope of this
>>>>> lock is too big for what's needed to serialize access to struct nfsd4_callback.
>>>>> As I mentioned in the cover email, since the cb_recall_any_busy is only
>>>>> used by the deleg_reaper we do not need a lock to protect this flag.
>>>>> But if there is another of consumer, other than deleg_reaper, of this
>>>>> nfsd4_callback then we can add a simple spinlock for it.
>>>>>
>>>>> My question is do you think we need to add the spinlock now instead of
>>>>> delaying it until there is real need?
>>>>>
>>>> I don't see the need for a dedicated spinlock here. You said above that
>>>> there is only one of these per client, so you could use the
>>>> client->cl_lock.
>>>>
>>>> But...I don't see the problem with doing just using the nn->client_lock
>>>> here. It's not like we're likely to be calling this that often, and if
>>>> we do then the contention for the nn->client_lock is probably the least
>>>> of our worries.
>>> If the contention on nn->client_lock is not a concern then I just
>>> leave the patch to use the nn->client_lock as it current does.
>>>
>> Except you aren't taking the client_lock in ->release. That's what needs
>> to be added if you want to keep this boolean.
>>
>>>> Honestly, do we need this boolean at all? The only place it's checked is
>>>> in deleg_reaper. Why not just try to submit the work and if it's already
>>>> queued, let it fail?
>>> There is nothing in the existing code to prevent the nfs4_callback from
>>> being used again before the current CB_RECALL_ANY request completes. This
>>> resulted in se_cb_seq_nr becomes out of sync with the client and server
>>> starts getting NFS4ERR_SEQ_MISORDERED then eventually NFS4ERR_BADSESSION
>>> from the client.
>>>
>>> nfsd4_recall_file_layout has similar usage of nfs4_callback and it uses
>>> the ls_lock to make sure the current request is done before allowing new
>>> one to proceed.
>>>
>> That's a little different. The ls_lock protects ls_recalled, which is
>> set when a recall is issued. We only want to issue a recall for a
>> delegation once and that's what ensures that it's only issued once.
>>
>> CB_RECALL_ANY could be called more than once per client. We don't need
>> to ensure "exactly once" semantics there. All of the callbacks are run
>> out of workqueues.
>>
>> If a workqueue job is already scheduled then queue_work will return
>> false when called. Could you just do away with this boolean and rely on
>> the return code from queue_work to ensure that it doesn't get scheduled
>> too often?
>>
>>>>>> Also, cl_rpc_users is a refcount (though we don't necessarily free the
>>>>>> object when it goes to zero). I think you need to call
>>>>>> put_client_renew_locked here instead of just decrementing the counter.
>>>>> Since put_client_renew_locked() also renews the client lease, I don't
>>>>> think it's right nfsd4_cb_recall_any_release to renew the lease because
>>>>> because this is a callback so the client is not actually sending any
>>>>> request that causes the lease to renewed, and nfsd4_cb_recall_any_release
>>>>> is also alled even if the client is completely dead and did not reply, or
>>>>> reply with some errors.
>>>>>
>>>> What happens when this atomic_inc makes the cl_rpc_count go to zero?
>>> Do you mean atomic_dec of cl_rpc_users?
>>>
>> Yes, sorry.
>>
>>>> What actually triggers the cleanup activities in put_client_renew /
>>>> put_client_renew_locked in that situation?
>>> maybe I'm missing something, but I don't see any client cleanup
>>> in put_client_renew/put_client_renew_locked other than renewing
>>> the lease?
>>>
>>>
>>          if (!is_client_expired(clp))
>>                  renew_client_locked(clp);
>>          else
>>                  wake_up_all(&expiry_wq);
>>
>>
>> ...unless the client has already expired, in which case you need to wake
>> up the waitqueue. My guess is that if the atomic_dec you're calling here
>> goes to zero then any tasks on the expiry_wq will hang indefinitely.
>>
> I'm not sure you need to take a reference to the client here at all. Can
> the client go away while a callback job is still running? You may be
> able to assume that it will stick around for the life of the callback
> (though you should verify this before assuming it).

I think without increment cl_rpc_users, the client lease can be expired and
client being destroyed while CB_RECALL_ANY request has not completed yet.

-Dai

>
>>>>>>> +
>>>>>>> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
>>>>>>> +	.done		= nfsd4_cb_recall_any_done,
>>>>>>> +	.release	= nfsd4_cb_recall_any_release,
>>>>>>> +};
>>>>>>> +
>>>>>>>     static struct nfs4_client *create_client(struct xdr_netobj name,
>>>>>>>     		struct svc_rqst *rqstp, nfs4_verifier *verf)
>>>>>>>     {
>>>>>>> @@ -2891,6 +2916,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
>>>>>>>     		free_client(clp);
>>>>>>>     		return NULL;
>>>>>>>     	}
>>>>>>> +	nfsd4_init_cb(&clp->cl_recall_any, clp, &nfsd4_cb_recall_any_ops,
>>>>>>> +			NFSPROC4_CLNT_CB_RECALL_ANY);
>>>>>>>     	return clp;
>>>>>>>     }
>>>>>>>     
>>>>>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>>>>>>> index e2daef3cc003..49ca06169642 100644
>>>>>>> --- a/fs/nfsd/state.h
>>>>>>> +++ b/fs/nfsd/state.h
>>>>>>> @@ -411,6 +411,10 @@ struct nfs4_client {
>>>>>>>     
>>>>>>>     	unsigned int		cl_state;
>>>>>>>     	atomic_t		cl_delegs_in_recall;
>>>>>>> +
>>>>>>> +	bool			cl_recall_any_busy;
>>>>>>> +	uint32_t		cl_recall_any_bm;
>>>>>>> +	struct nfsd4_callback	cl_recall_any;
>>>>>>>     };
>>>>>>>     
>>>>>>>     /* struct nfs4_client_reset
>>>>>>> @@ -639,8 +643,12 @@ enum nfsd4_cb_op {
>>>>>>>     	NFSPROC4_CLNT_CB_OFFLOAD,
>>>>>>>     	NFSPROC4_CLNT_CB_SEQUENCE,
>>>>>>>     	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
>>>>>>> +	NFSPROC4_CLNT_CB_RECALL_ANY,
>>>>>>>     };
>>>>>>>     
>>>>>>> +#define RCA4_TYPE_MASK_RDATA_DLG	0
>>>>>>> +#define RCA4_TYPE_MASK_WDATA_DLG	1
>>>>>>> +
>>>>>>>     /* Returns true iff a is later than b: */
>>>>>>>     static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
>>>>>>>     {
>>>>>>> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
>>>>>>> index 547cf07cf4e0..0d39af1b00a0 100644
>>>>>>> --- a/fs/nfsd/xdr4cb.h
>>>>>>> +++ b/fs/nfsd/xdr4cb.h
>>>>>>> @@ -48,3 +48,9 @@
>>>>>>>     #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
>>>>>>>     					cb_sequence_dec_sz +            \
>>>>>>>     					op_dec_sz)
>>>>>>> +#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
>>>>>>> +					cb_sequence_enc_sz +            \
>>>>>>> +					1 + 1 + 1)
>>>>>>> +#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
>>>>>>> +					cb_sequence_dec_sz +            \
>>>>>>> +					op_dec_sz)

[v2,1/2] NFSD: add support for sending CB_RECALL_ANY

Commit Message

Comments

Patch