diff mbox series

[5/6] nfsd: add support for freeing unused session-DRC slots

Message ID 20241119004928.3245873-6-neilb@suse.de (mailing list archive)
State New
Headers show
Series nfsd: allocate/free session-based DRC slots on demand | expand

Commit Message

NeilBrown Nov. 19, 2024, 12:41 a.m. UTC
Reducing the number of slots in the session slot table requires
confirmation from the client.  This patch adds reduce_session_slots()
which starts the process of getting confirmation, but never calls it.
That will come in a later patch.

Before we can free a slot we need to confirm that the client won't try
to use it again.  This involves returning a lower cr_maxrequests in a
SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
is not larger than we limit we are trying to impose.  So for each slot
we need to remember that we have sent a reduced cr_maxrequests.

To achieve this we introduce a concept of request "generations".  Each
time we decide to reduce cr_maxrequests we increment the generation
number, and record this when we return the lower cr_maxrequests to the
client.  When a slot with the current generation reports a low
ca_maxrequests, we commit to that level and free extra slots.

We use an 8 bit generation number (64 seems wasteful) and if it cycles
we iterate all slots and reset the generation number to avoid false matches.

When we free a slot we store the seqid in the slot pointer so that it can
be restored when we reactivate the slot.  The RFC can be read as
suggesting that the slot number could restart from one after a slot is
retired and reactivated, but also suggests that retiring slots is not
required.  So when we reactive a slot we accept with the next seqid in
sequence, or 1.

When decoding sa_highest_slotid into maxslots we need to add 1 - this
matches how it is encoded for the reply.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
 fs/nfsd/nfs4xdr.c   |  5 +--
 fs/nfsd/state.h     |  4 +++
 fs/nfsd/xdr4.h      |  2 --
 4 files changed, 76 insertions(+), 16 deletions(-)

Comments

Chuck Lever III Nov. 19, 2024, 7:25 p.m. UTC | #1
On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> Reducing the number of slots in the session slot table requires
> confirmation from the client.  This patch adds reduce_session_slots()
> which starts the process of getting confirmation, but never calls it.
> That will come in a later patch.
> 
> Before we can free a slot we need to confirm that the client won't try
> to use it again.  This involves returning a lower cr_maxrequests in a
> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> is not larger than we limit we are trying to impose.  So for each slot
> we need to remember that we have sent a reduced cr_maxrequests.
> 
> To achieve this we introduce a concept of request "generations".  Each
> time we decide to reduce cr_maxrequests we increment the generation
> number, and record this when we return the lower cr_maxrequests to the
> client.  When a slot with the current generation reports a low
> ca_maxrequests, we commit to that level and free extra slots.
> 
> We use an 8 bit generation number (64 seems wasteful) and if it cycles
> we iterate all slots and reset the generation number to avoid false matches.
> 
> When we free a slot we store the seqid in the slot pointer so that it can
> be restored when we reactivate the slot.  The RFC can be read as
> suggesting that the slot number could restart from one after a slot is
> retired and reactivated, but also suggests that retiring slots is not
> required.  So when we reactive a slot we accept with the next seqid in
> sequence, or 1.
> 
> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> matches how it is encoded for the reply.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>  fs/nfsd/nfs4xdr.c   |  5 +--
>  fs/nfsd/state.h     |  4 +++
>  fs/nfsd/xdr4.h      |  2 --
>  4 files changed, 76 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index fb522165b376..0625b0aec6b8 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>  
>  static void
> -free_session_slots(struct nfsd4_session *ses)
> +free_session_slots(struct nfsd4_session *ses, int from)
>  {
>  	int i;
>  
> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +	if (from >= ses->se_fchannel.maxreqs)
> +		return;
> +
> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>  
> -		xa_erase(&ses->se_slots, i);
> +		/*
> +		 * Save the seqid in case we reactivate this slot.
> +		 * This will never require a memory allocation so GFP
> +		 * flag is irrelevant
> +		 */
> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> +			 GFP_ATOMIC);

Again... ATOMIC is probably not what we want here, even if it is
only documentary.

And, I thought we determined that an unretired slot had a sequence
number that is reset. Why save the slot's seqid? If I'm missing
something, the comment here should be bolstered to explain it.


>  		free_svc_cred(&slot->sl_cred);
>  		kfree(slot);
>  	}
> +	ses->se_fchannel.maxreqs = from;
> +	if (ses->se_target_maxslots > from)
> +		ses->se_target_maxslots = from;
> +}
> +
> +static int __maybe_unused
> +reduce_session_slots(struct nfsd4_session *ses, int dec)
> +{
> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> +					  nfsd_net_id);
> +	int ret = 0;
> +
> +	if (ses->se_target_maxslots <= 1)
> +		return ret;
> +	if (!spin_trylock(&nn->client_lock))
> +		return ret;
> +	ret = min(dec, ses->se_target_maxslots-1);
> +	ses->se_target_maxslots -= ret;
> +	ses->se_slot_gen += 1;
> +	if (ses->se_slot_gen == 0) {
> +		int i;
> +		ses->se_slot_gen = 1;
> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> +			slot->sl_generation = 0;
> +		}
> +	}
> +	spin_unlock(&nn->client_lock);
> +	return ret;
>  }
>  
>  /*
> @@ -1967,6 +2005,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	}
>  	fattrs->maxreqs = i;
>  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> +	new->se_target_maxslots = i;
>  	new->se_cb_slot_avail = ~0U;
>  	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>  				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> @@ -2080,7 +2119,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>  
>  static void __free_session(struct nfsd4_session *ses)
>  {
> -	free_session_slots(ses);
> +	free_session_slots(ses, 0);
>  	xa_destroy(&ses->se_slots);
>  	kfree(ses);
>  }
> @@ -3687,10 +3726,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>  	kfree(exid->server_impl_name);
>  }
>  
> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>  {
>  	/* The slot is in use, and no response has been sent. */
> -	if (slot_inuse) {
> +	if (flags & NFSD4_SLOT_INUSE) {
>  		if (seqid == slot_seqid)
>  			return nfserr_jukebox;
>  		else
> @@ -3699,6 +3738,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>  	/* Note unsigned 32-bit arithmetic handles wraparound: */
>  	if (likely(seqid == slot_seqid + 1))
>  		return nfs_ok;
> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> +		return nfs_ok;
>  	if (seqid == slot_seqid)
>  		return nfserr_replay_cache;
>  	return nfserr_seq_misordered;
> @@ -4249,8 +4290,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>  
>  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> -					slot->sl_flags & NFSD4_SLOT_INUSE);
> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>  	if (status == nfserr_replay_cache) {
>  		status = nfserr_seq_misordered;
>  		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> @@ -4275,6 +4315,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	if (status)
>  		goto out_put_session;
>  
> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> +	    slot->sl_generation == session->se_slot_gen &&
> +	    seq->maxslots <= session->se_target_maxslots)
> +		/* Client acknowledged our reduce maxreqs */
> +		free_session_slots(session, session->se_target_maxslots);
> +
>  	buflen = (seq->cachethis) ?
>  			session->se_fchannel.maxresp_cached :
>  			session->se_fchannel.maxresp_sz;
> @@ -4285,8 +4331,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	svc_reserve(rqstp, buflen);
>  
>  	status = nfs_ok;
> -	/* Success! bump slot seqid */
> +	/* Success! accept new slot seqid */
>  	slot->sl_seqid = seq->seqid;
> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>  	slot->sl_flags |= NFSD4_SLOT_INUSE;
>  	if (seq->cachethis)
>  		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
> @@ -4302,8 +4349,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	 * gently try to allocate another one.
>  	 */
>  	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>  	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>  		int s = session->se_fchannel.maxreqs;
> +		void *prev_slot;
>  
>  		/*
>  		 * GFP_NOWAIT is a low-priority non-blocking allocation
> @@ -4314,13 +4363,21 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		 * allocation.
>  		 */
>  		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
> +		prev_slot = xa_load(&session->se_slots, s);
> +		if (xa_is_value(prev_slot) && slot) {
> +			slot->sl_seqid = xa_to_value(prev_slot);
> +			slot->sl_flags |= NFSD4_SLOT_REUSED;
> +		}
>  		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
> -						GFP_ATOMIC)))
> +						GFP_ATOMIC))) {
>  			session->se_fchannel.maxreqs += 1;
> -		else
> +			session->se_target_maxslots = session->se_fchannel.maxreqs;
> +		} else {
>  			kfree(slot);
> +		}
>  	}
> -	seq->maxslots = session->se_fchannel.maxreqs;
> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> +	seq->target_maxslots = session->se_target_maxslots;
>  
>  out:
>  	switch (clp->cl_cb_state) {
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 5c79494bd20b..b281a2198ff3 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1905,7 +1905,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>  		return nfserr_bad_xdr;
>  	seq->seqid = be32_to_cpup(p++);
>  	seq->slotid = be32_to_cpup(p++);
> -	seq->maxslots = be32_to_cpup(p++);
> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> +	seq->maxslots = be32_to_cpup(p++) + 1;
>  	seq->cachethis = be32_to_cpup(p);
>  
>  	seq->status_flags = 0;
> @@ -5054,7 +5055,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_status_flags */
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index a14a823670e9..ea6659d52be2 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -268,7 +268,9 @@ struct nfsd4_slot {
>  #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>  #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>  #define NFSD4_SLOT_CACHED	(1 << 3)
> +#define NFSD4_SLOT_REUSED	(1 << 4)
>  	u8	sl_flags;
> +	u8	sl_generation;
>  	char	sl_data[];
>  };
>  
> @@ -350,6 +352,8 @@ struct nfsd4_session {
>  	struct list_head	se_conns;
>  	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>  	struct xarray		se_slots;	/* forward channel slots */
> +	u8			se_slot_gen;
> +	u32			se_target_maxslots;
>  };
>  
>  /* formatted contents of nfs4_sessionid */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 382cc1389396..c26ba86dbdfd 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>  	u32			slotid;			/* request/response */
>  	u32			maxslots;		/* request/response */
>  	u32			cachethis;		/* request */
> -#if 0
>  	u32			target_maxslots;	/* response */
> -#endif /* not yet */
>  	u32			status_flags;		/* response */
>  };
>  
> -- 
> 2.47.0
>
Jeff Layton Nov. 19, 2024, 7:48 p.m. UTC | #2
On Tue, 2024-11-19 at 11:41 +1100, NeilBrown wrote:
> Reducing the number of slots in the session slot table requires
> confirmation from the client.  This patch adds reduce_session_slots()
> which starts the process of getting confirmation, but never calls it.
> That will come in a later patch.
> 
> Before we can free a slot we need to confirm that the client won't try
> to use it again.  This involves returning a lower cr_maxrequests in a
> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> is not larger than we limit we are trying to impose.  So for each slot
> we need to remember that we have sent a reduced cr_maxrequests.
> 
> To achieve this we introduce a concept of request "generations".  Each
> time we decide to reduce cr_maxrequests we increment the generation
> number, and record this when we return the lower cr_maxrequests to the
> client.  When a slot with the current generation reports a low
> ca_maxrequests, we commit to that level and free extra slots.
> 
> We use an 8 bit generation number (64 seems wasteful) and if it cycles
> we iterate all slots and reset the generation number to avoid false matches.
> 
> When we free a slot we store the seqid in the slot pointer so that it can
> be restored when we reactivate the slot.  The RFC can be read as
> suggesting that the slot number could restart from one after a slot is
> retired and reactivated, but also suggests that retiring slots is not
> required.  So when we reactive a slot we accept with the next seqid in
> sequence, or 1.
> 

Personally, I think that resetting to 1 is the only sane choice. After
shrinking the slot table, either side is free to forget the slot
information. When the slot is resurrected, we need to treat it as a new
slot. Expecting the server to remember all seqids, and their cached
replies for all slots ever used on a session seems like an open-ended
mandate.

That said, I'm ok with the server being accepting here, in case there
are client implementations that have done it the other way. Some clear
guidance from the RFCs would sure be nice though.

> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> matches how it is encoded for the reply.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>  fs/nfsd/nfs4xdr.c   |  5 +--
>  fs/nfsd/state.h     |  4 +++
>  fs/nfsd/xdr4.h      |  2 --
>  4 files changed, 76 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index fb522165b376..0625b0aec6b8 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>  
>  static void
> -free_session_slots(struct nfsd4_session *ses)
> +free_session_slots(struct nfsd4_session *ses, int from)
>  {
>  	int i;
>  
> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +	if (from >= ses->se_fchannel.maxreqs)
> +		return;
> +
> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>  
> -		xa_erase(&ses->se_slots, i);
> +		/*
> +		 * Save the seqid in case we reactivate this slot.
> +		 * This will never require a memory allocation so GFP
> +		 * flag is irrelevant
> +		 */
> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> +			 GFP_ATOMIC);
>  		free_svc_cred(&slot->sl_cred);
>  		kfree(slot);
>  	}
> +	ses->se_fchannel.maxreqs = from;
> +	if (ses->se_target_maxslots > from)
> +		ses->se_target_maxslots = from;
> +}
> +
> +static int __maybe_unused
> +reduce_session_slots(struct nfsd4_session *ses, int dec)
> +{
> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> +					  nfsd_net_id);
> +	int ret = 0;
> +
> +	if (ses->se_target_maxslots <= 1)
> +		return ret;
> +	if (!spin_trylock(&nn->client_lock))
> +		return ret;
> +	ret = min(dec, ses->se_target_maxslots-1);
> +	ses->se_target_maxslots -= ret;
> +	ses->se_slot_gen += 1;
> +	if (ses->se_slot_gen == 0) {
> +		int i;
> +		ses->se_slot_gen = 1;
> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> +			slot->sl_generation = 0;
> +		}
> +	}
> +	spin_unlock(&nn->client_lock);
> +	return ret;
>  }
>  
>  /*
> @@ -1967,6 +2005,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	}
>  	fattrs->maxreqs = i;
>  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> +	new->se_target_maxslots = i;
>  	new->se_cb_slot_avail = ~0U;
>  	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>  				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> @@ -2080,7 +2119,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>  
>  static void __free_session(struct nfsd4_session *ses)
>  {
> -	free_session_slots(ses);
> +	free_session_slots(ses, 0);
>  	xa_destroy(&ses->se_slots);
>  	kfree(ses);
>  }
> @@ -3687,10 +3726,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>  	kfree(exid->server_impl_name);
>  }
>  
> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>  {
>  	/* The slot is in use, and no response has been sent. */
> -	if (slot_inuse) {
> +	if (flags & NFSD4_SLOT_INUSE) {
>  		if (seqid == slot_seqid)
>  			return nfserr_jukebox;
>  		else
> @@ -3699,6 +3738,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>  	/* Note unsigned 32-bit arithmetic handles wraparound: */
>  	if (likely(seqid == slot_seqid + 1))
>  		return nfs_ok;
> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> +		return nfs_ok;
>  	if (seqid == slot_seqid)
>  		return nfserr_replay_cache;
>  	return nfserr_seq_misordered;
> @@ -4249,8 +4290,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>  
>  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> -					slot->sl_flags & NFSD4_SLOT_INUSE);
> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>  	if (status == nfserr_replay_cache) {
>  		status = nfserr_seq_misordered;
>  		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> @@ -4275,6 +4315,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	if (status)
>  		goto out_put_session;
>  
> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> +	    slot->sl_generation == session->se_slot_gen &&
> +	    seq->maxslots <= session->se_target_maxslots)
> +		/* Client acknowledged our reduce maxreqs */
> +		free_session_slots(session, session->se_target_maxslots);
> +
>  	buflen = (seq->cachethis) ?
>  			session->se_fchannel.maxresp_cached :
>  			session->se_fchannel.maxresp_sz;
> @@ -4285,8 +4331,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	svc_reserve(rqstp, buflen);
>  
>  	status = nfs_ok;
> -	/* Success! bump slot seqid */
> +	/* Success! accept new slot seqid */
>  	slot->sl_seqid = seq->seqid;
> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>  	slot->sl_flags |= NFSD4_SLOT_INUSE;
>  	if (seq->cachethis)
>  		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
> @@ -4302,8 +4349,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	 * gently try to allocate another one.
>  	 */
>  	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>  	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>  		int s = session->se_fchannel.maxreqs;
> +		void *prev_slot;
>  
>  		/*
>  		 * GFP_NOWAIT is a low-priority non-blocking allocation
> @@ -4314,13 +4363,21 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		 * allocation.
>  		 */
>  		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
> +		prev_slot = xa_load(&session->se_slots, s);
> +		if (xa_is_value(prev_slot) && slot) {
> +			slot->sl_seqid = xa_to_value(prev_slot);
> +			slot->sl_flags |= NFSD4_SLOT_REUSED;
> +		}
>  		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
> -						GFP_ATOMIC)))
> +						GFP_ATOMIC))) {
>  			session->se_fchannel.maxreqs += 1;
> -		else
> +			session->se_target_maxslots = session->se_fchannel.maxreqs;
> +		} else {
>  			kfree(slot);
> +		}
>  	}
> -	seq->maxslots = session->se_fchannel.maxreqs;
> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> +	seq->target_maxslots = session->se_target_maxslots;
>  
>  out:
>  	switch (clp->cl_cb_state) {
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 5c79494bd20b..b281a2198ff3 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1905,7 +1905,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>  		return nfserr_bad_xdr;
>  	seq->seqid = be32_to_cpup(p++);
>  	seq->slotid = be32_to_cpup(p++);
> -	seq->maxslots = be32_to_cpup(p++);
> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> +	seq->maxslots = be32_to_cpup(p++) + 1;
>  	seq->cachethis = be32_to_cpup(p);
>  
>  	seq->status_flags = 0;
> @@ -5054,7 +5055,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_status_flags */
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index a14a823670e9..ea6659d52be2 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -268,7 +268,9 @@ struct nfsd4_slot {
>  #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>  #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>  #define NFSD4_SLOT_CACHED	(1 << 3)
> +#define NFSD4_SLOT_REUSED	(1 << 4)
>  	u8	sl_flags;
> +	u8	sl_generation;
>  	char	sl_data[];
>  };
>  
> @@ -350,6 +352,8 @@ struct nfsd4_session {
>  	struct list_head	se_conns;
>  	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>  	struct xarray		se_slots;	/* forward channel slots */
> +	u8			se_slot_gen;
> +	u32			se_target_maxslots;
>  };
>  
>  /* formatted contents of nfs4_sessionid */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 382cc1389396..c26ba86dbdfd 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>  	u32			slotid;			/* request/response */
>  	u32			maxslots;		/* request/response */
>  	u32			cachethis;		/* request */
> -#if 0
>  	u32			target_maxslots;	/* response */
> -#endif /* not yet */
>  	u32			status_flags;		/* response */
>  };
>
NeilBrown Nov. 19, 2024, 10:35 p.m. UTC | #3
On Wed, 20 Nov 2024, Chuck Lever wrote:
> On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> > Reducing the number of slots in the session slot table requires
> > confirmation from the client.  This patch adds reduce_session_slots()
> > which starts the process of getting confirmation, but never calls it.
> > That will come in a later patch.
> > 
> > Before we can free a slot we need to confirm that the client won't try
> > to use it again.  This involves returning a lower cr_maxrequests in a
> > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > is not larger than we limit we are trying to impose.  So for each slot
> > we need to remember that we have sent a reduced cr_maxrequests.
> > 
> > To achieve this we introduce a concept of request "generations".  Each
> > time we decide to reduce cr_maxrequests we increment the generation
> > number, and record this when we return the lower cr_maxrequests to the
> > client.  When a slot with the current generation reports a low
> > ca_maxrequests, we commit to that level and free extra slots.
> > 
> > We use an 8 bit generation number (64 seems wasteful) and if it cycles
> > we iterate all slots and reset the generation number to avoid false matches.
> > 
> > When we free a slot we store the seqid in the slot pointer so that it can
> > be restored when we reactivate the slot.  The RFC can be read as
> > suggesting that the slot number could restart from one after a slot is
> > retired and reactivated, but also suggests that retiring slots is not
> > required.  So when we reactive a slot we accept with the next seqid in
> > sequence, or 1.
> > 
> > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > matches how it is encoded for the reply.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
> >  fs/nfsd/nfs4xdr.c   |  5 +--
> >  fs/nfsd/state.h     |  4 +++
> >  fs/nfsd/xdr4.h      |  2 --
> >  4 files changed, 76 insertions(+), 16 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index fb522165b376..0625b0aec6b8 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
> >  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> >  
> >  static void
> > -free_session_slots(struct nfsd4_session *ses)
> > +free_session_slots(struct nfsd4_session *ses, int from)
> >  {
> >  	int i;
> >  
> > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > +	if (from >= ses->se_fchannel.maxreqs)
> > +		return;
> > +
> > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> >  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >  
> > -		xa_erase(&ses->se_slots, i);
> > +		/*
> > +		 * Save the seqid in case we reactivate this slot.
> > +		 * This will never require a memory allocation so GFP
> > +		 * flag is irrelevant
> > +		 */
> > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> > +			 GFP_ATOMIC);
> 
> Again... ATOMIC is probably not what we want here, even if it is
> only documentary.

Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
a warning.

> 
> And, I thought we determined that an unretired slot had a sequence
> number that is reset. Why save the slot's seqid? If I'm missing
> something, the comment here should be bolstered to explain it.

It isn't clear to me that we determined that - only the some people
asserted it.  Until the spec is clarified I think it is safest to be
cautious.

Thanks,
NeilBrown
Chuck Lever III Nov. 20, 2024, 1:27 a.m. UTC | #4
On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
> On Wed, 20 Nov 2024, Chuck Lever wrote:
> > On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> > > Reducing the number of slots in the session slot table requires
> > > confirmation from the client.  This patch adds reduce_session_slots()
> > > which starts the process of getting confirmation, but never calls it.
> > > That will come in a later patch.
> > > 
> > > Before we can free a slot we need to confirm that the client won't try
> > > to use it again.  This involves returning a lower cr_maxrequests in a
> > > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > > is not larger than we limit we are trying to impose.  So for each slot
> > > we need to remember that we have sent a reduced cr_maxrequests.
> > > 
> > > To achieve this we introduce a concept of request "generations".  Each
> > > time we decide to reduce cr_maxrequests we increment the generation
> > > number, and record this when we return the lower cr_maxrequests to the
> > > client.  When a slot with the current generation reports a low
> > > ca_maxrequests, we commit to that level and free extra slots.
> > > 
> > > We use an 8 bit generation number (64 seems wasteful) and if it cycles
> > > we iterate all slots and reset the generation number to avoid false matches.
> > > 
> > > When we free a slot we store the seqid in the slot pointer so that it can
> > > be restored when we reactivate the slot.  The RFC can be read as
> > > suggesting that the slot number could restart from one after a slot is
> > > retired and reactivated, but also suggests that retiring slots is not
> > > required.  So when we reactive a slot we accept with the next seqid in
> > > sequence, or 1.
> > > 
> > > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > > matches how it is encoded for the reply.
> > > 
> > > Signed-off-by: NeilBrown <neilb@suse.de>
> > > ---
> > >  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
> > >  fs/nfsd/nfs4xdr.c   |  5 +--
> > >  fs/nfsd/state.h     |  4 +++
> > >  fs/nfsd/xdr4.h      |  2 --
> > >  4 files changed, 76 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index fb522165b376..0625b0aec6b8 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
> > >  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> > >  
> > >  static void
> > > -free_session_slots(struct nfsd4_session *ses)
> > > +free_session_slots(struct nfsd4_session *ses, int from)
> > >  {
> > >  	int i;
> > >  
> > > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > > +	if (from >= ses->se_fchannel.maxreqs)
> > > +		return;
> > > +
> > > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> > >  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> > >  
> > > -		xa_erase(&ses->se_slots, i);
> > > +		/*
> > > +		 * Save the seqid in case we reactivate this slot.
> > > +		 * This will never require a memory allocation so GFP
> > > +		 * flag is irrelevant
> > > +		 */
> > > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> > > +			 GFP_ATOMIC);
> > 
> > Again... ATOMIC is probably not what we want here, even if it is
> > only documentary.
> 
> Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
> a warning.

I find using GFP_ATOMIC here to be confusing -- it requests
allocation from special memory reserves and is to be used in
situations where allocation might result in system failure. That is
clearly not the case here, and the resulting memory allocation might
be long-lived.

I see the comment that says memory won't actually be allocated. I'm
not sure that's the way xa_store() works, however.

I don't immediately see another good choice, however. I can reach
out to Matthew and Liam and see if they have a better idea.


> > And, I thought we determined that an unretired slot had a sequence
> > number that is reset. Why save the slot's seqid? If I'm missing
> > something, the comment here should be bolstered to explain it.
> 
> It isn't clear to me that we determined that - only the some people
> asserted it.

From what I've read, everyone else who responded has said "use one".
And they have provided enough spec quotations that 1 seems like the
right initial slot sequence number value, always.

You should trust Tom Talpey's opinion on this. He was directly
involved 25 years ago when sessions were invented in DAFS and then
transferred into the NFSv4.1 protocol.


> Until the spec is clarified I think it is safest to be cautious.

The usual line we draw for adding code/features/complexity is the
proposer must demonstrate a use case for it. So far I have not seen
a client implementation that needs a server to remember the sequence
number in a slot that has been shrunken and then re-activated.

Will this dead slot be subject to being freed by the session
shrinker?

But the proposed implementation accepts 1 in this case, and it
doesn't seem tremendously difficult to remove the "remember the
seqid" mechanism once it has been codified to everyone's
satisfaction. So I won't belabor the point.
NeilBrown Nov. 21, 2024, 9:47 p.m. UTC | #5
On Wed, 20 Nov 2024, Chuck Lever wrote:
> On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
> > On Wed, 20 Nov 2024, Chuck Lever wrote:
> > > On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> > > > Reducing the number of slots in the session slot table requires
> > > > confirmation from the client.  This patch adds reduce_session_slots()
> > > > which starts the process of getting confirmation, but never calls it.
> > > > That will come in a later patch.
> > > > 
> > > > Before we can free a slot we need to confirm that the client won't try
> > > > to use it again.  This involves returning a lower cr_maxrequests in a
> > > > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > > > is not larger than we limit we are trying to impose.  So for each slot
> > > > we need to remember that we have sent a reduced cr_maxrequests.
> > > > 
> > > > To achieve this we introduce a concept of request "generations".  Each
> > > > time we decide to reduce cr_maxrequests we increment the generation
> > > > number, and record this when we return the lower cr_maxrequests to the
> > > > client.  When a slot with the current generation reports a low
> > > > ca_maxrequests, we commit to that level and free extra slots.
> > > > 
> > > > We use an 8 bit generation number (64 seems wasteful) and if it cycles
> > > > we iterate all slots and reset the generation number to avoid false matches.
> > > > 
> > > > When we free a slot we store the seqid in the slot pointer so that it can
> > > > be restored when we reactivate the slot.  The RFC can be read as
> > > > suggesting that the slot number could restart from one after a slot is
> > > > retired and reactivated, but also suggests that retiring slots is not
> > > > required.  So when we reactive a slot we accept with the next seqid in
> > > > sequence, or 1.
> > > > 
> > > > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > > > matches how it is encoded for the reply.
> > > > 
> > > > Signed-off-by: NeilBrown <neilb@suse.de>
> > > > ---
> > > >  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
> > > >  fs/nfsd/nfs4xdr.c   |  5 +--
> > > >  fs/nfsd/state.h     |  4 +++
> > > >  fs/nfsd/xdr4.h      |  2 --
> > > >  4 files changed, 76 insertions(+), 16 deletions(-)
> > > > 
> > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > index fb522165b376..0625b0aec6b8 100644
> > > > --- a/fs/nfsd/nfs4state.c
> > > > +++ b/fs/nfsd/nfs4state.c
> > > > @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
> > > >  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> > > >  
> > > >  static void
> > > > -free_session_slots(struct nfsd4_session *ses)
> > > > +free_session_slots(struct nfsd4_session *ses, int from)
> > > >  {
> > > >  	int i;
> > > >  
> > > > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > > > +	if (from >= ses->se_fchannel.maxreqs)
> > > > +		return;
> > > > +
> > > > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> > > >  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> > > >  
> > > > -		xa_erase(&ses->se_slots, i);
> > > > +		/*
> > > > +		 * Save the seqid in case we reactivate this slot.
> > > > +		 * This will never require a memory allocation so GFP
> > > > +		 * flag is irrelevant
> > > > +		 */
> > > > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> > > > +			 GFP_ATOMIC);
> > > 
> > > Again... ATOMIC is probably not what we want here, even if it is
> > > only documentary.
> > 
> > Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
> > a warning.
> 
> I find using GFP_ATOMIC here to be confusing -- it requests
> allocation from special memory reserves and is to be used in
> situations where allocation might result in system failure. That is
> clearly not the case here, and the resulting memory allocation might
> be long-lived.

Would you be comfortable with GFP_NOWAIT which leaves out __GFP_HIGH ??

My understanding of how GFP_ATOMIC is used is it is what people choose
when they have to allocate in a no-sleep context.  It can fail and there
must always be a fall-back option.  In many cases GFP_NOWAIT could
possibly be used when it isn't a high priority, but there are 430 uses
for GFP_NOWAIT compared with over 5000 of GFP_ATOMIC.


> 
> I see the comment that says memory won't actually be allocated. I'm
> not sure that's the way xa_store() works, however.

xarray.rst says:

  The xa_store(), xa_cmpxchg(), xa_alloc(),
  xa_reserve() and xa_insert() functions take a gfp_t
  parameter in case the XArray needs to allocate memory to store this entry.
  If the entry is being deleted, no memory allocation needs to be performed,
  and the GFP flags specified will be ignored.`

The particular context is that a normal pointer is currently stored a
the given index, and we are replacing that with a number.  The above
doesn't explicitly say that won't require a memory allocation, but I
think it is reasonable to say it won't need "to allocate memory to store
this entry" - as an entry is already stored - so allocation should not
be needed.

> 
> I don't immediately see another good choice, however. I can reach
> out to Matthew and Liam and see if they have a better idea.
> 
> 
> > > And, I thought we determined that an unretired slot had a sequence
> > > number that is reset. Why save the slot's seqid? If I'm missing
> > > something, the comment here should be bolstered to explain it.
> > 
> > It isn't clear to me that we determined that - only the some people
> > asserted it.
> 
> From what I've read, everyone else who responded has said "use one".
> And they have provided enough spec quotations that 1 seems like the
> right initial slot sequence number value, always.
> 
> You should trust Tom Talpey's opinion on this. He was directly
> involved 25 years ago when sessions were invented in DAFS and then
> transferred into the NFSv4.1 protocol.

Dave Noveck (also deeply involved) say:

   It does.  The problem is that it undercuts the core goal of the
   slot-based approach 
   In that it makes it possible to have multiple requests with the same
   session id/ slot ID / sequence triple.

i.e.  resetting to 1 undercuts the core goal....  That is not a
resounding endorsement.

While I respect the people, I prefer to trust reasoned arguments.

What exactly is the sequence number protecting against?  It must be
protecting against a request being sent, not reply received, connection
closed, request sent on another connection, reply received.  Original
request getting to the server before the "close" message.  Server must
be sure not to handle this request.  sequence number provides that.

But what if the above all happens for request with seqno of 1, then the
server and client negotiate a "retiring" of slot 1 and then it's reuse
before the original request arrives.  How does the server know to ignore
it?

And Tom said that the handling of maxreq etc is "optional" for both
server and client.  So how can the server know if the client has retired
the slot when doing so is optional???

I really don't think we have clarity on this at all.

> 
> 
> > Until the spec is clarified I think it is safest to be cautious.
> 
> The usual line we draw for adding code/features/complexity is the
> proposer must demonstrate a use case for it. So far I have not seen
> a client implementation that needs a server to remember the sequence
> number in a slot that has been shrunken and then re-activated.

And I cannot in good faith submit code that I think violates the spec.

> 
> Will this dead slot be subject to being freed by the session
> shrinker?

No, but it uses much much less space than a slot.

> 
> But the proposed implementation accepts 1 in this case, and it
> doesn't seem tremendously difficult to remove the "remember the
> seqid" mechanism once it has been codified to everyone's
> satisfaction. So I won't belabor the point.

Thank you!

NeilBrown
Chuck Lever III Nov. 21, 2024, 10:29 p.m. UTC | #6
> On Nov 21, 2024, at 4:47 PM, NeilBrown <neilb@suse.de> wrote:
> 
> On Wed, 20 Nov 2024, Chuck Lever wrote:
>> On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
>>> On Wed, 20 Nov 2024, Chuck Lever wrote:
>>>> On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
>>>>> Reducing the number of slots in the session slot table requires
>>>>> confirmation from the client.  This patch adds reduce_session_slots()
>>>>> which starts the process of getting confirmation, but never calls it.
>>>>> That will come in a later patch.
>>>>> 
>>>>> Before we can free a slot we need to confirm that the client won't try
>>>>> to use it again.  This involves returning a lower cr_maxrequests in a
>>>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
>>>>> is not larger than we limit we are trying to impose.  So for each slot
>>>>> we need to remember that we have sent a reduced cr_maxrequests.
>>>>> 
>>>>> To achieve this we introduce a concept of request "generations".  Each
>>>>> time we decide to reduce cr_maxrequests we increment the generation
>>>>> number, and record this when we return the lower cr_maxrequests to the
>>>>> client.  When a slot with the current generation reports a low
>>>>> ca_maxrequests, we commit to that level and free extra slots.
>>>>> 
>>>>> We use an 8 bit generation number (64 seems wasteful) and if it cycles
>>>>> we iterate all slots and reset the generation number to avoid false matches.
>>>>> 
>>>>> When we free a slot we store the seqid in the slot pointer so that it can
>>>>> be restored when we reactivate the slot.  The RFC can be read as
>>>>> suggesting that the slot number could restart from one after a slot is
>>>>> retired and reactivated, but also suggests that retiring slots is not
>>>>> required.  So when we reactive a slot we accept with the next seqid in
>>>>> sequence, or 1.
>>>>> 
>>>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
>>>>> matches how it is encoded for the reply.
>>>>> 
>>>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>>>> ---
>>>>> fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>>>>> fs/nfsd/nfs4xdr.c   |  5 +--
>>>>> fs/nfsd/state.h     |  4 +++
>>>>> fs/nfsd/xdr4.h      |  2 --
>>>>> 4 files changed, 76 insertions(+), 16 deletions(-)
>>>>> 
>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>>> index fb522165b376..0625b0aec6b8 100644
>>>>> --- a/fs/nfsd/nfs4state.c
>>>>> +++ b/fs/nfsd/nfs4state.c
>>>>> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>>>>> #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>>>>> 
>>>>> static void
>>>>> -free_session_slots(struct nfsd4_session *ses)
>>>>> +free_session_slots(struct nfsd4_session *ses, int from)
>>>>> {
>>>>> int i;
>>>>> 
>>>>> - for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>>>> + if (from >= ses->se_fchannel.maxreqs)
>>>>> + return;
>>>>> +
>>>>> + for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>>>>> struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>>>> 
>>>>> - xa_erase(&ses->se_slots, i);
>>>>> + /*
>>>>> +  * Save the seqid in case we reactivate this slot.
>>>>> +  * This will never require a memory allocation so GFP
>>>>> +  * flag is irrelevant
>>>>> +  */
>>>>> + xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
>>>>> +  GFP_ATOMIC);
>>>> 
>>>> Again... ATOMIC is probably not what we want here, even if it is
>>>> only documentary.
>>> 
>>> Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
>>> a warning.
>> 
>> I find using GFP_ATOMIC here to be confusing -- it requests
>> allocation from special memory reserves and is to be used in
>> situations where allocation might result in system failure. That is
>> clearly not the case here, and the resulting memory allocation might
>> be long-lived.
> 
> Would you be comfortable with GFP_NOWAIT which leaves out __GFP_HIGH ??

I will be comfortable when I hear back from Matthew and Liam.

:-)


>> I see the comment that says memory won't actually be allocated. I'm
>> not sure that's the way xa_store() works, however.
> 
> xarray.rst says:
> 
>  The xa_store(), xa_cmpxchg(), xa_alloc(),
>  xa_reserve() and xa_insert() functions take a gfp_t
>  parameter in case the XArray needs to allocate memory to store this entry.
>  If the entry is being deleted, no memory allocation needs to be performed,
>  and the GFP flags specified will be ignored.`
> 
> The particular context is that a normal pointer is currently stored a
> the given index, and we are replacing that with a number.  The above
> doesn't explicitly say that won't require a memory allocation, but I
> think it is reasonable to say it won't need "to allocate memory to store
> this entry" - as an entry is already stored - so allocation should not
> be needed.

xa_mk_value() converts a number to a pointer, and xa_store
stores that pointer.

I suspect that xa_store() is allowed to rebalance the
xarray's internal data structures, and that could result
in memory release or allocation. That's why a GFP flag is
one of the arguments.


--
Chuck Lever
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fb522165b376..0625b0aec6b8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1910,17 +1910,55 @@  gen_sessionid(struct nfsd4_session *ses)
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
 static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
 {
 	int i;
 
-	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+	if (from >= ses->se_fchannel.maxreqs)
+		return;
+
+	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
 		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
 
-		xa_erase(&ses->se_slots, i);
+		/*
+		 * Save the seqid in case we reactivate this slot.
+		 * This will never require a memory allocation so GFP
+		 * flag is irrelevant
+		 */
+		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
+			 GFP_ATOMIC);
 		free_svc_cred(&slot->sl_cred);
 		kfree(slot);
 	}
+	ses->se_fchannel.maxreqs = from;
+	if (ses->se_target_maxslots > from)
+		ses->se_target_maxslots = from;
+}
+
+static int __maybe_unused
+reduce_session_slots(struct nfsd4_session *ses, int dec)
+{
+	struct nfsd_net *nn = net_generic(ses->se_client->net,
+					  nfsd_net_id);
+	int ret = 0;
+
+	if (ses->se_target_maxslots <= 1)
+		return ret;
+	if (!spin_trylock(&nn->client_lock))
+		return ret;
+	ret = min(dec, ses->se_target_maxslots-1);
+	ses->se_target_maxslots -= ret;
+	ses->se_slot_gen += 1;
+	if (ses->se_slot_gen == 0) {
+		int i;
+		ses->se_slot_gen = 1;
+		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+			slot->sl_generation = 0;
+		}
+	}
+	spin_unlock(&nn->client_lock);
+	return ret;
 }
 
 /*
@@ -1967,6 +2005,7 @@  static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	}
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+	new->se_target_maxslots = i;
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2080,7 +2119,7 @@  static void nfsd4_del_conns(struct nfsd4_session *s)
 
 static void __free_session(struct nfsd4_session *ses)
 {
-	free_session_slots(ses);
+	free_session_slots(ses, 0);
 	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
@@ -3687,10 +3726,10 @@  nfsd4_exchange_id_release(union nfsd4_op_u *u)
 	kfree(exid->server_impl_name);
 }
 
-static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
 {
 	/* The slot is in use, and no response has been sent. */
-	if (slot_inuse) {
+	if (flags & NFSD4_SLOT_INUSE) {
 		if (seqid == slot_seqid)
 			return nfserr_jukebox;
 		else
@@ -3699,6 +3738,8 @@  static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
 	/* Note unsigned 32-bit arithmetic handles wraparound: */
 	if (likely(seqid == slot_seqid + 1))
 		return nfs_ok;
+	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+		return nfs_ok;
 	if (seqid == slot_seqid)
 		return nfserr_replay_cache;
 	return nfserr_seq_misordered;
@@ -4249,8 +4290,7 @@  nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
-	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
-					slot->sl_flags & NFSD4_SLOT_INUSE);
+	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
 	if (status == nfserr_replay_cache) {
 		status = nfserr_seq_misordered;
 		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4275,6 +4315,12 @@  nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out_put_session;
 
+	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+	    slot->sl_generation == session->se_slot_gen &&
+	    seq->maxslots <= session->se_target_maxslots)
+		/* Client acknowledged our reduce maxreqs */
+		free_session_slots(session, session->se_target_maxslots);
+
 	buflen = (seq->cachethis) ?
 			session->se_fchannel.maxresp_cached :
 			session->se_fchannel.maxresp_sz;
@@ -4285,8 +4331,9 @@  nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	svc_reserve(rqstp, buflen);
 
 	status = nfs_ok;
-	/* Success! bump slot seqid */
+	/* Success! accept new slot seqid */
 	slot->sl_seqid = seq->seqid;
+	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
 	slot->sl_flags |= NFSD4_SLOT_INUSE;
 	if (seq->cachethis)
 		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
@@ -4302,8 +4349,10 @@  nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 * gently try to allocate another one.
 	 */
 	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
 	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
 		int s = session->se_fchannel.maxreqs;
+		void *prev_slot;
 
 		/*
 		 * GFP_NOWAIT is a low-priority non-blocking allocation
@@ -4314,13 +4363,21 @@  nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		 * allocation.
 		 */
 		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
+		prev_slot = xa_load(&session->se_slots, s);
+		if (xa_is_value(prev_slot) && slot) {
+			slot->sl_seqid = xa_to_value(prev_slot);
+			slot->sl_flags |= NFSD4_SLOT_REUSED;
+		}
 		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
-						GFP_ATOMIC)))
+						GFP_ATOMIC))) {
 			session->se_fchannel.maxreqs += 1;
-		else
+			session->se_target_maxslots = session->se_fchannel.maxreqs;
+		} else {
 			kfree(slot);
+		}
 	}
-	seq->maxslots = session->se_fchannel.maxreqs;
+	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+	seq->target_maxslots = session->se_target_maxslots;
 
 out:
 	switch (clp->cl_cb_state) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5c79494bd20b..b281a2198ff3 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1905,7 +1905,8 @@  nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
 		return nfserr_bad_xdr;
 	seq->seqid = be32_to_cpup(p++);
 	seq->slotid = be32_to_cpup(p++);
-	seq->maxslots = be32_to_cpup(p++);
+	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
+	seq->maxslots = be32_to_cpup(p++) + 1;
 	seq->cachethis = be32_to_cpup(p);
 
 	seq->status_flags = 0;
@@ -5054,7 +5055,7 @@  nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_target_highest_slotid */
-	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_status_flags */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index a14a823670e9..ea6659d52be2 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -268,7 +268,9 @@  struct nfsd4_slot {
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
 #define NFSD4_SLOT_CACHED	(1 << 3)
+#define NFSD4_SLOT_REUSED	(1 << 4)
 	u8	sl_flags;
+	u8	sl_generation;
 	char	sl_data[];
 };
 
@@ -350,6 +352,8 @@  struct nfsd4_session {
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
 	struct xarray		se_slots;	/* forward channel slots */
+	u8			se_slot_gen;
+	u32			se_target_maxslots;
 };
 
 /* formatted contents of nfs4_sessionid */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 382cc1389396..c26ba86dbdfd 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -576,9 +576,7 @@  struct nfsd4_sequence {
 	u32			slotid;			/* request/response */
 	u32			maxslots;		/* request/response */
 	u32			cachethis;		/* request */
-#if 0
 	u32			target_maxslots;	/* response */
-#endif /* not yet */
 	u32			status_flags;		/* response */
 };