diff mbox series

[RFC,v20,09/10] NFSD: Update laundromat to handle courtesy client

Message ID 1649285133-16765-10-git-send-email-dai.ngo@oracle.com (mailing list archive)
State New, archived
Headers show
Series NFSD: Initial implementation of NFSv4 Courteous Server | expand

Commit Message

Dai Ngo April 6, 2022, 10:45 p.m. UTC
Add nfs4_anylock_blocker and nfs4_lockowner_has_blockers to check
if an expired client has any lock blockers

Update nfs4_get_client_reaplist to:
 . add courtesy client in CLIENT_EXPIRED state to reaplist.
 . detect if expired client still has state and no blockers then
   transit it to courtesy client by setting CLIENT_COURTESY state
   and removing the client record.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4state.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 2 deletions(-)

Comments

J. Bruce Fields April 8, 2022, 4:31 p.m. UTC | #1
On Wed, Apr 06, 2022 at 03:45:32PM -0700, Dai Ngo wrote:
>  static void
>  nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
>  				struct laundry_time *lt)
>  {
>  	struct list_head *pos, *next;
>  	struct nfs4_client *clp;
> +	bool cour;
> +	struct list_head cslist;
>  
>  	INIT_LIST_HEAD(reaplist);
> +	INIT_LIST_HEAD(&cslist);
>  	spin_lock(&nn->client_lock);
>  	list_for_each_safe(pos, next, &nn->client_lru) {
>  		clp = list_entry(pos, struct nfs4_client, cl_lru);
>  		if (!state_expired(lt, clp->cl_time))
>  			break;
> -		if (mark_client_expired_locked(clp))
> +
> +		if (!client_has_state(clp))
> +			goto exp_client;
> +
> +		if (clp->cl_cs_client_state == NFSD4_CLIENT_EXPIRED)
> +			goto exp_client;
> +		cour = (clp->cl_cs_client_state == NFSD4_CLIENT_COURTESY);
> +		if (cour && ktime_get_boottime_seconds() >=
> +				(clp->cl_time + NFSD_COURTESY_CLIENT_TIMEOUT))
> +			goto exp_client;
> +		if (nfs4_anylock_blockers(clp)) {
> +exp_client:
> +			if (mark_client_expired_locked(clp))
> +				continue;
> +			list_add(&clp->cl_lru, reaplist);
>  			continue;
> -		list_add(&clp->cl_lru, reaplist);
> +		}
> +		if (!cour) {
> +			spin_lock(&clp->cl_cs_lock);
> +			clp->cl_cs_client_state = NFSD4_CLIENT_COURTESY;
> +			spin_unlock(&clp->cl_cs_lock);
> +			list_add(&clp->cl_cs_list, &cslist);
> +		}
>  	}
>  	spin_unlock(&nn->client_lock);
> +
> +	while (!list_empty(&cslist)) {
> +		clp = list_first_entry(&cslist, struct nfs4_client, cl_cs_list);
> +		list_del_init(&clp->cl_cs_list);
> +		spin_lock(&clp->cl_cs_lock);
> +		/*
> +		 * Client might have re-connected. Make sure it's
> +		 * still in courtesy state before removing its record.
> +		 */

Good to be careful, but, then....

> +		if (clp->cl_cs_client_state != NFSD4_CLIENT_COURTESY) {
> +			spin_unlock(&clp->cl_cs_lock);
> +			continue;
> +		}
> +		spin_unlock(&clp->cl_cs_lock);

.... I'm not seeing what prevents a client from reconnecting right here,
after we drop cl_cs_lock but before we call
nfsd4_client_record_remove().

> +		nfsd4_client_record_remove(clp);
> +	}
>  }
>  
>  static time64_t
> @@ -5794,6 +5876,13 @@ nfs4_laundromat(struct nfsd_net *nn)
>  		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
>  		if (!state_expired(&lt, dp->dl_time))
>  			break;
> +		spin_lock(&clp->cl_cs_lock);
> +		if (clp->cl_cs_client_state == NFSD4_CLIENT_COURTESY) {
> +			clp->cl_cs_client_state = NFSD4_CLIENT_EXPIRED;
> +			spin_unlock(&clp->cl_cs_lock);

In an earlier patch you set the client to EXPIRED as soon as we got a
lease break, so isn't this unnecessary?

I guess there could be a case like:

	1. lease break comes in
	2. client fails to renew, transitions to courtesy
	3. delegation callback times out

though it'd seem better to catch that at step 2 if we can.

--b.

> +			continue;
> +		}
> +		spin_unlock(&clp->cl_cs_lock);
>  		WARN_ON(!unhash_delegation_locked(dp));
>  		list_add(&dp->dl_recall_lru, &reaplist);
>  	}
> -- 
> 2.9.5
Dai Ngo April 8, 2022, 9 p.m. UTC | #2
On 4/8/22 9:31 AM, J. Bruce Fields wrote:
> On Wed, Apr 06, 2022 at 03:45:32PM -0700, Dai Ngo wrote:
>>   static void
>>   nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
>>   				struct laundry_time *lt)
>>   {
>>   	struct list_head *pos, *next;
>>   	struct nfs4_client *clp;
>> +	bool cour;
>> +	struct list_head cslist;
>>   
>>   	INIT_LIST_HEAD(reaplist);
>> +	INIT_LIST_HEAD(&cslist);
>>   	spin_lock(&nn->client_lock);
>>   	list_for_each_safe(pos, next, &nn->client_lru) {
>>   		clp = list_entry(pos, struct nfs4_client, cl_lru);
>>   		if (!state_expired(lt, clp->cl_time))
>>   			break;
>> -		if (mark_client_expired_locked(clp))
>> +
>> +		if (!client_has_state(clp))
>> +			goto exp_client;
>> +
>> +		if (clp->cl_cs_client_state == NFSD4_CLIENT_EXPIRED)
>> +			goto exp_client;
>> +		cour = (clp->cl_cs_client_state == NFSD4_CLIENT_COURTESY);
>> +		if (cour && ktime_get_boottime_seconds() >=
>> +				(clp->cl_time + NFSD_COURTESY_CLIENT_TIMEOUT))
>> +			goto exp_client;
>> +		if (nfs4_anylock_blockers(clp)) {
>> +exp_client:
>> +			if (mark_client_expired_locked(clp))
>> +				continue;
>> +			list_add(&clp->cl_lru, reaplist);
>>   			continue;
>> -		list_add(&clp->cl_lru, reaplist);
>> +		}
>> +		if (!cour) {
>> +			spin_lock(&clp->cl_cs_lock);
>> +			clp->cl_cs_client_state = NFSD4_CLIENT_COURTESY;
>> +			spin_unlock(&clp->cl_cs_lock);
>> +			list_add(&clp->cl_cs_list, &cslist);
>> +		}
>>   	}
>>   	spin_unlock(&nn->client_lock);
>> +
>> +	while (!list_empty(&cslist)) {
>> +		clp = list_first_entry(&cslist, struct nfs4_client, cl_cs_list);
>> +		list_del_init(&clp->cl_cs_list);
>> +		spin_lock(&clp->cl_cs_lock);
>> +		/*
>> +		 * Client might have re-connected. Make sure it's
>> +		 * still in courtesy state before removing its record.
>> +		 */
> Good to be careful, but, then....
>
>> +		if (clp->cl_cs_client_state != NFSD4_CLIENT_COURTESY) {
>> +			spin_unlock(&clp->cl_cs_lock);
>> +			continue;
>> +		}
>> +		spin_unlock(&clp->cl_cs_lock);
> .... I'm not seeing what prevents a client from reconnecting right here,
> after we drop cl_cs_lock but before we call
> nfsd4_client_record_remove().

Thanks Bruce for pointing this out. I will fix it with a wait lock
in the next patch. This seems heavy but I have not been able to
fix it without the wait lock. This is should be a rare condition so
it would not cause much overhead.

>
>> +		nfsd4_client_record_remove(clp);
>> +	}
>>   }
>>   
>>   static time64_t
>> @@ -5794,6 +5876,13 @@ nfs4_laundromat(struct nfsd_net *nn)
>>   		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
>>   		if (!state_expired(&lt, dp->dl_time))
>>   			break;
>> +		spin_lock(&clp->cl_cs_lock);
>> +		if (clp->cl_cs_client_state == NFSD4_CLIENT_COURTESY) {
>> +			clp->cl_cs_client_state = NFSD4_CLIENT_EXPIRED;
>> +			spin_unlock(&clp->cl_cs_lock);
> In an earlier patch you set the client to EXPIRED as soon as we got a
> lease break,

I don't understand, we only set EXPIRED when we want to expire the
courtesy client.

> so isn't this unnecessary?

When the client is checked to see if it it can be in COURTESY_CLIENT
state, we did not check if there is any delegation recall callback is
pending, we only check for lock blockers) so that is why we check it
here and expire it.
Maybe I did not give the right answer since I'm not clear of the
question yet.

>
> I guess there could be a case like:
>
> 	1. lease break comes in
> 	2. client fails to renew, transitions to courtesy
> 	3. delegation callback times out
>
> though it'd seem better to catch that at step 2 if we can.

I will wait for the above issue to be resolved first to better
understand your concern here.

Thanks,
-Dai

> --b.
>
>> +			continue;
>> +		}
>> +		spin_unlock(&clp->cl_cs_lock);
>>   		WARN_ON(!unhash_delegation_locked(dp));
>>   		list_add(&dp->dl_recall_lru, &reaplist);
>>   	}
>> -- 
>> 2.9.5
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4278b2078606..edd50a4485aa 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5731,24 +5731,106 @@  static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
 }
 #endif
 
+/* Check if any lock belonging to this lockowner has any blockers */
+static bool
+nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
+{
+	struct file_lock_context *ctx;
+	struct nfs4_ol_stateid *stp;
+	struct nfs4_file *nf;
+
+	list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
+		nf = stp->st_stid.sc_file;
+		ctx = nf->fi_inode->i_flctx;
+		if (!ctx)
+			continue;
+		if (locks_owner_has_blockers(ctx, lo))
+			return true;
+	}
+	return false;
+}
+
+static bool
+nfs4_anylock_blockers(struct nfs4_client *clp)
+{
+	int i;
+	struct nfs4_stateowner *so;
+	struct nfs4_lockowner *lo;
+
+	spin_lock(&clp->cl_lock);
+	for (i = 0; i < OWNER_HASH_SIZE; i++) {
+		list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[i],
+				so_strhash) {
+			if (so->so_is_open_owner)
+				continue;
+			lo = lockowner(so);
+			if (nfs4_lockowner_has_blockers(lo)) {
+				spin_unlock(&clp->cl_lock);
+				return true;
+			}
+		}
+	}
+	spin_unlock(&clp->cl_lock);
+	return false;
+}
+
 static void
 nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
 				struct laundry_time *lt)
 {
 	struct list_head *pos, *next;
 	struct nfs4_client *clp;
+	bool cour;
+	struct list_head cslist;
 
 	INIT_LIST_HEAD(reaplist);
+	INIT_LIST_HEAD(&cslist);
 	spin_lock(&nn->client_lock);
 	list_for_each_safe(pos, next, &nn->client_lru) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		if (!state_expired(lt, clp->cl_time))
 			break;
-		if (mark_client_expired_locked(clp))
+
+		if (!client_has_state(clp))
+			goto exp_client;
+
+		if (clp->cl_cs_client_state == NFSD4_CLIENT_EXPIRED)
+			goto exp_client;
+		cour = (clp->cl_cs_client_state == NFSD4_CLIENT_COURTESY);
+		if (cour && ktime_get_boottime_seconds() >=
+				(clp->cl_time + NFSD_COURTESY_CLIENT_TIMEOUT))
+			goto exp_client;
+		if (nfs4_anylock_blockers(clp)) {
+exp_client:
+			if (mark_client_expired_locked(clp))
+				continue;
+			list_add(&clp->cl_lru, reaplist);
 			continue;
-		list_add(&clp->cl_lru, reaplist);
+		}
+		if (!cour) {
+			spin_lock(&clp->cl_cs_lock);
+			clp->cl_cs_client_state = NFSD4_CLIENT_COURTESY;
+			spin_unlock(&clp->cl_cs_lock);
+			list_add(&clp->cl_cs_list, &cslist);
+		}
 	}
 	spin_unlock(&nn->client_lock);
+
+	while (!list_empty(&cslist)) {
+		clp = list_first_entry(&cslist, struct nfs4_client, cl_cs_list);
+		list_del_init(&clp->cl_cs_list);
+		spin_lock(&clp->cl_cs_lock);
+		/*
+		 * Client might have re-connected. Make sure it's
+		 * still in courtesy state before removing its record.
+		 */
+		if (clp->cl_cs_client_state != NFSD4_CLIENT_COURTESY) {
+			spin_unlock(&clp->cl_cs_lock);
+			continue;
+		}
+		spin_unlock(&clp->cl_cs_lock);
+		nfsd4_client_record_remove(clp);
+	}
 }
 
 static time64_t
@@ -5794,6 +5876,13 @@  nfs4_laundromat(struct nfsd_net *nn)
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
 		if (!state_expired(&lt, dp->dl_time))
 			break;
+		spin_lock(&clp->cl_cs_lock);
+		if (clp->cl_cs_client_state == NFSD4_CLIENT_COURTESY) {
+			clp->cl_cs_client_state = NFSD4_CLIENT_EXPIRED;
+			spin_unlock(&clp->cl_cs_lock);
+			continue;
+		}
+		spin_unlock(&clp->cl_cs_lock);
 		WARN_ON(!unhash_delegation_locked(dp));
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}