diff mbox series

[1/2] NFSv4.1: Again fix a race where CB_NOTIFY_LOCK fails to wake a waiter

Message ID 2a1cebca-1efb-1686-475b-a581e50e61b4@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series Fix two bugs CB_NOTIFY_LOCK failing to wake a waiter | expand

Commit Message

Yihao Wu May 8, 2019, 9:13 a.m. UTC
Commit b7dbcc0e433f ("NFSv4.1: Fix a race where CB_NOTIFY_LOCK fails
to wake a waiter") found this bug. However, it didn't fix it. This can
be fixed by adding a memory barrier pair.

Specifically, if any CB_NOTIFY_LOCK should be handled between unlocking
the wait queue and freezable_schedule_timeout, only two cases are
possible. So CB_NOTIFY_LOCK will not be dropped unexpectedly.

1. The callback thread marks the NFS client as woken. The NFS client
then notices that it has been woken, so it does not go to sleep, and it
clears its wake mark.

2. The NFS client notices that it has not been woken yet, so it goes to
sleep. No modification will ever happen to the wake mark in between.

Fixes: a1d617d ("nfs: allow blocking locks to be awoken by lock callbacks")
Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
---
 fs/nfs/nfs4proc.c | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

Comments

Greg KH May 8, 2019, 9:19 a.m. UTC | #1
On Wed, May 08, 2019 at 05:13:25PM +0800, Yihao Wu wrote:
> Commit b7dbcc0e433f ""NFSv4.1: Fix a race where CB_NOTIFY_LOCK fails
> to wake a waiter" found this bug. However it didn't fix it. This can
> be fixed by adding memory barrier pair.
> 
> Specifically, if any CB_NOTIFY_LOCK should be handled between unlocking
> the wait queue and freezable_schedule_timeout, only two cases are
> possible. So CB_NOTIFY_LOCK will not be dropped unexpectly.
> 
> 1. The callback thread marks the NFS client as waked. Then NFS client
> noticed that itself is waked, so it don't goes to sleep. And it cleans
> its wake mark.
> 
> 2. The NFS client noticed that itself is not waked yet, so it goes to
> sleep. No modification will ever happen to the wake mark in between.
> 
> Fixes: a1d617d ("nfs: allow blocking locks to be awoken by lock callbacks")
> Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
> ---
>  fs/nfs/nfs4proc.c | 21 +++++----------------
>  1 file changed, 5 insertions(+), 16 deletions(-)

<formletter>

This is not the correct way to submit patches for inclusion in the
stable kernel tree.  Please read:
    https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
for how to do this properly.

</formletter>
Yihao Wu May 8, 2019, 9:39 a.m. UTC | #2
On 2019/5/8 5:19 PM, Greg KH wrote:
> On Wed, May 08, 2019 at 05:13:25PM +0800, Yihao Wu wrote:
>> Commit b7dbcc0e433f ""NFSv4.1: Fix a race where CB_NOTIFY_LOCK fails
>> to wake a waiter" found this bug. However it didn't fix it. This can
>> be fixed by adding memory barrier pair.
>>
>> Specifically, if any CB_NOTIFY_LOCK should be handled between unlocking
>> the wait queue and freezable_schedule_timeout, only two cases are
>> possible. So CB_NOTIFY_LOCK will not be dropped unexpectly.
>>
>> 1. The callback thread marks the NFS client as waked. Then NFS client
>> noticed that itself is waked, so it don't goes to sleep. And it cleans
>> its wake mark.
>>
>> 2. The NFS client noticed that itself is not waked yet, so it goes to
>> sleep. No modification will ever happen to the wake mark in between.
>>
>> Fixes: a1d617d ("nfs: allow blocking locks to be awoken by lock callbacks")
>> Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
>> ---
>>  fs/nfs/nfs4proc.c | 21 +++++----------------
>>  1 file changed, 5 insertions(+), 16 deletions(-)
> 
> <formletter>
> 
> This is not the correct way to submit patches for inclusion in the
> stable kernel tree.  Please read:
>     https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
> for how to do this properly.
> 
> </formletter>
> 

Thanks for your reply! And I'm sorry about that. I will correct this in
patch v2 and read the rules before sending patches.

Thanks,
Yihao Wu
Jeff Layton May 8, 2019, 12:24 p.m. UTC | #3
On Wed, 2019-05-08 at 17:13 +0800, Yihao Wu wrote:
> Commit b7dbcc0e433f ""NFSv4.1: Fix a race where CB_NOTIFY_LOCK fails
> to wake a waiter" found this bug. However it didn't fix it. This can
> be fixed by adding memory barrier pair.
> 
> Specifically, if any CB_NOTIFY_LOCK should be handled between unlocking
> the wait queue and freezable_schedule_timeout, only two cases are
> possible. So CB_NOTIFY_LOCK will not be dropped unexpectly.
> 
> 1. The callback thread marks the NFS client as waked. Then NFS client
> noticed that itself is waked, so it don't goes to sleep. And it cleans
> its wake mark.
> 
> 2. The NFS client noticed that itself is not waked yet, so it goes to
> sleep. No modification will ever happen to the wake mark in between.
> 

It's not clear to me what you mean by "wake mark" here. Do you mean the
"notified" flag? This could use a better description.

> Fixes: a1d617d ("nfs: allow blocking locks to be awoken by lock callbacks")
> Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
> ---
>  fs/nfs/nfs4proc.c | 21 +++++----------------
>  1 file changed, 5 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 741ff8c..f13ea09 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -6867,7 +6867,6 @@ struct nfs4_lock_waiter {
>  	struct task_struct	*task;
>  	struct inode		*inode;
>  	struct nfs_lowner	*owner;
> -	bool			notified;
>  };
>  
>  static int
> @@ -6889,13 +6888,13 @@ struct nfs4_lock_waiter {
>  		/* Make sure it's for the right inode */
>  		if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh))
>  			return 0;
> -
> -		waiter->notified = true;
>  	}
>  
>  	/* override "private" so we can use default_wake_function */
>  	wait->private = waiter->task;
> -	ret = autoremove_wake_function(wait, mode, flags, key);
> +	ret = woken_wake_function(wait, mode, flags, key);
> +	if (ret)
> +		list_del_init(&wait->entry);
>  	wait->private = waiter;
>  	return ret;
>  }
> @@ -6914,8 +6913,7 @@ struct nfs4_lock_waiter {
>  				    .s_dev = server->s_dev };
>  	struct nfs4_lock_waiter waiter = { .task  = current,
>  					   .inode = state->inode,
> -					   .owner = &owner,
> -					   .notified = false };
> +					   .owner = &owner};
>  	wait_queue_entry_t wait;
>  
>  	/* Don't bother with waitqueue if we don't expect a callback */
> @@ -6928,21 +6926,12 @@ struct nfs4_lock_waiter {
>  	add_wait_queue(q, &wait);
>  
>  	while(!signalled()) {
> -		waiter.notified = false;
>  		status = nfs4_proc_setlk(state, cmd, request);
>  		if ((status != -EAGAIN) || IS_SETLK(cmd))
>  			break;
>  
>  		status = -ERESTARTSYS;
> -		spin_lock_irqsave(&q->lock, flags);
> -		if (waiter.notified) {
> -			spin_unlock_irqrestore(&q->lock, flags);
> -			continue;
> -		}
> -		set_current_state(TASK_INTERRUPTIBLE);
> -		spin_unlock_irqrestore(&q->lock, flags);
> -
> -		freezable_schedule_timeout(NFS4_LOCK_MAXTIMEOUT);
> +		wait_woken(&wait, TASK_INTERRUPTIBLE, NFS4_LOCK_MAXTIMEOUT);

This seems to have dropped the "freezable" part above, such that waiting
on a file lock will prevent (e.g.) a laptop from suspending. I think
that needs to be in here as those waits can be quite long.

>  	}
>  
>  	finish_wait(q, &wait);
Yihao Wu May 8, 2019, 6:18 p.m. UTC | #4
On 2019/5/8 8:24 PM, Jeff Layton wrote:
> On Wed, 2019-05-08 at 17:13 +0800, Yihao Wu wrote:
>> Commit b7dbcc0e433f ""NFSv4.1: Fix a race where CB_NOTIFY_LOCK fails
>> to wake a waiter" found this bug. However it didn't fix it. This can
>> be fixed by adding memory barrier pair.
>>
>> Specifically, if any CB_NOTIFY_LOCK should be handled between unlocking
>> the wait queue and freezable_schedule_timeout, only two cases are
>> possible. So CB_NOTIFY_LOCK will not be dropped unexpectly.
>>
>> 1. The callback thread marks the NFS client as waked. Then NFS client
>> noticed that itself is waked, so it don't goes to sleep. And it cleans
>> its wake mark.
>>
>> 2. The NFS client noticed that itself is not waked yet, so it goes to
>> sleep. No modification will ever happen to the wake mark in between.
>>
> 
> It's not clear to me what you mean by "wake mark" here. Do you mean the
> "notified" flag? This could use a better description.

Yes, by "wake mark" I mean the "notified" flag. I will clear up these ambiguities.

Thanks

> 
>> Fixes: a1d617d ("nfs: allow blocking locks to be awoken by lock callbacks")
>> Signed-off-by: Yihao Wu <wuyihao@linux.alibaba.com>
>> ---
>>  fs/nfs/nfs4proc.c | 21 +++++----------------
>>  1 file changed, 5 insertions(+), 16 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index 741ff8c..f13ea09 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -6867,7 +6867,6 @@ struct nfs4_lock_waiter {
>>  	struct task_struct	*task;
>>  	struct inode		*inode;
>>  	struct nfs_lowner	*owner;
>> -	bool			notified;
>>  };
>>  
>>  static int
>> @@ -6889,13 +6888,13 @@ struct nfs4_lock_waiter {
>>  		/* Make sure it's for the right inode */
>>  		if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh))
>>  			return 0;
>> -
>> -		waiter->notified = true;
>>  	}
>>  
>>  	/* override "private" so we can use default_wake_function */
>>  	wait->private = waiter->task;
>> -	ret = autoremove_wake_function(wait, mode, flags, key);
>> +	ret = woken_wake_function(wait, mode, flags, key);
>> +	if (ret)
>> +		list_del_init(&wait->entry);
>>  	wait->private = waiter;
>>  	return ret;
>>  }
>> @@ -6914,8 +6913,7 @@ struct nfs4_lock_waiter {
>>  				    .s_dev = server->s_dev };
>>  	struct nfs4_lock_waiter waiter = { .task  = current,
>>  					   .inode = state->inode,
>> -					   .owner = &owner,
>> -					   .notified = false };
>> +					   .owner = &owner};
>>  	wait_queue_entry_t wait;
>>  
>>  	/* Don't bother with waitqueue if we don't expect a callback */
>> @@ -6928,21 +6926,12 @@ struct nfs4_lock_waiter {
>>  	add_wait_queue(q, &wait);
>>  
>>  	while(!signalled()) {
>> -		waiter.notified = false;
>>  		status = nfs4_proc_setlk(state, cmd, request);
>>  		if ((status != -EAGAIN) || IS_SETLK(cmd))
>>  			break;
>>  
>>  		status = -ERESTARTSYS;
>> -		spin_lock_irqsave(&q->lock, flags);
>> -		if (waiter.notified) {
>> -			spin_unlock_irqrestore(&q->lock, flags);
>> -			continue;
>> -		}
>> -		set_current_state(TASK_INTERRUPTIBLE);
>> -		spin_unlock_irqrestore(&q->lock, flags);
>> -
>> -		freezable_schedule_timeout(NFS4_LOCK_MAXTIMEOUT);
>> +		wait_woken(&wait, TASK_INTERRUPTIBLE, NFS4_LOCK_MAXTIMEOUT);
> 
> This seems to have dropped the "freezable" part above, such that waiting
> on a file lock will prevent (e.g.) a laptop from suspending. I think
> that needs to be in here as those waits can be quite long.
> 

You're right. I overlooked this. This will be fixed.

Thanks

>>  	}
>>  
>>  	finish_wait(q, &wait);
>
diff mbox series

Patch

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 741ff8c..f13ea09 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6867,7 +6867,6 @@  struct nfs4_lock_waiter {
 	struct task_struct	*task;
 	struct inode		*inode;
 	struct nfs_lowner	*owner;
-	bool			notified;
 };
 
 static int
@@ -6889,13 +6888,13 @@  struct nfs4_lock_waiter {
 		/* Make sure it's for the right inode */
 		if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh))
 			return 0;
-
-		waiter->notified = true;
 	}
 
 	/* override "private" so we can use default_wake_function */
 	wait->private = waiter->task;
-	ret = autoremove_wake_function(wait, mode, flags, key);
+	ret = woken_wake_function(wait, mode, flags, key);
+	if (ret)
+		list_del_init(&wait->entry);
 	wait->private = waiter;
 	return ret;
 }
@@ -6914,8 +6913,7 @@  struct nfs4_lock_waiter {
 				    .s_dev = server->s_dev };
 	struct nfs4_lock_waiter waiter = { .task  = current,
 					   .inode = state->inode,
-					   .owner = &owner,
-					   .notified = false };
+					   .owner = &owner};
 	wait_queue_entry_t wait;
 
 	/* Don't bother with waitqueue if we don't expect a callback */
@@ -6928,21 +6926,12 @@  struct nfs4_lock_waiter {
 	add_wait_queue(q, &wait);
 
 	while(!signalled()) {
-		waiter.notified = false;
 		status = nfs4_proc_setlk(state, cmd, request);
 		if ((status != -EAGAIN) || IS_SETLK(cmd))
 			break;
 
 		status = -ERESTARTSYS;
-		spin_lock_irqsave(&q->lock, flags);
-		if (waiter.notified) {
-			spin_unlock_irqrestore(&q->lock, flags);
-			continue;
-		}
-		set_current_state(TASK_INTERRUPTIBLE);
-		spin_unlock_irqrestore(&q->lock, flags);
-
-		freezable_schedule_timeout(NFS4_LOCK_MAXTIMEOUT);
+		wait_woken(&wait, TASK_INTERRUPTIBLE, NFS4_LOCK_MAXTIMEOUT);
 	}
 
 	finish_wait(q, &wait);