
[RFC,v3,1/3] sbitmap: fix that the same waitqueue can be woken up continuously

Message ID 20220710042200.20936-2-yukuai1@huaweicloud.com (mailing list archive)
State New, archived
Series bugfix for sbitmap

Commit Message

Yu Kuai July 10, 2022, 4:21 a.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

__sbq_wake_up		__sbq_wake_up
 sbq_wake_ptr -> assume	0
			 sbq_wake_ptr -> 0
 atomic_dec_return
			atomic_dec_return
 atomic_cmpxchg -> succeed
			 atomic_cmpxchg -> failed
			  return true

			__sbq_wake_up
			 sbq_wake_ptr
			  atomic_read(&sbq->wake_index) -> still 0
 sbq_index_atomic_inc -> inc to 1
			  if (waitqueue_active(&ws->wait))
			   if (wake_index != atomic_read(&sbq->wake_index))
			    atomic_set -> reset from 1 to 0
 wake_up_nr -> wake up first waitqueue
			    // continue to wake up in first waitqueue

Fix the problem by using atomic_cmpxchg() instead of atomic_set()
to update 'wake_index'.

Fixes: 417232880c8a ("sbitmap: Replace cmpxchg with xchg")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 lib/sbitmap.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)
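
For context, the pre-patch sbq_wake_ptr() can be reconstructed from the
diff at the bottom of this page. A minimal sketch (the final return NULL
is assumed from the function's shape), with a comment marking the racy
atomic_set():

static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
{
	int i, wake_index;

	if (!atomic_read(&sbq->ws_active))
		return NULL;

	wake_index = atomic_read(&sbq->wake_index);
	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		struct sbq_wait_state *ws = &sbq->ws[wake_index];

		if (waitqueue_active(&ws->wait)) {
			/*
			 * Racy: if another CPU ran sbq_index_atomic_inc()
			 * after we sampled 'wake_index', this store resets
			 * sbq->wake_index to the stale value, so the next
			 * wakeup hits the same waitqueue again.
			 */
			if (wake_index != atomic_read(&sbq->wake_index))
				atomic_set(&sbq->wake_index, wake_index);
			return ws;
		}

		wake_index = sbq_index_inc(wake_index);
	}

	return NULL;
}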

Comments

Jan Kara July 11, 2022, 2:20 p.m. UTC | #1
On Sun 10-07-22 12:21:58, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> __sbq_wake_up		__sbq_wake_up
>  sbq_wake_ptr -> assume	0
> 			 sbq_wake_ptr -> 0
>  atomic_dec_return
> 			atomic_dec_return
>  atomic_cmpxchg -> succeed
> 			 atomic_cmpxchg -> failed
> 			  return true
> 
> 			__sbq_wake_up
> 			 sbq_wake_ptr
> 			  atomic_read(&sbq->wake_index) -> still 0
>  sbq_index_atomic_inc -> inc to 1
> 			  if (waitqueue_active(&ws->wait))
> 			   if (wake_index != atomic_read(&sbq->wake_index))
> 			    atomic_set -> reset from 1 to 0
>  wake_up_nr -> wake up first waitqueue
> 			    // continue to wake up in first waitqueue
> 
> Fix the problem by using atomic_cmpxchg() instead of atomic_set()
> to update 'wake_index'.
> 
> Fixes: 417232880c8a ("sbitmap: Replace cmpxchg with xchg")
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>

I don't think this patch is really needed after the following patches.  As
I see it, wake_index is just a performance optimization (plus a fairness
improvement), but in principle the code in sbq_wake_ptr() is always prone
to races, as the waitqueue it returns needn't have any waiters by the time
we return. So for correctness the check-and-retry loop needs to happen at
a higher level than inside sbq_wake_ptr(), and an occasional wrong setting
of wake_index will result only in a bit of unfairness and more scanning
looking for a suitable waitqueue, but I don't think that really justifies
the cost of atomic operations in a cmpxchg loop...

								Honza
> ---
>  lib/sbitmap.c | 15 ++++++++++-----
>  1 file changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/sbitmap.c b/lib/sbitmap.c
> index 29eb0484215a..b46fce1beb3a 100644
> --- a/lib/sbitmap.c
> +++ b/lib/sbitmap.c
> @@ -579,19 +579,24 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
>  
>  static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
>  {
> -	int i, wake_index;
> +	int i, wake_index, old_wake_index;
>  
> +again:
>  	if (!atomic_read(&sbq->ws_active))
>  		return NULL;
>  
> -	wake_index = atomic_read(&sbq->wake_index);
> +	old_wake_index = wake_index = atomic_read(&sbq->wake_index);
>  	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
>  		struct sbq_wait_state *ws = &sbq->ws[wake_index];
>  
>  		if (waitqueue_active(&ws->wait)) {
> -			if (wake_index != atomic_read(&sbq->wake_index))
> -				atomic_set(&sbq->wake_index, wake_index);
> -			return ws;
> +			if (wake_index == old_wake_index)
> +				return ws;
> +
> +			if (atomic_cmpxchg(&sbq->wake_index, old_wake_index,
> +					   wake_index) == old_wake_index)
> +				return ws;
> +			goto again;
>  		}
>  
>  		wake_index = sbq_index_inc(wake_index);
> -- 
> 2.31.1
>
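
The "higher level" retry Jan mentions is already visible in the callers:
__sbq_wake_up() returns true when its wait_cnt cmpxchg loses and the
wakeup must be retried, and sbitmap_queue_wake_up() loops on it. A
condensed sketch of the code of that era (simplified and assumed shape;
details may differ from the exact tree):

static bool __sbq_wake_up(struct sbitmap_queue *sbq)
{
	struct sbq_wait_state *ws;
	int wait_cnt;

	ws = sbq_wake_ptr(sbq);
	if (!ws)
		return false;

	wait_cnt = atomic_dec_return(&ws->wait_cnt);
	if (wait_cnt <= 0) {
		unsigned int wake_batch = READ_ONCE(sbq->wake_batch);

		/* Only the winner of this cmpxchg wakes the batch. */
		if (atomic_cmpxchg(&ws->wait_cnt, wait_cnt,
				   wake_batch) == wait_cnt) {
			sbq_index_atomic_inc(&sbq->wake_index);
			wake_up_nr(&ws->wait, wake_batch);
			return false;
		}
		return true;	/* lost the race: caller retries */
	}
	return false;
}

void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
{
	while (__sbq_wake_up(sbq))
		;
}
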
Yu Kuai July 12, 2022, 1:25 p.m. UTC | #2
Hi!

On 2022/07/11 22:20, Jan Kara wrote:
> On Sun 10-07-22 12:21:58, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> __sbq_wake_up		__sbq_wake_up
>>   sbq_wake_ptr -> assume	0
>> 			 sbq_wake_ptr -> 0
>>   atomic_dec_return
>> 			atomic_dec_return
>>   atomic_cmpxchg -> succeed
>> 			 atomic_cmpxchg -> failed
>> 			  return true
>>
>> 			__sbq_wake_up
>> 			 sbq_wake_ptr
>> 			  atomic_read(&sbq->wake_index) -> still 0
>>   sbq_index_atomic_inc -> inc to 1
>> 			  if (waitqueue_active(&ws->wait))
>> 			   if (wake_index != atomic_read(&sbq->wake_index))
>> 			    atomic_set -> reset from 1 to 0
>>   wake_up_nr -> wake up first waitqueue
>> 			    // continue to wake up in first waitqueue
>>
>> Fix the problem by using atomic_cmpxchg() instead of atomic_set()
>> to update 'wake_index'.
>>
>> Fixes: 417232880c8a ("sbitmap: Replace cmpxchg with xchg")
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> 
> I don't think this patch is really needed after the following patches.  As
> I see it, wake_index is just a performance optimization (plus a fairness
> improvement), but in principle the code in sbq_wake_ptr() is always prone
> to races, as the waitqueue it returns needn't have any waiters by the time
> we return. So for correctness the check-and-retry loop needs to happen at
> a higher level than inside sbq_wake_ptr(), and an occasional wrong setting
> of wake_index will result only in a bit of unfairness and more scanning
> looking for a suitable waitqueue, but I don't think that really justifies
> the cost of atomic operations in a cmpxchg loop...

You're right that this patch just improves fairness. However, in
heavy-load tests I found that the 'wrong setting of wake_index' can
happen frequently; as a consequence, some waitqueues can be empty while
others have a lot of waiters.

There is still a lot of work needed to fix the unfairness thoroughly,
so I can remove this patch for now.

Thanks,
Kuai
> 
> 								Honza
>> ---
>>   lib/sbitmap.c | 15 ++++++++++-----
>>   1 file changed, 10 insertions(+), 5 deletions(-)
>>
>> diff --git a/lib/sbitmap.c b/lib/sbitmap.c
>> index 29eb0484215a..b46fce1beb3a 100644
>> --- a/lib/sbitmap.c
>> +++ b/lib/sbitmap.c
>> @@ -579,19 +579,24 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
>>   
>>   static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
>>   {
>> -	int i, wake_index;
>> +	int i, wake_index, old_wake_index;
>>   
>> +again:
>>   	if (!atomic_read(&sbq->ws_active))
>>   		return NULL;
>>   
>> -	wake_index = atomic_read(&sbq->wake_index);
>> +	old_wake_index = wake_index = atomic_read(&sbq->wake_index);
>>   	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
>>   		struct sbq_wait_state *ws = &sbq->ws[wake_index];
>>   
>>   		if (waitqueue_active(&ws->wait)) {
>> -			if (wake_index != atomic_read(&sbq->wake_index))
>> -				atomic_set(&sbq->wake_index, wake_index);
>> -			return ws;
>> +			if (wake_index == old_wake_index)
>> +				return ws;
>> +
>> +			if (atomic_cmpxchg(&sbq->wake_index, old_wake_index,
>> +					   wake_index) == old_wake_index)
>> +				return ws;
>> +			goto again;
>>   		}
>>   
>>   		wake_index = sbq_index_inc(wake_index);
>> -- 
>> 2.31.1
>>

Patch

diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 29eb0484215a..b46fce1beb3a 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -579,19 +579,24 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
 
 static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 {
-	int i, wake_index;
+	int i, wake_index, old_wake_index;
 
+again:
 	if (!atomic_read(&sbq->ws_active))
 		return NULL;
 
-	wake_index = atomic_read(&sbq->wake_index);
+	old_wake_index = wake_index = atomic_read(&sbq->wake_index);
 	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
 		struct sbq_wait_state *ws = &sbq->ws[wake_index];
 
 		if (waitqueue_active(&ws->wait)) {
-			if (wake_index != atomic_read(&sbq->wake_index))
-				atomic_set(&sbq->wake_index, wake_index);
-			return ws;
+			if (wake_index == old_wake_index)
+				return ws;
+
+			if (atomic_cmpxchg(&sbq->wake_index, old_wake_index,
+					   wake_index) == old_wake_index)
+				return ws;
+			goto again;
 		}
 
 		wake_index = sbq_index_inc(wake_index);
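
As a standalone illustration of why the patch swaps atomic_set() for
atomic_cmpxchg(), here is a hypothetical userspace analogue using C11
atomics (not kernel code; the variable names mirror the diagram but are
invented for this demo). A plain store can silently undo a concurrent
increment of wake_index, while compare-exchange fails and forces the
caller to re-read and retry:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int wake_index;

int main(void)
{
	/* Thread B samples wake_index == 0 and starts scanning. */
	int sampled = atomic_load(&wake_index);

	/* Meanwhile thread A completes a wakeup and advances the index
	 * (the sbq_index_atomic_inc() in the commit-message diagram). */
	atomic_fetch_add(&wake_index, 1);

	/* Pre-patch behaviour: an unconditional store drags the index
	 * back to the stale value, so the same waitqueue gets picked
	 * again. */
	atomic_store(&wake_index, sampled);
	printf("after atomic_set-style store: %d (increment lost)\n",
	       atomic_load(&wake_index));

	/* Patched behaviour: redo A's increment, then try cmpxchg. It
	 * fails because wake_index no longer holds the sampled value,
	 * so the caller jumps back to 'again:' and re-reads. */
	atomic_store(&wake_index, sampled + 1);
	int expected = sampled;
	bool ok = atomic_compare_exchange_strong(&wake_index, &expected,
						 sampled);
	printf("cmpxchg succeeded=%d, wake_index=%d (increment kept)\n",
	       ok, atomic_load(&wake_index));
	return 0;
}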