diff mbox series

[02/12] bcache: fix a lost wake-up problem caused by mca_cannibalize_lock

Message ID 20191113080326.69989-3-colyli@suse.de (mailing list archive)
State New, archived
Headers show
Series bcache patches for Linux v5.5 | expand

Commit Message

Coly Li Nov. 13, 2019, 8:03 a.m. UTC
From: Guoju Fang <fangguoju@gmail.com>

This patch fixes a lost wake-up problem caused by the race between
mca_cannibalize_lock and bch_cannibalize_unlock.

Consider two processes, A and B. Process A is executing
mca_cannibalize_lock, while process B holds c->btree_cache_alloc_lock
and is executing bch_cannibalize_unlock. The problem occurs after
process A executes cmpxchg but before it executes prepare_to_wait. In
this window process B executes wake_up; only after that does process A
execute prepare_to_wait and set its state to TASK_UNINTERRUPTIBLE. Then
process A goes to sleep and no one will ever wake it up. This problem
may cause the bcache device to hang.

Signed-off-by: Guoju Fang <fangguoju@gmail.com>
Signed-off-by: Coly Li <colyli@suse.de>
---
 drivers/md/bcache/bcache.h |  1 +
 drivers/md/bcache/btree.c  | 12 ++++++++----
 drivers/md/bcache/super.c  |  1 +
 3 files changed, 10 insertions(+), 4 deletions(-)

Comments

Eric Wheeler Nov. 17, 2019, 3:32 a.m. UTC | #1
On Wed, 13 Nov 2019, Coly Li wrote:

> From: Guoju Fang <fangguoju@gmail.com>
> 
> This patch fix a lost wake-up problem caused by the race between
> mca_cannibalize_lock and bch_cannibalize_unlock.
> 
> Consider two processes, A and B. Process A is executing
> mca_cannibalize_lock, while process B takes c->btree_cache_alloc_lock
> and is executing bch_cannibalize_unlock. The problem happens that after
> process A executes cmpxchg and will execute prepare_to_wait. In this
> timeslice process B executes wake_up, but after that process A executes
> prepare_to_wait and set the state to TASK_INTERRUPTIBLE. Then process A
> goes to sleep but no one will wake up it. This problem may cause bcache
> device to dead.
> 
> Signed-off-by: Guoju Fang <fangguoju@gmail.com>
> Signed-off-by: Coly Li <colyli@suse.de>

Add cc stable?

-Eric


> ---
>  drivers/md/bcache/bcache.h |  1 +
>  drivers/md/bcache/btree.c  | 12 ++++++++----
>  drivers/md/bcache/super.c  |  1 +
>  3 files changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
> index 013e35a9e317..3653faf3bf48 100644
> --- a/drivers/md/bcache/bcache.h
> +++ b/drivers/md/bcache/bcache.h
> @@ -582,6 +582,7 @@ struct cache_set {
>  	 */
>  	wait_queue_head_t	btree_cache_wait;
>  	struct task_struct	*btree_cache_alloc_lock;
> +	spinlock_t		btree_cannibalize_lock;
>  
>  	/*
>  	 * When we free a btree node, we increment the gen of the bucket the
> diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
> index 00523cd1db80..39d7fc1ef1ee 100644
> --- a/drivers/md/bcache/btree.c
> +++ b/drivers/md/bcache/btree.c
> @@ -910,15 +910,17 @@ static struct btree *mca_find(struct cache_set *c, struct bkey *k)
>  
>  static int mca_cannibalize_lock(struct cache_set *c, struct btree_op *op)
>  {
> -	struct task_struct *old;
> -
> -	old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
> -	if (old && old != current) {
> +	spin_lock(&c->btree_cannibalize_lock);
> +	if (likely(c->btree_cache_alloc_lock == NULL)) {
> +		c->btree_cache_alloc_lock = current;
> +	} else if (c->btree_cache_alloc_lock != current) {
>  		if (op)
>  			prepare_to_wait(&c->btree_cache_wait, &op->wait,
>  					TASK_UNINTERRUPTIBLE);
> +		spin_unlock(&c->btree_cannibalize_lock);
>  		return -EINTR;
>  	}
> +	spin_unlock(&c->btree_cannibalize_lock);
>  
>  	return 0;
>  }
> @@ -953,10 +955,12 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op,
>   */
>  static void bch_cannibalize_unlock(struct cache_set *c)
>  {
> +	spin_lock(&c->btree_cannibalize_lock);
>  	if (c->btree_cache_alloc_lock == current) {
>  		c->btree_cache_alloc_lock = NULL;
>  		wake_up(&c->btree_cache_wait);
>  	}
> +	spin_unlock(&c->btree_cannibalize_lock);
>  }
>  
>  static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op,
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index 20ed838e9413..ebb854ed05a4 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -1769,6 +1769,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
>  	sema_init(&c->sb_write_mutex, 1);
>  	mutex_init(&c->bucket_lock);
>  	init_waitqueue_head(&c->btree_cache_wait);
> +	spin_lock_init(&c->btree_cannibalize_lock);
>  	init_waitqueue_head(&c->bucket_wait);
>  	init_waitqueue_head(&c->gc_wait);
>  	sema_init(&c->uuid_write_mutex, 1);
> -- 
> 2.16.4
> 
>
Coly Li Nov. 18, 2019, 3:40 p.m. UTC | #2
On 2019/11/17 11:32 上午, Eric Wheeler wrote:
> On Wed, 13 Nov 2019, Coly Li wrote:
> 
>> From: Guoju Fang <fangguoju@gmail.com>
>>
>> This patch fix a lost wake-up problem caused by the race between
>> mca_cannibalize_lock and bch_cannibalize_unlock.
>>
>> Consider two processes, A and B. Process A is executing
>> mca_cannibalize_lock, while process B takes c->btree_cache_alloc_lock
>> and is executing bch_cannibalize_unlock. The problem happens that after
>> process A executes cmpxchg and will execute prepare_to_wait. In this
>> timeslice process B executes wake_up, but after that process A executes
>> prepare_to_wait and set the state to TASK_INTERRUPTIBLE. Then process A
>> goes to sleep but no one will wake up it. This problem may cause bcache
>> device to dead.
>>
>> Signed-off-by: Guoju Fang <fangguoju@gmail.com>
>> Signed-off-by: Coly Li <colyli@suse.de>
> 
> Add cc stable?
> 

Yes, I agree. These patches have already been applied by Jens, so how about
explicitly sending them to linux-stable after they go upstream?

Thanks.

Coly Li


> -Eric
> 
> 
>> ---
>>  drivers/md/bcache/bcache.h |  1 +
>>  drivers/md/bcache/btree.c  | 12 ++++++++----
>>  drivers/md/bcache/super.c  |  1 +
>>  3 files changed, 10 insertions(+), 4 deletions(-)
>>
[snip]
diff mbox series

Patch

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 013e35a9e317..3653faf3bf48 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -582,6 +582,7 @@  struct cache_set {
 	 */
 	wait_queue_head_t	btree_cache_wait;
 	struct task_struct	*btree_cache_alloc_lock;
+	spinlock_t		btree_cannibalize_lock;
 
 	/*
 	 * When we free a btree node, we increment the gen of the bucket the
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 00523cd1db80..39d7fc1ef1ee 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -910,15 +910,17 @@  static struct btree *mca_find(struct cache_set *c, struct bkey *k)
 
 static int mca_cannibalize_lock(struct cache_set *c, struct btree_op *op)
 {
-	struct task_struct *old;
-
-	old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
-	if (old && old != current) {
+	spin_lock(&c->btree_cannibalize_lock);
+	if (likely(c->btree_cache_alloc_lock == NULL)) {
+		c->btree_cache_alloc_lock = current;
+	} else if (c->btree_cache_alloc_lock != current) {
 		if (op)
 			prepare_to_wait(&c->btree_cache_wait, &op->wait,
 					TASK_UNINTERRUPTIBLE);
+		spin_unlock(&c->btree_cannibalize_lock);
 		return -EINTR;
 	}
+	spin_unlock(&c->btree_cannibalize_lock);
 
 	return 0;
 }
@@ -953,10 +955,12 @@  static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op,
  */
 static void bch_cannibalize_unlock(struct cache_set *c)
 {
+	spin_lock(&c->btree_cannibalize_lock);
 	if (c->btree_cache_alloc_lock == current) {
 		c->btree_cache_alloc_lock = NULL;
 		wake_up(&c->btree_cache_wait);
 	}
+	spin_unlock(&c->btree_cannibalize_lock);
 }
 
 static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op,
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 20ed838e9413..ebb854ed05a4 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1769,6 +1769,7 @@  struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 	sema_init(&c->sb_write_mutex, 1);
 	mutex_init(&c->bucket_lock);
 	init_waitqueue_head(&c->btree_cache_wait);
+	spin_lock_init(&c->btree_cannibalize_lock);
 	init_waitqueue_head(&c->bucket_wait);
 	init_waitqueue_head(&c->gc_wait);
 	sema_init(&c->uuid_write_mutex, 1);