diff mbox series

[RFC,4/6] bdev: simplify waiting for concurrent claimers

Message ID 20231024-vfs-super-rework-v1-4-37a8aa697148@kernel.org (mailing list archive)
State New, archived
Headers show
Series fs,block: yield devices | expand

Commit Message

Christian Brauner Oct. 24, 2023, 2:53 p.m. UTC
Simplify the mechanism to wait for concurrent block device claimers
and make it possible to introduce an additional state in the following
patches.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 block/bdev.c              | 34 ++++++++++++++++++----------------
 include/linux/blk_types.h |  7 ++++++-
 2 files changed, 24 insertions(+), 17 deletions(-)

Comments

Jan Kara Oct. 25, 2023, 3:54 p.m. UTC | #1
On Tue 24-10-23 16:53:42, Christian Brauner wrote:
> Simplify the mechanism to wait for concurrent block devices claimers
> and make it possible to introduce an additional state in the following
> patches.
> 
> Signed-off-by: Christian Brauner <brauner@kernel.org>

The simplification looks good but a few notes below:

> diff --git a/block/bdev.c b/block/bdev.c
> index 9deacd346192..7d19e04a8df8 100644
> --- a/block/bdev.c
> +++ b/block/bdev.c
> @@ -482,6 +482,14 @@ static bool bd_may_claim(struct block_device *bdev, void *holder,
>  	return true;
>  }
>  
> +static bool wait_claimable(const struct block_device *bdev)
> +{
> +	enum bd_claim bd_claim;
> +
> +	bd_claim = smp_load_acquire(&bdev->bd_claim);
> +	return bd_claim == BD_CLAIM_DEFAULT;
> +}

Aren't you overdoing it here a bit? Given this is used only in a retry
loop and all the checks that need to be reliable are done under bdev_lock,
I'd say having:

	return READ_ONCE(bdev->bd_claim) == BD_CLAIM_DEFAULT;

should be fine here? And probably just inline that into the
wait_var_event() call...

> @@ -511,31 +519,25 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder,
>  	}
>  
>  	/* if claiming is already in progress, wait for it to finish */
> -	if (whole->bd_claiming) {
> -		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
> -		DEFINE_WAIT(wait);
> -
> -		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
> +	if (whole->bd_claim) {

This test implicitly assumes that 0 is BD_CLAIM_DEFAULT. I guess that's
fine although I somewhat prefer explicit value test like:

	if (whole->bd_claim != BD_CLAIM_DEFAULT)

>  		mutex_unlock(&bdev_lock);
> -		schedule();
> -		finish_wait(wq, &wait);
> +		wait_var_event(&whole->bd_claim, wait_claimable(whole));
>  		goto retry;
>  	}
>  
>  	/* yay, all mine */
> -	whole->bd_claiming = holder;
> +	whole->bd_claim = BD_CLAIM_ACQUIRE;

Here I'd use WRITE_ONCE() to avoid KCSAN warnings and having to think
whether this can race with wait_claimable() or not.

>  	mutex_unlock(&bdev_lock);
>  	return 0;
>  }
>  EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
>  
> -static void bd_clear_claiming(struct block_device *whole, void *holder)
> +static void bd_clear_claiming(struct block_device *whole)
>  {
>  	lockdep_assert_held(&bdev_lock);
> -	/* tell others that we're done */
> -	BUG_ON(whole->bd_claiming != holder);
> -	whole->bd_claiming = NULL;
> -	wake_up_bit(&whole->bd_claiming, 0);
> +	smp_store_release(&whole->bd_claim, BD_CLAIM_DEFAULT);
> +	smp_mb();
> +	wake_up_var(&whole->bd_claim);

And here since we are under bdev_lock and the waiter is going to check
under bdev_lock as well, we should be able to do:

	WRITE_ONCE(whole->bd_claim, BD_CLAIM_DEFAULT);
	/* Pairs with barrier in prepare_to_wait_event() -> set_current_state() */
	smp_mb();
	wake_up_var(&whole->bd_claim);

								Honza
Christoph Hellwig Oct. 27, 2023, 7:21 a.m. UTC | #2
On Wed, Oct 25, 2023 at 05:54:39PM +0200, Jan Kara wrote:
> This test implicitely assumes that 0 is BD_CLAIM_DEFAULT. I guess that's
> fine although I somewhat prefer explicit value test like:
>
> 	if (whole->bd_claim != BD_CLAIM_DEFAULT)

I find the BD_CLAIM_DEFAULT confusing to be honest.  I'd expect null
to just be checked as:

 	if (whole->bd_claim)

That being said, instead of doing all the manual atomic magic, why
not add an

	unsigned long		bd_state;

to struct block_device instead of bd_claim, then define a single
bit for a device being claimed and simplify everything while also
giving us space for more bits if we ever need them?
diff mbox series

Patch

diff --git a/block/bdev.c b/block/bdev.c
index 9deacd346192..7d19e04a8df8 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -482,6 +482,14 @@  static bool bd_may_claim(struct block_device *bdev, void *holder,
 	return true;
 }
 
+static bool wait_claimable(const struct block_device *bdev)
+{
+	enum bd_claim bd_claim;
+
+	bd_claim = smp_load_acquire(&bdev->bd_claim);
+	return bd_claim == BD_CLAIM_DEFAULT;
+}
+
 /**
  * bd_prepare_to_claim - claim a block device
  * @bdev: block device of interest
@@ -490,7 +498,7 @@  static bool bd_may_claim(struct block_device *bdev, void *holder,
  *
  * Claim @bdev.  This function fails if @bdev is already claimed by another
  * holder and waits if another claiming is in progress. return, the caller
- * has ownership of bd_claiming and bd_holder[s].
+ * has ownership of bd_claim and bd_holder[s].
  *
  * RETURNS:
  * 0 if @bdev can be claimed, -EBUSY otherwise.
@@ -511,31 +519,25 @@  int bd_prepare_to_claim(struct block_device *bdev, void *holder,
 	}
 
 	/* if claiming is already in progress, wait for it to finish */
-	if (whole->bd_claiming) {
-		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
-		DEFINE_WAIT(wait);
-
-		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
+	if (whole->bd_claim) {
 		mutex_unlock(&bdev_lock);
-		schedule();
-		finish_wait(wq, &wait);
+		wait_var_event(&whole->bd_claim, wait_claimable(whole));
 		goto retry;
 	}
 
 	/* yay, all mine */
-	whole->bd_claiming = holder;
+	whole->bd_claim = BD_CLAIM_ACQUIRE;
 	mutex_unlock(&bdev_lock);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
 
-static void bd_clear_claiming(struct block_device *whole, void *holder)
+static void bd_clear_claiming(struct block_device *whole)
 {
 	lockdep_assert_held(&bdev_lock);
-	/* tell others that we're done */
-	BUG_ON(whole->bd_claiming != holder);
-	whole->bd_claiming = NULL;
-	wake_up_bit(&whole->bd_claiming, 0);
+	smp_store_release(&whole->bd_claim, BD_CLAIM_DEFAULT);
+	smp_mb();
+	wake_up_var(&whole->bd_claim);
 }
 
 /**
@@ -565,7 +567,7 @@  static void bd_finish_claiming(struct block_device *bdev, void *holder,
 	bdev->bd_holder = holder;
 	bdev->bd_holder_ops = hops;
 	mutex_unlock(&bdev->bd_holder_lock);
-	bd_clear_claiming(whole, holder);
+	bd_clear_claiming(whole);
 	mutex_unlock(&bdev_lock);
 }
 
@@ -581,7 +583,7 @@  static void bd_finish_claiming(struct block_device *bdev, void *holder,
 void bd_abort_claiming(struct block_device *bdev, void *holder)
 {
 	mutex_lock(&bdev_lock);
-	bd_clear_claiming(bdev_whole(bdev), holder);
+	bd_clear_claiming(bdev_whole(bdev));
 	mutex_unlock(&bdev_lock);
 }
 EXPORT_SYMBOL(bd_abort_claiming);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 749203277fee..cbef041fd868 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -37,6 +37,11 @@  struct bio_crypt_ctx;
 #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
 #define SECTOR_MASK		(PAGE_SECTORS - 1)
 
+enum bd_claim {
+	BD_CLAIM_DEFAULT = 0,
+	BD_CLAIM_ACQUIRE = 1,
+};
+
 struct block_device {
 	sector_t		bd_start_sect;
 	sector_t		bd_nr_sectors;
@@ -52,7 +57,7 @@  struct block_device {
 	atomic_t		bd_openers;
 	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
 	struct inode *		bd_inode;	/* will die */
-	void *			bd_claiming;
+	enum bd_claim		bd_claim;
 	void *			bd_holder;
 	const struct blk_holder_ops *bd_holder_ops;
 	struct mutex		bd_holder_lock;