diff mbox series

[4/8] block: support delayed holder registration

Message ID 20210725055458.29008-5-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [1/8] block: make the block holder code optional | expand

Commit Message

Christoph Hellwig July 25, 2021, 5:54 a.m. UTC
device mapper needs to register holders before it is ready to do I/O.
Currently it does so by registering the disk early, which has all kinds
of bad side effects.  Support registering holders on an initialized but
not registered disk instead by delaying the sysfs registration until the
disk is registered.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/genhd.c         | 10 +++++++
 block/holder.c        | 68 ++++++++++++++++++++++++++++++++-----------
 include/linux/genhd.h |  5 ++++
 3 files changed, 66 insertions(+), 17 deletions(-)

Comments

Mike Snitzer July 27, 2021, 4:06 p.m. UTC | #1
On Sun, Jul 25 2021 at  1:54P -0400,
Christoph Hellwig <hch@lst.de> wrote:

> device mapper needs to register holders before it is ready to do I/O.
> Currently it does so by registering the disk early, which has all kinds
> of bad side effects.  Support registering holders on an initialized but
> not registered disk instead by delaying the sysfs registration until the
> disk is registered.

This header starts to shine some light on what is motivating this
series by touching on "all kinds of bad side effects" being fixed.
Any chance you could elaborate what you've noticed/found/hit?

Mike
Christoph Hellwig July 28, 2021, 6:54 a.m. UTC | #2
On Tue, Jul 27, 2021 at 12:06:04PM -0400, Mike Snitzer wrote:
> This header starts to shine some light on what is motivating this
> series by touching on "all kinds of bad side effects" being fixed.
> Any chance you could elaborate what you've noticed/found/hit?

The problem is that it leaves the queue in a weird half state.  The
normal states for a gendisk are:

 1) allocated		(after *alloc_disk)
 2) registered		(after add_disk*)
 3) unregistered	(after del_gendisk)

The delayed queue registration adds a weird half state where it is
sort of registered, except for in sysfs and the elevator.  I have
some pretty big changes between how the disk and queue interact
that tripped over it, but even right now code has to be very careful
in the takedown path to deal with the half-initialized disks.
Mike Snitzer July 29, 2021, 4:32 p.m. UTC | #3
On Sun, Jul 25 2021 at  1:54P -0400,
Christoph Hellwig <hch@lst.de> wrote:

> device mapper needs to register holders before it is ready to do I/O.
> Currently it does so by registering the disk early, which has all kinds
> of bad side effects.  Support registering holders on an initialized but
> not registered disk instead by delaying the sysfs registration until the
> disk is registered.

Maybe expand "bad side effects" in header to include what you detailed here?:
https://listman.redhat.com/archives/dm-devel/2021-July/msg00130.html

Reviewed-by: Mike Snitzer <snitzer@redhat.com>
diff mbox series

Patch

diff --git a/block/genhd.c b/block/genhd.c
index e2708a4a7a47..e3d93b868ec5 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -429,6 +429,16 @@  static void register_disk(struct device *parent, struct gendisk *disk,
 		kobject_create_and_add("holders", &ddev->kobj);
 	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
 
+	/*
+	 * XXX: this is a mess, can't wait for real error handling in add_disk.
+	 * Make sure ->slave_dir is NULL if we failed some of the registration
+	 * so that the cleanup in bd_unlink_disk_holder works properly.
+	 */
+	if (bd_register_pending_holders(disk) < 0) {
+		kobject_put(disk->slave_dir);
+		disk->slave_dir = NULL;
+	}
+
 	if (disk->flags & GENHD_FL_HIDDEN)
 		return;
 
diff --git a/block/holder.c b/block/holder.c
index 11e65d99a9fb..4568cc4f6827 100644
--- a/block/holder.c
+++ b/block/holder.c
@@ -28,6 +28,19 @@  static void del_symlink(struct kobject *from, struct kobject *to)
 	sysfs_remove_link(from, kobject_name(to));
 }
 
+static int __link_disk_holder(struct block_device *bdev, struct gendisk *disk)
+{
+	int ret;
+
+	ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
+	if (ret)
+		return ret;
+	ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
+	if (ret)
+		del_symlink(disk->slave_dir, bdev_kobj(bdev));
+	return ret;
+}
+
 /**
  * bd_link_disk_holder - create symlinks between holding disk and slave bdev
  * @bdev: the claimed slave bdev
@@ -66,7 +79,7 @@  int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 	WARN_ON_ONCE(!bdev->bd_holder);
 
 	/* FIXME: remove the following once add_disk() handles errors */
-	if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir))
+	if (WARN_ON(!bdev->bd_holder_dir))
 		goto out_unlock;
 
 	holder = bd_find_holder_disk(bdev, disk);
@@ -84,28 +97,28 @@  int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 	INIT_LIST_HEAD(&holder->list);
 	holder->bdev = bdev;
 	holder->refcnt = 1;
-
-	ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
-	if (ret)
-		goto out_free;
-
-	ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
-	if (ret)
-		goto out_del;
+	if (disk->slave_dir) {
+		ret = __link_disk_holder(bdev, disk);
+		if (ret) {
+			kfree(holder);
+			goto out_unlock;
+		}
+	}
 
 	list_add(&holder->list, &disk->slave_bdevs);
-	goto out_unlock;
-
-out_del:
-	del_symlink(disk->slave_dir, bdev_kobj(bdev));
-out_free:
-	kfree(holder);
 out_unlock:
 	mutex_unlock(&disk->open_mutex);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(bd_link_disk_holder);
 
+static void __unlink_disk_holder(struct block_device *bdev,
+		struct gendisk *disk)
+{
+	del_symlink(disk->slave_dir, bdev_kobj(bdev));
+	del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
+}
+
 /**
  * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
  * @bdev: the calimed slave bdev
@@ -123,11 +136,32 @@  void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 	mutex_lock(&disk->open_mutex);
 	holder = bd_find_holder_disk(bdev, disk);
 	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
-		del_symlink(disk->slave_dir, bdev_kobj(bdev));
-		del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
+		if (disk->slave_dir)
+			__unlink_disk_holder(bdev, disk);
 		list_del_init(&holder->list);
 		kfree(holder);
 	}
 	mutex_unlock(&disk->open_mutex);
 }
 EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
+
+int bd_register_pending_holders(struct gendisk *disk)
+{
+	struct bd_holder_disk *holder;
+	int ret;
+
+	mutex_lock(&disk->open_mutex);
+	list_for_each_entry(holder, &disk->slave_bdevs, list) {
+		ret = __link_disk_holder(holder->bdev, disk);
+		if (ret)
+			goto out_undo;
+	}
+	mutex_unlock(&disk->open_mutex);
+	return 0;
+
+out_undo:
+	list_for_each_entry_continue_reverse(holder, &disk->slave_bdevs, list)
+		__unlink_disk_holder(holder->bdev, disk);
+	mutex_unlock(&disk->open_mutex);
+	return ret;
+}
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 26c8557e2714..dd95d53c75fa 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -321,6 +321,7 @@  long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
 #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
 int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
 void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
+int bd_register_pending_holders(struct gendisk *disk);
 #else
 static inline int bd_link_disk_holder(struct block_device *bdev,
 				      struct gendisk *disk)
@@ -331,6 +332,10 @@  static inline void bd_unlink_disk_holder(struct block_device *bdev,
 					 struct gendisk *disk)
 {
 }
+static inline int bd_register_pending_holders(struct gendisk *disk)
+{
+	return 0;
+}
 #endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */
 
 dev_t part_devt(struct gendisk *disk, u8 partno);