diff mbox series

[RFC,v2,07/14] md/md-llbitmap: implement hidden disk to manage bitmap IO

Message ID 20250328060853.4124527-8-yukuai1@huaweicloud.com (mailing list archive)
State New
Headers show
Series md: introduce a new lockless bitmap | expand

Commit Message

Yu Kuai March 28, 2025, 6:08 a.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

The bitmap is stored on each member disk. The old bitmap implementation
allocates memory and manages the data by itself: reads and writes attach
the allocated pages to bios for the member disks, and a bitmap-level
spinlock is used for synchronization.

For llbitmap, a hidden disk named mdxxx_bitmap is created for the bitmap;
see llbitmap_add_disk() for details. A file is also created to manage
bitmap IO for this disk. Bitmap reads and writes are converted to buffered
IO on this file.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/md/md-llbitmap.c | 238 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 238 insertions(+)
diff mbox series

Patch

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 1f97b6868279..bbd8a7c99577 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -56,8 +56,16 @@ 
  *   unwritten blocks.
  * - After resync is done, change state from Syncing to Dirty first, in case
  *   Startwrite happen before the state is Clean.
+ *
+ * ##### Bitmap IO
+ *
+ * A hidden disk, named mdxxx_bitmap, is created for the bitmap; see
+ * llbitmap_add_disk() for details. A file is also created to manage bitmap IO
+ * for this disk; see llbitmap_open_disk() for details. Bitmap reads and writes
+ * are converted to buffered IO on this file.
  */
 
+/* capacity handed to set_capacity() for the hidden bitmap disk */
+#define BITMAP_MAX_SECTOR (128 * 2)
 #define BITMAP_MAX_PAGES 32
 #define BITMAP_SB_SIZE 1024
 
@@ -135,6 +143,13 @@  struct llbitmap {
 	__u64	events_cleared;
 };
 
+/*
+ * Per-member-disk clone of a bio that was submitted to the hidden bitmap
+ * disk. The completion handlers recover this wrapper from the embedded bio
+ * with container_of() to learn which rdev the IO was sent to.
+ */
+struct llbitmap_bio {
+	/* member disk this clone was submitted to */
+	struct md_rdev *rdev;
+	/*
+	 * Embedded bio, allocated from llbitmap->bio_set.
+	 * NOTE(review): container_of() on a bio_set allocation presumably relies
+	 * on the bio_set's front_pad covering the fields above - confirm where
+	 * the bio_set is initialised.
+	 */
+	struct bio bio;
+};
+
+/* workqueue on which llbitmap->retry_work is queued to retry failed reads */
+static struct workqueue_struct *md_llbitmap_io_wq;
+
 static char state_machine[nr_llbitmap_state][nr_llbitmap_action] = {
 	[BitUnwritten] = {BitDirty, BitNone, BitNone, BitNone, BitNone, BitNone, BitNone, BitNone},
 	[BitClean] = {BitDirty, BitNone, BitNone, BitNone, BitNone, BitNone, BitUnwritten, BitNeedSync},
@@ -254,3 +269,226 @@  static enum llbitmap_state llbitmap_state_machine(struct llbitmap *llbitmap,
 
 	return state;
 }
+
+/*
+ * Completion handler for a per-rdev clone of a bitmap write.
+ *
+ * bio->bi_private is the bio that was submitted to the hidden bitmap disk.
+ * A successful clone clears the error status on that bio; a failed clone
+ * logs the failure and reports the member disk through md_error(). In both
+ * cases the clone is released and bio_endio() is called on the original bio.
+ */
+static void llbitmap_end_write(struct bio *bio)
+{
+	struct bio *orig = bio->bi_private;
+
+	if (bio->bi_status != BLK_STS_OK) {
+		struct llbitmap_bio *wrapper =
+			container_of(bio, struct llbitmap_bio, bio);
+		struct md_rdev *failed = wrapper->rdev;
+
+		pr_err("%s: %s: bitmap write failed for %pg\n", __func__,
+		       mdname(failed->mddev), failed->bdev);
+		md_error(failed->mddev, failed);
+	} else {
+		WRITE_ONCE(orig->bi_status, BLK_STS_OK);
+	}
+
+	bio_put(bio);
+	bio_endio(orig);
+}
+
+/*
+ * Hand a bitmap read over for retry: append @bio to llbitmap->retry_list and
+ * queue llbitmap->retry_work on md_llbitmap_io_wq, both under retry_lock.
+ * The irqsave/irqrestore lock variants are used, so the caller's interrupt
+ * state is preserved.
+ */
+static void md_llbitmap_retry_read(struct llbitmap *llbitmap, struct bio *bio)
+{
+	unsigned long irq_state;
+
+	spin_lock_irqsave(&llbitmap->retry_lock, irq_state);
+	bio_list_add(&llbitmap->retry_list, bio);
+	queue_work(md_llbitmap_io_wq, &llbitmap->retry_work);
+	spin_unlock_irqrestore(&llbitmap->retry_lock, irq_state);
+}
+
+/*
+ * Completion handler for a per-rdev clone of a bitmap read.
+ *
+ * bio->bi_private is the bio that was submitted to the hidden bitmap disk.
+ * On success the parent's error status is cleared and the parent is
+ * completed. On failure the member disk is reported through md_error() and
+ * the parent is handed to md_llbitmap_retry_read() so the read can be
+ * retried; the parent is not completed here in that case.
+ */
+static void llbitmap_end_read(struct bio *bio)
+{
+	struct bio *parent = bio->bi_private;
+	struct llbitmap_bio *llbitmap_bio;
+	struct llbitmap *llbitmap;
+	struct md_rdev *rdev;
+
+	if (bio->bi_status == BLK_STS_OK) {
+		WRITE_ONCE(parent->bi_status, BLK_STS_OK);
+		bio_put(bio);
+		bio_endio(parent);
+		return;
+	}
+
+	llbitmap_bio = container_of(bio, struct llbitmap_bio, bio);
+	rdev = llbitmap_bio->rdev;
+	/*
+	 * The retry list lives in the llbitmap; look it up through the rdev,
+	 * the same way llbitmap_submit_bio() gets it from mddev->bitmap.
+	 * Previously this pointer was passed on uninitialised.
+	 */
+	llbitmap = rdev->mddev->bitmap;
+
+	pr_err("%s: %s: bitmap read failed for %pg\n", __func__,
+	       mdname(rdev->mddev), rdev->bdev);
+	md_error(rdev->mddev, rdev);
+	bio_put(bio);
+	md_llbitmap_retry_read(llbitmap, parent);
+}
+
+/*
+ * Work function for llbitmap->retry_work: drain llbitmap->retry_list and
+ * re-issue each queued read to the first member disk that is in the array
+ * (raid_disk >= 0) and not marked Faulty.
+ *
+ * If no such member disk exists, the queued bio is completed with
+ * BLK_STS_IOERR instead of being dropped, and the remaining list entries are
+ * still processed. Previously the bio was never completed in that case and
+ * the rest of the list was left behind.
+ *
+ * NOTE(review): a retry picks the first non-Faulty rdev, so this relies on
+ * md_error() having marked the failing rdev Faulty - confirm that a read
+ * cannot be re-issued to the same failing disk forever.
+ */
+static void md_llbitmap_retry_fn(struct work_struct *work)
+{
+	struct llbitmap *llbitmap =
+		container_of(work, struct llbitmap, retry_work);
+	struct mddev *mddev = llbitmap->mddev;
+	struct md_rdev *rdev;
+	struct bio *bio;
+
+	for (;;) {
+		bool submitted = false;
+
+		spin_lock_irq(&llbitmap->retry_lock);
+		bio = bio_list_pop(&llbitmap->retry_list);
+		spin_unlock_irq(&llbitmap->retry_lock);
+
+		if (!bio)
+			return;
+
+		rdev_for_each(rdev, mddev) {
+			struct llbitmap_bio *llbitmap_bio;
+			struct bio *new;
+
+			if (rdev->raid_disk < 0 ||
+			    test_bit(Faulty, &rdev->flags))
+				continue;
+
+			new = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO,
+					      &llbitmap->bio_set);
+			new->bi_iter.bi_sector = bio->bi_iter.bi_sector +
+						 rdev->sb_start +
+						 mddev->bitmap_info.offset;
+			new->bi_opf |= REQ_SYNC | REQ_IDLE | REQ_META;
+			new->bi_private = bio;
+			new->bi_end_io = llbitmap_end_read;
+
+			llbitmap_bio = container_of(new, struct llbitmap_bio,
+						    bio);
+			llbitmap_bio->rdev = rdev;
+
+			submit_bio_noacct(new);
+			submitted = true;
+			break;
+		}
+
+		if (!submitted) {
+			/* nobody left to read from: fail the original bio */
+			bio->bi_status = BLK_STS_IOERR;
+			bio_endio(bio);
+		}
+	}
+}
+
+/*
+ * ->submit_bio for the hidden bitmap disk.
+ *
+ * REQ_PREFLUSH is stripped and an empty bio is completed immediately.
+ * Otherwise the bio is cloned to member disks that are in the array
+ * (raid_disk >= 0) and not Faulty, at sector
+ * bi_sector + rdev->sb_start + bitmap_info.offset:
+ *  - a write is cloned to every such disk, with REQ_FUA set;
+ *  - a read is cloned to the first such disk only.
+ *
+ * The original bio starts out as BLK_STS_IOERR; the completion handlers clear
+ * that status when a clone succeeds. One extra "remaining" reference is taken
+ * per clone, and the final bio_endio() here drops the submitter's own.
+ */
+static void llbitmap_submit_bio(struct bio *bio)
+{
+	struct mddev *mddev = bio->bi_bdev->bd_disk->private_data;
+	struct llbitmap *llbitmap = mddev->bitmap;
+	struct md_rdev *rdev;
+	bool is_write;
+
+	if (unlikely(bio->bi_opf & REQ_PREFLUSH))
+		bio->bi_opf &= ~REQ_PREFLUSH;
+
+	if (bio_sectors(bio) == 0) {
+		bio_endio(bio);
+		return;
+	}
+
+	/* pessimistic default, cleared once any member disk IO succeeds */
+	bio->bi_status = BLK_STS_IOERR;
+	is_write = bio_data_dir(bio) == WRITE;
+
+	rdev_for_each(rdev, mddev) {
+		struct llbitmap_bio *wrapper;
+		struct bio *clone;
+
+		if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
+			continue;
+
+		clone = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO,
+					&llbitmap->bio_set);
+		clone->bi_iter.bi_sector = bio->bi_iter.bi_sector +
+					   rdev->sb_start +
+					   mddev->bitmap_info.offset;
+		clone->bi_opf |= REQ_SYNC | REQ_IDLE | REQ_META;
+
+		wrapper = container_of(clone, struct llbitmap_bio, bio);
+		wrapper->rdev = rdev;
+		bio_inc_remaining(bio);
+		clone->bi_private = bio;
+
+		if (!is_write) {
+			/* one successful read is enough */
+			clone->bi_end_io = llbitmap_end_read;
+			submit_bio_noacct(clone);
+			break;
+		}
+
+		clone->bi_end_io = llbitmap_end_write;
+		clone->bi_opf |= REQ_FUA;
+		submit_bio_noacct(clone);
+	}
+
+	bio_endio(bio);
+}
+
+/*
+ * Block device operations of the hidden bitmap disk; all IO is routed
+ * through llbitmap_submit_bio(). Only referenced from this file in the code
+ * visible here, so it gets internal linkage like the functions around it.
+ */
+static const struct block_device_operations llbitmap_fops = {
+	.owner = THIS_MODULE,
+	.submit_bio = llbitmap_submit_bio,
+};
+
+/*
+ * Allocate and register the hidden gendisk "<mdname>_bitmap" that fronts the
+ * bitmap area of the member disks, and store it in llbitmap->bitmap_disk.
+ *
+ * The disk inherits the queue limits of the md device, is flagged
+ * GENHD_FL_HIDDEN and uses llbitmap_fops.
+ *
+ * Returns 0 on success, -ENAMETOOLONG if the generated name does not fit in
+ * disk->disk_name, or the error from blk_alloc_disk()/add_disk().
+ */
+static int llbitmap_add_disk(struct llbitmap *llbitmap)
+{
+	struct mddev *mddev = llbitmap->mddev;
+	struct gendisk *disk = blk_alloc_disk(&mddev->gendisk->queue->limits,
+					      NUMA_NO_NODE);
+	int len;
+	int ret;
+
+	if (IS_ERR(disk))
+		return PTR_ERR(disk);
+
+	/* bounded copy: mdname() plus the suffix may exceed disk_name */
+	len = snprintf(disk->disk_name, sizeof(disk->disk_name), "%s_bitmap",
+		       mdname(mddev));
+	if (len < 0 || (size_t)len >= sizeof(disk->disk_name)) {
+		put_disk(disk);
+		return -ENAMETOOLONG;
+	}
+
+	disk->flags |= GENHD_FL_HIDDEN;
+	disk->fops = &llbitmap_fops;
+	/*
+	 * Finish initialisation before add_disk() publishes the disk:
+	 * llbitmap_submit_bio() dereferences private_data.
+	 */
+	disk->private_data = mddev;
+	set_capacity(disk, BITMAP_MAX_SECTOR);
+
+	ret = add_disk(disk);
+	if (ret) {
+		put_disk(disk);
+		return ret;
+	}
+
+	llbitmap->bitmap_disk = disk;
+	return 0;
+}
+
+/*
+ * Tear down the hidden bitmap disk, if there is one: clear
+ * llbitmap->bitmap_disk, then del_gendisk() and put_disk() the disk.
+ * Does nothing when no disk is recorded.
+ */
+static void llbitmap_del_disk(struct llbitmap *llbitmap)
+{
+	struct gendisk *hidden = llbitmap->bitmap_disk;
+
+	if (hidden) {
+		llbitmap->bitmap_disk = NULL;
+		del_gendisk(hidden);
+		put_disk(hidden);
+	}
+}
+
+/*
+ * Create the struct file used for bitmap IO on the hidden disk and store it
+ * in llbitmap->bitmap_file.
+ *
+ * The file is allocated on the disk's part0 for read/write and set up by
+ * hand (flags, mode, mapping, writeback error cursor) rather than through a
+ * regular open; private_data is set to ERR_PTR(-ENODEV) to mark that.
+ *
+ * Returns 0 on success or the error from bdev_file_alloc().
+ */
+static int llbitmap_open_disk(struct llbitmap *llbitmap)
+{
+	struct gendisk *disk = llbitmap->bitmap_disk;
+	struct file *filp;
+
+	filp = bdev_file_alloc(disk->part0, BLK_OPEN_READ | BLK_OPEN_WRITE);
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
+
+	/* pairs with the blkdev_put_no_open() done by blkdev_release() */
+	get_device(disk_to_dev(disk));
+
+	filp->f_flags |= O_LARGEFILE;
+	filp->f_mode |= FMODE_CAN_ODIRECT;
+	filp->f_mapping = disk->part0->bd_mapping;
+	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
+
+	/* tells blkdev_release() the device was never really opened */
+	filp->private_data = ERR_PTR(-ENODEV);
+
+	llbitmap->bitmap_file = filp;
+	return 0;
+}
+
+/*
+ * Drop the bitmap file, if there is one: clear llbitmap->bitmap_file and
+ * fput() the file. Does nothing when no file is recorded.
+ */
+static void llbitmap_close_disk(struct llbitmap *llbitmap)
+{
+	struct file *filp = llbitmap->bitmap_file;
+
+	if (filp) {
+		llbitmap->bitmap_file = NULL;
+		fput(filp);
+	}
+}
+