brd: improve performance with blk-mq

Message ID: 20230203103005.31290-2-p.raghav@samsung.com (mailing list archive)
State: New, archived
Series: brd: improve performance with blk-mq

Commit Message

Pankaj Raghav Feb. 3, 2023, 10:30 a.m. UTC
Move to blk-mq based request processing, as brd is one of the few
remaining drivers that still use the submit_bio interface. The changes
needed to start using blk-mq are trivial. Performance increases by up
to 127% for direct IO read workloads. There is a slight dip for the
direct IO write workload, but it is small compared to the overall gain
from blk-mq support.

SW queues are mapped to one HW queue in the brd device, and the rest of
the IO processing is retained as is.
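
In code, the mapping boils down to a tag set with a single hardware queue,
condensed from the patch below:

	/* One hw queue serves all sw queues; no per-command data needed. */
	brd->tag_set.ops = &brd_mq_ops;
	brd->tag_set.nr_hw_queues = 1;
	brd->tag_set.queue_depth = 128;
	brd->tag_set.numa_node = NUMA_NO_NODE;
	brd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;

	err = blk_mq_alloc_tag_set(&brd->tag_set);
	if (!err)
		disk = blk_mq_alloc_disk(&brd->tag_set, brd);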

Performance results with the none scheduler:

--direct=0
-------------------------------------------------
|            | bio (base) | blk-mq    | delta   |
-------------------------------------------------
| randread   | 133        | 223       | +68%    |
| read       | 150        | 313       | +108%   |
| randwrite  | 111        | 109       | -1.8%   |
| write      | 118        | 117       | -0.8%   |
-------------------------------------------------

--direct=1
-------------------------------------------------
|            | bio (base) | blk-mq    | delta   |
-------------------------------------------------
| randread   | 182        | 414       | +127%   |
| read       | 190        | 429       | +125%   |
| randwrite  | 378        | 387       | +2.38%  |
| write      | 443        | 420       | -5.1%   |
-------------------------------------------------

Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
---
 drivers/block/brd.c | 63 ++++++++++++++++++++++++++++++++------------
 1 file changed, 46 insertions(+), 17 deletions(-)

Comments

Christoph Hellwig Feb. 6, 2023, 3:50 p.m. UTC | #1
On Fri, Feb 03, 2023 at 04:00:06PM +0530, Pankaj Raghav wrote:
> Move to blk-mq based request processing, as brd is one of the few
> remaining drivers that still use the submit_bio interface. The changes
> needed to start using blk-mq are trivial. Performance increases by up
> to 127% for direct IO read workloads. There is a slight dip for the
> direct IO write workload, but it is small compared to the overall gain
> from blk-mq support.

Can you find out why writes regress, and what improves for reads?

In general blk-mq is doing a lot more work for better batching, but much
of that batching should not matter for a simple ramdisk.  So the results
look a little odd to me, and extra numbers and explanations would
really help.
Pankaj Raghav Feb. 6, 2023, 4:10 p.m. UTC | #2
On 2023-02-06 21:20, Christoph Hellwig wrote:
> On Fri, Feb 03, 2023 at 04:00:06PM +0530, Pankaj Raghav wrote:
>> Move to blk-mq based request processing, as brd is one of the few
>> remaining drivers that still use the submit_bio interface. The changes
>> needed to start using blk-mq are trivial. Performance increases by up
>> to 127% for direct IO read workloads. There is a slight dip for the
>> direct IO write workload, but it is small compared to the overall gain
>> from blk-mq support.
> 
> Can you find out why writes regress, and what improves for reads?
> 
Definitely. I tried similar experiments in null_blk with the submit_bio
and blk-mq backends and noticed a similar pattern in performance. I will
look into it next week, as I am OOO for the rest of the week.

> In general blk-mq is doing a lot more work for better batching, but much
> of that batching should not matter for a simple ramdisk.  So the results
> look a little odd to me, and extra numbers and explanations would
> really help.

Let me know if you think I am missing something in the code that could
cause this behavior! Thanks.

Patch

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 37dce184eb56..99b37ac31532 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -16,6 +16,7 @@ 
 #include <linux/major.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
+#include <linux/blk-mq.h>
 #include <linux/highmem.h>
 #include <linux/mutex.h>
 #include <linux/pagemap.h>
@@ -46,6 +47,7 @@  struct brd_device {
 	spinlock_t		brd_lock;
 	struct radix_tree_root	brd_pages;
 	u64			brd_nr_pages;
+	struct blk_mq_tag_set tag_set;
 };
 
 /*
@@ -282,36 +284,45 @@  static int brd_do_bvec(struct brd_device *brd, struct page *page,
 	return err;
 }
 
-static void brd_submit_bio(struct bio *bio)
+static blk_status_t brd_queue_rq(struct blk_mq_hw_ctx *hctx,
+				 const struct blk_mq_queue_data *bd)
 {
-	struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;
-	sector_t sector = bio->bi_iter.bi_sector;
+	struct request *rq = bd->rq;
+	struct brd_device *brd = hctx->queue->queuedata;
+	sector_t sector = blk_rq_pos(rq);
 	struct bio_vec bvec;
-	struct bvec_iter iter;
+	struct req_iterator iter;
+	blk_status_t err = BLK_STS_OK;
 
-	bio_for_each_segment(bvec, bio, iter) {
+	blk_mq_start_request(bd->rq);
+	rq_for_each_segment(bvec, rq, iter) {
 		unsigned int len = bvec.bv_len;
-		int err;
+		int ret;
 
 		/* Don't support un-aligned buffer */
 		WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
-				(len & (SECTOR_SIZE - 1)));
+			     (len & (SECTOR_SIZE - 1)));
 
-		err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
-				  bio_op(bio), sector);
-		if (err) {
-			bio_io_error(bio);
-			return;
+		ret = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
+				  req_op(rq), sector);
+		if (ret) {
+			err = BLK_STS_IOERR;
+			goto end_request;
 		}
 		sector += len >> SECTOR_SHIFT;
 	}
 
-	bio_endio(bio);
+end_request:
+	blk_mq_end_request(bd->rq, err);
+	return BLK_STS_OK;
 }
 
+static const struct blk_mq_ops brd_mq_ops = {
+	.queue_rq = brd_queue_rq,
+};
+
 static const struct block_device_operations brd_fops = {
 	.owner =		THIS_MODULE,
-	.submit_bio =		brd_submit_bio,
 };
 
 /*
@@ -355,7 +367,7 @@  static int brd_alloc(int i)
 	struct brd_device *brd;
 	struct gendisk *disk;
 	char buf[DISK_NAME_LEN];
-	int err = -ENOMEM;
+	int err = 0;
 
 	list_for_each_entry(brd, &brd_devices, brd_list)
 		if (brd->brd_number == i)
@@ -364,6 +376,14 @@  static int brd_alloc(int i)
 	if (!brd)
 		return -ENOMEM;
 	brd->brd_number		= i;
+	brd->tag_set.ops = &brd_mq_ops;
+	brd->tag_set.queue_depth = 128;
+	brd->tag_set.numa_node = NUMA_NO_NODE;
+	brd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
+	brd->tag_set.cmd_size = 0;
+	brd->tag_set.driver_data = brd;
+	brd->tag_set.nr_hw_queues = 1;
+
 	list_add_tail(&brd->brd_list, &brd_devices);
 
 	spin_lock_init(&brd->brd_lock);
@@ -374,9 +394,17 @@  static int brd_alloc(int i)
 		debugfs_create_u64(buf, 0444, brd_debugfs_dir,
 				&brd->brd_nr_pages);
 
-	disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE);
-	if (!disk)
+	err = blk_mq_alloc_tag_set(&brd->tag_set);
+	if (err) {
+		err = -ENOMEM;
 		goto out_free_dev;
+	}
+
+	disk = brd->brd_disk = blk_mq_alloc_disk(&brd->tag_set, brd);
+	if (IS_ERR(disk)) {
+		err = PTR_ERR(disk);
+		goto out_free_tags;
+	}
 
 	disk->major		= RAMDISK_MAJOR;
 	disk->first_minor	= i * max_part;
@@ -407,6 +435,8 @@  static int brd_alloc(int i)
 
 out_cleanup_disk:
 	put_disk(disk);
+out_free_tags:
+	blk_mq_free_tag_set(&brd->tag_set);
 out_free_dev:
 	list_del(&brd->brd_list);
 	kfree(brd);
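
For anyone who wants to poke at the direct IO read path by hand, here is a
minimal O_DIRECT read loop. This is an illustrative sketch only: the device
path, block size, and read count are assumptions (/dev/ram0 presumes the brd
module is loaded), and it is not the tool that produced the numbers above.

	#define _GNU_SOURCE	/* for O_DIRECT */
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	#define BLOCK_SIZE	4096	/* assumed; multiple of the logical block size */
	#define NR_BLOCKS	1024	/* assumed read count (4 MiB total) */

	int main(void)
	{
		void *buf;
		long i;
		int fd;

		/* O_DIRECT requires a suitably aligned buffer. */
		if (posix_memalign(&buf, BLOCK_SIZE, BLOCK_SIZE))
			return EXIT_FAILURE;

		fd = open("/dev/ram0", O_RDONLY | O_DIRECT);
		if (fd < 0) {
			perror("open /dev/ram0");
			return EXIT_FAILURE;
		}

		for (i = 0; i < NR_BLOCKS; i++) {
			if (pread(fd, buf, BLOCK_SIZE, i * BLOCK_SIZE) != BLOCK_SIZE) {
				perror("pread");
				break;
			}
		}

		close(fd);
		free(buf);
		return 0;
	}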