From patchwork Wed Jul 20 09:30:47 2022
X-Patchwork-Submitter: Wang You
X-Patchwork-Id: 12923752
From: Wang You <wangyoua@uniontech.com>
To: axboe@kernel.dk
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org, hch@lst.de,
    jaegeuk@kernel.org, fio@vger.kernel.org, ming.lei@redhat.com,
    wangyoua@uniontech.com, wangxiaohua@uniontech.com
Subject: [PATCH 1/2] block: Introduce nr_sched_batch sys interface
Date: Wed, 20 Jul 2022 17:30:47 +0800
Message-Id: <20220720093048.225944-2-wangyoua@uniontech.com>
In-Reply-To: <20220720093048.225944-1-wangyoua@uniontech.com>
References: <20220720093048.225944-1-wangyoua@uniontech.com>
X-Mailing-List: linux-block@vger.kernel.org

This patch adds an nr_sched_batch interface under /sys/block/sdx/queue/
that can be used to set the number of requests the scheduler dispatches in
one batch. The default value is nr_requests, and it keeps following
nr_requests as long as it has not been changed explicitly.
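For reference, a minimal userspace sketch of how the new attribute could be
exercised (illustration only, not part of the patch; the device name sdb and
the value 32 written below are assumptions):

/*
 * Usage sketch only: reads and then lowers the proposed nr_sched_batch
 * attribute. The device name (sdb) and the value written (32) are
 * assumptions for illustration.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *attr = "/sys/block/sdb/queue/nr_sched_batch";
	char buf[32];
	ssize_t n;
	int fd;

	/* Read the current batch size; it defaults to nr_requests. */
	fd = open(attr, O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("nr_sched_batch: %s", buf);
	}
	close(fd);

	/* Lower the batch size; the store handler below rejects values
	 * outside the range [1, nr_requests]. */
	fd = open(attr, O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "32", strlen("32")) < 0)
		perror("write");
	close(fd);

	return 0;
}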
Signed-off-by: Wang You <wangyoua@uniontech.com>
---
 block/blk-mq-sched.c   |  4 +++-
 block/blk-sysfs.c      | 34 ++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h |  1 +
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index a4f7c101b53b..92798a0c03bd 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -100,7 +100,7 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 	if (hctx->dispatch_busy)
 		max_dispatch = 1;
 	else
-		max_dispatch = hctx->queue->nr_requests;
+		max_dispatch = q->nr_sched_batch;

 	do {
 		struct request *rq;
@@ -567,6 +567,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 		blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
 		q->elevator = NULL;
 		q->nr_requests = q->tag_set->queue_depth;
+		q->nr_sched_batch = q->nr_requests;
 		return 0;
 	}

@@ -577,6 +578,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	 */
 	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
 				   BLKDEV_DEFAULT_RQ);
+	q->nr_sched_batch = q->nr_requests;

 	if (blk_mq_is_shared_tags(flags)) {
 		ret = blk_mq_init_sched_shared_tags(q);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9b905e9443e4..8f299a3cf66c 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -70,6 +70,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 {
 	unsigned long nr;
 	int ret, err;
+	unsigned long prev_nr_request = q->nr_requests;

 	if (!queue_is_mq(q))
 		return -EINVAL;
@@ -85,6 +86,37 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 	if (err)
 		return err;

+	if (q->nr_sched_batch == prev_nr_request ||
+	    q->nr_sched_batch > nr)
+		q->nr_sched_batch = nr;
+
+	return ret;
+}
+
+static ssize_t
+elv_nr_batch_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(q->nr_sched_batch, page);
+}
+
+static ssize_t
+elv_nr_batch_store(struct request_queue *q, const char *page, size_t count)
+{
+	unsigned long nr;
+	int ret;
+
+	if (!queue_is_mq(q))
+		return -EINVAL;
+
+	ret = queue_var_store(&nr, page, count);
+	if (ret < 0)
+		return ret;
+
+	if (nr > q->nr_requests || nr < 1)
+		return -EINVAL;
+
+	q->nr_sched_batch = nr;
+
 	return ret;
 }

@@ -573,6 +605,7 @@ QUEUE_RO_ENTRY(queue_max_segments, "max_segments");
 QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
 QUEUE_RO_ENTRY(queue_max_segment_size, "max_segment_size");
 QUEUE_RW_ENTRY(elv_iosched, "scheduler");
+QUEUE_RW_ENTRY(elv_nr_batch, "nr_sched_batch");

 QUEUE_RO_ENTRY(queue_logical_block_size, "logical_block_size");
 QUEUE_RO_ENTRY(queue_physical_block_size, "physical_block_size");
@@ -632,6 +665,7 @@ static struct attribute *queue_attrs[] = {
 	&queue_max_integrity_segments_entry.attr,
 	&queue_max_segment_size_entry.attr,
 	&elv_iosched_entry.attr,
+	&elv_nr_batch_entry.attr,
 	&queue_hw_sector_size_entry.attr,
 	&queue_logical_block_size_entry.attr,
 	&queue_physical_block_size_entry.attr,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2f7b43444c5f..13b050c0756b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -422,6 +422,7 @@ struct request_queue {
 	 * queue settings
 	 */
 	unsigned long		nr_requests;	/* Max # of requests */
+	unsigned long		nr_sched_batch;

 	unsigned int		dma_pad_mask;
 	unsigned int		dma_alignment;

From patchwork Wed Jul 20 09:30:48 2022
X-Patchwork-Submitter: Wang You
X-Patchwork-Id: 12923753
From: Wang You <wangyoua@uniontech.com>
To: axboe@kernel.dk
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org, hch@lst.de,
    jaegeuk@kernel.org, fio@vger.kernel.org, ming.lei@redhat.com,
    wangyoua@uniontech.com, wangxiaohua@uniontech.com
Subject: [PATCH 2/2] block/mq-deadline: Prioritize first request
Date: Wed, 20 Jul 2022 17:30:48 +0800
Message-Id: <20220720093048.225944-3-wangyoua@uniontech.com>
In-Reply-To: <20220720093048.225944-1-wangyoua@uniontech.com>
References: <20220720093048.225944-1-wangyoua@uniontech.com>
X-Mailing-List: linux-block@vger.kernel.org

The function deadline_head_request selects the request at the head of the
sector-sorted red-black tree of the mq-deadline scheduler. Dispatching such
a request moves the disk access position back to the lowest pending sector
in a single step, so the head can sweep forward again instead of swinging
back and forth.

- The scheduler's request batching may reduce or even eliminate its ability
  to merge and sort requests, so I sometimes set nr_sched_batch to 1.

- This patch may increase the risk of requests expiring; I am not sure
  whether a stricter expiry check is necessary.

- I tested several disks (mainly rotational disks and some SSDs) with the
  fio tool (using sync, direct, etc. parameters). The results show improved
  sequential read and write performance at small block sizes. Does this
  imply that changing nr_sched_batch is reasonable?
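To make the head-selection argument concrete, here is a small userspace
sketch (illustration only, not the kernel code; the job count, stream gap,
and arrival order are made-up assumptions). It models several sequential
readers, like the fio jobs used in the tests below, and compares the seek
distance of dispatching each round in an arbitrary arrival order versus
ascending sector order, which nr_sched_batch = 1 combined with
deadline_head_request roughly approximates:

/*
 * Toy model only: NR_JOBS sequential readers, one outstanding request each
 * per round. Compares total seek distance of two per-round dispatch orders.
 * All constants are assumptions for illustration.
 */
#include <stdio.h>

#define NR_JOBS    16
#define NR_ROUNDS  100
#define STREAM_GAP 1000000ULL	/* sector distance between the jobs' start offsets */

/* Seek cost of dispatching one request per job in the given order. */
static unsigned long long dispatch_round(const unsigned long long *sector,
					 const int *order,
					 unsigned long long *pos)
{
	unsigned long long seek = 0;

	for (int i = 0; i < NR_JOBS; i++) {
		unsigned long long s = sector[order[i]];

		seek += (s > *pos) ? s - *pos : *pos - s;
		*pos = s;
	}
	return seek;
}

int main(void)
{
	/* Ascending sector order: one backward jump per round, then a sweep. */
	static const int by_sector[NR_JOBS] = {
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	};
	/* An arbitrary arrival order standing in for FIFO-style dispatch. */
	static const int by_arrival[NR_JOBS] = {
		0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15
	};
	unsigned long long sector[NR_JOBS];
	unsigned long long pos_sorted = 0, pos_fifo = 0;
	unsigned long long seek_sorted = 0, seek_fifo = 0;

	for (int round = 0; round < NR_ROUNDS; round++) {
		/* Each sequential reader advances by one block per round. */
		for (int j = 0; j < NR_JOBS; j++)
			sector[j] = (unsigned long long)j * STREAM_GAP + round;

		seek_sorted += dispatch_round(sector, by_sector, &pos_sorted);
		seek_fifo += dispatch_round(sector, by_arrival, &pos_fifo);
	}

	printf("total seek, ascending sector order: %llu\n", seek_sorted);
	printf("total seek, arrival order:          %llu\n", seek_fifo);
	return 0;
}

Under these assumptions the ascending-sector order accumulates far less
seek distance per round, which is the effect the head request is intended
to capture on rotational disks.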
The test hardware is: Kunpeng-920, HW-SAS3508+(MG04ACA400N * 2), RAID0.

The test command is:

fio -ioengine=psync -lockmem=1G -buffered=0 -time_based=1 -direct=1
    -iodepth=1 -thread -bs=512B -size=110g -numjobs=16 -runtime=300
    -group_reporting -name=read -filename=/dev/sdb14
    -ioscheduler=mq-deadline -rw=read[,write,rw]

The following is the test data:

origin/master:
  read iops:  152421
  write iops: 136959
  rw iops:    54593,54581

nr_sched_batch = 1:
  read iops:  166449
  write iops: 139477
  rw iops:    55363,55355

nr_sched_batch = 1, use deadline_head_request:
  read iops:  171177
  write iops: 184431
  rw iops:    56178,56169

Signed-off-by: Wang You <wangyoua@uniontech.com>
---
 block/mq-deadline.c | 42 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 1a9e835e816c..e155f49d7a70 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -344,6 +344,35 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 	return rq;
 }

+static inline struct request *
+deadline_head_request(struct deadline_data *dd, struct dd_per_prio *per_prio, int data_dir)
+{
+	struct rb_node *node = rb_first(&per_prio->sort_list[data_dir]);
+	struct request *rq;
+	unsigned long flags;
+
+	if (!node)
+		return NULL;
+
+	rq = rb_entry_rq(node);
+	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
+		return rq;
+
+	/*
+	 * Look for a write request that can be dispatched, that is one with
+	 * an unlocked target zone.
+	 */
+	spin_lock_irqsave(&dd->zone_lock, flags);
+	while (rq) {
+		if (blk_req_can_dispatch_to_zone(rq))
+			break;
+		rq = deadline_latter_request(rq);
+	}
+	spin_unlock_irqrestore(&dd->zone_lock, flags);
+
+	return rq;
+}
+
 /*
  * Returns true if and only if @rq started after @latest_start where
  * @latest_start is in jiffies.
  */
@@ -429,13 +458,20 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	 * we are not running a batch, find best request for selected data_dir
 	 */
 	next_rq = deadline_next_request(dd, per_prio, data_dir);
-	if (deadline_check_fifo(per_prio, data_dir) || !next_rq) {
+	if (deadline_check_fifo(per_prio, data_dir)) {
 		/*
 		 * A deadline has expired, the last request was in the other
-		 * direction, or we have run out of higher-sectored requests.
-		 * Start again from the request with the earliest expiry time.
+		 * direction. Start again from the request with the earliest
+		 * expiry time.
 		 */
 		rq = deadline_fifo_request(dd, per_prio, data_dir);
+	} else if (!next_rq) {
+		/*
+		 * There is no operation expired, and we have run out of
+		 * higher-sectored requests. Look for the sector at the head
+		 * which may reduce disk seek consumption.
+		 */
+		rq = deadline_head_request(dd, per_prio, data_dir);
 	} else {
 		/*
 		 * The last req was the same dir and we have a next request in