diff mbox series

[7/7] blk-mq: don't use the requeue list to queue flush commands

Message ID 20230416200930.29542-8-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [1/7] blk-mq: factor out a blk_rq_init_flush helper | expand

Commit Message

Christoph Hellwig April 16, 2023, 8:09 p.m. UTC
Currently both requeues of commands that were already sent to the
driver and flush commands submitted from the flush state machine
share the same requeue_list struct request_queue, despite requeues
doing head insertations and flushes not.  Switch to using two
separate lists instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-flush.c      |  9 +++++++--
 block/blk-mq-debugfs.c |  1 -
 block/blk-mq.c         | 36 +++++++++++++++---------------------
 block/blk-mq.h         |  1 -
 include/linux/blk-mq.h |  4 +---
 include/linux/blkdev.h |  1 +
 6 files changed, 24 insertions(+), 28 deletions(-)

Comments

Bart Van Assche April 16, 2023, 9:01 p.m. UTC | #1
On 4/16/23 13:09, Christoph Hellwig wrote:
> diff --git a/block/blk-flush.c b/block/blk-flush.c
> index 69e9806f575455..231d3780e74ad1 100644
> --- a/block/blk-flush.c
> +++ b/block/blk-flush.c
> @@ -188,7 +188,9 @@ static void blk_flush_complete_seq(struct request *rq,
>   
>   	case REQ_FSEQ_DATA:
>   		list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
> -		blk_mq_add_to_requeue_list(rq, 0);
> +		spin_lock(&q->requeue_lock);
> +		list_add_tail(&rq->queuelist, &q->flush_list);
> +		spin_unlock(&q->requeue_lock);
>   		blk_mq_kick_requeue_list(q);
>   		break;

At least the SCSI core can call blk_flush_complete_seq() from interrupt 
context so I don't think the above code is correct. The call chain is as 
follows:

LLD interrupt handler
   scsi_done()
     scsi_done_internal()
       blk_mq_complete_request()
         scsi_complete()
           scsi_finish_command()
             scsi_io_completion()
               scsi_end_request()
                 __blk_mq_end_request()
                   flush_end_io()
                     blk_flush_complete_seq()

Bart.
Christoph Hellwig April 17, 2023, 4:26 a.m. UTC | #2
On Sun, Apr 16, 2023 at 02:01:30PM -0700, Bart Van Assche wrote:
> On 4/16/23 13:09, Christoph Hellwig wrote:
>> diff --git a/block/blk-flush.c b/block/blk-flush.c
>> index 69e9806f575455..231d3780e74ad1 100644
>> --- a/block/blk-flush.c
>> +++ b/block/blk-flush.c
>> @@ -188,7 +188,9 @@ static void blk_flush_complete_seq(struct request *rq,
>>     	case REQ_FSEQ_DATA:
>>   		list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
>> -		blk_mq_add_to_requeue_list(rq, 0);
>> +		spin_lock(&q->requeue_lock);
>> +		list_add_tail(&rq->queuelist, &q->flush_list);
>> +		spin_unlock(&q->requeue_lock);
>>   		blk_mq_kick_requeue_list(q);
>>   		break;
>
> At least the SCSI core can call blk_flush_complete_seq() from interrupt 
> context so I don't think the above code is correct. The call chain is as 
> follows:

All callers of blk_flush_complete_seq already disable interrupts when
taking mq_flush_lock.  No need to disable interrupts again for a nested
lock then.
Damien Le Moal April 17, 2023, 6:46 a.m. UTC | #3
On 4/17/23 05:09, Christoph Hellwig wrote:
> Currently both requeues of commands that were already sent to the
> driver and flush commands submitted from the flush state machine
> share the same requeue_list struct request_queue, despite requeues
> doing head insertations and flushes not.  Switch to using two
> separate lists instead.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks OK.

Reviewed-by: Damien Le Moal <dlemoal@kernel.org>

> ---
>  block/blk-flush.c      |  9 +++++++--
>  block/blk-mq-debugfs.c |  1 -
>  block/blk-mq.c         | 36 +++++++++++++++---------------------
>  block/blk-mq.h         |  1 -
>  include/linux/blk-mq.h |  4 +---
>  include/linux/blkdev.h |  1 +
>  6 files changed, 24 insertions(+), 28 deletions(-)
> 
> diff --git a/block/blk-flush.c b/block/blk-flush.c
> index 69e9806f575455..231d3780e74ad1 100644
> --- a/block/blk-flush.c
> +++ b/block/blk-flush.c
> @@ -188,7 +188,9 @@ static void blk_flush_complete_seq(struct request *rq,
>  
>  	case REQ_FSEQ_DATA:
>  		list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
> -		blk_mq_add_to_requeue_list(rq, 0);
> +		spin_lock(&q->requeue_lock);
> +		list_add_tail(&rq->queuelist, &q->flush_list);
> +		spin_unlock(&q->requeue_lock);
>  		blk_mq_kick_requeue_list(q);

With this change, this function name is a little misleading... But I do not have
a better name to propose :)
kernel test robot April 19, 2023, 12:37 p.m. UTC | #4
Hi Christoph,

kernel test robot noticed the following build warnings:

[auto build test WARNING on axboe-block/for-next]
[also build test WARNING on next-20230418]
[cannot apply to linus/master v6.3-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Christoph-Hellwig/blk-mq-reflow-blk_insert_flush/20230417-051014
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link:    https://lore.kernel.org/r/20230416200930.29542-8-hch%40lst.de
patch subject: [PATCH 7/7] blk-mq: don't use the requeue list to queue flush commands
config: i386-randconfig-a003 (https://download.01.org/0day-ci/archive/20230419/202304192001.KzxpDQmK-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/c1197e1a4ff5b38b73d3ec923987ca857f5e2695
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Christoph-Hellwig/blk-mq-reflow-blk_insert_flush/20230417-051014
        git checkout c1197e1a4ff5b38b73d3ec923987ca857f5e2695
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=i386 olddefconfig
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202304192001.KzxpDQmK-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> block/blk-mq.c:1461:6: warning: no previous prototype for 'blk_flush_queue_insert' [-Wmissing-prototypes]
    1461 | void blk_flush_queue_insert(struct request *rq)
         |      ^~~~~~~~~~~~~~~~~~~~~~


vim +/blk_flush_queue_insert +1461 block/blk-mq.c

  1460	
> 1461	void blk_flush_queue_insert(struct request *rq)
  1462	{
  1463		struct request_queue *q = rq->q;
  1464		unsigned long flags;
  1465	
  1466		spin_lock_irqsave(&q->requeue_lock, flags);
  1467		list_add_tail(&rq->queuelist, &q->flush_list);
  1468		spin_unlock_irqrestore(&q->requeue_lock, flags);
  1469	}
  1470
kernel test robot April 19, 2023, 5:34 p.m. UTC | #5
Hi Christoph,

kernel test robot noticed the following build warnings:

[auto build test WARNING on axboe-block/for-next]
[also build test WARNING on next-20230418]
[cannot apply to linus/master v6.3-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Christoph-Hellwig/blk-mq-reflow-blk_insert_flush/20230417-051014
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link:    https://lore.kernel.org/r/20230416200930.29542-8-hch%40lst.de
patch subject: [PATCH 7/7] blk-mq: don't use the requeue list to queue flush commands
config: x86_64-randconfig-a013-20230417 (https://download.01.org/0day-ci/archive/20230420/202304200122.GEFsqxFh-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/c1197e1a4ff5b38b73d3ec923987ca857f5e2695
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Christoph-Hellwig/blk-mq-reflow-blk_insert_flush/20230417-051014
        git checkout c1197e1a4ff5b38b73d3ec923987ca857f5e2695
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202304200122.GEFsqxFh-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> block/blk-mq.c:1461:6: warning: no previous prototype for function 'blk_flush_queue_insert' [-Wmissing-prototypes]
   void blk_flush_queue_insert(struct request *rq)
        ^
   block/blk-mq.c:1461:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   void blk_flush_queue_insert(struct request *rq)
   ^
   static 
   1 warning generated.


vim +/blk_flush_queue_insert +1461 block/blk-mq.c

  1460	
> 1461	void blk_flush_queue_insert(struct request *rq)
  1462	{
  1463		struct request_queue *q = rq->q;
  1464		unsigned long flags;
  1465	
  1466		spin_lock_irqsave(&q->requeue_lock, flags);
  1467		list_add_tail(&rq->queuelist, &q->flush_list);
  1468		spin_unlock_irqrestore(&q->requeue_lock, flags);
  1469	}
  1470
diff mbox series

Patch

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 69e9806f575455..231d3780e74ad1 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -188,7 +188,9 @@  static void blk_flush_complete_seq(struct request *rq,
 
 	case REQ_FSEQ_DATA:
 		list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
-		blk_mq_add_to_requeue_list(rq, 0);
+		spin_lock(&q->requeue_lock);
+		list_add_tail(&rq->queuelist, &q->flush_list);
+		spin_unlock(&q->requeue_lock);
 		blk_mq_kick_requeue_list(q);
 		break;
 
@@ -349,7 +351,10 @@  static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
 	smp_wmb();
 	req_ref_set(flush_rq, 1);
 
-	blk_mq_add_to_requeue_list(flush_rq, 0);
+	spin_lock(&q->requeue_lock);
+	list_add_tail(&flush_rq->queuelist, &q->flush_list);
+	spin_unlock(&q->requeue_lock);
+
 	blk_mq_kick_requeue_list(q);
 }
 
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index d23a8554ec4aeb..0d2a0f0524b53e 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -241,7 +241,6 @@  static const char *const cmd_flag_name[] = {
 #define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name
 static const char *const rqf_name[] = {
 	RQF_NAME(STARTED),
-	RQF_NAME(SOFTBARRIER),
 	RQF_NAME(FLUSH_SEQ),
 	RQF_NAME(MIXED_MERGE),
 	RQF_NAME(MQ_INFLIGHT),
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 50a03e1592819c..eaed84a346c9c4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1403,13 +1403,16 @@  static void __blk_mq_requeue_request(struct request *rq)
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
 {
 	struct request_queue *q = rq->q;
+	unsigned long flags;
 
 	__blk_mq_requeue_request(rq);
 
 	/* this request will be re-inserted to io scheduler queue */
 	blk_mq_sched_requeue_request(rq);
 
-	blk_mq_add_to_requeue_list(rq, BLK_MQ_INSERT_AT_HEAD);
+	spin_lock_irqsave(&q->requeue_lock, flags);
+	list_add_tail(&rq->queuelist, &q->requeue_list);
+	spin_unlock_irqrestore(&q->requeue_lock, flags);
 
 	if (kick_requeue_list)
 		blk_mq_kick_requeue_list(q);
@@ -1421,13 +1424,16 @@  static void blk_mq_requeue_work(struct work_struct *work)
 	struct request_queue *q =
 		container_of(work, struct request_queue, requeue_work.work);
 	LIST_HEAD(rq_list);
-	struct request *rq, *next;
+	LIST_HEAD(flush_list);
+	struct request *rq;
 
 	spin_lock_irq(&q->requeue_lock);
 	list_splice_init(&q->requeue_list, &rq_list);
+	list_splice_init(&q->flush_list, &flush_list);
 	spin_unlock_irq(&q->requeue_lock);
 
-	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
+	while (!list_empty(&rq_list)) {
+		rq = list_entry(rq_list.next, struct request, queuelist);
 		/*
 		 * If RQF_DONTPREP ist set, the request has been started by the
 		 * driver already and might have driver-specific data allocated
@@ -1435,18 +1441,16 @@  static void blk_mq_requeue_work(struct work_struct *work)
 		 * block layer merges for the request.
 		 */
 		if (rq->rq_flags & RQF_DONTPREP) {
-			rq->rq_flags &= ~RQF_SOFTBARRIER;
 			list_del_init(&rq->queuelist);
 			blk_mq_request_bypass_insert(rq, 0);
-		} else if (rq->rq_flags & RQF_SOFTBARRIER) {
-			rq->rq_flags &= ~RQF_SOFTBARRIER;
+		} else {
 			list_del_init(&rq->queuelist);
 			blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
 		}
 	}
 
-	while (!list_empty(&rq_list)) {
-		rq = list_entry(rq_list.next, struct request, queuelist);
+	while (!list_empty(&flush_list)) {
+		rq = list_entry(flush_list.next, struct request, queuelist);
 		list_del_init(&rq->queuelist);
 		blk_mq_insert_request(rq, 0);
 	}
@@ -1454,24 +1458,13 @@  static void blk_mq_requeue_work(struct work_struct *work)
 	blk_mq_run_hw_queues(q, false);
 }
 
-void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags)
+void blk_flush_queue_insert(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 	unsigned long flags;
 
-	/*
-	 * We abuse this flag that is otherwise used by the I/O scheduler to
-	 * request head insertion from the workqueue.
-	 */
-	BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);
-
 	spin_lock_irqsave(&q->requeue_lock, flags);
-	if (insert_flags & BLK_MQ_INSERT_AT_HEAD) {
-		rq->rq_flags |= RQF_SOFTBARRIER;
-		list_add(&rq->queuelist, &q->requeue_list);
-	} else {
-		list_add_tail(&rq->queuelist, &q->requeue_list);
-	}
+	list_add_tail(&rq->queuelist, &q->flush_list);
 	spin_unlock_irqrestore(&q->requeue_lock, flags);
 }
 
@@ -4222,6 +4215,7 @@  int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	blk_mq_update_poll_flag(q);
 
 	INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
+	INIT_LIST_HEAD(&q->flush_list);
 	INIT_LIST_HEAD(&q->requeue_list);
 	spin_lock_init(&q->requeue_lock);
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 62c505e6ef1e40..1955e0c08d154c 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -47,7 +47,6 @@  int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *,
 			     unsigned int);
-void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags);
 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
 struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
 					struct blk_mq_ctx *start);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a204a5f3cc9524..23fa9fdf59f3e1 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -28,8 +28,6 @@  typedef __u32 __bitwise req_flags_t;
 
 /* drive already may have started this one */
 #define RQF_STARTED		((__force req_flags_t)(1 << 1))
-/* may not be passed by ioscheduler */
-#define RQF_SOFTBARRIER		((__force req_flags_t)(1 << 3))
 /* request for flush sequence */
 #define RQF_FLUSH_SEQ		((__force req_flags_t)(1 << 4))
 /* merge of different types, fail separately */
@@ -65,7 +63,7 @@  typedef __u32 __bitwise req_flags_t;
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
-	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
+	(RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
 
 enum mq_rq_state {
 	MQ_RQ_IDLE		= 0,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6ede578dfbc642..649274b4043b69 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -490,6 +490,7 @@  struct request_queue {
 	 * for flush operations
 	 */
 	struct blk_flush_queue	*fq;
+	struct list_head	flush_list;
 
 	struct list_head	requeue_list;
 	spinlock_t		requeue_lock;