diff mbox series

Patch and bugfix in block/blk-mq.c to regain stability when using various SD Cards and Card Readers.

Message ID c2f50eac-3270-8dfa-2440-4c737c366b17@tuwien.ac.at (mailing list archive)
State New
Headers show
Series Patch and bugfix in block/blk-mq.c to regain stability when using various SD Cards and Card Readers. | expand

Commit Message

Thomas Haschka Feb. 21, 2025, 3:41 p.m. UTC
Bug Fix: block: Improve stability of SD cards in Microsoft Surface GO 2 and
     	 	possibly other devices.


The commit 65a558f66c308
     block: Improve performance for BLK_MQ_F_BLOCKING drivers

basically made the use of SD cards in my Microsoft Surface GO 2 impossible.
The cards stop functioning after about 15 minutes, mostly during
I/O-intensive tasks.

As outlined in https://bugzilla.kernel.org/show_bug.cgi?id=218821,
I bisected the problem that causes unstable operation of the card reader
on my Surface GO 2.
I successfully reverted commit 65a558f66c308 on top of 6.12.16 using
the attached patch. Since the bug introduced by this commit might also
hit other users of SD cards on similar hardware, I suggest that this commit
be reverted, even if the performance improvement is lost.

All the best,
Thomas Haschka

Comments

Bart Van Assche Feb. 21, 2025, 5:20 p.m. UTC | #1
On 2/21/25 7:41 AM, Thomas Haschka wrote:
> Bug Fix: block: Improve stability of SD cards in Microsoft Surface GO 2 and
>               possibly other devices.
> 
> 
> The commit 65a558f66c308
>      block: Improve performance for BLK_MQ_F_BLOCKING drivers
> 
> basically made the use of SD cards in my Microsoft Surface GO 2 impossible.
> The cards do stop functioning after about 15 minutes. Mostly at io 
> intensive
> tasks.
> 
> As outlined in https://bugzilla.kernel.org/show_bug.cgi?id=218821
> i bisected the problem that yielded an unstable operation of the cardreader
> on my Surface GO 2.
> I successfully reversed the commit 65a558f66c308 in 6.12.16 using
> the attached patch. As I suppose the bug introduced with this commit might
> hit other users of sd-cards in similar hardware I suggest this commit shall
> be reversed, even if the improved performance might be gone.

Thank you for having bisected this issue and for having shared the
result of the bisection process. This is very useful information.

Since the commit mentioned above is about 1.5 years old and has not
caused any issues for anyone who is not using an SD card reader, that
commit is probably not the root cause of the reported behavior. Are SD
cards controlled by the MMC driver? If so, I think the next step is to
take a close look at the MMC driver. I have Cc-ed the MMC driver maintainer.

Bart.
diff mbox series

Patch

diff '--color=auto' -urpN a/block/blk-mq.c b/block/blk-mq.c
--- a/block/blk-mq.c	2025-02-21 14:01:47.000000000 +0100
+++ b/block/blk-mq.c	2025-02-21 15:52:51.848041852 +0100
@@ -1418,7 +1418,7 @@  void blk_execute_rq_nowait(struct reques
 	}
 
 	blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
-	blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING);
+	blk_mq_run_hw_queue(hctx, false);
 }
 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
 
@@ -2322,8 +2322,6 @@  void blk_mq_run_hw_queue(struct blk_mq_h
 	 */
 	WARN_ON_ONCE(!async && in_interrupt());
 
-	might_sleep_if(!async && hctx->flags & BLK_MQ_F_BLOCKING);
-
 	need_run = blk_mq_hw_queue_need_run(hctx);
 	if (!need_run) {
 		unsigned long flags;
@@ -2342,7 +2340,8 @@  void blk_mq_run_hw_queue(struct blk_mq_h
 			return;
 	}
 
-	if (async || !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
+	if (async || (hctx->flags & BLK_MQ_F_BLOCKING) ||
+	    !cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
 		blk_mq_delay_run_hw_queue(hctx, 0);
 		return;
 	}
@@ -2477,7 +2476,7 @@  void blk_mq_start_hw_queue(struct blk_mq
 {
 	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
 
-	blk_mq_run_hw_queue(hctx, hctx->flags & BLK_MQ_F_BLOCKING);
+	blk_mq_run_hw_queue(hctx, false);
 }
 EXPORT_SYMBOL(blk_mq_start_hw_queue);
 
@@ -2513,8 +2512,7 @@  void blk_mq_start_stopped_hw_queues(stru
 	unsigned long i;
 
 	queue_for_each_hw_ctx(q, hctx, i)
-		blk_mq_start_stopped_hw_queue(hctx, async ||
-					(hctx->flags & BLK_MQ_F_BLOCKING));
+		blk_mq_start_stopped_hw_queue(hctx, async);
 }
 EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
 
@@ -2572,8 +2570,6 @@  static void blk_mq_insert_requests(struc
 	list_for_each_entry(rq, list, queuelist) {
 		BUG_ON(rq->mq_ctx != ctx);
 		trace_block_rq_insert(rq);
-		if (rq->cmd_flags & REQ_NOWAIT)
-			run_queue_async = true;
 	}
 
 	spin_lock(&ctx->lock);
@@ -2739,7 +2735,7 @@  static void blk_mq_try_issue_directly(st
 
 	if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) {
 		blk_mq_insert_request(rq, 0);
-		blk_mq_run_hw_queue(hctx, rq->cmd_flags & REQ_NOWAIT);
+		blk_mq_run_hw_queue(hctx, false);
 		return;
 	}
 
diff '--color=auto' -urpN a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
--- a/drivers/scsi/scsi_lib.c	2025-02-21 14:01:47.000000000 +0100
+++ b/drivers/scsi/scsi_lib.c	2025-02-21 15:53:54.654044691 +0100
@@ -429,8 +429,7 @@  static void scsi_single_lun_run(struct s
 	 * but in most cases, we will be first. Ideally, each LU on the
 	 * target would get some limited time or requests on the target.
 	 */
-	blk_mq_run_hw_queues(current_sdev->request_queue,
-			     shost->queuecommand_may_block);
+	blk_mq_run_hw_queues(current_sdev->request_queue, false);
 
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (!starget->starget_sdev_user)