diff mbox series

[V1,2/2] mmc: core: Fix recursive locking issue in CQE recovery path

Message ID 1588775643-18037-3-git-send-email-vbadigan@codeaurora.org (mailing list archive)
State New, archived
Headers show
Series CQE fixes | expand

Commit Message

Veerabhadrarao Badiganti May 6, 2020, 2:34 p.m. UTC
From: Sarthak Garg <sartgarg@codeaurora.org>

Consider the following stack trace

-001|raw_spin_lock_irqsave
-002|mmc_blk_cqe_complete_rq
-003|__blk_mq_complete_request(inline)
-003|blk_mq_complete_request(rq)
-004|mmc_cqe_timed_out(inline)
-004|mmc_mq_timed_out

mmc_mq_timed_out() acquires the queue lock (mq->lock) first. Further
down the same call path, mmc_blk_cqe_complete_rq() tries to acquire
the same lock. This results in recursive locking: the task spins on a
lock that it already holds, which leads to a watchdog bark.

Fix this issue by holding the lock only for the required critical section.

Cc: <stable@vger.kernel.org> # v4.19+
Suggested-by: Sahitya Tummala <stummala@codeaurora.org>
Signed-off-by: Sarthak Garg <sartgarg@codeaurora.org>
---
 drivers/mmc/core/queue.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

Comments

Adrian Hunter May 7, 2020, 11:48 a.m. UTC | #1
On 6/05/20 5:34 pm, Veerabhadrarao Badiganti wrote:
> From: Sarthak Garg <sartgarg@codeaurora.org>
> 
> Consider the following stack trace
> 
> -001|raw_spin_lock_irqsave
> -002|mmc_blk_cqe_complete_rq
> -003|__blk_mq_complete_request(inline)
> -003|blk_mq_complete_request(rq)
> -004|mmc_cqe_timed_out(inline)
> -004|mmc_mq_timed_out
> 
> mmc_mq_timed_out acquires the queue_lock for the first
> time. The mmc_blk_cqe_complete_rq function also tries to acquire
> the same queue lock resulting in recursive locking where the task
> is spinning for the same lock which it has already acquired leading
> to watchdog bark.
> 
> Fix this issue with the lock only for the required critical section.
> 
> Cc: <stable@vger.kernel.org> # v4.19+
> Suggested-by: Sahitya Tummala <stummala@codeaurora.org>
> Signed-off-by: Sarthak Garg <sartgarg@codeaurora.org>
> ---
>  drivers/mmc/core/queue.c | 11 ++++++-----
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
> index 25bee3d..72bef39 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -107,7 +107,7 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>  	case MMC_ISSUE_DCMD:
>  		if (host->cqe_ops->cqe_timeout(host, mrq, &recovery_needed)) {
>  			if (recovery_needed)
> -				__mmc_cqe_recovery_notifier(mq);
> +				mmc_cqe_recovery_notifier(mrq);
>  			return BLK_EH_RESET_TIMER;
>  		}
>  		/* No timeout (XXX: huh? comment doesn't make much sense) */
> @@ -131,12 +131,13 @@ static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req,
>  
>  	spin_lock_irqsave(&mq->lock, flags);
>  
> -	if (mq->recovery_needed || !mq->use_cqe || host->hsq_enabled)
> +	if (mq->recovery_needed || !mq->use_cqe || host->hsq_enabled) {
>  		ret = BLK_EH_RESET_TIMER;
> -	else
> +		spin_unlock_irqrestore(&mq->lock, flags);
> +	} else {
> +		spin_unlock_irqrestore(&mq->lock, flags);
>  		ret = mmc_cqe_timed_out(req);
> -
> -	spin_unlock_irqrestore(&mq->lock, flags);
> +	}

This looks good, but I think another change is needed as well.  I will
send a patch for that, but in the meantime maybe you could straighten out
the code flow around the spinlock, e.g.:

	spin_lock_irqsave(&mq->lock, flags);
	ignore = mq->recovery_needed || !mq->use_cqe || host->hsq_enabled;
	spin_unlock_irqrestore(&mq->lock, flags);

	return ignore ? BLK_EH_RESET_TIMER : mmc_cqe_timed_out(req);

Please also add a Fixes: tag.

>  
>  	return ret;
>  }
>
diff mbox series

Patch

diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 25bee3d..72bef39 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -107,7 +107,7 @@  static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
 	case MMC_ISSUE_DCMD:
 		if (host->cqe_ops->cqe_timeout(host, mrq, &recovery_needed)) {
 			if (recovery_needed)
-				__mmc_cqe_recovery_notifier(mq);
+				mmc_cqe_recovery_notifier(mrq);
 			return BLK_EH_RESET_TIMER;
 		}
 		/* No timeout (XXX: huh? comment doesn't make much sense) */
@@ -131,12 +131,13 @@  static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req,
 
 	spin_lock_irqsave(&mq->lock, flags);
 
-	if (mq->recovery_needed || !mq->use_cqe || host->hsq_enabled)
+	if (mq->recovery_needed || !mq->use_cqe || host->hsq_enabled) {
 		ret = BLK_EH_RESET_TIMER;
-	else
+		spin_unlock_irqrestore(&mq->lock, flags);
+	} else {
+		spin_unlock_irqrestore(&mq->lock, flags);
 		ret = mmc_cqe_timed_out(req);
-
-	spin_unlock_irqrestore(&mq->lock, flags);
+	}
 
 	return ret;
 }