diff mbox series

[V2] mmc: core: Fix recursive locking issue in CQE recovery path

Message ID 1588868135-31783-1-git-send-email-vbadigan@codeaurora.org (mailing list archive)
State New, archived
Headers show
Series [V2] mmc: core: Fix recursive locking issue in CQE recovery path | expand

Commit Message

Veerabhadrarao Badiganti May 7, 2020, 4:15 p.m. UTC
From: Sarthak Garg <sartgarg@codeaurora.org>

Consider the following stack trace:

-001|raw_spin_lock_irqsave
-002|mmc_blk_cqe_complete_rq
-003|__blk_mq_complete_request(inline)
-003|blk_mq_complete_request(rq)
-004|mmc_cqe_timed_out(inline)
-004|mmc_mq_timed_out

mmc_mq_timed_out() acquires the queue lock (mq->lock) and, while still
holding it, calls mmc_cqe_timed_out(). That path ends up in
mmc_blk_cqe_complete_rq(), which tries to acquire the same lock. This
results in recursive locking: the task spins on a lock it already
holds, which leads to a watchdog bark.

Fix this issue by holding the lock only for the required critical section.

Cc: <stable@vger.kernel.org>
Fixes: 1e8e55b67030 ("mmc: block: Add CQE support")
Suggested-by: Sahitya Tummala <stummala@codeaurora.org>
Signed-off-by: Sarthak Garg <sartgarg@codeaurora.org>
---
 drivers/mmc/core/queue.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

Comments

Adrian Hunter May 7, 2020, 5:21 p.m. UTC | #1
On 7/05/20 7:15 pm, Veerabhadrarao Badiganti wrote:
> From: Sarthak Garg <sartgarg@codeaurora.org>
> 
> Consider the following stack trace
> 
> -001|raw_spin_lock_irqsave
> -002|mmc_blk_cqe_complete_rq
> -003|__blk_mq_complete_request(inline)
> -003|blk_mq_complete_request(rq)
> -004|mmc_cqe_timed_out(inline)
> -004|mmc_mq_timed_out
> 
> mmc_mq_timed_out acquires the queue_lock for the first
> time. The mmc_blk_cqe_complete_rq function also tries to acquire
> the same queue lock resulting in recursive locking where the task
> is spinning for the same lock which it has already acquired leading
> to watchdog bark.
> 
> Fix this issue with the lock only for the required critical section.
> 
> Cc: <stable@vger.kernel.org>
> Fixes: 1e8e55b67030 ("mmc: block: Add CQE support")
> Suggested-by: Sahitya Tummala <stummala@codeaurora.org>
> Signed-off-by: Sarthak Garg <sartgarg@codeaurora.org>

Acked-by: Adrian Hunter <adrian.hunter@intel.com>

> ---
>  drivers/mmc/core/queue.c | 13 ++++---------
>  1 file changed, 4 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
> index 25bee3d..b5fd3bc 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -107,7 +107,7 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>  	case MMC_ISSUE_DCMD:
>  		if (host->cqe_ops->cqe_timeout(host, mrq, &recovery_needed)) {
>  			if (recovery_needed)
> -				__mmc_cqe_recovery_notifier(mq);
> +				mmc_cqe_recovery_notifier(mrq);
>  			return BLK_EH_RESET_TIMER;
>  		}
>  		/* No timeout (XXX: huh? comment doesn't make much sense) */
> @@ -127,18 +127,13 @@ static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req,
>  	struct mmc_card *card = mq->card;
>  	struct mmc_host *host = card->host;
>  	unsigned long flags;
> -	int ret;
> +	bool ignore_tout;
>  
>  	spin_lock_irqsave(&mq->lock, flags);
> -
> -	if (mq->recovery_needed || !mq->use_cqe || host->hsq_enabled)
> -		ret = BLK_EH_RESET_TIMER;
> -	else
> -		ret = mmc_cqe_timed_out(req);
> -
> +	ignore_tout = mq->recovery_needed || !mq->use_cqe || host->hsq_enabled;
>  	spin_unlock_irqrestore(&mq->lock, flags);
>  
> -	return ret;
> +	return ignore_tout ? BLK_EH_RESET_TIMER : mmc_cqe_timed_out(req);
>  }
>  
>  static void mmc_mq_recovery_handler(struct work_struct *work)
>
Ulf Hansson May 8, 2020, 8:12 a.m. UTC | #2
On Thu, 7 May 2020 at 18:15, Veerabhadrarao Badiganti
<vbadigan@codeaurora.org> wrote:
>
> From: Sarthak Garg <sartgarg@codeaurora.org>
>
> Consider the following stack trace
>
> -001|raw_spin_lock_irqsave
> -002|mmc_blk_cqe_complete_rq
> -003|__blk_mq_complete_request(inline)
> -003|blk_mq_complete_request(rq)
> -004|mmc_cqe_timed_out(inline)
> -004|mmc_mq_timed_out
>
> mmc_mq_timed_out acquires the queue_lock for the first
> time. The mmc_blk_cqe_complete_rq function also tries to acquire
> the same queue lock resulting in recursive locking where the task
> is spinning for the same lock which it has already acquired leading
> to watchdog bark.
>
> Fix this issue with the lock only for the required critical section.
>
> Cc: <stable@vger.kernel.org>
> Fixes: 1e8e55b67030 ("mmc: block: Add CQE support")
> Suggested-by: Sahitya Tummala <stummala@codeaurora.org>
> Signed-off-by: Sarthak Garg <sartgarg@codeaurora.org>

Applied for fixes, thanks!

Kind regards
Uffe


> ---
>  drivers/mmc/core/queue.c | 13 ++++---------
>  1 file changed, 4 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
> index 25bee3d..b5fd3bc 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -107,7 +107,7 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>         case MMC_ISSUE_DCMD:
>                 if (host->cqe_ops->cqe_timeout(host, mrq, &recovery_needed)) {
>                         if (recovery_needed)
> -                               __mmc_cqe_recovery_notifier(mq);
> +                               mmc_cqe_recovery_notifier(mrq);
>                         return BLK_EH_RESET_TIMER;
>                 }
>                 /* No timeout (XXX: huh? comment doesn't make much sense) */
> @@ -127,18 +127,13 @@ static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req,
>         struct mmc_card *card = mq->card;
>         struct mmc_host *host = card->host;
>         unsigned long flags;
> -       int ret;
> +       bool ignore_tout;
>
>         spin_lock_irqsave(&mq->lock, flags);
> -
> -       if (mq->recovery_needed || !mq->use_cqe || host->hsq_enabled)
> -               ret = BLK_EH_RESET_TIMER;
> -       else
> -               ret = mmc_cqe_timed_out(req);
> -
> +       ignore_tout = mq->recovery_needed || !mq->use_cqe || host->hsq_enabled;
>         spin_unlock_irqrestore(&mq->lock, flags);
>
> -       return ret;
> +       return ignore_tout ? BLK_EH_RESET_TIMER : mmc_cqe_timed_out(req);
>  }
>
>  static void mmc_mq_recovery_handler(struct work_struct *work)
> --
> Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc., is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
Sasha Levin May 9, 2020, 12:30 p.m. UTC | #3
Hi

[This is an automated email]

This commit has been processed because it contains a "Fixes:" tag
fixing commit: 1e8e55b67030 ("mmc: block: Add CQE support").

The bot has tested the following trees: v5.6.11, v5.4.39, v4.19.121.

v5.6.11: Failed to apply! Possible dependencies:
    511ce378e16f ("mmc: Add MMC host software queue support")

v5.4.39: Failed to apply! Possible dependencies:
    511ce378e16f ("mmc: Add MMC host software queue support")

v4.19.121: Failed to apply! Possible dependencies:
    310df020cdd7 ("mmc: stop abusing the request queue_lock pointer")
    511ce378e16f ("mmc: Add MMC host software queue support")
    b061b326287d ("mmc: simplify queue initialization")
    f5d72c5c55bc ("mmc: stop abusing the request queue_lock pointer")


NOTE: The patch will not be queued to stable trees until it is upstream.

How should we proceed with this patch?
diff mbox series

Patch

diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 25bee3d..b5fd3bc 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -107,7 +107,7 @@  static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
 	case MMC_ISSUE_DCMD:
 		if (host->cqe_ops->cqe_timeout(host, mrq, &recovery_needed)) {
 			if (recovery_needed)
-				__mmc_cqe_recovery_notifier(mq);
+				mmc_cqe_recovery_notifier(mrq);
 			return BLK_EH_RESET_TIMER;
 		}
 		/* No timeout (XXX: huh? comment doesn't make much sense) */
@@ -127,18 +127,13 @@  static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req,
 	struct mmc_card *card = mq->card;
 	struct mmc_host *host = card->host;
 	unsigned long flags;
-	int ret;
+	bool ignore_tout;
 
 	spin_lock_irqsave(&mq->lock, flags);
-
-	if (mq->recovery_needed || !mq->use_cqe || host->hsq_enabled)
-		ret = BLK_EH_RESET_TIMER;
-	else
-		ret = mmc_cqe_timed_out(req);
-
+	ignore_tout = mq->recovery_needed || !mq->use_cqe || host->hsq_enabled;
 	spin_unlock_irqrestore(&mq->lock, flags);
 
-	return ret;
+	return ignore_tout ? BLK_EH_RESET_TIMER : mmc_cqe_timed_out(req);
 }
 
 static void mmc_mq_recovery_handler(struct work_struct *work)