diff mbox series

mmc: cqhci: Avoid false "cqhci: CQE stuck on" by not open-coding timeout loop

Message ID 20200413162717.1.Idece266f5c8793193b57a1ddb1066d030c6af8e0@changeid (mailing list archive)
State Accepted
Commit b1ac62a7ac386d76968af5f374a4a7a82a35fe31
Headers show
Series mmc: cqhci: Avoid false "cqhci: CQE stuck on" by not open-coding timeout loop | expand

Commit Message

Doug Anderson April 13, 2020, 11:27 p.m. UTC
Open-coding a timeout loop invariably leads to errors with handling
the timeout properly in one corner case or another.  In the case of
cqhci we might report "CQE stuck on" even if it wasn't stuck on.
You'd just need this sequence of events to happen in cqhci_off():

1. Call ktime_get().
2. Something happens to interrupt the CPU for > 100 us (context switch
   or interrupt).
3. Check time and set "timed_out" to true since > 100 us have passed.
4. Read CQHCI_CTL.
5. Both "reg & CQHCI_HALT" and "timed_out" are true, so break.
6. Since "timed_out" is true, falsely print the error message.

Rather than fixing the polling loop, use readx_poll_timeout() like
many people do.  This has been time tested to handle the corner cases.

Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

 drivers/mmc/host/cqhci.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

Comments

Adrian Hunter April 14, 2020, 12:54 p.m. UTC | #1
On 14/04/20 2:27 am, Douglas Anderson wrote:
> Open-coding a timeout loop invariably leads to errors with handling
> the timeout properly in one corner case or another.  In the case of
> cqhci we might report "CQE stuck on" even if it wasn't stuck on.
> You'd just need this sequence of events to happen in cqhci_off():
> 
> 1. Call ktime_get().
> 2. Something happens to interrupt the CPU for > 100 us (context switch
>    or interrupt).
> 3. Check time and set "timed_out" to true since > 100 us have passed.
> 4. Read CQHCI_CTL.
> 5. Both "reg & CQHCI_HALT" and "timed_out" are true, so break.
> 6. Since "timed_out" is true, falsely print the error message.
> 
> Rather than fixing the polling loop, use readx_poll_timeout() like
> many people do.  This has been time tested to handle the corner cases.
> 
> Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host")
> Signed-off-by: Douglas Anderson <dianders@chromium.org>

Acked-by: Adrian Hunter <adrian.hunter@intel.com>


> ---
> 
>  drivers/mmc/host/cqhci.c | 21 ++++++++++-----------
>  1 file changed, 10 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c
> index c2239ee2c0ef..75934f3c117e 100644
> --- a/drivers/mmc/host/cqhci.c
> +++ b/drivers/mmc/host/cqhci.c
> @@ -5,6 +5,7 @@
>  #include <linux/delay.h>
>  #include <linux/highmem.h>
>  #include <linux/io.h>
> +#include <linux/iopoll.h>
>  #include <linux/module.h>
>  #include <linux/dma-mapping.h>
>  #include <linux/slab.h>
> @@ -349,12 +350,16 @@ static int cqhci_enable(struct mmc_host *mmc, struct mmc_card *card)
>  /* CQHCI is idle and should halt immediately, so set a small timeout */
>  #define CQHCI_OFF_TIMEOUT 100
>  
> +static u32 cqhci_read_ctl(struct cqhci_host *cq_host)
> +{
> +	return cqhci_readl(cq_host, CQHCI_CTL);
> +}
> +
>  static void cqhci_off(struct mmc_host *mmc)
>  {
>  	struct cqhci_host *cq_host = mmc->cqe_private;
> -	ktime_t timeout;
> -	bool timed_out;
>  	u32 reg;
> +	int err;
>  
>  	if (!cq_host->enabled || !mmc->cqe_on || cq_host->recovery_halt)
>  		return;
> @@ -364,15 +369,9 @@ static void cqhci_off(struct mmc_host *mmc)
>  
>  	cqhci_writel(cq_host, CQHCI_HALT, CQHCI_CTL);
>  
> -	timeout = ktime_add_us(ktime_get(), CQHCI_OFF_TIMEOUT);
> -	while (1) {
> -		timed_out = ktime_compare(ktime_get(), timeout) > 0;
> -		reg = cqhci_readl(cq_host, CQHCI_CTL);
> -		if ((reg & CQHCI_HALT) || timed_out)
> -			break;
> -	}
> -
> -	if (timed_out)
> +	err = readx_poll_timeout(cqhci_read_ctl, cq_host, reg,
> +				 reg & CQHCI_HALT, 0, CQHCI_OFF_TIMEOUT);
> +	if (err < 0)
>  		pr_err("%s: cqhci: CQE stuck on\n", mmc_hostname(mmc));
>  	else
>  		pr_debug("%s: cqhci: CQE off\n", mmc_hostname(mmc));
>
Ulf Hansson April 17, 2020, 9:30 a.m. UTC | #2
On Tue, 14 Apr 2020 at 01:27, Douglas Anderson <dianders@chromium.org> wrote:
>
> Open-coding a timeout loop invariably leads to errors with handling
> the timeout properly in one corner case or another.  In the case of
> cqhci we might report "CQE stuck on" even if it wasn't stuck on.
> You'd just need this sequence of events to happen in cqhci_off():
>
> 1. Call ktime_get().
> 2. Something happens to interrupt the CPU for > 100 us (context switch
>    or interrupt).
> 3. Check time and set "timed_out" to true since > 100 us have passed.
> 4. Read CQHCI_CTL.
> 5. Both "reg & CQHCI_HALT" and "timed_out" are true, so break.
> 6. Since "timed_out" is true, falsely print the error message.
>
> Rather than fixing the polling loop, use readx_poll_timeout() like
> many people do.  This has been time tested to handle the corner cases.
>
> Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host")
> Signed-off-by: Douglas Anderson <dianders@chromium.org>

Applied for fixes, and by adding a stable tag, thanks!

Kind regards
Uffe


> ---
>
>  drivers/mmc/host/cqhci.c | 21 ++++++++++-----------
>  1 file changed, 10 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c
> index c2239ee2c0ef..75934f3c117e 100644
> --- a/drivers/mmc/host/cqhci.c
> +++ b/drivers/mmc/host/cqhci.c
> @@ -5,6 +5,7 @@
>  #include <linux/delay.h>
>  #include <linux/highmem.h>
>  #include <linux/io.h>
> +#include <linux/iopoll.h>
>  #include <linux/module.h>
>  #include <linux/dma-mapping.h>
>  #include <linux/slab.h>
> @@ -349,12 +350,16 @@ static int cqhci_enable(struct mmc_host *mmc, struct mmc_card *card)
>  /* CQHCI is idle and should halt immediately, so set a small timeout */
>  #define CQHCI_OFF_TIMEOUT 100
>
> +static u32 cqhci_read_ctl(struct cqhci_host *cq_host)
> +{
> +       return cqhci_readl(cq_host, CQHCI_CTL);
> +}
> +
>  static void cqhci_off(struct mmc_host *mmc)
>  {
>         struct cqhci_host *cq_host = mmc->cqe_private;
> -       ktime_t timeout;
> -       bool timed_out;
>         u32 reg;
> +       int err;
>
>         if (!cq_host->enabled || !mmc->cqe_on || cq_host->recovery_halt)
>                 return;
> @@ -364,15 +369,9 @@ static void cqhci_off(struct mmc_host *mmc)
>
>         cqhci_writel(cq_host, CQHCI_HALT, CQHCI_CTL);
>
> -       timeout = ktime_add_us(ktime_get(), CQHCI_OFF_TIMEOUT);
> -       while (1) {
> -               timed_out = ktime_compare(ktime_get(), timeout) > 0;
> -               reg = cqhci_readl(cq_host, CQHCI_CTL);
> -               if ((reg & CQHCI_HALT) || timed_out)
> -                       break;
> -       }
> -
> -       if (timed_out)
> +       err = readx_poll_timeout(cqhci_read_ctl, cq_host, reg,
> +                                reg & CQHCI_HALT, 0, CQHCI_OFF_TIMEOUT);
> +       if (err < 0)
>                 pr_err("%s: cqhci: CQE stuck on\n", mmc_hostname(mmc));
>         else
>                 pr_debug("%s: cqhci: CQE off\n", mmc_hostname(mmc));
> --
> 2.26.0.110.g2183baf09c-goog
>
diff mbox series

Patch

diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c
index c2239ee2c0ef..75934f3c117e 100644
--- a/drivers/mmc/host/cqhci.c
+++ b/drivers/mmc/host/cqhci.c
@@ -5,6 +5,7 @@ 
 #include <linux/delay.h>
 #include <linux/highmem.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
@@ -349,12 +350,16 @@  static int cqhci_enable(struct mmc_host *mmc, struct mmc_card *card)
 /* CQHCI is idle and should halt immediately, so set a small timeout */
 #define CQHCI_OFF_TIMEOUT 100
 
+static u32 cqhci_read_ctl(struct cqhci_host *cq_host)
+{
+	return cqhci_readl(cq_host, CQHCI_CTL);
+}
+
 static void cqhci_off(struct mmc_host *mmc)
 {
 	struct cqhci_host *cq_host = mmc->cqe_private;
-	ktime_t timeout;
-	bool timed_out;
 	u32 reg;
+	int err;
 
 	if (!cq_host->enabled || !mmc->cqe_on || cq_host->recovery_halt)
 		return;
@@ -364,15 +369,9 @@  static void cqhci_off(struct mmc_host *mmc)
 
 	cqhci_writel(cq_host, CQHCI_HALT, CQHCI_CTL);
 
-	timeout = ktime_add_us(ktime_get(), CQHCI_OFF_TIMEOUT);
-	while (1) {
-		timed_out = ktime_compare(ktime_get(), timeout) > 0;
-		reg = cqhci_readl(cq_host, CQHCI_CTL);
-		if ((reg & CQHCI_HALT) || timed_out)
-			break;
-	}
-
-	if (timed_out)
+	err = readx_poll_timeout(cqhci_read_ctl, cq_host, reg,
+				 reg & CQHCI_HALT, 0, CQHCI_OFF_TIMEOUT);
+	if (err < 0)
 		pr_err("%s: cqhci: CQE stuck on\n", mmc_hostname(mmc));
 	else
 		pr_debug("%s: cqhci: CQE off\n", mmc_hostname(mmc));