Message ID | 20200413162717.1.Idece266f5c8793193b57a1ddb1066d030c6af8e0@changeid (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mmc: cqhci: Avoid false "cqhci: CQE stuck on" by not open-coding timeout loop | expand |
On 14/04/20 2:27 am, Douglas Anderson wrote: > Open-coding a timeout loop invariably leads to errors with handling > the timeout properly in one corner case or another. In the case of > cqhci we might report "CQE stuck on" even if it wasn't stuck on. > You'd just need this sequence of events to happen in cqhci_off(): > > 1. Call ktime_get(). > 2. Something happens to interrupt the CPU for > 100 us (context switch > or interrupt). > 3. Check time and; set "timed_out" to true since > 100 us. > 4. Read CQHCI_CTL. > 5. Both "reg & CQHCI_HALT" and "timed_out" are true, so break. > 6. Since "timed_out" is true, falsely print the error message. > > Rather than fixing the polling loop, use readx_poll_timeout() like > many people do. This has been time tested to handle the corner cases. > > Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host") > Signed-off-by: Douglas Anderson <dianders@chromium.org> Acked-by: Adrian Hunter <adrian.hunter@intel.com> > --- > > drivers/mmc/host/cqhci.c | 21 ++++++++++----------- > 1 file changed, 10 insertions(+), 11 deletions(-) > > diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c > index c2239ee2c0ef..75934f3c117e 100644 > --- a/drivers/mmc/host/cqhci.c > +++ b/drivers/mmc/host/cqhci.c > @@ -5,6 +5,7 @@ > #include <linux/delay.h> > #include <linux/highmem.h> > #include <linux/io.h> > +#include <linux/iopoll.h> > #include <linux/module.h> > #include <linux/dma-mapping.h> > #include <linux/slab.h> > @@ -349,12 +350,16 @@ static int cqhci_enable(struct mmc_host *mmc, struct mmc_card *card) > /* CQHCI is idle and should halt immediately, so set a small timeout */ > #define CQHCI_OFF_TIMEOUT 100 > > +static u32 cqhci_read_ctl(struct cqhci_host *cq_host) > +{ > + return cqhci_readl(cq_host, CQHCI_CTL); > +} > + > static void cqhci_off(struct mmc_host *mmc) > { > struct cqhci_host *cq_host = mmc->cqe_private; > - ktime_t timeout; > - bool timed_out; > u32 reg; > + int err; > > if (!cq_host->enabled || !mmc->cqe_on || cq_host->recovery_halt) > return; > @@ -364,15 +369,9 @@ static void cqhci_off(struct mmc_host *mmc) > > cqhci_writel(cq_host, CQHCI_HALT, CQHCI_CTL); > > - timeout = ktime_add_us(ktime_get(), CQHCI_OFF_TIMEOUT); > - while (1) { > - timed_out = ktime_compare(ktime_get(), timeout) > 0; > - reg = cqhci_readl(cq_host, CQHCI_CTL); > - if ((reg & CQHCI_HALT) || timed_out) > - break; > - } > - > - if (timed_out) > + err = readx_poll_timeout(cqhci_read_ctl, cq_host, reg, > + reg & CQHCI_HALT, 0, CQHCI_OFF_TIMEOUT); > + if (err < 0) > pr_err("%s: cqhci: CQE stuck on\n", mmc_hostname(mmc)); > else > pr_debug("%s: cqhci: CQE off\n", mmc_hostname(mmc)); >
On Tue, 14 Apr 2020 at 01:27, Douglas Anderson <dianders@chromium.org> wrote: > > Open-coding a timeout loop invariably leads to errors with handling > the timeout properly in one corner case or another. In the case of > cqhci we might report "CQE stuck on" even if it wasn't stuck on. > You'd just need this sequence of events to happen in cqhci_off(): > > 1. Call ktime_get(). > 2. Something happens to interrupt the CPU for > 100 us (context switch > or interrupt). > 3. Check time and; set "timed_out" to true since > 100 us. > 4. Read CQHCI_CTL. > 5. Both "reg & CQHCI_HALT" and "timed_out" are true, so break. > 6. Since "timed_out" is true, falsely print the error message. > > Rather than fixing the polling loop, use readx_poll_timeout() like > many people do. This has been time tested to handle the corner cases. > > Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host") > Signed-off-by: Douglas Anderson <dianders@chromium.org> Applied for fixes, and by adding a stable tag, thanks! Kind regards Uffe > --- > > drivers/mmc/host/cqhci.c | 21 ++++++++++----------- > 1 file changed, 10 insertions(+), 11 deletions(-) > > diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c > index c2239ee2c0ef..75934f3c117e 100644 > --- a/drivers/mmc/host/cqhci.c > +++ b/drivers/mmc/host/cqhci.c > @@ -5,6 +5,7 @@ > #include <linux/delay.h> > #include <linux/highmem.h> > #include <linux/io.h> > +#include <linux/iopoll.h> > #include <linux/module.h> > #include <linux/dma-mapping.h> > #include <linux/slab.h> > @@ -349,12 +350,16 @@ static int cqhci_enable(struct mmc_host *mmc, struct mmc_card *card) > /* CQHCI is idle and should halt immediately, so set a small timeout */ > #define CQHCI_OFF_TIMEOUT 100 > > +static u32 cqhci_read_ctl(struct cqhci_host *cq_host) > +{ > + return cqhci_readl(cq_host, CQHCI_CTL); > +} > + > static void cqhci_off(struct mmc_host *mmc) > { > struct cqhci_host *cq_host = mmc->cqe_private; > - ktime_t timeout; > - bool timed_out; > u32 reg; > + int err; > > if (!cq_host->enabled || !mmc->cqe_on || cq_host->recovery_halt) > return; > @@ -364,15 +369,9 @@ static void cqhci_off(struct mmc_host *mmc) > > cqhci_writel(cq_host, CQHCI_HALT, CQHCI_CTL); > > - timeout = ktime_add_us(ktime_get(), CQHCI_OFF_TIMEOUT); > - while (1) { > - timed_out = ktime_compare(ktime_get(), timeout) > 0; > - reg = cqhci_readl(cq_host, CQHCI_CTL); > - if ((reg & CQHCI_HALT) || timed_out) > - break; > - } > - > - if (timed_out) > + err = readx_poll_timeout(cqhci_read_ctl, cq_host, reg, > + reg & CQHCI_HALT, 0, CQHCI_OFF_TIMEOUT); > + if (err < 0) > pr_err("%s: cqhci: CQE stuck on\n", mmc_hostname(mmc)); > else > pr_debug("%s: cqhci: CQE off\n", mmc_hostname(mmc)); > -- > 2.26.0.110.g2183baf09c-goog >
diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c index c2239ee2c0ef..75934f3c117e 100644 --- a/drivers/mmc/host/cqhci.c +++ b/drivers/mmc/host/cqhci.c @@ -5,6 +5,7 @@ #include <linux/delay.h> #include <linux/highmem.h> #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/module.h> #include <linux/dma-mapping.h> #include <linux/slab.h> @@ -349,12 +350,16 @@ static int cqhci_enable(struct mmc_host *mmc, struct mmc_card *card) /* CQHCI is idle and should halt immediately, so set a small timeout */ #define CQHCI_OFF_TIMEOUT 100 +static u32 cqhci_read_ctl(struct cqhci_host *cq_host) +{ + return cqhci_readl(cq_host, CQHCI_CTL); +} + static void cqhci_off(struct mmc_host *mmc) { struct cqhci_host *cq_host = mmc->cqe_private; - ktime_t timeout; - bool timed_out; u32 reg; + int err; if (!cq_host->enabled || !mmc->cqe_on || cq_host->recovery_halt) return; @@ -364,15 +369,9 @@ static void cqhci_off(struct mmc_host *mmc) cqhci_writel(cq_host, CQHCI_HALT, CQHCI_CTL); - timeout = ktime_add_us(ktime_get(), CQHCI_OFF_TIMEOUT); - while (1) { - timed_out = ktime_compare(ktime_get(), timeout) > 0; - reg = cqhci_readl(cq_host, CQHCI_CTL); - if ((reg & CQHCI_HALT) || timed_out) - break; - } - - if (timed_out) + err = readx_poll_timeout(cqhci_read_ctl, cq_host, reg, + reg & CQHCI_HALT, 0, CQHCI_OFF_TIMEOUT); + if (err < 0) pr_err("%s: cqhci: CQE stuck on\n", mmc_hostname(mmc)); else pr_debug("%s: cqhci: CQE off\n", mmc_hostname(mmc));
Open-coding a timeout loop invariably leads to errors with handling the timeout properly in one corner case or another. In the case of cqhci we might report "CQE stuck on" even if it wasn't stuck on. You'd just need this sequence of events to happen in cqhci_off(): 1. Call ktime_get(). 2. Something happens to interrupt the CPU for > 100 us (context switch or interrupt). 3. Check time and; set "timed_out" to true since > 100 us. 4. Read CQHCI_CTL. 5. Both "reg & CQHCI_HALT" and "timed_out" are true, so break. 6. Since "timed_out" is true, falsely print the error message. Rather than fixing the polling loop, use readx_poll_timeout() like many people do. This has been time tested to handle the corner cases. Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host") Signed-off-by: Douglas Anderson <dianders@chromium.org> --- drivers/mmc/host/cqhci.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-)