diff mbox series

[1/3] mmc: sdhci: improve ADMA error reporting

Message ID E1iBz4v-0008MV-4k@rmk-PC.armlinux.org.uk (mailing list archive)
State New, archived
Headers show
Series Fix sdhci-of-esdhc DMA coherency | expand

Commit Message

Russell King (Oracle) Sept. 22, 2019, 10:26 a.m. UTC
ADMA errors are potentially data corrupting events; although we print
the register state, we do not usefully print the ADMA descriptors.
Worse than that, we print them by referencing their virtual address
which is meaningless when the register state gives us the DMA address
of the failing descriptor.

Print the ADMA descriptors giving their DMA addresses rather than their
virtual addresses, and print them using SDHCI_DUMP() rather than DBG().

We also do not show the correct value of the interrupt status register;
the register dump shows the current value, after we have cleared the
pending interrupts we are going to service.  What is more useful is to
print the interrupts that _were_ pending at the time the ADMA error was
encountered.  Fix that too.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 drivers/mmc/host/sdhci.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

Comments

Adrian Hunter Sept. 23, 2019, 6:59 a.m. UTC | #1
On 22/09/19 1:26 PM, Russell King wrote:
> ADMA errors are potentially data corrupting events; although we print
> the register state, we do not usefully print the ADMA descriptors.
> Worse than that, we print them by referencing their virtual address
> which is meaningless when the register state gives us the DMA address
> of the failing descriptor.
> 
> Print the ADMA descriptors giving their DMA addresses rather than their
> virtual addresses, and print them using SDHCI_DUMP() rather than DBG().
> 
> We also do not show the correct value of the interrupt status register;
> the register dump shows the current value, after we have cleared the
> pending interrupts we are going to service.  What is more useful is to
> print the interrupts that _were_ pending at the time the ADMA error was
> encountered.  Fix that too.
> 
> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>

Acked-by: Adrian Hunter <adrian.hunter@intel.com>

> ---
>  drivers/mmc/host/sdhci.c | 15 ++++++++++-----
>  1 file changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
> index a5dc5aae973e..0f33097c55ec 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -2855,6 +2855,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  static void sdhci_adma_show_error(struct sdhci_host *host)
>  {
>  	void *desc = host->adma_table;
> +	dma_addr_t dma = host->adma_addr;
>  
>  	sdhci_dumpregs(host);
>  
> @@ -2862,18 +2863,21 @@ static void sdhci_adma_show_error(struct sdhci_host *host)
>  		struct sdhci_adma2_64_desc *dma_desc = desc;
>  
>  		if (host->flags & SDHCI_USE_64_BIT_DMA)
> -			DBG("%p: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
> -			    desc, le32_to_cpu(dma_desc->addr_hi),
> +			SDHCI_DUMP("%08llx: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
> +			    (unsigned long long)dma,
> +			    le32_to_cpu(dma_desc->addr_hi),
>  			    le32_to_cpu(dma_desc->addr_lo),
>  			    le16_to_cpu(dma_desc->len),
>  			    le16_to_cpu(dma_desc->cmd));
>  		else
> -			DBG("%p: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
> -			    desc, le32_to_cpu(dma_desc->addr_lo),
> +			SDHCI_DUMP("%08llx: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
> +			    (unsigned long long)dma,
> +			    le32_to_cpu(dma_desc->addr_lo),
>  			    le16_to_cpu(dma_desc->len),
>  			    le16_to_cpu(dma_desc->cmd));
>  
>  		desc += host->desc_sz;
> +		dma += host->desc_sz;
>  
>  		if (dma_desc->cmd & cpu_to_le16(ADMA2_END))
>  			break;
> @@ -2949,7 +2953,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  			!= MMC_BUS_TEST_R)
>  		host->data->error = -EILSEQ;
>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
> -		pr_err("%s: ADMA error\n", mmc_hostname(host->mmc));
> +		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
> +		       intmask);
>  		sdhci_adma_show_error(host);
>  		host->data->error = -EIO;
>  		if (host->ops->adma_workaround)
>
Ulf Hansson Sept. 23, 2019, 9:54 p.m. UTC | #2
On Sun, 22 Sep 2019 at 12:27, Russell King <rmk+kernel@armlinux.org.uk> wrote:
>
> ADMA errors are potentially data corrupting events; although we print
> the register state, we do not usefully print the ADMA descriptors.
> Worse than that, we print them by referencing their virtual address
> which is meaningless when the register state gives us the DMA address
> of the failing descriptor.
>
> Print the ADMA descriptors giving their DMA addresses rather than their
> virtual addresses, and print them using SDHCI_DUMP() rather than DBG().
>
> We also do not show the correct value of the interrupt status register;
> the register dump shows the current value, after we have cleared the
> pending interrupts we are going to service.  What is more useful is to
> print the interrupts that _were_ pending at the time the ADMA error was
> encountered.  Fix that too.
>
> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>

This seems useful to land for 5.4, so I have queued this up via my fixes branch.

Perhaps we should also tag this for stable, what do you think?

Kind regards
Uffe


> ---
>  drivers/mmc/host/sdhci.c | 15 ++++++++++-----
>  1 file changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
> index a5dc5aae973e..0f33097c55ec 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -2855,6 +2855,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  static void sdhci_adma_show_error(struct sdhci_host *host)
>  {
>         void *desc = host->adma_table;
> +       dma_addr_t dma = host->adma_addr;
>
>         sdhci_dumpregs(host);
>
> @@ -2862,18 +2863,21 @@ static void sdhci_adma_show_error(struct sdhci_host *host)
>                 struct sdhci_adma2_64_desc *dma_desc = desc;
>
>                 if (host->flags & SDHCI_USE_64_BIT_DMA)
> -                       DBG("%p: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
> -                           desc, le32_to_cpu(dma_desc->addr_hi),
> +                       SDHCI_DUMP("%08llx: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
> +                           (unsigned long long)dma,
> +                           le32_to_cpu(dma_desc->addr_hi),
>                             le32_to_cpu(dma_desc->addr_lo),
>                             le16_to_cpu(dma_desc->len),
>                             le16_to_cpu(dma_desc->cmd));
>                 else
> -                       DBG("%p: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
> -                           desc, le32_to_cpu(dma_desc->addr_lo),
> +                       SDHCI_DUMP("%08llx: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
> +                           (unsigned long long)dma,
> +                           le32_to_cpu(dma_desc->addr_lo),
>                             le16_to_cpu(dma_desc->len),
>                             le16_to_cpu(dma_desc->cmd));
>
>                 desc += host->desc_sz;
> +               dma += host->desc_sz;
>
>                 if (dma_desc->cmd & cpu_to_le16(ADMA2_END))
>                         break;
> @@ -2949,7 +2953,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>                         != MMC_BUS_TEST_R)
>                 host->data->error = -EILSEQ;
>         else if (intmask & SDHCI_INT_ADMA_ERROR) {
> -               pr_err("%s: ADMA error\n", mmc_hostname(host->mmc));
> +               pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
> +                      intmask);
>                 sdhci_adma_show_error(host);
>                 host->data->error = -EIO;
>                 if (host->ops->adma_workaround)
> --
> 2.7.4
>
Russell King (Oracle) Sept. 23, 2019, 10:11 p.m. UTC | #3
On Mon, Sep 23, 2019 at 11:54:46PM +0200, Ulf Hansson wrote:
> On Sun, 22 Sep 2019 at 12:27, Russell King <rmk+kernel@armlinux.org.uk> wrote:
> >
> > ADMA errors are potentially data corrupting events; although we print
> > the register state, we do not usefully print the ADMA descriptors.
> > Worse than that, we print them by referencing their virtual address
> > which is meaningless when the register state gives us the DMA address
> > of the failing descriptor.
> >
> > Print the ADMA descriptors giving their DMA addresses rather than their
> > virtual addresses, and print them using SDHCI_DUMP() rather than DBG().
> >
> > We also do not show the correct value of the interrupt status register;
> > the register dump shows the current value, after we have cleared the
> > pending interrupts we are going to service.  What is more useful is to
> > print the interrupts that _were_ pending at the time the ADMA error was
> > encountered.  Fix that too.
> >
> > Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
> 
> This seems useful to land for 5.4, so I have queued this up via my fixes branch.
> 
> Perhaps we should also tag this for stable, what do you think?

No objection from me.

Thanks.

> 
> Kind regards
> Uffe
> 
> 
> > ---
> >  drivers/mmc/host/sdhci.c | 15 ++++++++++-----
> >  1 file changed, 10 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
> > index a5dc5aae973e..0f33097c55ec 100644
> > --- a/drivers/mmc/host/sdhci.c
> > +++ b/drivers/mmc/host/sdhci.c
> > @@ -2855,6 +2855,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
> >  static void sdhci_adma_show_error(struct sdhci_host *host)
> >  {
> >         void *desc = host->adma_table;
> > +       dma_addr_t dma = host->adma_addr;
> >
> >         sdhci_dumpregs(host);
> >
> > @@ -2862,18 +2863,21 @@ static void sdhci_adma_show_error(struct sdhci_host *host)
> >                 struct sdhci_adma2_64_desc *dma_desc = desc;
> >
> >                 if (host->flags & SDHCI_USE_64_BIT_DMA)
> > -                       DBG("%p: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
> > -                           desc, le32_to_cpu(dma_desc->addr_hi),
> > +                       SDHCI_DUMP("%08llx: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
> > +                           (unsigned long long)dma,
> > +                           le32_to_cpu(dma_desc->addr_hi),
> >                             le32_to_cpu(dma_desc->addr_lo),
> >                             le16_to_cpu(dma_desc->len),
> >                             le16_to_cpu(dma_desc->cmd));
> >                 else
> > -                       DBG("%p: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
> > -                           desc, le32_to_cpu(dma_desc->addr_lo),
> > +                       SDHCI_DUMP("%08llx: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
> > +                           (unsigned long long)dma,
> > +                           le32_to_cpu(dma_desc->addr_lo),
> >                             le16_to_cpu(dma_desc->len),
> >                             le16_to_cpu(dma_desc->cmd));
> >
> >                 desc += host->desc_sz;
> > +               dma += host->desc_sz;
> >
> >                 if (dma_desc->cmd & cpu_to_le16(ADMA2_END))
> >                         break;
> > @@ -2949,7 +2953,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> >                         != MMC_BUS_TEST_R)
> >                 host->data->error = -EILSEQ;
> >         else if (intmask & SDHCI_INT_ADMA_ERROR) {
> > -               pr_err("%s: ADMA error\n", mmc_hostname(host->mmc));
> > +               pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
> > +                      intmask);
> >                 sdhci_adma_show_error(host);
> >                 host->data->error = -EIO;
> >                 if (host->ops->adma_workaround)
> > --
> > 2.7.4
> >
>
diff mbox series

Patch

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index a5dc5aae973e..0f33097c55ec 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2855,6 +2855,7 @@  static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
 static void sdhci_adma_show_error(struct sdhci_host *host)
 {
 	void *desc = host->adma_table;
+	dma_addr_t dma = host->adma_addr;
 
 	sdhci_dumpregs(host);
 
@@ -2862,18 +2863,21 @@  static void sdhci_adma_show_error(struct sdhci_host *host)
 		struct sdhci_adma2_64_desc *dma_desc = desc;
 
 		if (host->flags & SDHCI_USE_64_BIT_DMA)
-			DBG("%p: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
-			    desc, le32_to_cpu(dma_desc->addr_hi),
+			SDHCI_DUMP("%08llx: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
+			    (unsigned long long)dma,
+			    le32_to_cpu(dma_desc->addr_hi),
 			    le32_to_cpu(dma_desc->addr_lo),
 			    le16_to_cpu(dma_desc->len),
 			    le16_to_cpu(dma_desc->cmd));
 		else
-			DBG("%p: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
-			    desc, le32_to_cpu(dma_desc->addr_lo),
+			SDHCI_DUMP("%08llx: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
+			    (unsigned long long)dma,
+			    le32_to_cpu(dma_desc->addr_lo),
 			    le16_to_cpu(dma_desc->len),
 			    le16_to_cpu(dma_desc->cmd));
 
 		desc += host->desc_sz;
+		dma += host->desc_sz;
 
 		if (dma_desc->cmd & cpu_to_le16(ADMA2_END))
 			break;
@@ -2949,7 +2953,8 @@  static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 			!= MMC_BUS_TEST_R)
 		host->data->error = -EILSEQ;
 	else if (intmask & SDHCI_INT_ADMA_ERROR) {
-		pr_err("%s: ADMA error\n", mmc_hostname(host->mmc));
+		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
+		       intmask);
 		sdhci_adma_show_error(host);
 		host->data->error = -EIO;
 		if (host->ops->adma_workaround)