diff mbox

[1/5] mmc: renesas_sdhi_internal_dmac: limit DMA RX for old SoCs

Message ID 20180410093831.1759-2-wsa+renesas@sang-engineering.com (mailing list archive)
State Superseded
Delegated to: Geert Uytterhoeven
Headers show

Commit Message

Wolfram Sang April 10, 2018, 9:38 a.m. UTC
Early revisions of certain SoCs cannot do multiple DMA RX streams in
parallel. To avoid data corruption, only allow one DMA RX channel and
fall back to PIO, if needed.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Tested-by: Nguyen Viet Dung <dung.nguyen.aj@renesas.com>
---
 drivers/mmc/host/renesas_sdhi_internal_dmac.c | 35 ++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

Comments

Geert Uytterhoeven April 10, 2018, 9:48 a.m. UTC | #1
Hi Wolfram,

On Tue, Apr 10, 2018 at 11:38 AM, Wolfram Sang
<wsa+renesas@sang-engineering.com> wrote:
> Early revisions of certain SoCs cannot do multiple DMA RX streams in
> parallel. To avoid data corruption, only allow one DMA RX channel and
> fall back to PIO, if needed.
>
> Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
> Reviewed-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> Tested-by: Nguyen Viet Dung <dung.nguyen.aj@renesas.com>

Thanks for your patch!

> --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
> +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c

>  static int renesas_sdhi_internal_dmac_probe(struct platform_device *pdev)
>  {
> -       if (!soc_device_match(gen3_soc_whitelist))
> +       const struct soc_device_attribute *soc = soc_device_match(gen3_soc_whitelist);
> +
> +       if (!soc)
>                 return -ENODEV;
>
> +       if (soc->data)

This non-NULL check is not really needed.

> +               global_flags |= (unsigned long)soc->data;
> +
>         return renesas_sdhi_probe(pdev, &renesas_sdhi_internal_dmac_dma_ops);
>  }

Gr{oetje,eeting}s,

                        Geert
Simon Horman April 11, 2018, 7:14 a.m. UTC | #2
On Tue, Apr 10, 2018 at 11:38:27AM +0200, Wolfram Sang wrote:
> Early revisions of certain SoCs cannot do multiple DMA RX streams in
> parallel. To avoid data corruption, only allow one DMA RX channel and
> fall back to PIO, if needed.
> 
> Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
> Reviewed-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> Tested-by: Nguyen Viet Dung <dung.nguyen.aj@renesas.com>
> ---
>  drivers/mmc/host/renesas_sdhi_internal_dmac.c | 35 ++++++++++++++++++++++++---
>  1 file changed, 32 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
> index 8e0acd197c43..9c50d64cd10c 100644
> --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
> +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
> @@ -9,6 +9,7 @@
>   * published by the Free Software Foundation.
>   */
>  
> +#include <linux/bitops.h>
>  #include <linux/device.h>
>  #include <linux/dma-mapping.h>
>  #include <linux/io-64-nonatomic-hi-lo.h>
> @@ -62,6 +63,17 @@
>   *   need a custom accessor.
>   */
>  
> +static unsigned long global_flags;

Is the restriction on concurrent DMA RX streams global or per-device?

> +/*
> + * Workaround for avoiding to use RX DMAC by multiple channels.
> + * On R-Car H3 ES1.* and M3-W ES1.0, when multiple SDHI channels use
> + * RX DMAC simultaneously, sometimes hundreds of bytes data are not
> + * stored into the system memory even if the DMAC interrupt happened.
> + * So, this driver then uses one RX DMAC channel only.
> + */
> +#define SDHI_INTERNAL_DMAC_ONE_RX_ONLY	0
> +#define SDHI_INTERNAL_DMAC_RX_IN_USE	1
> +
>  /* Definitions for sampling clocks */
>  static struct renesas_sdhi_scc rcar_gen3_scc_taps[] = {
>  	{
> @@ -126,6 +138,10 @@ renesas_sdhi_internal_dmac_abort_dma(struct tmio_mmc_host *host) {
>  	renesas_sdhi_internal_dmac_dm_write(host, DM_CM_RST,
>  					    RST_RESERVED_BITS | val);
>  
> +
> +	if (host->data && host->data->flags & MMC_DATA_READ)
> +		clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags);
>  	renesas_sdhi_internal_dmac_enable_dma(host, true);
>  }
>  
> @@ -155,6 +171,9 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host,
>  	if (data->flags & MMC_DATA_READ) {
>  		dtran_mode |= DTRAN_MODE_CH_NUM_CH1;
>  		dir = DMA_FROM_DEVICE;
> +		if (test_bit(SDHI_INTERNAL_DMAC_ONE_RX_ONLY, &global_flags) &&
> +		    test_and_set_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags))
> +			goto force_pio;
>  	} else {
>  		dtran_mode |= DTRAN_MODE_CH_NUM_CH0;
>  		dir = DMA_TO_DEVICE;
> @@ -208,6 +227,9 @@ static void renesas_sdhi_internal_dmac_complete_tasklet_fn(unsigned long arg)
>  	renesas_sdhi_internal_dmac_enable_dma(host, false);
>  	dma_unmap_sg(&host->pdev->dev, host->sg_ptr, host->sg_len, dir);
>  
> +	if (dir == DMA_FROM_DEVICE)
> +		clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags);
> +

Is clear_bit() expensive? If so it might be worth avoiding on SoCs that
don't have the restriction covered by this patch.

> +
>  	tmio_mmc_do_data_irq(host);
>  out:
>  	spin_unlock_irq(&host->lock);
> @@ -251,18 +273,25 @@ static const struct tmio_mmc_dma_ops renesas_sdhi_internal_dmac_dma_ops = {
>   * implementation as others may use a different implementation.
>   */
>  static const struct soc_device_attribute gen3_soc_whitelist[] = {
> -        { .soc_id = "r8a7795", .revision = "ES1.*" },
> +        { .soc_id = "r8a7795", .revision = "ES1.*",
> +	  .data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) },
>          { .soc_id = "r8a7795", .revision = "ES2.0" },
> -        { .soc_id = "r8a7796", .revision = "ES1.0" },
> +        { .soc_id = "r8a7796", .revision = "ES1.0",
> +	  .data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) },
>          { .soc_id = "r8a77995", .revision = "ES1.0" },
>          { /* sentinel */ }
>  };
>  
>  static int renesas_sdhi_internal_dmac_probe(struct platform_device *pdev)
>  {
> -	if (!soc_device_match(gen3_soc_whitelist))
> +	const struct soc_device_attribute *soc = soc_device_match(gen3_soc_whitelist);
> +
> +	if (!soc)
>  		return -ENODEV;
>  
> +	if (soc->data)
> +		global_flags |= (unsigned long)soc->data;
> +
>  	return renesas_sdhi_probe(pdev, &renesas_sdhi_internal_dmac_dma_ops);
>  }
>  
> -- 
> 2.11.0
>
Wolfram Sang April 12, 2018, 11:11 a.m. UTC | #3
> > +       if (soc->data)
> 
> This non-NULL check is not really needed.

And if we match using the Gen3 generic compatible with a non-whitelisted
SoC?
Wolfram Sang April 12, 2018, 11:21 a.m. UTC | #4
> > +static unsigned long global_flags;
> 
> Is the restriction on concurrent DMA RX streams global or per-device?

? Each device has only one DMA RX channel. Hey Simon, you upstreamed
this driver :) Or did I get the question wrong?

> > +	if (dir == DMA_FROM_DEVICE)
> > +		clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags);
> > +
> 
> Is clear_bit() expensive? If so it might be worth avoiding on SoCs that
> don't have the restriction covered by this patch.

It's an atomic bitop, so maybe has a memory barrier. Hmm, the above
version is better on the cache lines, though, if you don't have the
restriction.

Will think about it and make sure both clear_bit() are in sync.
Geert Uytterhoeven April 12, 2018, 11:25 a.m. UTC | #5
Hi Wolfram,

On Thu, Apr 12, 2018 at 1:11 PM, Wolfram Sang <wsa@the-dreams.de> wrote:
>> > +       if (soc->data)
>>
>> This non-NULL check is not really needed.
>
> And if we match using the Gen3 generic compatible with a non-whitelisted
> SoC?

That should have been caught by the !soc check above, and have already
returned with -ENODEV.

Gr{oetje,eeting}s,

                        Geert
Wolfram Sang April 12, 2018, 11:31 a.m. UTC | #6
> That should have been caught by the !soc check above, and have already
> returned with -ENODEV.

Now I get it: You mean non-0 check, not non-NULL check...
Geert Uytterhoeven April 12, 2018, 11:34 a.m. UTC | #7
Hi Wolfram,

On Thu, Apr 12, 2018 at 1:31 PM, Wolfram Sang <wsa@the-dreams.de> wrote:
>> That should have been caught by the !soc check above, and have already
>> returned with -ENODEV.
>
> Now I get it: You mean non-0 check, not non-NULL check...

soc->data _is_ a pointer. You only cast it to an integer on the next line.

Gr{oetje,eeting}s,

                        Geert
Wolfram Sang April 12, 2018, 11:40 a.m. UTC | #8
On Thu, Apr 12, 2018 at 01:34:41PM +0200, Geert Uytterhoeven wrote:
> Hi Wolfram,
> 
> On Thu, Apr 12, 2018 at 1:31 PM, Wolfram Sang <wsa@the-dreams.de> wrote:
> >> That should have been caught by the !soc check above, and have already
> >> returned with -ENODEV.
> >
> > Now I get it: You mean non-0 check, not non-NULL check...
> 
> soc->data _is_ a pointer. You only cast it to an integer on the next line.

Yes. I usually worked with it as an integer, so I got confused. Will
fix.
Simon Horman April 13, 2018, 8:14 a.m. UTC | #9
On Thu, Apr 12, 2018 at 01:21:42PM +0200, Wolfram Sang wrote:
> 
> > > +static unsigned long global_flags;
> > 
> > Is the restriction on concurrent DMA RX streams global or per-device?
> 
> ? Each device has only one DMA RX channel. Hey Simon, you upstreamed
> this driver :) Or did I get the question wrong?

As I understand things this patch implements a restriction on concurrent
DMA RX streams for old SoCs, corresponding with a limitation in the
hardware.

As the implementation stands it is global - only one DMA RX stream may
be in flight for the entire system. I am wondering if that is the right
granularity for the restriction. Perhaps it could be per-SDHI device,
allowing concurrent streams on different SDHI devices.

I think what you have is safe. But perhaps it could be relaxed.
I do not have any insights regarding the extent of the hardware
restriction (that I can recall at this time).

> > > +	if (dir == DMA_FROM_DEVICE)
> > > +		clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags);
> > > +
> > 
> > Is clear_bit() expensive? If so it might be worth avoiding on SoCs that
> > don't have the restriction covered by this patch.
> 
> It's an atomic bitop, so maybe has a memory barrier. Hmm, the above
> version is better on the cache lines, though, if you don't have the
> restriction.
> 
> Will think about it and make sure both clear_bit() are in sync.

Thanks, I don't feel strongly about this.
Wolfram Sang April 13, 2018, 8:35 a.m. UTC | #10
> As the implementation stands it is global - only one DMA RX stream may
> be in flight for the entire system. I am wondering if that is the right
> granularity for the restriction. Perhaps it could be per-SDHI device,
> allowing concurrent streams on different SDHI devices.

As we have only one DMA RX channel per device, there is no other
concurrency than the global concurrency.
Simon Horman April 13, 2018, 9:47 a.m. UTC | #11
On Fri, Apr 13, 2018 at 10:35:48AM +0200, Wolfram Sang wrote:
> 
> > As the implementation stands it is global - only one DMA RX stream may
> > be in flight for the entire system. I am wondering if that is the right
> > granularity for the restriction. Perhaps it could be per-SDHI device,
> > allowing concurrent streams on different SDHI devices.
> 
> As we have only one DMA RX channel per device, there is no other
> concurrency than the global concurrency.

Thanks, understood.
diff mbox

Patch

diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
index 8e0acd197c43..9c50d64cd10c 100644
--- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
@@ -9,6 +9,7 @@ 
  * published by the Free Software Foundation.
  */
 
+#include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/io-64-nonatomic-hi-lo.h>
@@ -62,6 +63,17 @@ 
  *   need a custom accessor.
  */
 
+static unsigned long global_flags;
+/*
+ * Workaround for avoiding to use RX DMAC by multiple channels.
+ * On R-Car H3 ES1.* and M3-W ES1.0, when multiple SDHI channels use
+ * RX DMAC simultaneously, sometimes hundreds of bytes data are not
+ * stored into the system memory even if the DMAC interrupt happened.
+ * So, this driver then uses one RX DMAC channel only.
+ */
+#define SDHI_INTERNAL_DMAC_ONE_RX_ONLY	0
+#define SDHI_INTERNAL_DMAC_RX_IN_USE	1
+
 /* Definitions for sampling clocks */
 static struct renesas_sdhi_scc rcar_gen3_scc_taps[] = {
 	{
@@ -126,6 +138,10 @@  renesas_sdhi_internal_dmac_abort_dma(struct tmio_mmc_host *host) {
 	renesas_sdhi_internal_dmac_dm_write(host, DM_CM_RST,
 					    RST_RESERVED_BITS | val);
 
+
+	if (host->data && host->data->flags & MMC_DATA_READ)
+		clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags);
+
 	renesas_sdhi_internal_dmac_enable_dma(host, true);
 }
 
@@ -155,6 +171,9 @@  renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host,
 	if (data->flags & MMC_DATA_READ) {
 		dtran_mode |= DTRAN_MODE_CH_NUM_CH1;
 		dir = DMA_FROM_DEVICE;
+		if (test_bit(SDHI_INTERNAL_DMAC_ONE_RX_ONLY, &global_flags) &&
+		    test_and_set_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags))
+			goto force_pio;
 	} else {
 		dtran_mode |= DTRAN_MODE_CH_NUM_CH0;
 		dir = DMA_TO_DEVICE;
@@ -208,6 +227,9 @@  static void renesas_sdhi_internal_dmac_complete_tasklet_fn(unsigned long arg)
 	renesas_sdhi_internal_dmac_enable_dma(host, false);
 	dma_unmap_sg(&host->pdev->dev, host->sg_ptr, host->sg_len, dir);
 
+	if (dir == DMA_FROM_DEVICE)
+		clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags);
+
 	tmio_mmc_do_data_irq(host);
 out:
 	spin_unlock_irq(&host->lock);
@@ -251,18 +273,25 @@  static const struct tmio_mmc_dma_ops renesas_sdhi_internal_dmac_dma_ops = {
  * implementation as others may use a different implementation.
  */
 static const struct soc_device_attribute gen3_soc_whitelist[] = {
-        { .soc_id = "r8a7795", .revision = "ES1.*" },
+        { .soc_id = "r8a7795", .revision = "ES1.*",
+	  .data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) },
         { .soc_id = "r8a7795", .revision = "ES2.0" },
-        { .soc_id = "r8a7796", .revision = "ES1.0" },
+        { .soc_id = "r8a7796", .revision = "ES1.0",
+	  .data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) },
         { .soc_id = "r8a77995", .revision = "ES1.0" },
         { /* sentinel */ }
 };
 
 static int renesas_sdhi_internal_dmac_probe(struct platform_device *pdev)
 {
-	if (!soc_device_match(gen3_soc_whitelist))
+	const struct soc_device_attribute *soc = soc_device_match(gen3_soc_whitelist);
+
+	if (!soc)
 		return -ENODEV;
 
+	if (soc->data)
+		global_flags |= (unsigned long)soc->data;
+
 	return renesas_sdhi_probe(pdev, &renesas_sdhi_internal_dmac_dma_ops);
 }