diff mbox series

[v2,3/5] spi: spi-mtk-nor: use dma_alloc_coherent() for bounce buffer

Message ID 20200918162834.v2.3.I06cb65401ab5ad63ea30c4788d26633928d80f38@changeid (mailing list archive)
State Superseded
Headers show
Series spi: spi-mtk-nor: Add mt8192 support. | expand

Commit Message

Ikjoon Jang Sept. 18, 2020, 8:31 a.m. UTC
Use dma_alloc_coherent() for bounce buffer instead of kmalloc.

Signed-off-by: Ikjoon Jang <ikjn@chromium.org>

---

(no changes since v1)

 drivers/spi/spi-mtk-nor.c | 60 +++++++++++++++++++++++----------------
 1 file changed, 35 insertions(+), 25 deletions(-)

Comments

Chuanhong Guo Sept. 18, 2020, 1:25 p.m. UTC | #1
Hi!

On Fri, Sep 18, 2020 at 4:35 PM Ikjoon Jang <ikjn@chromium.org> wrote:
>
> Use dma_alloc_coherent() for bounce buffer instead of kmalloc.

The commit message should explain why such a change is
needed. (i.e. why using dma_alloc_coherent here is better
than kmalloc.) And if there's no benefit for this change I'd prefer
leaving it untouched.
I remembered reading somewhere that stream DMA api is
prefered over dma_alloc_coherent for this kind of single-direction
DMA operation.

>
> Signed-off-by: Ikjoon Jang <ikjn@chromium.org>
>
> ---
>
> (no changes since v1)
>
>  drivers/spi/spi-mtk-nor.c | 60 +++++++++++++++++++++++----------------
>  1 file changed, 35 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
> index 54b2c0fde95b..e14798a6e7d0 100644
> --- a/drivers/spi/spi-mtk-nor.c
> +++ b/drivers/spi/spi-mtk-nor.c
> @@ -96,6 +96,7 @@ struct mtk_nor {
>         struct device *dev;
>         void __iomem *base;
>         u8 *buffer;
> +       dma_addr_t buffer_dma;
>         struct clk *spi_clk;
>         struct clk *ctlr_clk;
>         unsigned int spi_freq;
> @@ -275,19 +276,16 @@ static void mtk_nor_setup_bus(struct mtk_nor *sp, const struct spi_mem_op *op)
>         mtk_nor_rmw(sp, MTK_NOR_REG_BUSCFG, reg, MTK_NOR_BUS_MODE_MASK);
>  }
>
> -static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length,
> -                           u8 *buffer)
> +static int read_dma(struct mtk_nor *sp, u32 from, unsigned int length,

This name is a bit confusing considering there's a mtk_nor_read_dma
below.
As this function now only executes dma readings and wait it to finish,
what about mtk_nor_dma_exec instead?

> +                   dma_addr_t dma_addr)
>  {
>         int ret = 0;
>         ulong delay;
>         u32 reg;
> -       dma_addr_t dma_addr;
>
> -       dma_addr = dma_map_single(sp->dev, buffer, length, DMA_FROM_DEVICE);
> -       if (dma_mapping_error(sp->dev, dma_addr)) {
> -               dev_err(sp->dev, "failed to map dma buffer.\n");
> +       if (WARN_ON((length & MTK_NOR_DMA_ALIGN_MASK) ||
> +                   (dma_addr & MTK_NOR_DMA_ALIGN_MASK)))

These alignment is guaranteed by callers of this function if all
my comments below are addressed. This check isn't needed.

>                 return -EINVAL;
> -       }
>
>         writel(from, sp->base + MTK_NOR_REG_DMA_FADR);
>         writel(dma_addr, sp->base + MTK_NOR_REG_DMA_DADR);
> @@ -312,30 +310,39 @@ static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length,
>                                          (delay + 1) * 100);
>         }
>
> -       dma_unmap_single(sp->dev, dma_addr, length, DMA_FROM_DEVICE);
>         if (ret < 0)
>                 dev_err(sp->dev, "dma read timeout.\n");
>
>         return ret;
>  }
>
> -static int mtk_nor_read_bounce(struct mtk_nor *sp, u32 from,
> -                              unsigned int length, u8 *buffer)
> +static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from,
> +                           unsigned int length, u8 *buffer)
>  {
> -       unsigned int rdlen;
>         int ret;
> +       dma_addr_t dma_addr;
> +       bool bounce = need_bounce(buffer, length);
>
> -       if (length & MTK_NOR_DMA_ALIGN_MASK)
> -               rdlen = (length + MTK_NOR_DMA_ALIGN) & ~MTK_NOR_DMA_ALIGN_MASK;

The intention of this rdlen alignment is explained in 2/5.
Please make sure this rdlen alignment logic is present
only for PIO reading.

> -       else
> -               rdlen = length;
> +       if (!bounce) {
> +               dma_addr = dma_map_single(sp->dev, buffer, length,
> +                                         DMA_FROM_DEVICE);
> +               if (dma_mapping_error(sp->dev, dma_addr)) {
> +                       dev_err(sp->dev, "failed to map dma buffer.\n");
> +                       return -EINVAL;
> +               }
> +       } else {
> +               dma_addr = sp->buffer_dma;
> +       }
>
> -       ret = mtk_nor_read_dma(sp, from, rdlen, sp->buffer);
> -       if (ret)
> -               return ret;
> +       ret = read_dma(sp, from, length, dma_addr);
>
> -       memcpy(buffer, sp->buffer, length);
> -       return 0;
> +       if (!bounce)
> +               dma_unmap_single(sp->dev, dma_addr, length,
> +                                DMA_FROM_DEVICE);
> +       else
> +               memcpy(buffer, sp->buffer, length);
> +
> +       return ret;
>  }

I think a separated read_dma and read_bounce function will be
cleaner than this if-else implementation:
read_dma:
1. call dma_map_single to get physical address
2. call read_dma to execute operation
3. call dma_unmap_single

read_bounce:
1. align reading length
2. call read_dma
3. call memcpy

>
>  static int mtk_nor_read_pio(struct mtk_nor *sp, const struct spi_mem_op *op)
> @@ -439,11 +446,6 @@ static int mtk_nor_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
>                 if (op->data.nbytes == 1) {
>                         mtk_nor_set_addr(sp, op);
>                         return mtk_nor_read_pio(sp, op);
> -               } else if (((ulong)(op->data.buf.in) &
> -                           MTK_NOR_DMA_ALIGN_MASK)) {
> -                       return mtk_nor_read_bounce(sp, op->addr.val,
> -                                                  op->data.nbytes,
> -                                                  op->data.buf.in);
>                 } else {
>                         return mtk_nor_read_dma(sp, op->addr.val,
>                                                 op->data.nbytes,
> @@ -654,6 +656,10 @@ static int mtk_nor_probe(struct platform_device *pdev)
>         sp->dev = &pdev->dev;
>         sp->spi_clk = spi_clk;
>         sp->ctlr_clk = ctlr_clk;

There is extra memory allocation code for sp->buffer in mtk_nor_probe.
If you intend to replace this with dma_alloc_coherent you should
drop those devm_kmalloc code as well.

> +       sp->buffer = dma_alloc_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE,
> +                                       &sp->buffer_dma, GFP_KERNEL);

There's a devm variant: dmam_alloc_coherent(dev, size, dma_handle, gfp)

> +       if (!sp->buffer)
> +               return -ENOMEM;

This spi-nor controller requires all addresses to be 16-byte aligned.
Although it should be guaranteed by a usually way larger page
alignment address from dma_alloc_coherent I'd prefer an explicit
check for address alignment here rather than letting it probe
successfully and fail for every dma_read with bounce buffer.


>
>         irq = platform_get_irq_optional(pdev, 0);
>         if (irq < 0) {
> @@ -674,6 +680,8 @@ static int mtk_nor_probe(struct platform_device *pdev)
>         ret = mtk_nor_init(sp);
>         if (ret < 0) {
>                 kfree(ctlr);
> +               dma_free_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE,
> +                                 sp->buffer, sp->buffer_dma);
>                 return ret;
>         }
>
> @@ -692,6 +700,8 @@ static int mtk_nor_remove(struct platform_device *pdev)
>
>         mtk_nor_disable_clk(sp);
>
> +       dma_free_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE,
> +                         sp->buffer, sp->buffer_dma);
>         return 0;
>  }
>
> --
> 2.28.0.681.g6f77f65b4e-goog
>


--
Regards,
Chuanhong Guo
Ikjoon Jang Sept. 21, 2020, 6:52 a.m. UTC | #2
On Fri, Sep 18, 2020 at 9:25 PM Chuanhong Guo <gch981213@gmail.com> wrote:
>
> Hi!
>
> On Fri, Sep 18, 2020 at 4:35 PM Ikjoon Jang <ikjn@chromium.org> wrote:
> >
> > Use dma_alloc_coherent() for bounce buffer instead of kmalloc.
>
> The commit message should explain why such a change is
> needed. (i.e. why using dma_alloc_coherent here is better
> than kmalloc.) And if there's no benefit for this change I'd prefer
> leaving it untouched.
> I remembered reading somewhere that stream DMA api is
> prefered over dma_alloc_coherent for this kind of single-direction
> DMA operation.
>

I will add more description on why I changed it to dma_alloc_coherent():
- to explictly support devices like mt8173-nor which only supports
32bit addressing for dma.

And it also reminded me an another problem, (I won't address this
issue for now in v3):
as this device is using dma range as [start, end) format
where 'end' can be zero in that corner case of {start = 0xffff_f000; end = 0; }

> >
> > Signed-off-by: Ikjoon Jang <ikjn@chromium.org>
> >
> > ---
> >
> > (no changes since v1)
> >
> >  drivers/spi/spi-mtk-nor.c | 60 +++++++++++++++++++++++----------------
> >  1 file changed, 35 insertions(+), 25 deletions(-)
> >
> > diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
> > index 54b2c0fde95b..e14798a6e7d0 100644
> > --- a/drivers/spi/spi-mtk-nor.c
> > +++ b/drivers/spi/spi-mtk-nor.c
> > @@ -96,6 +96,7 @@ struct mtk_nor {
> >         struct device *dev;
> >         void __iomem *base;
> >         u8 *buffer;
> > +       dma_addr_t buffer_dma;
> >         struct clk *spi_clk;
> >         struct clk *ctlr_clk;
> >         unsigned int spi_freq;
> > @@ -275,19 +276,16 @@ static void mtk_nor_setup_bus(struct mtk_nor *sp, const struct spi_mem_op *op)
> >         mtk_nor_rmw(sp, MTK_NOR_REG_BUSCFG, reg, MTK_NOR_BUS_MODE_MASK);
> >  }
> >
> > -static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length,
> > -                           u8 *buffer)
> > +static int read_dma(struct mtk_nor *sp, u32 from, unsigned int length,
>
> This name is a bit confusing considering there's a mtk_nor_read_dma
> below.
> As this function now only executes dma readings and wait it to finish,
> what about mtk_nor_dma_exec instead?

yeah, good idea.

>
> > +                   dma_addr_t dma_addr)
> >  {
> >         int ret = 0;
> >         ulong delay;
> >         u32 reg;
> > -       dma_addr_t dma_addr;
> >
> > -       dma_addr = dma_map_single(sp->dev, buffer, length, DMA_FROM_DEVICE);
> > -       if (dma_mapping_error(sp->dev, dma_addr)) {
> > -               dev_err(sp->dev, "failed to map dma buffer.\n");
> > +       if (WARN_ON((length & MTK_NOR_DMA_ALIGN_MASK) ||
> > +                   (dma_addr & MTK_NOR_DMA_ALIGN_MASK)))
>
> These alignment is guaranteed by callers of this function if all
> my comments below are addressed. This check isn't needed.

ACK.

>
> >                 return -EINVAL;
> > -       }
> >
> >         writel(from, sp->base + MTK_NOR_REG_DMA_FADR);
> >         writel(dma_addr, sp->base + MTK_NOR_REG_DMA_DADR);
> > @@ -312,30 +310,39 @@ static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length,
> >                                          (delay + 1) * 100);
> >         }
> >
> > -       dma_unmap_single(sp->dev, dma_addr, length, DMA_FROM_DEVICE);
> >         if (ret < 0)
> >                 dev_err(sp->dev, "dma read timeout.\n");
> >
> >         return ret;
> >  }
> >
> > -static int mtk_nor_read_bounce(struct mtk_nor *sp, u32 from,
> > -                              unsigned int length, u8 *buffer)
> > +static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from,
> > +                           unsigned int length, u8 *buffer)
> >  {
> > -       unsigned int rdlen;
> >         int ret;
> > +       dma_addr_t dma_addr;
> > +       bool bounce = need_bounce(buffer, length);
> >
> > -       if (length & MTK_NOR_DMA_ALIGN_MASK)
> > -               rdlen = (length + MTK_NOR_DMA_ALIGN) & ~MTK_NOR_DMA_ALIGN_MASK;
>
> The intention of this rdlen alignment is explained in 2/5.
> Please make sure this rdlen alignment logic is present
> only for PIO reading.

okay, I'll use padding again in v3.

>
> > -       else
> > -               rdlen = length;
> > +       if (!bounce) {
> > +               dma_addr = dma_map_single(sp->dev, buffer, length,
> > +                                         DMA_FROM_DEVICE);
> > +               if (dma_mapping_error(sp->dev, dma_addr)) {
> > +                       dev_err(sp->dev, "failed to map dma buffer.\n");
> > +                       return -EINVAL;
> > +               }
> > +       } else {
> > +               dma_addr = sp->buffer_dma;
> > +       }
> >
> > -       ret = mtk_nor_read_dma(sp, from, rdlen, sp->buffer);
> > -       if (ret)
> > -               return ret;
> > +       ret = read_dma(sp, from, length, dma_addr);
> >
> > -       memcpy(buffer, sp->buffer, length);
> > -       return 0;
> > +       if (!bounce)
> > +               dma_unmap_single(sp->dev, dma_addr, length,
> > +                                DMA_FROM_DEVICE);
> > +       else
> > +               memcpy(buffer, sp->buffer, length);
> > +
> > +       return ret;
> >  }
>
> I think a separated read_dma and read_bounce function will be
> cleaner than this if-else implementation:
> read_dma:
> 1. call dma_map_single to get physical address
> 2. call read_dma to execute operation
> 3. call dma_unmap_single
>
> read_bounce:
> 1. align reading length
> 2. call read_dma
> 3. call memcpy

ACK

>
> >
> >  static int mtk_nor_read_pio(struct mtk_nor *sp, const struct spi_mem_op *op)
> > @@ -439,11 +446,6 @@ static int mtk_nor_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
> >                 if (op->data.nbytes == 1) {
> >                         mtk_nor_set_addr(sp, op);
> >                         return mtk_nor_read_pio(sp, op);
> > -               } else if (((ulong)(op->data.buf.in) &
> > -                           MTK_NOR_DMA_ALIGN_MASK)) {
> > -                       return mtk_nor_read_bounce(sp, op->addr.val,
> > -                                                  op->data.nbytes,
> > -                                                  op->data.buf.in);
> >                 } else {
> >                         return mtk_nor_read_dma(sp, op->addr.val,
> >                                                 op->data.nbytes,
> > @@ -654,6 +656,10 @@ static int mtk_nor_probe(struct platform_device *pdev)
> >         sp->dev = &pdev->dev;
> >         sp->spi_clk = spi_clk;
> >         sp->ctlr_clk = ctlr_clk;
>
> There is extra memory allocation code for sp->buffer in mtk_nor_probe.
> If you intend to replace this with dma_alloc_coherent you should
> drop those devm_kmalloc code as well.
>
> > +       sp->buffer = dma_alloc_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE,
> > +                                       &sp->buffer_dma, GFP_KERNEL);
>
> There's a devm variant: dmam_alloc_coherent(dev, size, dma_handle, gfp)

ACK

>
> > +       if (!sp->buffer)
> > +               return -ENOMEM;
>
> This spi-nor controller requires all addresses to be 16-byte aligned.
> Although it should be guaranteed by a usually way larger page
> alignment address from dma_alloc_coherent I'd prefer an explicit
> check for address alignment here rather than letting it probe
> successfully and fail for every dma_read with bounce buffer.
>

Yep, I'll restore the padding.

>
> >
> >         irq = platform_get_irq_optional(pdev, 0);
> >         if (irq < 0) {
> > @@ -674,6 +680,8 @@ static int mtk_nor_probe(struct platform_device *pdev)
> >         ret = mtk_nor_init(sp);
> >         if (ret < 0) {
> >                 kfree(ctlr);
> > +               dma_free_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE,
> > +                                 sp->buffer, sp->buffer_dma);
> >                 return ret;
> >         }
> >
> > @@ -692,6 +700,8 @@ static int mtk_nor_remove(struct platform_device *pdev)
> >
> >         mtk_nor_disable_clk(sp);
> >
> > +       dma_free_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE,
> > +                         sp->buffer, sp->buffer_dma);
> >         return 0;
> >  }
> >
> > --
> > 2.28.0.681.g6f77f65b4e-goog
> >
>
>
> --
> Regards,
> Chuanhong Guo
diff mbox series

Patch

diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
index 54b2c0fde95b..e14798a6e7d0 100644
--- a/drivers/spi/spi-mtk-nor.c
+++ b/drivers/spi/spi-mtk-nor.c
@@ -96,6 +96,7 @@  struct mtk_nor {
 	struct device *dev;
 	void __iomem *base;
 	u8 *buffer;
+	dma_addr_t buffer_dma;
 	struct clk *spi_clk;
 	struct clk *ctlr_clk;
 	unsigned int spi_freq;
@@ -275,19 +276,16 @@  static void mtk_nor_setup_bus(struct mtk_nor *sp, const struct spi_mem_op *op)
 	mtk_nor_rmw(sp, MTK_NOR_REG_BUSCFG, reg, MTK_NOR_BUS_MODE_MASK);
 }
 
-static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length,
-			    u8 *buffer)
+static int read_dma(struct mtk_nor *sp, u32 from, unsigned int length,
+		    dma_addr_t dma_addr)
 {
 	int ret = 0;
 	ulong delay;
 	u32 reg;
-	dma_addr_t dma_addr;
 
-	dma_addr = dma_map_single(sp->dev, buffer, length, DMA_FROM_DEVICE);
-	if (dma_mapping_error(sp->dev, dma_addr)) {
-		dev_err(sp->dev, "failed to map dma buffer.\n");
+	if (WARN_ON((length & MTK_NOR_DMA_ALIGN_MASK) ||
+		    (dma_addr & MTK_NOR_DMA_ALIGN_MASK)))
 		return -EINVAL;
-	}
 
 	writel(from, sp->base + MTK_NOR_REG_DMA_FADR);
 	writel(dma_addr, sp->base + MTK_NOR_REG_DMA_DADR);
@@ -312,30 +310,39 @@  static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length,
 					 (delay + 1) * 100);
 	}
 
-	dma_unmap_single(sp->dev, dma_addr, length, DMA_FROM_DEVICE);
 	if (ret < 0)
 		dev_err(sp->dev, "dma read timeout.\n");
 
 	return ret;
 }
 
-static int mtk_nor_read_bounce(struct mtk_nor *sp, u32 from,
-			       unsigned int length, u8 *buffer)
+static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from,
+			    unsigned int length, u8 *buffer)
 {
-	unsigned int rdlen;
 	int ret;
+	dma_addr_t dma_addr;
+	bool bounce = need_bounce(buffer, length);
 
-	if (length & MTK_NOR_DMA_ALIGN_MASK)
-		rdlen = (length + MTK_NOR_DMA_ALIGN) & ~MTK_NOR_DMA_ALIGN_MASK;
-	else
-		rdlen = length;
+	if (!bounce) {
+		dma_addr = dma_map_single(sp->dev, buffer, length,
+					  DMA_FROM_DEVICE);
+		if (dma_mapping_error(sp->dev, dma_addr)) {
+			dev_err(sp->dev, "failed to map dma buffer.\n");
+			return -EINVAL;
+		}
+	} else {
+		dma_addr = sp->buffer_dma;
+	}
 
-	ret = mtk_nor_read_dma(sp, from, rdlen, sp->buffer);
-	if (ret)
-		return ret;
+	ret = read_dma(sp, from, length, dma_addr);
 
-	memcpy(buffer, sp->buffer, length);
-	return 0;
+	if (!bounce)
+		dma_unmap_single(sp->dev, dma_addr, length,
+				 DMA_FROM_DEVICE);
+	else
+		memcpy(buffer, sp->buffer, length);
+
+	return ret;
 }
 
 static int mtk_nor_read_pio(struct mtk_nor *sp, const struct spi_mem_op *op)
@@ -439,11 +446,6 @@  static int mtk_nor_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
 		if (op->data.nbytes == 1) {
 			mtk_nor_set_addr(sp, op);
 			return mtk_nor_read_pio(sp, op);
-		} else if (((ulong)(op->data.buf.in) &
-			    MTK_NOR_DMA_ALIGN_MASK)) {
-			return mtk_nor_read_bounce(sp, op->addr.val,
-						   op->data.nbytes,
-						   op->data.buf.in);
 		} else {
 			return mtk_nor_read_dma(sp, op->addr.val,
 						op->data.nbytes,
@@ -654,6 +656,10 @@  static int mtk_nor_probe(struct platform_device *pdev)
 	sp->dev = &pdev->dev;
 	sp->spi_clk = spi_clk;
 	sp->ctlr_clk = ctlr_clk;
+	sp->buffer = dma_alloc_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE,
+					&sp->buffer_dma, GFP_KERNEL);
+	if (!sp->buffer)
+		return -ENOMEM;
 
 	irq = platform_get_irq_optional(pdev, 0);
 	if (irq < 0) {
@@ -674,6 +680,8 @@  static int mtk_nor_probe(struct platform_device *pdev)
 	ret = mtk_nor_init(sp);
 	if (ret < 0) {
 		kfree(ctlr);
+		dma_free_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE,
+				  sp->buffer, sp->buffer_dma);
 		return ret;
 	}
 
@@ -692,6 +700,8 @@  static int mtk_nor_remove(struct platform_device *pdev)
 
 	mtk_nor_disable_clk(sp);
 
+	dma_free_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE,
+			  sp->buffer, sp->buffer_dma);
 	return 0;
 }