Message ID | 20200918162834.v2.3.I06cb65401ab5ad63ea30c4788d26633928d80f38@changeid (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | spi: spi-mtk-nor: Add mt8192 support. | expand |
Hi! On Fri, Sep 18, 2020 at 4:35 PM Ikjoon Jang <ikjn@chromium.org> wrote: > > Use dma_alloc_coherent() for bounce buffer instead of kmalloc. The commit message should explain why such a change is needed (i.e. why using dma_alloc_coherent here is better than kmalloc). And if there's no benefit to this change, I'd prefer leaving it untouched. I remember reading somewhere that the streaming DMA API is preferred over dma_alloc_coherent for this kind of single-direction DMA operation. > > Signed-off-by: Ikjoon Jang <ikjn@chromium.org> > > --- > > (no changes since v1) > > drivers/spi/spi-mtk-nor.c | 60 +++++++++++++++++++++++---------------- > 1 file changed, 35 insertions(+), 25 deletions(-) > > diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c > index 54b2c0fde95b..e14798a6e7d0 100644 > --- a/drivers/spi/spi-mtk-nor.c > +++ b/drivers/spi/spi-mtk-nor.c > @@ -96,6 +96,7 @@ struct mtk_nor { > struct device *dev; > void __iomem *base; > u8 *buffer; > + dma_addr_t buffer_dma; > struct clk *spi_clk; > struct clk *ctlr_clk; > unsigned int spi_freq; > @@ -275,19 +276,16 @@ static void mtk_nor_setup_bus(struct mtk_nor *sp, const struct spi_mem_op *op) > mtk_nor_rmw(sp, MTK_NOR_REG_BUSCFG, reg, MTK_NOR_BUS_MODE_MASK); > } > > -static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length, > - u8 *buffer) > +static int read_dma(struct mtk_nor *sp, u32 from, unsigned int length, This name is a bit confusing considering there's a mtk_nor_read_dma below. As this function now only executes the DMA read and waits for it to finish, what about mtk_nor_dma_exec instead?
> + dma_addr_t dma_addr) > { > int ret = 0; > ulong delay; > u32 reg; > - dma_addr_t dma_addr; > > - dma_addr = dma_map_single(sp->dev, buffer, length, DMA_FROM_DEVICE); > - if (dma_mapping_error(sp->dev, dma_addr)) { > - dev_err(sp->dev, "failed to map dma buffer.\n"); > + if (WARN_ON((length & MTK_NOR_DMA_ALIGN_MASK) || > + (dma_addr & MTK_NOR_DMA_ALIGN_MASK))) This alignment is guaranteed by callers of this function if all my comments below are addressed. This check isn't needed. > return -EINVAL; > - } > > writel(from, sp->base + MTK_NOR_REG_DMA_FADR); > writel(dma_addr, sp->base + MTK_NOR_REG_DMA_DADR); > @@ -312,30 +310,39 @@ static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length, > (delay + 1) * 100); > } > > - dma_unmap_single(sp->dev, dma_addr, length, DMA_FROM_DEVICE); > if (ret < 0) > dev_err(sp->dev, "dma read timeout.\n"); > > return ret; > } > > -static int mtk_nor_read_bounce(struct mtk_nor *sp, u32 from, > - unsigned int length, u8 *buffer) > +static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, > + unsigned int length, u8 *buffer) > { > - unsigned int rdlen; > int ret; > + dma_addr_t dma_addr; > + bool bounce = need_bounce(buffer, length); > > - if (length & MTK_NOR_DMA_ALIGN_MASK) > - rdlen = (length + MTK_NOR_DMA_ALIGN) & ~MTK_NOR_DMA_ALIGN_MASK; The intention of this rdlen alignment is explained in 2/5. Please make sure this rdlen alignment logic is present only for PIO reading.
> - else > - rdlen = length; > + if (!bounce) { > + dma_addr = dma_map_single(sp->dev, buffer, length, > + DMA_FROM_DEVICE); > + if (dma_mapping_error(sp->dev, dma_addr)) { > + dev_err(sp->dev, "failed to map dma buffer.\n"); > + return -EINVAL; > + } > + } else { > + dma_addr = sp->buffer_dma; > + } > > - ret = mtk_nor_read_dma(sp, from, rdlen, sp->buffer); > - if (ret) > - return ret; > + ret = read_dma(sp, from, length, dma_addr); > > - memcpy(buffer, sp->buffer, length); > - return 0; > + if (!bounce) > + dma_unmap_single(sp->dev, dma_addr, length, > + DMA_FROM_DEVICE); > + else > + memcpy(buffer, sp->buffer, length); > + > + return ret; > } I think a separated read_dma and read_bounce function will be cleaner than this if-else implementation: read_dma: 1. call dma_map_single to get physical address 2. call read_dma to execute operation 3. call dma_unmap_single read_bounce: 1. align reading length 2. call read_dma 3. call memcpy > > static int mtk_nor_read_pio(struct mtk_nor *sp, const struct spi_mem_op *op) > @@ -439,11 +446,6 @@ static int mtk_nor_exec_op(struct spi_mem *mem, const struct spi_mem_op *op) > if (op->data.nbytes == 1) { > mtk_nor_set_addr(sp, op); > return mtk_nor_read_pio(sp, op); > - } else if (((ulong)(op->data.buf.in) & > - MTK_NOR_DMA_ALIGN_MASK)) { > - return mtk_nor_read_bounce(sp, op->addr.val, > - op->data.nbytes, > - op->data.buf.in); > } else { > return mtk_nor_read_dma(sp, op->addr.val, > op->data.nbytes, > @@ -654,6 +656,10 @@ static int mtk_nor_probe(struct platform_device *pdev) > sp->dev = &pdev->dev; > sp->spi_clk = spi_clk; > sp->ctlr_clk = ctlr_clk; There is extra memory allocation code for sp->buffer in mtk_nor_probe. If you intend to replace this with dma_alloc_coherent you should drop those devm_kmalloc code as well. 
> + sp->buffer = dma_alloc_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE, > + &sp->buffer_dma, GFP_KERNEL); There's a devm variant: dmam_alloc_coherent(dev, size, dma_handle, gfp) > + if (!sp->buffer) > + return -ENOMEM; This spi-nor controller requires all addresses to be 16-byte aligned. Although it should be guaranteed by a usually way larger page alignment address from dma_alloc_coherent I'd prefer an explicit check for address alignment here rather than letting it probe successfully and fail for every dma_read with bounce buffer. > > irq = platform_get_irq_optional(pdev, 0); > if (irq < 0) { > @@ -674,6 +680,8 @@ static int mtk_nor_probe(struct platform_device *pdev) > ret = mtk_nor_init(sp); > if (ret < 0) { > kfree(ctlr); > + dma_free_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE, > + sp->buffer, sp->buffer_dma); > return ret; > } > > @@ -692,6 +700,8 @@ static int mtk_nor_remove(struct platform_device *pdev) > > mtk_nor_disable_clk(sp); > > + dma_free_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE, > + sp->buffer, sp->buffer_dma); > return 0; > } > > -- > 2.28.0.681.g6f77f65b4e-goog > -- Regards, Chuanhong Guo
On Fri, Sep 18, 2020 at 9:25 PM Chuanhong Guo <gch981213@gmail.com> wrote: > > Hi! > > On Fri, Sep 18, 2020 at 4:35 PM Ikjoon Jang <ikjn@chromium.org> wrote: > > > > Use dma_alloc_coherent() for bounce buffer instead of kmalloc. > > The commit message should explain why such a change is > needed. (i.e. why using dma_alloc_coherent here is better > than kmalloc.) And if there's no benefit for this change I'd prefer > leaving it untouched. > I remembered reading somewhere that stream DMA api is > prefered over dma_alloc_coherent for this kind of single-direction > DMA operation. > I will add more description on why I changed it to dma_alloc_coherent(): - to explicitly support devices like mt8173-nor, which only support 32-bit addressing for DMA. It also reminded me of another problem (which I won't address in v3): this device describes its DMA range in [start, end) format, where 'end' can be zero in the corner case of {start = 0xffff_f000; end = 0; } > > > > Signed-off-by: Ikjoon Jang <ikjn@chromium.org> > > > > --- > > > > (no changes since v1) > > > > drivers/spi/spi-mtk-nor.c | 60 +++++++++++++++++++++++---------------- > > 1 file changed, 35 insertions(+), 25 deletions(-) > > > > diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c > > index 54b2c0fde95b..e14798a6e7d0 100644 > > --- a/drivers/spi/spi-mtk-nor.c > > +++ b/drivers/spi/spi-mtk-nor.c > > @@ -96,6 +96,7 @@ struct mtk_nor { > > struct device *dev; > > void __iomem *base; > > u8 *buffer; > > + dma_addr_t buffer_dma; > > struct clk *spi_clk; > > struct clk *ctlr_clk; > > unsigned int spi_freq; > > @@ -275,19 +276,16 @@ static void mtk_nor_setup_bus(struct mtk_nor *sp, const struct spi_mem_op *op) > > mtk_nor_rmw(sp, MTK_NOR_REG_BUSCFG, reg, MTK_NOR_BUS_MODE_MASK); > > } > > > > -static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length, > > - u8 *buffer) > > +static int read_dma(struct mtk_nor *sp, u32 from, unsigned int length, > > This name is a bit
confusing considering there's a mtk_nor_read_dma > below. > As this function now only executes dma readings and wait it to finish, > what about mtk_nor_dma_exec instead? yeah, good idea. > > > + dma_addr_t dma_addr) > > { > > int ret = 0; > > ulong delay; > > u32 reg; > > - dma_addr_t dma_addr; > > > > - dma_addr = dma_map_single(sp->dev, buffer, length, DMA_FROM_DEVICE); > > - if (dma_mapping_error(sp->dev, dma_addr)) { > > - dev_err(sp->dev, "failed to map dma buffer.\n"); > > + if (WARN_ON((length & MTK_NOR_DMA_ALIGN_MASK) || > > + (dma_addr & MTK_NOR_DMA_ALIGN_MASK))) > > These alignment is guaranteed by callers of this function if all > my comments below are addressed. This check isn't needed. ACK. > > > return -EINVAL; > > - } > > > > writel(from, sp->base + MTK_NOR_REG_DMA_FADR); > > writel(dma_addr, sp->base + MTK_NOR_REG_DMA_DADR); > > @@ -312,30 +310,39 @@ static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length, > > (delay + 1) * 100); > > } > > > > - dma_unmap_single(sp->dev, dma_addr, length, DMA_FROM_DEVICE); > > if (ret < 0) > > dev_err(sp->dev, "dma read timeout.\n"); > > > > return ret; > > } > > > > -static int mtk_nor_read_bounce(struct mtk_nor *sp, u32 from, > > - unsigned int length, u8 *buffer) > > +static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, > > + unsigned int length, u8 *buffer) > > { > > - unsigned int rdlen; > > int ret; > > + dma_addr_t dma_addr; > > + bool bounce = need_bounce(buffer, length); > > > > - if (length & MTK_NOR_DMA_ALIGN_MASK) > > - rdlen = (length + MTK_NOR_DMA_ALIGN) & ~MTK_NOR_DMA_ALIGN_MASK; > > The intention of this rdlen alignment is explained in 2/5. > Please make sure this rdlen alignment logic is present > only for PIO reading. okay, I'll use padding again in v3. 
> > > - else > > - rdlen = length; > > + if (!bounce) { > > + dma_addr = dma_map_single(sp->dev, buffer, length, > > + DMA_FROM_DEVICE); > > + if (dma_mapping_error(sp->dev, dma_addr)) { > > + dev_err(sp->dev, "failed to map dma buffer.\n"); > > + return -EINVAL; > > + } > > + } else { > > + dma_addr = sp->buffer_dma; > > + } > > > > - ret = mtk_nor_read_dma(sp, from, rdlen, sp->buffer); > > - if (ret) > > - return ret; > > + ret = read_dma(sp, from, length, dma_addr); > > > > - memcpy(buffer, sp->buffer, length); > > - return 0; > > + if (!bounce) > > + dma_unmap_single(sp->dev, dma_addr, length, > > + DMA_FROM_DEVICE); > > + else > > + memcpy(buffer, sp->buffer, length); > > + > > + return ret; > > } > > I think a separated read_dma and read_bounce function will be > cleaner than this if-else implementation: > read_dma: > 1. call dma_map_single to get physical address > 2. call read_dma to execute operation > 3. call dma_unmap_single > > read_bounce: > 1. align reading length > 2. call read_dma > 3. call memcpy ACK > > > > > static int mtk_nor_read_pio(struct mtk_nor *sp, const struct spi_mem_op *op) > > @@ -439,11 +446,6 @@ static int mtk_nor_exec_op(struct spi_mem *mem, const struct spi_mem_op *op) > > if (op->data.nbytes == 1) { > > mtk_nor_set_addr(sp, op); > > return mtk_nor_read_pio(sp, op); > > - } else if (((ulong)(op->data.buf.in) & > > - MTK_NOR_DMA_ALIGN_MASK)) { > > - return mtk_nor_read_bounce(sp, op->addr.val, > > - op->data.nbytes, > > - op->data.buf.in); > > } else { > > return mtk_nor_read_dma(sp, op->addr.val, > > op->data.nbytes, > > @@ -654,6 +656,10 @@ static int mtk_nor_probe(struct platform_device *pdev) > > sp->dev = &pdev->dev; > > sp->spi_clk = spi_clk; > > sp->ctlr_clk = ctlr_clk; > > There is extra memory allocation code for sp->buffer in mtk_nor_probe. > If you intend to replace this with dma_alloc_coherent you should > drop those devm_kmalloc code as well. 
> > > + sp->buffer = dma_alloc_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE, > > + &sp->buffer_dma, GFP_KERNEL); > > There's a devm variant: dmam_alloc_coherent(dev, size, dma_handle, gfp) ACK > > > + if (!sp->buffer) > > + return -ENOMEM; > > This spi-nor controller requires all addresses to be 16-byte aligned. > Although it should be guaranteed by a usually way larger page > alignment address from dma_alloc_coherent I'd prefer an explicit > check for address alignment here rather than letting it probe > successfully and fail for every dma_read with bounce buffer. > Yep, I'll restore the padding. > > > > > irq = platform_get_irq_optional(pdev, 0); > > if (irq < 0) { > > @@ -674,6 +680,8 @@ static int mtk_nor_probe(struct platform_device *pdev) > > ret = mtk_nor_init(sp); > > if (ret < 0) { > > kfree(ctlr); > > + dma_free_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE, > > + sp->buffer, sp->buffer_dma); > > return ret; > > } > > > > @@ -692,6 +700,8 @@ static int mtk_nor_remove(struct platform_device *pdev) > > > > mtk_nor_disable_clk(sp); > > > > + dma_free_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE, > > + sp->buffer, sp->buffer_dma); > > return 0; > > } > > > > -- > > 2.28.0.681.g6f77f65b4e-goog > > > > > -- > Regards, > Chuanhong Guo
diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c index 54b2c0fde95b..e14798a6e7d0 100644 --- a/drivers/spi/spi-mtk-nor.c +++ b/drivers/spi/spi-mtk-nor.c @@ -96,6 +96,7 @@ struct mtk_nor { struct device *dev; void __iomem *base; u8 *buffer; + dma_addr_t buffer_dma; struct clk *spi_clk; struct clk *ctlr_clk; unsigned int spi_freq; @@ -275,19 +276,16 @@ static void mtk_nor_setup_bus(struct mtk_nor *sp, const struct spi_mem_op *op) mtk_nor_rmw(sp, MTK_NOR_REG_BUSCFG, reg, MTK_NOR_BUS_MODE_MASK); } -static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length, - u8 *buffer) +static int read_dma(struct mtk_nor *sp, u32 from, unsigned int length, + dma_addr_t dma_addr) { int ret = 0; ulong delay; u32 reg; - dma_addr_t dma_addr; - dma_addr = dma_map_single(sp->dev, buffer, length, DMA_FROM_DEVICE); - if (dma_mapping_error(sp->dev, dma_addr)) { - dev_err(sp->dev, "failed to map dma buffer.\n"); + if (WARN_ON((length & MTK_NOR_DMA_ALIGN_MASK) || + (dma_addr & MTK_NOR_DMA_ALIGN_MASK))) return -EINVAL; - } writel(from, sp->base + MTK_NOR_REG_DMA_FADR); writel(dma_addr, sp->base + MTK_NOR_REG_DMA_DADR); @@ -312,30 +310,39 @@ static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, unsigned int length, (delay + 1) * 100); } - dma_unmap_single(sp->dev, dma_addr, length, DMA_FROM_DEVICE); if (ret < 0) dev_err(sp->dev, "dma read timeout.\n"); return ret; } -static int mtk_nor_read_bounce(struct mtk_nor *sp, u32 from, - unsigned int length, u8 *buffer) +static int mtk_nor_read_dma(struct mtk_nor *sp, u32 from, + unsigned int length, u8 *buffer) { - unsigned int rdlen; int ret; + dma_addr_t dma_addr; + bool bounce = need_bounce(buffer, length); - if (length & MTK_NOR_DMA_ALIGN_MASK) - rdlen = (length + MTK_NOR_DMA_ALIGN) & ~MTK_NOR_DMA_ALIGN_MASK; - else - rdlen = length; + if (!bounce) { + dma_addr = dma_map_single(sp->dev, buffer, length, + DMA_FROM_DEVICE); + if (dma_mapping_error(sp->dev, dma_addr)) { + dev_err(sp->dev, "failed to map dma 
buffer.\n"); + return -EINVAL; + } + } else { + dma_addr = sp->buffer_dma; + } - ret = mtk_nor_read_dma(sp, from, rdlen, sp->buffer); - if (ret) - return ret; + ret = read_dma(sp, from, length, dma_addr); - memcpy(buffer, sp->buffer, length); - return 0; + if (!bounce) + dma_unmap_single(sp->dev, dma_addr, length, + DMA_FROM_DEVICE); + else + memcpy(buffer, sp->buffer, length); + + return ret; } static int mtk_nor_read_pio(struct mtk_nor *sp, const struct spi_mem_op *op) @@ -439,11 +446,6 @@ static int mtk_nor_exec_op(struct spi_mem *mem, const struct spi_mem_op *op) if (op->data.nbytes == 1) { mtk_nor_set_addr(sp, op); return mtk_nor_read_pio(sp, op); - } else if (((ulong)(op->data.buf.in) & - MTK_NOR_DMA_ALIGN_MASK)) { - return mtk_nor_read_bounce(sp, op->addr.val, - op->data.nbytes, - op->data.buf.in); } else { return mtk_nor_read_dma(sp, op->addr.val, op->data.nbytes, @@ -654,6 +656,10 @@ static int mtk_nor_probe(struct platform_device *pdev) sp->dev = &pdev->dev; sp->spi_clk = spi_clk; sp->ctlr_clk = ctlr_clk; + sp->buffer = dma_alloc_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE, + &sp->buffer_dma, GFP_KERNEL); + if (!sp->buffer) + return -ENOMEM; irq = platform_get_irq_optional(pdev, 0); if (irq < 0) { @@ -674,6 +680,8 @@ static int mtk_nor_probe(struct platform_device *pdev) ret = mtk_nor_init(sp); if (ret < 0) { kfree(ctlr); + dma_free_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE, + sp->buffer, sp->buffer_dma); return ret; } @@ -692,6 +700,8 @@ static int mtk_nor_remove(struct platform_device *pdev) mtk_nor_disable_clk(sp); + dma_free_coherent(&pdev->dev, MTK_NOR_BOUNCE_BUF_SIZE, + sp->buffer, sp->buffer_dma); return 0; }
Use dma_alloc_coherent() for bounce buffer instead of kmalloc. Signed-off-by: Ikjoon Jang <ikjn@chromium.org> --- (no changes since v1) drivers/spi/spi-mtk-nor.c | 60 +++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 25 deletions(-)