diff mbox

[v2] dmaengine: rcar-dmac: enable descriptor mode on 40bit

Message ID 871su87gag.wl%kuninori.morimoto.gx@renesas.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

Kuninori Morimoto March 8, 2017, 12:42 a.m. UTC
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>

SYS-DMAC can use 40bit address transfer, and it supports Descriptor
Mode too. Current SYS-DMAC driver disables Descriptor Mode if it was
40bit address today. But it can use Descriptor Mode with 40bit if
transfer Source/Destination address are located in same 4GiB region
in the 40 bit address space.
This patch enables it if all condition was clear

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
---
v1 -> v2

 - do high_dev_addr / high_mem_addr in out of sg loop
 - reuse if (dev_addr >> 32 != (dev_addr + size - 1) >> 32)

 drivers/dma/sh/rcar-dmac.c | 52 ++++++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 20 deletions(-)

Comments

Laurent Pinchart March 8, 2017, 7:57 a.m. UTC | #1
Hi Morimoto-san,

Thank you for the patch.

On Wednesday 08 Mar 2017 00:42:54 Kuninori Morimoto wrote:
> From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
> 
> SYS-DMAC can use 40bit address transfer, and it supports Descriptor
> Mode too. Current SYS-DMAC driver disables Descriptor Mode if it was
> 40bit address today. But it can use Descriptor Mode with 40bit if
> transfer Source/Destination address are located in same 4GiB region
> in the 40 bit address space.
> This patch enables it if all condition was clear
> 
> Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
> ---
> v1 -> v2
> 
>  - do high_dev_addr / high_mem_addr in out of sg loop
>  - reuse if (dev_addr >> 32 != (dev_addr + size - 1) >> 32)
> 
>  drivers/dma/sh/rcar-dmac.c | 52 +++++++++++++++++++++++++------------------
>  1 file changed, 32 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> index 48b22d5..a420921 100644
> --- a/drivers/dma/sh/rcar-dmac.c
> +++ b/drivers/dma/sh/rcar-dmac.c
> @@ -344,13 +344,19 @@ static void rcar_dmac_chan_start_xfer(struct
> rcar_dmac_chan *chan) rcar_dmac_chan_write(chan, RCAR_DMARS,
> chan->mid_rid);
> 
>  	if (desc->hwdescs.use) {
> -		struct rcar_dmac_xfer_chunk *chunk;
> +		struct rcar_dmac_xfer_chunk *chunk =
> +			list_first_entry(&desc->chunks,
> +					 struct rcar_dmac_xfer_chunk, node);
> 
>  		dev_dbg(chan->chan.device->dev,
>  			"chan%u: queue desc %p: %u@%pad\n",
>  			chan->index, desc, desc->nchunks, &desc->hwdescs.dma);
> 
>  #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> +		rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR,
> +				     chunk->src_addr >> 32);
> +		rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR,
> +				     chunk->dst_addr >> 32);
>  		rcar_dmac_chan_write(chan, RCAR_DMAFIXDPBASE,
>  				     desc->hwdescs.dma >> 32);
>  #endif
> @@ -368,8 +374,6 @@ static void rcar_dmac_chan_start_xfer(struct
> rcar_dmac_chan *chan) * should. Initialize it manually with the destination
> address
>  		 * of the first chunk.
>  		 */
> -		chunk = list_first_entry(&desc->chunks,
> -					 struct rcar_dmac_xfer_chunk, node);
>  		rcar_dmac_chan_write(chan, RCAR_DMADAR,
>  				     chunk->dst_addr & 0xffffffff);
> 
> @@ -855,8 +859,12 @@ static void rcar_dmac_chan_configure_desc(struct
> rcar_dmac_chan *chan, unsigned int nchunks = 0;
>  	unsigned int max_chunk_size;
>  	unsigned int full_size = 0;
> -	bool highmem = false;
> +	bool cross_boundary = false;
>  	unsigned int i;
> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> +	u32 high_dev_addr = 0;
> +	u32 high_mem_addr = 0;

No need to initialize these to 0 as you initialize them in the first iteration 
of the loop.

Apart from that,

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>

> +#endif
> 
>  	desc = rcar_dmac_desc_get(chan);
>  	if (!desc)
> @@ -882,6 +890,16 @@ static void rcar_dmac_chan_configure_desc(struct
> rcar_dmac_chan *chan,
> 
>  		full_size += len;
> 
> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> +		if (i == 0) {
> +			high_dev_addr = dev_addr >> 32;
> +			high_mem_addr = mem_addr >> 32;
> +		}
> +
> +		if ((dev_addr >> 32 != high_dev_addr) ||
> +		    (mem_addr >> 32 != high_mem_addr))
> +			cross_boundary = true;
> +#endif
>  		while (len) {
>  			unsigned int size = min(len, max_chunk_size);
> 
> @@ -890,18 +908,14 @@ static void rcar_dmac_chan_configure_desc(struct
> rcar_dmac_chan *chan, * Prevent individual transfers from crossing 4GB
>  			 * boundaries.
>  			 */
> -			if (dev_addr >> 32 != (dev_addr + size - 1) >> 32)
> +			if (dev_addr >> 32 != (dev_addr + size - 1) >> 32) {
>  				size = ALIGN(dev_addr, 1ULL << 32) - dev_addr;
> -			if (mem_addr >> 32 != (mem_addr + size - 1) >> 32)
> +				cross_boundary = true;
> +			}
> +			if (mem_addr >> 32 != (mem_addr + size - 1) >> 32) {
>  				size = ALIGN(mem_addr, 1ULL << 32) - mem_addr;
> -
> -			/*
> -			 * Check if either of the source or destination 
address
> -			 * can't be expressed in 32 bits. If so we can't use
> -			 * hardware descriptor lists.
> -			 */
> -			if (dev_addr >> 32 || mem_addr >> 32)
> -				highmem = true;
> +				cross_boundary = true;
> +			}
>  #endif
> 
>  			chunk = rcar_dmac_xfer_chunk_get(chan);
> @@ -943,13 +957,11 @@ static void rcar_dmac_chan_configure_desc(struct
> rcar_dmac_chan *chan, * Use hardware descriptor lists if possible when more
> than one chunk * needs to be transferred (otherwise they don't make much
> sense). *
> -	 * The highmem check currently covers the whole transfer. As an
> -	 * optimization we could use descriptor lists for consecutive lowmem
> -	 * chunks and direct manual mode for highmem chunks. Whether the
> -	 * performance improvement would be significant enough compared to the
> -	 * additional complexity remains to be investigated.
> +	 * Source/Destination address should be located in same 4GiB region
> +	 * in the 40bit address space when it uses Hardware descriptor,
> +	 * and cross_boundary is checking it.
>  	 */
> -	desc->hwdescs.use = !highmem && nchunks > 1;
> +	desc->hwdescs.use = !cross_boundary && nchunks > 1;
>  	if (desc->hwdescs.use) {
>  		if (rcar_dmac_fill_hwdesc(chan, desc) < 0)
>  			desc->hwdescs.use = false;
Geert Uytterhoeven March 8, 2017, 8:17 a.m. UTC | #2
On Wed, Mar 8, 2017 at 1:42 AM, Kuninori Morimoto
<kuninori.morimoto.gx@renesas.com> wrote:
> From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
>
> SYS-DMAC can use 40bit address transfer, and it supports Descriptor
> Mode too. Current SYS-DMAC driver disables Descriptor Mode if it was
> 40bit address today. But it can use Descriptor Mode with 40bit if
> transfer Source/Destination address are located in same 4GiB region
> in the 40 bit address space.
> This patch enables it if all condition was clear
>
> Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>

Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
--
To unsubscribe from this list: send the line "unsubscribe dmaengine" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 48b22d5..a420921 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -344,13 +344,19 @@  static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
 		rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
 
 	if (desc->hwdescs.use) {
-		struct rcar_dmac_xfer_chunk *chunk;
+		struct rcar_dmac_xfer_chunk *chunk =
+			list_first_entry(&desc->chunks,
+					 struct rcar_dmac_xfer_chunk, node);
 
 		dev_dbg(chan->chan.device->dev,
 			"chan%u: queue desc %p: %u@%pad\n",
 			chan->index, desc, desc->nchunks, &desc->hwdescs.dma);
 
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR,
+				     chunk->src_addr >> 32);
+		rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR,
+				     chunk->dst_addr >> 32);
 		rcar_dmac_chan_write(chan, RCAR_DMAFIXDPBASE,
 				     desc->hwdescs.dma >> 32);
 #endif
@@ -368,8 +374,6 @@  static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
 		 * should. Initialize it manually with the destination address
 		 * of the first chunk.
 		 */
-		chunk = list_first_entry(&desc->chunks,
-					 struct rcar_dmac_xfer_chunk, node);
 		rcar_dmac_chan_write(chan, RCAR_DMADAR,
 				     chunk->dst_addr & 0xffffffff);
 
@@ -855,8 +859,12 @@  static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
 	unsigned int nchunks = 0;
 	unsigned int max_chunk_size;
 	unsigned int full_size = 0;
-	bool highmem = false;
+	bool cross_boundary = false;
 	unsigned int i;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	u32 high_dev_addr = 0;
+	u32 high_mem_addr = 0;
+#endif
 
 	desc = rcar_dmac_desc_get(chan);
 	if (!desc)
@@ -882,6 +890,16 @@  static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
 
 		full_size += len;
 
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		if (i == 0) {
+			high_dev_addr = dev_addr >> 32;
+			high_mem_addr = mem_addr >> 32;
+		}
+
+		if ((dev_addr >> 32 != high_dev_addr) ||
+		    (mem_addr >> 32 != high_mem_addr))
+			cross_boundary = true;
+#endif
 		while (len) {
 			unsigned int size = min(len, max_chunk_size);
 
@@ -890,18 +908,14 @@  static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
 			 * Prevent individual transfers from crossing 4GB
 			 * boundaries.
 			 */
-			if (dev_addr >> 32 != (dev_addr + size - 1) >> 32)
+			if (dev_addr >> 32 != (dev_addr + size - 1) >> 32) {
 				size = ALIGN(dev_addr, 1ULL << 32) - dev_addr;
-			if (mem_addr >> 32 != (mem_addr + size - 1) >> 32)
+				cross_boundary = true;
+			}
+			if (mem_addr >> 32 != (mem_addr + size - 1) >> 32) {
 				size = ALIGN(mem_addr, 1ULL << 32) - mem_addr;
-
-			/*
-			 * Check if either of the source or destination address
-			 * can't be expressed in 32 bits. If so we can't use
-			 * hardware descriptor lists.
-			 */
-			if (dev_addr >> 32 || mem_addr >> 32)
-				highmem = true;
+				cross_boundary = true;
+			}
 #endif
 
 			chunk = rcar_dmac_xfer_chunk_get(chan);
@@ -943,13 +957,11 @@  static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
 	 * Use hardware descriptor lists if possible when more than one chunk
 	 * needs to be transferred (otherwise they don't make much sense).
 	 *
-	 * The highmem check currently covers the whole transfer. As an
-	 * optimization we could use descriptor lists for consecutive lowmem
-	 * chunks and direct manual mode for highmem chunks. Whether the
-	 * performance improvement would be significant enough compared to the
-	 * additional complexity remains to be investigated.
+	 * Source/Destination address should be located in same 4GiB region
+	 * in the 40bit address space when it uses Hardware descriptor,
+	 * and cross_boundary is checking it.
 	 */
-	desc->hwdescs.use = !highmem && nchunks > 1;
+	desc->hwdescs.use = !cross_boundary && nchunks > 1;
 	if (desc->hwdescs.use) {
 		if (rcar_dmac_fill_hwdesc(chan, desc) < 0)
 			desc->hwdescs.use = false;