
[5/6] drm/armada: gem: Use drm_clflush_*() functions

Message ID 1428590049-20357-5-git-send-email-thierry.reding@gmail.com (mailing list archive)
State New, archived

Commit Message

Thierry Reding April 9, 2015, 2:34 p.m. UTC
From: Thierry Reding <treding@nvidia.com>

Instead of going through the DMA mapping API for cache maintenance, use
the drm_clflush_*() family of functions to achieve the same effect.

Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/armada/Kconfig      |  1 +
 drivers/gpu/drm/armada/armada_gem.c | 13 ++-----------
 2 files changed, 3 insertions(+), 11 deletions(-)

Comments

Russell King - ARM Linux April 10, 2015, 12:08 p.m. UTC | #1
On Thu, Apr 09, 2015 at 04:34:08PM +0200, Thierry Reding wrote:
> diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
> index 580e10acaa3a..c2d4414031ab 100644
> --- a/drivers/gpu/drm/armada/armada_gem.c
> +++ b/drivers/gpu/drm/armada/armada_gem.c
> @@ -453,19 +453,14 @@ armada_gem_prime_map_dma_buf(struct dma_buf_attachment *attach,
>  			sg_set_page(sg, page, PAGE_SIZE, 0);
>  		}
>  
> -		if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0) {
> -			num = sgt->nents;
> -			goto release;
> -		}
> +		drm_clflush_sg(sgt);
>  	} else if (dobj->page) {
>  		/* Single contiguous page */
>  		if (sg_alloc_table(sgt, 1, GFP_KERNEL))
>  			goto free_sgt;
>  
>  		sg_set_page(sgt->sgl, dobj->page, dobj->obj.size, 0);
> -
> -		if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0)
> -			goto free_table;
> +		drm_clflush_sg(sgt);
>  	} else if (dobj->linear) {
>  		/* Single contiguous physical region - no struct page */
>  		if (sg_alloc_table(sgt, 1, GFP_KERNEL))
> @@ -480,7 +475,6 @@ armada_gem_prime_map_dma_buf(struct dma_buf_attachment *attach,
>   release:
>  	for_each_sg(sgt->sgl, sg, num, i)
>  		page_cache_release(sg_page(sg));
> - free_table:
>  	sg_free_table(sgt);
>   free_sgt:
>  	kfree(sgt);
> @@ -494,9 +488,6 @@ static void armada_gem_prime_unmap_dma_buf(struct dma_buf_attachment *attach,
>  	struct armada_gem_object *dobj = drm_to_armada_gem(obj);
>  	int i;
>  
> -	if (!dobj->linear)
> -		dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
> -

I'm really wondering whether this is the right thing to do.

DMA coherency on ARMv6 and ARMv7 CPUs is not just a case of "do something
just before DMA" - it's more complicated than that because of the
speculative prefetching.

What you must remember is this:

	Any memory which is readable to the CPU may be speculatively
	prefetched by the CPU, and cache lines allocated into the L1
	and L2 caches.

What this means is that if you're doing this:

	Flush caches
	Perform DMA to buffer
	Read buffer from CPU

You may or may not see the data you expect in the buffer - it's
indeterminate, depending on how aggressive the CPU has been at
prefetching data.
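
[To make the ordering concrete, here is a minimal sketch of the streaming-DMA sequence Russell describes. It is not Armada code; the function name and error handling are illustrative. The point is that cache maintenance is needed on both sides of the transfer, which is what the dma_map_sg()/dma_unmap_sg() pair provides:

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/*
 * Illustrative only: a device-to-memory transfer through the streaming
 * DMA API. dma_map_sg() performs the cache maintenance needed before
 * the device writes; dma_unmap_sg() performs the invalidation needed
 * afterwards, discarding any lines the CPU speculatively allocated
 * while the DMA was in flight.
 */
static int example_dma_read(struct device *dev, struct sg_table *sgt)
{
	if (dma_map_sg(dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE) == 0)
		return -ENOMEM;

	/* ... program the device and wait for the transfer to finish ... */

	/*
	 * Without this invalidation the CPU may read stale, speculatively
	 * prefetched cache lines instead of the data the device wrote.
	 */
	dma_unmap_sg(dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);

	return 0;
}

A one-shot flush before the DMA, as drm_clflush_sg() provides, covers only the first half of this sequence.]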
Thierry Reding April 10, 2015, 12:44 p.m. UTC | #2
On Fri, Apr 10, 2015 at 01:08:02PM +0100, Russell King - ARM Linux wrote:
> On Thu, Apr 09, 2015 at 04:34:08PM +0200, Thierry Reding wrote:
> > diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
> > index 580e10acaa3a..c2d4414031ab 100644
> > --- a/drivers/gpu/drm/armada/armada_gem.c
> > +++ b/drivers/gpu/drm/armada/armada_gem.c
> > @@ -453,19 +453,14 @@ armada_gem_prime_map_dma_buf(struct dma_buf_attachment *attach,
> >  			sg_set_page(sg, page, PAGE_SIZE, 0);
> >  		}
> >  
> > -		if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0) {
> > -			num = sgt->nents;
> > -			goto release;
> > -		}
> > +		drm_clflush_sg(sgt);
> >  	} else if (dobj->page) {
> >  		/* Single contiguous page */
> >  		if (sg_alloc_table(sgt, 1, GFP_KERNEL))
> >  			goto free_sgt;
> >  
> >  		sg_set_page(sgt->sgl, dobj->page, dobj->obj.size, 0);
> > -
> > -		if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0)
> > -			goto free_table;
> > +		drm_clflush_sg(sgt);
> >  	} else if (dobj->linear) {
> >  		/* Single contiguous physical region - no struct page */
> >  		if (sg_alloc_table(sgt, 1, GFP_KERNEL))
> > @@ -480,7 +475,6 @@ armada_gem_prime_map_dma_buf(struct dma_buf_attachment *attach,
> >   release:
> >  	for_each_sg(sgt->sgl, sg, num, i)
> >  		page_cache_release(sg_page(sg));
> > - free_table:
> >  	sg_free_table(sgt);
> >   free_sgt:
> >  	kfree(sgt);
> > @@ -494,9 +488,6 @@ static void armada_gem_prime_unmap_dma_buf(struct dma_buf_attachment *attach,
> >  	struct armada_gem_object *dobj = drm_to_armada_gem(obj);
> >  	int i;
> >  
> > -	if (!dobj->linear)
> > -		dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
> > -
> 
> I'm really wondering whether this is the right thing to do.

Indeed, I think it isn't the right thing to do in this case. Both Tegra
and MSM were using this pattern to make sure that caches are invalidated
upon allocation of memory from shmemfs, but I now realize that this
isn't the case for the Armada driver.

Thierry
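
[For reference, the Tegra/MSM pattern Thierry mentions amounts to something like the following sketch; the names are illustrative, not the actual driver code. Freshly allocated shmemfs pages are mapped and immediately unmapped once, purely for the cache-maintenance side effect, so that dirty lines left over from the CPU zeroing the pages cannot later be evicted on top of data the device has written:

/*
 * Illustrative sketch of flushing freshly allocated shmemfs-backed
 * pages: one map/unmap cycle at allocation time, done only for its
 * cache-maintenance side effect.
 */
static int example_flush_new_pages(struct device *dev, struct sg_table *sgt)
{
	if (dma_map_sg(dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE) == 0)
		return -ENOMEM;

	dma_unmap_sg(dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);

	return 0;
}
]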

Patch

diff --git a/drivers/gpu/drm/armada/Kconfig b/drivers/gpu/drm/armada/Kconfig
index 50ae88ad4d76..7b7070128a05 100644
--- a/drivers/gpu/drm/armada/Kconfig
+++ b/drivers/gpu/drm/armada/Kconfig
@@ -4,6 +4,7 @@ config DRM_ARMADA
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
+	select DRM_CACHE
 	select DRM_KMS_HELPER
 	select DRM_KMS_FB_HELPER
 	help
diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
index 580e10acaa3a..c2d4414031ab 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c
@@ -453,19 +453,14 @@ armada_gem_prime_map_dma_buf(struct dma_buf_attachment *attach,
 			sg_set_page(sg, page, PAGE_SIZE, 0);
 		}
 
-		if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0) {
-			num = sgt->nents;
-			goto release;
-		}
+		drm_clflush_sg(sgt);
 	} else if (dobj->page) {
 		/* Single contiguous page */
 		if (sg_alloc_table(sgt, 1, GFP_KERNEL))
 			goto free_sgt;
 
 		sg_set_page(sgt->sgl, dobj->page, dobj->obj.size, 0);
-
-		if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0)
-			goto free_table;
+		drm_clflush_sg(sgt);
 	} else if (dobj->linear) {
 		/* Single contiguous physical region - no struct page */
 		if (sg_alloc_table(sgt, 1, GFP_KERNEL))
@@ -480,7 +475,6 @@ armada_gem_prime_map_dma_buf(struct dma_buf_attachment *attach,
  release:
 	for_each_sg(sgt->sgl, sg, num, i)
 		page_cache_release(sg_page(sg));
- free_table:
 	sg_free_table(sgt);
  free_sgt:
 	kfree(sgt);
@@ -494,9 +488,6 @@ static void armada_gem_prime_unmap_dma_buf(struct dma_buf_attachment *attach,
 	struct armada_gem_object *dobj = drm_to_armada_gem(obj);
 	int i;
 
-	if (!dobj->linear)
-		dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
-
 	if (dobj->obj.filp) {
 		struct scatterlist *sg;
 		for_each_sg(sgt->sgl, sg, sgt->nents, i)