diff mbox series

[1/1] drm: msm: Replace dma_map_sg with dma_sync_sg*

Message ID 20181120095437.29820-1-vivek.gautam@codeaurora.org (mailing list archive)
State Not Applicable, archived
Headers show
Series [1/1] drm: msm: Replace dma_map_sg with dma_sync_sg* | expand

Commit Message

Vivek Gautam Nov. 20, 2018, 9:54 a.m. UTC
dma_map_sg() expects a DMA domain. However, the drm devices
have been traditionally using unmanaged iommu domain which
is non-dma type. Using dma mapping APIs with that domain is bad.

Replace dma_map_sg() calls with dma_sync_sg_for_device{|cpu}()
to do the cache maintenance.

Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Suggested-by: Tomasz Figa <tfiga@chromium.org>
---

Tested on an MTP sdm845:
https://github.com/vivekgautam1/linux/tree/v4.19/sdm845-mtp-display-working

 drivers/gpu/drm/msm/msm_gem.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

Comments

Jordan Crouse Nov. 20, 2018, 3:41 p.m. UTC | #1
On Tue, Nov 20, 2018 at 03:24:37PM +0530, Vivek Gautam wrote:
> dma_map_sg() expects a DMA domain. However, the drm devices
> have been traditionally using unmanaged iommu domain which
> is non-dma type. Using dma mapping APIs with that domain is bad.
> 
> Replace dma_map_sg() calls with dma_sync_sg_for_device{|cpu}()
> to do the cache maintenance.
> 
> Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
> Suggested-by: Tomasz Figa <tfiga@chromium.org>
> ---
> 
> Tested on an MTP sdm845:
> https://github.com/vivekgautam1/linux/tree/v4.19/sdm845-mtp-display-working
> 
>  drivers/gpu/drm/msm/msm_gem.c | 27 ++++++++++++++++++++-------
>  1 file changed, 20 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
> index 00c795ced02c..d7a7af610803 100644
> --- a/drivers/gpu/drm/msm/msm_gem.c
> +++ b/drivers/gpu/drm/msm/msm_gem.c
> @@ -81,6 +81,8 @@ static struct page **get_pages(struct drm_gem_object *obj)
>  		struct drm_device *dev = obj->dev;
>  		struct page **p;
>  		int npages = obj->size >> PAGE_SHIFT;
> +		struct scatterlist *s;
> +		int i;
>  
>  		if (use_pages(obj))
>  			p = drm_gem_get_pages(obj);
> @@ -107,9 +109,19 @@ static struct page **get_pages(struct drm_gem_object *obj)
>  		/* For non-cached buffers, ensure the new pages are clean
>  		 * because display controller, GPU, etc. are not coherent:
>  		 */
> -		if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
> -			dma_map_sg(dev->dev, msm_obj->sgt->sgl,
> -					msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
> +		if (msm_obj->flags & (MSM_BO_WC | MSM_BO_UNCACHED)) {
> +			/*
> +			 * Fake up the SG table so that dma_sync_sg_*()
> +			 * can be used to flush the pages associated with it.
> +			 */

We aren't really faking.  The table is real, we are just slightly abusing the
sg_dma_address() which makes this comment a bit misleading. Instead I would
probably say something like:

/* dma_sync_sg_* flushes pages using sg_dma_address() so point it at the
 * physical page for the right behavior */

Or something like that.

> +			for_each_sg(msm_obj->sgt->sgl, s,
> +				    msm_obj->sgt->nents, i)
> +				sg_dma_address(s) = sg_phys(s);
> +

I'm wondering - wouldn't we want to do this association for cached buffers to so
we could sync them correctly in cpu_prep and cpu_fini?  Maybe it wouldn't hurt
to put this association in the main path (obviously the sync should stay inside
the conditional for uncached buffers).

> +			dma_sync_sg_for_device(dev->dev, msm_obj->sgt->sgl,
> +					       msm_obj->sgt->nents,
> +					       DMA_TO_DEVICE);
> +		}
>  	}
>  
>  	return msm_obj->pages;
> @@ -137,10 +149,11 @@ static void put_pages(struct drm_gem_object *obj)
>  			 * pages are clean because display controller,
>  			 * GPU, etc. are not coherent:
>  			 */
> -			if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
> -				dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl,
> -					     msm_obj->sgt->nents,
> -					     DMA_BIDIRECTIONAL);
> +			if (msm_obj->flags & (MSM_BO_WC | MSM_BO_UNCACHED))
> +				dma_sync_sg_for_cpu(obj->dev->dev,
> +						    msm_obj->sgt->sgl,
> +						    msm_obj->sgt->nents,
> +						    DMA_FROM_DEVICE);
>  
>  			sg_free_table(msm_obj->sgt);
>  			kfree(msm_obj->sgt);
Tomasz Figa Nov. 21, 2018, 3:48 a.m. UTC | #2
Hi Jordan, Vivek,

On Wed, Nov 21, 2018 at 12:41 AM Jordan Crouse <jcrouse@codeaurora.org> wrote:
>
> On Tue, Nov 20, 2018 at 03:24:37PM +0530, Vivek Gautam wrote:
> > dma_map_sg() expects a DMA domain. However, the drm devices
> > have been traditionally using unmanaged iommu domain which
> > is non-dma type. Using dma mapping APIs with that domain is bad.
> >
> > Replace dma_map_sg() calls with dma_sync_sg_for_device{|cpu}()
> > to do the cache maintenance.
> >
> > Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
> > Suggested-by: Tomasz Figa <tfiga@chromium.org>
> > ---
> >
> > Tested on an MTP sdm845:
> > https://github.com/vivekgautam1/linux/tree/v4.19/sdm845-mtp-display-working
> >
> >  drivers/gpu/drm/msm/msm_gem.c | 27 ++++++++++++++++++++-------
> >  1 file changed, 20 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
> > index 00c795ced02c..d7a7af610803 100644
> > --- a/drivers/gpu/drm/msm/msm_gem.c
> > +++ b/drivers/gpu/drm/msm/msm_gem.c
> > @@ -81,6 +81,8 @@ static struct page **get_pages(struct drm_gem_object *obj)
> >               struct drm_device *dev = obj->dev;
> >               struct page **p;
> >               int npages = obj->size >> PAGE_SHIFT;
> > +             struct scatterlist *s;
> > +             int i;
> >
> >               if (use_pages(obj))
> >                       p = drm_gem_get_pages(obj);
> > @@ -107,9 +109,19 @@ static struct page **get_pages(struct drm_gem_object *obj)
> >               /* For non-cached buffers, ensure the new pages are clean
> >                * because display controller, GPU, etc. are not coherent:
> >                */
> > -             if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
> > -                     dma_map_sg(dev->dev, msm_obj->sgt->sgl,
> > -                                     msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
> > +             if (msm_obj->flags & (MSM_BO_WC | MSM_BO_UNCACHED)) {
> > +                     /*
> > +                      * Fake up the SG table so that dma_sync_sg_*()
> > +                      * can be used to flush the pages associated with it.
> > +                      */
>
> We aren't really faking.  The table is real, we are just slightly abusing the
> sg_dma_address() which makes this comment a bit misleading. Instead I would
> probably say something like:
>
> /* dma_sync_sg_* flushes pages using sg_dma_address() so point it at the
>  * physical page for the right behavior */
>
> Or something like that.
>

It's actually quite complicated, but I agree that the comment isn't
very precise. The cases are as follows:
- arm64 iommu_dma_ops use sg_phys()
https://elixir.bootlin.com/linux/v4.20-rc3/source/arch/arm64/mm/dma-mapping.c#L599
- swiotlb_dma_ops used on arm64 if no IOMMU is available use
sg->dma_address directly:
https://elixir.bootlin.com/linux/v4.20-rc3/source/kernel/dma/swiotlb.c#L832
- arm_dma_ops use sg_dma_address():
https://elixir.bootlin.com/linux/v4.20-rc3/source/arch/arm/mm/dma-mapping.c#L1130
- arm iommu_ops use sg_page():
https://elixir.bootlin.com/linux/v4.20-rc3/source/arch/arm/mm/dma-mapping.c#L1869

Sounds like a mess...

> > +                     for_each_sg(msm_obj->sgt->sgl, s,
> > +                                 msm_obj->sgt->nents, i)
> > +                             sg_dma_address(s) = sg_phys(s);
> > +
>
> I'm wondering - wouldn't we want to do this association for cached buffers to so
> we could sync them correctly in cpu_prep and cpu_fini?  Maybe it wouldn't hurt
> to put this association in the main path (obviously the sync should stay inside
> the conditional for uncached buffers).
>

I guess it wouldn't hurt indeed. Note that cpu_prep/fini seem to be
missing the sync call currently.

P.S. Jordan, not sure if it's my Gmail or your email client, but your
message had all the recipients in a Reply-to header, except you, so
pressing Reply to all in my case led to a message that didn't have you
in recipients anymore...

Best regards,
Tomasz
Vivek Gautam Nov. 22, 2018, 10:07 a.m. UTC | #3
Hi Tomasz, Jordan,


On 11/21/2018 9:18 AM, Tomasz Figa wrote:
> Hi Jordan, Vivek,
>
> On Wed, Nov 21, 2018 at 12:41 AM Jordan Crouse <jcrouse@codeaurora.org> wrote:
>> On Tue, Nov 20, 2018 at 03:24:37PM +0530, Vivek Gautam wrote:
>>> dma_map_sg() expects a DMA domain. However, the drm devices
>>> have been traditionally using unmanaged iommu domain which
>>> is non-dma type. Using dma mapping APIs with that domain is bad.
>>>
>>> Replace dma_map_sg() calls with dma_sync_sg_for_device{|cpu}()
>>> to do the cache maintenance.
>>>
>>> Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
>>> Suggested-by: Tomasz Figa <tfiga@chromium.org>
>>> ---
>>>
>>> Tested on an MTP sdm845:
>>> https://github.com/vivekgautam1/linux/tree/v4.19/sdm845-mtp-display-working
>>>
>>>   drivers/gpu/drm/msm/msm_gem.c | 27 ++++++++++++++++++++-------
>>>   1 file changed, 20 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
>>> index 00c795ced02c..d7a7af610803 100644
>>> --- a/drivers/gpu/drm/msm/msm_gem.c
>>> +++ b/drivers/gpu/drm/msm/msm_gem.c
>>> @@ -81,6 +81,8 @@ static struct page **get_pages(struct drm_gem_object *obj)
>>>                struct drm_device *dev = obj->dev;
>>>                struct page **p;
>>>                int npages = obj->size >> PAGE_SHIFT;
>>> +             struct scatterlist *s;
>>> +             int i;
>>>
>>>                if (use_pages(obj))
>>>                        p = drm_gem_get_pages(obj);
>>> @@ -107,9 +109,19 @@ static struct page **get_pages(struct drm_gem_object *obj)
>>>                /* For non-cached buffers, ensure the new pages are clean
>>>                 * because display controller, GPU, etc. are not coherent:
>>>                 */
>>> -             if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
>>> -                     dma_map_sg(dev->dev, msm_obj->sgt->sgl,
>>> -                                     msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
>>> +             if (msm_obj->flags & (MSM_BO_WC | MSM_BO_UNCACHED)) {
>>> +                     /*
>>> +                      * Fake up the SG table so that dma_sync_sg_*()
>>> +                      * can be used to flush the pages associated with it.
>>> +                      */
>> We aren't really faking.  The table is real, we are just slightly abusing the
>> sg_dma_address() which makes this comment a bit misleading. Instead I would
>> probably say something like:
>>
>> /* dma_sync_sg_* flushes pages using sg_dma_address() so point it at the
>>   * physical page for the right behavior */
>>
>> Or something like that.
>>
> It's actually quite complicated, but I agree that the comment isn't
> very precise. The cases are as follows:
> - arm64 iommu_dma_ops use sg_phys()
> https://elixir.bootlin.com/linux/v4.20-rc3/source/arch/arm64/mm/dma-mapping.c#L599
> - swiotlb_dma_ops used on arm64 if no IOMMU is available use
> sg->dma_address directly:
> https://elixir.bootlin.com/linux/v4.20-rc3/source/kernel/dma/swiotlb.c#L832
> - arm_dma_ops use sg_dma_address():
> https://elixir.bootlin.com/linux/v4.20-rc3/source/arch/arm/mm/dma-mapping.c#L1130
> - arm iommu_ops use sg_page():
> https://elixir.bootlin.com/linux/v4.20-rc3/source/arch/arm/mm/dma-mapping.c#L1869
>
> Sounds like a mess...
Thanks for the review.

Technically with the below assignment we address all of the above. How 
about an even
simpler version of the suggested comment:

/* dma_sync_sg_* flushes physical pages, so point sg->dma_address to
  * the physical one for the right behavior.
  */


>
>>> +                     for_each_sg(msm_obj->sgt->sgl, s,
>>> +                                 msm_obj->sgt->nents, i)
>>> +                             sg_dma_address(s) = sg_phys(s);
>>> +
>> I'm wondering - wouldn't we want to do this association for cached buffers to so
>> we could sync them correctly in cpu_prep and cpu_fini?  Maybe it wouldn't hurt
>> to put this association in the main path (obviously the sync should stay inside
>> the conditional for uncached buffers).
>>

Sure, I will move this out of the conditional check.

> I guess it wouldn't hurt indeed. Note that cpu_prep/fini seem to be
> missing the sync call currently.

I can't say I understand the usage of cpu_prep and cpu_fini(). But I can add
the necessary support if you can point me in the right direction.
Thanks

Best regards
Vivek
>
> P.S. Jordan, not sure if it's my Gmail or your email client, but your
> message had all the recipients in a Reply-to header, except you, so
> pressing Reply to all in my case led to a message that didn't have you
> in recipients anymore...
>
> Best regards,
> Tomasz
Jordan Crouse Nov. 26, 2018, 6:46 p.m. UTC | #4
On Thu, Nov 22, 2018 at 03:37:54PM +0530, Vivek Gautam wrote:
> Hi Tomasz, Jordan,
> 
> 
> On 11/21/2018 9:18 AM, Tomasz Figa wrote:
 
> >
> >>>+                     for_each_sg(msm_obj->sgt->sgl, s,
> >>>+                                 msm_obj->sgt->nents, i)
> >>>+                             sg_dma_address(s) = sg_phys(s);
> >>>+
> >>I'm wondering - wouldn't we want to do this association for cached buffers to so
> >>we could sync them correctly in cpu_prep and cpu_fini?  Maybe it wouldn't hurt
> >>to put this association in the main path (obviously the sync should stay inside
> >>the conditional for uncached buffers).
> >>
> 
> Sure, I will move this out of the conditional check.
> 
> >I guess it wouldn't hurt indeed. Note that cpu_prep/fini seem to be
> >missing the sync call currently.
> 
> I can't say I understand the usage of cpu_prep and cpu_fini(). But I can add
> the necessary support if you can point me in the right direction.

Not needed for this iteration. We don't have support in those functions for
cached buffers right now so continuing to not support it wouldn't hurt.

Jordan
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 00c795ced02c..d7a7af610803 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -81,6 +81,8 @@  static struct page **get_pages(struct drm_gem_object *obj)
 		struct drm_device *dev = obj->dev;
 		struct page **p;
 		int npages = obj->size >> PAGE_SHIFT;
+		struct scatterlist *s;
+		int i;
 
 		if (use_pages(obj))
 			p = drm_gem_get_pages(obj);
@@ -107,9 +109,19 @@  static struct page **get_pages(struct drm_gem_object *obj)
 		/* For non-cached buffers, ensure the new pages are clean
 		 * because display controller, GPU, etc. are not coherent:
 		 */
-		if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
-			dma_map_sg(dev->dev, msm_obj->sgt->sgl,
-					msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+		if (msm_obj->flags & (MSM_BO_WC | MSM_BO_UNCACHED)) {
+			/*
+			 * Fake up the SG table so that dma_sync_sg_*()
+			 * can be used to flush the pages associated with it.
+			 */
+			for_each_sg(msm_obj->sgt->sgl, s,
+				    msm_obj->sgt->nents, i)
+				sg_dma_address(s) = sg_phys(s);
+
+			dma_sync_sg_for_device(dev->dev, msm_obj->sgt->sgl,
+					       msm_obj->sgt->nents,
+					       DMA_TO_DEVICE);
+		}
 	}
 
 	return msm_obj->pages;
@@ -137,10 +149,11 @@  static void put_pages(struct drm_gem_object *obj)
 			 * pages are clean because display controller,
 			 * GPU, etc. are not coherent:
 			 */
-			if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
-				dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl,
-					     msm_obj->sgt->nents,
-					     DMA_BIDIRECTIONAL);
+			if (msm_obj->flags & (MSM_BO_WC | MSM_BO_UNCACHED))
+				dma_sync_sg_for_cpu(obj->dev->dev,
+						    msm_obj->sgt->sgl,
+						    msm_obj->sgt->nents,
+						    DMA_FROM_DEVICE);
 
 			sg_free_table(msm_obj->sgt);
 			kfree(msm_obj->sgt);