[1/2] drm/shmem: Use cached mappings by default

Message ID 20200513150312.21421-2-tzimmermann@suse.de (mailing list archive)
State New, archived
Headers show
Series Default to cachable mappings for GEM SHMEM

Commit Message

Thomas Zimmermann May 13, 2020, 3:03 p.m. UTC
SHMEM-buffer backing storage is allocated from system memory, which is
typically cachable. Currently, only virtio uses cachable mappings; udl
uses its own vmap/mmap implementation for cachable mappings. Other
drivers default to writecombine mappings.

Use cached mappings by default. The exception is pages imported via
dma-buf. DMA memory is usually not cached.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
---
 drivers/gpu/drm/drm_gem_shmem_helper.c  | 6 ++++--
 drivers/gpu/drm/virtio/virtgpu_object.c | 1 -
 include/drm/drm_gem_shmem_helper.h      | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)
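
To illustrate the new default: a driver whose hardware still requires
writecombine mappings would opt in from its gem_create_object hook,
roughly like this (a sketch only; the foo_ driver and function name are
hypothetical):

#include <linux/slab.h>
#include <drm/drm_gem_shmem_helper.h>

static struct drm_gem_object *
foo_gem_create_object(struct drm_device *dev, size_t size)
{
	struct drm_gem_shmem_object *shmem;

	shmem = kzalloc(sizeof(*shmem), GFP_KERNEL);
	if (!shmem)
		return NULL;

	/* opt back into writecombine mappings for this device */
	shmem->map_wc = true;

	return &shmem->base;
}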

Comments

Thomas Zimmermann May 13, 2020, 3:06 p.m. UTC | #1
On 13.05.20 17:03, Thomas Zimmermann wrote:
> SHMEM-buffer backing storage is allocated from system memory; which is
> typically cachable. Currently, only virtio uses cachable mappings; udl
> uses its own vmap/mmap implementation for cachable mappings. Other
> drivers default to writecombine mappings.
> 
> Use cached mappings by default. The exception is pages imported via
> dma-buf. DMA memory is usually not cached.
> 
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> ---
>  drivers/gpu/drm/drm_gem_shmem_helper.c  | 6 ++++--
>  drivers/gpu/drm/virtio/virtgpu_object.c | 1 -
>  include/drm/drm_gem_shmem_helper.h      | 4 ++--
>  3 files changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
> index df31e5782eed1..1ce90325dfa31 100644
> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> @@ -259,7 +259,7 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
>  	} else {
>  		pgprot_t prot = PAGE_KERNEL;
>  
> -		if (!shmem->map_cached)
> +		if (shmem->map_wc)
>  			prot = pgprot_writecombine(prot);
>  		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
>  				    VM_MAP, prot);
> @@ -546,7 +546,7 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
>  
>  	vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
>  	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
> -	if (!shmem->map_cached)
> +	if (shmem->map_wc)
>  		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
>  	vma->vm_ops = &drm_gem_shmem_vm_ops;
>  
> @@ -664,6 +664,8 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
>  	if (IS_ERR(shmem))
>  		return ERR_CAST(shmem);
>  
> +	shmem->map_wc = false; /* dma-buf mappings use writecombine */

Just when I posted the patch, I saw this bug. map_wc should be set to
true to enable writecombine mappings for dma-buf pages. Will be fixed in
the next iteration.
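
With that fix, the imported-pages path would instead read:

	shmem->map_wc = true; /* dma-buf mappings use writecombine */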

Daniel Vetter May 14, 2020, 12:40 p.m. UTC | #2
On Wed, May 13, 2020 at 05:03:11PM +0200, Thomas Zimmermann wrote:
> SHMEM-buffer backing storage is allocated from system memory; which is
> typically cachable. Currently, only virtio uses cachable mappings; udl
> uses its own vmap/mmap implementation for cachable mappings. Other
> drivers default to writecombine mappings.

I'm pretty sure this breaks all these drivers. quick grep on a few
functions says this is used by lima, panfrost, v3d. And they definitely
need uncached/wc stuff afaiui. Or I'm completely missing something?
-Daniel

Thomas Zimmermann May 14, 2020, 3:27 p.m. UTC | #3
Hi

On 14.05.20 14:40, Daniel Vetter wrote:
> On Wed, May 13, 2020 at 05:03:11PM +0200, Thomas Zimmermann wrote:
>> SHMEM-buffer backing storage is allocated from system memory; which is
>> typically cachable. Currently, only virtio uses cachable mappings; udl
>> uses its own vmap/mmap implementation for cachable mappings. Other
>> drivers default to writecombine mappings.
> 
> I'm pretty sure this breaks all these drivers. quick grep on a few
> functions says this is used by lima, panfrost, v3d. And they definitely
> need uncached/wc stuff afaiui. Or I'm completely missing something?

OK, I better get some testing for this code. :D

Rob Herring (Arm) May 14, 2020, 8:36 p.m. UTC | #4
On Thu, May 14, 2020 at 7:40 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Wed, May 13, 2020 at 05:03:11PM +0200, Thomas Zimmermann wrote:
> > SHMEM-buffer backing storage is allocated from system memory; which is
> > typically cachable. Currently, only virtio uses cachable mappings; udl
> > uses its own vmap/mmap implementation for cachable mappings. Other
> > drivers default to writecombine mappings.
>
> I'm pretty sure this breaks all these drivers. quick grep on a few
> functions says this is used by lima, panfrost, v3d. And they definitely
> need uncached/wc stuff afaiui. Or I'm completely missing something?

Yes, that would be my guess. DMA is usually non-coherent on Arm.

Rob
Thomas Zimmermann May 15, 2020, 6:58 a.m. UTC | #5
Hi

On 14.05.20 22:36, Rob Herring wrote:
> On Thu, May 14, 2020 at 7:40 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>>
>> On Wed, May 13, 2020 at 05:03:11PM +0200, Thomas Zimmermann wrote:
>>> SHMEM-buffer backing storage is allocated from system memory; which is
>>> typically cachable. Currently, only virtio uses cachable mappings; udl
>>> uses its own vmap/mmap implementation for cachable mappings. Other
>>> drivers default to writecombine mappings.
>>
>> I'm pretty sure this breaks all these drivers. quick grep on a few
>> functions says this is used by lima, panfrost, v3d. And they definitely
>> need uncached/wc stuff afaiui. Or I'm completely missing something?
> 

OK. I think I'll just make a patch that adds a .gem_create_object helper
that sets the map_cached flag, so drivers can opt in.

> Yes, that would be my guess. DMA is usually non-coherent on Arm.

Can one of you give me some pointer to what you're looking at? For
example, I grepped for use of vmap within lima and only found [1].
That's for a memcpy with no DMA involved. There's got to be something
I'm missing.

Best regards
Thomas

[1]
https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/lima/lima_sched.c#n391

Daniel Vetter May 15, 2020, 2:10 p.m. UTC | #6
On Fri, May 15, 2020 at 08:58:02AM +0200, Thomas Zimmermann wrote:
> Hi
> 
> On 14.05.20 22:36, Rob Herring wrote:
> > On Thu, May 14, 2020 at 7:40 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> >>
> >> On Wed, May 13, 2020 at 05:03:11PM +0200, Thomas Zimmermann wrote:
> >>> SHMEM-buffer backing storage is allocated from system memory; which is
> >>> typically cachable. Currently, only virtio uses cachable mappings; udl
> >>> uses its own vmap/mmap implementation for cachable mappings. Other
> >>> drivers default to writecombine mappings.
> >>
> >> I'm pretty sure this breaks all these drivers. quick grep on a few
> >> functions says this is used by lima, panfrost, v3d. And they definitely
> >> need uncached/wc stuff afaiui. Or I'm completely missing something?
> > 
> 
> OK. I think I'll just make a patch that adds a .gem_create_object helper
> that sets the map_cached flag. So drivers can opt-in.
> 
> > Yes, that would be my guess. DMA is usually non-coherent on Arm.
> 
> Can one of you give me some pointer to what you're looking at? For
> example, I grepped for use of vmap within lima and only found [1].
> That's a for memcpy with no DMA involved. There's got to be something
> I'm missing.
> 
> Best regards
> Thomas
> 
> [1]
> https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/lima/lima_sched.c#n391

$ git grep drm_gem_shmem_mmap

We also need correct access from userspace, otherwise the gpu is going to
be sad.
-Daniel
Thomas Zimmermann May 18, 2020, 8:13 a.m. UTC | #7
Hi

On 15.05.20 16:10, Daniel Vetter wrote:
> On Fri, May 15, 2020 at 08:58:02AM +0200, Thomas Zimmermann wrote:
>> Hi
>>
>> On 14.05.20 22:36, Rob Herring wrote:
>>> On Thu, May 14, 2020 at 7:40 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>>>>
>>>> On Wed, May 13, 2020 at 05:03:11PM +0200, Thomas Zimmermann wrote:
>>>>> SHMEM-buffer backing storage is allocated from system memory; which is
>>>>> typically cachable. Currently, only virtio uses cachable mappings; udl
>>>>> uses its own vmap/mmap implementation for cachable mappings. Other
>>>>> drivers default to writecombine mappings.
>>>>
>>>> I'm pretty sure this breaks all these drivers. quick grep on a few
>>>> functions says this is used by lima, panfrost, v3d. And they definitely
>>>> need uncached/wc stuff afaiui. Or I'm completely missing something?
>>>
>>
>> OK. I think I'll just make a patch that adds a .gem_create_object helper
>> that sets the map_cached flag. So drivers can opt-in.
>>
>>> Yes, that would be my guess. DMA is usually non-coherent on Arm.
>>
>> Can one of you give me some pointer to what you're looking at? For
>> example, I grepped for use of vmap within lima and only found [1].
>> That's a for memcpy with no DMA involved. There's got to be something
>> I'm missing.
>>
>> Best regards
>> Thomas
>>
>> [1]
>> https://cgit.freedesktop.org/drm/drm-tip/tree/drivers/gpu/drm/lima/lima_sched.c#n391
> 
> $ git grep drm_gem_shmem_mmap
> 
> We also need correct access from userspace, otherwise the gpu is going to
> be sad.

I've been thinking about this, and I think it means that we can never
have cached mappings anywhere. Even if shmem supports it internally for
most drivers, as soon as the pages are exported, the importer could
expect uncached memory.

Given that, I think the correct thing to do is to remove udl's gem code
and use the default implementation.

Best regards
Thomas


Gerd Hoffmann May 18, 2020, 8:23 a.m. UTC | #8
> > $ git grep drm_gem_shmem_mmap
> > 
> > We also need correct access from userspace, otherwise the gpu is going to
> > be sad.
> 
> I've been thinking about this, and I think it means that we can never
> have cached mappings anywhere. Even if shmem supports it internally for
> most drivers, as soon as the page are exported, the importer could
> expect uncached memory.

The importer should not expect anything; it should call the dma-buf ops
so the exporter has a chance to handle this correctly.

(Yes, we don't get this right everywhere; some drivers take the
dma-buf's list of pages and do their own thing ...)

take care,
  Gerd
Thomas Zimmermann May 18, 2020, 8:50 a.m. UTC | #9
Hi Gerd

On 18.05.20 10:23, Gerd Hoffmann wrote:
>>> $ git grep drm_gem_shmem_mmap
>>>
>>> We also need correct access from userspace, otherwise the gpu is going to
>>> be sad.
>>
>> I've been thinking about this, and I think it means that we can never
>> have cached mappings anywhere. Even if shmem supports it internally for
>> most drivers, as soon as the page are exported, the importer could
>> expect uncached memory.
> 
> The importer should not expect anything but call dma-buf ops so the
> exporter has a chance to handle this correctly.

I have the following case in mind: Suppose the exporter maps cached
pages and the importer expects uncached pages for DMA. There is
map_dma_buf/unmap_dma_buf, which can implement a cache flush for the
cached pages. Is it guaranteed that the importer calls this around each
DMA operation? If it doesn't, it could miss later buffer updates from
the exporter. And there's apparently no explicit flush op in dma_buf_ops.
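
For reference, a minimal sketch of the importer-side pattern in question
(the attach/map/unmap calls are the exporter's hooks for cache
maintenance; "dev" and "buf" are hypothetical, error handling trimmed):

#include <linux/dma-buf.h>
#include <linux/dma-direction.h>

static int import_and_dma(struct device *dev, struct dma_buf *buf)
{
	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	attach = dma_buf_attach(buf, dev);
	if (IS_ERR(attach))
		return PTR_ERR(attach);

	/* the exporter can flush CPU caches here ... */
	sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(sgt)) {
		dma_buf_detach(buf, attach);
		return PTR_ERR(sgt);
	}

	/* ... program device DMA from sgt ... */

	/* ... and again here, around each DMA operation */
	dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
	dma_buf_detach(buf, attach);
	return 0;
}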

Best regards
Thomas

Gerd Hoffmann May 18, 2020, 10:11 a.m. UTC | #10
On Mon, May 18, 2020 at 10:50:15AM +0200, Thomas Zimmermann wrote:
> Hi Gerd
> 
> On 18.05.20 10:23, Gerd Hoffmann wrote:
> >>> $ git grep drm_gem_shmem_mmap
> >>>
> >>> We also need correct access from userspace, otherwise the gpu is going to
> >>> be sad.
> >>
> >> I've been thinking about this, and I think it means that we can never
> >> have cached mappings anywhere. Even if shmem supports it internally for
> >> most drivers, as soon as the page are exported, the importer could
> >> expect uncached memory.
> > 
> > The importer should not expect anything but call dma-buf ops so the
> > exporter has a chance to handle this correctly.
> 
> I have the following case in mind: Suppose the exporter maps cached
> pages and the importer expects uncached pages for DMA. There is
> map_dma_buf/unmap_dma_buf, which can implement a cache flush for the
> cached pages. Is it guaranteed that the importer calls this around each
> DMA operation?

I think the importer is supposed to do that, but I wouldn't be surprised
if there are cases in tree where this isn't implemented correctly ...

take care,
  Gerd
Daniel Vetter May 18, 2020, 2:40 p.m. UTC | #11
On Mon, May 18, 2020 at 12:11:32PM +0200, Gerd Hoffmann wrote:
> On Mon, May 18, 2020 at 10:50:15AM +0200, Thomas Zimmermann wrote:
> > Hi Gerd
> > 
> > On 18.05.20 10:23, Gerd Hoffmann wrote:
> > >>> $ git grep drm_gem_shmem_mmap
> > >>>
> > >>> We also need correct access from userspace, otherwise the gpu is going to
> > >>> be sad.
> > >>
> > >> I've been thinking about this, and I think it means that we can never
> > >> have cached mappings anywhere. Even if shmem supports it internally for
> > >> most drivers, as soon as the page are exported, the importer could
> > >> expect uncached memory.
> > > 
> > > The importer should not expect anything but call dma-buf ops so the
> > > exporter has a chance to handle this correctly.
> > 
> > I have the following case in mind: Suppose the exporter maps cached
> > pages and the importer expects uncached pages for DMA. There is
> > map_dma_buf/unmap_dma_buf, which can implement a cache flush for the
> > cached pages. Is it guaranteed that the importer calls this around each
> > DMA operation?
> 
> I think the importer is supposed to do that, but I wouldn't surprised if
> there are cases in tree where this isn't implemented correctly ...

Yup, this is very much a case of "supposed to" but "in practice, many
actually don't". The reason is that setting up mappings is expensive, so
it's best avoided.

We filled that gap a few years after dma-buf landed with the
begin/end_cpu_access hooks, which allow the exporter to do cache flushing
(using something like dma_sync_sg_for_device/cpu) so that this all works
properly. We even added an ioctl so that mmap on the dma-buf works
correctly.

But most importers still ignore this, so it's all fail :-/ But in theory
the pieces to make cached mappings work over dma-buf, even for importers
that need uncached, are all there.

Cheers, Daniel
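
For illustration, the userspace side of that ioctl looks roughly like
this (a sketch of bracketing CPU access to an mmap'ed dma-buf with
DMA_BUF_IOCTL_SYNC; "dmabuf_fd" and "map" are hypothetical and obtained
elsewhere):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/dma-buf.h>

static int cpu_fill(int dmabuf_fd, void *map, size_t len)
{
	struct dma_buf_sync sync = { 0 };

	/* tell the exporter CPU writes are about to start */
	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
	if (ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync))
		return -1;

	memset(map, 0, len); /* CPU access through the mapping */

	/* ... and that they are done, so caches can be flushed */
	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
	return ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);
}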
Thomas Zimmermann May 19, 2020, 6:31 a.m. UTC | #12
Hi

On 18.05.20 16:40, Daniel Vetter wrote:
> On Mon, May 18, 2020 at 12:11:32PM +0200, Gerd Hoffmann wrote:
>> On Mon, May 18, 2020 at 10:50:15AM +0200, Thomas Zimmermann wrote:
>>> Hi Gerd
>>>
>>> On 18.05.20 10:23, Gerd Hoffmann wrote:
>>>>>> $ git grep drm_gem_shmem_mmap
>>>>>>
>>>>>> We also need correct access from userspace, otherwise the gpu is going to
>>>>>> be sad.
>>>>>
>>>>> I've been thinking about this, and I think it means that we can never
>>>>> have cached mappings anywhere. Even if shmem supports it internally for
>>>>> most drivers, as soon as the page are exported, the importer could
>>>>> expect uncached memory.
>>>>
>>>> The importer should not expect anything but call dma-buf ops so the
>>>> exporter has a chance to handle this correctly.
>>>
>>> I have the following case in mind: Suppose the exporter maps cached
>>> pages and the importer expects uncached pages for DMA. There is
>>> map_dma_buf/unmap_dma_buf, which can implement a cache flush for the
>>> cached pages. Is it guaranteed that the importer calls this around each
>>> DMA operation?
>>
>> I think the importer is supposed to do that, but I wouldn't surprised if
>> there are cases in tree where this isn't implemented correctly ...
> 
> Yup, this is very much a case of "supposed to" but "in practice, many
> actually dont". The reason is that setting up mappings is expensive, so
> best avoid.

Thanks to both of you for answering the question.


Patch

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index df31e5782eed1..1ce90325dfa31 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -259,7 +259,7 @@  static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
 	} else {
 		pgprot_t prot = PAGE_KERNEL;
 
-		if (!shmem->map_cached)
+		if (shmem->map_wc)
 			prot = pgprot_writecombine(prot);
 		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
 				    VM_MAP, prot);
@@ -546,7 +546,7 @@  int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 
 	vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
-	if (!shmem->map_cached)
+	if (shmem->map_wc)
 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	vma->vm_ops = &drm_gem_shmem_vm_ops;
 
@@ -664,6 +664,8 @@  drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
 	if (IS_ERR(shmem))
 		return ERR_CAST(shmem);
 
+	shmem->map_wc = false; /* dma-buf mappings use writecombine */
+
 	shmem->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
 	if (!shmem->pages) {
 		ret = -ENOMEM;
diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c
index 6ccbd01cd888c..80ba6b2b61668 100644
--- a/drivers/gpu/drm/virtio/virtgpu_object.c
+++ b/drivers/gpu/drm/virtio/virtgpu_object.c
@@ -132,7 +132,6 @@  struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev,
 
 	dshmem = &shmem->base.base;
 	dshmem->base.funcs = &virtio_gpu_shmem_funcs;
-	dshmem->map_cached = true;
 	return &dshmem->base;
 }
 
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index 294b2931c4cc0..a5bc082a77c48 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -98,9 +98,9 @@  struct drm_gem_shmem_object {
 	unsigned int vmap_use_count;
 
 	/**
-	 * @map_cached: map object cached (instead of using writecombine).
+	 * @map_wc: map object using writecombine (instead of cached).
 	 */
-	bool map_cached;
+	bool map_wc;
 };
 
 #define to_drm_gem_shmem_obj(obj) \