diff mbox series

[v5,1/3] drm/shmem: add support for per object caching flags.

Message ID 20200226154752.24328-2-kraxel@redhat.com (mailing list archive)
State New, archived
Headers show
Series drm/virtio: fix mmap page attributes | expand

Commit Message

Gerd Hoffmann Feb. 26, 2020, 3:47 p.m. UTC
Add map_cached bool to drm_gem_shmem_object, to request cached mappings
on a per-object base.  Check the flag before adding writecombine to
pgprot bits.

Cc: stable@vger.kernel.org
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 include/drm/drm_gem_shmem_helper.h     |  5 +++++
 drivers/gpu/drm/drm_gem_shmem_helper.c | 15 +++++++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)

Comments

Guillaume Gardet Feb. 26, 2020, 4:51 p.m. UTC | #1
> -----Original Message-----
> From: Gerd Hoffmann <kraxel@redhat.com>
> Sent: 26 February 2020 16:48
> To: dri-devel@lists.freedesktop.org
> Cc: tzimmermann@suse.de; gurchetansingh@chromium.org; olvaffe@gmail.com;
> Guillaume Gardet <Guillaume.Gardet@arm.com>; Gerd Hoffmann
> <kraxel@redhat.com>; stable@vger.kernel.org; Maarten Lankhorst
> <maarten.lankhorst@linux.intel.com>; Maxime Ripard <mripard@kernel.org>;
> David Airlie <airlied@linux.ie>; Daniel Vetter <daniel@ffwll.ch>; open list <linux-
> kernel@vger.kernel.org>
> Subject: [PATCH v5 1/3] drm/shmem: add support for per object caching flags.
>
> Add map_cached bool to drm_gem_shmem_object, to request cached mappings
> on a per-object base.  Check the flag before adding writecombine to pgprot bits.
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>

Tested-by: Guillaume Gardet <Guillaume.Gardet@arm.com>

> ---
>  include/drm/drm_gem_shmem_helper.h     |  5 +++++
>  drivers/gpu/drm/drm_gem_shmem_helper.c | 15 +++++++++++----
>  2 files changed, 16 insertions(+), 4 deletions(-)
>
> diff --git a/include/drm/drm_gem_shmem_helper.h
> b/include/drm/drm_gem_shmem_helper.h
> index e34a7b7f848a..294b2931c4cc 100644
> --- a/include/drm/drm_gem_shmem_helper.h
> +++ b/include/drm/drm_gem_shmem_helper.h
> @@ -96,6 +96,11 @@ struct drm_gem_shmem_object {
>   * The address are un-mapped when the count reaches zero.
>   */
>  unsigned int vmap_use_count;
> +
> +/**
> + * @map_cached: map object cached (instead of using writecombine).
> + */
> +bool map_cached;
>  };
>
>  #define to_drm_gem_shmem_obj(obj) \
> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c
> b/drivers/gpu/drm/drm_gem_shmem_helper.c
> index a421a2eed48a..aad9324dcf4f 100644
> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> @@ -254,11 +254,16 @@ static void *drm_gem_shmem_vmap_locked(struct
> drm_gem_shmem_object *shmem)
>  if (ret)
>  goto err_zero_use;
>
> -if (obj->import_attach)
> +if (obj->import_attach) {
>  shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
> -else
> +} else {
> +pgprot_t prot = PAGE_KERNEL;
> +
> +if (!shmem->map_cached)
> +prot = pgprot_writecombine(prot);
>  shmem->vaddr = vmap(shmem->pages, obj->size >>
> PAGE_SHIFT,
> -    VM_MAP,
> pgprot_writecombine(PAGE_KERNEL));
> +    VM_MAP, prot);
> +}
>
>  if (!shmem->vaddr) {
>  DRM_DEBUG_KMS("Failed to vmap pages\n"); @@ -540,7 +545,9
> @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct
> vm_area_struct *vma)
>  }
>
>  vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
> -vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma-
> >vm_flags));
> +vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
> +if (!shmem->map_cached)
> +vma->vm_page_prot = pgprot_writecombine(vma-
> >vm_page_prot);
>  vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
>  vma->vm_ops = &drm_gem_shmem_vm_ops;
>
> --
> 2.18.2

IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Thomas Hellström (Intel) Feb. 26, 2020, 6:24 p.m. UTC | #2
Hi, Gerd,

While looking at this patchset I came across some stuff that seems 
strange but that was merged in a previous patchset.

(please refer to 
https://lists.freedesktop.org/archives/dri-devel/2018-September/190001.html. 
Forgive me if I've missed any discussion leading up to this).


On 2/26/20 4:47 PM, Gerd Hoffmann wrote:
> Add map_cached bool to drm_gem_shmem_object, to request cached mappings
> on a per-object base.  Check the flag before adding writecombine to
> pgprot bits.
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>   include/drm/drm_gem_shmem_helper.h     |  5 +++++
>   drivers/gpu/drm/drm_gem_shmem_helper.c | 15 +++++++++++----
>   2 files changed, 16 insertions(+), 4 deletions(-)
>
> diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
> index e34a7b7f848a..294b2931c4cc 100644
> --- a/include/drm/drm_gem_shmem_helper.h
> +++ b/include/drm/drm_gem_shmem_helper.h
> @@ -96,6 +96,11 @@ struct drm_gem_shmem_object {
>   	 * The address are un-mapped when the count reaches zero.
>   	 */
>   	unsigned int vmap_use_count;
> +
> +	/**
> +	 * @map_cached: map object cached (instead of using writecombine).
> +	 */
> +	bool map_cached;
>   };
>   
>   #define to_drm_gem_shmem_obj(obj) \
> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
> index a421a2eed48a..aad9324dcf4f 100644
> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> @@ -254,11 +254,16 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
>   	if (ret)
>   		goto err_zero_use;
>   
> -	if (obj->import_attach)
> +	if (obj->import_attach) {
>   		shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
> -	else
> +	} else {
> +		pgprot_t prot = PAGE_KERNEL;
> +
> +		if (!shmem->map_cached)
> +			prot = pgprot_writecombine(prot);
>   		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
> -				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
> +				    VM_MAP, prot)


Wouldn't a vmap with pgprot_writecombine() create conflicting mappings 
with the linear kernel map which is not write-combined? Or do you change 
the linear kernel map of the shmem pages somewhere? vmap bypassess at 
least the x86 PAT core mapping consistency check and this could 
potentially cause spuriously overwritten memory.


> +	}
>   
>   	if (!shmem->vaddr) {
>   		DRM_DEBUG_KMS("Failed to vmap pages\n");
> @@ -540,7 +545,9 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
>   	}
>   
>   	vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
> -	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
> +	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
> +	if (!shmem->map_cached)
> +		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

Same thing here. Note that vmf_insert_page() which is used by the fault 
handler also bypasses the x86 PAT  consistency check, whereas 
vmf_insert_mixed() doesn't.

>   	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);

At least with SME or SEV encryption, where shmem memory has its kernel 
map set to encrypted, creating conflicting mappings is explicitly 
disallowed.
BTW, why is mmap mapping decrypted while vmap isn't?

>   	vma->vm_ops = &drm_gem_shmem_vm_ops;
>   

Thanks,
Thomas
Chia-I Wu Feb. 27, 2020, 12:02 a.m. UTC | #3
On Wed, Feb 26, 2020 at 10:25 AM Thomas Hellström (VMware)
<thomas_os@shipmail.org> wrote:
>
> Hi, Gerd,
>
> While looking at this patchset I came across some stuff that seems
> strange but that was merged in a previous patchset.
>
> (please refer to
> https://lists.freedesktop.org/archives/dri-devel/2018-September/190001.html.
> Forgive me if I've missed any discussion leading up to this).
>
>
> On 2/26/20 4:47 PM, Gerd Hoffmann wrote:
> > Add map_cached bool to drm_gem_shmem_object, to request cached mappings
> > on a per-object base.  Check the flag before adding writecombine to
> > pgprot bits.
> >
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> > ---
> >   include/drm/drm_gem_shmem_helper.h     |  5 +++++
> >   drivers/gpu/drm/drm_gem_shmem_helper.c | 15 +++++++++++----
> >   2 files changed, 16 insertions(+), 4 deletions(-)
> >
> > diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
> > index e34a7b7f848a..294b2931c4cc 100644
> > --- a/include/drm/drm_gem_shmem_helper.h
> > +++ b/include/drm/drm_gem_shmem_helper.h
> > @@ -96,6 +96,11 @@ struct drm_gem_shmem_object {
> >        * The address are un-mapped when the count reaches zero.
> >        */
> >       unsigned int vmap_use_count;
> > +
> > +     /**
> > +      * @map_cached: map object cached (instead of using writecombine).
> > +      */
> > +     bool map_cached;
> >   };
> >
> >   #define to_drm_gem_shmem_obj(obj) \
> > diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
> > index a421a2eed48a..aad9324dcf4f 100644
> > --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> > +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> > @@ -254,11 +254,16 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
> >       if (ret)
> >               goto err_zero_use;
> >
> > -     if (obj->import_attach)
> > +     if (obj->import_attach) {
> >               shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
> > -     else
> > +     } else {
> > +             pgprot_t prot = PAGE_KERNEL;
> > +
> > +             if (!shmem->map_cached)
> > +                     prot = pgprot_writecombine(prot);
> >               shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
> > -                                 VM_MAP, pgprot_writecombine(PAGE_KERNEL));
> > +                                 VM_MAP, prot)
>
>
> Wouldn't a vmap with pgprot_writecombine() create conflicting mappings
> with the linear kernel map which is not write-combined? Or do you change
> the linear kernel map of the shmem pages somewhere? vmap bypassess at
> least the x86 PAT core mapping consistency check and this could
> potentially cause spuriously overwritten memory.

Yeah, I think this creates a conflicting alias.  It seems a call to
set_pages_array_wc here or changes elsewhere is needed..

But this is a pre-existing issue in the shmem helper.  There is also
no universal fix (e.g., set_pages_array_wc is x86 only)?  I would hope
this series can be merged sooner to fix the regression first.

>
>
> > +     }
> >
> >       if (!shmem->vaddr) {
> >               DRM_DEBUG_KMS("Failed to vmap pages\n");
> > @@ -540,7 +545,9 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
> >       }
> >
> >       vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
> > -     vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
> > +     vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
> > +     if (!shmem->map_cached)
> > +             vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
>
> Same thing here. Note that vmf_insert_page() which is used by the fault
> handler also bypasses the x86 PAT  consistency check, whereas
> vmf_insert_mixed() doesn't.
>
> >       vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
>
> At least with SME or SEV encryption, where shmem memory has its kernel
> map set to encrypted, creating conflicting mappings is explicitly
> disallowed.
> BTW, why is mmap mapping decrypted while vmap isn't?
>
> >       vma->vm_ops = &drm_gem_shmem_vm_ops;
> >
>
> Thanks,
> Thomas
>
>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Thomas Hellström (Intel) Feb. 27, 2020, 7:16 a.m. UTC | #4
On 2/27/20 1:02 AM, Chia-I Wu wrote:
> On Wed, Feb 26, 2020 at 10:25 AM Thomas Hellström (VMware)
> <thomas_os@shipmail.org> wrote:
>> Hi, Gerd,
>>
>>
>>
>>    #define to_drm_gem_shmem_obj(obj) \
>> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
>> index a421a2eed48a..aad9324dcf4f 100644
>> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
>> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
>> @@ -254,11 +254,16 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
>>        if (ret)
>>                goto err_zero_use;
>>
>> -     if (obj->import_attach)
>> +     if (obj->import_attach) {
>>                shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
>> -     else
>> +     } else {
>> +             pgprot_t prot = PAGE_KERNEL;
>> +
>> +             if (!shmem->map_cached)
>> +                     prot = pgprot_writecombine(prot);
>>                shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
>> -                                 VM_MAP, pgprot_writecombine(PAGE_KERNEL));
>> +                                 VM_MAP, prot)
>>
>> Wouldn't a vmap with pgprot_writecombine() create conflicting mappings
>> with the linear kernel map which is not write-combined? Or do you change
>> the linear kernel map of the shmem pages somewhere? vmap bypassess at
>> least the x86 PAT core mapping consistency check and this could
>> potentially cause spuriously overwritten memory.
> Yeah, I think this creates a conflicting alias.  It seems a call to
> set_pages_array_wc here or changes elsewhere is needed..
>
> But this is a pre-existing issue in the shmem helper.  There is also
> no universal fix (e.g., set_pages_array_wc is x86 only)?  I would hope
> this series can be merged sooner to fix the regression first.

The problem is this isn't doable with shmem, since that would create 
interesting problems when pages are swapped out.

So I would argue that the correct fix is to revert commit 0be895893607f 
("drm/shmem: switch shmem helper to &drm_gem_object_funcs.mmap")

If some drivers can argue that in their particular environment it's safe 
to use writecombine in this way, then modifying the page protection 
should be moved out to those drivers documenting that assumption. Using 
pgprot_decrypted() in this way could never be safe.

But IMO this should never be part of generic helpers.

/Thomas
Gerd Hoffmann Feb. 27, 2020, 7:53 a.m. UTC | #5
Hi,

> > +		if (!shmem->map_cached)
> > +			prot = pgprot_writecombine(prot);
> >   		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
> > -				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
> > +				    VM_MAP, prot)
> 
> 
> Wouldn't a vmap with pgprot_writecombine() create conflicting mappings with
> the linear kernel map which is not write-combined?

I think so, yes.

> Or do you change the linear kernel map of the shmem pages somewhere?

Havn't seen anything doing so while browsing the code.

> vmap bypassess at least the
> x86 PAT core mapping consistency check and this could potentially cause
> spuriously overwritten memory.

Well, I don't think the linear kernel map is ever used to access the
shmem gem objects.  So while this isn't exactly clean it shouldn't
cause problems in practice.

Suggestions how to fix that?

The reason I need cachable gem object mappings for virtio-gpu is because
we have a inconsistency between host (cached) and guest (wc) otherwise.

> > +	}
> >   	if (!shmem->vaddr) {
> >   		DRM_DEBUG_KMS("Failed to vmap pages\n");
> > @@ -540,7 +545,9 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
> >   	}
> >   	vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
> > -	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
> > +	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
> > +	if (!shmem->map_cached)
> > +		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
> 
> Same thing here. Note that vmf_insert_page() which is used by the fault
> handler also bypasses the x86 PAT  consistency check, whereas
> vmf_insert_mixed() doesn't.

vmap + mmap are consistent though, so this likewise shouldn't cause
issues in practice.

> >   	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
> 
> At least with SME or SEV encryption, where shmem memory has its kernel map
> set to encrypted, creating conflicting mappings is explicitly disallowed.
> BTW, why is mmap mapping decrypted while vmap isn't?

Ok, that sounds like a real problem.  Have to check.

cheers,
  Gerd

PS: Given we are discussing pre-existing issues in the code I think the
    series can be merged nevertheless.
Thomas Hellström (Intel) Feb. 27, 2020, 8:10 a.m. UTC | #6
On 2/27/20 8:53 AM, Gerd Hoffmann wrote:
>    Hi,
>
>>> +		if (!shmem->map_cached)
>>> +			prot = pgprot_writecombine(prot);
>>>    		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
>>> -				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
>>> +				    VM_MAP, prot)
>>
>> Wouldn't a vmap with pgprot_writecombine() create conflicting mappings with
>> the linear kernel map which is not write-combined?
> I think so, yes.
>
>> Or do you change the linear kernel map of the shmem pages somewhere?
> Havn't seen anything doing so while browsing the code.
>
>> vmap bypassess at least the
>> x86 PAT core mapping consistency check and this could potentially cause
>> spuriously overwritten memory.
> Well, I don't think the linear kernel map is ever used to access the
> shmem gem objects.  So while this isn't exactly clean it shouldn't
> cause problems in practice.
>
> Suggestions how to fix that?
>
So this has historically caused problems since the linear kernel map has 
been accessed while prefetching, even if it's never used. Some 
processors like AMD athlon actually even wrote back the prefetched 
contents without ever using it.

Also the linear kernel map could be cached somewhere because of the 
page's previous usage. (hibernation  for example?)

I think it might be safe for some integrated graphics where the driver 
maintainers can guarantee that it's safe on all particular processors 
used with that driver, but then IMO it should be moved out to those drivers.

Other drivers needing write-combine shouldn't really use shmem.

So again, to fix the regression, could we revert 0be895893607f 
("drm/shmem: switch shmem helper to &drm_gem_object_funcs.mmap") or does 
that have other implications?

/Thomas
Gerd Hoffmann Feb. 27, 2020, 10:56 a.m. UTC | #7
Hi,

> I think it might be safe for some integrated graphics where the driver
> maintainers can guarantee that it's safe on all particular processors used
> with that driver, but then IMO it should be moved out to those drivers.
> 
> Other drivers needing write-combine shouldn't really use shmem.
> 
> So again, to fix the regression, could we revert 0be895893607f ("drm/shmem:
> switch shmem helper to &drm_gem_object_funcs.mmap") or does that have other
> implications?

This patch isn't a regression.  The old code path has the
pgprot_writecombine() call in drm_gem_mmap_obj(), so the behavior
is the same before and afterwards.

But with the patch in place we can easily have shmem helpers do their
own thing instead of depending on whatever drm_gem_mmap_obj() is doing.
Just using cached mappings unconditionally would be perfectly fine for
virtio-gpu.

Not sure about the other users though.  I'd like to fix the virtio-gpu
regression (coming from ttm -> shmem switch) asap, and I don't feel like
changing the behavior for other drivers in 5.6-rc is a good idea.

So I'd like to push patches 1+2 to -fixes and sort everything else later
in -next.  OK?

cheers,
  Gerd
Thomas Hellström (Intel) Feb. 27, 2020, 12:16 p.m. UTC | #8
On 2/27/20 11:56 AM, Gerd Hoffmann wrote:
>    Hi,
>
>> I think it might be safe for some integrated graphics where the driver
>> maintainers can guarantee that it's safe on all particular processors used
>> with that driver, but then IMO it should be moved out to those drivers.
>>
>> Other drivers needing write-combine shouldn't really use shmem.
>>
>> So again, to fix the regression, could we revert 0be895893607f ("drm/shmem:
>> switch shmem helper to &drm_gem_object_funcs.mmap") or does that have other
>> implications?
> This patch isn't a regression.  The old code path has the
> pgprot_writecombine() call in drm_gem_mmap_obj(), so the behavior
> is the same before and afterwards.

OK. I wasn't checking where this all came from from the start...

> But with the patch in place we can easily have shmem helpers do their
> own thing instead of depending on whatever drm_gem_mmap_obj() is doing.
> Just using cached mappings unconditionally would be perfectly fine for
> virtio-gpu.
>
> Not sure about the other users though.  I'd like to fix the virtio-gpu
> regression (coming from ttm -> shmem switch) asap, and I don't feel like
> changing the behavior for other drivers in 5.6-rc is a good idea.
>
> So I'd like to push patches 1+2 to -fixes and sort everything else later
> in -next.  OK?

OK with me. Do we have any idea what drivers are actually using 
write-combine and decrypted?

/Thomas



>
> cheers,
>    Gerd
Gerd Hoffmann Feb. 27, 2020, 1:21 p.m. UTC | #9
Hi,

> > So I'd like to push patches 1+2 to -fixes and sort everything else later
> > in -next.  OK?
> 
> OK with me.

Done.

>> [ context: why shmem helpers use pgprot_writecombine + pgprot_decrypted?
>>            we get conflicting mappings because of that, linear kernel
>>            map vs. gem object vmap/mmap ]

> Do we have any idea what drivers are actually using
> write-combine and decrypted?

drivers/gpu/drm# find -name Kconfig* -print | xargs grep -l DRM_GEM_SHMEM_HELPER
./lima/Kconfig
./tiny/Kconfig
./cirrus/Kconfig
./Kconfig
./panfrost/Kconfig
./udl/Kconfig
./v3d/Kconfig
./virtio/Kconfig

virtio needs cached.
cirrus+udl should be ok with cached too.

Not clue about the others (lima, tiny, panfrost, v3d).  Maybe they use
write-combine just because this is what they got by default from
drm_gem_mmap_obj().  Maybe they actually need that.  Trying to Cc:
maintainters (and drop stable@).

On decrypted: I guess that is only relevant for virtual machines, i.e.
virtio-gpu and cirrus?

virtio-gpu needs it, otherwise the host can't show the virtual display.
cirrus bounces everything via blits to vram, so it should be ok without
decrypted.  I guess that implies we should make decrypted configurable.

cheers,
  Gerd
Thomas Hellström (Intel) Feb. 27, 2020, 1:44 p.m. UTC | #10
Hi,

On 2/27/20 2:21 PM, Gerd Hoffmann wrote:
>    Hi,
>
>>> So I'd like to push patches 1+2 to -fixes and sort everything else later
>>> in -next.  OK?
>> OK with me.
> Done.
>
>>> [ context: why shmem helpers use pgprot_writecombine + pgprot_decrypted?
>>>             we get conflicting mappings because of that, linear kernel
>>>             map vs. gem object vmap/mmap ]
>> Do we have any idea what drivers are actually using
>> write-combine and decrypted?
> drivers/gpu/drm# find -name Kconfig* -print | xargs grep -l DRM_GEM_SHMEM_HELPER
> ./lima/Kconfig
> ./tiny/Kconfig
> ./cirrus/Kconfig
> ./Kconfig
> ./panfrost/Kconfig
> ./udl/Kconfig
> ./v3d/Kconfig
> ./virtio/Kconfig
>
> virtio needs cached.
> cirrus+udl should be ok with cached too.
>
> Not clue about the others (lima, tiny, panfrost, v3d).  Maybe they use
> write-combine just because this is what they got by default from
> drm_gem_mmap_obj().  Maybe they actually need that.  Trying to Cc:
> maintainters (and drop stable@).
>
> On decrypted: I guess that is only relevant for virtual machines, i.e.
> virtio-gpu and cirrus?
>
> virtio-gpu needs it, otherwise the host can't show the virtual display.
> cirrus bounces everything via blits to vram, so it should be ok without
> decrypted.  I guess that implies we should make decrypted configurable.

Decrypted here is clearly incorrect and violates the SEV spec, 
regardless of a config option.
The only correct way is currently to use dma_alloc_coherent() and 
mmap_coherent() to allocate decrypted memory and then use the 
pgprot_decrypted flag.

Since the same page is aliased with two physical addresses (one 
encrypted and one decrypted) switching memory from one to the other 
needs extensive handling even to flush stale vmaps...

So if virtio-gpu needs it for some bos, it should move away from shmem 
for those bos.

/Thomas



>
> cheers,
>    Gerd
Thomas Hellström (Intel) Feb. 27, 2020, 1:49 p.m. UTC | #11
On 2/27/20 2:44 PM, Thomas Hellström (VMware) wrote:
> Hi,
>
> On 2/27/20 2:21 PM, Gerd Hoffmann wrote:
>>    Hi,
>>
>>>> So I'd like to push patches 1+2 to -fixes and sort everything else 
>>>> later
>>>> in -next.  OK?
>>> OK with me.
>> Done.
>>
>>>> [ context: why shmem helpers use pgprot_writecombine + 
>>>> pgprot_decrypted?
>>>>             we get conflicting mappings because of that, linear kernel
>>>>             map vs. gem object vmap/mmap ]
>>> Do we have any idea what drivers are actually using
>>> write-combine and decrypted?
>> drivers/gpu/drm# find -name Kconfig* -print | xargs grep -l 
>> DRM_GEM_SHMEM_HELPER
>> ./lima/Kconfig
>> ./tiny/Kconfig
>> ./cirrus/Kconfig
>> ./Kconfig
>> ./panfrost/Kconfig
>> ./udl/Kconfig
>> ./v3d/Kconfig
>> ./virtio/Kconfig
>>
>> virtio needs cached.
>> cirrus+udl should be ok with cached too.
>>
>> Not clue about the others (lima, tiny, panfrost, v3d).  Maybe they use
>> write-combine just because this is what they got by default from
>> drm_gem_mmap_obj().  Maybe they actually need that.  Trying to Cc:
>> maintainters (and drop stable@).
>>
>> On decrypted: I guess that is only relevant for virtual machines, i.e.
>> virtio-gpu and cirrus?
>>
>> virtio-gpu needs it, otherwise the host can't show the virtual display.
>> cirrus bounces everything via blits to vram, so it should be ok without
>> decrypted.  I guess that implies we should make decrypted configurable.
>
> Decrypted here is clearly incorrect and violates the SEV spec, 
> regardless of a config option.
> The only correct way is currently to use dma_alloc_coherent() and 
> mmap_coherent() to allocate decrypted memory and then use the 
> pgprot_decrypted flag.
>
> Since the same page is aliased with two physical addresses (one 
> encrypted and one decrypted) switching memory from one to the other 
> needs extensive handling even to flush stale vmaps...
>
> So if virtio-gpu needs it for some bos, it should move away from shmem 
> for those bos.

(But this is of course up to the virtio-gpu and drm maintainers), but 
IMO, again, pgprot_decrypted() shouldn't be part of generic helpers.

/Thomas
Gerd Hoffmann Feb. 28, 2020, 9:49 a.m. UTC | #12
Hi,

> > Not clue about the others (lima, tiny, panfrost, v3d).  Maybe they use
> > write-combine just because this is what they got by default from
> > drm_gem_mmap_obj().  Maybe they actually need that.  Trying to Cc:
> > maintainters (and drop stable@).

> > virtio-gpu needs it, otherwise the host can't show the virtual display.
> > cirrus bounces everything via blits to vram, so it should be ok without
> > decrypted.  I guess that implies we should make decrypted configurable.
> 
> Decrypted here is clearly incorrect and violates the SEV spec, regardless of
> a config option.
> 
> The only correct way is currently to use dma_alloc_coherent() and
> mmap_coherent() to allocate decrypted memory and then use the
> pgprot_decrypted flag.

Hmm, virtio-gpu uses the dma api to allow the host access the gem
object.  So I think I have to correct the statement above, if I
understands things correctly the dma api will use (properly allocated)
decrypted bounce buffers and the virtio-gpu shmem objects don't need
pgprot_decrypted mappings.

That leaves the question what to do about pgprot_writecombine().  Any
comments from the driver maintainers (see first paragraph)?

cheers,
  Gerd
Thomas Hellström (Intel) Feb. 28, 2020, 9:54 a.m. UTC | #13
On 2/28/20 10:49 AM, Gerd Hoffmann wrote:
>    Hi,
>
>>> Not clue about the others (lima, tiny, panfrost, v3d).  Maybe they use
>>> write-combine just because this is what they got by default from
>>> drm_gem_mmap_obj().  Maybe they actually need that.  Trying to Cc:
>>> maintainters (and drop stable@).
>>> virtio-gpu needs it, otherwise the host can't show the virtual display.
>>> cirrus bounces everything via blits to vram, so it should be ok without
>>> decrypted.  I guess that implies we should make decrypted configurable.
>> Decrypted here is clearly incorrect and violates the SEV spec, regardless of
>> a config option.
>>
>> The only correct way is currently to use dma_alloc_coherent() and
>> mmap_coherent() to allocate decrypted memory and then use the
>> pgprot_decrypted flag.
> Hmm, virtio-gpu uses the dma api to allow the host access the gem
> object.  So I think I have to correct the statement above, if I
> understands things correctly the dma api will use (properly allocated)
> decrypted bounce buffers and the virtio-gpu shmem objects don't need
> pgprot_decrypted mappings.

Yes, that sounds more correct. I wonder whether the "pgprot_decrypted()" 
perhaps remains from mapping VRAM gem buffers...

/Thomas


>
> That leaves the question what to do about pgprot_writecombine().  Any
> comments from the driver maintainers (see first paragraph)?
>
> cheers,
>    Gerd
Gerd Hoffmann Feb. 28, 2020, 10:46 a.m. UTC | #14
On Fri, Feb 28, 2020 at 10:54:54AM +0100, Thomas Hellström (VMware) wrote:
> On 2/28/20 10:49 AM, Gerd Hoffmann wrote:
> >    Hi,
> > 
> > > > Not clue about the others (lima, tiny, panfrost, v3d).  Maybe they use
> > > > write-combine just because this is what they got by default from
> > > > drm_gem_mmap_obj().  Maybe they actually need that.  Trying to Cc:
> > > > maintainters (and drop stable@).
> > > > virtio-gpu needs it, otherwise the host can't show the virtual display.
> > > > cirrus bounces everything via blits to vram, so it should be ok without
> > > > decrypted.  I guess that implies we should make decrypted configurable.
> > > Decrypted here is clearly incorrect and violates the SEV spec, regardless of
> > > a config option.
> > > 
> > > The only correct way is currently to use dma_alloc_coherent() and
> > > mmap_coherent() to allocate decrypted memory and then use the
> > > pgprot_decrypted flag.
> > Hmm, virtio-gpu uses the dma api to allow the host access the gem
> > object.  So I think I have to correct the statement above, if I
> > understands things correctly the dma api will use (properly allocated)
> > decrypted bounce buffers and the virtio-gpu shmem objects don't need
> > pgprot_decrypted mappings.
> 
> Yes, that sounds more correct. I wonder whether the "pgprot_decrypted()"
> perhaps remains from mapping VRAM gem buffers...

Commit 95cf9264d5f3 ("x86, drm, fbdev: Do not specify encrypted memory
for video mappings") added it, then it was kept through various changes.

cheers,
  Gerd
Qiang Yu March 2, 2020, 1:56 a.m. UTC | #15
On Thu, Feb 27, 2020 at 9:21 PM Gerd Hoffmann <kraxel@redhat.com> wrote:
>
>   Hi,
>
> > > So I'd like to push patches 1+2 to -fixes and sort everything else later
> > > in -next.  OK?
> >
> > OK with me.
>
> Done.
>
> >> [ context: why shmem helpers use pgprot_writecombine + pgprot_decrypted?
> >>            we get conflicting mappings because of that, linear kernel
> >>            map vs. gem object vmap/mmap ]
>
> > Do we have any idea what drivers are actually using
> > write-combine and decrypted?
>
> drivers/gpu/drm# find -name Kconfig* -print | xargs grep -l DRM_GEM_SHMEM_HELPER
> ./lima/Kconfig
> ./tiny/Kconfig
> ./cirrus/Kconfig
> ./Kconfig
> ./panfrost/Kconfig
> ./udl/Kconfig
> ./v3d/Kconfig
> ./virtio/Kconfig
>
> virtio needs cached.
> cirrus+udl should be ok with cached too.
>
> Not clue about the others (lima, tiny, panfrost, v3d).  Maybe they use
> write-combine just because this is what they got by default from
> drm_gem_mmap_obj().  Maybe they actually need that.  Trying to Cc:
> maintainters (and drop stable@).
>
lima driver needs writecombine mapped buffer for GPU hardware
working properly due to CPU-GPU not cache coherent. Although we
haven't meet problems caused by kernel/user map conflict, but I
do admit it's a problem as I met with amdgpu+arm before.

With TTM we can control page allocated and create a pool for
writecombine pages, but seems shmem is not friendly with
writecombine pages.

Thanks,
Qiang
diff mbox series

Patch

diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index e34a7b7f848a..294b2931c4cc 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -96,6 +96,11 @@  struct drm_gem_shmem_object {
 	 * The address are un-mapped when the count reaches zero.
 	 */
 	unsigned int vmap_use_count;
+
+	/**
+	 * @map_cached: map object cached (instead of using writecombine).
+	 */
+	bool map_cached;
 };
 
 #define to_drm_gem_shmem_obj(obj) \
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index a421a2eed48a..aad9324dcf4f 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -254,11 +254,16 @@  static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
 	if (ret)
 		goto err_zero_use;
 
-	if (obj->import_attach)
+	if (obj->import_attach) {
 		shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
-	else
+	} else {
+		pgprot_t prot = PAGE_KERNEL;
+
+		if (!shmem->map_cached)
+			prot = pgprot_writecombine(prot);
 		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
-				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+				    VM_MAP, prot);
+	}
 
 	if (!shmem->vaddr) {
 		DRM_DEBUG_KMS("Failed to vmap pages\n");
@@ -540,7 +545,9 @@  int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 	}
 
 	vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
-	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	if (!shmem->map_cached)
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
 	vma->vm_ops = &drm_gem_shmem_vm_ops;