drm/vgem: fix cache synchronization on arm/arm64
diff mbox series

Message ID 20190717211542.30482-1-robdclark@gmail.com
State New
Headers show
Series
  • drm/vgem: fix cache synchronization on arm/arm64
Related show

Commit Message

Rob Clark July 17, 2019, 9:15 p.m. UTC
From: Rob Clark <robdclark@chromium.org>

drm_clflush_pages() is a no-op on arm/arm64.  Instead, we can use
the dma_sync API.

Fixes failures w/ vgem_test.

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
An alternative approach to the series[1] I sent yesterday

On the plus side, it keeps the WC buffers and avoids any drm core
changes.  On the minus side, I don't think it will work (at least
on arm64) prior to v5.0[2], so the fix can't be backported very
far.

[1] https://patchwork.freedesktop.org/series/63771/
[2] depends on 356da6d0cde3323236977fce54c1f9612a742036

 drivers/gpu/drm/vgem/vgem_drv.c | 130 ++++++++++++++++++++------------
 1 file changed, 83 insertions(+), 47 deletions(-)

Comments

Daniel Vetter July 19, 2019, 9:21 a.m. UTC | #1
On Wed, Jul 17, 2019 at 02:15:37PM -0700, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
> 
> drm_clflush_pages() is a no-op on arm/arm64.  Instead, we can use
> the dma_sync API.
> 
> Fixes failures w/ vgem_test.
> 
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
> An alternative approach to the series[1] I sent yesterday
> 
> On the plus side, it keeps the WC buffers and avoids any drm core
> changes.  On the minus side, I don't think it will work (at least
> on arm64) prior to v5.0[2], so the fix can't be backported very
> far.

Yeah seems a lot more reasonable.

Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> [1] https://patchwork.freedesktop.org/series/63771/
> [2] depends on 356da6d0cde3323236977fce54c1f9612a742036
> 
>  drivers/gpu/drm/vgem/vgem_drv.c | 130 ++++++++++++++++++++------------
>  1 file changed, 83 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
> index 76d95b5e289c..6c9b5e20b3d4 100644
> --- a/drivers/gpu/drm/vgem/vgem_drv.c
> +++ b/drivers/gpu/drm/vgem/vgem_drv.c
> @@ -47,10 +47,16 @@ static struct vgem_device {
>  	struct platform_device *platform;
>  } *vgem_device;
>  
> +static void sync_and_unpin(struct drm_vgem_gem_object *bo);
> +static struct page **pin_and_sync(struct drm_vgem_gem_object *bo);
> +
>  static void vgem_gem_free_object(struct drm_gem_object *obj)
>  {
>  	struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);
>  
> +	if (!obj->import_attach)
> +		sync_and_unpin(vgem_obj);
> +
>  	kvfree(vgem_obj->pages);
>  	mutex_destroy(&vgem_obj->pages_lock);
>  
> @@ -78,40 +84,15 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
>  		return VM_FAULT_SIGBUS;
>  
>  	mutex_lock(&obj->pages_lock);
> +	if (!obj->pages)
> +		pin_and_sync(obj);
>  	if (obj->pages) {
>  		get_page(obj->pages[page_offset]);
>  		vmf->page = obj->pages[page_offset];
>  		ret = 0;
>  	}
>  	mutex_unlock(&obj->pages_lock);
> -	if (ret) {
> -		struct page *page;
> -
> -		page = shmem_read_mapping_page(
> -					file_inode(obj->base.filp)->i_mapping,
> -					page_offset);
> -		if (!IS_ERR(page)) {
> -			vmf->page = page;
> -			ret = 0;
> -		} else switch (PTR_ERR(page)) {
> -			case -ENOSPC:
> -			case -ENOMEM:
> -				ret = VM_FAULT_OOM;
> -				break;
> -			case -EBUSY:
> -				ret = VM_FAULT_RETRY;
> -				break;
> -			case -EFAULT:
> -			case -EINVAL:
> -				ret = VM_FAULT_SIGBUS;
> -				break;
> -			default:
> -				WARN_ON(PTR_ERR(page));
> -				ret = VM_FAULT_SIGBUS;
> -				break;
> -		}
>  
> -	}
>  	return ret;
>  }
>  
> @@ -277,32 +258,93 @@ static const struct file_operations vgem_driver_fops = {
>  	.release	= drm_release,
>  };
>  
> -static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo)
> +/* Called under pages_lock, except in free path (where it can't race): */
> +static void sync_and_unpin(struct drm_vgem_gem_object *bo)
>  {
> -	mutex_lock(&bo->pages_lock);
> -	if (bo->pages_pin_count++ == 0) {
> -		struct page **pages;
> +	struct drm_device *dev = bo->base.dev;
> +
> +	if (bo->table) {
> +		dma_sync_sg_for_cpu(dev->dev, bo->table->sgl,
> +				bo->table->nents, DMA_BIDIRECTIONAL);
> +		sg_free_table(bo->table);
> +		kfree(bo->table);
> +		bo->table = NULL;
> +	}
> +
> +	if (bo->pages) {
> +		drm_gem_put_pages(&bo->base, bo->pages, true, true);
> +		bo->pages = NULL;
> +	}
> +}
> +
> +static struct page **pin_and_sync(struct drm_vgem_gem_object *bo)
> +{
> +	struct drm_device *dev = bo->base.dev;
> +	int npages = bo->base.size >> PAGE_SHIFT;
> +	struct page **pages;
> +	struct sg_table *sgt;
> +
> +	WARN_ON(!mutex_is_locked(&bo->pages_lock));
> +
> +	pages = drm_gem_get_pages(&bo->base);
> +	if (IS_ERR(pages)) {
> +		bo->pages_pin_count--;
> +		mutex_unlock(&bo->pages_lock);
> +		return pages;
> +	}
>  
> -		pages = drm_gem_get_pages(&bo->base);
> -		if (IS_ERR(pages)) {
> -			bo->pages_pin_count--;
> -			mutex_unlock(&bo->pages_lock);
> -			return pages;
> -		}
> +	sgt = drm_prime_pages_to_sg(pages, npages);
> +	if (IS_ERR(sgt)) {
> +		dev_err(dev->dev,
> +			"failed to allocate sgt: %ld\n",
> +			PTR_ERR(bo->table));
> +		drm_gem_put_pages(&bo->base, pages, false, false);
> +		mutex_unlock(&bo->pages_lock);
> +		return ERR_CAST(bo->table);
> +	}
> +
> +	/*
> +	 * Flush the object from the CPU cache so that importers
> +	 * can rely on coherent indirect access via the exported
> +	 * dma-address.
> +	 */
> +	dma_sync_sg_for_device(dev->dev, sgt->sgl,
> +			sgt->nents, DMA_BIDIRECTIONAL);
> +
> +	bo->pages = pages;
> +	bo->table = sgt;
> +
> +	return pages;
> +}
> +
> +static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo)
> +{
> +	struct page **pages;
>  
> -		bo->pages = pages;
> +	mutex_lock(&bo->pages_lock);
> +	if (bo->pages_pin_count++ == 0 && !bo->pages) {
> +		pages = pin_and_sync(bo);
> +	} else {
> +		WARN_ON(!bo->pages);
> +		pages = bo->pages;
>  	}
>  	mutex_unlock(&bo->pages_lock);
>  
> -	return bo->pages;
> +	return pages;
>  }
>  
>  static void vgem_unpin_pages(struct drm_vgem_gem_object *bo)
>  {
> +	/*
> +	 * We shouldn't hit this for imported bo's.. in the import
> +	 * case we don't own the scatter-table
> +	 */
> +	WARN_ON(bo->base.import_attach);
> +
>  	mutex_lock(&bo->pages_lock);
>  	if (--bo->pages_pin_count == 0) {
> -		drm_gem_put_pages(&bo->base, bo->pages, true, true);
> -		bo->pages = NULL;
> +		WARN_ON(!bo->table);
> +		sync_and_unpin(bo);
>  	}
>  	mutex_unlock(&bo->pages_lock);
>  }
> @@ -310,18 +352,12 @@ static void vgem_unpin_pages(struct drm_vgem_gem_object *bo)
>  static int vgem_prime_pin(struct drm_gem_object *obj)
>  {
>  	struct drm_vgem_gem_object *bo = to_vgem_bo(obj);
> -	long n_pages = obj->size >> PAGE_SHIFT;
>  	struct page **pages;
>  
>  	pages = vgem_pin_pages(bo);
>  	if (IS_ERR(pages))
>  		return PTR_ERR(pages);
>  
> -	/* Flush the object from the CPU cache so that importers can rely
> -	 * on coherent indirect access via the exported dma-address.
> -	 */
> -	drm_clflush_pages(pages, n_pages);
> -
>  	return 0;
>  }
>  
> -- 
> 2.21.0
>
Sean Paul July 31, 2019, 7:23 p.m. UTC | #2
On Fri, Jul 19, 2019 at 11:21:53AM +0200, Daniel Vetter wrote:
> On Wed, Jul 17, 2019 at 02:15:37PM -0700, Rob Clark wrote:
> > From: Rob Clark <robdclark@chromium.org>
> > 
> > drm_clflush_pages() is a no-op on arm/arm64.  Instead, we can use
> > the dma_sync API.
> > 
> > Fixes failures w/ vgem_test.
> > 
> > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > ---
> > An alternative approach to the series[1] I sent yesterday
> > 
> > On the plus side, it keeps the WC buffers and avoids any drm core
> > changes.  On the minus side, I don't think it will work (at least
> > on arm64) prior to v5.0[2], so the fix can't be backported very
> > far.
> 
> Yeah seems a lot more reasonable.
> 
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Applied to drm-misc-fixes, thanks!

Sean

> > 
> > [1] https://patchwork.freedesktop.org/series/63771/
> > [2] depends on 356da6d0cde3323236977fce54c1f9612a742036
> > 
> >  drivers/gpu/drm/vgem/vgem_drv.c | 130 ++++++++++++++++++++------------
> >  1 file changed, 83 insertions(+), 47 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
> > index 76d95b5e289c..6c9b5e20b3d4 100644
> > --- a/drivers/gpu/drm/vgem/vgem_drv.c
> > +++ b/drivers/gpu/drm/vgem/vgem_drv.c
> > @@ -47,10 +47,16 @@ static struct vgem_device {
> >  	struct platform_device *platform;
> >  } *vgem_device;
> >  
> > +static void sync_and_unpin(struct drm_vgem_gem_object *bo);
> > +static struct page **pin_and_sync(struct drm_vgem_gem_object *bo);
> > +
> >  static void vgem_gem_free_object(struct drm_gem_object *obj)
> >  {
> >  	struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);
> >  
> > +	if (!obj->import_attach)
> > +		sync_and_unpin(vgem_obj);
> > +
> >  	kvfree(vgem_obj->pages);
> >  	mutex_destroy(&vgem_obj->pages_lock);
> >  
> > @@ -78,40 +84,15 @@ static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
> >  		return VM_FAULT_SIGBUS;
> >  
> >  	mutex_lock(&obj->pages_lock);
> > +	if (!obj->pages)
> > +		pin_and_sync(obj);
> >  	if (obj->pages) {
> >  		get_page(obj->pages[page_offset]);
> >  		vmf->page = obj->pages[page_offset];
> >  		ret = 0;
> >  	}
> >  	mutex_unlock(&obj->pages_lock);
> > -	if (ret) {
> > -		struct page *page;
> > -
> > -		page = shmem_read_mapping_page(
> > -					file_inode(obj->base.filp)->i_mapping,
> > -					page_offset);
> > -		if (!IS_ERR(page)) {
> > -			vmf->page = page;
> > -			ret = 0;
> > -		} else switch (PTR_ERR(page)) {
> > -			case -ENOSPC:
> > -			case -ENOMEM:
> > -				ret = VM_FAULT_OOM;
> > -				break;
> > -			case -EBUSY:
> > -				ret = VM_FAULT_RETRY;
> > -				break;
> > -			case -EFAULT:
> > -			case -EINVAL:
> > -				ret = VM_FAULT_SIGBUS;
> > -				break;
> > -			default:
> > -				WARN_ON(PTR_ERR(page));
> > -				ret = VM_FAULT_SIGBUS;
> > -				break;
> > -		}
> >  
> > -	}
> >  	return ret;
> >  }
> >  
> > @@ -277,32 +258,93 @@ static const struct file_operations vgem_driver_fops = {
> >  	.release	= drm_release,
> >  };
> >  
> > -static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo)
> > +/* Called under pages_lock, except in free path (where it can't race): */
> > +static void sync_and_unpin(struct drm_vgem_gem_object *bo)
> >  {
> > -	mutex_lock(&bo->pages_lock);
> > -	if (bo->pages_pin_count++ == 0) {
> > -		struct page **pages;
> > +	struct drm_device *dev = bo->base.dev;
> > +
> > +	if (bo->table) {
> > +		dma_sync_sg_for_cpu(dev->dev, bo->table->sgl,
> > +				bo->table->nents, DMA_BIDIRECTIONAL);
> > +		sg_free_table(bo->table);
> > +		kfree(bo->table);
> > +		bo->table = NULL;
> > +	}
> > +
> > +	if (bo->pages) {
> > +		drm_gem_put_pages(&bo->base, bo->pages, true, true);
> > +		bo->pages = NULL;
> > +	}
> > +}
> > +
> > +static struct page **pin_and_sync(struct drm_vgem_gem_object *bo)
> > +{
> > +	struct drm_device *dev = bo->base.dev;
> > +	int npages = bo->base.size >> PAGE_SHIFT;
> > +	struct page **pages;
> > +	struct sg_table *sgt;
> > +
> > +	WARN_ON(!mutex_is_locked(&bo->pages_lock));
> > +
> > +	pages = drm_gem_get_pages(&bo->base);
> > +	if (IS_ERR(pages)) {
> > +		bo->pages_pin_count--;
> > +		mutex_unlock(&bo->pages_lock);
> > +		return pages;
> > +	}
> >  
> > -		pages = drm_gem_get_pages(&bo->base);
> > -		if (IS_ERR(pages)) {
> > -			bo->pages_pin_count--;
> > -			mutex_unlock(&bo->pages_lock);
> > -			return pages;
> > -		}
> > +	sgt = drm_prime_pages_to_sg(pages, npages);
> > +	if (IS_ERR(sgt)) {
> > +		dev_err(dev->dev,
> > +			"failed to allocate sgt: %ld\n",
> > +			PTR_ERR(bo->table));
> > +		drm_gem_put_pages(&bo->base, pages, false, false);
> > +		mutex_unlock(&bo->pages_lock);
> > +		return ERR_CAST(bo->table);
> > +	}
> > +
> > +	/*
> > +	 * Flush the object from the CPU cache so that importers
> > +	 * can rely on coherent indirect access via the exported
> > +	 * dma-address.
> > +	 */
> > +	dma_sync_sg_for_device(dev->dev, sgt->sgl,
> > +			sgt->nents, DMA_BIDIRECTIONAL);
> > +
> > +	bo->pages = pages;
> > +	bo->table = sgt;
> > +
> > +	return pages;
> > +}
> > +
> > +static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo)
> > +{
> > +	struct page **pages;
> >  
> > -		bo->pages = pages;
> > +	mutex_lock(&bo->pages_lock);
> > +	if (bo->pages_pin_count++ == 0 && !bo->pages) {
> > +		pages = pin_and_sync(bo);
> > +	} else {
> > +		WARN_ON(!bo->pages);
> > +		pages = bo->pages;
> >  	}
> >  	mutex_unlock(&bo->pages_lock);
> >  
> > -	return bo->pages;
> > +	return pages;
> >  }
> >  
> >  static void vgem_unpin_pages(struct drm_vgem_gem_object *bo)
> >  {
> > +	/*
> > +	 * We shouldn't hit this for imported bo's.. in the import
> > +	 * case we don't own the scatter-table
> > +	 */
> > +	WARN_ON(bo->base.import_attach);
> > +
> >  	mutex_lock(&bo->pages_lock);
> >  	if (--bo->pages_pin_count == 0) {
> > -		drm_gem_put_pages(&bo->base, bo->pages, true, true);
> > -		bo->pages = NULL;
> > +		WARN_ON(!bo->table);
> > +		sync_and_unpin(bo);
> >  	}
> >  	mutex_unlock(&bo->pages_lock);
> >  }
> > @@ -310,18 +352,12 @@ static void vgem_unpin_pages(struct drm_vgem_gem_object *bo)
> >  static int vgem_prime_pin(struct drm_gem_object *obj)
> >  {
> >  	struct drm_vgem_gem_object *bo = to_vgem_bo(obj);
> > -	long n_pages = obj->size >> PAGE_SHIFT;
> >  	struct page **pages;
> >  
> >  	pages = vgem_pin_pages(bo);
> >  	if (IS_ERR(pages))
> >  		return PTR_ERR(pages);
> >  
> > -	/* Flush the object from the CPU cache so that importers can rely
> > -	 * on coherent indirect access via the exported dma-address.
> > -	 */
> > -	drm_clflush_pages(pages, n_pages);
> > -
> >  	return 0;
> >  }
> >  
> > -- 
> > 2.21.0
> > 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Chris Wilson Aug. 1, 2019, 12:40 p.m. UTC | #3
Quoting Sean Paul (2019-07-31 20:23:31)
> On Fri, Jul 19, 2019 at 11:21:53AM +0200, Daniel Vetter wrote:
> > On Wed, Jul 17, 2019 at 02:15:37PM -0700, Rob Clark wrote:
> > > From: Rob Clark <robdclark@chromium.org>
> > > 
> > > drm_clflush_pages() is a no-op on arm/arm64.  Instead, we can use
> > > the dma_sync API.
> > > 
> > > Fixes failures w/ vgem_test.
> > > 
> > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > ---
> > > An alternative approach to the series[1] I sent yesterday
> > > 
> > > On the plus side, it keeps the WC buffers and avoids any drm core
> > > changes.  On the minus side, I don't think it will work (at least
> > > on arm64) prior to v5.0[2], so the fix can't be backported very
> > > far.
> > 
> > Yeah seems a lot more reasonable.
> > 
> > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> Applied to drm-misc-fixes, thanks!

But it didn't actually fix the failures in CI.
-Chris
Rob Clark Aug. 1, 2019, 3:18 p.m. UTC | #4
On Thu, Aug 1, 2019 at 5:40 AM Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Quoting Sean Paul (2019-07-31 20:23:31)
> > On Fri, Jul 19, 2019 at 11:21:53AM +0200, Daniel Vetter wrote:
> > > On Wed, Jul 17, 2019 at 02:15:37PM -0700, Rob Clark wrote:
> > > > From: Rob Clark <robdclark@chromium.org>
> > > >
> > > > drm_clflush_pages() is a no-op on arm/arm64.  Instead, we can use
> > > > the dma_sync API.
> > > >
> > > > Fixes failures w/ vgem_test.
> > > >
> > > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > > ---
> > > > An alternative approach to the series[1] I sent yesterday
> > > >
> > > > On the plus side, it keeps the WC buffers and avoids any drm core
> > > > changes.  On the minus side, I don't think it will work (at least
> > > > on arm64) prior to v5.0[2], so the fix can't be backported very
> > > > far.
> > >
> > > Yeah seems a lot more reasonable.
> > >
> > > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> >
> > Applied to drm-misc-fixes, thanks!
>
> But it didn't actually fix the failures in CI.

Hmm, that is unfortunate, I'd assumed that silence meant latest
version was working in CI..

dma_sync_sg_* doesn't work on x86?  It would be kinda unfortunate to
have vgem only work on x86 *or* arm..  maybe bringing back
drm_clflush_pages() could make it work in both cases.

BR,
-R
Chris Wilson Aug. 1, 2019, 3:34 p.m. UTC | #5
Quoting Rob Clark (2019-08-01 16:18:45)
> On Thu, Aug 1, 2019 at 5:40 AM Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > Quoting Sean Paul (2019-07-31 20:23:31)
> > > On Fri, Jul 19, 2019 at 11:21:53AM +0200, Daniel Vetter wrote:
> > > > On Wed, Jul 17, 2019 at 02:15:37PM -0700, Rob Clark wrote:
> > > > > From: Rob Clark <robdclark@chromium.org>
> > > > >
> > > > > drm_clflush_pages() is a no-op on arm/arm64.  Instead, we can use
> > > > > the dma_sync API.
> > > > >
> > > > > Fixes failures w/ vgem_test.
> > > > >
> > > > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > > > ---
> > > > > An alternative approach to the series[1] I sent yesterday
> > > > >
> > > > > On the plus side, it keeps the WC buffers and avoids any drm core
> > > > > changes.  On the minus side, I don't think it will work (at least
> > > > > on arm64) prior to v5.0[2], so the fix can't be backported very
> > > > > far.
> > > >
> > > > Yeah seems a lot more reasonable.
> > > >
> > > > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > >
> > > Applied to drm-misc-fixes, thanks!
> >
> > But it didn't actually fix the failures in CI.
> 
> Hmm, that is unfortunate, I'd assumed that silence meant latest
> version was working in CI..

Ah, it takes a post to intel-gfx@ for CI to pick up patches atm.
 
> dma_sync_sg_* doesn't work on x86?  It would be kinda unfortunate to
> have vgem only work on x86 *or* arm..  maybe bringing back
> drm_clflush_pages() could make it work in both cases.

I think it stems from the expectation that vgem provides "device
coherency" for CPU access. From the testing perspective, it's nice to
emulate HW interactions; but maybe that is just beyond the general
capabilities and we cannot simply use vgem as we do currently. That
would leave a hole for mocking prime in CI that needs filling :(
-Chris
Rob Clark Aug. 1, 2019, 3:57 p.m. UTC | #6
On Thu, Aug 1, 2019 at 8:34 AM Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Quoting Rob Clark (2019-08-01 16:18:45)
> > On Thu, Aug 1, 2019 at 5:40 AM Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > >
> > > Quoting Sean Paul (2019-07-31 20:23:31)
> > > > On Fri, Jul 19, 2019 at 11:21:53AM +0200, Daniel Vetter wrote:
> > > > > On Wed, Jul 17, 2019 at 02:15:37PM -0700, Rob Clark wrote:
> > > > > > From: Rob Clark <robdclark@chromium.org>
> > > > > >
> > > > > > drm_clflush_pages() is a no-op on arm/arm64.  Instead, we can use
> > > > > > the dma_sync API.
> > > > > >
> > > > > > Fixes failures w/ vgem_test.
> > > > > >
> > > > > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > > > > ---
> > > > > > An alternative approach to the series[1] I sent yesterday
> > > > > >
> > > > > > On the plus side, it keeps the WC buffers and avoids any drm core
> > > > > > changes.  On the minus side, I don't think it will work (at least
> > > > > > on arm64) prior to v5.0[2], so the fix can't be backported very
> > > > > > far.
> > > > >
> > > > > Yeah seems a lot more reasonable.
> > > > >
> > > > > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > >
> > > > Applied to drm-misc-fixes, thanks!
> > >
> > > But it didn't actually fix the failures in CI.
> >
> > Hmm, that is unfortunate, I'd assumed that silence meant latest
> > version was working in CI..
>
> > Ah, it takes a post to intel-gfx@ for CI to pick up patches atm.
>
> > dma_sync_sg_* doesn't work on x86?  It would be kinda unfortunate to
> > have vgem only work on x86 *or* arm..  maybe bringing back
> > drm_clflush_pages() could make it work in both cases.
>
> I think it stems from the expectation that vgem provides "device
> coherency" for CPU access. From the testing perspective, it's nice to
> emulate HW interactions; but maybe that is just beyond the general
> capabilities and we cannot simply use vgem as we do currently. That
> would leave a hole for mocking prime in CI that needs filling :(

yeah, being a "fake" device makes things a bit rough..  (I wonder if
there is some way to do a VM w/ both virgl and i915/gvt to get some
more "real" testing?)

OTOH, I kinda want to make drm_cache work on arm64, since dma-mapping
is already problematic, which would make this patch unnecessary.  (I'm
still not entirely sure what to do about 32b arm..)

BR,
-R

Patch
diff mbox series

diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 76d95b5e289c..6c9b5e20b3d4 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -47,10 +47,16 @@  static struct vgem_device {
 	struct platform_device *platform;
 } *vgem_device;
 
+static void sync_and_unpin(struct drm_vgem_gem_object *bo);
+static struct page **pin_and_sync(struct drm_vgem_gem_object *bo);
+
 static void vgem_gem_free_object(struct drm_gem_object *obj)
 {
 	struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);
 
+	if (!obj->import_attach)
+		sync_and_unpin(vgem_obj);
+
 	kvfree(vgem_obj->pages);
 	mutex_destroy(&vgem_obj->pages_lock);
 
@@ -78,40 +84,15 @@  static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	mutex_lock(&obj->pages_lock);
+	if (!obj->pages)
+		pin_and_sync(obj);
 	if (obj->pages) {
 		get_page(obj->pages[page_offset]);
 		vmf->page = obj->pages[page_offset];
 		ret = 0;
 	}
 	mutex_unlock(&obj->pages_lock);
-	if (ret) {
-		struct page *page;
-
-		page = shmem_read_mapping_page(
-					file_inode(obj->base.filp)->i_mapping,
-					page_offset);
-		if (!IS_ERR(page)) {
-			vmf->page = page;
-			ret = 0;
-		} else switch (PTR_ERR(page)) {
-			case -ENOSPC:
-			case -ENOMEM:
-				ret = VM_FAULT_OOM;
-				break;
-			case -EBUSY:
-				ret = VM_FAULT_RETRY;
-				break;
-			case -EFAULT:
-			case -EINVAL:
-				ret = VM_FAULT_SIGBUS;
-				break;
-			default:
-				WARN_ON(PTR_ERR(page));
-				ret = VM_FAULT_SIGBUS;
-				break;
-		}
 
-	}
 	return ret;
 }
 
@@ -277,32 +258,93 @@  static const struct file_operations vgem_driver_fops = {
 	.release	= drm_release,
 };
 
-static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo)
+/* Called under pages_lock, except in free path (where it can't race): */
+static void sync_and_unpin(struct drm_vgem_gem_object *bo)
 {
-	mutex_lock(&bo->pages_lock);
-	if (bo->pages_pin_count++ == 0) {
-		struct page **pages;
+	struct drm_device *dev = bo->base.dev;
+
+	if (bo->table) {
+		dma_sync_sg_for_cpu(dev->dev, bo->table->sgl,
+				bo->table->nents, DMA_BIDIRECTIONAL);
+		sg_free_table(bo->table);
+		kfree(bo->table);
+		bo->table = NULL;
+	}
+
+	if (bo->pages) {
+		drm_gem_put_pages(&bo->base, bo->pages, true, true);
+		bo->pages = NULL;
+	}
+}
+
+static struct page **pin_and_sync(struct drm_vgem_gem_object *bo)
+{
+	struct drm_device *dev = bo->base.dev;
+	int npages = bo->base.size >> PAGE_SHIFT;
+	struct page **pages;
+	struct sg_table *sgt;
+
+	WARN_ON(!mutex_is_locked(&bo->pages_lock));
+
+	pages = drm_gem_get_pages(&bo->base);
+	if (IS_ERR(pages)) {
+		bo->pages_pin_count--;
+		mutex_unlock(&bo->pages_lock);
+		return pages;
+	}
 
-		pages = drm_gem_get_pages(&bo->base);
-		if (IS_ERR(pages)) {
-			bo->pages_pin_count--;
-			mutex_unlock(&bo->pages_lock);
-			return pages;
-		}
+	sgt = drm_prime_pages_to_sg(pages, npages);
+	if (IS_ERR(sgt)) {
+		dev_err(dev->dev,
+			"failed to allocate sgt: %ld\n",
+			PTR_ERR(bo->table));
+		drm_gem_put_pages(&bo->base, pages, false, false);
+		mutex_unlock(&bo->pages_lock);
+		return ERR_CAST(bo->table);
+	}
+
+	/*
+	 * Flush the object from the CPU cache so that importers
+	 * can rely on coherent indirect access via the exported
+	 * dma-address.
+	 */
+	dma_sync_sg_for_device(dev->dev, sgt->sgl,
+			sgt->nents, DMA_BIDIRECTIONAL);
+
+	bo->pages = pages;
+	bo->table = sgt;
+
+	return pages;
+}
+
+static struct page **vgem_pin_pages(struct drm_vgem_gem_object *bo)
+{
+	struct page **pages;
 
-		bo->pages = pages;
+	mutex_lock(&bo->pages_lock);
+	if (bo->pages_pin_count++ == 0 && !bo->pages) {
+		pages = pin_and_sync(bo);
+	} else {
+		WARN_ON(!bo->pages);
+		pages = bo->pages;
 	}
 	mutex_unlock(&bo->pages_lock);
 
-	return bo->pages;
+	return pages;
 }
 
 static void vgem_unpin_pages(struct drm_vgem_gem_object *bo)
 {
+	/*
+	 * We shouldn't hit this for imported bo's.. in the import
+	 * case we don't own the scatter-table
+	 */
+	WARN_ON(bo->base.import_attach);
+
 	mutex_lock(&bo->pages_lock);
 	if (--bo->pages_pin_count == 0) {
-		drm_gem_put_pages(&bo->base, bo->pages, true, true);
-		bo->pages = NULL;
+		WARN_ON(!bo->table);
+		sync_and_unpin(bo);
 	}
 	mutex_unlock(&bo->pages_lock);
 }
@@ -310,18 +352,12 @@  static void vgem_unpin_pages(struct drm_vgem_gem_object *bo)
 static int vgem_prime_pin(struct drm_gem_object *obj)
 {
 	struct drm_vgem_gem_object *bo = to_vgem_bo(obj);
-	long n_pages = obj->size >> PAGE_SHIFT;
 	struct page **pages;
 
 	pages = vgem_pin_pages(bo);
 	if (IS_ERR(pages))
 		return PTR_ERR(pages);
 
-	/* Flush the object from the CPU cache so that importers can rely
-	 * on coherent indirect access via the exported dma-address.
-	 */
-	drm_clflush_pages(pages, n_pages);
-
 	return 0;
 }