diff mbox

[v4,4/6] drm/nouveau: synchronize BOs when required

Message ID 1404807961-30530-5-git-send-email-acourbot@nvidia.com (mailing list archive)
State New, archived
Headers show

Commit Message

Alexandre Courbot July 8, 2014, 8:25 a.m. UTC
On architectures for which access to GPU memory is non-coherent,
caches need to be flushed and invalidated explicitly when BO control
changes between CPU and GPU.

This patch adds buffer synchronization functions which invoke the
correct API (PCI or DMA) to ensure synchronization is effective.

Based on the TTM DMA cache helper patches by Lucas Stach.

Signed-off-by: Lucas Stach <dev@lynxeye.de>
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
---
 drivers/gpu/drm/nouveau/nouveau_bo.c  | 56 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_bo.h  |  2 ++
 drivers/gpu/drm/nouveau/nouveau_gem.c | 12 ++++++++
 3 files changed, 70 insertions(+)

Comments

Daniel Vetter July 10, 2014, 1:04 p.m. UTC | #1
On Tue, Jul 08, 2014 at 05:25:59PM +0900, Alexandre Courbot wrote:
> On architectures for which access to GPU memory is non-coherent,
> caches need to be flushed and invalidated explicitly when BO control
> changes between CPU and GPU.
> 
> This patch adds buffer synchronization functions which invokes the
> correct API (PCI or DMA) to ensure synchronization is effective.
> 
> Based on the TTM DMA cache helper patches by Lucas Stach.
> 
> Signed-off-by: Lucas Stach <dev@lynxeye.de>
> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
> ---
>  drivers/gpu/drm/nouveau/nouveau_bo.c  | 56 +++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/nouveau/nouveau_bo.h  |  2 ++
>  drivers/gpu/drm/nouveau/nouveau_gem.c | 12 ++++++++
>  3 files changed, 70 insertions(+)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index 67e9e8e2e2ec..47e4e8886769 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -402,6 +402,60 @@ nouveau_bo_unmap(struct nouveau_bo *nvbo)
>  		ttm_bo_kunmap(&nvbo->kmap);
>  }
>  
> +void
> +nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
> +{
> +	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
> +	struct nouveau_device *device = nouveau_dev(drm->dev);
> +	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
> +	int i;
> +
> +	if (!ttm_dma)
> +		return;
> +
> +	if (nv_device_is_cpu_coherent(device) || nvbo->force_coherent)
> +		return;

Is the is_cpu_coherent check really required? On coherent platforms the
sync_for_foo should be a noop. It's the dma api's job to encapsulate this
knowledge so that drivers can be blissfully ignorant. The explicit
is_coherent check makes this a bit leaky. And same comment that the
underlying bus-specific dma-mapping functions are identical.
-Daniel

> +
> +	if (nv_device_is_pci(device)) {
> +		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
> +			pci_dma_sync_single_for_device(device->pdev,
> +				ttm_dma->dma_address[i], PAGE_SIZE,
> +				PCI_DMA_TODEVICE);
> +	} else {
> +		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
> +			dma_sync_single_for_device(nv_device_base(device),
> +				ttm_dma->dma_address[i], PAGE_SIZE,
> +				DMA_TO_DEVICE);
> +	}
> +}
> +
> +void
> +nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
> +{
> +	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
> +	struct nouveau_device *device = nouveau_dev(drm->dev);
> +	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
> +	int i;
> +
> +	if (!ttm_dma)
> +		return;
> +
> +	if (nv_device_is_cpu_coherent(device) || nvbo->force_coherent)
> +		return;
> +
> +	if (nv_device_is_pci(device)) {
> +		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
> +			pci_dma_sync_single_for_cpu(device->pdev,
> +				ttm_dma->dma_address[i], PAGE_SIZE,
> +				PCI_DMA_FROMDEVICE);
> +	} else {
> +		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
> +			dma_sync_single_for_cpu(nv_device_base(device),
> +				ttm_dma->dma_address[i], PAGE_SIZE,
> +				DMA_FROM_DEVICE);
> +	}
> +}
> +
>  int
>  nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
>  		    bool no_wait_gpu)
> @@ -418,6 +472,8 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
>  	if (!ttm)
>  		return ret;
>  
> +	nouveau_bo_sync_for_device(nvbo);
> +
>  	device = nouveau_dev(nouveau_bdev(ttm->bdev)->dev);
>  	nv_wr32(device, 0x70004, 0x00000001);
>  	if (!nv_wait(device, 0x070004, 0x00000001, 0x00000000))
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
> index ff17c1f432fc..fa42298d2dca 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
> @@ -81,6 +81,8 @@ void nouveau_bo_wr32(struct nouveau_bo *, unsigned index, u32 val);
>  void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *);
>  int  nouveau_bo_validate(struct nouveau_bo *, bool interruptible,
>  			 bool no_wait_gpu);
> +void nouveau_bo_sync_for_device(struct nouveau_bo *nvbo);
> +void nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo);
>  
>  struct nouveau_vma *
>  nouveau_bo_vma_find(struct nouveau_bo *, struct nouveau_vm *);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
> index c90c0dc0afe8..08829a720891 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
> @@ -896,6 +896,7 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
>  	spin_lock(&nvbo->bo.bdev->fence_lock);
>  	ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
>  	spin_unlock(&nvbo->bo.bdev->fence_lock);
> +	nouveau_bo_sync_for_cpu(nvbo);
>  	drm_gem_object_unreference_unlocked(gem);
>  	return ret;
>  }
> @@ -904,6 +905,17 @@ int
>  nouveau_gem_ioctl_cpu_fini(struct drm_device *dev, void *data,
>  			   struct drm_file *file_priv)
>  {
> +	struct drm_nouveau_gem_cpu_fini *req = data;
> +	struct drm_gem_object *gem;
> +	struct nouveau_bo *nvbo;
> +
> +	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
> +	if (!gem)
> +		return -ENOENT;
> +	nvbo = nouveau_gem_object(gem);
> +
> +	nouveau_bo_sync_for_device(nvbo);
> +	drm_gem_object_unreference_unlocked(gem);
>  	return 0;
>  }
>  
> -- 
> 2.0.0
> 
> _______________________________________________
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau
Alexandre Courbot July 11, 2014, 2:40 a.m. UTC | #2
On 07/10/2014 10:04 PM, Daniel Vetter wrote:
> On Tue, Jul 08, 2014 at 05:25:59PM +0900, Alexandre Courbot wrote:
>> On architectures for which access to GPU memory is non-coherent,
>> caches need to be flushed and invalidated explicitly when BO control
>> changes between CPU and GPU.
>>
>> This patch adds buffer synchronization functions which invokes the
>> correct API (PCI or DMA) to ensure synchronization is effective.
>>
>> Based on the TTM DMA cache helper patches by Lucas Stach.
>>
>> Signed-off-by: Lucas Stach <dev@lynxeye.de>
>> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
>> ---
>>   drivers/gpu/drm/nouveau/nouveau_bo.c  | 56 +++++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/nouveau/nouveau_bo.h  |  2 ++
>>   drivers/gpu/drm/nouveau/nouveau_gem.c | 12 ++++++++
>>   3 files changed, 70 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> index 67e9e8e2e2ec..47e4e8886769 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> @@ -402,6 +402,60 @@ nouveau_bo_unmap(struct nouveau_bo *nvbo)
>>   		ttm_bo_kunmap(&nvbo->kmap);
>>   }
>>
>> +void
>> +nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
>> +{
>> +	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
>> +	struct nouveau_device *device = nouveau_dev(drm->dev);
>> +	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
>> +	int i;
>> +
>> +	if (!ttm_dma)
>> +		return;
>> +
>> +	if (nv_device_is_cpu_coherent(device) || nvbo->force_coherent)
>> +		return;
>
> Is the is_cpu_coherent check really required? On coherent platforms the
> sync_for_foo should be a noop. It's the dma api's job to encapsulate this
> knowledge so that drivers can be blissfully ignorant. The explicit
> is_coherent check makes this a bit leaky. And same comment that underlying
> the bus-specifics dma-mapping functions are identical.

I think you are right, the is_cpu_coherent check should not be needed 
here. I still think we should have separate paths for the PCI/DMA cases 
though, unless you can point me to a source that clearly states that the 
PCI API is deprecated and that DMA should be used instead.
Daniel Vetter July 11, 2014, 7:41 a.m. UTC | #3
On Fri, Jul 11, 2014 at 11:40:27AM +0900, Alexandre Courbot wrote:
> On 07/10/2014 10:04 PM, Daniel Vetter wrote:
> >On Tue, Jul 08, 2014 at 05:25:59PM +0900, Alexandre Courbot wrote:
> >>On architectures for which access to GPU memory is non-coherent,
> >>caches need to be flushed and invalidated explicitly when BO control
> >>changes between CPU and GPU.
> >>
> >>This patch adds buffer synchronization functions which invokes the
> >>correct API (PCI or DMA) to ensure synchronization is effective.
> >>
> >>Based on the TTM DMA cache helper patches by Lucas Stach.
> >>
> >>Signed-off-by: Lucas Stach <dev@lynxeye.de>
> >>Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
> >>---
> >>  drivers/gpu/drm/nouveau/nouveau_bo.c  | 56 +++++++++++++++++++++++++++++++++++
> >>  drivers/gpu/drm/nouveau/nouveau_bo.h  |  2 ++
> >>  drivers/gpu/drm/nouveau/nouveau_gem.c | 12 ++++++++
> >>  3 files changed, 70 insertions(+)
> >>
> >>diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> >>index 67e9e8e2e2ec..47e4e8886769 100644
> >>--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> >>+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> >>@@ -402,6 +402,60 @@ nouveau_bo_unmap(struct nouveau_bo *nvbo)
> >>  		ttm_bo_kunmap(&nvbo->kmap);
> >>  }
> >>
> >>+void
> >>+nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
> >>+{
> >>+	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
> >>+	struct nouveau_device *device = nouveau_dev(drm->dev);
> >>+	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
> >>+	int i;
> >>+
> >>+	if (!ttm_dma)
> >>+		return;
> >>+
> >>+	if (nv_device_is_cpu_coherent(device) || nvbo->force_coherent)
> >>+		return;
> >
> >Is the is_cpu_coherent check really required? On coherent platforms the
> >sync_for_foo should be a noop. It's the dma api's job to encapsulate this
> >knowledge so that drivers can be blissfully ignorant. The explicit
> >is_coherent check makes this a bit leaky. And same comment that underlying
> >the bus-specifics dma-mapping functions are identical.
> 
> I think you are right, the is_cpu_coherent check should not be needed here.
> I still think we should have separate paths for the PCI/DMA cases though,
> unless you can point me to a source that clearly states that the PCI API is
> deprecated and that DMA should be used instead.

Ah, on 2nd look I've found it again. Quoting
Documentation/DMA-API-HOWTO.txt:

"Note that the DMA API works with any bus independent of the underlying
microprocessor architecture. You should use the DMA API rather than the
bus-specific DMA API, i.e., use the dma_map_*() interfaces rather than the
pci_map_*() interfaces."

The advice is fairly strong here I think ;-) And imo the idea makes sense,
since it allows drivers like nouveau here to care much less about the
actual bus used to get data to/from the ip block. And if you look at intel
gfx it makes even more sense since the pci layer we have is really just a
thin fake shim whacked on top of the hw (on SoCs at least).

Cheers, Daniel
Alexandre Courbot July 11, 2014, 9:35 a.m. UTC | #4
On 07/11/2014 04:41 PM, Daniel Vetter wrote:
> On Fri, Jul 11, 2014 at 11:40:27AM +0900, Alexandre Courbot wrote:
>> On 07/10/2014 10:04 PM, Daniel Vetter wrote:
>>> On Tue, Jul 08, 2014 at 05:25:59PM +0900, Alexandre Courbot wrote:
>>>> On architectures for which access to GPU memory is non-coherent,
>>>> caches need to be flushed and invalidated explicitly when BO control
>>>> changes between CPU and GPU.
>>>>
>>>> This patch adds buffer synchronization functions which invokes the
>>>> correct API (PCI or DMA) to ensure synchronization is effective.
>>>>
>>>> Based on the TTM DMA cache helper patches by Lucas Stach.
>>>>
>>>> Signed-off-by: Lucas Stach <dev@lynxeye.de>
>>>> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
>>>> ---
>>>>   drivers/gpu/drm/nouveau/nouveau_bo.c  | 56 +++++++++++++++++++++++++++++++++++
>>>>   drivers/gpu/drm/nouveau/nouveau_bo.h  |  2 ++
>>>>   drivers/gpu/drm/nouveau/nouveau_gem.c | 12 ++++++++
>>>>   3 files changed, 70 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
>>>> index 67e9e8e2e2ec..47e4e8886769 100644
>>>> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
>>>> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
>>>> @@ -402,6 +402,60 @@ nouveau_bo_unmap(struct nouveau_bo *nvbo)
>>>>   		ttm_bo_kunmap(&nvbo->kmap);
>>>>   }
>>>>
>>>> +void
>>>> +nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
>>>> +{
>>>> +	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
>>>> +	struct nouveau_device *device = nouveau_dev(drm->dev);
>>>> +	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
>>>> +	int i;
>>>> +
>>>> +	if (!ttm_dma)
>>>> +		return;
>>>> +
>>>> +	if (nv_device_is_cpu_coherent(device) || nvbo->force_coherent)
>>>> +		return;
>>>
>>> Is the is_cpu_coherent check really required? On coherent platforms the
>>> sync_for_foo should be a noop. It's the dma api's job to encapsulate this
>>> knowledge so that drivers can be blissfully ignorant. The explicit
>>> is_coherent check makes this a bit leaky. And same comment that underlying
>>> the bus-specifics dma-mapping functions are identical.
>>
>> I think you are right, the is_cpu_coherent check should not be needed here.
>> I still think we should have separate paths for the PCI/DMA cases though,
>> unless you can point me to a source that clearly states that the PCI API is
>> deprecated and that DMA should be used instead.
>
> Ah, on 2nd look I've found it again. Quoting
> Documentation/DMA-API-HOWTO.txt:
>
> "Note that the DMA API works with any bus independent of the underlying
> microprocessor architecture. You should use the DMA API rather than the
> bus-specific DMA API, i.e., use the dma_map_*() interfaces rather than the
> pci_map_*() interfaces."
>
> The advice is fairly strong here I think ;-) And imo the idea makes sense,
> since it allows drivers like nouveau here to care much less about the
> actual bus used to get data to/from the ip block. And if you look at intel
> gfx it makes even more sense since the pci layer we have is really just a
> thin fake shim whacked on top of the hw (on SoCs at least).

Indeed, I stand corrected. :) That's good news actually, as it will 
simplify the code. Thanks for pointing that out!

I will send a new revision that makes use of the DMA API exclusively and 
will remove the nv_device_map/unmap() functions which are pretty useless 
now.
diff mbox

Patch

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 67e9e8e2e2ec..47e4e8886769 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -402,6 +402,60 @@  nouveau_bo_unmap(struct nouveau_bo *nvbo)
 		ttm_bo_kunmap(&nvbo->kmap);
 }
 
+void
+nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
+{
+	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
+	struct nouveau_device *device = nouveau_dev(drm->dev);
+	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
+	int i;
+
+	if (!ttm_dma)
+		return;
+
+	if (nv_device_is_cpu_coherent(device) || nvbo->force_coherent)
+		return;
+
+	if (nv_device_is_pci(device)) {
+		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
+			pci_dma_sync_single_for_device(device->pdev,
+				ttm_dma->dma_address[i], PAGE_SIZE,
+				PCI_DMA_TODEVICE);
+	} else {
+		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
+			dma_sync_single_for_device(nv_device_base(device),
+				ttm_dma->dma_address[i], PAGE_SIZE,
+				DMA_TO_DEVICE);
+	}
+}
+
+void
+nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
+{
+	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
+	struct nouveau_device *device = nouveau_dev(drm->dev);
+	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
+	int i;
+
+	if (!ttm_dma)
+		return;
+
+	if (nv_device_is_cpu_coherent(device) || nvbo->force_coherent)
+		return;
+
+	if (nv_device_is_pci(device)) {
+		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
+			pci_dma_sync_single_for_cpu(device->pdev,
+				ttm_dma->dma_address[i], PAGE_SIZE,
+				PCI_DMA_FROMDEVICE);
+	} else {
+		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
+			dma_sync_single_for_cpu(nv_device_base(device),
+				ttm_dma->dma_address[i], PAGE_SIZE,
+				DMA_FROM_DEVICE);
+	}
+}
+
 int
 nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
 		    bool no_wait_gpu)
@@ -418,6 +472,8 @@  nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
 	if (!ttm)
 		return ret;
 
+	nouveau_bo_sync_for_device(nvbo);
+
 	device = nouveau_dev(nouveau_bdev(ttm->bdev)->dev);
 	nv_wr32(device, 0x70004, 0x00000001);
 	if (!nv_wait(device, 0x070004, 0x00000001, 0x00000000))
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index ff17c1f432fc..fa42298d2dca 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -81,6 +81,8 @@  void nouveau_bo_wr32(struct nouveau_bo *, unsigned index, u32 val);
 void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *);
 int  nouveau_bo_validate(struct nouveau_bo *, bool interruptible,
 			 bool no_wait_gpu);
+void nouveau_bo_sync_for_device(struct nouveau_bo *nvbo);
+void nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo);
 
 struct nouveau_vma *
 nouveau_bo_vma_find(struct nouveau_bo *, struct nouveau_vm *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index c90c0dc0afe8..08829a720891 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -896,6 +896,7 @@  nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
 	spin_lock(&nvbo->bo.bdev->fence_lock);
 	ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
 	spin_unlock(&nvbo->bo.bdev->fence_lock);
+	nouveau_bo_sync_for_cpu(nvbo);
 	drm_gem_object_unreference_unlocked(gem);
 	return ret;
 }
@@ -904,6 +905,17 @@  int
 nouveau_gem_ioctl_cpu_fini(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv)
 {
+	struct drm_nouveau_gem_cpu_fini *req = data;
+	struct drm_gem_object *gem;
+	struct nouveau_bo *nvbo;
+
+	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
+	if (!gem)
+		return -ENOENT;
+	nvbo = nouveau_gem_object(gem);
+
+	nouveau_bo_sync_for_device(nvbo);
+	drm_gem_object_unreference_unlocked(gem);
 	return 0;
 }