[v4,1/6] drm/i915: Add support for moving fence waiting

Message ID 20211118130230.154988-2-thomas.hellstrom@linux.intel.com
State New, archived
Series drm/i915/ttm: Async migration

Commit Message

Thomas Hellström Nov. 18, 2021, 1:02 p.m. UTC
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>

For now, we will only allow async migration when TTM is used,
so the paths we care about are related to TTM.

The mmap path is handled by having the fence in ttm_bo->moving. When
pinning, the binding only becomes available after the moving fence is
signaled, and pinning a CPU map will only work after the moving fence
signals.

This should close all holes where userspace can read a buffer
before it's fully migrated.
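
As an illustration only (not part of the patch), the rule being
enforced boils down to the access pattern sketched below, where "bo"
is the object's struct ttm_buffer_object and the dma_resv lock is
assumed to be held:

	struct dma_fence *moving = dma_fence_get(bo->moving);
	int err = 0;

	if (moving) {
		/* Wait for any pending async move to complete... */
		err = dma_fence_wait(moving, true /* interruptible */);
		dma_fence_put(moving);
	}
	if (err)
		return err;	/* e.g. -ERESTARTSYS */

	/* ...only then set up CPU or GPU PTEs to the object's pages. */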

v2:
- Fix a couple of SPARSE warnings
v3:
- Fix a NULL pointer dereference
v4:
- Ditch the moving fence waiting for i915_vma_pin_iomap() and
  replace with a verification that the vma is already bound.
  (Matthew Auld)
- Squash with a previous patch introducing moving fence waiting and
  accessing interfaces (Matthew Auld)
- Rename to indicate that we also add support for sync waiting.

Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c | 52 ++++++++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.h |  6 +++
 drivers/gpu/drm/i915/gem/i915_gem_pages.c  |  6 +++
 drivers/gpu/drm/i915/i915_vma.c            | 36 ++++++++++++++-
 4 files changed, 99 insertions(+), 1 deletion(-)

Comments

Matthew Auld Nov. 18, 2021, 2:12 p.m. UTC | #1
On 18/11/2021 13:02, Thomas Hellström wrote:
> From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> 
> For now, we will only allow async migration when TTM is used,
> so the paths we care about are related to TTM.
> 
> The mmap path is handled by having the fence in ttm_bo->moving. When
> pinning, the binding only becomes available after the moving fence is
> signaled, and pinning a CPU map will only work after the moving fence
> signals.
> 
> This should close all holes where userspace can read a buffer
> before it's fully migrated.
> 
> v2:
> - Fix a couple of SPARSE warnings
> v3:
> - Fix a NULL pointer dereference
> v4:
> - Ditch the moving fence waiting for i915_vma_pin_iomap() and
>    replace with a verification that the vma is already bound.
>    (Matthew Auld)
> - Squash with a previous patch introducing moving fence waiting and
>    accessing interfaces (Matthew Auld)
> - Rename to indicate that we also add support for sync waiting.
> 
> Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_object.c | 52 ++++++++++++++++++++++
>   drivers/gpu/drm/i915/gem/i915_gem_object.h |  6 +++
>   drivers/gpu/drm/i915/gem/i915_gem_pages.c  |  6 +++
>   drivers/gpu/drm/i915/i915_vma.c            | 36 ++++++++++++++-
>   4 files changed, 99 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 591ee3cb7275..7b7d9415c9cb 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -33,6 +33,7 @@
>   #include "i915_gem_object.h"
>   #include "i915_memcpy.h"
>   #include "i915_trace.h"
> +#include "i915_gem_ttm.h"

Nit: Ordering.
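
Presumably this refers to keeping the #include list alphabetized,
i.e. something like:

	#include "i915_gem_object.h"
	#include "i915_gem_ttm.h"
	#include "i915_memcpy.h"
	#include "i915_trace.h"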

>   
>   static struct kmem_cache *slab_objects;
>   
> @@ -726,6 +727,57 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = {
>   	.export = i915_gem_prime_export,
>   };
>   
> +/**
> + * i915_gem_object_get_moving_fence - Get the object's moving fence if any
> + * @obj: The object whose moving fence to get.
> + *
> + * A non-signaled moving fence means that there is an async operation
> + * pending on the object that needs to be waited on before setting up
> + * any GPU- or CPU PTEs to the object's pages.
> + *
> + * Return: A refcounted pointer to the object's moving fence if any,
> + * NULL otherwise.
> + */
> +struct dma_fence *
> +i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
> +{
> +	return dma_fence_get(i915_gem_to_ttm(obj)->moving);
> +}
> +
> +/**
> + * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any
> + * @obj: The object whose moving fence to wait for.
> + * @intr: Whether to wait interruptible.
> + *
> + * If the moving fence signaled without an error, it is detached from the
> + * object and put.
> + *
> + * Return: 0 if successful, -ERESTARTSYS if the wait was interrupted,
> + * negative error code if the async operation represented by the
> + * moving fence failed.
> + */
> +int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
> +				      bool intr)
> +{
> +	struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
> +	int ret;
> +
> +	assert_object_held(obj);
> +	if (!fence)
> +		return 0;
> +
> +	ret = dma_fence_wait(fence, intr);
> +	if (ret)
> +		return ret;
> +
> +	if (fence->error)
> +		return fence->error;
> +
> +	i915_gem_to_ttm(obj)->moving = NULL;
> +	dma_fence_put(fence);
> +	return 0;
> +}
> +
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>   #include "selftests/huge_gem_object.c"
>   #include "selftests/huge_pages.c"
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index 133963b46135..66f20b803b01 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -517,6 +517,12 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
>   	i915_gem_object_unpin_pages(obj);
>   }
>   
> +struct dma_fence *
> +i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
> +
> +int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
> +				      bool intr);
> +
>   void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
>   					 unsigned int cache_level);
>   bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> index c4f684b7cc51..49c6e55c68ce 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> @@ -418,6 +418,12 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>   	}
>   
>   	if (!ptr) {
> +		err = i915_gem_object_wait_moving_fence(obj, true);
> +		if (err) {
> +			ptr = ERR_PTR(err);
> +			goto err_unpin;
> +		}
> +
>   		if (GEM_WARN_ON(type == I915_MAP_WC &&
>   				!static_cpu_has(X86_FEATURE_PAT)))
>   			ptr = ERR_PTR(-ENODEV);
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 0896656896d0..00d3daf72329 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -354,6 +354,25 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
>   	return err;
>   }
>   
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> +static int i915_vma_verify_bind_complete(struct i915_vma *vma)
> +{
> +	int err = 0;
> +
> +	if (i915_active_has_exclusive(&vma->active)) {
> +		struct dma_fence *fence =
> +			i915_active_fence_get(&vma->active.excl);

if (!fence)
     return 0;

?

> +
> +		if (dma_fence_is_signaled(fence))
> +			err = fence->error;
> +		else
> +			err = -EBUSY;

dma_fence_put(fence);

?
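
Folding both suggestions in, the helper could look something like the
sketch below (not necessarily the final version). Note that
i915_active_fence_get() returns NULL when no fence is installed, so
the NULL check would also subsume the i915_active_has_exclusive()
test:

	static int i915_vma_verify_bind_complete(struct i915_vma *vma)
	{
		struct dma_fence *fence =
			i915_active_fence_get(&vma->active.excl);
		int err;

		if (!fence)
			return 0;

		if (dma_fence_is_signaled(fence))
			err = fence->error;
		else
			err = -EBUSY;

		/* Drop the reference taken by i915_active_fence_get(). */
		dma_fence_put(fence);

		return err;
	}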

Reviewed-by: Matthew Auld <matthew.auld@intel.com>

> +	}
> +
> +	return err;
> +}
> +#endif
> +
>   /**
>   * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
>    * @vma: VMA to map
> @@ -428,6 +447,13 @@ int i915_vma_bind(struct i915_vma *vma,
>   			work->pinned = i915_gem_object_get(vma->obj);
>   		}
>   	} else {
> +		if (vma->obj) {
> +			int ret;
> +
> +			ret = i915_gem_object_wait_moving_fence(vma->obj, true);
> +			if (ret)
> +				return ret;
> +		}
>   		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
>   	}
>   
> @@ -449,6 +475,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>   
>   	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
>   	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
> +	GEM_BUG_ON(i915_vma_verify_bind_complete(vma));
>   
>   	ptr = READ_ONCE(vma->iomap);
>   	if (ptr == NULL) {
> @@ -867,6 +894,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>   		    u64 size, u64 alignment, u64 flags)
>   {
>   	struct i915_vma_work *work = NULL;
> +	struct dma_fence *moving = NULL;
>   	intel_wakeref_t wakeref = 0;
>   	unsigned int bound;
>   	int err;
> @@ -892,7 +920,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>   	if (flags & PIN_GLOBAL)
>   		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
>   
> -	if (flags & vma->vm->bind_async_flags) {
> +	moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL;
> +	if (flags & vma->vm->bind_async_flags || moving) {
>   		/* lock VM */
>   		err = i915_vm_lock_objects(vma->vm, ww);
>   		if (err)
> @@ -906,6 +935,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>   
>   		work->vm = i915_vm_get(vma->vm);
>   
> +		dma_fence_work_chain(&work->base, moving);
> +
>   		/* Allocate enough page directories to cover the used PTEs */
>   		if (vma->vm->allocate_va_range) {
>   			err = i915_vm_alloc_pt_stash(vma->vm,
> @@ -1010,7 +1041,10 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>   err_rpm:
>   	if (wakeref)
>   		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
> +	if (moving)
> +		dma_fence_put(moving);
>   	vma_put_pages(vma);
> +
>   	return err;
>   }
>   
>
kernel test robot Nov. 20, 2021, 9:09 a.m. UTC | #2
Hi "Thomas,

I love your patch! Yet something to improve:

[auto build test ERROR on drm-tip/drm-tip]
[also build test ERROR on drm-exynos/exynos-drm-next drm/drm-next v5.16-rc1 next-20211118]
[cannot apply to drm-intel/for-linux-next tegra-drm/drm/tegra/for-next airlied/drm-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Thomas-Hellstr-m/drm-i915-ttm-Async-migration/20211118-210436
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-r001-20211118 (attached as .config)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/316dda0c6b91d043111cb348b43be6a6f2e3bb0a
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Thomas-Hellstr-m/drm-i915-ttm-Async-migration/20211118-210436
        git checkout 316dda0c6b91d043111cb348b43be6a6f2e3bb0a
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=x86_64 

If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/gpu/drm/i915/i915_vma.c:478:13: error: implicit declaration of function 'i915_vma_verify_bind_complete' [-Werror,-Wimplicit-function-declaration]
           GEM_BUG_ON(i915_vma_verify_bind_complete(vma));
                      ^
   1 error generated.
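
The cause: i915_vma_verify_bind_complete() is only compiled under
CONFIG_DRM_I915_DEBUG_GEM, while GEM_BUG_ON() still type-checks its
argument in non-debug builds. One plausible fix (a sketch; the next
revision may resolve it differently) is a no-op fallback so that the
check compiles away:

	#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
	static int i915_vma_verify_bind_complete(struct i915_vma *vma)
	{
		/* debug-only checks as in the patch above */
		return 0;
	}
	#else
	/* Keeps GEM_BUG_ON(i915_vma_verify_bind_complete(vma)) buildable. */
	#define i915_vma_verify_bind_complete(_vma) 0
	#endif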


vim +/i915_vma_verify_bind_complete +478 drivers/gpu/drm/i915/i915_vma.c

   463	
   464	void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
   465	{
   466		void __iomem *ptr;
   467		int err;
   468	
   469		if (!i915_gem_object_is_lmem(vma->obj)) {
   470			if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
   471				err = -ENODEV;
   472				goto err;
   473			}
   474		}
   475	
   476		GEM_BUG_ON(!i915_vma_is_ggtt(vma));
   477		GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
 > 478		GEM_BUG_ON(i915_vma_verify_bind_complete(vma));
   479	
   480		ptr = READ_ONCE(vma->iomap);
   481		if (ptr == NULL) {
   482			/*
   483			 * TODO: consider just using i915_gem_object_pin_map() for lmem
   484			 * instead, which already supports mapping non-contiguous chunks
   485			 * of pages, that way we can also drop the
   486			 * I915_BO_ALLOC_CONTIGUOUS when allocating the object.
   487			 */
   488			if (i915_gem_object_is_lmem(vma->obj))
   489				ptr = i915_gem_object_lmem_io_map(vma->obj, 0,
   490								  vma->obj->base.size);
   491			else
   492				ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
   493							vma->node.start,
   494							vma->node.size);
   495			if (ptr == NULL) {
   496				err = -ENOMEM;
   497				goto err;
   498			}
   499	
   500			if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) {
   501				io_mapping_unmap(ptr);
   502				ptr = vma->iomap;
   503			}
   504		}
   505	
   506		__i915_vma_pin(vma);
   507	
   508		err = i915_vma_pin_fence(vma);
   509		if (err)
   510			goto err_unpin;
   511	
   512		i915_vma_set_ggtt_write(vma);
   513	
   514		/* NB Access through the GTT requires the device to be awake. */
   515		return ptr;
   516	
   517	err_unpin:
   518		__i915_vma_unpin(vma);
   519	err:
   520		return IO_ERR_PTR(err);
   521	}
   522	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 591ee3cb7275..7b7d9415c9cb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -33,6 +33,7 @@ 
 #include "i915_gem_object.h"
 #include "i915_memcpy.h"
 #include "i915_trace.h"
+#include "i915_gem_ttm.h"
 
 static struct kmem_cache *slab_objects;
 
@@ -726,6 +727,57 @@  static const struct drm_gem_object_funcs i915_gem_object_funcs = {
 	.export = i915_gem_prime_export,
 };
 
+/**
+ * i915_gem_object_get_moving_fence - Get the object's moving fence if any
+ * @obj: The object whose moving fence to get.
+ *
+ * A non-signaled moving fence means that there is an async operation
+ * pending on the object that needs to be waited on before setting up
+ * any GPU- or CPU PTEs to the object's pages.
+ *
+ * Return: A refcounted pointer to the object's moving fence if any,
+ * NULL otherwise.
+ */
+struct dma_fence *
+i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
+{
+	return dma_fence_get(i915_gem_to_ttm(obj)->moving);
+}
+
+/**
+ * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any
+ * @obj: The object whose moving fence to wait for.
+ * @intr: Whether to wait interruptible.
+ *
+ * If the moving fence signaled without an error, it is detached from the
+ * object and put.
+ *
+ * Return: 0 if successful, -ERESTARTSYS if the wait was interrupted,
+ * negative error code if the async operation represented by the
+ * moving fence failed.
+ */
+int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
+				      bool intr)
+{
+	struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
+	int ret;
+
+	assert_object_held(obj);
+	if (!fence)
+		return 0;
+
+	ret = dma_fence_wait(fence, intr);
+	if (ret)
+		return ret;
+
+	if (fence->error)
+		return fence->error;
+
+	i915_gem_to_ttm(obj)->moving = NULL;
+	dma_fence_put(fence);
+	return 0;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/huge_gem_object.c"
 #include "selftests/huge_pages.c"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 133963b46135..66f20b803b01 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -517,6 +517,12 @@  i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
 	i915_gem_object_unpin_pages(obj);
 }
 
+struct dma_fence *
+i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
+
+int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
+				      bool intr);
+
 void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index c4f684b7cc51..49c6e55c68ce 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -418,6 +418,12 @@  void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	}
 
 	if (!ptr) {
+		err = i915_gem_object_wait_moving_fence(obj, true);
+		if (err) {
+			ptr = ERR_PTR(err);
+			goto err_unpin;
+		}
+
 		if (GEM_WARN_ON(type == I915_MAP_WC &&
 				!static_cpu_has(X86_FEATURE_PAT)))
 			ptr = ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 0896656896d0..00d3daf72329 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -354,6 +354,25 @@  int i915_vma_wait_for_bind(struct i915_vma *vma)
 	return err;
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static int i915_vma_verify_bind_complete(struct i915_vma *vma)
+{
+	int err = 0;
+
+	if (i915_active_has_exclusive(&vma->active)) {
+		struct dma_fence *fence =
+			i915_active_fence_get(&vma->active.excl);
+
+		if (dma_fence_is_signaled(fence))
+			err = fence->error;
+		else
+			err = -EBUSY;
+	}
+
+	return err;
+}
+#endif
+
 /**
  * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
  * @vma: VMA to map
@@ -428,6 +447,13 @@  int i915_vma_bind(struct i915_vma *vma,
 			work->pinned = i915_gem_object_get(vma->obj);
 		}
 	} else {
+		if (vma->obj) {
+			int ret;
+
+			ret = i915_gem_object_wait_moving_fence(vma->obj, true);
+			if (ret)
+				return ret;
+		}
 		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
 	}
 
@@ -449,6 +475,7 @@  void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
+	GEM_BUG_ON(i915_vma_verify_bind_complete(vma));
 
 	ptr = READ_ONCE(vma->iomap);
 	if (ptr == NULL) {
@@ -867,6 +894,7 @@  int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		    u64 size, u64 alignment, u64 flags)
 {
 	struct i915_vma_work *work = NULL;
+	struct dma_fence *moving = NULL;
 	intel_wakeref_t wakeref = 0;
 	unsigned int bound;
 	int err;
@@ -892,7 +920,8 @@  int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	if (flags & PIN_GLOBAL)
 		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
 
-	if (flags & vma->vm->bind_async_flags) {
+	moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL;
+	if (flags & vma->vm->bind_async_flags || moving) {
 		/* lock VM */
 		err = i915_vm_lock_objects(vma->vm, ww);
 		if (err)
@@ -906,6 +935,8 @@  int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 
 		work->vm = i915_vm_get(vma->vm);
 
+		dma_fence_work_chain(&work->base, moving);
+
 		/* Allocate enough page directories to cover the used PTEs */
 		if (vma->vm->allocate_va_range) {
 			err = i915_vm_alloc_pt_stash(vma->vm,
@@ -1010,7 +1041,10 @@  int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 err_rpm:
 	if (wakeref)
 		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
+	if (moving)
+		dma_fence_put(moving);
 	vma_put_pages(vma);
+
 	return err;
 }