
[5/7] drm/i915: Implement GGTT update method with MI_UPDATE_GTT

Message ID 20230915083412.4572-6-nirmoy.das@intel.com (mailing list archive)
State New, archived
Series Update GGTT with MI_UPDATE_GTT on MTL

Commit Message

Nirmoy Das Sept. 15, 2023, 8:34 a.m. UTC
Implement a GGTT update method using the blitter command MI_UPDATE_GTT,
and install the corresponding handlers on platforms that require them.

v2: Make sure we hold the GT wakeref and the blitter engine wakeref before
we call mutex_lock/intel_context_enter below. When the GT/engine is not
awake, intel_context_enter calls into runtime PM code, which can end up
in kmalloc/fs_reclaim. Triggering fs_reclaim while holding a mutex is not
allowed, because the shrinker may try to take the same mutex, creating a
circular lock dependency. So take the GT/blitter engine wakerefs before
calling mutex_lock, to break that circular dependency.

Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Signed-off-by: Oak Zeng <oak.zeng@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 235 +++++++++++++++++++++++++++
 1 file changed, 235 insertions(+)
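
For orientation, here is a condensed sketch of the per-chunk flow the patch
implements (illustrative only, simplified from the diff below: error paths,
request refcounting and intel_context_enter/exit are elided). It shows the
two essentials: the v2 wakeref-before-mutex ordering, and the MI_UPDATE_GTT
packet layout of a two-dword header followed by one qword PTE per entry,
capped at 511 entries per request:

	if (!intel_gt_pm_get_if_awake(gt))
		return false;			/* fall back to PCI-based GGTT update */
	intel_engine_pm_get(ce->engine);	/* wakerefs before any mutex (v2 fix) */

	while (num_entries) {
		u32 n_ptes = min_t(u32, 511, num_entries); /* 512 resets the engine */

		mutex_lock(&ce->timeline->mutex);	/* safe: no rpm_resume under it now */
		rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC);
		cs = intel_ring_begin(rq, 2 * n_ptes + 2); /* header + 2 dwords per PTE */

		*cs++ = MI_UPDATE_GTT | (2 * n_ptes);
		*cs++ = offset << 12;
		/* ... emit n_ptes qword PTEs, padding any shortfall with scratch ... */

		intel_ring_advance(rq, cs);
		__i915_request_commit(rq);
		__i915_request_queue(rq, &attr);
		mutex_unlock(&ce->timeline->mutex);

		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
		num_entries -= n_ptes;
		offset += n_ptes;
	}

	intel_engine_pm_put(ce->engine);
	intel_gt_pm_put(gt);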

Comments

Zeng, Oak Sept. 15, 2023, 3:56 p.m. UTC | #1
Thanks,
Oak

> -----Original Message-----
> From: Das, Nirmoy <nirmoy.das@intel.com>
> Sent: Friday, September 15, 2023 4:34 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: Zeng, Oak <oak.zeng@intel.com>; chris.p.wilson@linux.intel.com; Piorkowski,
> Piotr <piotr.piorkowski@intel.com>; Shyti, Andi <andi.shyti@intel.com>; Mun,
> Gwan-gyeong <gwan-gyeong.mun@intel.com>; Roper, Matthew D
> <matthew.d.roper@intel.com>; Das, Nirmoy <nirmoy.das@intel.com>
> Subject: [PATCH 5/7] drm/i915: Implement GGTT update method with
> MI_UPDATE_GTT
> 
> [snip]
> 
> +		if (pages) {
> +			for_each_sgt_daddr_next(addr, iter) {
> +				if (count == n_ptes)
> +					break;
> +				*cs++ = lower_32_bits(pte | addr);
> +				*cs++ = upper_32_bits(pte | addr);
> +				count++;
> +			}
> +			/* fill remaining with scratch pte, if any */
> +			if (count < n_ptes) {
> +				memset64((u64 *)cs, scratch_pte,
> +					 n_ptes - count);
> +				cs += (n_ptes - count) * 2;
> +			}

Should we return an error instead of silently filling the PTEs with scratch? Or maybe even GEM_BUG_ON in this case? The caller didn't allocate enough pages, which means something wrong happened...

Oak
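
As an aside on the question above, a minimal sketch of the stricter variant
Oak is hinting at (hypothetical, not part of this patch) would flag the short
sg_table instead of padding it quietly:

	/* Hypothetical: warn when the caller supplied fewer pages than PTEs,
	 * but still pad with scratch so the emitted packet stays well formed. */
	if (GEM_WARN_ON(count < n_ptes)) {
		memset64((u64 *)cs, scratch_pte, n_ptes - count);
		cs += (n_ptes - count) * 2;
	}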

> [snip]
Piotr Piórkowski Sept. 15, 2023, 4:06 p.m. UTC | #2
Zeng, Oak <oak.zeng@intel.com> wrote on pią [2023-wrz-15 17:56:56 +0200]:
> 
> > [snip]
> > +		if (pages) {
> > +			for_each_sgt_daddr_next(addr, iter) {
> > +				if (count == n_ptes)
> > +					break;
> > +				*cs++ = lower_32_bits(pte | addr);
> > +				*cs++ = upper_32_bits(pte | addr);
> > +				count++;
> > +			}
> > +			/* fill remaining with scratch pte, if any */
> > +			if (count < n_ptes) {
> > +				memset64((u64 *)cs, scratch_pte,
> > +					 n_ptes - count);
> > +				cs += (n_ptes - count) * 2;
> > +			}
> 
> Should we return an error instead of silently filling the PTEs with scratch? Or maybe even GEM_BUG_ON in this case? The caller didn't allocate enough pages, which means something wrong happened...

We should avoid GEM_BUG_ON; however, maybe it's a good idea for this function
to return an int instead of a bool, because there are a lot of places here that
return an error. In that case, ret > 0 could mean the number of PTEs we updated
correctly and ret < 0 would be an error.

Piotr
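
A minimal sketch of that suggested convention (hypothetical, not part of this
series; bind_one_chunk() is an illustrative stand-in for the per-request body
of gen8_ggtt_bind_ptes()):

	/* Hypothetical int-returning variant: progress count or -errno. */
	static int gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
				       struct sg_table *pages, u32 num_entries,
				       const gen8_pte_t pte)
	{
		u32 done = 0;

		while (done < num_entries) {
			/* cap at 511 PTEs; 512 was seen to reset the engine */
			u32 n_ptes = min_t(u32, 511, num_entries - done);
			int err;

			/* illustrative helper: build and queue one MI_UPDATE_GTT request */
			err = bind_one_chunk(ggtt, offset + done, pages, n_ptes, pte);
			if (err)
				return done ? done : err; /* partial progress, or -errno */

			done += n_ptes;
		}

		return done; /* ret > 0: number of PTEs updated */
	}

Callers could then treat ret < (int)num_entries as a partial update and fall
back to the PCI path for the remainder.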

> [snip]
Nirmoy Das Sept. 18, 2023, 10:25 a.m. UTC | #3
On 9/15/2023 5:56 PM, Zeng, Oak wrote:
>
>> [snip]
>> +		if (pages) {
>> +			for_each_sgt_daddr_next(addr, iter) {
>> +				if (count == n_ptes)
>> +					break;
>> +				*cs++ = lower_32_bits(pte | addr);
>> +				*cs++ = upper_32_bits(pte | addr);
>> +				count++;
>> +			}
>> +			/* fill remaining with scratch pte, if any */
>> +			if (count < n_ptes) {
>> +				memset64((u64 *)cs, scratch_pte,
>> +					 n_ptes - count);
>> +				cs += (n_ptes - count) * 2;
>> +			}
> Should we return an error instead of silently filling the PTEs with scratch? Or maybe even GEM_BUG_ON in this case? The caller didn't allocate enough pages, which means something wrong happened...


We do the same already in gen8_ggtt_insert_entries(), so this is not adding
anything new here. The comment just made it obvious :) I don't know the
exact use case when this happens, but I saw tons of pipe errors without this.


Regards,

Nirmoy
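
For context, the CPU path Nirmoy refers to backfills the same way; roughly
(paraphrased from gen8_ggtt_insert_entries(), trimmed for brevity):

	/* Sketch: the PCI/CPU insert path also pads the tail with scratch. */
	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
		gen8_set_pte(gte++, pte_encode | addr);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);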

> [snip]
Zeng, Oak Sept. 18, 2023, 2:50 p.m. UTC | #4
Thanks,
Oak

> -----Original Message-----
> From: Das, Nirmoy <nirmoy.das@intel.com>
> Sent: Monday, September 18, 2023 6:26 AM
> To: Zeng, Oak <oak.zeng@intel.com>; intel-gfx@lists.freedesktop.org;
> Piorkowski, Piotr <piotr.piorkowski@intel.com>
> Cc: chris.p.wilson@linux.intel.com; Shyti, Andi <andi.shyti@intel.com>; Mun,
> Gwan-gyeong <gwan-gyeong.mun@intel.com>; Roper, Matthew D
> <matthew.d.roper@intel.com>
> Subject: Re: [PATCH 5/7] drm/i915: Implement GGTT update method with
> MI_UPDATE_GTT
> 
> 
> >> [snip]
> >> +		if (pages) {
> >> +			for_each_sgt_daddr_next(addr, iter) {
> >> +				if (count == n_ptes)
> >> +					break;
> >> +				*cs++ = lower_32_bits(pte | addr);
> >> +				*cs++ = upper_32_bits(pte | addr);
> >> +				count++;
> >> +			}
> >> +			/* fill remaining with scratch pte, if any */
> >> +			if (count < n_ptes) {
> >> +				memset64((u64 *)cs, scratch_pte,
> >> +					 n_ptes - count);
> >> +				cs += (n_ptes - count) * 2;
> >> +			}
> > Should we return an error instead of silently filling the PTEs with scratch?
> > Or maybe even GEM_BUG_ON in this case? The caller didn't allocate enough
> > pages, which means something wrong happened...
> 
> We do the same already in gen8_ggtt_insert_entries(), so this is not adding
> anything new here. The comment just made it obvious :) I don't know the
> exact use case when this happens, but I saw tons of pipe errors without this.

Okay, that is good to know.

I don't see an obvious issue with this patch. Acked-by: Oak Zeng <oak.zeng@intel.com>. It would be better if someone else also reviews it.

Oak
> [snip]

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index dd0ed941441a..b94de7cebfce 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -15,18 +15,23 @@ 
 #include "display/intel_display.h"
 #include "gem/i915_gem_lmem.h"
 
+#include "intel_context.h"
 #include "intel_ggtt_gmch.h"
+#include "intel_gpu_commands.h"
 #include "intel_gt.h"
 #include "intel_gt_regs.h"
 #include "intel_pci_config.h"
+#include "intel_ring.h"
 #include "i915_drv.h"
 #include "i915_pci.h"
+#include "i915_request.h"
 #include "i915_scatterlist.h"
 #include "i915_utils.h"
 #include "i915_vgpu.h"
 
 #include "intel_gtt.h"
 #include "gen8_ppgtt.h"
+#include "intel_engine_pm.h"
 
 static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
 				   unsigned long color,
@@ -252,6 +257,145 @@  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 	return pte;
 }
 
+static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
+{
+	struct intel_gt *gt = ggtt->vm.gt;
+
+	return intel_gt_is_bind_context_ready(gt);
+}
+
+static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
+{
+	struct intel_context *ce;
+	struct intel_gt *gt = ggtt->vm.gt;
+
+	if (intel_gt_is_wedged(gt))
+		return NULL;
+
+	ce = gt->engine[BCS0]->bind_context;
+	GEM_BUG_ON(!ce);
+
+	/*
+	 * If the GT is not awake already at this stage then fallback
+	 * to pci based GGTT update otherwise __intel_wakeref_get_first()
+	 * would conflict with fs_reclaim trying to allocate memory while
+	 * doing rpm_resume().
+	 */
+	if (!intel_gt_pm_get_if_awake(gt))
+		return NULL;
+
+	intel_engine_pm_get(ce->engine);
+
+	return ce;
+}
+
+static void gen8_ggtt_bind_put_ce(struct intel_context *ce)
+{
+	intel_engine_pm_put(ce->engine);
+	intel_gt_pm_put(ce->engine->gt);
+}
+
+static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
+				struct sg_table *pages, u32 num_entries,
+				const gen8_pte_t pte)
+{
+	struct i915_sched_attr attr = {};
+	struct intel_gt *gt = ggtt->vm.gt;
+	const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode;
+	struct sgt_iter iter;
+	struct i915_request *rq;
+	struct intel_context *ce;
+	u32 *cs;
+
+	if (!num_entries)
+		return true;
+
+	ce = gen8_ggtt_bind_get_ce(ggtt);
+	if (!ce)
+		return false;
+
+	if (pages)
+		iter = __sgt_iter(pages->sgl, true);
+
+	while (num_entries) {
+		int count = 0;
+		dma_addr_t addr;
+		/*
+		 * MI_UPDATE_GTT can update 512 entries in a single command, but
+		 * that ends up with an engine reset; 511 works.
+		 */
+		u32 n_ptes = min_t(u32, 511, num_entries);
+
+		if (mutex_lock_interruptible(&ce->timeline->mutex))
+			goto put_ce;
+
+		intel_context_enter(ce);
+		rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC);
+		intel_context_exit(ce);
+		if (IS_ERR(rq)) {
+			GT_TRACE(gt, "Failed to get bind request\n");
+			mutex_unlock(&ce->timeline->mutex);
+			goto put_ce;
+		}
+
+		cs = intel_ring_begin(rq, 2 * n_ptes + 2);
+		if (IS_ERR(cs)) {
+			GT_TRACE(gt, "Failed to get ring space for GGTT bind\n");
+			i915_request_set_error_once(rq, PTR_ERR(cs));
+			/* once a request is created, it must be queued */
+			goto queue_err_rq;
+		}
+
+		*cs++ = MI_UPDATE_GTT | (2 * n_ptes);
+		*cs++ = offset << 12;
+
+		if (pages) {
+			for_each_sgt_daddr_next(addr, iter) {
+				if (count == n_ptes)
+					break;
+				*cs++ = lower_32_bits(pte | addr);
+				*cs++ = upper_32_bits(pte | addr);
+				count++;
+			}
+			/* fill remaining with scratch pte, if any */
+			if (count < n_ptes) {
+				memset64((u64 *)cs, scratch_pte,
+					 n_ptes - count);
+				cs += (n_ptes - count) * 2;
+			}
+		} else {
+			memset64((u64 *)cs, pte, n_ptes);
+			cs += n_ptes * 2;
+		}
+
+		intel_ring_advance(rq, cs);
+queue_err_rq:
+		i915_request_get(rq);
+		__i915_request_commit(rq);
+		__i915_request_queue(rq, &attr);
+
+		mutex_unlock(&ce->timeline->mutex);
+		/* The wait will break once the request is complete or after an engine reset */
+		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+		if (rq->fence.error)
+			goto err_rq;
+
+		i915_request_put(rq);
+
+		num_entries -= n_ptes;
+		offset += n_ptes;
+	}
+
+	gen8_ggtt_bind_put_ce(ce);
+	return true;
+
+err_rq:
+	i915_request_put(rq);
+put_ce:
+	gen8_ggtt_bind_put_ce(ce);
+	return false;
+}
+
 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 {
 	writeq(pte, addr);
@@ -272,6 +416,21 @@  static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 	ggtt->invalidate(ggtt);
 }
 
+static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
+				       dma_addr_t addr, u64 offset,
+				       unsigned int pat_index, u32 flags)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen8_pte_t pte;
+
+	pte = ggtt->vm.pte_encode(addr, pat_index, flags);
+	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
+	    gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte))
+		return ggtt->invalidate(ggtt);
+
+	gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags);
+}
+
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct i915_vma_resource *vma_res,
 				     unsigned int pat_index,
@@ -311,6 +470,50 @@  static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	ggtt->invalidate(ggtt);
 }
 
+static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
+					    struct i915_vma_resource *vma_res,
+					    unsigned int pat_index, u32 flags)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+	gen8_pte_t pte_encode;
+	u64 start, end;
+
+	pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
+	start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
+	end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
+	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
+		goto err;
+
+	start = end;
+	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
+	if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
+	      vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
+		goto err;
+
+	start += vma_res->node_size / I915_GTT_PAGE_SIZE;
+	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
+		goto err;
+
+	return true;
+
+err:
+	return false;
+}
+
+static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
+					  struct i915_vma_resource *vma_res,
+					  unsigned int pat_index, u32 flags)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+
+	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
+	    __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
+		return ggtt->invalidate(ggtt);
+
+	gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
+}
+
 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 				  u64 start, u64 length)
 {
@@ -332,6 +535,27 @@  static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 		gen8_set_pte(&gtt_base[i], scratch_pte);
 }
 
+static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
+					 u64 start, u64 length)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, first_entry,
+	     NULL, num_entries, scratch_pte))
+		return ggtt->invalidate(ggtt);
+
+	gen8_ggtt_clear_range(vm, start, length);
+}
+
 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
 				  dma_addr_t addr,
 				  u64 offset,
@@ -997,6 +1221,17 @@  static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 	}
 
+	if (i915_ggtt_require_binder(i915)) {
+		ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
+		ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
+		ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
+		/*
+		 * When the GPU is hung, we might bind VMAs for error capture.
+		 * Fall back to CPU GGTT updates in that case.
+		 */
+		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
+	}
+
 	if (intel_uc_wants_guc(&ggtt->vm.gt->uc))
 		ggtt->invalidate = guc_ggtt_invalidate;
 	else