Message ID | 1457523024-12706-1-git-send-email-akash.goel@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Mar 09, 2016 at 05:00:24PM +0530, akash.goel@intel.com wrote: > +static int > +intel_context_get_trtt(struct intel_context *ctx, > + struct drm_i915_gem_context_param *args) > +{ > + struct drm_i915_gem_context_trtt_param trtt_params; > + struct drm_device *dev = ctx->i915->dev; > + > + if (!HAS_TRTT(dev) || !USES_FULL_48BIT_PPGTT(dev)) { Both of these actually inspect dev_priv (and magically convert dev into dev_priv). > + return -ENODEV; > + } else if (args->size < sizeof(trtt_params)) { > + args->size = sizeof(trtt_params); > + } else { > + trtt_params.segment_base_addr = > + ctx->trtt_info.segment_base_addr; > + trtt_params.l3_table_address = > + ctx->trtt_info.l3_table_address; > + trtt_params.null_tile_val = > + ctx->trtt_info.null_tile_val; > + trtt_params.invd_tile_val = > + ctx->trtt_info.invd_tile_val; > + > + if (__copy_to_user(to_user_ptr(args->value), > + &trtt_params, > + sizeof(trtt_params))) > + return -EFAULT; args->size = sizeof(trtt_params); in case the use passed in size > sizeof(trtt_params) we want to report how many bytes we wrote. > + } > + > + return 0; > +} > + > +static int > +intel_context_set_trtt(struct intel_context *ctx, > + struct drm_i915_gem_context_param *args) > +{ > + struct drm_i915_gem_context_trtt_param trtt_params; > + struct drm_device *dev = ctx->i915->dev; > + > + if (!HAS_TRTT(dev) || !USES_FULL_48BIT_PPGTT(dev)) Ditto (dev_priv) > + return -ENODEV; > + else if (ctx->flags & CONTEXT_USE_TRTT) > + return -EEXIST; What locks are we holding here? > + else if (args->size < sizeof(trtt_params)) > + return -EINVAL; > + else if (copy_from_user(&trtt_params, > + to_user_ptr(args->value), > + sizeof(trtt_params))) Because whatever they are, we can't hold them here! (Imagine/write a test that passes in the trtt_params inside a GTT mmaping.) > @@ -923,7 +1015,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, > return PTR_ERR(ctx); > } > > - args->size = 0; Awooga. Does every path then set it? 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 7b8de85..8de0319 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -2169,6 +2169,17 @@ int i915_ppgtt_init_hw(struct drm_device *dev) > { > gtt_write_workarounds(dev); > > + if (HAS_TRTT(dev) && USES_FULL_48BIT_PPGTT(dev)) { > + struct drm_i915_private *dev_priv = dev->dev_private; > + /* > + * Globally enable TR-TT support in Hw. > + * Still TR-TT enabling on per context basis is required. > + * Non-trtt contexts are not affected by this setting. > + */ > + I915_WRITE(GEN9_TR_CHICKEN_BIT_VECTOR, > + GEN9_TRTT_BYPASS_DISABLE); > + } > + > /* In the case of execlists, PPGTT is enabled by the context descriptor > * and the PDPs are contained within the context itself. We don't > * need to do anything here. */ > @@ -3368,6 +3379,57 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, > > } > > +void intel_trtt_context_destroy_vma(struct i915_vma *vma) > +{ > + struct i915_address_space *vm = vma->vm; > + > + WARN_ON(!list_empty(&vma->obj_link)); > + WARN_ON(!list_empty(&vma->vm_link)); > + WARN_ON(!list_empty(&vma->exec_list)); WARN_ON(!vma->pin_count); > + > + drm_mm_remove_node(&vma->node); > + i915_ppgtt_put(i915_vm_to_ppgtt(vm)); > + kmem_cache_free(to_i915(vm->dev)->vmas, vma); > +} > + > +struct i915_vma * > +intel_trtt_context_allocate_vma(struct i915_address_space *vm, > + uint64_t segment_base_addr) > +{ > + struct i915_vma *vma; > + int ret; > + > + vma = kmem_cache_zalloc(to_i915(vm->dev)->vmas, GFP_KERNEL); > + if (!vma) > + return ERR_PTR(-ENOMEM); > + > + INIT_LIST_HEAD(&vma->obj_link); > + INIT_LIST_HEAD(&vma->vm_link); > + INIT_LIST_HEAD(&vma->exec_list); > + vma->vm = vm; > + i915_ppgtt_get(i915_vm_to_ppgtt(vm)); > + > + /* Mark the vma as permanently pinned */ > + vma->pin_count = 1; > + > + /* Reserve from the 48 bit PPGTT space */ > + vma->node.start = segment_base_addr; > + 
vma->node.size = GEN9_TRTT_SEGMENT_SIZE; > + ret = drm_mm_reserve_node(&vm->mm, &vma->node); > + if (ret) { > + ret = i915_gem_evict_for_vma(vma); Given that this has a known GPF, you need a test case that tries to evict an active/hanging object in order to make room for the trtt. > +static int gen9_init_context_trtt(struct drm_i915_gem_request *req) Since TRTT is render only, call this gen9_init_rcs_context_trtt() > static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) > { > struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; > @@ -1693,6 +1757,20 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, > req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring); > } > > + /* > + * Emitting LRIs to update the TRTT registers is most reliable, instead > + * of directly updating the context image, as this will ensure that > + * update happens in a serialized manner for the context and also > + * lite-restore scenario will get handled. > + */ > + if ((req->ring->id == RCS) && req->ctx->trtt_info.update_trtt_params) { > + ret = gen9_emit_trtt_regs(req); > + if (ret) > + return ret; > + > + req->ctx->trtt_info.update_trtt_params = false; Bah. Since we can only update the params once (EEXIST otherwise), we emit the change when the user sets the new params. -Chris
Hi Akash, [auto build test ERROR on drm-intel/for-linux-next] [also build test ERROR on next-20160309] [cannot apply to v4.5-rc7] [if your patch is applied to the wrong git tree, please drop us a note to help improving the system] url: https://github.com/0day-ci/linux/commits/akash-goel-intel-com/drm-i915-Support-to-enable-TRTT-on-GEN9/20160309-192019 base: git://anongit.freedesktop.org/drm-intel for-linux-next config: x86_64-rhel (attached as .config) reproduce: # save the attached .config to linux build tree make ARCH=x86_64 All errors (new ones prefixed by >>): drivers/gpu/drm/i915/i915_gem_context.c: In function 'intel_context_set_trtt': >> drivers/gpu/drm/i915/i915_gem_context.c:570:3: error: implicit declaration of function 'i915_dbg' [-Werror=implicit-function-declaration] i915_dbg(dev, "segment base address not correctly aligned\n"); ^ cc1: some warnings being treated as errors vim +/i915_dbg +570 drivers/gpu/drm/i915/i915_gem_context.c 564 to_user_ptr(args->value), 565 sizeof(trtt_params))) 566 return -EFAULT; 567 568 /* basic sanity checks for the segment location & l3 table pointer */ 569 if (trtt_params.segment_base_addr & (GEN9_TRTT_SEGMENT_SIZE - 1)) { > 570 i915_dbg(dev, "segment base address not correctly aligned\n"); 571 return -EINVAL; 572 } 573 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
On 3/9/2016 5:34 PM, Chris Wilson wrote: > On Wed, Mar 09, 2016 at 05:00:24PM +0530, akash.goel@intel.com wrote: >> +static int >> +intel_context_get_trtt(struct intel_context *ctx, >> + struct drm_i915_gem_context_param *args) >> +{ >> + struct drm_i915_gem_context_trtt_param trtt_params; >> + struct drm_device *dev = ctx->i915->dev; >> + >> + if (!HAS_TRTT(dev) || !USES_FULL_48BIT_PPGTT(dev)) { > > Both of these actually inspect dev_priv (and magically convert dev into > dev_priv). Sorry, my bad. Missed the __I915__ macro. > >> + return -ENODEV; >> + } else if (args->size < sizeof(trtt_params)) { >> + args->size = sizeof(trtt_params); >> + } else { >> + trtt_params.segment_base_addr = >> + ctx->trtt_info.segment_base_addr; >> + trtt_params.l3_table_address = >> + ctx->trtt_info.l3_table_address; >> + trtt_params.null_tile_val = >> + ctx->trtt_info.null_tile_val; >> + trtt_params.invd_tile_val = >> + ctx->trtt_info.invd_tile_val; >> + >> + if (__copy_to_user(to_user_ptr(args->value), >> + &trtt_params, >> + sizeof(trtt_params))) >> + return -EFAULT; > > args->size = sizeof(trtt_params); > > in case the use passed in size > sizeof(trtt_params) we want to report > how many bytes we wrote. fine will add this. > >> + } >> + >> + return 0; >> +} >> + >> +static int >> +intel_context_set_trtt(struct intel_context *ctx, >> + struct drm_i915_gem_context_param *args) >> +{ >> + struct drm_i915_gem_context_trtt_param trtt_params; >> + struct drm_device *dev = ctx->i915->dev; >> + >> + if (!HAS_TRTT(dev) || !USES_FULL_48BIT_PPGTT(dev)) > > Ditto (dev_priv) > >> + return -ENODEV; >> + else if (ctx->flags & CONTEXT_USE_TRTT) >> + return -EEXIST; > > What locks are we holding here? > >> + else if (args->size < sizeof(trtt_params)) >> + return -EINVAL; >> + else if (copy_from_user(&trtt_params, >> + to_user_ptr(args->value), >> + sizeof(trtt_params))) > > Because whatever they are, we can't hold them here! > The struct_mutex lock was taken in the caller, ioctl function. 
Ok, so need to release that before invoking copy_from_user. > (Imagine/write a test that passes in the trtt_params inside a GTT mmaping.) This could cause a recursive locking of struct_mutex from the gem_fault() ? > >> @@ -923,7 +1015,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, >> return PTR_ERR(ctx); >> } >> >> - args->size = 0; > > Awooga. Does every path then set it? > It is being set only for the TRTT case. For the other existing cases, should it be explicitly set to 0, is that really needed ? >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c >> index 7b8de85..8de0319 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >> @@ -2169,6 +2169,17 @@ int i915_ppgtt_init_hw(struct drm_device *dev) >> { >> gtt_write_workarounds(dev); >> >> + if (HAS_TRTT(dev) && USES_FULL_48BIT_PPGTT(dev)) { >> + struct drm_i915_private *dev_priv = dev->dev_private; >> + /* >> + * Globally enable TR-TT support in Hw. >> + * Still TR-TT enabling on per context basis is required. >> + * Non-trtt contexts are not affected by this setting. >> + */ >> + I915_WRITE(GEN9_TR_CHICKEN_BIT_VECTOR, >> + GEN9_TRTT_BYPASS_DISABLE); >> + } >> + >> /* In the case of execlists, PPGTT is enabled by the context descriptor >> * and the PDPs are contained within the context itself. We don't >> * need to do anything here. */ >> @@ -3368,6 +3379,57 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, >> >> } >> >> +void intel_trtt_context_destroy_vma(struct i915_vma *vma) >> +{ >> + struct i915_address_space *vm = vma->vm; >> + >> + WARN_ON(!list_empty(&vma->obj_link)); >> + WARN_ON(!list_empty(&vma->vm_link)); >> + WARN_ON(!list_empty(&vma->exec_list)); > > WARN_ON(!vma->pin_count); Thanks, will add. 
> >> + >> + drm_mm_remove_node(&vma->node); >> + i915_ppgtt_put(i915_vm_to_ppgtt(vm)); >> + kmem_cache_free(to_i915(vm->dev)->vmas, vma); >> +} >> + >> +struct i915_vma * >> +intel_trtt_context_allocate_vma(struct i915_address_space *vm, >> + uint64_t segment_base_addr) >> +{ >> + struct i915_vma *vma; >> + int ret; >> + >> + vma = kmem_cache_zalloc(to_i915(vm->dev)->vmas, GFP_KERNEL); >> + if (!vma) >> + return ERR_PTR(-ENOMEM); >> + >> + INIT_LIST_HEAD(&vma->obj_link); >> + INIT_LIST_HEAD(&vma->vm_link); >> + INIT_LIST_HEAD(&vma->exec_list); >> + vma->vm = vm; >> + i915_ppgtt_get(i915_vm_to_ppgtt(vm)); >> + >> + /* Mark the vma as permanently pinned */ >> + vma->pin_count = 1; >> + >> + /* Reserve from the 48 bit PPGTT space */ >> + vma->node.start = segment_base_addr; >> + vma->node.size = GEN9_TRTT_SEGMENT_SIZE; >> + ret = drm_mm_reserve_node(&vm->mm, &vma->node); >> + if (ret) { >> + ret = i915_gem_evict_for_vma(vma); > > Given that this has a known GPF, you need a test case that tries to > evict an active/hanging object in order to make room for the trtt. > In the new test case, will soft pin objects in TR-TT segment first. Then later on enabling TR-TT, those objects should get evicted. >> +static int gen9_init_context_trtt(struct drm_i915_gem_request *req) > > Since TRTT is render only, call this gen9_init_rcs_context_trtt() > Thanks, will change. >> static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) >> { >> struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; >> @@ -1693,6 +1757,20 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, >> req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring); >> } >> >> + /* >> + * Emitting LRIs to update the TRTT registers is most reliable, instead >> + * of directly updating the context image, as this will ensure that >> + * update happens in a serialized manner for the context and also >> + * lite-restore scenario will get handled. 
>> + */ >> + if ((req->ring->id == RCS) && req->ctx->trtt_info.update_trtt_params) { >> + ret = gen9_emit_trtt_regs(req); >> + if (ret) >> + return ret; >> + >> + req->ctx->trtt_info.update_trtt_params = false; > > Bah. Since we can only update the params once (EEXIST otherwise), > we emit the change when the user sets the new params. Sorry couldn't get this point. We can't emit the params right away when User sets them (only once). We need to emit/apply the params (onetime) in a deferred manner on the next submission. Best regards Akash > -Chris >
On Wed, Mar 09, 2016 at 08:20:07PM +0530, Goel, Akash wrote: > >What locks are we holding here? > > > >>+ else if (args->size < sizeof(trtt_params)) > >>+ return -EINVAL; > >>+ else if (copy_from_user(&trtt_params, > >>+ to_user_ptr(args->value), > >>+ sizeof(trtt_params))) > > > >Because whatever they are, we can't hold them here! > > > The struct_mutex lock was taken in the caller, ioctl function. > Ok, so need to release that before invoking copy_from_user. > > >(Imagine/write a test that passes in the trtt_params inside a GTT mmaping.) > > This could cause a recursive locking of struct_mutex from the gem_fault() ? Exactly. At the least lockdep should warn if we hit a fault along this path (due to the illegal nesting of mmap_sem inside struct_mutex). > > > > >>@@ -923,7 +1015,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, > >> return PTR_ERR(ctx); > >> } > >> > >>- args->size = 0; > > > >Awooga. Does every path then set it? > > > > It is being set only for the TRTT case. For the other existing > cases, should it be explicitly set to 0, is that really needed ? Yes. All other paths need to report .size = 0 (as they don't write through a pointer). 
> >>+struct i915_vma * > >>+intel_trtt_context_allocate_vma(struct i915_address_space *vm, > >>+ uint64_t segment_base_addr) > >>+{ > >>+ struct i915_vma *vma; > >>+ int ret; > >>+ > >>+ vma = kmem_cache_zalloc(to_i915(vm->dev)->vmas, GFP_KERNEL); > >>+ if (!vma) > >>+ return ERR_PTR(-ENOMEM); > >>+ > >>+ INIT_LIST_HEAD(&vma->obj_link); > >>+ INIT_LIST_HEAD(&vma->vm_link); > >>+ INIT_LIST_HEAD(&vma->exec_list); > >>+ vma->vm = vm; > >>+ i915_ppgtt_get(i915_vm_to_ppgtt(vm)); > >>+ > >>+ /* Mark the vma as permanently pinned */ > >>+ vma->pin_count = 1; > >>+ > >>+ /* Reserve from the 48 bit PPGTT space */ > >>+ vma->node.start = segment_base_addr; > >>+ vma->node.size = GEN9_TRTT_SEGMENT_SIZE; > >>+ ret = drm_mm_reserve_node(&vm->mm, &vma->node); > >>+ if (ret) { > >>+ ret = i915_gem_evict_for_vma(vma); > > > >Given that this has a known GPF, you need a test case that tries to > >evict an active/hanging object in order to make room for the trtt. > > > In the new test case, will soft pin objects in TR-TT segment first. > Then later on enabling TR-TT, those objects should get evicted. Yes. But make sure you have combinations of inactive, active, and hanging objects inside the to-be-evicted segment. Those cover the most frequent errors we have to handle (and easiest to reproduce). > >>+static int gen9_init_context_trtt(struct drm_i915_gem_request *req) > > > >Since TRTT is render only, call this gen9_init_rcs_context_trtt() > > > Thanks, will change. 
> > >> static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) > >> { > >> struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; > >>@@ -1693,6 +1757,20 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, > >> req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring); > >> } > >> > >>+ /* > >>+ * Emitting LRIs to update the TRTT registers is most reliable, instead > >>+ * of directly updating the context image, as this will ensure that > >>+ * update happens in a serialized manner for the context and also > >>+ * lite-restore scenario will get handled. > >>+ */ > >>+ if ((req->ring->id == RCS) && req->ctx->trtt_info.update_trtt_params) { > >>+ ret = gen9_emit_trtt_regs(req); > >>+ if (ret) > >>+ return ret; > >>+ > >>+ req->ctx->trtt_info.update_trtt_params = false; > > > >Bah. Since we can only update the params once (EEXIST otherwise), > >we emit the change when the user sets the new params. > > Sorry couldn't get this point. We can't emit the params right away > when User sets them (only once). We need to emit/apply the params > (onetime) in a deferred manner on the next submission. Why can't we? We can construct and submit a request setting the registers inside the right context image at that point, and they never change after that point. -Chris
On 3/9/2016 8:32 PM, Chris Wilson wrote: > On Wed, Mar 09, 2016 at 08:20:07PM +0530, Goel, Akash wrote: >>> What locks are we holding here? >>> >>>> + else if (args->size < sizeof(trtt_params)) >>>> + return -EINVAL; >>>> + else if (copy_from_user(&trtt_params, >>>> + to_user_ptr(args->value), >>>> + sizeof(trtt_params))) >>> >>> Because whatever they are, we can't hold them here! >>> >> The struct_mutex lock was taken in the caller, ioctl function. >> Ok, so need to release that before invoking copy_from_user. >> >>> (Imagine/write a test that passes in the trtt_params inside a GTT mmaping.) >> >> This could cause a recursive locking of struct_mutex from the gem_fault() ? > > Exactly. At the least lockdep should warn if we hit a fault along this > path (due to the illegal nesting of mmap_sem inside struct_mtuex). > I hope it won't look ungainly to unlock the struct_mutex before copy_from_user and lock it back right after that. >> >>> >>>> @@ -923,7 +1015,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, >>>> return PTR_ERR(ctx); >>>> } >>>> >>>> - args->size = 0; >>> >>> Awooga. Does every path then set it? >>> >> >> It is being set only for the TRTT case. For the other existing >> cases, should it be explicitly set to 0, is that really needed ? > > Yes. All other paths need to report .size = 0 (as they don't write > through a pointer). > Fine will add the args->size = 0 for all the other cases. >>>> + /* Mark the vma as permanently pinned */ >>>> + vma->pin_count = 1; >>>> + >>>> + /* Reserve from the 48 bit PPGTT space */ >>>> + vma->node.start = segment_base_addr; >>>> + vma->node.size = GEN9_TRTT_SEGMENT_SIZE; >>>> + ret = drm_mm_reserve_node(&vm->mm, &vma->node); >>>> + if (ret) { >>>> + ret = i915_gem_evict_for_vma(vma); >>> >>> Given that this has a known GPF, you need a test case that tries to >>> evict an active/hanging object in order to make room for the trtt. 
>>> >> In the new test case, will soft pin objects in TR-TT segment first. >> Then later on enabling TR-TT, those objects should get evicted. > > Yes. But make sure you have combinations of inactive, active, and > hanging objects inside the to-be-evicted segment. Those cover the most > frequent errors we have to handle (and easiest to reproduce). > Fine, will refer other tests logic to see how to ensure that previously soft pinned object is still marked as active, when the eviction happens on enabling TR-TT. Sorry what is the hanging object type ? >>>> +static int gen9_init_context_trtt(struct drm_i915_gem_request *req) >>> >>> Since TRTT is render only, call this gen9_init_rcs_context_trtt() >>> >> Thanks, will change. >> >>>> static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) >>>> { >>>> struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; >>>> @@ -1693,6 +1757,20 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, >>>> req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring); >>>> } >>>> >>>> + /* >>>> + * Emitting LRIs to update the TRTT registers is most reliable, instead >>>> + * of directly updating the context image, as this will ensure that >>>> + * update happens in a serialized manner for the context and also >>>> + * lite-restore scenario will get handled. >>>> + */ >>>> + if ((req->ring->id == RCS) && req->ctx->trtt_info.update_trtt_params) { >>>> + ret = gen9_emit_trtt_regs(req); >>>> + if (ret) >>>> + return ret; >>>> + >>>> + req->ctx->trtt_info.update_trtt_params = false; >>> >>> Bah. Since we can only update the params once (EEXIST otherwise), >>> we emit the change when the user sets the new params. >> >> Sorry couldn't get this point. We can't emit the params right away >> when User sets them (only once). We need to emit/apply the params >> (onetime) in a deferred manner on the next submission. > > Why can't we? 
We can construct and submit a request setting the > registers inside the right context image at that point, and they never > change after that point. Ok yes a new request can be constructed & submitted for the Context, emitting the LRIs to update the TRTT params in the Context image. But won't that be relatively cumbersome considering that we are able to easily defer & conflate that with next batch submission, through an extra flag trtt_info.update_trtt_params. Best regards Akash > -Chris >
On Wed, Mar 09, 2016 at 09:26:08PM +0530, Goel, Akash wrote: > > > On 3/9/2016 8:32 PM, Chris Wilson wrote: > >On Wed, Mar 09, 2016 at 08:20:07PM +0530, Goel, Akash wrote: > >>>What locks are we holding here? > >>> > >>>>+ else if (args->size < sizeof(trtt_params)) > >>>>+ return -EINVAL; > >>>>+ else if (copy_from_user(&trtt_params, > >>>>+ to_user_ptr(args->value), > >>>>+ sizeof(trtt_params))) > >>> > >>>Because whatever they are, we can't hold them here! > >>> > >>The struct_mutex lock was taken in the caller, ioctl function. > >>Ok, so need to release that before invoking copy_from_user. > >> > >>>(Imagine/write a test that passes in the trtt_params inside a GTT mmaping.) > >> > >>This could cause a recursive locking of struct_mutex from the gem_fault() ? > > > >Exactly. At the least lockdep should warn if we hit a fault along this > >path (due to the illegal nesting of mmap_sem inside struct_mtuex). > > > > I hope it won't look ungainly to unlock the struct_mutex before > copy_from_user and lock it back right after that. It's what we have to do. However, we have to make sure that we do not > lose state, or the user doesn't interfere, across the unlock. i.e. make > sure we have a reference on the context, double check that the state is > still valid (so do the EEXISTS check after the copy) etc. > >>In the new test case, will soft pin objects in TR-TT segment first. > >>Then later on enabling TR-TT, those objects should get evicted. > > > >Yes. But make sure you have combinations of inactive, active, and > >hanging objects inside the to-be-evicted segment. Those cover the most > >frequent errors we have to handle (and easiest to reproduce). > > > Fine, will refer other tests logic to see how to ensure that > previously soft pinned object is still marked as active, when the > eviction happens on enabling TR-TT. > > Sorry what is the hanging object type ? Submit a recursive batch using the vma inside your trtt region. 
See igt_hang_ctx() if you are free to select the trtt region using the offset generated by igt_hang_ctx() (and for this test you are), then it is very simple. See gem_softpin, test_evict_hang() and test_evict_active(). > >>>>+static int gen9_init_context_trtt(struct drm_i915_gem_request *req) > >>> > >>>Since TRTT is render only, call this gen9_init_rcs_context_trtt() > >>> > >>Thanks, will change. > >> > >>>> static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) > >>>> { > >>>> struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; > >>>>@@ -1693,6 +1757,20 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, > >>>> req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring); > >>>> } > >>>> > >>>>+ /* > >>>>+ * Emitting LRIs to update the TRTT registers is most reliable, instead > >>>>+ * of directly updating the context image, as this will ensure that > >>>>+ * update happens in a serialized manner for the context and also > >>>>+ * lite-restore scenario will get handled. > >>>>+ */ > >>>>+ if ((req->ring->id == RCS) && req->ctx->trtt_info.update_trtt_params) { > >>>>+ ret = gen9_emit_trtt_regs(req); > >>>>+ if (ret) > >>>>+ return ret; > >>>>+ > >>>>+ req->ctx->trtt_info.update_trtt_params = false; > >>> > >>>Bah. Since we can only update the params once (EEXIST otherwise), > >>>we emit the change when the user sets the new params. > >> > >>Sorry couldn't get this point. We can't emit the params right away > >>when User sets them (only once). We need to emit/apply the params > >>(onetime) in a deferred manner on the next submission. > > > >Why can't we? We can construct and submit a request setting the > >registers inside the right context image at that point, and they never > >change after that point. > > Ok yes a new request can be constructed & submitted for the Context, > emitting the LRIs to update the TRTT params in the Context image. 
> But won't that be relatively cumbersome considering that we are able > to easily defer & conflate that with next batch submission, through > an extra flag trtt_info.update_trtt_params. A conditional on every batch vs a one-off ? request = i915_gem_request_alloc(&dev_priv->ring[RCS], ctx); if (IS_ERR(request)) return ERR_PTR(request); ret = gen9_emit_trtt_regs(request); if (ret) { i915_gem_request_cancel(request); return ret; } i915_add_request(request); return 0; Complain to whoever sold you your kernel if it is not that simple. (And that is quite byzantine compared to how it should be!) -Chris
On 3/9/2016 9:51 PM, Chris Wilson wrote: > On Wed, Mar 09, 2016 at 09:26:08PM +0530, Goel, Akash wrote: >> >> >> On 3/9/2016 8:32 PM, Chris Wilson wrote: >>> On Wed, Mar 09, 2016 at 08:20:07PM +0530, Goel, Akash wrote: >>>>> What locks are we holding here? >>>>> >>>>>> + else if (args->size < sizeof(trtt_params)) >>>>>> + return -EINVAL; >>>>>> + else if (copy_from_user(&trtt_params, >>>>>> + to_user_ptr(args->value), >>>>>> + sizeof(trtt_params))) >>>>> >>>>> Because whatever they are, we can't hold them here! >>>>> >>>> The struct_mutex lock was taken in the caller, ioctl function. >>>> Ok, so need to release that before invoking copy_from_user. >>>> >>>>> (Imagine/write a test that passes in the trtt_params inside a GTT mmaping.) >>>> >>>> This could cause a recursive locking of struct_mutex from the gem_fault() ? >>> >>> Exactly. At the least lockdep should warn if we hit a fault along this >>> path (due to the illegal nesting of mmap_sem inside struct_mtuex). >>> >> >> I hope it won't look ungainly to unlock the struct_mutex before >> copy_from_user and lock it back right after that. > > It what's we have to do. However, we have to make sure that we do not > lose state, or the user doesn't interfere, across the unlock. i.e. make > sure we have a reference on the context, double check that the state is > still valid (so do the EEXISTS check after the copy) etc. > Thanks for the inputs, will keep them in mind. >>>> In the new test case, will soft pin objects in TR-TT segment first. >>>> Then later on enabling TR-TT, those objects should get evicted. >>> >>> Yes. But make sure you have combinations of inactive, active, and >>> hanging objects inside the to-be-evicted segment. Those cover the most >>> frequent errors we have to handle (and easiest to reproduce). >>> >> Fine, will refer other tests logic to see how to ensure that >> previously soft pinned object is still marked as active, when the >> eviction happens on enabling TR-TT. 
>> >> Sorry what is the hanging object type ? > > Submit a recursive batch using the vma inside your trtt region. > See igt_hang_ctx() if you are free to select the trtt region using the > offset generated by igt_hang_ctx() (and for this test you are), then it > is very simple. See gem_softpin, test_evict_hang() and > test_evict_active(). > Thanks for suggesting these tests, will refer them. >>>>>> +static int gen9_init_context_trtt(struct drm_i915_gem_request *req) >>>>> >>>>> Since TRTT is render only, call this gen9_init_rcs_context_trtt() >>>>> >>>> Thanks, will change. >>>> >>>>>> static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) >>>>>> { >>>>>> struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; >>>>>> @@ -1693,6 +1757,20 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, >>>>>> req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring); >>>>>> } >>>>>> >>>>>> + /* >>>>>> + * Emitting LRIs to update the TRTT registers is most reliable, instead >>>>>> + * of directly updating the context image, as this will ensure that >>>>>> + * update happens in a serialized manner for the context and also >>>>>> + * lite-restore scenario will get handled. >>>>>> + */ >>>>>> + if ((req->ring->id == RCS) && req->ctx->trtt_info.update_trtt_params) { >>>>>> + ret = gen9_emit_trtt_regs(req); >>>>>> + if (ret) >>>>>> + return ret; >>>>>> + >>>>>> + req->ctx->trtt_info.update_trtt_params = false; >>>>> >>>>> Bah. Since we can only update the params once (EEXIST otherwise), >>>>> we emit the change when the user sets the new params. >>>> >>>> Sorry couldn't get this point. We can't emit the params right away >>>> when User sets them (only once). We need to emit/apply the params >>>> (onetime) in a deferred manner on the next submission. >>> >>> Why can't we? We can construct and submit a request setting the >>> registers inside the right context image at that point, and they never >>> change after that point. 
>> >> Ok yes a new request can be constructed & submitted for the Context, >> emitting the LRIs to update the TRTT params in the Context image. >> But won't that be relatively cumbersome considering that we are able >> to easily defer & conflate that with next batch submission, through >> an extra flag trtt_info.update_trtt_params. > > A conditional on every batch vs a one-off ? > > request = i915_gem_request_alloc(&dev_priv->ring[RCS], ctx); > if (IS_ERR(request)) > return ERR_PTR(request); > > ret = gen9_emit_trtt_regs(request); > if (ret) { > i915_gem_request_cancel(request); > return ret; > } > > i915_add_request(request); > return 0; > > Complain to whoever sold you your kernel if it is not that simple. (And > that is quite byzantine compared to how it should be!) Fine, thanks much for the required code snippet, will update the patch. Sorry actually was bit skeptical about introducing a new non-execbuffer path, from the where the request allocation & submission happens. Best regards Akash > -Chris >
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f7b6caf..d648fdc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -856,6 +856,7 @@ struct i915_ctx_hang_stats { #define DEFAULT_CONTEXT_HANDLE 0 #define CONTEXT_NO_ZEROMAP (1<<0) +#define CONTEXT_USE_TRTT (1 << 1) /** * struct intel_context - as the name implies, represents a context. * @ref: reference count. @@ -870,6 +871,8 @@ struct i915_ctx_hang_stats { * @ppgtt: virtual memory space used by this context. * @legacy_hw_ctx: render context backing object and whether it is correctly * initialized (legacy ring submission mechanism only). + * @trtt_info: Programming parameters for tr-tt (redirection tables for + * userspace, for sparse resource management) * @link: link in the global list of contexts. * * Contexts are memory images used by the hardware to store copies of their @@ -880,7 +883,7 @@ struct intel_context { int user_handle; uint8_t remap_slice; struct drm_i915_private *i915; - int flags; + unsigned int flags; struct drm_i915_file_private *file_priv; struct i915_ctx_hang_stats hang_stats; struct i915_hw_ppgtt *ppgtt; @@ -901,6 +904,16 @@ struct intel_context { uint32_t *lrc_reg_state; } engine[I915_NUM_RINGS]; + /* TRTT info */ + struct intel_context_trtt { + u32 invd_tile_val; + u32 null_tile_val; + u64 l3_table_address; + u64 segment_base_addr; + struct i915_vma *vma; + bool update_trtt_params; + } trtt_info; + struct list_head link; }; @@ -2703,6 +2716,8 @@ struct drm_i915_cmd_table { !IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) && \ !IS_BROXTON(dev)) +#define HAS_TRTT(dev) (IS_GEN9(dev)) + #define INTEL_PCH_DEVICE_ID_MASK 0xff00 #define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 #define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00 diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 5dd84e1..0e4c6c2 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ 
-133,6 +133,14 @@ static int get_context_size(struct drm_device *dev) return ret; } +static void intel_context_free_trtt(struct intel_context *ctx) +{ + if (!ctx->trtt_info.vma) + return; + + intel_trtt_context_destroy_vma(ctx->trtt_info.vma); +} + static void i915_gem_context_clean(struct intel_context *ctx) { struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; @@ -164,6 +172,8 @@ void i915_gem_context_free(struct kref *ctx_ref) */ i915_gem_context_clean(ctx); + intel_context_free_trtt(ctx); + i915_ppgtt_put(ctx->ppgtt); if (ctx->legacy_hw_ctx.rcs_state) @@ -507,6 +517,88 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) return ctx; } +static int +intel_context_get_trtt(struct intel_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_gem_context_trtt_param trtt_params; + struct drm_device *dev = ctx->i915->dev; + + if (!HAS_TRTT(dev) || !USES_FULL_48BIT_PPGTT(dev)) { + return -ENODEV; + } else if (args->size < sizeof(trtt_params)) { + args->size = sizeof(trtt_params); + } else { + trtt_params.segment_base_addr = + ctx->trtt_info.segment_base_addr; + trtt_params.l3_table_address = + ctx->trtt_info.l3_table_address; + trtt_params.null_tile_val = + ctx->trtt_info.null_tile_val; + trtt_params.invd_tile_val = + ctx->trtt_info.invd_tile_val; + + if (__copy_to_user(to_user_ptr(args->value), + &trtt_params, + sizeof(trtt_params))) + return -EFAULT; + } + + return 0; +} + +static int +intel_context_set_trtt(struct intel_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_gem_context_trtt_param trtt_params; + struct drm_device *dev = ctx->i915->dev; + + if (!HAS_TRTT(dev) || !USES_FULL_48BIT_PPGTT(dev)) + return -ENODEV; + else if (ctx->flags & CONTEXT_USE_TRTT) + return -EEXIST; + else if (args->size < sizeof(trtt_params)) + return -EINVAL; + else if (copy_from_user(&trtt_params, + to_user_ptr(args->value), + sizeof(trtt_params))) + return -EFAULT; + + /* basic sanity checks for the segment location & l3 
table pointer */ + if (trtt_params.segment_base_addr & (GEN9_TRTT_SEGMENT_SIZE - 1)) { + i915_dbg(dev, "segment base address not correctly aligned\n"); + return -EINVAL; + } + + if (((trtt_params.l3_table_address + PAGE_SIZE) >= + trtt_params.segment_base_addr) && + (trtt_params.l3_table_address < + (trtt_params.segment_base_addr + GEN9_TRTT_SEGMENT_SIZE))) { + i915_dbg(dev, "l3 table address conflicts with trtt segment\n"); + return -EINVAL; + } + + if (trtt_params.l3_table_address & ~GEN9_TRTT_L3_GFXADDR_MASK) { + i915_dbg(dev, "invalid l3 table address\n"); + return -EINVAL; + } + + ctx->trtt_info.vma = intel_trtt_context_allocate_vma(&ctx->ppgtt->base, + trtt_params.segment_base_addr); + if (IS_ERR(ctx->trtt_info.vma)) + return PTR_ERR(ctx->trtt_info.vma); + + ctx->trtt_info.null_tile_val = trtt_params.null_tile_val; + ctx->trtt_info.invd_tile_val = trtt_params.invd_tile_val; + ctx->trtt_info.l3_table_address = trtt_params.l3_table_address; + ctx->trtt_info.segment_base_addr = trtt_params.segment_base_addr; + ctx->trtt_info.update_trtt_params = 1; + + ctx->flags |= CONTEXT_USE_TRTT; + return 0; +} + static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { @@ -923,7 +1015,6 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, return PTR_ERR(ctx); } - args->size = 0; switch (args->param) { case I915_CONTEXT_PARAM_BAN_PERIOD: args->value = ctx->hang_stats.ban_period_seconds; @@ -939,6 +1030,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, else args->value = to_i915(dev)->gtt.base.total; break; + case I915_CONTEXT_PARAM_TRTT: + ret = intel_context_get_trtt(ctx, args); + break; default: ret = -EINVAL; break; @@ -984,6 +1078,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, ctx->flags |= args->value ? 
CONTEXT_NO_ZEROMAP : 0; } break; + case I915_CONTEXT_PARAM_TRTT: + ret = intel_context_set_trtt(ctx, args); + break; default: ret = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7b8de85..8de0319 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2169,6 +2169,17 @@ int i915_ppgtt_init_hw(struct drm_device *dev) { gtt_write_workarounds(dev); + if (HAS_TRTT(dev) && USES_FULL_48BIT_PPGTT(dev)) { + struct drm_i915_private *dev_priv = dev->dev_private; + /* + * Globally enable TR-TT support in Hw. + * Still TR-TT enabling on per context basis is required. + * Non-trtt contexts are not affected by this setting. + */ + I915_WRITE(GEN9_TR_CHICKEN_BIT_VECTOR, + GEN9_TRTT_BYPASS_DISABLE); + } + /* In the case of execlists, PPGTT is enabled by the context descriptor * and the PDPs are contained within the context itself. We don't * need to do anything here. */ @@ -3368,6 +3379,57 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, } +void intel_trtt_context_destroy_vma(struct i915_vma *vma) +{ + struct i915_address_space *vm = vma->vm; + + WARN_ON(!list_empty(&vma->obj_link)); + WARN_ON(!list_empty(&vma->vm_link)); + WARN_ON(!list_empty(&vma->exec_list)); + + drm_mm_remove_node(&vma->node); + i915_ppgtt_put(i915_vm_to_ppgtt(vm)); + kmem_cache_free(to_i915(vm->dev)->vmas, vma); +} + +struct i915_vma * +intel_trtt_context_allocate_vma(struct i915_address_space *vm, + uint64_t segment_base_addr) +{ + struct i915_vma *vma; + int ret; + + vma = kmem_cache_zalloc(to_i915(vm->dev)->vmas, GFP_KERNEL); + if (!vma) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&vma->obj_link); + INIT_LIST_HEAD(&vma->vm_link); + INIT_LIST_HEAD(&vma->exec_list); + vma->vm = vm; + i915_ppgtt_get(i915_vm_to_ppgtt(vm)); + + /* Mark the vma as permanently pinned */ + vma->pin_count = 1; + + /* Reserve from the 48 bit PPGTT space */ + vma->node.start = segment_base_addr; + 
vma->node.size = GEN9_TRTT_SEGMENT_SIZE; + ret = drm_mm_reserve_node(&vm->mm, &vma->node); + if (ret) { + ret = i915_gem_evict_for_vma(vma); + if (ret == 0) + ret = drm_mm_reserve_node(&vm->mm, &vma->node); + } + if (ret) { + DRM_ERROR("Reservation for TRTT segment failed: %i\n", ret); + intel_trtt_context_destroy_vma(vma); + return ERR_PTR(ret); + } + + return vma; +} + static struct scatterlist * rotate_pages(const dma_addr_t *in, unsigned int offset, unsigned int width, unsigned int height, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index dc208c0..2374cb1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -128,6 +128,10 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) #define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8)) +/* Fixed size segment */ +#define GEN9_TRTT_SEG_SIZE_SHIFT 44 +#define GEN9_TRTT_SEGMENT_SIZE (1ULL << GEN9_TRTT_SEG_SIZE_SHIFT) + enum i915_ggtt_view_type { I915_GGTT_VIEW_NORMAL = 0, I915_GGTT_VIEW_ROTATED, @@ -562,4 +566,8 @@ size_t i915_ggtt_view_size(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view); +struct i915_vma * +intel_trtt_context_allocate_vma(struct i915_address_space *vm, + uint64_t segment_base_addr); +void intel_trtt_context_destroy_vma(struct i915_vma *vma); #endif diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 71abf57..0f32021 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -186,6 +186,25 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_RPCS_EU_MIN_SHIFT 0 #define GEN8_RPCS_EU_MIN_MASK (0xf << GEN8_RPCS_EU_MIN_SHIFT) +#define GEN9_TR_CHICKEN_BIT_VECTOR _MMIO(0x4DFC) +#define GEN9_TRTT_BYPASS_DISABLE (1 << 0) + +/* TRTT registers in the H/W Context */ +#define GEN9_TRTT_L3_POINTER_DW0 _MMIO(0x4DE0) +#define GEN9_TRTT_L3_POINTER_DW1 _MMIO(0x4DE4) +#define GEN9_TRTT_L3_GFXADDR_MASK 0xFFFFFFFF0000 + 
+#define GEN9_TRTT_NULL_TILE_REG _MMIO(0x4DE8) +#define GEN9_TRTT_INVD_TILE_REG _MMIO(0x4DEC) + +#define GEN9_TRTT_VA_MASKDATA _MMIO(0x4DF0) +#define GEN9_TRVA_MASK_VALUE 0xF0 +#define GEN9_TRVA_DATA_MASK 0xF + +#define GEN9_TRTT_TABLE_CONTROL _MMIO(0x4DF4) +#define GEN9_TRTT_IN_GFX_VA_SPACE (1 << 1) +#define GEN9_TRTT_ENABLE (1 << 0) + #define GAM_ECOCHK _MMIO(0x4090) #define BDW_DISABLE_HDC_INVALIDATION (1<<25) #define ECOCHK_SNB_BIT (1<<10) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 27c9ee3..4186e2c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1640,6 +1640,70 @@ static int gen9_init_render_ring(struct intel_engine_cs *ring) return init_workarounds_ring(ring); } +static int gen9_init_context_trtt(struct drm_i915_gem_request *req) +{ + struct intel_ringbuffer *ringbuf = req->ringbuf; + int ret; + + ret = intel_logical_ring_begin(req, 2 + 2); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); + + intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_TABLE_CONTROL); + intel_logical_ring_emit(ringbuf, 0); + + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance(ringbuf); + + return 0; +} + +static int gen9_emit_trtt_regs(struct drm_i915_gem_request *req) +{ + struct intel_context *ctx = req->ctx; + struct intel_ringbuffer *ringbuf = req->ringbuf; + u64 masked_l3_gfx_address = + ctx->trtt_info.l3_table_address & GEN9_TRTT_L3_GFXADDR_MASK; + u32 trva_data_value = + (ctx->trtt_info.segment_base_addr >> GEN9_TRTT_SEG_SIZE_SHIFT) & + GEN9_TRVA_DATA_MASK; + const int num_lri_cmds = 6; + int ret; + + ret = intel_logical_ring_begin(req, num_lri_cmds * 2 + 2); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds)); + + intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_L3_POINTER_DW0); + intel_logical_ring_emit(ringbuf, lower_32_bits(masked_l3_gfx_address)); + + intel_logical_ring_emit_reg(ringbuf, 
GEN9_TRTT_L3_POINTER_DW1); + intel_logical_ring_emit(ringbuf, upper_32_bits(masked_l3_gfx_address)); + + intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_NULL_TILE_REG); + intel_logical_ring_emit(ringbuf, ctx->trtt_info.null_tile_val); + + intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_INVD_TILE_REG); + intel_logical_ring_emit(ringbuf, ctx->trtt_info.invd_tile_val); + + intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_VA_MASKDATA); + intel_logical_ring_emit(ringbuf, + GEN9_TRVA_MASK_VALUE | trva_data_value); + + intel_logical_ring_emit_reg(ringbuf, GEN9_TRTT_TABLE_CONTROL); + intel_logical_ring_emit(ringbuf, + GEN9_TRTT_IN_GFX_VA_SPACE | GEN9_TRTT_ENABLE); + + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance(ringbuf); + + return 0; +} + static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; @@ -1693,6 +1757,20 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring); } + /* + * Emitting LRIs to update the TRTT registers is most reliable, instead + * of directly updating the context image, as this will ensure that + * update happens in a serialized manner for the context and also + * lite-restore scenario will get handled. + */ + if ((req->ring->id == RCS) && req->ctx->trtt_info.update_trtt_params) { + ret = gen9_emit_trtt_regs(req); + if (ret) + return ret; + + req->ctx->trtt_info.update_trtt_params = false; + } + ret = intel_logical_ring_begin(req, 4); if (ret) return ret; @@ -1994,6 +2072,25 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) return intel_lr_context_render_state_init(req); } +static int gen9_init_rcs_context(struct drm_i915_gem_request *req) +{ + int ret; + + /* + * Explicitly disable TR-TT at the start of a new context. 
+ * Otherwise on switching from a TR-TT context to a new Non TR-TT + * context the TR-TT settings of the outgoing context could get + * spilled on to the new incoming context as only the Ring Context + * part is loaded on the first submission of a new context, due to + * the setting of ENGINE_CTX_RESTORE_INHIBIT bit. + */ + ret = gen9_init_context_trtt(req); + if (ret) + return ret; + + return gen8_init_rcs_context(req); +} + /** * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer * @@ -2125,11 +2222,14 @@ static int logical_render_ring_init(struct drm_device *dev) logical_ring_default_vfuncs(dev, ring); /* Override some for render ring. */ - if (INTEL_INFO(dev)->gen >= 9) + if (INTEL_INFO(dev)->gen >= 9) { ring->init_hw = gen9_init_render_ring; - else + ring->init_context = gen9_init_rcs_context; + } else { ring->init_hw = gen8_init_render_ring; - ring->init_context = gen8_init_rcs_context; + ring->init_context = gen8_init_rcs_context; + } + ring->cleanup = intel_fini_pipe_control; ring->emit_flush = gen8_emit_flush_render; ring->emit_request = gen8_emit_request_render; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index a5524cc..604da23 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1167,7 +1167,15 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 +#define I915_CONTEXT_PARAM_TRTT 0x4 __u64 value; }; +struct drm_i915_gem_context_trtt_param { + __u64 segment_base_addr; + __u64 l3_table_address; + __u32 invd_tile_val; + __u32 null_tile_val; +}; + #endif /* _UAPI_I915_DRM_H_ */