Message ID | 20180912152930.28237-1-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | i915/oa: Simplify updating contexts | expand |
On 12/09/2018 16:29, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > We can remove the update-via-batch-buffer code path, which is basically an > effective duplicate of update-via-context-image path, if we notice that > after we have idled the GPU, we can update the context image even of the > kernel context directly. (Update-via-batch-buffer path existed only to > solve the problem of how to update the kernel context image.) > > Only additional thing needed is to activate the edited configuration by > sending one empty request down the pipe. This accomplishes context restore > of the updated kernel context and so the OA configuration gets written out > to it's control registers. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > Cc: Matthew Auld <matthew.auld@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> After the discussion we had on IRC about context save/restore timings, this looks good to me : Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > drivers/gpu/drm/i915/i915_perf.c | 122 ++++--------------------------- > 1 file changed, 13 insertions(+), 109 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index ccb20230df2c..3d7a052b4cca 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -1679,107 +1679,6 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, > } > } > > -/* > - * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This > - * is only used by the kernel context. > - */ > -static int gen8_emit_oa_config(struct i915_request *rq, > - const struct i915_oa_config *oa_config) > -{ > - struct drm_i915_private *dev_priv = rq->i915; > - /* The MMIO offsets for Flex EU registers aren't contiguous */ > - u32 flex_mmio[] = { > - i915_mmio_reg_offset(EU_PERF_CNTL0), > - i915_mmio_reg_offset(EU_PERF_CNTL1), > - i915_mmio_reg_offset(EU_PERF_CNTL2), > - i915_mmio_reg_offset(EU_PERF_CNTL3), > - i915_mmio_reg_offset(EU_PERF_CNTL4), > - i915_mmio_reg_offset(EU_PERF_CNTL5), > - i915_mmio_reg_offset(EU_PERF_CNTL6), > - }; > - u32 *cs; > - int i; > - > - cs = intel_ring_begin(rq, ARRAY_SIZE(flex_mmio) * 2 + 4); > - if (IS_ERR(cs)) > - return PTR_ERR(cs); > - > - *cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1); > - > - *cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL); > - *cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | > - (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | > - GEN8_OA_COUNTER_RESUME; > - > - for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) { > - u32 mmio = flex_mmio[i]; > - > - /* > - * This arbitrary default will select the 'EU FPU0 Pipeline > - * Active' event. In the future it's anticipated that there > - * will be an explicit 'No Event' we can select, but not > - * yet... > - */ > - u32 value = 0; > - > - if (oa_config) { > - u32 j; > - > - for (j = 0; j < oa_config->flex_regs_len; j++) { > - if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) { > - value = oa_config->flex_regs[j].value; > - break; > - } > - } > - } > - > - *cs++ = mmio; > - *cs++ = value; > - } > - > - *cs++ = MI_NOOP; > - intel_ring_advance(rq, cs); > - > - return 0; > -} > - > -static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv, > - const struct i915_oa_config *oa_config) > -{ > - struct intel_engine_cs *engine = dev_priv->engine[RCS]; > - struct i915_timeline *timeline; > - struct i915_request *rq; > - int ret; > - > - lockdep_assert_held(&dev_priv->drm.struct_mutex); > - > - i915_retire_requests(dev_priv); > - > - rq = i915_request_alloc(engine, dev_priv->kernel_context); > - if (IS_ERR(rq)) > - return PTR_ERR(rq); > - > - ret = gen8_emit_oa_config(rq, oa_config); > - if (ret) { > - i915_request_add(rq); > - return ret; > - } > - > - /* Queue this switch after all other activity */ > - list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { > - struct i915_request *prev; > - > - prev = i915_gem_active_raw(&timeline->last_request, > - &dev_priv->drm.struct_mutex); > - if (prev) > - i915_request_await_dma_fence(rq, &prev->fence); > - } > - > - i915_request_add(rq); > - > - return 0; > -} > - > /* > * Manages updating the per-context aspects of the OA stream > * configuration across all contexts. > @@ -1809,16 +1708,11 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, > { > struct intel_engine_cs *engine = dev_priv->engine[RCS]; > struct i915_gem_context *ctx; > + struct i915_request *rq; > int ret; > - unsigned int wait_flags = I915_WAIT_LOCKED; > > lockdep_assert_held(&dev_priv->drm.struct_mutex); > > - /* Switch away from any user context. */ > - ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config); > - if (ret) > - return ret; > - > /* > * The OA register config is setup through the context image. This image > * might be written to by the GPU on context switch (in particular on > @@ -1833,7 +1727,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, > * the GPU from any submitted work. > */ > ret = i915_gem_wait_for_idle(dev_priv, > - wait_flags, > + I915_WAIT_LOCKED, > MAX_SCHEDULE_TIMEOUT); > if (ret) > return ret; > @@ -1859,7 +1753,17 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, > i915_gem_object_unpin_map(ce->state->obj); > } > > - return ret; > + /* > + * Apply the configuration by doing one context restore of the edited > + * context image. > + */ > + rq = i915_request_alloc(engine, dev_priv->kernel_context); > + if (IS_ERR(rq)) > + return PTR_ERR(rq); > + > + i915_request_add(rq); > + > + return 0; > } > > static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
Quoting Tvrtko Ursulin (2018-09-12 16:29:30) > /* > * The OA register config is setup through the context image. This image > * might be written to by the GPU on context switch (in particular on > @@ -1833,7 +1727,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, > * the GPU from any submitted work. > */ > ret = i915_gem_wait_for_idle(dev_priv, > - wait_flags, > + I915_WAIT_LOCKED, > MAX_SCHEDULE_TIMEOUT); Wait until idle includes a wait for the gpu to switch off. At least it does for execlists, not so clear for ringbuffer as there is no explicit idle-event. However, that shouldn't matter as the kernel context doesn't exist for legacy ringbuffer anyway ;) But the reload will be forced on the next actual use. > if (ret) > return ret; > @@ -1859,7 +1753,17 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, > i915_gem_object_unpin_map(ce->state->obj); > } > > - return ret; > + /* > + * Apply the configuration by doing one context restore of the edited > + * context image. > + */ > + rq = i915_request_alloc(engine, dev_priv->kernel_context); By feeding a request, you ensure the reconfig is loaded again. +1 for having it turn off when idle (and not instrument the kernel context at all)! Still I follow your logic that this should leave the oa config in exactly the same state as before the patch, so Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> -Chris
On 12/09/2018 17:58, Patchwork wrote: > == Series Details == > > Series: i915/oa: Simplify updating contexts > URL : https://patchwork.freedesktop.org/series/49569/ > State : success > > == Summary == > > = CI Bug Log - changes from CI_DRM_4813 -> Patchwork_10158 = > > == Summary - SUCCESS == > > No regressions found. > > External URL: https://patchwork.freedesktop.org/api/1.0/series/49569/revisions/1/mbox/ > > == Known issues == > > Here are the changes found in Patchwork_10158 that come from known issues: > > === IGT changes === > > ==== Issues hit ==== > > igt@gem_exec_suspend@basic-s3: > fi-bdw-samus: PASS -> INCOMPLETE (fdo#107773) > > igt@kms_frontbuffer_tracking@basic: > fi-hsw-peppy: PASS -> DMESG-WARN (fdo#102614) > > igt@kms_pipe_crc_basic@hang-read-crc-pipe-a: > fi-byt-clapper: PASS -> FAIL (fdo#103191, fdo#107362) > > igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b: > fi-blb-e6850: PASS -> INCOMPLETE (fdo#107718) > > > ==== Possible fixes ==== > > igt@drv_selftest@live_hangcheck: > fi-cfl-guc: DMESG-FAIL (fdo#107710) -> PASS > > igt@gem_exec_suspend@basic-s4-devices: > fi-kbl-7500u: DMESG-WARN (fdo#105128, fdo#107139) -> PASS > > igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c: > fi-bxt-dsi: INCOMPLETE (fdo#103927) -> PASS > > > fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614 > fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191 > fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927 > fdo#105128 https://bugs.freedesktop.org/show_bug.cgi?id=105128 > fdo#107139 https://bugs.freedesktop.org/show_bug.cgi?id=107139 > fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362 > fdo#107710 https://bugs.freedesktop.org/show_bug.cgi?id=107710 > fdo#107718 https://bugs.freedesktop.org/show_bug.cgi?id=107718 > fdo#107773 https://bugs.freedesktop.org/show_bug.cgi?id=107773 > > > == Participating hosts (51 -> 45) == > > Missing (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-glk-j4005 > > > == Build changes == > > * Linux: CI_DRM_4813 -> Patchwork_10158 > > CI_DRM_4813: 3c13515b12339366b414637b69227a4e3cbe21ae @ git://anongit.freedesktop.org/gfx-ci/linux > IGT_4640: 9a8da36e708f9ed15b20689dfe305e41f9a19008 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools > Patchwork_10158: 2a916e5572e4e76f7a31d748b15b417b280e8123 @ git://anongit.freedesktop.org/gfx-ci/linux > > > == Linux commits == > > 2a916e5572e4 i915/oa: Simplify updating contexts Pushed, thanks for the brainstorming and reviews! Regards, Tvrtko
On 12/09/2018 16:50, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-09-12 16:29:30) >> /* >> * The OA register config is setup through the context image. This image >> * might be written to by the GPU on context switch (in particular on >> @@ -1833,7 +1727,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, >> * the GPU from any submitted work. >> */ >> ret = i915_gem_wait_for_idle(dev_priv, >> - wait_flags, >> + I915_WAIT_LOCKED, >> MAX_SCHEDULE_TIMEOUT); > > Wait until idle includes a wait for the gpu to switch off. At least it > does for execlists, not so clear for ringbuffer as there is no explicit > idle-event. However, that shouldn't matter as the kernel context doesn't > exist for legacy ringbuffer anyway ;) But the reload will be forced on > the next actual use. And on top this is only called on Gen8+! >> if (ret) >> return ret; >> @@ -1859,7 +1753,17 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, >> i915_gem_object_unpin_map(ce->state->obj); >> } >> >> - return ret; >> + /* >> + * Apply the configuration by doing one context restore of the edited >> + * context image. >> + */ >> + rq = i915_request_alloc(engine, dev_priv->kernel_context); > > By feeding a request, you ensure the reconfig is loaded again. +1 for > having it turn off when idle (and not instrument the kernel context at > all)! > > Still I follow your logic that this should leave the oa config in > exactly the same state as before the patch, so > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Thanks, yeah, I am not sure excluding kernel context is possible. If I understand the comments in i915_perf.c, and how much Lionel explained to me, when on we want it on all the time so sampling timers are always on regardless of context switches. Regards, Tvrtko
Quoting Tvrtko Ursulin (2018-09-13 09:54:15) > > On 12/09/2018 16:50, Chris Wilson wrote: > > By feeding a request, you ensure the reconfig is loaded again. +1 for > > having it turn off when idle (and not instrument the kernel context at > > all)! > > > > Still I follow your logic that this should leave the oa config in > > exactly the same state as before the patch, so > > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> > > Thanks, yeah, I am not sure excluding kernel context is possible. If I > understand the comments in i915_perf.c, and how much Lionel explained to > me, when on we want it on all the time so sampling timers are always on > regardless of context switches. I was just thinking along the lines of not having the sampling active when we are idle... But doesn't it actually break our powersaving model entirely if the GT remains active after we drop our wakeref for it? -Chris
Quoting Chris Wilson (2018-09-13 09:58:59) > Quoting Tvrtko Ursulin (2018-09-13 09:54:15) > > > > On 12/09/2018 16:50, Chris Wilson wrote: > > > By feeding a request, you ensure the reconfig is loaded again. +1 for > > > having it turn off when idle (and not instrument the kernel context at > > > all)! > > > > > > Still I follow your logic that this should leave the oa config in > > > exactly the same state as before the patch, so > > > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> > > > > Thanks, yeah, I am not sure excluding kernel context is possible. If I > > understand the comments in i915_perf.c, and how much Lionel explained to > > me, when on we want it on all the time so sampling timers are always on > > regardless of context switches. > > I was just thinking along the lines of not having the sampling active > when we are idle... But doesn't it actually break our powersaving model > entirely if the GT remains active after we drop our wakeref for it? Ah, oa being the brute tries to keep the device awake. But iirc it only takes the forcewake and not a rpm wakeref. By the argument above, it needs both. -Chris
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index ccb20230df2c..3d7a052b4cca 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1679,107 +1679,6 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, } } -/* - * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This - * is only used by the kernel context. - */ -static int gen8_emit_oa_config(struct i915_request *rq, - const struct i915_oa_config *oa_config) -{ - struct drm_i915_private *dev_priv = rq->i915; - /* The MMIO offsets for Flex EU registers aren't contiguous */ - u32 flex_mmio[] = { - i915_mmio_reg_offset(EU_PERF_CNTL0), - i915_mmio_reg_offset(EU_PERF_CNTL1), - i915_mmio_reg_offset(EU_PERF_CNTL2), - i915_mmio_reg_offset(EU_PERF_CNTL3), - i915_mmio_reg_offset(EU_PERF_CNTL4), - i915_mmio_reg_offset(EU_PERF_CNTL5), - i915_mmio_reg_offset(EU_PERF_CNTL6), - }; - u32 *cs; - int i; - - cs = intel_ring_begin(rq, ARRAY_SIZE(flex_mmio) * 2 + 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1); - - *cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL); - *cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME; - - for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) { - u32 mmio = flex_mmio[i]; - - /* - * This arbitrary default will select the 'EU FPU0 Pipeline - * Active' event. In the future it's anticipated that there - * will be an explicit 'No Event' we can select, but not - * yet... - */ - u32 value = 0; - - if (oa_config) { - u32 j; - - for (j = 0; j < oa_config->flex_regs_len; j++) { - if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) { - value = oa_config->flex_regs[j].value; - break; - } - } - } - - *cs++ = mmio; - *cs++ = value; - } - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv, - const struct i915_oa_config *oa_config) -{ - struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct i915_timeline *timeline; - struct i915_request *rq; - int ret; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - i915_retire_requests(dev_priv); - - rq = i915_request_alloc(engine, dev_priv->kernel_context); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - ret = gen8_emit_oa_config(rq, oa_config); - if (ret) { - i915_request_add(rq); - return ret; - } - - /* Queue this switch after all other activity */ - list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { - struct i915_request *prev; - - prev = i915_gem_active_raw(&timeline->last_request, - &dev_priv->drm.struct_mutex); - if (prev) - i915_request_await_dma_fence(rq, &prev->fence); - } - - i915_request_add(rq); - - return 0; -} - /* * Manages updating the per-context aspects of the OA stream * configuration across all contexts. @@ -1809,16 +1708,11 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, { struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct i915_gem_context *ctx; + struct i915_request *rq; int ret; - unsigned int wait_flags = I915_WAIT_LOCKED; lockdep_assert_held(&dev_priv->drm.struct_mutex); - /* Switch away from any user context. */ - ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config); - if (ret) - return ret; - /* * The OA register config is setup through the context image. This image * might be written to by the GPU on context switch (in particular on @@ -1833,7 +1727,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, * the GPU from any submitted work. */ ret = i915_gem_wait_for_idle(dev_priv, - wait_flags, + I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); if (ret) return ret; @@ -1859,7 +1753,17 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, i915_gem_object_unpin_map(ce->state->obj); } - return ret; + /* + * Apply the configuration by doing one context restore of the edited + * context image. + */ + rq = i915_request_alloc(engine, dev_priv->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_add(rq); + + return 0; } static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,