[v15,10/13] drm/i915/perf: execute OA configuration from command stream

Message ID	20190906093256.26198-11-lionel.g.landwerlin@intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=PfXq=XB=lists.freedesktop.org=intel-gfx-bounces@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org DEC662081B From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> To: intel-gfx@lists.freedesktop.org Date: Fri, 6 Sep 2019 12:32:53 +0300 Message-Id: <20190906093256.26198-11-lionel.g.landwerlin@intel.com> In-Reply-To: <20190906093256.26198-1-lionel.g.landwerlin@intel.com> References: <20190906093256.26198-1-lionel.g.landwerlin@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH v15 10/13] drm/i915/perf: execute OA configuration from command stream Precedence: list Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	drm/i915: Vulkan performance query support | expand [v15,00/13] drm/i915: Vulkan performance query support [v15,01/13] drm/i915: introduce a mechanism to extend execbuf2 [v15,02/13] drm/i915: add syncobj timeline support [v15,03/13] drm/i915/perf: drop list of streams [v15,04/13] drm/i915/perf: store the associated engine of a stream [v15,05/13] drm/i915/perf: introduce a versioning of the i915-perf uapi [v15,06/13] drm/i915/perf: move perf types to their own header [v15,07/13] drm/i915/perf: allow for CS OA configs to be created lazily [v15,08/13] drm/i915/perf: implement active wait for noa configurations [v15,09/13] drm/i915: add wait flags to i915_active_request_retire [v15,10/13] drm/i915/perf: execute OA configuration from command stream [v15,11/13] drm/i915: add a new perf configuration execbuf parameter [v15,12/13] drm/i915/perf: allow holding preemption on filtered ctx [v15,13/13] drm/i915: add support for perf configuration queries

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index f2b778d84b52..8e3532518139 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1558,18 +1558,23 @@ free_oa_configs(struct i915_perf_stream *stream) static void i915_oa_stream_destroy(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; + int err; BUG_ON(stream != dev_priv->perf.exclusive_stream); - /* - * Unset exclusive_stream first, it will be checked while disabling - * the metric set on gen8+. - */ mutex_lock(&dev_priv->drm.struct_mutex); - dev_priv->perf.exclusive_stream = NULL; + mutex_lock(&stream->config_mutex); dev_priv->perf.ops.disable_metric_set(stream); + err = i915_active_request_retire(&stream->active_config_rq, 0, + &stream->config_mutex); + mutex_unlock(&stream->config_mutex); + dev_priv->perf.exclusive_stream = NULL; mutex_unlock(&dev_priv->drm.struct_mutex); + if (err) + DRM_ERROR("Failed to disable perf stream\n"); + + free_oa_buffer(stream); free_noa_wait(stream); @@ -1795,6 +1800,10 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) return PTR_ERR(bo); } + ret = i915_mutex_lock_interruptible(&i915->drm); + if (ret) + goto err_unref; + /* * We pin in GGTT because we jump into this buffer now because * multiple OA config BOs will have a jump to this address and it @@ -1802,10 +1811,13 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) */ vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 4096, 0); if (IS_ERR(vma)) { + mutex_unlock(&i915->drm.struct_mutex); ret = PTR_ERR(vma); goto err_unref; } + mutex_unlock(&i915->drm.struct_mutex); + batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); if (IS_ERR(batch)) { ret = PTR_ERR(batch); @@ -1939,7 +1951,9 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) return 0; err_unpin: - __i915_vma_unpin(vma); + mutex_lock(&i915->drm.struct_mutex); + i915_vma_unpin_and_release(&vma, 0); + mutex_unlock(&i915->drm.struct_mutex); 
err_unref: i915_gem_object_put(bo); @@ -1947,50 +1961,73 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) return ret; } -static void config_oa_regs(struct drm_i915_private *dev_priv, - const struct i915_oa_reg *regs, - u32 n_regs) +static int emit_oa_config(struct drm_i915_private *i915, + struct i915_perf_stream *stream) { - u32 i; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cs; + int err; - for (i = 0; i < n_regs; i++) { - const struct i915_oa_reg *reg = regs + i; + lockdep_assert_held(&stream->config_mutex); + + vma = i915_vma_instance(stream->initial_oa_config_bo, + &stream->engine->gt->ggtt->vm, NULL); + if (unlikely(IS_ERR(vma))) + return PTR_ERR(vma); - I915_WRITE(reg->addr, reg->value); + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_vma_unpin; + + rq = i915_request_create(stream->engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_add_request; } -} -static void delay_after_mux(void) -{ - /* - * It apparently takes a fairly long time for a new MUX - * configuration to be be applied after these register writes. - * This delay duration was derived empirically based on the - * render_basic config but hopefully it covers the maximum - * configuration latency. - * - * As a fallback, the checks in _append_oa_reports() to skip - * invalid OA reports do also seem to work to discard reports - * generated before this config has completed - albeit not - * silently. - * - * Unfortunately this is essentially a magic number, since we - * don't currently know of a reliable mechanism for predicting - * how long the MUX config will take to apply and besides - * seeing invalid reports we don't know of a reliable way to - * explicitly check that the MUX config has landed. - * - * It's even possible we've miss characterized the underlying - * problem - it just seems like the simplest explanation why - * a delay at this location would mitigate any invalid reports. 
- */ - usleep_range(15000, 20000); + err = i915_active_request_set(&stream->active_config_rq, + rq); + if (err) + goto err_add_request; + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, 0); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + if (err) + goto err_add_request; + + cs = intel_ring_begin(rq, INTEL_GEN(i915) >= 8 ? 4 : 2); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_add_request; + } + + if (INTEL_GEN(i915) > 8) { + *cs++ = MI_BATCH_BUFFER_START_GEN8; + *cs++ = lower_32_bits(vma->node.start); + *cs++ = upper_32_bits(vma->node.start); + *cs++ = MI_NOOP; + } else { + *cs++ = MI_BATCH_BUFFER_START; + *cs++ = vma->node.start; + } + + intel_ring_advance(rq, cs); + +err_add_request: + i915_request_add(rq); +err_vma_unpin: + i915_vma_unpin(vma); + + return err; } static int hsw_enable_metric_set(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - const struct i915_oa_config *oa_config = stream->oa_config; /* * PRM: @@ -2007,13 +2044,7 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | GEN6_CSUNIT_CLOCK_GATE_DISABLE)); - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); - delay_after_mux(); - - config_oa_regs(dev_priv, oa_config->b_counter_regs, - oa_config->b_counter_regs_len); - - return 0; + return emit_oa_config(dev_priv, stream); } static void hsw_disable_metric_set(struct i915_perf_stream *stream) @@ -2372,13 +2403,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) if (ret) return ret; - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); - delay_after_mux(); - - config_oa_regs(dev_priv, oa_config->b_counter_regs, - oa_config->b_counter_regs_len); - - return 0; + return emit_oa_config(dev_priv, stream); } static void gen8_disable_metric_set(struct i915_perf_stream *stream) @@ -2597,6 +2622,9 @@ static int 
i915_oa_stream_init(struct i915_perf_stream *stream, stream->engine = props->engine; + INIT_ACTIVE_REQUEST(&stream->active_config_rq, + &stream->config_mutex); + stream->sample_flags |= SAMPLE_OA_REPORT; stream->sample_size += format_size; @@ -2625,8 +2653,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, goto err_noa_wait_alloc; } - ret = i915_perf_get_oa_config(dev_priv, props->metrics_set, - &stream->oa_config); + ret = i915_perf_get_oa_config_and_bo(stream, props->metrics_set, + &stream->oa_config, + &stream->initial_oa_config_bo); if (ret) { DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); goto err_config; @@ -2658,16 +2687,31 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, stream->ops = &i915_oa_stream_ops; dev_priv->perf.exclusive_stream = stream; + mutex_lock(&stream->config_mutex); ret = dev_priv->perf.ops.enable_metric_set(stream); if (ret) { DRM_DEBUG("Unable to enable metric set\n"); - goto err_enable; + /* + * Ignore the return value since we already have an error from + * the enable vfunc. 
+ */ + i915_active_request_retire(&stream->active_config_rq, 0, + &stream->config_mutex); + } else { + ret = i915_active_request_retire(&stream->active_config_rq, 0, + &stream->config_mutex); } - DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid); - + mutex_unlock(&stream->config_mutex); mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_object_put(stream->initial_oa_config_bo); + stream->initial_oa_config_bo = NULL; + if (ret) + goto err_enable; + + DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid); + hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); stream->poll_check_timer.function = oa_poll_check_timer_cb; @@ -2677,8 +2721,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return 0; err_enable: - dev_priv->perf.exclusive_stream = NULL; + mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&stream->config_mutex); dev_priv->perf.ops.disable_metric_set(stream); + mutex_unlock(&stream->config_mutex); + dev_priv->perf.exclusive_stream = NULL; mutex_unlock(&dev_priv->drm.struct_mutex); err_lock: @@ -2690,6 +2737,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, free_oa_configs(stream); + if (stream->initial_oa_config_bo) + i915_gem_object_put(stream->initial_oa_config_bo); + err_config: free_noa_wait(stream); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 695c6ad2fcd5..1ba511c12673 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -166,7 +166,8 @@ struct i915_perf_stream { const struct i915_perf_stream_ops *ops; /** - * @active_config_mutex: Protects access to @oa_config & @oa_config_bos. + * @active_config_mutex: Protects access to @active_config_rq, + * @oa_config & @oa_config_bos. 
*/ struct mutex config_mutex; @@ -181,6 +182,16 @@ struct i915_perf_stream { */ struct list_head oa_config_bos; + /** + * @active_config_rq: Last request reconfiguring the HW. + */ + struct i915_active_request active_config_rq; + + /** + * @initial_oa_config_bo: First OA configuration BO to be run. + */ + struct drm_i915_gem_object *initial_oa_config_bo; + /** * The OA context specific information. */

[v15,10/13] drm/i915/perf: execute OA configuration from command stream

Commit Message

Patch