
[06/12] drm/i915: Show support for accurate sw PMU busyness tracking

Message ID 20190204084116.3013-6-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [01/12] drm/i915: Allow normal clients to always preempt idle priority clients

Commit Message

Chris Wilson Feb. 4, 2019, 8:41 a.m. UTC
Expose whether or not we support the PMU software tracking in our
scheduler capabilities, so userspace can query at runtime.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c         |  2 ++
 drivers/gpu/drm/i915/intel_engine_cs.c  | 38 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c        |  6 ----
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
 include/uapi/drm/i915_drm.h             |  1 +
 5 files changed, 43 insertions(+), 6 deletions(-)
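
For reference, a minimal userspace sketch of how the new capability could be
queried at runtime, assuming the existing I915_PARAM_HAS_SCHEDULER getparam
remains the query path for caps.scheduler (illustrative only, not part of
the patch):

/* Query the scheduler capability bitmask and test the new PMU bit. */
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int has_pmu_busyness(int i915_fd)
{
	int caps = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_SCHEDULER,
		.value = &caps,
	};

	/* An unknown param (very old kernel) is treated as no support. */
	if (ioctl(i915_fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return 0;

	return caps & I915_SCHEDULER_CAP_PMU;
}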

Comments

Tvrtko Ursulin Feb. 4, 2019, 12:14 p.m. UTC | #1
On 04/02/2019 08:41, Chris Wilson wrote:
> Expose whether or not we support the PMU software tracking in our
> scheduler capabilities, so userspace can query at runtime.

I am leaning towards thinking PMU is a backend and not the scheduler 
feature. We could export it via engine discovery for instance.

Do you think the "all must agree" change in logic is interesting enough 
on its own?

Regards,

Tvrtko
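
The "all must agree" logic referred to above reduces to intersecting
per-engine capabilities: a cap is advertised only if every engine supports
it, and a single dissenting engine masks it out for the whole device. A
distilled, standalone sketch of that pattern (illustrative only; the actual
kernel implementation is in the patch below):

#include <stdint.h>
#include <stddef.h>

static uint32_t aggregate_caps(const uint32_t *engine_caps, size_t count)
{
	uint32_t enabled = 0, disabled = 0;
	size_t i;

	for (i = 0; i < count; i++) {
		enabled |= engine_caps[i];	/* bits this engine supports */
		disabled |= ~engine_caps[i];	/* bits this engine lacks */
	}

	/* Only bits that no engine disabled survive. */
	return enabled & ~disabled;
}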


Chris Wilson Feb. 4, 2019, 12:28 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-02-04 12:14:38)
> 
> On 04/02/2019 08:41, Chris Wilson wrote:
> > Expose whether or not we support the PMU software tracking in our
> > scheduler capabilities, so userspace can query at runtime.
> 
> I am leaning towards thinking PMU is a backend and not the scheduler 
> feature. We could export it via engine discovery for instance.

The sw metrics are buggy. They include semaphore time on top of busy,
but historically that has always been separate (and how it's measured by
the HW).
-Chris
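
For reference, the "sw busyness" under discussion is the per-engine busy
counter exported through the i915 perf PMU. A minimal sampling sketch,
assuming the standard perf_event_open() interface, the I915_PMU_ENGINE_BUSY
config encoding and a render engine at instance 0 (error handling mostly
omitted):

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <drm/i915_drm.h>

static uint64_t render_busy_ns(void)
{
	struct perf_event_attr attr = { .size = sizeof(attr) };
	uint64_t busy_ns = 0;
	FILE *f;
	int fd;

	/* The dynamically assigned PMU type id is published in sysfs. */
	f = fopen("/sys/bus/event_source/devices/i915/type", "r");
	if (!f)
		return 0;
	if (fscanf(f, "%u", &attr.type) != 1) {
		fclose(f);
		return 0;
	}
	fclose(f);

	attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0);

	/* i915 is an uncore PMU: open system-wide (pid == -1) on one CPU. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 0;

	if (read(fd, &busy_ns, sizeof(busy_ns)) != sizeof(busy_ns))
		busy_ns = 0;	/* cumulative busy time in nanoseconds */
	close(fd);

	return busy_ns;
}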
Chris Wilson Feb. 4, 2019, 12:29 p.m. UTC | #3
Quoting Chris Wilson (2019-02-04 12:28:04)
> Quoting Tvrtko Ursulin (2019-02-04 12:14:38)
> > 
> > On 04/02/2019 08:41, Chris Wilson wrote:
> > > Expose whether or not we support the PMU software tracking in our
> > > scheduler capabilities, so userspace can query at runtime.
> > 
> > I am leaning towards thinking PMU is a backend and not the scheduler 
> > feature. We could export it via engine discovery for instance.
> 
> The sw metrics are buggy. They include semaphore time on top of busy,
> but historically that has always been separate (and how it's measured by
> the HW).

Furthermore, they aren't even universally available today, breaking the
current assumption of HAS_EXECLISTS -> has SW PMU.
-Chris
Tvrtko Ursulin Feb. 4, 2019, 12:37 p.m. UTC | #4
On 04/02/2019 12:28, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-02-04 12:14:38)
>>
>> On 04/02/2019 08:41, Chris Wilson wrote:
>>> Expose whether or not we support the PMU software tracking in our
>>> scheduler capabilities, so userspace can query at runtime.
>>
>> I am leaning towards thinking PMU is a backend and not the scheduler
>> feature. We could export it via engine discovery for instance.
> 
> The sw metrics are buggy. They include semaphore time on top of busy,
> but historically that has always been separate (and how it's measured by
> the HW).

Time to resurrect the LRCA context runtime patches and see if that is 
consistent in wait vs poll mode.

But, why are the semantics of busy time related to the question of 
whether to expose this flag at engine or scheduler level?

Regards,

Tvrtko
Chris Wilson Feb. 4, 2019, 12:43 p.m. UTC | #5
Quoting Tvrtko Ursulin (2019-02-04 12:37:00)
> 
> On 04/02/2019 12:28, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-02-04 12:14:38)
> >>
> >> On 04/02/2019 08:41, Chris Wilson wrote:
> >>> Expose whether or not we support the PMU software tracking in our
> >>> scheduler capabilities, so userspace can query at runtime.
> >>
> >> I am leaning towards thinking PMU is a backend and not the scheduler
> >> feature. We could export it via engine discovery for instance.
> > 
> > The sw metrics are buggy. They include semaphore time on top of busy,
> > but historically that has always been separate (and how it's measured by
> > the HW).
> 
> Time to resurrect the LRCA context runtime patches and see if that is 
> consistent in wait vs poll mode.
> 
> But, why are the semantics of busy time related to the question of 
> whether to expose this flag at engine or scheduler level?

The accuracy (and meaning) presented to the user currently depends on
internal details that are not exposed. I just piggy-backed caps.scheduler
as it was adjacent to the code and already being used to determine
implementation details from igt/mesa.
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e802af64d628..bc7d1338b69a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4757,6 +4757,8 @@  static int __i915_gem_restart_engines(void *data)
 		}
 	}
 
+	intel_engines_set_scheduler_caps(i915);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 71c01eb13af1..ec2cbbe070a4 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -614,6 +614,44 @@  int intel_engine_setup_common(struct intel_engine_cs *engine)
 	return err;
 }
 
+void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
+{
+	static const struct {
+		u32 engine_flag;
+		u32 sched_cap;
+	} map[] = {
+		{ I915_ENGINE_HAS_PREEMPTION, I915_SCHEDULER_CAP_PREEMPTION },
+		{ I915_ENGINE_SUPPORTS_STATS, I915_SCHEDULER_CAP_PMU },
+	};
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 enabled, disabled;
+
+	enabled = 0;
+	disabled = 0;
+	for_each_engine(engine, i915, id) { /* all engines must agree! */
+		int i;
+
+		if (engine->schedule)
+			enabled |= (I915_SCHEDULER_CAP_ENABLED |
+				    I915_SCHEDULER_CAP_PRIORITY);
+		else
+			disabled |= (I915_SCHEDULER_CAP_ENABLED |
+				     I915_SCHEDULER_CAP_PRIORITY);
+
+		for (i = 0; i < ARRAY_SIZE(map); i++) {
+			if (engine->flags & map[i].engine_flag)
+				enabled |= map[i].sched_cap;
+			else
+				disabled |= map[i].sched_cap;
+		}
+	}
+
+	i915->caps.scheduler = enabled & ~disabled;
+	if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED))
+		i915->caps.scheduler = 0;
+}
+
 static void __intel_context_unpin(struct i915_gem_context *ctx,
 				  struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9b6b3acb9070..0869a4fd20c7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2299,12 +2299,6 @@  void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (engine->i915->preempt_context)
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-
-	engine->i915->caps.scheduler =
-		I915_SCHEDULER_CAP_ENABLED |
-		I915_SCHEDULER_CAP_PRIORITY;
-	if (intel_engine_has_preemption(engine))
-		engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION;
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 983ad1e7914d..610ee351ebee 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -590,6 +590,8 @@  intel_engine_has_preemption(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
 }
 
+void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
+
 static inline bool __execlists_need_preempt(int prio, int last)
 {
 	/*
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 298b2e197744..d8ac7f105734 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -476,6 +476,7 @@  typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_ENABLED	(1ul << 0)
 #define   I915_SCHEDULER_CAP_PRIORITY	(1ul << 1)
 #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
+#define   I915_SCHEDULER_CAP_PMU	(1ul << 3)
 
 #define I915_PARAM_HUC_STATUS		 42