[4/4] drm/i915: Predictive governor to control eu/slice/subslice based on workload

Message ID 1537521230-22904-5-git-send-email-kedar.j.karanje@intel.com (mailing list archive)
State New, archived
Series Dynamic EU configuration of Slice/Subslice/EU.

Commit Message

Kedar J. Karanje Sept. 21, 2018, 9:13 a.m. UTC
From: Praveen Diwakar <praveen.diwakar@intel.com>

A high resolution timer is used to periodically classify per-context load and apply the optimum slice/subslice/EU configuration.

Debugfs is provided to enable/disable/update the timer configuration.

Change-Id: I35d692c5afe962fcad4573185bc6f744487711d0
Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
Signed-off-by: Yogesh Marathe <yogesh.marathe@intel.com>
Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
Signed-off-by: Kedar J Karanje <kedar.j.karanje@intel.com>
Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 94 ++++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_drv.h     |  1 +
 2 files changed, 94 insertions(+), 1 deletion(-)

Comments

Tvrtko Ursulin Sept. 21, 2018, 1:24 p.m. UTC | #1
On 21/09/2018 10:13, kedar.j.karanje@intel.com wrote:
> From: Praveen Diwakar <praveen.diwakar@intel.com>
> 
> High resoluton timer is used for this purpose.
> 
> Debugfs is provided to enable/disable/update timer configuration
> 
> Change-Id: I35d692c5afe962fcad4573185bc6f744487711d0
> Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
> Signed-off-by: Yogesh Marathe <yogesh.marathe@intel.com>
> Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
> Signed-off-by: Kedar J Karanje <kedar.j.karanje@intel.com>
> Signed-off-by: Ankit Navik <ankit.p.navik@intel.com
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c | 94 ++++++++++++++++++++++++++++++++++++-
>   drivers/gpu/drm/i915/i915_drv.h     |  1 +
>   2 files changed, 94 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index f9ce35d..81ba509 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4740,6 +4740,97 @@ static const struct drm_info_list i915_debugfs_list[] = {
>   	{"i915_drrs_status", i915_drrs_status, 0},
>   	{"i915_rps_boost_info", i915_rps_boost_info, 0},
>   };
> +
> +#define POLL_PERIOD_MS	(1000 * 1000)
> +#define PENDING_REQ_0	0 /* No active request pending*/
> +#define PENDING_REQ_3	3 /* Threshold value of 3 active request pending*/
> +			  /* Anything above this is considered as HIGH load
> +			   * context
> +			   */
> +			  /* And less is considered as LOW load*/
> +			  /* And equal is considered as mediaum load */

Wonky comments and some typos up to here.

> +
> +static int predictive_load_enable;
> +static int predictive_load_timer_init;
> +
> +static enum hrtimer_restart predictive_load_cb(struct hrtimer *hrtimer)
> +{
> +	struct drm_i915_private *dev_priv =
> +		container_of(hrtimer, typeof(*dev_priv),
> +				pred_timer);
> +	enum intel_engine_id id;
> +	struct intel_engine_cs *engine;

Some unused's.

> +	struct i915_gem_context *ctx;
> +	u64 req_pending;

unsigned long, and also please try to order the declarations so the right 
edge of the text moves in one direction only.
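
For illustration, a sketch of how the declarations could end up (the unused 
ones dropped, req_pending as unsigned long, right edge descending):

        struct drm_i915_private *dev_priv =
                container_of(hrtimer, typeof(*dev_priv), pred_timer);
        struct i915_gem_context *ctx;
        unsigned long req_pending;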

> +
> +	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
> +
> +		if (!ctx->name)
> +			continue;

What is this?

> +
> +		mutex_lock(&dev_priv->pred_mutex);

Here the mutex bites you since you cannot sleep in the timer callback. 
atomic_t would solve it. Or a native unsigned int/long would, since a lock 
is not needed to read a native word on x86.
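
For illustration only, a lock-free read could look like this, assuming 
ctx->req_cnt were converted to an atomic_t (a hypothetical change to the 
earlier patch in this series):

                /* No sleeping lock in hrtimer (softirq) context; read the
                 * pending-request count atomically instead.
                 */
                req_pending = atomic_read(&ctx->req_cnt);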

> +		req_pending = ctx->req_cnt;
> +		mutex_unlock(&dev_priv->pred_mutex);
> +
> +		if (req_pending == PENDING_REQ_0)
> +			continue;
> +
> +		if (req_pending > PENDING_REQ_3)
> +			ctx->load_type = LOAD_TYPE_HIGH;
> +		else if (req_pending == PENDING_REQ_3)
> +			ctx->load_type = LOAD_TYPE_MEDIUM;
> +		else if (req_pending < PENDING_REQ_3)

It must be smaller if it is not greater or equal, so the last test is 
redundant, though maybe the compiler does that for you.
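
As a sketch, the chain could simply end with a plain else:

                if (req_pending > PENDING_REQ_3)
                        ctx->load_type = LOAD_TYPE_HIGH;
                else if (req_pending == PENDING_REQ_3)
                        ctx->load_type = LOAD_TYPE_MEDIUM;
                else
                        ctx->load_type = LOAD_TYPE_LOW;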

> +			ctx->load_type = LOAD_TYPE_LOW;
> +
> +		i915_set_optimum_config(ctx->load_type, ctx, KABYLAKE_GT3);

Only KBL? Idea to put the table in dev_priv FTW! :)

ctx->load_type used only as a temporary uncovered here? :)

> +	}
> +
> +	hrtimer_forward_now(hrtimer,
> +			ns_to_ktime(predictive_load_enable*POLL_PERIOD_MS));

Or HRTIMER_NORESTART if disabled? Hard to call it, details..
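
A sketch of that variant, just to illustrate (whether to stop or keep 
rearming a disabled timer is the judgment call):

        /* Stop rearming once the debugfs knob has been cleared. */
        if (!READ_ONCE(predictive_load_enable))
                return HRTIMER_NORESTART;

        hrtimer_forward_now(hrtimer,
                            ns_to_ktime(predictive_load_enable * POLL_PERIOD_MS));

        return HRTIMER_RESTART;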

> +
> +	return HRTIMER_RESTART;
> +}
> +
> +static int
> +i915_predictive_load_get(void *data, u64 *val)
> +{
> +	struct drm_i915_private *dev_priv = data;
> +
> +	*val = predictive_load_enable;
> +	return 0;
> +}
> +
> +static int
> +i915_predictive_load_set(void *data, u64 val)
> +{
> +	struct drm_i915_private *dev_priv = data;
> +	struct intel_device_info *info;
> +
> +	info = mkwrite_device_info(dev_priv);

Unused, why?

> +
> +	predictive_load_enable = val;
> +
> +	if (predictive_load_enable) {
> +		if (!predictive_load_timer_init) {
> +			hrtimer_init(&dev_priv->pred_timer, CLOCK_MONOTONIC,
> +					HRTIMER_MODE_REL);
> +			dev_priv->pred_timer.function = predictive_load_cb;
> +			predictive_load_timer_init = 1;

Move timer init to dev_priv setup.
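
i.e. something along these lines as a sketch, with the exact init path 
(early dev_priv setup) being illustrative:

        /* Once per device during driver init, instead of lazily in debugfs: */
        hrtimer_init(&dev_priv->pred_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        dev_priv->pred_timer.function = predictive_load_cb;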

> +		}
> +		hrtimer_start(&dev_priv->pred_timer,
> +			ns_to_ktime(predictive_load_enable*POLL_PERIOD_MS),
> +			HRTIMER_MODE_REL_PINNED);

Two threads can race to here.

Also you can give some slack to the timer since precision is not critical 
here, which can save you some CPU power.
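
A sketch of the slack idea using the ranged start helper (the ~10% slack is 
just an example value):

                u64 period_ns = (u64)predictive_load_enable * POLL_PERIOD_MS;

                /* Let the timer fire anywhere within ~10% past the period. */
                hrtimer_start_range_ns(&dev_priv->pred_timer,
                                       ns_to_ktime(period_ns),
                                       period_ns / 10,
                                       HRTIMER_MODE_REL_PINNED);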

> +	} else {
> +		hrtimer_cancel(&dev_priv->pred_timer);
> +	}
> +
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(i915_predictive_load_ctl,
> +			i915_predictive_load_get, i915_predictive_load_set,
> +			"%llu\n");
> +
>   #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
>   
>   static const struct i915_debugfs_files {
> @@ -4769,7 +4860,8 @@ static const struct i915_debugfs_files {
>   	{"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops},
>   	{"i915_ipc_status", &i915_ipc_status_fops},
>   	{"i915_drrs_ctl", &i915_drrs_ctl_fops},
> -	{"i915_edp_psr_debug", &i915_edp_psr_debug_fops}
> +	{"i915_edp_psr_debug", &i915_edp_psr_debug_fops},
> +	{"i915_predictive_load_ctl", &i915_predictive_load_ctl}

And if the feature is to become real one day, given its nature, the knob 
would probably have to go to sysfs, not debugfs.
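
For reference, a minimal sketch of what a sysfs knob could look like (names 
and placement are illustrative, not an existing i915 interface, and the 
enable flag is assumed to be reachable from there):

static ssize_t predictive_load_show(struct device *kdev,
                                    struct device_attribute *attr, char *buf)
{
        return sprintf(buf, "%d\n", predictive_load_enable);
}

static ssize_t predictive_load_store(struct device *kdev,
                                     struct device_attribute *attr,
                                     const char *buf, size_t count)
{
        int val, ret;

        ret = kstrtoint(buf, 0, &val);
        if (ret)
                return ret;

        predictive_load_enable = val;
        /* Timer start/cancel logic from the debugfs handler would go here. */

        return count;
}

static DEVICE_ATTR_RW(predictive_load);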

>   };
>   
>   int i915_debugfs_register(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 137ec33..0505c47 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1552,6 +1552,7 @@ struct intel_cdclk_state {
>   };
>   
>   struct drm_i915_private {
> +	struct hrtimer pred_timer;

Surely not the first member! :)

Regards,

Tvrtko

>   	struct drm_device drm;
>   
>   	struct kmem_cache *objects;
>
kernel test robot Sept. 22, 2018, 6:31 p.m. UTC | #2
Hi Praveen,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on v4.19-rc4 next-20180921]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/kedar-j-karanje-intel-com/drm-i915-Get-active-pending-request-for-given-context/20180923-012250
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-x014-201838 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   drivers/gpu//drm/i915/i915_debugfs.c: In function 'predictive_load_cb':
>> drivers/gpu//drm/i915/i915_debugfs.c:4762:26: error: unused variable 'engine' [-Werror=unused-variable]
     struct intel_engine_cs *engine;
                             ^~~~~~
>> drivers/gpu//drm/i915/i915_debugfs.c:4761:23: error: unused variable 'id' [-Werror=unused-variable]
     enum intel_engine_id id;
                          ^~
   drivers/gpu//drm/i915/i915_debugfs.c: In function 'i915_predictive_load_get':
>> drivers/gpu//drm/i915/i915_debugfs.c:4797:27: error: unused variable 'dev_priv' [-Werror=unused-variable]
     struct drm_i915_private *dev_priv = data;
                              ^~~~~~~~
   cc1: all warnings being treated as errors

vim +/dev_priv +4797 drivers/gpu//drm/i915/i915_debugfs.c

  4755	
  4756	static enum hrtimer_restart predictive_load_cb(struct hrtimer *hrtimer)
  4757	{
  4758		struct drm_i915_private *dev_priv =
  4759			container_of(hrtimer, typeof(*dev_priv),
  4760					pred_timer);
> 4761		enum intel_engine_id id;
> 4762		struct intel_engine_cs *engine;
  4763		struct i915_gem_context *ctx;
  4764		u64 req_pending;
  4765	
  4766		list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
  4767	
  4768			if (!ctx->name)
  4769				continue;
  4770	
  4771			mutex_lock(&dev_priv->pred_mutex);
  4772			req_pending = ctx->req_cnt;
  4773			mutex_unlock(&dev_priv->pred_mutex);
  4774	
  4775			if (req_pending == PENDING_REQ_0)
  4776				continue;
  4777	
  4778			if (req_pending > PENDING_REQ_3)
  4779				ctx->load_type = LOAD_TYPE_HIGH;
  4780			else if (req_pending == PENDING_REQ_3)
  4781				ctx->load_type = LOAD_TYPE_MEDIUM;
  4782			else if (req_pending < PENDING_REQ_3)
  4783				ctx->load_type = LOAD_TYPE_LOW;
  4784	
  4785			i915_set_optimum_config(ctx->load_type, ctx, KABYLAKE_GT3);
  4786		}
  4787	
  4788		hrtimer_forward_now(hrtimer,
  4789				ns_to_ktime(predictive_load_enable*POLL_PERIOD_MS));
  4790	
  4791		return HRTIMER_RESTART;
  4792	}
  4793	
  4794	static int
  4795	i915_predictive_load_get(void *data, u64 *val)
  4796	{
> 4797		struct drm_i915_private *dev_priv = data;
  4798	
  4799		*val = predictive_load_enable;
  4800		return 0;
  4801	}
  4802	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Ankit Navik Sept. 25, 2018, 6:12 a.m. UTC | #3
Hi Tvrtko, 

Thank you for your valuable comments. We have gone through them. 
I'll be submitting revised patch sets after incorporating all your review comments.

> On 21/09/2018 10:13, kedar.j.karanje@intel.com wrote:
> > From: Praveen Diwakar <praveen.diwakar@intel.com>
> >
> > High resoluton timer is used for this purpose.
> >
> > Debugfs is provided to enable/disable/update timer configuration
> >
> > Change-Id: I35d692c5afe962fcad4573185bc6f744487711d0
> > Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
> > Signed-off-by: Yogesh Marathe <yogesh.marathe@intel.com>
> > Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
> > Signed-off-by: Kedar J Karanje <kedar.j.karanje@intel.com>
> > Signed-off-by: Ankit Navik <ankit.p.navik@intel.com
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c | 94
> ++++++++++++++++++++++++++++++++++++-
> >   drivers/gpu/drm/i915/i915_drv.h     |  1 +
> >   2 files changed, 94 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> > b/drivers/gpu/drm/i915/i915_debugfs.c
> > index f9ce35d..81ba509 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -4740,6 +4740,97 @@ static const struct drm_info_list
> i915_debugfs_list[] = {
> >   	{"i915_drrs_status", i915_drrs_status, 0},
> >   	{"i915_rps_boost_info", i915_rps_boost_info, 0},
> >   };
> > +
> > +#define POLL_PERIOD_MS	(1000 * 1000)
> > +#define PENDING_REQ_0	0 /* No active request pending*/
> > +#define PENDING_REQ_3	3 /* Threshold value of 3 active request
> pending*/
> > +			  /* Anything above this is considered as HIGH load
> > +			   * context
> > +			   */
> > +			  /* And less is considered as LOW load*/
> > +			  /* And equal is considered as mediaum load */
> 
> Wonky comments and some typos up to here.
> 
> > +
> > +static int predictive_load_enable;
> > +static int predictive_load_timer_init;
> > +
> > +static enum hrtimer_restart predictive_load_cb(struct hrtimer
> > +*hrtimer) {
> > +	struct drm_i915_private *dev_priv =
> > +		container_of(hrtimer, typeof(*dev_priv),
> > +				pred_timer);
> > +	enum intel_engine_id id;
> > +	struct intel_engine_cs *engine;
> 
> Some unused's.
> 
> > +	struct i915_gem_context *ctx;
> > +	u64 req_pending;
> 
> unsigned long, and also please try to order declaration so the right edge of text
> is moving in one direction only.
> 
> > +
> > +	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
> > +
> > +		if (!ctx->name)
> > +			continue;
> 
> What is this?
> 
> > +
> > +		mutex_lock(&dev_priv->pred_mutex);
> 
> Here the mutex bites you since you cannot sleep in the timer callback.
> atomic_t would solve it. Or would a native unsigned int/long since lock to read a
> native word on x86 is not needed.
> 
> > +		req_pending = ctx->req_cnt;
> > +		mutex_unlock(&dev_priv->pred_mutex);
> > +
> > +		if (req_pending == PENDING_REQ_0)
> > +			continue;
> > +
> > +		if (req_pending > PENDING_REQ_3)
> > +			ctx->load_type = LOAD_TYPE_HIGH;
> > +		else if (req_pending == PENDING_REQ_3)
> > +			ctx->load_type = LOAD_TYPE_MEDIUM;
> > +		else if (req_pending < PENDING_REQ_3)
> 
> Must be smaller if not greater or equal, but maybe the compiler does that for
> you.
> 
> > +			ctx->load_type = LOAD_TYPE_LOW;
> > +
> > +		i915_set_optimum_config(ctx->load_type, ctx,
> KABYLAKE_GT3);
> 
> Only KBL? Idea to put the table in dev_priv FTW! :)
> 
> ctx->load_type used only as a temporary uncovered here? :)
> 
> > +	}
> > +
> > +	hrtimer_forward_now(hrtimer,
> > +
> 	ns_to_ktime(predictive_load_enable*POLL_PERIOD_MS));
> 
> Or HRTIMER_NORESTART if disabled? Hard to call it, details..
> 
> > +
> > +	return HRTIMER_RESTART;
> > +}
> > +
> > +static int
> > +i915_predictive_load_get(void *data, u64 *val) {
> > +	struct drm_i915_private *dev_priv = data;
> > +
> > +	*val = predictive_load_enable;
> > +	return 0;
> > +}
> > +
> > +static int
> > +i915_predictive_load_set(void *data, u64 val) {
> > +	struct drm_i915_private *dev_priv = data;
> > +	struct intel_device_info *info;
> > +
> > +	info = mkwrite_device_info(dev_priv);
> 
> Unused, why?
> 
> > +
> > +	predictive_load_enable = val;
> > +
> > +	if (predictive_load_enable) {
> > +		if (!predictive_load_timer_init) {
> > +			hrtimer_init(&dev_priv->pred_timer,
> CLOCK_MONOTONIC,
> > +					HRTIMER_MODE_REL);
> > +			dev_priv->pred_timer.function = predictive_load_cb;
> > +			predictive_load_timer_init = 1;
> 
> Move timer init to dev_priv setup.
> 
> > +		}
> > +		hrtimer_start(&dev_priv->pred_timer,
> > +
> 	ns_to_ktime(predictive_load_enable*POLL_PERIOD_MS),
> > +			HRTIMER_MODE_REL_PINNED);
> 
> Two threads can race to here.
> 
> Also you can give some slack to the timer since precision is not critical to you
> and can save you some CPU power.
> 
> > +	} else {
> > +		hrtimer_cancel(&dev_priv->pred_timer);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +DEFINE_SIMPLE_ATTRIBUTE(i915_predictive_load_ctl,
> > +			i915_predictive_load_get, i915_predictive_load_set,
> > +			"%llu\n");
> > +
> >   #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
> >
> >   static const struct i915_debugfs_files { @@ -4769,7 +4860,8 @@
> > static const struct i915_debugfs_files {
> >   	{"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops},
> >   	{"i915_ipc_status", &i915_ipc_status_fops},
> >   	{"i915_drrs_ctl", &i915_drrs_ctl_fops},
> > -	{"i915_edp_psr_debug", &i915_edp_psr_debug_fops}
> > +	{"i915_edp_psr_debug", &i915_edp_psr_debug_fops},
> > +	{"i915_predictive_load_ctl", &i915_predictive_load_ctl}
> 
> And if the feature is to become real one day, given it's nature, the knob would
> probably have to go to sysfs, not debugfs.
> 
> >   };
> >
> >   int i915_debugfs_register(struct drm_i915_private *dev_priv) diff
> > --git a/drivers/gpu/drm/i915/i915_drv.h
> > b/drivers/gpu/drm/i915/i915_drv.h index 137ec33..0505c47 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1552,6 +1552,7 @@ struct intel_cdclk_state {
> >   };
> >
> >   struct drm_i915_private {
> > +	struct hrtimer pred_timer;
> 
> Surely not the first member! :)
> 
> Regards,
> 
> Tvrtko
> 
> >   	struct drm_device drm;
> >
> >   	struct kmem_cache *objects;
> >

Regards, 
Ankit
Tvrtko Ursulin Sept. 25, 2018, 8:25 a.m. UTC | #4
On 25/09/2018 07:12, Navik, Ankit P wrote:
> Hi Tvrtko,
> 
> Thank you for your valuable comments. We have gone through it.
> I'll be submitting revised patch-sets after incorporating all your review comments.

You're welcome!

Btw one more thing - you can suspend your timer when the GPU goes idle and 
restart it when it becomes active. See for instance i915_pmu_gt_parked/unparked 
for where to put the hooks.
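
A rough sketch of such hooks, following the parked/unparked pattern (function 
names and placement are illustrative only, and the enable flag is assumed to 
be visible to this code):

void i915_predictive_load_gt_parked(struct drm_i915_private *i915)
{
        if (predictive_load_enable)
                hrtimer_cancel(&i915->pred_timer);
}

void i915_predictive_load_gt_unparked(struct drm_i915_private *i915)
{
        if (predictive_load_enable)
                hrtimer_start(&i915->pred_timer,
                              ns_to_ktime(predictive_load_enable * POLL_PERIOD_MS),
                              HRTIMER_MODE_REL_PINNED);
}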

Regards,

Tvrtko

>> On 21/09/2018 10:13, kedar.j.karanje@intel.com wrote:
>>> From: Praveen Diwakar <praveen.diwakar@intel.com>
>>>
>>> High resoluton timer is used for this purpose.
>>>
>>> Debugfs is provided to enable/disable/update timer configuration
>>>
>>> Change-Id: I35d692c5afe962fcad4573185bc6f744487711d0
>>> Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
>>> Signed-off-by: Yogesh Marathe <yogesh.marathe@intel.com>
>>> Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
>>> Signed-off-by: Kedar J Karanje <kedar.j.karanje@intel.com>
>>> Signed-off-by: Ankit Navik <ankit.p.navik@intel.com
>>> ---
>>>    drivers/gpu/drm/i915/i915_debugfs.c | 94
>> ++++++++++++++++++++++++++++++++++++-
>>>    drivers/gpu/drm/i915/i915_drv.h     |  1 +
>>>    2 files changed, 94 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index f9ce35d..81ba509 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -4740,6 +4740,97 @@ static const struct drm_info_list
>> i915_debugfs_list[] = {
>>>    	{"i915_drrs_status", i915_drrs_status, 0},
>>>    	{"i915_rps_boost_info", i915_rps_boost_info, 0},
>>>    };
>>> +
>>> +#define POLL_PERIOD_MS	(1000 * 1000)
>>> +#define PENDING_REQ_0	0 /* No active request pending*/
>>> +#define PENDING_REQ_3	3 /* Threshold value of 3 active request
>> pending*/
>>> +			  /* Anything above this is considered as HIGH load
>>> +			   * context
>>> +			   */
>>> +			  /* And less is considered as LOW load*/
>>> +			  /* And equal is considered as mediaum load */
>>
>> Wonky comments and some typos up to here.
>>
>>> +
>>> +static int predictive_load_enable;
>>> +static int predictive_load_timer_init;
>>> +
>>> +static enum hrtimer_restart predictive_load_cb(struct hrtimer
>>> +*hrtimer) {
>>> +	struct drm_i915_private *dev_priv =
>>> +		container_of(hrtimer, typeof(*dev_priv),
>>> +				pred_timer);
>>> +	enum intel_engine_id id;
>>> +	struct intel_engine_cs *engine;
>>
>> Some unused's.
>>
>>> +	struct i915_gem_context *ctx;
>>> +	u64 req_pending;
>>
>> unsigned long, and also please try to order declaration so the right edge of text
>> is moving in one direction only.
>>
>>> +
>>> +	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
>>> +
>>> +		if (!ctx->name)
>>> +			continue;
>>
>> What is this?
>>
>>> +
>>> +		mutex_lock(&dev_priv->pred_mutex);
>>
>> Here the mutex bites you since you cannot sleep in the timer callback.
>> atomic_t would solve it. Or would a native unsigned int/long since lock to read a
>> native word on x86 is not needed.
>>
>>> +		req_pending = ctx->req_cnt;
>>> +		mutex_unlock(&dev_priv->pred_mutex);
>>> +
>>> +		if (req_pending == PENDING_REQ_0)
>>> +			continue;
>>> +
>>> +		if (req_pending > PENDING_REQ_3)
>>> +			ctx->load_type = LOAD_TYPE_HIGH;
>>> +		else if (req_pending == PENDING_REQ_3)
>>> +			ctx->load_type = LOAD_TYPE_MEDIUM;
>>> +		else if (req_pending < PENDING_REQ_3)
>>
>> Must be smaller if not greater or equal, but maybe the compiler does that for
>> you.
>>
>>> +			ctx->load_type = LOAD_TYPE_LOW;
>>> +
>>> +		i915_set_optimum_config(ctx->load_type, ctx,
>> KABYLAKE_GT3);
>>
>> Only KBL? Idea to put the table in dev_priv FTW! :)
>>
>> ctx->load_type used only as a temporary uncovered here? :)
>>
>>> +	}
>>> +
>>> +	hrtimer_forward_now(hrtimer,
>>> +
>> 	ns_to_ktime(predictive_load_enable*POLL_PERIOD_MS));
>>
>> Or HRTIMER_NORESTART if disabled? Hard to call it, details..
>>
>>> +
>>> +	return HRTIMER_RESTART;
>>> +}
>>> +
>>> +static int
>>> +i915_predictive_load_get(void *data, u64 *val) {
>>> +	struct drm_i915_private *dev_priv = data;
>>> +
>>> +	*val = predictive_load_enable;
>>> +	return 0;
>>> +}
>>> +
>>> +static int
>>> +i915_predictive_load_set(void *data, u64 val) {
>>> +	struct drm_i915_private *dev_priv = data;
>>> +	struct intel_device_info *info;
>>> +
>>> +	info = mkwrite_device_info(dev_priv);
>>
>> Unused, why?
>>
>>> +
>>> +	predictive_load_enable = val;
>>> +
>>> +	if (predictive_load_enable) {
>>> +		if (!predictive_load_timer_init) {
>>> +			hrtimer_init(&dev_priv->pred_timer,
>> CLOCK_MONOTONIC,
>>> +					HRTIMER_MODE_REL);
>>> +			dev_priv->pred_timer.function = predictive_load_cb;
>>> +			predictive_load_timer_init = 1;
>>
>> Move timer init to dev_priv setup.
>>
>>> +		}
>>> +		hrtimer_start(&dev_priv->pred_timer,
>>> +
>> 	ns_to_ktime(predictive_load_enable*POLL_PERIOD_MS),
>>> +			HRTIMER_MODE_REL_PINNED);
>>
>> Two threads can race to here.
>>
>> Also you can give some slack to the timer since precision is not critical to you
>> and can save you some CPU power.
>>
>>> +	} else {
>>> +		hrtimer_cancel(&dev_priv->pred_timer);
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +DEFINE_SIMPLE_ATTRIBUTE(i915_predictive_load_ctl,
>>> +			i915_predictive_load_get, i915_predictive_load_set,
>>> +			"%llu\n");
>>> +
>>>    #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
>>>
>>>    static const struct i915_debugfs_files { @@ -4769,7 +4860,8 @@
>>> static const struct i915_debugfs_files {
>>>    	{"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops},
>>>    	{"i915_ipc_status", &i915_ipc_status_fops},
>>>    	{"i915_drrs_ctl", &i915_drrs_ctl_fops},
>>> -	{"i915_edp_psr_debug", &i915_edp_psr_debug_fops}
>>> +	{"i915_edp_psr_debug", &i915_edp_psr_debug_fops},
>>> +	{"i915_predictive_load_ctl", &i915_predictive_load_ctl}
>>
>> And if the feature is to become real one day, given it's nature, the knob would
>> probably have to go to sysfs, not debugfs.
>>
>>>    };
>>>
>>>    int i915_debugfs_register(struct drm_i915_private *dev_priv) diff
>>> --git a/drivers/gpu/drm/i915/i915_drv.h
>>> b/drivers/gpu/drm/i915/i915_drv.h index 137ec33..0505c47 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -1552,6 +1552,7 @@ struct intel_cdclk_state {
>>>    };
>>>
>>>    struct drm_i915_private {
>>> +	struct hrtimer pred_timer;
>>
>> Surely not the first member! :)
>>
>> Regards,
>>
>> Tvrtko
>>
>>>    	struct drm_device drm;
>>>
>>>    	struct kmem_cache *objects;
>>>
> 
> Regards,
> Ankit
>
Ankit Navik Nov. 6, 2018, 4:46 a.m. UTC | #5
Hi Tvrtko, 

> -----Original Message-----
> From: Tvrtko Ursulin [mailto:tvrtko.ursulin@linux.intel.com]
> Sent: Tuesday, September 25, 2018 1:56 PM
> To: Navik, Ankit P <ankit.p.navik@intel.com>; intel-gfx@lists.freedesktop.org
> Cc: J Karanje, Kedar <kedar.j.karanje@intel.com>; Diwakar, Praveen
> <praveen.diwakar@intel.com>; Marathe, Yogesh
> <yogesh.marathe@intel.com>; Muthukumar, Aravindan
> <aravindan.muthukumar@intel.com>
> Subject: Re: [Intel-gfx] [PATCH 4/4] drm/i915: Predictive governor to control
> eu/slice/subslice based on workload
> 
> 
> On 25/09/2018 07:12, Navik, Ankit P wrote:
> > Hi Tvrtko,
> >
> > Thank you for your valuable comments. We have gone through it.
> > I'll be submitting revised patch-sets after incorporating all your review
> comments.
> 
> You're welcome!
> 
> Btw one more thing - you can suspend your timer when GPU goes idle and
> restart it when active. See for instance i915_pmu_gt_parked/unparked where to
> put the hooks.

We are working on this and we will send as new patch on top of series.
> 
> Regards,
> 
> Tvrtko
> 
> >> On 21/09/2018 10:13, kedar.j.karanje@intel.com wrote:
> >>> From: Praveen Diwakar <praveen.diwakar@intel.com>
> >>>
> >>> High resoluton timer is used for this purpose.
> >>>
> >>> Debugfs is provided to enable/disable/update timer configuration
> >>>
> >>> Change-Id: I35d692c5afe962fcad4573185bc6f744487711d0
> >>> Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
> >>> Signed-off-by: Yogesh Marathe <yogesh.marathe@intel.com>
> >>> Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
> >>> Signed-off-by: Kedar J Karanje <kedar.j.karanje@intel.com>
> >>> Signed-off-by: Ankit Navik <ankit.p.navik@intel.com
> >>> ---
> >>>    drivers/gpu/drm/i915/i915_debugfs.c | 94
> >> ++++++++++++++++++++++++++++++++++++-
> >>>    drivers/gpu/drm/i915/i915_drv.h     |  1 +
> >>>    2 files changed, 94 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> >>> b/drivers/gpu/drm/i915/i915_debugfs.c
> >>> index f9ce35d..81ba509 100644
> >>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> >>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >>> @@ -4740,6 +4740,97 @@ static const struct drm_info_list
> >> i915_debugfs_list[] = {
> >>>    	{"i915_drrs_status", i915_drrs_status, 0},
> >>>    	{"i915_rps_boost_info", i915_rps_boost_info, 0},
> >>>    };
> >>> +
> >>> +#define POLL_PERIOD_MS	(1000 * 1000)
> >>> +#define PENDING_REQ_0	0 /* No active request pending*/
> >>> +#define PENDING_REQ_3	3 /* Threshold value of 3 active request
> >> pending*/
> >>> +			  /* Anything above this is considered as HIGH load
> >>> +			   * context
> >>> +			   */
> >>> +			  /* And less is considered as LOW load*/
> >>> +			  /* And equal is considered as mediaum load */
> >>
> >> Wonky comments and some typos up to here.
Changes done in v2.
> >>
> >>> +
> >>> +static int predictive_load_enable;
> >>> +static int predictive_load_timer_init;
> >>> +
> >>> +static enum hrtimer_restart predictive_load_cb(struct hrtimer
> >>> +*hrtimer) {
> >>> +	struct drm_i915_private *dev_priv =
> >>> +		container_of(hrtimer, typeof(*dev_priv),
> >>> +				pred_timer);
> >>> +	enum intel_engine_id id;
> >>> +	struct intel_engine_cs *engine;
> >>
> >> Some unused's.
Changes done in v2. 
> >>
> >>> +	struct i915_gem_context *ctx;
> >>> +	u64 req_pending;
> >>
> >> unsigned long, and also please try to order declaration so the right
> >> edge of text is moving in one direction only.
Changes done in v2.
> >>
> >>> +
> >>> +	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
> >>> +
> >>> +		if (!ctx->name)
> >>> +			continue;
> >>
> >> What is this?
Just an error check for invalid context.
> >>
> >>> +
> >>> +		mutex_lock(&dev_priv->pred_mutex);
> >>
> >> Here the mutex bites you since you cannot sleep in the timer callback.
> >> atomic_t would solve it. Or would a native unsigned int/long since
> >> lock to read a native word on x86 is not needed.
Changes done in v2.
> >>
> >>> +		req_pending = ctx->req_cnt;
> >>> +		mutex_unlock(&dev_priv->pred_mutex);
> >>> +
> >>> +		if (req_pending == PENDING_REQ_0)
> >>> +			continue;
> >>> +
> >>> +		if (req_pending > PENDING_REQ_3)
> >>> +			ctx->load_type = LOAD_TYPE_HIGH;
> >>> +		else if (req_pending == PENDING_REQ_3)
> >>> +			ctx->load_type = LOAD_TYPE_MEDIUM;
> >>> +		else if (req_pending < PENDING_REQ_3)
> >>
> >> Must be smaller if not greater or equal, but maybe the compiler does
> >> that for you.
Changes done in v2. 
> >>
> >>> +			ctx->load_type = LOAD_TYPE_LOW;
> >>> +
> >>> +		i915_set_optimum_config(ctx->load_type, ctx,
> >> KABYLAKE_GT3);
> >>
> >> Only KBL? Idea to put the table in dev_priv FTW! :)
Changes done in v2.
> >>
> >> ctx->load_type used only as a temporary uncovered here? :)
> >>
> >>> +	}
> >>> +
> >>> +	hrtimer_forward_now(hrtimer,
> >>> +
> >> 	ns_to_ktime(predictive_load_enable*POLL_PERIOD_MS));
> >>
> >> Or HRTIMER_NORESTART if disabled? Hard to call it, details..
We get the timer value and we don't start the timer if the value is 0.
> >>
> >>> +
> >>> +	return HRTIMER_RESTART;
> >>> +}
> >>> +
> >>> +static int
> >>> +i915_predictive_load_get(void *data, u64 *val) {
> >>> +	struct drm_i915_private *dev_priv = data;
> >>> +
> >>> +	*val = predictive_load_enable;
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int
> >>> +i915_predictive_load_set(void *data, u64 val) {
> >>> +	struct drm_i915_private *dev_priv = data;
> >>> +	struct intel_device_info *info;
> >>> +
> >>> +	info = mkwrite_device_info(dev_priv);
> >>
> >> Unused, why?
Changes done in v2.
> >>
> >>> +
> >>> +	predictive_load_enable = val;
> >>> +
> >>> +	if (predictive_load_enable) {
> >>> +		if (!predictive_load_timer_init) {
> >>> +			hrtimer_init(&dev_priv->pred_timer,
> >> CLOCK_MONOTONIC,
> >>> +					HRTIMER_MODE_REL);
> >>> +			dev_priv->pred_timer.function = predictive_load_cb;
> >>> +			predictive_load_timer_init = 1;
> >>
> >> Move timer init to dev_priv setup.
Changes done in v2.
> >>
> >>> +		}
> >>> +		hrtimer_start(&dev_priv->pred_timer,
> >>> +
> >> 	ns_to_ktime(predictive_load_enable*POLL_PERIOD_MS),
> >>> +			HRTIMER_MODE_REL_PINNED);
> >>
> >> Two threads can race to here.
This is under a mutex; please let me know the scenario if there is any corner
case which could cause a race condition.
> >>
> >> Also you can give some slack to the timer since precision is not
> >> critical to you and can save you some CPU power.
Changes done in v2.
> >>
> >>> +	} else {
> >>> +		hrtimer_cancel(&dev_priv->pred_timer);
> >>> +	}
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +DEFINE_SIMPLE_ATTRIBUTE(i915_predictive_load_ctl,
> >>> +			i915_predictive_load_get, i915_predictive_load_set,
> >>> +			"%llu\n");
> >>> +
> >>>    #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
> >>>
> >>>    static const struct i915_debugfs_files { @@ -4769,7 +4860,8 @@
> >>> static const struct i915_debugfs_files {
> >>>    	{"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops},
> >>>    	{"i915_ipc_status", &i915_ipc_status_fops},
> >>>    	{"i915_drrs_ctl", &i915_drrs_ctl_fops},
> >>> -	{"i915_edp_psr_debug", &i915_edp_psr_debug_fops}
> >>> +	{"i915_edp_psr_debug", &i915_edp_psr_debug_fops},
> >>> +	{"i915_predictive_load_ctl", &i915_predictive_load_ctl}
> >>
> >> And if the feature is to become real one day, given it's nature, the
> >> knob would probably have to go to sysfs, not debugfs.
Added a FIXME comment to migrate to sysfs in a subsequent version, along with other fixes.
> >>
> >>>    };
> >>>
> >>>    int i915_debugfs_register(struct drm_i915_private *dev_priv) diff
> >>> --git a/drivers/gpu/drm/i915/i915_drv.h
> >>> b/drivers/gpu/drm/i915/i915_drv.h index 137ec33..0505c47 100644
> >>> --- a/drivers/gpu/drm/i915/i915_drv.h
> >>> +++ b/drivers/gpu/drm/i915/i915_drv.h
> >>> @@ -1552,6 +1552,7 @@ struct intel_cdclk_state {
> >>>    };
> >>>
> >>>    struct drm_i915_private {
> >>> +	struct hrtimer pred_timer;
> >>
> >> Surely not the first member! :)
Changes done in v2. 

Regards, Ankit
> >>
> >> Regards,
> >>
> >> Tvrtko
> >>
> >>>    	struct drm_device drm;
> >>>
> >>>    	struct kmem_cache *objects;
> >>>
> >
> > Regards,
> > Ankit
> >

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f9ce35d..81ba509 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4740,6 +4740,97 @@  static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_drrs_status", i915_drrs_status, 0},
 	{"i915_rps_boost_info", i915_rps_boost_info, 0},
 };
+
+#define POLL_PERIOD_MS	(1000 * 1000)
+#define PENDING_REQ_0	0 /* No active request pending*/
+#define PENDING_REQ_3	3 /* Threshold value of 3 active request pending*/
+			  /* Anything above this is considered as HIGH load
+			   * context
+			   */
+			  /* And less is considered as LOW load*/
+			  /* And equal is considered as mediaum load */
+
+static int predictive_load_enable;
+static int predictive_load_timer_init;
+
+static enum hrtimer_restart predictive_load_cb(struct hrtimer *hrtimer)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(hrtimer, typeof(*dev_priv),
+				pred_timer);
+	enum intel_engine_id id;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	u64 req_pending;
+
+	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+
+		if (!ctx->name)
+			continue;
+
+		mutex_lock(&dev_priv->pred_mutex);
+		req_pending = ctx->req_cnt;
+		mutex_unlock(&dev_priv->pred_mutex);
+
+		if (req_pending == PENDING_REQ_0)
+			continue;
+
+		if (req_pending > PENDING_REQ_3)
+			ctx->load_type = LOAD_TYPE_HIGH;
+		else if (req_pending == PENDING_REQ_3)
+			ctx->load_type = LOAD_TYPE_MEDIUM;
+		else if (req_pending < PENDING_REQ_3)
+			ctx->load_type = LOAD_TYPE_LOW;
+
+		i915_set_optimum_config(ctx->load_type, ctx, KABYLAKE_GT3);
+	}
+
+	hrtimer_forward_now(hrtimer,
+			ns_to_ktime(predictive_load_enable*POLL_PERIOD_MS));
+
+	return HRTIMER_RESTART;
+}
+
+static int
+i915_predictive_load_get(void *data, u64 *val)
+{
+	struct drm_i915_private *dev_priv = data;
+
+	*val = predictive_load_enable;
+	return 0;
+}
+
+static int
+i915_predictive_load_set(void *data, u64 val)
+{
+	struct drm_i915_private *dev_priv = data;
+	struct intel_device_info *info;
+
+	info = mkwrite_device_info(dev_priv);
+
+	predictive_load_enable = val;
+
+	if (predictive_load_enable) {
+		if (!predictive_load_timer_init) {
+			hrtimer_init(&dev_priv->pred_timer, CLOCK_MONOTONIC,
+					HRTIMER_MODE_REL);
+			dev_priv->pred_timer.function = predictive_load_cb;
+			predictive_load_timer_init = 1;
+		}
+		hrtimer_start(&dev_priv->pred_timer,
+			ns_to_ktime(predictive_load_enable*POLL_PERIOD_MS),
+			HRTIMER_MODE_REL_PINNED);
+	} else {
+		hrtimer_cancel(&dev_priv->pred_timer);
+	}
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_predictive_load_ctl,
+			i915_predictive_load_get, i915_predictive_load_set,
+			"%llu\n");
+
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
 static const struct i915_debugfs_files {
@@ -4769,7 +4860,8 @@  static const struct i915_debugfs_files {
 	{"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops},
 	{"i915_ipc_status", &i915_ipc_status_fops},
 	{"i915_drrs_ctl", &i915_drrs_ctl_fops},
-	{"i915_edp_psr_debug", &i915_edp_psr_debug_fops}
+	{"i915_edp_psr_debug", &i915_edp_psr_debug_fops},
+	{"i915_predictive_load_ctl", &i915_predictive_load_ctl}
 };
 
 int i915_debugfs_register(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 137ec33..0505c47 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1552,6 +1552,7 @@  struct intel_cdclk_state {
 };
 
 struct drm_i915_private {
+	struct hrtimer pred_timer;
 	struct drm_device drm;
 
 	struct kmem_cache *objects;