diff mbox

[36/36] drm/i915: Support per-context user requests for GPU frequency control

Message ID 20180314093748.8541-36-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson March 14, 2018, 9:37 a.m. UTC
Often, we find ourselves facing a workload where the user knows in
advance what GPU frequency they require for it to complete in a timely
manner, and using past experience they can outperform the HW assisted
RPS autotuning. An example might be kodi (HTPC) where they know that
video decoding and compositing require a minimum frequency to avoid ever
dropping a frame, or conversely know when they are in a powersaving mode
and would rather have slower updates than ramp up the GPU frequency and
power consumption. Other workloads may defeat the autotuning entirely
and need manual control to meet their performance goals, e.g. bursty
applications which require low latency.

To accommodate the varying needs of different applications, that may be
running concurrently, we want a more flexible system than a global limit
supplied by sysfs. To this end, we offer the application the option to
set their desired frequency bounds on the context itself, and apply those
bounds when we execute commands from the application, switching between
bounds just as easily as we switch between the clients themselves.

The clients can query the range supported by the HW, or at least the
range they are restricted to, and then freely select frequencies within
that range that they want to run at. (They can select just a single
frequency if they so choose.) As this is subject to the global limit
supplied by the user in sysfs, and a client can only reduce the range of
frequencies they allow the HW to run at, we allow all clients to adjust
their request (and not restrict raising the minimum to privileged
CAP_SYS_NICE clients).

Testcase: igt/gem_ctx_freq
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Praveen Paneri <praveen.paneri@intel.com>
Cc: Sagar A Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c                |  16 ++-
 drivers/gpu/drm/i915/i915_drv.h                    |   5 +
 drivers/gpu/drm/i915/i915_gem_context.c            |  54 +++++++++
 drivers/gpu/drm/i915/i915_gem_context.h            |   3 +
 drivers/gpu/drm/i915/intel_gt_pm.c                 | 121 ++++++++++++++++---
 drivers/gpu/drm/i915/intel_gt_pm.h                 |   4 +
 drivers/gpu/drm/i915/intel_guc_submission.c        |  16 ++-
 drivers/gpu/drm/i915/intel_lrc.c                   |  15 +++
 .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
 drivers/gpu/drm/i915/selftests/intel_gt_pm.c       | 130 +++++++++++++++++++++
 include/uapi/drm/i915_drm.h                        |  20 ++++
 11 files changed, 368 insertions(+), 17 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/intel_gt_pm.c

Comments

sagar.a.kamble@intel.com March 19, 2018, 9:51 a.m. UTC | #1
On 3/14/2018 3:07 PM, Chris Wilson wrote:
> Often, we find ourselves facing a workload where the user knows in
> advance what GPU frequency they require for it to complete in a timely
> manner, and using past experience they can outperform the HW assisted
> RPS autotuning. An example might be kodi (HTPC) where they know that
> video decoding and compositing require a minimum frequency to avoid ever
> dropping a frame, or conversely know when they are in a powersaving mode
> and would rather have slower updates than ramp up the GPU frequency and
> power consumption. Other workloads may defeat the autotuning entirely
> and need manual control to meet their performance goals, e.g. bursty
> applications which require low latency.
>
> To accommodate the varying needs of different applications, that may be
> running concurrently, we want a more flexible system than a global limit
> supplied by sysfs. To this end, we offer the application the option to
> set their desired frequency bounds on the context itself, and apply those
> bounds when we execute commands from the application, switching between
> bounds just as easily as we switch between the clients themselves.
>
> The clients can query the range supported by the HW, or at least the
> range they are restricted to, and then freely select frequencies within
> that range that they want to run at. (They can select just a single
> frequency if they so choose.) As this is subject to the global limit
> supplied by the user in sysfs, and a client can only reduce the range of
> frequencies they allow the HW to run at, we allow all clients to adjust
> their request (and not restrict raising the minimum to privileged
> CAP_SYS_NICE clients).
>
> Testcase: igt/gem_ctx_freq
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Praveen Paneri <praveen.paneri@intel.com>
> Cc: Sagar A Kamble <sagar.a.kamble@intel.com>
Change looks good to me. I have one query below.
<snip>
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 8a8ad2fe158d..d8eaae683186 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -26,9 +26,12 @@
>   #include <trace/events/dma_fence.h>
>   
>   #include "intel_guc_submission.h"
> -#include "intel_lrc_reg.h"
> +
>   #include "i915_drv.h"
>   
> +#include "intel_gt_pm.h"
> +#include "intel_lrc_reg.h"
> +
>   #define GUC_PREEMPT_FINISHED		0x1
>   #define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
>   #define GUC_PREEMPT_BREADCRUMB_BYTES	\
> @@ -650,6 +653,12 @@ static void guc_submit(struct intel_engine_cs *engine)
>   	}
>   }
>   
> +static void update_rps(struct intel_engine_cs *engine)
> +{
> +	intel_rps_update_engine(engine,
> +				port_request(engine->execlists.port)->ctx);
> +}
> +
>   static void port_assign(struct execlist_port *port, struct i915_request *rq)
>   {
>   	GEM_BUG_ON(port_isset(port));
> @@ -728,6 +737,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>   	execlists->first = rb;
>   	if (submit) {
>   		port_assign(port, last);
> +		update_rps(engine);
>   		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
>   		guc_submit(engine);
>   	}
> @@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
>   
>   		rq = port_request(&port[0]);
>   	}
> -	if (!rq)
> +	if (!rq) {
>   		execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
> +		intel_rps_update_engine(engine, NULL);
I think we also need to do this (update_engine(NULL)) while handling 
preemption completion for both GuC and execlists.
Doing it as part of execlists_cancel_port_requests will cover all those 
cases including reset.
Am I right?
> +	}
>   
>   	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
>   	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3a69b367e565..518f7b3db857 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -138,6 +138,7 @@
>   #include "i915_drv.h"
>   #include "i915_gem_render_state.h"
>   #include "intel_lrc_reg.h"
> +#include "intel_gt_pm.h"
>   #include "intel_mocs.h"
>   
>   #define RING_EXECLIST_QFULL		(1 << 0x2)
> @@ -535,6 +536,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>   	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>   }
>   
> +static void update_rps(struct intel_engine_cs *engine)
> +{
> +	intel_rps_update_engine(engine,
> +				port_request(engine->execlists.port)->ctx);
> +}
> +
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -708,6 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	spin_unlock_irq(&engine->timeline->lock);
>   
>   	if (submit) {
> +		update_rps(engine);
>   		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
>   		execlists_submit_ports(engine);
>   	}
> @@ -982,6 +990,11 @@ static void execlists_submission_tasklet(unsigned long data)
>   					  engine->name, port->context_id);
>   
>   				execlists_port_complete(execlists, port);
> +
> +				/* Switch to the next request/context */
> +				rq = port_request(port);
> +				intel_rps_update_engine(engine,
> +							rq ? rq->ctx : NULL);
>   			} else {
>   				port_set(port, port_pack(rq, count));
>   			}
> @@ -1717,6 +1730,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>   	__unwind_incomplete_requests(engine);
>   	spin_unlock(&engine->timeline->lock);
>   
> +	intel_rps_update_engine(engine, NULL);
> +
>   	/* Mark all CS interrupts as complete */
>   	execlists->active = 0;
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> index 9a48aa441743..85b6e6d020b7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> @@ -14,6 +14,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
>   selftest(scatterlist, scatterlist_mock_selftests)
>   selftest(syncmap, i915_syncmap_mock_selftests)
>   selftest(uncore, intel_uncore_mock_selftests)
> +selftest(gt_pm, intel_gt_pm_mock_selftests)
>   selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
>   selftest(timelines, i915_gem_timeline_mock_selftests)
>   selftest(requests, i915_request_mock_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/intel_gt_pm.c b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> new file mode 100644
> index 000000000000..c3871eb9eabb
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> @@ -0,0 +1,130 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#include "../i915_selftest.h"
> +#include "i915_random.h"
> +
> +#include "mock_gem_device.h"
> +
> +static void mock_rps_init(struct drm_i915_private *i915)
> +{
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +
> +	/* Disable the register writes */
> +	mkwrite_device_info(i915)->gen = 0;
> +	mkwrite_device_info(i915)->has_rps = true;
> +
> +	intel_rps_init(rps);
> +
> +	rps->min_freq_hw = 0;
> +	rps->max_freq_hw = 255;
> +
> +	rps->min_freq_user = rps->min_freq_hw;
> +	rps->max_freq_user = rps->max_freq_hw;
> +
> +	intel_rps_init__frequencies(rps);
> +}
> +
> +static void mock_rps_fini(struct drm_i915_private *i915)
> +{
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +
> +	cancel_work_sync(&rps->work);
> +}
> +
> +static int igt_rps_engine(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +	I915_RND_STATE(prng);
> +	int err;
> +	int i;
> +
> +	intel_gt_pm_busy(i915); /* Activate RPS */
> +
> +	/*
> +	 * Minimum unit tests for intel_rps_update_engine().
> +	 *
> +	 * Whenever we call intel_rps_update_engine, it will
> +	 * replace the context min/max frequency request for a particular
> +	 * engine and then recompute the global max(min)/min(max) over all
> +	 * engines. In this mockup, we are limited to checking those
> +	 * max(min)/min(max) calculations and then seeing if the rps
> +	 * worker uses those bounds.
> +	 */
> +
> +	for (i = 0; i < 256 * 256; i++) {
> +		u8 freq = prandom_u32_state(&prng);
> +
> +		__rps_update_engine(rps, 0, freq, freq);
> +		if (rps->min_freq_context != freq ||
> +		    rps->max_freq_context != freq) {
> +			pr_err("Context min/max frequencies not restricted to %d, found [%d, %d]\n",
> +			       freq, rps->min_freq_context, rps->max_freq_context);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +		flush_work(&rps->work);
> +
> +		if (rps->freq != freq) {
> +			pr_err("Tried to restrict frequency to %d, found %d\n",
> +			       freq, rps->freq);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +	}
> +
> +	__rps_update_engine(rps, 0, rps->min_freq_hw, rps->max_freq_hw);
> +	if (rps->min_freq_context != rps->min_freq_hw ||
> +	    rps->max_freq_context != rps->max_freq_hw) {
> +		pr_err("Context frequency not restored to [%d, %d], found [%d, %d]\n",
> +		       rps->min_freq_hw, rps->min_freq_hw,
> +		       rps->min_freq_context, rps->max_freq_context);
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < I915_NUM_ENGINES; i++)
> +		__rps_update_engine(rps, i, i, 255 - i);
> +	i--;
> +	if (rps->min_freq_context != i) {
> +		pr_err("Minimum context frequency across all engines not raised to %d, found %d\n", i, rps->min_freq_context);
> +		err = -EINVAL;
> +		goto out;
> +	}
> +	if (rps->max_freq_context != 255 - i) {
> +		pr_err("Maxmimum context frequency across all engines not lowered to %d, found %d\n", 255 - i, rps->max_freq_context);
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	err = 0;
> +out:
> +	intel_gt_pm_idle(i915);
> +	return err;
> +}
> +
> +int intel_gt_pm_mock_selftests(void)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(igt_rps_engine),
> +	};
> +	struct drm_i915_private *i915;
> +	int err;
> +
> +	i915 = mock_gem_device();
> +	if (!i915)
> +		return -ENOMEM;
> +
> +	mock_rps_init(i915);
> +
> +	err = i915_subtests(tests, i915);
> +
> +	mock_rps_fini(i915);
> +	drm_dev_unref(&i915->drm);
> +
> +	return err;
> +}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7f5634ce8e88..64c6377df769 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1456,6 +1456,26 @@ struct drm_i915_gem_context_param {
>   #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
>   #define   I915_CONTEXT_DEFAULT_PRIORITY		0
>   #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
> +
> +/*
> + * I915_CONTEXT_PARAM_FREQUENCY:
> + *
> + * Request that when this context runs, the GPU is restricted to run
> + * in this frequency range; but still constrained by the global user
> + * restriction specified via sysfs.
> + *
> + * The minimum / maximum frequencies are specified in MHz. Each context
> + * starts in the default unrestricted state, where the range is taken from
> + * the hardware, and so may be queried.
> + *
> + * Note the frequency is only changed on a context switch; if the
> + * context's frequency is updated whilst the context is currently executing
> + * the request will not take effect until the next time the context is run.
> + */
> +#define I915_CONTEXT_PARAM_FREQUENCY	0x7
> +#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
> +#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
> +#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
>   	__u64 value;
>   };
>
Chris Wilson April 10, 2018, 12:53 p.m. UTC | #2
Quoting Sagar Arun Kamble (2018-03-19 09:51:08)
> 
> 
> On 3/14/2018 3:07 PM, Chris Wilson wrote:
> > @@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
> >   
> >               rq = port_request(&port[0]);
> >       }
> > -     if (!rq)
> > +     if (!rq) {
> >               execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
> > +             intel_rps_update_engine(engine, NULL);
> I think we also need to do this (update_engine(NULL)) while handling 
> preemption completion for both GuC and execlists also.
> Doing it as part of execlists_cancel_port_requests will cover all those 
> cases including reset.
> Am I right?

We don't need it in the intermediate (internal) context switches to
preempt. That is always preceded by execlists_user_end (cancelling the
context frequency selection) or succeeded by the next
execlists_user_begin (selecting the next frequency). However, that
change was already made to simplify execlists->active handling ;)
-Chris
Lionel Landwerlin Nov. 9, 2018, 5:51 p.m. UTC | #3
I think we have some interest in reviving this for the performance query 
use case.
Is that on anybody's todo list?

Thanks,

-
Lionel

On 14/03/2018 09:37, Chris Wilson wrote:
> Often, we find ourselves facing a workload where the user knows in
> advance what GPU frequency they require for it to complete in a timely
> manner, and using past experience they can outperform the HW assisted
> RPS autotuning. An example might be kodi (HTPC) where they know that
> video decoding and compositing require a minimum frequency to avoid ever
> dropping a frame, or conversely know when they are in a powersaving mode
> and would rather have slower updates than ramp up the GPU frequency and
> power consumption. Other workloads may defeat the autotuning entirely
> and need manual control to meet their performance goals, e.g. bursty
> applications which require low latency.
>
> To accommodate the varying needs of different applications, that may be
> running concurrently, we want a more flexible system than a global limit
> supplied by sysfs. To this end, we offer the application the option to
> set their desired frequency bounds on the context itself, and apply those
> bounds when we execute commands from the application, switching between
> bounds just as easily as we switch between the clients themselves.
>
> The clients can query the range supported by the HW, or at least the
> range they are restricted to, and then freely select frequencies within
> that range that they want to run at. (They can select just a single
> frequency if they so choose.) As this is subject to the global limit
> supplied by the user in sysfs, and a client can only reduce the range of
> frequencies they allow the HW to run at, we allow all clients to adjust
> their request (and not restrict raising the minimum to privileged
> CAP_SYS_NICE clients).
>
> Testcase: igt/gem_ctx_freq
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Praveen Paneri <praveen.paneri@intel.com>
> Cc: Sagar A Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c                |  16 ++-
>   drivers/gpu/drm/i915/i915_drv.h                    |   5 +
>   drivers/gpu/drm/i915/i915_gem_context.c            |  54 +++++++++
>   drivers/gpu/drm/i915/i915_gem_context.h            |   3 +
>   drivers/gpu/drm/i915/intel_gt_pm.c                 | 121 ++++++++++++++++---
>   drivers/gpu/drm/i915/intel_gt_pm.h                 |   4 +
>   drivers/gpu/drm/i915/intel_guc_submission.c        |  16 ++-
>   drivers/gpu/drm/i915/intel_lrc.c                   |  15 +++
>   .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
>   drivers/gpu/drm/i915/selftests/intel_gt_pm.c       | 130 +++++++++++++++++++++
>   include/uapi/drm/i915_drm.h                        |  20 ++++
>   11 files changed, 368 insertions(+), 17 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/selftests/intel_gt_pm.c
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 7c7afdac8c8c..a21b9164ade8 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2191,6 +2191,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>   	struct drm_device *dev = &dev_priv->drm;
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   	struct drm_file *file;
> +	int n;
>   
>   	seq_printf(m, "GPU busy? %s [%d requests]\n",
>   		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
> @@ -2198,17 +2199,30 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>   	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
>   	seq_printf(m, "Boosts outstanding? %d\n",
>   		   atomic_read(&rps->num_waiters));
> +	seq_printf(m, "Worker pending? %s\n", yesno(work_busy(&rps->work)));
>   	seq_printf(m, "Frequency requested %d [%d, %d]\n",
>   		   intel_gpu_freq(dev_priv, rps->freq),
>   		   intel_gpu_freq(dev_priv, rps->min),
>   		   intel_gpu_freq(dev_priv, rps->max));
> -	seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
> +	seq_printf(m, "  min hard:%d, soft:%d, ctx:%d, user:%d; max user:%d, ctx:%d, soft:%d, hard:%d\n",
>   		   intel_gpu_freq(dev_priv, rps->min_freq_hw),
>   		   intel_gpu_freq(dev_priv, rps->min_freq_soft),
> +		   intel_gpu_freq(dev_priv, rps->min_freq_context),
>   		   intel_gpu_freq(dev_priv, rps->min_freq_user),
>   		   intel_gpu_freq(dev_priv, rps->max_freq_user),
> +		   intel_gpu_freq(dev_priv, rps->max_freq_context),
>   		   intel_gpu_freq(dev_priv, rps->max_freq_soft),
>   		   intel_gpu_freq(dev_priv, rps->max_freq_hw));
> +	seq_printf(m, "  engines min: [");
> +	for (n = 0; n < ARRAY_SIZE(rps->min_freq_engine); n++)
> +		seq_printf(m, "%s%d", n ? ", " : "",
> +			   intel_gpu_freq(dev_priv, rps->min_freq_engine[n]));
> +	seq_printf(m, "]\n  engines max: [");
> +	for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++)
> +		seq_printf(m, "%s%d", n ? ", " : "",
> +			   intel_gpu_freq(dev_priv, rps->max_freq_engine[n]));
> +	seq_printf(m, "]\n");
> +
>   	seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
>   		   intel_gpu_freq(dev_priv, rps->idle_freq),
>   		   intel_gpu_freq(dev_priv, rps->efficient_freq),
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 82e9a58bd65f..d754d44cfbc2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -731,6 +731,7 @@ struct intel_rps_ei {
>   
>   struct intel_rps {
>   	struct mutex lock;
> +	spinlock_t engine_lock; /* protects updates to min/max_freq_context */
>   	struct work_struct work;
>   
>   	bool active;
> @@ -763,6 +764,10 @@ struct intel_rps {
>   	u8 max_freq_user;	/* Max frequency permitted by the driver */
>   	u8 min_freq_soft;
>   	u8 max_freq_soft;
> +	u8 min_freq_context;	/* Min frequency permitted by the context */
> +	u8 max_freq_context;	/* Max frequency permitted by the context */
> +	u8 min_freq_engine[I915_NUM_ENGINES];
> +	u8 max_freq_engine[I915_NUM_ENGINES];
>   
>   	u8 idle_freq;		/* Frequency to request when we are idle */
>   	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 65bf92658d92..1d36e2a02479 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -88,8 +88,10 @@
>   #include <linux/log2.h>
>   #include <drm/drmP.h>
>   #include <drm/i915_drm.h>
> +
>   #include "i915_drv.h"
>   #include "i915_trace.h"
> +#include "intel_gt_pm.h"
>   
>   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
>   
> @@ -281,6 +283,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>   	list_add_tail(&ctx->link, &dev_priv->contexts.list);
>   	ctx->i915 = dev_priv;
>   	ctx->priority = I915_PRIORITY_NORMAL;
> +	ctx->min_freq = dev_priv->gt_pm.rps.min_freq_hw;
> +	ctx->max_freq = dev_priv->gt_pm.rps.max_freq_hw;
>   
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
>   	INIT_LIST_HEAD(&ctx->handles_list);
> @@ -715,6 +719,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   {
>   	struct drm_i915_file_private *file_priv = file->driver_priv;
>   	struct drm_i915_gem_context_param *args = data;
> +	struct drm_i915_private *i915 = to_i915(dev);
>   	struct i915_gem_context *ctx;
>   	int ret = 0;
>   
> @@ -747,6 +752,19 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	case I915_CONTEXT_PARAM_PRIORITY:
>   		args->value = ctx->priority;
>   		break;
> +	case I915_CONTEXT_PARAM_FREQUENCY:
> +		if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
> +			ret = -ENODEV;
> +		} else if (args->size) {
> +			ret = -EINVAL;
> +		} else {
> +			u32 min = intel_gpu_freq(i915, ctx->min_freq);
> +			u32 max = intel_gpu_freq(i915, ctx->max_freq);
> +
> +			args->value = I915_CONTEXT_SET_FREQUENCY(min, max);
> +		}
> +		break;
> +
>   	default:
>   		ret = -EINVAL;
>   		break;
> @@ -761,6 +779,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   {
>   	struct drm_i915_file_private *file_priv = file->driver_priv;
>   	struct drm_i915_gem_context_param *args = data;
> +	struct drm_i915_private *i915 = to_i915(dev);
>   	struct i915_gem_context *ctx;
>   	int ret;
>   
> @@ -821,6 +840,41 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   				ctx->priority = priority;
>   		}
>   		break;
> +	case I915_CONTEXT_PARAM_FREQUENCY:
> +		if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
> +			ret = -ENODEV;
> +		} else if (args->size) {
> +			ret = -EINVAL;
> +		} else {
> +			struct intel_rps *rps = &i915->gt_pm.rps;
> +			u32 min, max;
> +
> +			min = I915_CONTEXT_MIN_FREQUENCY(args->value);
> +			min = intel_freq_opcode(i915, min);
> +
> +			max = I915_CONTEXT_MAX_FREQUENCY(args->value);
> +			max = intel_freq_opcode(i915, max);
> +
> +			/*
> +			 * As we constrain the frequency request from the
> +			 * context (application) by the sysadmin imposed limits,
> +			 * it is reasonable to allow the application to
> +			 * specify its preferred range within those limits.
> +			 * That is we do not need to restrict requesting
> +			 * a higher frequency to privileged (CAP_SYS_NICE)
> +			 * processes.
> +			 */
> +			if (max < min) {
> +				ret = -EINVAL;
> +			} else if (min < rps->min_freq_hw ||
> +				   max > rps->max_freq_hw) {
> +				ret = -EINVAL;
> +			} else {
> +				ctx->min_freq = min;
> +				ctx->max_freq = max;
> +			}
> +		}
> +		break;
>   
>   	default:
>   		ret = -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 7854262ddfd9..98f7b71a787a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -150,6 +150,9 @@ struct i915_gem_context {
>   	 */
>   	int priority;
>   
> +	u32 min_freq;
> +	u32 max_freq;
> +
>   	/** ggtt_offset_bias: placement restriction for context objects */
>   	u32 ggtt_offset_bias;
>   
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index 9705205a26b5..4bbfb4080f8f 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -402,10 +402,10 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
>   	GEM_BUG_ON(!rps->active);
>   
>   	min = clamp_t(int,
> -		      rps->min_freq_soft,
> +		      max(rps->min_freq_soft, READ_ONCE(rps->min_freq_context)),
>   		      rps->min_freq_user, rps->max_freq_user);
>   	max = clamp_t(int,
> -		      rps->max_freq_soft,
> +		      min(rps->max_freq_soft, READ_ONCE(rps->max_freq_context)),
>   		      min, rps->max_freq_user);
>   	if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
>   		max = rps->boost_freq;
> @@ -809,6 +809,75 @@ void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
>   	atomic_inc(client ? &client->boosts : &rps->boosts);
>   }
>   
> +static void __rps_update_engine(struct intel_rps *rps,
> +				enum intel_engine_id idx,
> +				u32 min, u32 max)
> +{
> +	unsigned long flags;
> +	bool update = false;
> +	u32 old;
> +	int n;
> +
> +	GEM_BUG_ON(min > max);
> +
> +	if (rps->min_freq_engine[idx] != min) {
> +		spin_lock_irqsave(&rps->engine_lock, flags);
> +
> +		rps->min_freq_engine[idx] = min;
> +
> +		old = rps->min_freq_context;
> +		rps->min_freq_context = rps->min_freq_engine[0];
> +		for (n = 1; n < ARRAY_SIZE(rps->min_freq_engine); n++)
> +			if (rps->min_freq_engine[n] > rps->min_freq_context)
> +				rps->min_freq_context = rps->min_freq_engine[n];
> +		update |= rps->min_freq_context != old;
> +
> +		spin_unlock_irqrestore(&rps->engine_lock, flags);
> +	}
> +
> +	if (rps->max_freq_engine[idx] != max) {
> +		spin_lock_irqsave(&rps->engine_lock, flags);
> +
> +		rps->max_freq_engine[idx] = max;
> +
> +		old = rps->max_freq_context;
> +		rps->max_freq_context = rps->max_freq_engine[0];
> +		for (n = 1; n < ARRAY_SIZE(rps->max_freq_engine); n++)
> +			if (rps->max_freq_engine[n] < rps->max_freq_context)
> +				rps->max_freq_context = rps->max_freq_engine[n];
> +		update |= rps->max_freq_context != old;
> +
> +		spin_unlock_irqrestore(&rps->engine_lock, flags);
> +	}
> +
> +	/* Kick the RPS worker to apply the updated constraints, as needed */
> +	if (update && !atomic_read(&rps->num_waiters)) {
> +		old = READ_ONCE(rps->freq);
> +		if ((old < min || old > max))
> +			schedule_work(&rps->work);
> +	}
> +}
> +
> +void intel_rps_update_engine(const struct intel_engine_cs *engine,
> +			     const struct i915_gem_context *ctx)
> +{
> +	struct intel_rps *rps = &engine->i915->gt_pm.rps;
> +	u32 min, max;
> +
> +	if (!HAS_RPS(engine->i915))
> +		return;
> +
> +	if (ctx) {
> +		min = ctx->min_freq;
> +		max = ctx->max_freq;
> +	} else {
> +		min = rps->min_freq_hw;
> +		max = rps->max_freq_hw;
> +	}
> +
> +	__rps_update_engine(rps, engine->id, min, max);
> +}
> +
>   static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
>   {
>   	I915_WRITE(GEN6_RC_CONTROL, 0);
> @@ -2379,12 +2448,41 @@ void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
>   		gen6_reset_rps_interrupts(dev_priv);
>   }
>   
> +static void intel_rps_init(struct intel_rps *rps)
> +{
> +	mutex_init(&rps->lock);
> +	INIT_WORK(&rps->work, intel_rps_work);
> +	spin_lock_init(&rps->engine_lock);
> +}
> +
> +static void intel_rps_init__frequencies(struct intel_rps *rps)
> +{
> +	int n;
> +
> +	rps->max_freq_soft = rps->max_freq_hw;
> +	rps->min_freq_soft = rps->min_freq_hw;
> +
> +	rps->max_freq_context = rps->max_freq_hw;
> +	rps->min_freq_context = rps->min_freq_hw;
> +	for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++) {
> +		rps->max_freq_engine[n] = rps->max_freq_hw;
> +		rps->min_freq_engine[n] = rps->min_freq_hw;
> +	}
> +
> +	/* Finally allow us to boost to max by default */
> +	rps->boost_freq = rps->max_freq_hw;
> +	rps->idle_freq = rps->min_freq_hw;
> +
> +	rps->freq = rps->idle_freq;
> +	rps->min = rps->min_freq_hw;
> +	rps->max = rps->max_freq_hw;
> +}
> +
>   void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   
> -	mutex_init(&rps->lock);
> -	INIT_WORK(&rps->work, intel_rps_work);
> +	intel_rps_init(rps);
>   
>   	if (HAS_GUC_SCHED(dev_priv))
>   		rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
> @@ -2449,16 +2547,7 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>   		}
>   	}
>   
> -	rps->max_freq_soft = rps->max_freq_hw;
> -	rps->min_freq_soft = rps->min_freq_hw;
> -
> -	/* Finally allow us to boost to max by default */
> -	rps->boost_freq = rps->max_freq_hw;
> -	rps->idle_freq = rps->min_freq_hw;
> -
> -	rps->freq = rps->idle_freq;
> -	rps->min = rps->min_freq_hw;
> -	rps->max = rps->max_freq_hw;
> +	intel_rps_init__frequencies(rps);
>   
>   	if (HAS_LLC(dev_priv))
>   		gen6_update_ring_freq(dev_priv);
> @@ -2703,3 +2792,7 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
>   
>   	gen9_reset_guc_interrupts(dev_priv);
>   }
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> +#include "selftests/intel_gt_pm.c"
> +#endif
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> index 314912c15126..ef3f27eca529 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> @@ -25,7 +25,9 @@
>   #define __INTEL_GT_PM_H__
>   
>   struct drm_i915_private;
> +struct i915_gem_context;
>   struct i915_request;
> +struct intel_engine_cs;
>   struct intel_rps_client;
>   
>   void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
> @@ -47,6 +49,8 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv);
>   
>   void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
>   
> +void intel_rps_update_engine(const struct intel_engine_cs *engine,
> +			     const struct i915_gem_context *ctx);
>   void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
>   
>   int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 8a8ad2fe158d..d8eaae683186 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -26,9 +26,12 @@
>   #include <trace/events/dma_fence.h>
>   
>   #include "intel_guc_submission.h"
> -#include "intel_lrc_reg.h"
> +
>   #include "i915_drv.h"
>   
> +#include "intel_gt_pm.h"
> +#include "intel_lrc_reg.h"
> +
>   #define GUC_PREEMPT_FINISHED		0x1
>   #define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
>   #define GUC_PREEMPT_BREADCRUMB_BYTES	\
> @@ -650,6 +653,12 @@ static void guc_submit(struct intel_engine_cs *engine)
>   	}
>   }
>   
> +static void update_rps(struct intel_engine_cs *engine)
> +{
> +	intel_rps_update_engine(engine,
> +				port_request(engine->execlists.port)->ctx);
> +}
> +
>   static void port_assign(struct execlist_port *port, struct i915_request *rq)
>   {
>   	GEM_BUG_ON(port_isset(port));
> @@ -728,6 +737,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>   	execlists->first = rb;
>   	if (submit) {
>   		port_assign(port, last);
> +		update_rps(engine);
>   		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
>   		guc_submit(engine);
>   	}
> @@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
>   
>   		rq = port_request(&port[0]);
>   	}
> -	if (!rq)
> +	if (!rq) {
>   		execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
> +		intel_rps_update_engine(engine, NULL);
> +	}
>   
>   	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
>   	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3a69b367e565..518f7b3db857 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -138,6 +138,7 @@
>   #include "i915_drv.h"
>   #include "i915_gem_render_state.h"
>   #include "intel_lrc_reg.h"
> +#include "intel_gt_pm.h"
>   #include "intel_mocs.h"
>   
>   #define RING_EXECLIST_QFULL		(1 << 0x2)
> @@ -535,6 +536,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>   	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>   }
>   
> +static void update_rps(struct intel_engine_cs *engine)
> +{
> +	intel_rps_update_engine(engine,
> +				port_request(engine->execlists.port)->ctx);
> +}
> +
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -708,6 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	spin_unlock_irq(&engine->timeline->lock);
>   
>   	if (submit) {
> +		update_rps(engine);
>   		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
>   		execlists_submit_ports(engine);
>   	}
> @@ -982,6 +990,11 @@ static void execlists_submission_tasklet(unsigned long data)
>   					  engine->name, port->context_id);
>   
>   				execlists_port_complete(execlists, port);
> +
> +				/* Switch to the next request/context */
> +				rq = port_request(port);
> +				intel_rps_update_engine(engine,
> +							rq ? rq->ctx : NULL);
>   			} else {
>   				port_set(port, port_pack(rq, count));
>   			}
> @@ -1717,6 +1730,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>   	__unwind_incomplete_requests(engine);
>   	spin_unlock(&engine->timeline->lock);
>   
> +	intel_rps_update_engine(engine, NULL);
> +
>   	/* Mark all CS interrupts as complete */
>   	execlists->active = 0;
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> index 9a48aa441743..85b6e6d020b7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> @@ -14,6 +14,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
>   selftest(scatterlist, scatterlist_mock_selftests)
>   selftest(syncmap, i915_syncmap_mock_selftests)
>   selftest(uncore, intel_uncore_mock_selftests)
> +selftest(gt_pm, intel_gt_pm_mock_selftests)
>   selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
>   selftest(timelines, i915_gem_timeline_mock_selftests)
>   selftest(requests, i915_request_mock_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/intel_gt_pm.c b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> new file mode 100644
> index 000000000000..c3871eb9eabb
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> @@ -0,0 +1,130 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#include "../i915_selftest.h"
> +#include "i915_random.h"
> +
> +#include "mock_gem_device.h"
> +
> +static void mock_rps_init(struct drm_i915_private *i915)
> +{
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +
> +	/* Disable the register writes */
> +	mkwrite_device_info(i915)->gen = 0;
> +	mkwrite_device_info(i915)->has_rps = true;
> +
> +	intel_rps_init(rps);
> +
> +	rps->min_freq_hw = 0;
> +	rps->max_freq_hw = 255;
> +
> +	rps->min_freq_user = rps->min_freq_hw;
> +	rps->max_freq_user = rps->max_freq_hw;
> +
> +	intel_rps_init__frequencies(rps);
> +}
> +
> +static void mock_rps_fini(struct drm_i915_private *i915)
> +{
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +
> +	cancel_work_sync(&rps->work);
> +}
> +
> +static int igt_rps_engine(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +	I915_RND_STATE(prng);
> +	int err;
> +	int i;
> +
> +	intel_gt_pm_busy(i915); /* Activate RPS */
> +
> +	/*
> +	 * Minimum unit tests for intel_rps_update_engine().
> +	 *
> +	 * Whenever we call intel_rps_update_engine, it will
> +	 * replace the context min/max frequency request for a particular
> +	 * engine and then recompute the global max(min)/min(max) over all
> +	 * engines. In this mockup, we are limited to checking those
> +	 * max(min)/min(max) calculations and then seeing if the rps
> +	 * worker uses those bounds.
> +	 */
> +
> +	for (i = 0; i < 256 * 256; i++) {
> +		u8 freq = prandom_u32_state(&prng);
> +
> +		__rps_update_engine(rps, 0, freq, freq);
> +		if (rps->min_freq_context != freq ||
> +		    rps->max_freq_context != freq) {
> +			pr_err("Context min/max frequencies not restricted to %d, found [%d, %d]\n",
> +			       freq, rps->min_freq_context, rps->max_freq_context);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +		flush_work(&rps->work);
> +
> +		if (rps->freq != freq) {
> +			pr_err("Tried to restrict frequency to %d, found %d\n",
> +			       freq, rps->freq);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +	}
> +
> +	__rps_update_engine(rps, 0, rps->min_freq_hw, rps->max_freq_hw);
> +	if (rps->min_freq_context != rps->min_freq_hw ||
> +	    rps->max_freq_context != rps->max_freq_hw) {
> +		pr_err("Context frequency not restored to [%d, %d], found [%d, %d]\n",
> +		       rps->min_freq_hw, rps->min_freq_hw,
> +		       rps->min_freq_context, rps->max_freq_context);
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < I915_NUM_ENGINES; i++)
> +		__rps_update_engine(rps, i, i, 255 - i);
> +	i--;
> +	if (rps->min_freq_context != i) {
> +		pr_err("Minimum context frequency across all engines not raised to %d, found %d\n", i, rps->min_freq_context);
> +		err = -EINVAL;
> +		goto out;
> +	}
> +	if (rps->max_freq_context != 255 - i) {
> +		pr_err("Maxmimum context frequency across all engines not lowered to %d, found %d\n", 255 - i, rps->max_freq_context);
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	err = 0;
> +out:
> +	intel_gt_pm_idle(i915);
> +	return err;
> +}
> +
> +int intel_gt_pm_mock_selftests(void)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(igt_rps_engine),
> +	};
> +	struct drm_i915_private *i915;
> +	int err;
> +
> +	i915 = mock_gem_device();
> +	if (!i915)
> +		return -ENOMEM;
> +
> +	mock_rps_init(i915);
> +
> +	err = i915_subtests(tests, i915);
> +
> +	mock_rps_fini(i915);
> +	drm_dev_unref(&i915->drm);
> +
> +	return err;
> +}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7f5634ce8e88..64c6377df769 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1456,6 +1456,26 @@ struct drm_i915_gem_context_param {
>   #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
>   #define   I915_CONTEXT_DEFAULT_PRIORITY		0
>   #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
> +
> +/*
> + * I915_CONTEXT_PARAM_FREQUENCY:
> + *
> + * Request that when this context runs, the GPU is restricted to run
> + * in this frequency range; but still constrained by the global user
> + * restriction specified via sysfs.
> + *
> + * The minimum / maximum frequencies are specified in MHz. Each context
> + * starts in the default unrestricted state, where the range is taken from
> + * the hardware, and so may be queried.
> + *
> + * Note the frequency is only changed on a context switch; if the
> + * context's frequency is updated whilst the context is currently executing
> + * the request will not take effect until the next time the context is run.
> + */
> +#define I915_CONTEXT_PARAM_FREQUENCY	0x7
> +#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
> +#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
> +#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
>   	__u64 value;
>   };
>
Joonas Lahtinen Nov. 16, 2018, 11:14 a.m. UTC | #4
Quoting Lionel Landwerlin (2018-11-09 19:51:17)
> I think we have some interest in reviving this for the performance query 
> use case.

How are performance queries related?

Regards, Joonas

> Is that on anybody's todo list?
> 
> Thanks,
> 
> -
> Lionel
> 
> On 14/03/2018 09:37, Chris Wilson wrote:
> > Often, we find ourselves facing a workload where the user knows in
> > advance what GPU frequency they require for it to complete in a timely
> > manner, and using past experience they can outperform the HW assisted
> > RPS autotuning. An example might be kodi (HTPC) where they know that
> > video decoding and compositing require a minimum frequency to avoid ever
> > dropping a frame, or conversely know when they are in a powersaving mode
> > and would rather have slower updates than ramp up the GPU frequency and
> > power consumption. Other workloads may defeat the autotuning entirely
> > and need manual control to meet their performance goals, e.g. bursty
> > applications which require low latency.
> >
> > To accommodate the varying needs of different applications, that may be
> > running concurrently, we want a more flexible system than a global limit
> > supplied by sysfs. To this end, we offer the application the option to
> > set their desired frequency bounds on the context itself, and apply those
> > bounds when we execute commands from the application, switching between
> > bounds just as easily as we switch between the clients themselves.
> >
> > The clients can query the range supported by the HW, or at least the
> > range they are restricted to, and then freely select frequencies within
> > that range that they want to run at. (They can select just a single
> > frequency if they so choose.) As this is subject to the global limit
> > supplied by the user in sysfs, and a client can only reduce the range of
> > frequencies they allow the HW to run at, we allow all clients to adjust
> > their request (and not restrict raising the minimum to privileged
> > CAP_SYS_NICE clients).
> >
> > Testcase: igt/gem_ctx_freq
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Praveen Paneri <praveen.paneri@intel.com>
> > Cc: Sagar A Kamble <sagar.a.kamble@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c                |  16 ++-
> >   drivers/gpu/drm/i915/i915_drv.h                    |   5 +
> >   drivers/gpu/drm/i915/i915_gem_context.c            |  54 +++++++++
> >   drivers/gpu/drm/i915/i915_gem_context.h            |   3 +
> >   drivers/gpu/drm/i915/intel_gt_pm.c                 | 121 ++++++++++++++++---
> >   drivers/gpu/drm/i915/intel_gt_pm.h                 |   4 +
> >   drivers/gpu/drm/i915/intel_guc_submission.c        |  16 ++-
> >   drivers/gpu/drm/i915/intel_lrc.c                   |  15 +++
> >   .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
> >   drivers/gpu/drm/i915/selftests/intel_gt_pm.c       | 130 +++++++++++++++++++++
> >   include/uapi/drm/i915_drm.h                        |  20 ++++
> >   11 files changed, 368 insertions(+), 17 deletions(-)
> >   create mode 100644 drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> >
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 7c7afdac8c8c..a21b9164ade8 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -2191,6 +2191,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
> >       struct drm_device *dev = &dev_priv->drm;
> >       struct intel_rps *rps = &dev_priv->gt_pm.rps;
> >       struct drm_file *file;
> > +     int n;
> >   
> >       seq_printf(m, "GPU busy? %s [%d requests]\n",
> >                  yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
> > @@ -2198,17 +2199,30 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
> >       seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
> >       seq_printf(m, "Boosts outstanding? %d\n",
> >                  atomic_read(&rps->num_waiters));
> > +     seq_printf(m, "Worker pending? %s\n", yesno(work_busy(&rps->work)));
> >       seq_printf(m, "Frequency requested %d [%d, %d]\n",
> >                  intel_gpu_freq(dev_priv, rps->freq),
> >                  intel_gpu_freq(dev_priv, rps->min),
> >                  intel_gpu_freq(dev_priv, rps->max));
> > -     seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
> > +     seq_printf(m, "  min hard:%d, soft:%d, ctx:%d, user:%d; max user:%d, ctx:%d, soft:%d, hard:%d\n",
> >                  intel_gpu_freq(dev_priv, rps->min_freq_hw),
> >                  intel_gpu_freq(dev_priv, rps->min_freq_soft),
> > +                intel_gpu_freq(dev_priv, rps->min_freq_context),
> >                  intel_gpu_freq(dev_priv, rps->min_freq_user),
> >                  intel_gpu_freq(dev_priv, rps->max_freq_user),
> > +                intel_gpu_freq(dev_priv, rps->max_freq_context),
> >                  intel_gpu_freq(dev_priv, rps->max_freq_soft),
> >                  intel_gpu_freq(dev_priv, rps->max_freq_hw));
> > +     seq_printf(m, "  engines min: [");
> > +     for (n = 0; n < ARRAY_SIZE(rps->min_freq_engine); n++)
> > +             seq_printf(m, "%s%d", n ? ", " : "",
> > +                        intel_gpu_freq(dev_priv, rps->min_freq_engine[n]));
> > +     seq_printf(m, "]\n  engines max: [");
> > +     for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++)
> > +             seq_printf(m, "%s%d", n ? ", " : "",
> > +                        intel_gpu_freq(dev_priv, rps->max_freq_engine[n]));
> > +     seq_printf(m, "]\n");
> > +
> >       seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
> >                  intel_gpu_freq(dev_priv, rps->idle_freq),
> >                  intel_gpu_freq(dev_priv, rps->efficient_freq),
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 82e9a58bd65f..d754d44cfbc2 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -731,6 +731,7 @@ struct intel_rps_ei {
> >   
> >   struct intel_rps {
> >       struct mutex lock;
> > +     spinlock_t engine_lock; /* protects updates to min/max_freq_context */
> >       struct work_struct work;
> >   
> >       bool active;
> > @@ -763,6 +764,10 @@ struct intel_rps {
> >       u8 max_freq_user;       /* Max frequency permitted by the driver */
> >       u8 min_freq_soft;
> >       u8 max_freq_soft;
> > +     u8 min_freq_context;    /* Min frequency permitted by the context */
> > +     u8 max_freq_context;    /* Max frequency permitted by the context */
> > +     u8 min_freq_engine[I915_NUM_ENGINES];
> > +     u8 max_freq_engine[I915_NUM_ENGINES];
> >   
> >       u8 idle_freq;           /* Frequency to request when we are idle */
> >       u8 efficient_freq;      /* AKA RPe. Pre-determined balanced frequency */
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index 65bf92658d92..1d36e2a02479 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -88,8 +88,10 @@
> >   #include <linux/log2.h>
> >   #include <drm/drmP.h>
> >   #include <drm/i915_drm.h>
> > +
> >   #include "i915_drv.h"
> >   #include "i915_trace.h"
> > +#include "intel_gt_pm.h"
> >   
> >   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
> >   
> > @@ -281,6 +283,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
> >       list_add_tail(&ctx->link, &dev_priv->contexts.list);
> >       ctx->i915 = dev_priv;
> >       ctx->priority = I915_PRIORITY_NORMAL;
> > +     ctx->min_freq = dev_priv->gt_pm.rps.min_freq_hw;
> > +     ctx->max_freq = dev_priv->gt_pm.rps.max_freq_hw;
> >   
> >       INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> >       INIT_LIST_HEAD(&ctx->handles_list);
> > @@ -715,6 +719,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> >   {
> >       struct drm_i915_file_private *file_priv = file->driver_priv;
> >       struct drm_i915_gem_context_param *args = data;
> > +     struct drm_i915_private *i915 = to_i915(dev);
> >       struct i915_gem_context *ctx;
> >       int ret = 0;
> >   
> > @@ -747,6 +752,19 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> >       case I915_CONTEXT_PARAM_PRIORITY:
> >               args->value = ctx->priority;
> >               break;
> > +     case I915_CONTEXT_PARAM_FREQUENCY:
> > +             if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
> > +                     ret = -ENODEV;
> > +             } else if (args->size) {
> > +                     ret = -EINVAL;
> > +             } else {
> > +                     u32 min = intel_gpu_freq(i915, ctx->min_freq);
> > +                     u32 max = intel_gpu_freq(i915, ctx->max_freq);
> > +
> > +                     args->value = I915_CONTEXT_SET_FREQUENCY(min, max);
> > +             }
> > +             break;
> > +
> >       default:
> >               ret = -EINVAL;
> >               break;
> > @@ -761,6 +779,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >   {
> >       struct drm_i915_file_private *file_priv = file->driver_priv;
> >       struct drm_i915_gem_context_param *args = data;
> > +     struct drm_i915_private *i915 = to_i915(dev);
> >       struct i915_gem_context *ctx;
> >       int ret;
> >   
> > @@ -821,6 +840,41 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >                               ctx->priority = priority;
> >               }
> >               break;
> > +     case I915_CONTEXT_PARAM_FREQUENCY:
> > +             if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
> > +                     ret = -ENODEV;
> > +             } else if (args->size) {
> > +                     ret = -EINVAL;
> > +             } else {
> > +                     struct intel_rps *rps = &i915->gt_pm.rps;
> > +                     u32 min, max;
> > +
> > +                     min = I915_CONTEXT_MIN_FREQUENCY(args->value);
> > +                     min = intel_freq_opcode(i915, min);
> > +
> > +                     max = I915_CONTEXT_MAX_FREQUENCY(args->value);
> > +                     max = intel_freq_opcode(i915, max);
> > +
> > +                     /*
> > +                      * As we constrain the frequency request from the
> > +                      * context (application) by the sysadmin imposed limits,
> > +                      * it is reasonable to allow the application to
> > +                      * specify its preferred range within those limits.
> > +                      * That is, we do not need to restrict requesting
> > +                      * a higher frequency to privileged (CAP_SYS_NICE)
> > +                      * processes.
> > +                      */
> > +                     if (max < min) {
> > +                             ret = -EINVAL;
> > +                     } else if (min < rps->min_freq_hw ||
> > +                                max > rps->max_freq_hw) {
> > +                             ret = -EINVAL;
> > +                     } else {
> > +                             ctx->min_freq = min;
> > +                             ctx->max_freq = max;
> > +                     }
> > +             }
> > +             break;
> >   
> >       default:
> >               ret = -EINVAL;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> > index 7854262ddfd9..98f7b71a787a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> > @@ -150,6 +150,9 @@ struct i915_gem_context {
> >        */
> >       int priority;
> >   
> > +     u32 min_freq;
> > +     u32 max_freq;
> > +
> >       /** ggtt_offset_bias: placement restriction for context objects */
> >       u32 ggtt_offset_bias;
> >   
> > diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> > index 9705205a26b5..4bbfb4080f8f 100644
> > --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> > @@ -402,10 +402,10 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
> >       GEM_BUG_ON(!rps->active);
> >   
> >       min = clamp_t(int,
> > -                   rps->min_freq_soft,
> > +                   max(rps->min_freq_soft, READ_ONCE(rps->min_freq_context)),
> >                     rps->min_freq_user, rps->max_freq_user);
> >       max = clamp_t(int,
> > -                   rps->max_freq_soft,
> > +                   min(rps->max_freq_soft, READ_ONCE(rps->max_freq_context)),
> >                     min, rps->max_freq_user);
> >       if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
> >               max = rps->boost_freq;
> > @@ -809,6 +809,75 @@ void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
> >       atomic_inc(client ? &client->boosts : &rps->boosts);
> >   }
> >   
> > +static void __rps_update_engine(struct intel_rps *rps,
> > +                             enum intel_engine_id idx,
> > +                             u32 min, u32 max)
> > +{
> > +     unsigned long flags;
> > +     bool update = false;
> > +     u32 old;
> > +     int n;
> > +
> > +     GEM_BUG_ON(min > max);
> > +
> > +     if (rps->min_freq_engine[idx] != min) {
> > +             spin_lock_irqsave(&rps->engine_lock, flags);
> > +
> > +             rps->min_freq_engine[idx] = min;
> > +
> > +             old = rps->min_freq_context;
> > +             rps->min_freq_context = rps->min_freq_engine[0];
> > +             for (n = 1; n < ARRAY_SIZE(rps->min_freq_engine); n++)
> > +                     if (rps->min_freq_engine[n] > rps->min_freq_context)
> > +                             rps->min_freq_context = rps->min_freq_engine[n];
> > +             update |= rps->min_freq_context != old;
> > +
> > +             spin_unlock_irqrestore(&rps->engine_lock, flags);
> > +     }
> > +
> > +     if (rps->max_freq_engine[idx] != max) {
> > +             spin_lock_irqsave(&rps->engine_lock, flags);
> > +
> > +             rps->max_freq_engine[idx] = max;
> > +
> > +             old = rps->max_freq_context;
> > +             rps->max_freq_context = rps->max_freq_engine[0];
> > +             for (n = 1; n < ARRAY_SIZE(rps->max_freq_engine); n++)
> > +                     if (rps->max_freq_engine[n] < rps->max_freq_context)
> > +                             rps->max_freq_context = rps->max_freq_engine[n];
> > +             update |= rps->max_freq_context != old;
> > +
> > +             spin_unlock_irqrestore(&rps->engine_lock, flags);
> > +     }
> > +
> > +     /* Kick the RPS worker to apply the updated constraints, as needed */
> > +     if (update && !atomic_read(&rps->num_waiters)) {
> > +             old = READ_ONCE(rps->freq);
> > +             if ((old < min || old > max))
> > +                     schedule_work(&rps->work);
> > +     }
> > +}
> > +
> > +void intel_rps_update_engine(const struct intel_engine_cs *engine,
> > +                          const struct i915_gem_context *ctx)
> > +{
> > +     struct intel_rps *rps = &engine->i915->gt_pm.rps;
> > +     u32 min, max;
> > +
> > +     if (!HAS_RPS(engine->i915))
> > +             return;
> > +
> > +     if (ctx) {
> > +             min = ctx->min_freq;
> > +             max = ctx->max_freq;
> > +     } else {
> > +             min = rps->min_freq_hw;
> > +             max = rps->max_freq_hw;
> > +     }
> > +
> > +     __rps_update_engine(rps, engine->id, min, max);
> > +}
> > +
> >   static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
> >   {
> >       I915_WRITE(GEN6_RC_CONTROL, 0);
> > @@ -2379,12 +2448,41 @@ void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
> >               gen6_reset_rps_interrupts(dev_priv);
> >   }
> >   
> > +static void intel_rps_init(struct intel_rps *rps)
> > +{
> > +     mutex_init(&rps->lock);
> > +     INIT_WORK(&rps->work, intel_rps_work);
> > +     spin_lock_init(&rps->engine_lock);
> > +}
> > +
> > +static void intel_rps_init__frequencies(struct intel_rps *rps)
> > +{
> > +     int n;
> > +
> > +     rps->max_freq_soft = rps->max_freq_hw;
> > +     rps->min_freq_soft = rps->min_freq_hw;
> > +
> > +     rps->max_freq_context = rps->max_freq_hw;
> > +     rps->min_freq_context = rps->min_freq_hw;
> > +     for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++) {
> > +             rps->max_freq_engine[n] = rps->max_freq_hw;
> > +             rps->min_freq_engine[n] = rps->min_freq_hw;
> > +     }
> > +
> > +     /* Finally allow us to boost to max by default */
> > +     rps->boost_freq = rps->max_freq_hw;
> > +     rps->idle_freq = rps->min_freq_hw;
> > +
> > +     rps->freq = rps->idle_freq;
> > +     rps->min = rps->min_freq_hw;
> > +     rps->max = rps->max_freq_hw;
> > +}
> > +
> >   void intel_gt_pm_init(struct drm_i915_private *dev_priv)
> >   {
> >       struct intel_rps *rps = &dev_priv->gt_pm.rps;
> >   
> > -     mutex_init(&rps->lock);
> > -     INIT_WORK(&rps->work, intel_rps_work);
> > +     intel_rps_init(rps);
> >   
> >       if (HAS_GUC_SCHED(dev_priv))
> >               rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
> > @@ -2449,16 +2547,7 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
> >               }
> >       }
> >   
> > -     rps->max_freq_soft = rps->max_freq_hw;
> > -     rps->min_freq_soft = rps->min_freq_hw;
> > -
> > -     /* Finally allow us to boost to max by default */
> > -     rps->boost_freq = rps->max_freq_hw;
> > -     rps->idle_freq = rps->min_freq_hw;
> > -
> > -     rps->freq = rps->idle_freq;
> > -     rps->min = rps->min_freq_hw;
> > -     rps->max = rps->max_freq_hw;
> > +     intel_rps_init__frequencies(rps);
> >   
> >       if (HAS_LLC(dev_priv))
> >               gen6_update_ring_freq(dev_priv);
> > @@ -2703,3 +2792,7 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
> >   
> >       gen9_reset_guc_interrupts(dev_priv);
> >   }
> > +
> > +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> > +#include "selftests/intel_gt_pm.c"
> > +#endif
> > diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> > index 314912c15126..ef3f27eca529 100644
> > --- a/drivers/gpu/drm/i915/intel_gt_pm.h
> > +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> > @@ -25,7 +25,9 @@
> >   #define __INTEL_GT_PM_H__
> >   
> >   struct drm_i915_private;
> > +struct i915_gem_context;
> >   struct i915_request;
> > +struct intel_engine_cs;
> >   struct intel_rps_client;
> >   
> >   void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
> > @@ -47,6 +49,8 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv);
> >   
> >   void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
> >   
> > +void intel_rps_update_engine(const struct intel_engine_cs *engine,
> > +                          const struct i915_gem_context *ctx);
> >   void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
> >   
> >   int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
> > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> > index 8a8ad2fe158d..d8eaae683186 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> > @@ -26,9 +26,12 @@
> >   #include <trace/events/dma_fence.h>
> >   
> >   #include "intel_guc_submission.h"
> > -#include "intel_lrc_reg.h"
> > +
> >   #include "i915_drv.h"
> >   
> > +#include "intel_gt_pm.h"
> > +#include "intel_lrc_reg.h"
> > +
> >   #define GUC_PREEMPT_FINISHED                0x1
> >   #define GUC_PREEMPT_BREADCRUMB_DWORDS       0x8
> >   #define GUC_PREEMPT_BREADCRUMB_BYTES        \
> > @@ -650,6 +653,12 @@ static void guc_submit(struct intel_engine_cs *engine)
> >       }
> >   }
> >   
> > +static void update_rps(struct intel_engine_cs *engine)
> > +{
> > +     intel_rps_update_engine(engine,
> > +                             port_request(engine->execlists.port)->ctx);
> > +}
> > +
> >   static void port_assign(struct execlist_port *port, struct i915_request *rq)
> >   {
> >       GEM_BUG_ON(port_isset(port));
> > @@ -728,6 +737,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> >       execlists->first = rb;
> >       if (submit) {
> >               port_assign(port, last);
> > +             update_rps(engine);
> >               execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
> >               guc_submit(engine);
> >       }
> > @@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
> >   
> >               rq = port_request(&port[0]);
> >       }
> > -     if (!rq)
> > +     if (!rq) {
> >               execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
> > +             intel_rps_update_engine(engine, NULL);
> > +     }
> >   
> >       if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
> >           intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 3a69b367e565..518f7b3db857 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -138,6 +138,7 @@
> >   #include "i915_drv.h"
> >   #include "i915_gem_render_state.h"
> >   #include "intel_lrc_reg.h"
> > +#include "intel_gt_pm.h"
> >   #include "intel_mocs.h"
> >   
> >   #define RING_EXECLIST_QFULL         (1 << 0x2)
> > @@ -535,6 +536,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> >       execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
> >   }
> >   
> > +static void update_rps(struct intel_engine_cs *engine)
> > +{
> > +     intel_rps_update_engine(engine,
> > +                             port_request(engine->execlists.port)->ctx);
> > +}
> > +
> >   static void execlists_dequeue(struct intel_engine_cs *engine)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> > @@ -708,6 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >       spin_unlock_irq(&engine->timeline->lock);
> >   
> >       if (submit) {
> > +             update_rps(engine);
> >               execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
> >               execlists_submit_ports(engine);
> >       }
> > @@ -982,6 +990,11 @@ static void execlists_submission_tasklet(unsigned long data)
> >                                         engine->name, port->context_id);
> >   
> >                               execlists_port_complete(execlists, port);
> > +
> > +                             /* Switch to the next request/context */
> > +                             rq = port_request(port);
> > +                             intel_rps_update_engine(engine,
> > +                                                     rq ? rq->ctx : NULL);
> >                       } else {
> >                               port_set(port, port_pack(rq, count));
> >                       }
> > @@ -1717,6 +1730,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
> >       __unwind_incomplete_requests(engine);
> >       spin_unlock(&engine->timeline->lock);
> >   
> > +     intel_rps_update_engine(engine, NULL);
> > +
> >       /* Mark all CS interrupts as complete */
> >       execlists->active = 0;
> >   
> > diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> > index 9a48aa441743..85b6e6d020b7 100644
> > --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> > +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> > @@ -14,6 +14,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
> >   selftest(scatterlist, scatterlist_mock_selftests)
> >   selftest(syncmap, i915_syncmap_mock_selftests)
> >   selftest(uncore, intel_uncore_mock_selftests)
> > +selftest(gt_pm, intel_gt_pm_mock_selftests)
> >   selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
> >   selftest(timelines, i915_gem_timeline_mock_selftests)
> >   selftest(requests, i915_request_mock_selftests)
> > diff --git a/drivers/gpu/drm/i915/selftests/intel_gt_pm.c b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> > new file mode 100644
> > index 000000000000..c3871eb9eabb
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> > @@ -0,0 +1,130 @@
> > +/*
> > + * SPDX-License-Identifier: MIT
> > + *
> > + * Copyright © 2018 Intel Corporation
> > + */
> > +
> > +#include "../i915_selftest.h"
> > +#include "i915_random.h"
> > +
> > +#include "mock_gem_device.h"
> > +
> > +static void mock_rps_init(struct drm_i915_private *i915)
> > +{
> > +     struct intel_rps *rps = &i915->gt_pm.rps;
> > +
> > +     /* Disable the register writes */
> > +     mkwrite_device_info(i915)->gen = 0;
> > +     mkwrite_device_info(i915)->has_rps = true;
> > +
> > +     intel_rps_init(rps);
> > +
> > +     rps->min_freq_hw = 0;
> > +     rps->max_freq_hw = 255;
> > +
> > +     rps->min_freq_user = rps->min_freq_hw;
> > +     rps->max_freq_user = rps->max_freq_hw;
> > +
> > +     intel_rps_init__frequencies(rps);
> > +}
> > +
> > +static void mock_rps_fini(struct drm_i915_private *i915)
> > +{
> > +     struct intel_rps *rps = &i915->gt_pm.rps;
> > +
> > +     cancel_work_sync(&rps->work);
> > +}
> > +
> > +static int igt_rps_engine(void *arg)
> > +{
> > +     struct drm_i915_private *i915 = arg;
> > +     struct intel_rps *rps = &i915->gt_pm.rps;
> > +     I915_RND_STATE(prng);
> > +     int err;
> > +     int i;
> > +
> > +     intel_gt_pm_busy(i915); /* Activate RPS */
> > +
> > +     /*
> > +      * Minimum unit tests for intel_rps_update_engine().
> > +      *
> > +      * Whenever we call intel_rps_update_engine, it will
> > +      * replace the context min/max frequency request for a particular
> > +      * engine and then recompute the global max(min)/min(max) over all
> > +      * engines. In this mockup, we are limited to checking those
> > +      * max(min)/min(max) calculations and then seeing if the rps
> > +      * worker uses those bounds.
> > +      */
> > +
> > +     for (i = 0; i < 256 * 256; i++) {
> > +             u8 freq = prandom_u32_state(&prng);
> > +
> > +             __rps_update_engine(rps, 0, freq, freq);
> > +             if (rps->min_freq_context != freq ||
> > +                 rps->max_freq_context != freq) {
> > +                     pr_err("Context min/max frequencies not restricted to %d, found [%d, %d]\n",
> > +                            freq, rps->min_freq_context, rps->max_freq_context);
> > +                     err = -EINVAL;
> > +                     goto out;
> > +             }
> > +             flush_work(&rps->work);
> > +
> > +             if (rps->freq != freq) {
> > +                     pr_err("Tried to restrict frequency to %d, found %d\n",
> > +                            freq, rps->freq);
> > +                     err = -EINVAL;
> > +                     goto out;
> > +             }
> > +     }
> > +
> > +     __rps_update_engine(rps, 0, rps->min_freq_hw, rps->max_freq_hw);
> > +     if (rps->min_freq_context != rps->min_freq_hw ||
> > +         rps->max_freq_context != rps->max_freq_hw) {
> > +             pr_err("Context frequency not restored to [%d, %d], found [%d, %d]\n",
> > +                    rps->min_freq_hw, rps->min_freq_hw,
> > +                    rps->min_freq_context, rps->max_freq_context);
> > +             err = -EINVAL;
> > +             goto out;
> > +     }
> > +
> > +     for (i = 0; i < I915_NUM_ENGINES; i++)
> > +             __rps_update_engine(rps, i, i, 255 - i);
> > +     i--;
> > +     if (rps->min_freq_context != i) {
> > +             pr_err("Minimum context frequency across all engines not raised to %d, found %d\n", i, rps->min_freq_context);
> > +             err = -EINVAL;
> > +             goto out;
> > +     }
> > +     if (rps->max_freq_context != 255 - i) {
> > +             pr_err("Maxmimum context frequency across all engines not lowered to %d, found %d\n", 255 - i, rps->max_freq_context);
> > +             err = -EINVAL;
> > +             goto out;
> > +     }
> > +
> > +     err = 0;
> > +out:
> > +     intel_gt_pm_idle(i915);
> > +     return err;
> > +}
> > +
> > +int intel_gt_pm_mock_selftests(void)
> > +{
> > +     static const struct i915_subtest tests[] = {
> > +             SUBTEST(igt_rps_engine),
> > +     };
> > +     struct drm_i915_private *i915;
> > +     int err;
> > +
> > +     i915 = mock_gem_device();
> > +     if (!i915)
> > +             return -ENOMEM;
> > +
> > +     mock_rps_init(i915);
> > +
> > +     err = i915_subtests(tests, i915);
> > +
> > +     mock_rps_fini(i915);
> > +     drm_dev_unref(&i915->drm);
> > +
> > +     return err;
> > +}
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 7f5634ce8e88..64c6377df769 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -1456,6 +1456,26 @@ struct drm_i915_gem_context_param {
> >   #define   I915_CONTEXT_MAX_USER_PRIORITY    1023 /* inclusive */
> >   #define   I915_CONTEXT_DEFAULT_PRIORITY             0
> >   #define   I915_CONTEXT_MIN_USER_PRIORITY    -1023 /* inclusive */
> > +
> > +/*
> > + * I915_CONTEXT_PARAM_FREQUENCY:
> > + *
> > + * Request that when this context runs, the GPU is restricted to run
> > + * in this frequency range; but still constrained by the global user
> > + * restriction specified via sysfs.
> > + *
> > + * The minimum / maximum frequencies are specified in MHz. Each context
> > + * starts in the default unrestricted state, where the range is taken from
> > + * the hardware, and so may be queried.
> > + *
> > + * Note the frequency is only changed on a context switch; if the
> > + * context's frequency is updated whilst the context is currently executing,
> > + * the request will not take effect until the next time the context is run.
> > + */
> > +#define I915_CONTEXT_PARAM_FREQUENCY 0x7
> > +#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
> > +#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
> > +#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
> >       __u64 value;
> >   };
> >   
> 
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lionel Landwerlin Nov. 16, 2018, 11:22 a.m. UTC | #5
On 16/11/2018 11:14, Joonas Lahtinen wrote:
> Quoting Lionel Landwerlin (2018-11-09 19:51:17)
>> I think we have some interest in reviving this for the performance query
>> use case.
> How are performance queries related?


People want performance measured at a given frequency (usually max).


>
> Regards, Joonas
>
>> Is that on anybody's todo list?
>>
>> Thanks,
>>
>> -
>> Lionel
>>
>> On 14/03/2018 09:37, Chris Wilson wrote:
>>> Often, we find ourselves facing a workload where the user knows in
>>> advance what GPU frequency they require for it to complete in a timely
>>> manner, and using past experience they can outperform the HW assisted
>>> RPS autotuning. An example might be kodi (HTPC) where they know that
>>> video decoding and compositing require a minimum frequency to avoid ever
>>> dropping a frame, or conversely know when they are in a powersaving mode
>>> and would rather have slower updates than ramp up the GPU frequency and
>>> power consumption. Other workloads may defeat the autotuning entirely
>>> and need manual control to meet their performance goals, e.g. bursty
>>> applications which require low latency.
>>>
>>> To accommodate the varying needs of different applications, that may be
>>> running concurrently, we want a more flexible system than a global limit
>>> supplied by sysfs. To this end, we offer the application the option to
>>> set their desired frequency bounds on the context itself, and apply those
>>> bounds when we execute commands from the application, switching between
>>> bounds just as easily as we switch between the clients themselves.
>>>
>>> The clients can query the range supported by the HW, or at least the
>>> range they are restricted to, and then freely select frequencies within
>>> that range that they want to run at. (They can select just a single
>>> frequency if they so choose.) As this is subject to the global limit
>>> supplied by the user in sysfs, and a client can only reduce the range of
>>> frequencies they allow the HW to run at, we allow all clients to adjust
>>> their request (and not restrict raising the minimum to privileged
>>> CAP_SYS_NICE clients).
>>>
>>> Testcase: igt/gem_ctx_freq
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Praveen Paneri <praveen.paneri@intel.com>
>>> Cc: Sagar A Kamble <sagar.a.kamble@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_debugfs.c                |  16 ++-
>>>    drivers/gpu/drm/i915/i915_drv.h                    |   5 +
>>>    drivers/gpu/drm/i915/i915_gem_context.c            |  54 +++++++++
>>>    drivers/gpu/drm/i915/i915_gem_context.h            |   3 +
>>>    drivers/gpu/drm/i915/intel_gt_pm.c                 | 121 ++++++++++++++++---
>>>    drivers/gpu/drm/i915/intel_gt_pm.h                 |   4 +
>>>    drivers/gpu/drm/i915/intel_guc_submission.c        |  16 ++-
>>>    drivers/gpu/drm/i915/intel_lrc.c                   |  15 +++
>>>    .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
>>>    drivers/gpu/drm/i915/selftests/intel_gt_pm.c       | 130 +++++++++++++++++++++
>>>    include/uapi/drm/i915_drm.h                        |  20 ++++
>>>    11 files changed, 368 insertions(+), 17 deletions(-)
>>>    create mode 100644 drivers/gpu/drm/i915/selftests/intel_gt_pm.c
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 7c7afdac8c8c..a21b9164ade8 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -2191,6 +2191,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>>>        struct drm_device *dev = &dev_priv->drm;
>>>        struct intel_rps *rps = &dev_priv->gt_pm.rps;
>>>        struct drm_file *file;
>>> +     int n;
>>>    
>>>        seq_printf(m, "GPU busy? %s [%d requests]\n",
>>>                   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
>>> @@ -2198,17 +2199,30 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>>>        seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
>>>        seq_printf(m, "Boosts outstanding? %d\n",
>>>                   atomic_read(&rps->num_waiters));
>>> +     seq_printf(m, "Worker pending? %s\n", yesno(work_busy(&rps->work)));
>>>        seq_printf(m, "Frequency requested %d [%d, %d]\n",
>>>                   intel_gpu_freq(dev_priv, rps->freq),
>>>                   intel_gpu_freq(dev_priv, rps->min),
>>>                   intel_gpu_freq(dev_priv, rps->max));
>>> -     seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
>>> +     seq_printf(m, "  min hard:%d, soft:%d, ctx:%d, user:%d; max user:%d, ctx:%d, soft:%d, hard:%d\n",
>>>                   intel_gpu_freq(dev_priv, rps->min_freq_hw),
>>>                   intel_gpu_freq(dev_priv, rps->min_freq_soft),
>>> +                intel_gpu_freq(dev_priv, rps->min_freq_context),
>>>                   intel_gpu_freq(dev_priv, rps->min_freq_user),
>>>                   intel_gpu_freq(dev_priv, rps->max_freq_user),
>>> +                intel_gpu_freq(dev_priv, rps->max_freq_context),
>>>                   intel_gpu_freq(dev_priv, rps->max_freq_soft),
>>>                   intel_gpu_freq(dev_priv, rps->max_freq_hw));
>>> +     seq_printf(m, "  engines min: [");
>>> +     for (n = 0; n < ARRAY_SIZE(rps->min_freq_engine); n++)
>>> +             seq_printf(m, "%s%d", n ? ", " : "",
>>> +                        intel_gpu_freq(dev_priv, rps->min_freq_engine[n]));
>>> +     seq_printf(m, "]\n  engines max: [");
>>> +     for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++)
>>> +             seq_printf(m, "%s%d", n ? ", " : "",
>>> +                        intel_gpu_freq(dev_priv, rps->max_freq_engine[n]));
>>> +     seq_printf(m, "]\n");
>>> +
>>>        seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
>>>                   intel_gpu_freq(dev_priv, rps->idle_freq),
>>>                   intel_gpu_freq(dev_priv, rps->efficient_freq),
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index 82e9a58bd65f..d754d44cfbc2 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -731,6 +731,7 @@ struct intel_rps_ei {
>>>    
>>>    struct intel_rps {
>>>        struct mutex lock;
>>> +     spinlock_t engine_lock; /* protects updates to min/max_freq_context */
>>>        struct work_struct work;
>>>    
>>>        bool active;
>>> @@ -763,6 +764,10 @@ struct intel_rps {
>>>        u8 max_freq_user;       /* Max frequency permitted by the driver */
>>>        u8 min_freq_soft;
>>>        u8 max_freq_soft;
>>> +     u8 min_freq_context;    /* Min frequency permitted by the context */
>>> +     u8 max_freq_context;    /* Max frequency permitted by the context */
>>> +     u8 min_freq_engine[I915_NUM_ENGINES];
>>> +     u8 max_freq_engine[I915_NUM_ENGINES];
>>>    
>>>        u8 idle_freq;           /* Frequency to request when we are idle */
>>>        u8 efficient_freq;      /* AKA RPe. Pre-determined balanced frequency */
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index 65bf92658d92..1d36e2a02479 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -88,8 +88,10 @@
>>>    #include <linux/log2.h>
>>>    #include <drm/drmP.h>
>>>    #include <drm/i915_drm.h>
>>> +
>>>    #include "i915_drv.h"
>>>    #include "i915_trace.h"
>>> +#include "intel_gt_pm.h"
>>>    
>>>    #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
>>>    
>>> @@ -281,6 +283,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>>>        list_add_tail(&ctx->link, &dev_priv->contexts.list);
>>>        ctx->i915 = dev_priv;
>>>        ctx->priority = I915_PRIORITY_NORMAL;
>>> +     ctx->min_freq = dev_priv->gt_pm.rps.min_freq_hw;
>>> +     ctx->max_freq = dev_priv->gt_pm.rps.max_freq_hw;
>>>    
>>>        INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
>>>        INIT_LIST_HEAD(&ctx->handles_list);
>>> @@ -715,6 +719,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>>>    {
>>>        struct drm_i915_file_private *file_priv = file->driver_priv;
>>>        struct drm_i915_gem_context_param *args = data;
>>> +     struct drm_i915_private *i915 = to_i915(dev);
>>>        struct i915_gem_context *ctx;
>>>        int ret = 0;
>>>    
>>> @@ -747,6 +752,19 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>>>        case I915_CONTEXT_PARAM_PRIORITY:
>>>                args->value = ctx->priority;
>>>                break;
>>> +     case I915_CONTEXT_PARAM_FREQUENCY:
>>> +             if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
>>> +                     ret = -ENODEV;
>>> +             } else if (args->size) {
>>> +                     ret = -EINVAL;
>>> +             } else {
>>> +                     u32 min = intel_gpu_freq(i915, ctx->min_freq);
>>> +                     u32 max = intel_gpu_freq(i915, ctx->max_freq);
>>> +
>>> +                     args->value = I915_CONTEXT_SET_FREQUENCY(min, max);
>>> +             }
>>> +             break;
>>> +
>>>        default:
>>>                ret = -EINVAL;
>>>                break;
>>> @@ -761,6 +779,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>>>    {
>>>        struct drm_i915_file_private *file_priv = file->driver_priv;
>>>        struct drm_i915_gem_context_param *args = data;
>>> +     struct drm_i915_private *i915 = to_i915(dev);
>>>        struct i915_gem_context *ctx;
>>>        int ret;
>>>    
>>> @@ -821,6 +840,41 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>>>                                ctx->priority = priority;
>>>                }
>>>                break;
>>> +     case I915_CONTEXT_PARAM_FREQUENCY:
>>> +             if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
>>> +                     ret = -ENODEV;
>>> +             } else if (args->size) {
>>> +                     ret = -EINVAL;
>>> +             } else {
>>> +                     struct intel_rps *rps = &i915->gt_pm.rps;
>>> +                     u32 min, max;
>>> +
>>> +                     min = I915_CONTEXT_MIN_FREQUENCY(args->value);
>>> +                     min = intel_freq_opcode(i915, min);
>>> +
>>> +                     max = I915_CONTEXT_MAX_FREQUENCY(args->value);
>>> +                     max = intel_freq_opcode(i915, max);
>>> +
>>> +                     /*
>>> +                      * As we constrain the frequency request from the
>>> +                      * context (application) by the sysadmin imposed limits,
>>> +                      * it is reasonable to allow the application to
>>> +                      * specify its preferred range within those limits.
>>> +                      * That is we do not need to restrict requesting
>>> +                      * a higher frequency to privileged (CAP_SYS_NICE)
>>> +                      * processes.
>>> +                      */
>>> +                     if (max < min) {
>>> +                             ret = -EINVAL;
>>> +                     } else if (min < rps->min_freq_hw ||
>>> +                                max > rps->max_freq_hw) {
>>> +                             ret = -EINVAL;
>>> +                     } else {
>>> +                             ctx->min_freq = min;
>>> +                             ctx->max_freq = max;
>>> +                     }
>>> +             }
>>> +             break;
>>>    
>>>        default:
>>>                ret = -EINVAL;
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
>>> index 7854262ddfd9..98f7b71a787a 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.h
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
>>> @@ -150,6 +150,9 @@ struct i915_gem_context {
>>>         */
>>>        int priority;
>>>    
>>> +     u32 min_freq;
>>> +     u32 max_freq;
>>> +
>>>        /** ggtt_offset_bias: placement restriction for context objects */
>>>        u32 ggtt_offset_bias;
>>>    
>>> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
>>> index 9705205a26b5..4bbfb4080f8f 100644
>>> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
>>> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
>>> @@ -402,10 +402,10 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
>>>        GEM_BUG_ON(!rps->active);
>>>    
>>>        min = clamp_t(int,
>>> -                   rps->min_freq_soft,
>>> +                   max(rps->min_freq_soft, READ_ONCE(rps->min_freq_context)),
>>>                      rps->min_freq_user, rps->max_freq_user);
>>>        max = clamp_t(int,
>>> -                   rps->max_freq_soft,
>>> +                   min(rps->max_freq_soft, READ_ONCE(rps->max_freq_context)),
>>>                      min, rps->max_freq_user);
>>>        if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
>>>                max = rps->boost_freq;
>>> @@ -809,6 +809,75 @@ void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
>>>        atomic_inc(client ? &client->boosts : &rps->boosts);
>>>    }
>>>    
>>> +static void __rps_update_engine(struct intel_rps *rps,
>>> +                             enum intel_engine_id idx,
>>> +                             u32 min, u32 max)
>>> +{
>>> +     unsigned long flags;
>>> +     bool update = false;
>>> +     u32 old;
>>> +     int n;
>>> +
>>> +     GEM_BUG_ON(min > max);
>>> +
>>> +     if (rps->min_freq_engine[idx] != min) {
>>> +             spin_lock_irqsave(&rps->engine_lock, flags);
>>> +
>>> +             rps->min_freq_engine[idx] = min;
>>> +
>>> +             old = rps->min_freq_context;
>>> +             rps->min_freq_context = rps->min_freq_engine[0];
>>> +             for (n = 1; n < ARRAY_SIZE(rps->min_freq_engine); n++)
>>> +                     if (rps->min_freq_engine[n] > rps->min_freq_context)
>>> +                             rps->min_freq_context = rps->min_freq_engine[n];
>>> +             update |= rps->min_freq_context != old;
>>> +
>>> +             spin_unlock_irqrestore(&rps->engine_lock, flags);
>>> +     }
>>> +
>>> +     if (rps->max_freq_engine[idx] != max) {
>>> +             spin_lock_irqsave(&rps->engine_lock, flags);
>>> +
>>> +             rps->max_freq_engine[idx] = max;
>>> +
>>> +             old = rps->max_freq_context;
>>> +             rps->max_freq_context = rps->max_freq_engine[0];
>>> +             for (n = 1; n < ARRAY_SIZE(rps->max_freq_engine); n++)
>>> +                     if (rps->max_freq_engine[n] < rps->max_freq_context)
>>> +                             rps->max_freq_context = rps->max_freq_engine[n];
>>> +             update |= rps->max_freq_context != old;
>>> +
>>> +             spin_unlock_irqrestore(&rps->engine_lock, flags);
>>> +     }
>>> +
>>> +     /* Kick the RPS worker to apply the updated constraints, as needed */
>>> +     if (update && !atomic_read(&rps->num_waiters)) {
>>> +             old = READ_ONCE(rps->freq);
>>> +             if ((old < min || old > max))
>>> +                     schedule_work(&rps->work);
>>> +     }
>>> +}
>>> +
>>> +void intel_rps_update_engine(const struct intel_engine_cs *engine,
>>> +                          const struct i915_gem_context *ctx)
>>> +{
>>> +     struct intel_rps *rps = &engine->i915->gt_pm.rps;
>>> +     u32 min, max;
>>> +
>>> +     if (!HAS_RPS(engine->i915))
>>> +             return;
>>> +
>>> +     if (ctx) {
>>> +             min = ctx->min_freq;
>>> +             max = ctx->max_freq;
>>> +     } else {
>>> +             min = rps->min_freq_hw;
>>> +             max = rps->max_freq_hw;
>>> +     }
>>> +
>>> +     __rps_update_engine(rps, engine->id, min, max);
>>> +}
>>> +
>>>    static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
>>>    {
>>>        I915_WRITE(GEN6_RC_CONTROL, 0);
>>> @@ -2379,12 +2448,41 @@ void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
>>>                gen6_reset_rps_interrupts(dev_priv);
>>>    }
>>>    
>>> +static void intel_rps_init(struct intel_rps *rps)
>>> +{
>>> +     mutex_init(&rps->lock);
>>> +     INIT_WORK(&rps->work, intel_rps_work);
>>> +     spin_lock_init(&rps->engine_lock);
>>> +}
>>> +
>>> +static void intel_rps_init__frequencies(struct intel_rps *rps)
>>> +{
>>> +     int n;
>>> +
>>> +     rps->max_freq_soft = rps->max_freq_hw;
>>> +     rps->min_freq_soft = rps->min_freq_hw;
>>> +
>>> +     rps->max_freq_context = rps->max_freq_hw;
>>> +     rps->min_freq_context = rps->min_freq_hw;
>>> +     for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++) {
>>> +             rps->max_freq_engine[n] = rps->max_freq_hw;
>>> +             rps->min_freq_engine[n] = rps->min_freq_hw;
>>> +     }
>>> +
>>> +     /* Finally allow us to boost to max by default */
>>> +     rps->boost_freq = rps->max_freq_hw;
>>> +     rps->idle_freq = rps->min_freq_hw;
>>> +
>>> +     rps->freq = rps->idle_freq;
>>> +     rps->min = rps->min_freq_hw;
>>> +     rps->max = rps->max_freq_hw;
>>> +}
>>> +
>>>    void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>>>    {
>>>        struct intel_rps *rps = &dev_priv->gt_pm.rps;
>>>    
>>> -     mutex_init(&rps->lock);
>>> -     INIT_WORK(&rps->work, intel_rps_work);
>>> +     intel_rps_init(rps);
>>>    
>>>        if (HAS_GUC_SCHED(dev_priv))
>>>                rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
>>> @@ -2449,16 +2547,7 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>>>                }
>>>        }
>>>    
>>> -     rps->max_freq_soft = rps->max_freq_hw;
>>> -     rps->min_freq_soft = rps->min_freq_hw;
>>> -
>>> -     /* Finally allow us to boost to max by default */
>>> -     rps->boost_freq = rps->max_freq_hw;
>>> -     rps->idle_freq = rps->min_freq_hw;
>>> -
>>> -     rps->freq = rps->idle_freq;
>>> -     rps->min = rps->min_freq_hw;
>>> -     rps->max = rps->max_freq_hw;
>>> +     intel_rps_init__frequencies(rps);
>>>    
>>>        if (HAS_LLC(dev_priv))
>>>                gen6_update_ring_freq(dev_priv);
>>> @@ -2703,3 +2792,7 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
>>>    
>>>        gen9_reset_guc_interrupts(dev_priv);
>>>    }
>>> +
>>> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>>> +#include "selftests/intel_gt_pm.c"
>>> +#endif
>>> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
>>> index 314912c15126..ef3f27eca529 100644
>>> --- a/drivers/gpu/drm/i915/intel_gt_pm.h
>>> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
>>> @@ -25,7 +25,9 @@
>>>    #define __INTEL_GT_PM_H__
>>>    
>>>    struct drm_i915_private;
>>> +struct i915_gem_context;
>>>    struct i915_request;
>>> +struct intel_engine_cs;
>>>    struct intel_rps_client;
>>>    
>>>    void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
>>> @@ -47,6 +49,8 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv);
>>>    
>>>    void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
>>>    
>>> +void intel_rps_update_engine(const struct intel_engine_cs *engine,
>>> +                          const struct i915_gem_context *ctx);
>>>    void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
>>>    
>>>    int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
>>> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
>>> index 8a8ad2fe158d..d8eaae683186 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
>>> @@ -26,9 +26,12 @@
>>>    #include <trace/events/dma_fence.h>
>>>    
>>>    #include "intel_guc_submission.h"
>>> -#include "intel_lrc_reg.h"
>>> +
>>>    #include "i915_drv.h"
>>>    
>>> +#include "intel_gt_pm.h"
>>> +#include "intel_lrc_reg.h"
>>> +
>>>    #define GUC_PREEMPT_FINISHED                0x1
>>>    #define GUC_PREEMPT_BREADCRUMB_DWORDS       0x8
>>>    #define GUC_PREEMPT_BREADCRUMB_BYTES        \
>>> @@ -650,6 +653,12 @@ static void guc_submit(struct intel_engine_cs *engine)
>>>        }
>>>    }
>>>    
>>> +static void update_rps(struct intel_engine_cs *engine)
>>> +{
>>> +     intel_rps_update_engine(engine,
>>> +                             port_request(engine->execlists.port)->ctx);
>>> +}
>>> +
>>>    static void port_assign(struct execlist_port *port, struct i915_request *rq)
>>>    {
>>>        GEM_BUG_ON(port_isset(port));
>>> @@ -728,6 +737,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>>>        execlists->first = rb;
>>>        if (submit) {
>>>                port_assign(port, last);
>>> +             update_rps(engine);
>>>                execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
>>>                guc_submit(engine);
>>>        }
>>> @@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
>>>    
>>>                rq = port_request(&port[0]);
>>>        }
>>> -     if (!rq)
>>> +     if (!rq) {
>>>                execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
>>> +             intel_rps_update_engine(engine, NULL);
>>> +     }
>>>    
>>>        if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
>>>            intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 3a69b367e565..518f7b3db857 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -138,6 +138,7 @@
>>>    #include "i915_drv.h"
>>>    #include "i915_gem_render_state.h"
>>>    #include "intel_lrc_reg.h"
>>> +#include "intel_gt_pm.h"
>>>    #include "intel_mocs.h"
>>>    
>>>    #define RING_EXECLIST_QFULL         (1 << 0x2)
>>> @@ -535,6 +536,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>>>        execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>>>    }
>>>    
>>> +static void update_rps(struct intel_engine_cs *engine)
>>> +{
>>> +     intel_rps_update_engine(engine,
>>> +                             port_request(engine->execlists.port)->ctx);
>>> +}
>>> +
>>>    static void execlists_dequeue(struct intel_engine_cs *engine)
>>>    {
>>>        struct intel_engine_execlists * const execlists = &engine->execlists;
>>> @@ -708,6 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>>>        spin_unlock_irq(&engine->timeline->lock);
>>>    
>>>        if (submit) {
>>> +             update_rps(engine);
>>>                execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
>>>                execlists_submit_ports(engine);
>>>        }
>>> @@ -982,6 +990,11 @@ static void execlists_submission_tasklet(unsigned long data)
>>>                                          engine->name, port->context_id);
>>>    
>>>                                execlists_port_complete(execlists, port);
>>> +
>>> +                             /* Switch to the next request/context */
>>> +                             rq = port_request(port);
>>> +                             intel_rps_update_engine(engine,
>>> +                                                     rq ? rq->ctx : NULL);
>>>                        } else {
>>>                                port_set(port, port_pack(rq, count));
>>>                        }
>>> @@ -1717,6 +1730,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>>>        __unwind_incomplete_requests(engine);
>>>        spin_unlock(&engine->timeline->lock);
>>>    
>>> +     intel_rps_update_engine(engine, NULL);
>>> +
>>>        /* Mark all CS interrupts as complete */
>>>        execlists->active = 0;
>>>    
>>> diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
>>> index 9a48aa441743..85b6e6d020b7 100644
>>> --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
>>> +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
>>> @@ -14,6 +14,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
>>>    selftest(scatterlist, scatterlist_mock_selftests)
>>>    selftest(syncmap, i915_syncmap_mock_selftests)
>>>    selftest(uncore, intel_uncore_mock_selftests)
>>> +selftest(gt_pm, intel_gt_pm_mock_selftests)
>>>    selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
>>>    selftest(timelines, i915_gem_timeline_mock_selftests)
>>>    selftest(requests, i915_request_mock_selftests)
>>> diff --git a/drivers/gpu/drm/i915/selftests/intel_gt_pm.c b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
>>> new file mode 100644
>>> index 000000000000..c3871eb9eabb
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
>>> @@ -0,0 +1,130 @@
>>> +/*
>>> + * SPDX-License-Identifier: MIT
>>> + *
>>> + * Copyright © 2018 Intel Corporation
>>> + */
>>> +
>>> +#include "../i915_selftest.h"
>>> +#include "i915_random.h"
>>> +
>>> +#include "mock_gem_device.h"
>>> +
>>> +static void mock_rps_init(struct drm_i915_private *i915)
>>> +{
>>> +     struct intel_rps *rps = &i915->gt_pm.rps;
>>> +
>>> +     /* Disable the register writes */
>>> +     mkwrite_device_info(i915)->gen = 0;
>>> +     mkwrite_device_info(i915)->has_rps = true;
>>> +
>>> +     intel_rps_init(rps);
>>> +
>>> +     rps->min_freq_hw = 0;
>>> +     rps->max_freq_hw = 255;
>>> +
>>> +     rps->min_freq_user = rps->min_freq_hw;
>>> +     rps->max_freq_user = rps->max_freq_hw;
>>> +
>>> +     intel_rps_init__frequencies(rps);
>>> +}
>>> +
>>> +static void mock_rps_fini(struct drm_i915_private *i915)
>>> +{
>>> +     struct intel_rps *rps = &i915->gt_pm.rps;
>>> +
>>> +     cancel_work_sync(&rps->work);
>>> +}
>>> +
>>> +static int igt_rps_engine(void *arg)
>>> +{
>>> +     struct drm_i915_private *i915 = arg;
>>> +     struct intel_rps *rps = &i915->gt_pm.rps;
>>> +     I915_RND_STATE(prng);
>>> +     int err;
>>> +     int i;
>>> +
>>> +     intel_gt_pm_busy(i915); /* Activate RPS */
>>> +
>>> +     /*
>>> +      * Minimum unit tests for intel_rps_update_engine().
>>> +      *
>>> +      * Whenever we call intel_rps_update_engine, it will
>>> +      * replace the context min/max frequency request for a particular
>>> +      * engine and then recompute the global max(min)/min(max) over all
>>> +      * engines. In this mockup, we are limited to checking those
>>> +      * max(min)/min(max) calculations and then seeing if the rps
>>> +      * worker uses those bounds.
>>> +      */
>>> +
>>> +     for (i = 0; i < 256 * 256; i++) {
>>> +             u8 freq = prandom_u32_state(&prng);
>>> +
>>> +             __rps_update_engine(rps, 0, freq, freq);
>>> +             if (rps->min_freq_context != freq ||
>>> +                 rps->max_freq_context != freq) {
>>> +                     pr_err("Context min/max frequencies not restricted to %d, found [%d, %d]\n",
>>> +                            freq, rps->min_freq_context, rps->max_freq_context);
>>> +                     err = -EINVAL;
>>> +                     goto out;
>>> +             }
>>> +             flush_work(&rps->work);
>>> +
>>> +             if (rps->freq != freq) {
>>> +                     pr_err("Tried to restrict frequency to %d, found %d\n",
>>> +                            freq, rps->freq);
>>> +                     err = -EINVAL;
>>> +                     goto out;
>>> +             }
>>> +     }
>>> +
>>> +     __rps_update_engine(rps, 0, rps->min_freq_hw, rps->max_freq_hw);
>>> +     if (rps->min_freq_context != rps->min_freq_hw ||
>>> +         rps->max_freq_context != rps->max_freq_hw) {
>>> +             pr_err("Context frequency not restored to [%d, %d], found [%d, %d]\n",
>>> +                    rps->min_freq_hw, rps->min_freq_hw,
>>> +                    rps->min_freq_context, rps->max_freq_context);
>>> +             err = -EINVAL;
>>> +             goto out;
>>> +     }
>>> +
>>> +     for (i = 0; i < I915_NUM_ENGINES; i++)
>>> +             __rps_update_engine(rps, i, i, 255 - i);
>>> +     i--;
>>> +     if (rps->min_freq_context != i) {
>>> +             pr_err("Minimum context frequency across all engines not raised to %d, found %d\n", i, rps->min_freq_context);
>>> +             err = -EINVAL;
>>> +             goto out;
>>> +     }
>>> +     if (rps->max_freq_context != 255 - i) {
>>> +             pr_err("Maxmimum context frequency across all engines not lowered to %d, found %d\n", 255 - i, rps->max_freq_context);
>>> +             err = -EINVAL;
>>> +             goto out;
>>> +     }
>>> +
>>> +     err = 0;
>>> +out:
>>> +     intel_gt_pm_idle(i915);
>>> +     return err;
>>> +}
>>> +
>>> +int intel_gt_pm_mock_selftests(void)
>>> +{
>>> +     static const struct i915_subtest tests[] = {
>>> +             SUBTEST(igt_rps_engine),
>>> +     };
>>> +     struct drm_i915_private *i915;
>>> +     int err;
>>> +
>>> +     i915 = mock_gem_device();
>>> +     if (!i915)
>>> +             return -ENOMEM;
>>> +
>>> +     mock_rps_init(i915);
>>> +
>>> +     err = i915_subtests(tests, i915);
>>> +
>>> +     mock_rps_fini(i915);
>>> +     drm_dev_unref(&i915->drm);
>>> +
>>> +     return err;
>>> +}
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 7f5634ce8e88..64c6377df769 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -1456,6 +1456,26 @@ struct drm_i915_gem_context_param {
>>>    #define   I915_CONTEXT_MAX_USER_PRIORITY    1023 /* inclusive */
>>>    #define   I915_CONTEXT_DEFAULT_PRIORITY             0
>>>    #define   I915_CONTEXT_MIN_USER_PRIORITY    -1023 /* inclusive */
>>> +
>>> +/*
>>> + * I915_CONTEXT_PARAM_FREQUENCY:
>>> + *
>>> + * Request that when this context runs, the GPU is restricted to run
>>> + * in this frequency range; but still constrained by the global user
>>> + * restriction specified via sysfs.
>>> + *
>>> + * The minimum / maximum frequencies are specified in MHz. Each context
>>> + * starts in the default unrestricted state, where the range is taken from
>>> + * the hardware, and so may be queried.
>>> + *
>>> + * Note the frequency is only changed on a context switch; if the
>>> + * context's frequency is updated whilst the context is currently executing
>>> + * the request will not take effect until the next time the context is run.
>>> + */
>>> +#define I915_CONTEXT_PARAM_FREQUENCY 0x7
>>> +#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
>>> +#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
>>> +#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
>>>        __u64 value;
>>>    };
>>>    
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 7c7afdac8c8c..a21b9164ade8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2191,6 +2191,7 @@  static int i915_rps_boost_info(struct seq_file *m, void *data)
 	struct drm_device *dev = &dev_priv->drm;
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	struct drm_file *file;
+	int n;
 
 	seq_printf(m, "GPU busy? %s [%d requests]\n",
 		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
@@ -2198,17 +2199,30 @@  static int i915_rps_boost_info(struct seq_file *m, void *data)
 	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
 	seq_printf(m, "Boosts outstanding? %d\n",
 		   atomic_read(&rps->num_waiters));
+	seq_printf(m, "Worker pending? %s\n", yesno(work_busy(&rps->work)));
 	seq_printf(m, "Frequency requested %d [%d, %d]\n",
 		   intel_gpu_freq(dev_priv, rps->freq),
 		   intel_gpu_freq(dev_priv, rps->min),
 		   intel_gpu_freq(dev_priv, rps->max));
-	seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
+	seq_printf(m, "  min hard:%d, soft:%d, ctx:%d, user:%d; max user:%d, ctx:%d, soft:%d, hard:%d\n",
 		   intel_gpu_freq(dev_priv, rps->min_freq_hw),
 		   intel_gpu_freq(dev_priv, rps->min_freq_soft),
+		   intel_gpu_freq(dev_priv, rps->min_freq_context),
 		   intel_gpu_freq(dev_priv, rps->min_freq_user),
 		   intel_gpu_freq(dev_priv, rps->max_freq_user),
+		   intel_gpu_freq(dev_priv, rps->max_freq_context),
 		   intel_gpu_freq(dev_priv, rps->max_freq_soft),
 		   intel_gpu_freq(dev_priv, rps->max_freq_hw));
+	seq_printf(m, "  engines min: [");
+	for (n = 0; n < ARRAY_SIZE(rps->min_freq_engine); n++)
+		seq_printf(m, "%s%d", n ? ", " : "",
+			   intel_gpu_freq(dev_priv, rps->min_freq_engine[n]));
+	seq_printf(m, "]\n  engines max: [");
+	for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++)
+		seq_printf(m, "%s%d", n ? ", " : "",
+			   intel_gpu_freq(dev_priv, rps->max_freq_engine[n]));
+	seq_printf(m, "]\n");
+
 	seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
 		   intel_gpu_freq(dev_priv, rps->idle_freq),
 		   intel_gpu_freq(dev_priv, rps->efficient_freq),
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 82e9a58bd65f..d754d44cfbc2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -731,6 +731,7 @@  struct intel_rps_ei {
 
 struct intel_rps {
 	struct mutex lock;
+	spinlock_t engine_lock; /* protects updates to min/max_freq_context */
 	struct work_struct work;
 
 	bool active;
@@ -763,6 +764,10 @@  struct intel_rps {
 	u8 max_freq_user;	/* Max frequency permitted by the driver */
 	u8 min_freq_soft;
 	u8 max_freq_soft;
+	u8 min_freq_context;	/* Min frequency permitted by the context */
+	u8 max_freq_context;	/* Max frequency permitted by the context */
+	u8 min_freq_engine[I915_NUM_ENGINES];
+	u8 max_freq_engine[I915_NUM_ENGINES];
 
 	u8 idle_freq;		/* Frequency to request when we are idle */
 	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 65bf92658d92..1d36e2a02479 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -88,8 +88,10 @@ 
 #include <linux/log2.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "intel_gt_pm.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
@@ -281,6 +283,8 @@  __create_hw_context(struct drm_i915_private *dev_priv,
 	list_add_tail(&ctx->link, &dev_priv->contexts.list);
 	ctx->i915 = dev_priv;
 	ctx->priority = I915_PRIORITY_NORMAL;
+	ctx->min_freq = dev_priv->gt_pm.rps.min_freq_hw;
+	ctx->max_freq = dev_priv->gt_pm.rps.max_freq_hw;
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
@@ -715,6 +719,7 @@  int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct i915_gem_context *ctx;
 	int ret = 0;
 
@@ -747,6 +752,19 @@  int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_PRIORITY:
 		args->value = ctx->priority;
 		break;
+	case I915_CONTEXT_PARAM_FREQUENCY:
+		if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
+			ret = -ENODEV;
+		} else if (args->size) {
+			ret = -EINVAL;
+		} else {
+			u32 min = intel_gpu_freq(i915, ctx->min_freq);
+			u32 max = intel_gpu_freq(i915, ctx->max_freq);
+
+			args->value = I915_CONTEXT_SET_FREQUENCY(min, max);
+		}
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
@@ -761,6 +779,7 @@  int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct i915_gem_context *ctx;
 	int ret;
 
@@ -821,6 +840,41 @@  int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				ctx->priority = priority;
 		}
 		break;
+	case I915_CONTEXT_PARAM_FREQUENCY:
+		if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
+			ret = -ENODEV;
+		} else if (args->size) {
+			ret = -EINVAL;
+		} else {
+			struct intel_rps *rps = &i915->gt_pm.rps;
+			u32 min, max;
+
+			min = I915_CONTEXT_MIN_FREQUENCY(args->value);
+			min = intel_freq_opcode(i915, min);
+
+			max = I915_CONTEXT_MAX_FREQUENCY(args->value);
+			max = intel_freq_opcode(i915, max);
+
+			/*
+			 * As we constrain the frequency request from the
+			 * context (application) by the sysadmin imposed limits,
+			 * it is reasonable to allow the application to
+			 * specify its preferred range within those limits.
+			 * That is we do not need to restrict requesting
+			 * a higher frequency to privileged (CAP_SYS_NICE)
+			 * processes.
+			 */
+			if (max < min) {
+				ret = -EINVAL;
+			} else if (min < rps->min_freq_hw ||
+				   max > rps->max_freq_hw) {
+				ret = -EINVAL;
+			} else {
+				ctx->min_freq = min;
+				ctx->max_freq = max;
+			}
+		}
+		break;
 
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 7854262ddfd9..98f7b71a787a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -150,6 +150,9 @@  struct i915_gem_context {
 	 */
 	int priority;
 
+	u32 min_freq;
+	u32 max_freq;
+
 	/** ggtt_offset_bias: placement restriction for context objects */
 	u32 ggtt_offset_bias;
 
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 9705205a26b5..4bbfb4080f8f 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -402,10 +402,10 @@  static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
 	GEM_BUG_ON(!rps->active);
 
 	min = clamp_t(int,
-		      rps->min_freq_soft,
+		      max(rps->min_freq_soft, READ_ONCE(rps->min_freq_context)),
 		      rps->min_freq_user, rps->max_freq_user);
 	max = clamp_t(int,
-		      rps->max_freq_soft,
+		      min(rps->max_freq_soft, READ_ONCE(rps->max_freq_context)),
 		      min, rps->max_freq_user);
 	if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
 		max = rps->boost_freq;
@@ -809,6 +809,75 @@  void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
 	atomic_inc(client ? &client->boosts : &rps->boosts);
 }
 
+static void __rps_update_engine(struct intel_rps *rps,
+				enum intel_engine_id idx,
+				u32 min, u32 max)
+{
+	unsigned long flags;
+	bool update = false;
+	u32 old;
+	int n;
+
+	GEM_BUG_ON(min > max);
+
+	if (rps->min_freq_engine[idx] != min) {
+		spin_lock_irqsave(&rps->engine_lock, flags);
+
+		rps->min_freq_engine[idx] = min;
+
+		old = rps->min_freq_context;
+		rps->min_freq_context = rps->min_freq_engine[0];
+		for (n = 1; n < ARRAY_SIZE(rps->min_freq_engine); n++)
+			if (rps->min_freq_engine[n] > rps->min_freq_context)
+				rps->min_freq_context = rps->min_freq_engine[n];
+		update |= rps->min_freq_context != old;
+
+		spin_unlock_irqrestore(&rps->engine_lock, flags);
+	}
+
+	if (rps->max_freq_engine[idx] != max) {
+		spin_lock_irqsave(&rps->engine_lock, flags);
+
+		rps->max_freq_engine[idx] = max;
+
+		old = rps->max_freq_context;
+		rps->max_freq_context = rps->max_freq_engine[0];
+		for (n = 1; n < ARRAY_SIZE(rps->max_freq_engine); n++)
+			if (rps->max_freq_engine[n] < rps->max_freq_context)
+				rps->max_freq_context = rps->max_freq_engine[n];
+		update |= rps->max_freq_context != old;
+
+		spin_unlock_irqrestore(&rps->engine_lock, flags);
+	}
+
+	/* Kick the RPS worker to apply the updated constraints, as needed */
+	if (update && !atomic_read(&rps->num_waiters)) {
+		old = READ_ONCE(rps->freq);
+		if ((old < min || old > max))
+			schedule_work(&rps->work);
+	}
+}
+
+void intel_rps_update_engine(const struct intel_engine_cs *engine,
+			     const struct i915_gem_context *ctx)
+{
+	struct intel_rps *rps = &engine->i915->gt_pm.rps;
+	u32 min, max;
+
+	if (!HAS_RPS(engine->i915))
+		return;
+
+	if (ctx) {
+		min = ctx->min_freq;
+		max = ctx->max_freq;
+	} else {
+		min = rps->min_freq_hw;
+		max = rps->max_freq_hw;
+	}
+
+	__rps_update_engine(rps, engine->id, min, max);
+}
+
 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -2379,12 +2448,41 @@  void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
 		gen6_reset_rps_interrupts(dev_priv);
 }
 
+static void intel_rps_init(struct intel_rps *rps)
+{
+	mutex_init(&rps->lock);
+	INIT_WORK(&rps->work, intel_rps_work);
+	spin_lock_init(&rps->engine_lock);
+}
+
+static void intel_rps_init__frequencies(struct intel_rps *rps)
+{
+	int n;
+
+	rps->max_freq_soft = rps->max_freq_hw;
+	rps->min_freq_soft = rps->min_freq_hw;
+
+	rps->max_freq_context = rps->max_freq_hw;
+	rps->min_freq_context = rps->min_freq_hw;
+	for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++) {
+		rps->max_freq_engine[n] = rps->max_freq_hw;
+		rps->min_freq_engine[n] = rps->min_freq_hw;
+	}
+
+	/* Finally allow us to boost to max by default */
+	rps->boost_freq = rps->max_freq_hw;
+	rps->idle_freq = rps->min_freq_hw;
+
+	rps->freq = rps->idle_freq;
+	rps->min = rps->min_freq_hw;
+	rps->max = rps->max_freq_hw;
+}
+
 void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	mutex_init(&rps->lock);
-	INIT_WORK(&rps->work, intel_rps_work);
+	intel_rps_init(rps);
 
 	if (HAS_GUC_SCHED(dev_priv))
 		rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
@@ -2449,16 +2547,7 @@  void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 		}
 	}
 
-	rps->max_freq_soft = rps->max_freq_hw;
-	rps->min_freq_soft = rps->min_freq_hw;
-
-	/* Finally allow us to boost to max by default */
-	rps->boost_freq = rps->max_freq_hw;
-	rps->idle_freq = rps->min_freq_hw;
-
-	rps->freq = rps->idle_freq;
-	rps->min = rps->min_freq_hw;
-	rps->max = rps->max_freq_hw;
+	intel_rps_init__frequencies(rps);
 
 	if (HAS_LLC(dev_priv))
 		gen6_update_ring_freq(dev_priv);
@@ -2703,3 +2792,7 @@  void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
 
 	gen9_reset_guc_interrupts(dev_priv);
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_gt_pm.c"
+#endif
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index 314912c15126..ef3f27eca529 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -25,7 +25,9 @@ 
 #define __INTEL_GT_PM_H__
 
 struct drm_i915_private;
+struct i915_gem_context;
 struct i915_request;
+struct intel_engine_cs;
 struct intel_rps_client;
 
 void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
@@ -47,6 +49,8 @@  void intel_gt_pm_idle(struct drm_i915_private *dev_priv);
 
 void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
 
+void intel_rps_update_engine(const struct intel_engine_cs *engine,
+			     const struct i915_gem_context *ctx);
 void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
 
 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 8a8ad2fe158d..d8eaae683186 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -26,9 +26,12 @@ 
 #include <trace/events/dma_fence.h>
 
 #include "intel_guc_submission.h"
-#include "intel_lrc_reg.h"
+
 #include "i915_drv.h"
 
+#include "intel_gt_pm.h"
+#include "intel_lrc_reg.h"
+
 #define GUC_PREEMPT_FINISHED		0x1
 #define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
 #define GUC_PREEMPT_BREADCRUMB_BYTES	\
@@ -650,6 +653,12 @@  static void guc_submit(struct intel_engine_cs *engine)
 	}
 }
 
+static void update_rps(struct intel_engine_cs *engine)
+{
+	intel_rps_update_engine(engine,
+				port_request(engine->execlists.port)->ctx);
+}
+
 static void port_assign(struct execlist_port *port, struct i915_request *rq)
 {
 	GEM_BUG_ON(port_isset(port));
@@ -728,6 +737,7 @@  static void guc_dequeue(struct intel_engine_cs *engine)
 	execlists->first = rb;
 	if (submit) {
 		port_assign(port, last);
+		update_rps(engine);
 		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
 		guc_submit(engine);
 	}
@@ -757,8 +767,10 @@  static void guc_submission_tasklet(unsigned long data)
 
 		rq = port_request(&port[0]);
 	}
-	if (!rq)
+	if (!rq) {
 		execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+		intel_rps_update_engine(engine, NULL);
+	}
 
 	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
 	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3a69b367e565..518f7b3db857 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -138,6 +138,7 @@ 
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
 #include "intel_lrc_reg.h"
+#include "intel_gt_pm.h"
 #include "intel_mocs.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
@@ -535,6 +536,12 @@  static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static void update_rps(struct intel_engine_cs *engine)
+{
+	intel_rps_update_engine(engine,
+				port_request(engine->execlists.port)->ctx);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -708,6 +715,7 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 	spin_unlock_irq(&engine->timeline->lock);
 
 	if (submit) {
+		update_rps(engine);
 		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
 		execlists_submit_ports(engine);
 	}
@@ -982,6 +990,11 @@  static void execlists_submission_tasklet(unsigned long data)
 					  engine->name, port->context_id);
 
 				execlists_port_complete(execlists, port);
+
+				/* Switch to the next request/context */
+				rq = port_request(port);
+				intel_rps_update_engine(engine,
+							rq ? rq->ctx : NULL);
 			} else {
 				port_set(port, port_pack(rq, count));
 			}
@@ -1717,6 +1730,8 @@  static void reset_common_ring(struct intel_engine_cs *engine,
 	__unwind_incomplete_requests(engine);
 	spin_unlock(&engine->timeline->lock);
 
+	intel_rps_update_engine(engine, NULL);
+
 	/* Mark all CS interrupts as complete */
 	execlists->active = 0;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 9a48aa441743..85b6e6d020b7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -14,6 +14,7 @@  selftest(fence, i915_sw_fence_mock_selftests)
 selftest(scatterlist, scatterlist_mock_selftests)
 selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
+selftest(gt_pm, intel_gt_pm_mock_selftests)
 selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
 selftest(timelines, i915_gem_timeline_mock_selftests)
 selftest(requests, i915_request_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/intel_gt_pm.c b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
new file mode 100644
index 000000000000..c3871eb9eabb
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
@@ -0,0 +1,130 @@ 
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+#include "i915_random.h"
+
+#include "mock_gem_device.h"
+
+static void mock_rps_init(struct drm_i915_private *i915)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+
+	/* Disable the register writes */
+	mkwrite_device_info(i915)->gen = 0;
+	mkwrite_device_info(i915)->has_rps = true;
+
+	intel_rps_init(rps);
+
+	rps->min_freq_hw = 0;
+	rps->max_freq_hw = 255;
+
+	rps->min_freq_user = rps->min_freq_hw;
+	rps->max_freq_user = rps->max_freq_hw;
+
+	intel_rps_init__frequencies(rps);
+}
+
+static void mock_rps_fini(struct drm_i915_private *i915)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+
+	cancel_work_sync(&rps->work);
+}
+
+static int igt_rps_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	I915_RND_STATE(prng);
+	int err;
+	int i;
+
+	intel_gt_pm_busy(i915); /* Activate RPS */
+
+	/*
+	 * Minimum unit tests for intel_rps_update_engine().
+	 *
+	 * Whenever we call intel_rps_update_engine, it will
+	 * replace the context min/max frequency request for a particular
+	 * engine and then recompute the global max(min)/min(max) over all
+	 * engines. In this mockup, we are limited to checking those
+	 * max(min)/min(max) calculations and then seeing if the rps
+	 * worker uses those bounds.
+	 */
+
+	for (i = 0; i < 256 * 256; i++) {
+		u8 freq = prandom_u32_state(&prng); /* truncated to a u8 */
+
+		__rps_update_engine(rps, 0, freq, freq);
+		if (rps->min_freq_context != freq ||
+		    rps->max_freq_context != freq) {
+			pr_err("Context min/max frequencies not restricted to %d, found [%d, %d]\n",
+			       freq, rps->min_freq_context, rps->max_freq_context);
+			err = -EINVAL;
+			goto out;
+		}
+		flush_work(&rps->work); /* wait for any kicked RPS worker */
+
+		if (rps->freq != freq) {
+			pr_err("Tried to restrict frequency to %d, found %d\n",
+			       freq, rps->freq);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	__rps_update_engine(rps, 0, rps->min_freq_hw, rps->max_freq_hw);
+	if (rps->min_freq_context != rps->min_freq_hw ||
+	    rps->max_freq_context != rps->max_freq_hw) {
+		pr_err("Context frequency not restored to [%d, %d], found [%d, %d]\n",
+		       rps->min_freq_hw, rps->max_freq_hw, /* expected range */
+		       rps->min_freq_context, rps->max_freq_context);
+		err = -EINVAL;
+		goto out;
+	}
+
+	for (i = 0; i < I915_NUM_ENGINES; i++)
+		__rps_update_engine(rps, i, i, 255 - i);
+	i--; /* last engine: it asked for the highest min and the lowest max */
+	if (rps->min_freq_context != i) {
+		pr_err("Minimum context frequency across all engines not raised to %d, found %d\n", i, rps->min_freq_context);
+		err = -EINVAL;
+		goto out;
+	}
+	if (rps->max_freq_context != 255 - i) {
+		pr_err("Maxmimum context frequency across all engines not lowered to %d, found %d\n", 255 - i, rps->max_freq_context);
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = 0;
+out:
+	intel_gt_pm_idle(i915);
+	return err;
+}
+
+int intel_gt_pm_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_rps_engine),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	mock_rps_init(i915);
+
+	err = i915_subtests(tests, i915);
+
+	mock_rps_fini(i915);
+	drm_dev_unref(&i915->drm);
+
+	return err;
+}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7f5634ce8e88..64c6377df769 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,6 +1456,26 @@  struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+
+/*
+ * I915_CONTEXT_PARAM_FREQUENCY:
+ *
+ * Request that when this context runs, the GPU is restricted to run
+ * in this frequency range; but still constrained by the global user
+ * restriction specified via sysfs.
+ *
+ * The minimum / maximum frequencies are specified in MHz. Each context
+ * starts in the default unrestricted state, where the range is taken from
+ * the hardware, and so may be queried.
+ *
+ * Note the frequency is only changed on a context switch; if the
+ * context's frequency is updated whilst the context is currently executing
+ * the request will not take effect until the next time the context is run.
+ */
+#define I915_CONTEXT_PARAM_FREQUENCY	0x7
+#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
+#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
+#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
 	__u64 value;
 };