diff mbox series

[i-g-t] test/i915_pm_rc6_residency: Check we enter RC6 when mostly idle

Message ID 20200109222300.1154999-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [i-g-t] test/i915_pm_rc6_residency: Check we enter RC6 when mostly idle | expand

Commit Message

Chris Wilson Jan. 9, 2020, 10:23 p.m. UTC
Long ago, we would only approach runtime-suspend if the GPU had been
idle (no userspace submissions) for a second or two. However, since
disabling automatic HW RC6, such a relaxed approach to runtime-suspend
caused us to never enter RC6 on the desktop and consume vast quantities
of power. Simulate this behaviour by setting up a background load that is
only active for ~1% of the time (so equivalent to a compositor that is
updating the clock every 50ms or so) and verify that we do continue to
enter RC6 between the GPU pulses.

References: https://gitlab.freedesktop.org/drm/intel/issues/614
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/Makefile.am                  |   1 +
 tests/i915/i915_pm_rc6_residency.c | 174 +++++++++++++++++++++++++++--
 tests/meson.build                  |   9 +-
 3 files changed, 173 insertions(+), 11 deletions(-)

Comments

Imre Deak Jan. 10, 2020, 4:16 p.m. UTC | #1
On Thu, Jan 09, 2020 at 10:23:00PM +0000, Chris Wilson wrote:
> Long ago, we would only approach runtime-suspend if the GPU had been
> idle (no userspace submissions) for a second or two. However, since
> disabling automatic HW RC6 such a relaxed approach to runtime-suspend
> caused us to never enter RC6 on the desktop and consume vast quantities
> of power. Surmise this behaviour by setting up a background load that is
> only active for ~1% of the time (so equivalent to a compositor that is
> updating the clock every 50ms or so) and verify that we do continue to
> enter RC6 between the GPU pulses.
> 
> References: https://gitlab.freedesktop.org/drm/intel/issues/614
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Imre Deak <imre.deak@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  tests/Makefile.am                  |   1 +
>  tests/i915/i915_pm_rc6_residency.c | 174 +++++++++++++++++++++++++++--
>  tests/meson.build                  |   9 +-
>  3 files changed, 173 insertions(+), 11 deletions(-)
> 
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 9a320bc23..fc3052475 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -122,6 +122,7 @@ gem_threaded_access_tiled_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>  gem_threaded_access_tiled_LDADD = $(LDADD) -lpthread
>  gem_tiled_swapping_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>  gem_tiled_swapping_LDADD = $(LDADD) -lpthread
> +i915_pm_rc6_residency_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
>  prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>  prime_self_import_LDADD = $(LDADD) -lpthread
>  gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> diff --git a/tests/i915/i915_pm_rc6_residency.c b/tests/i915/i915_pm_rc6_residency.c
> index 1b39c870e..a5bcb084b 100644
> --- a/tests/i915/i915_pm_rc6_residency.c
> +++ b/tests/i915/i915_pm_rc6_residency.c
> @@ -25,8 +25,6 @@
>   *
>   */
>  
> -#include "igt.h"
> -#include "igt_sysfs.h"
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> @@ -34,6 +32,9 @@
>  #include <errno.h>
>  #include <time.h>
>  
> +#include "igt.h"
> +#include "igt_perf.h"
> +#include "igt_sysfs.h"
>  
>  #define SLEEP_DURATION 3 /* in seconds */
>  
> @@ -195,31 +196,180 @@ static bool wait_for_rc6(void)
>  	return false;
>  }
>  
> +static uint64_t __pmu_read_single(int fd, uint64_t *ts)
> +{
> +	uint64_t data[2];
> +
> +	igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
> +
> +	if (ts)
> +		*ts = data[1];
> +
> +	return data[0];
> +}
> +
> +static uint64_t pmu_read_single(int fd)
> +{
> +	return __pmu_read_single(fd, NULL);
> +}
> +
> +#define __assert_within_epsilon(x, ref, tol_up, tol_down) \
> +	igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
> +		     (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
> +		     "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n",\
> +		     #x, #ref, (double)(x), \
> +		     (tol_up) * 100.0, (tol_down) * 100.0, \
> +		     (double)(ref))
> +
> +#define assert_within_epsilon(x, ref, tolerance) \
> +	__assert_within_epsilon(x, ref, tolerance, tolerance)
> +
> +static bool __pmu_wait_for_rc6(int fd)
> +{
> +	struct timespec tv = {};
> +	uint64_t start, now;
> +
> +	/* First wait for roughly an RC6 Evaluation Interval */
> +	usleep(160 * 1000);
> +
> +	/* Then poll for RC6 to start ticking */
> +	now = pmu_read_single(fd);
> +	do {
> +		start = now;
> +		usleep(5000);
> +		now = pmu_read_single(fd);
> +		if (now - start > 1e6)
> +			return true;
> +	} while (!igt_seconds_elapsed(&tv));
> +
> +	return false;
> +}
> +
> +static unsigned int measured_usleep(unsigned int usec)
> +{
> +	struct timespec ts = { };
> +	unsigned int slept;
> +
> +	slept = igt_nsec_elapsed(&ts);
> +	igt_assert(slept == 0);
> +	do {
> +		usleep(usec - slept);
> +		slept = igt_nsec_elapsed(&ts) / 1000;
> +	} while (slept < usec);
> +
> +	return igt_nsec_elapsed(&ts);
> +}
> +
> +static uint32_t batch_create(int fd)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	uint32_t handle;
> +
> +	handle = gem_create(fd, 4096);
> +	gem_write(fd, handle, 0, &bbe, sizeof(bbe));
> +
> +	return handle;
> +}
> +
> +static int open_pmu(int i915, uint64_t config)
> +{
> +	int fd;
> +
> +	fd = perf_i915_open(config);
> +	igt_skip_on(fd < 0 && errno == ENODEV);
> +	igt_assert(fd >= 0);
> +
> +	return fd;
> +}
> +
> +static void rc6_perf(int i915)
> +{
> +	const int64_t duration_ns = 2e9;
> +	uint64_t idle, prev, ts[2];
> +	unsigned long slept, cycles;
> +	unsigned long *done;
> +	int fd;
> +
> +	fd = open_pmu(i915, I915_PMU_RC6_RESIDENCY);
> +	igt_require(__pmu_wait_for_rc6(fd));
> +
> +	/* While idle check full RC6. */
> +	prev = __pmu_read_single(fd, &ts[0]);
> +	slept = measured_usleep(duration_ns / 1000);
> +	idle = __pmu_read_single(fd, &ts[1]);
> +	igt_debug("slept=%lu perf=%"PRIu64"\n", slept, ts[1] - ts[0]);
> +	assert_within_epsilon(idle - prev, ts[1] - ts[0], 5);
> +
> +	/* Setup up a very light load */
> +	done = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> +	igt_fork(child, 1) {
> +		struct drm_i915_gem_exec_object2 obj = {
> +			.handle = batch_create(i915),
> +		};
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(&obj),
> +			.buffer_count = 1,
> +		};
> +
> +		do {
> +			struct timespec tv = {};
> +
> +			igt_seconds_elapsed(&tv);
> +
> +			gem_execbuf(i915, &execbuf);
> +			gem_sync(i915, obj.handle);
> +			done[1]++;
> +
> +			usleep(igt_seconds_elapsed(&tv) / 10); /* => 1% busy */

Should this be igt_nsec_elapsed()?

I'm not too familiar with the perf interface, but I assume the event[0]
value read is the RC6 residency, while event[1] is the duration for which
the event was enabled (so the duration since the event file was opened?).
Looks ok:

Reviewed-by: Imre Deak <imre.deak@intel.com>



> +		} while (!*done);
> +	}
> +
> +	/* While very nearly idle (idle to within tolerance), except full RC6 */
> +	cycles = -done[1];
> +	prev = __pmu_read_single(fd, &ts[0]);
> +	slept = measured_usleep(duration_ns / 1000);
> +	idle = __pmu_read_single(fd, &ts[1]);
> +	cycles += done[1];
> +	igt_debug("slept=%lu perf=%"PRIu64", cycles=%lu\n",
> +		  slept, ts[1] - ts[0], cycles);
> +	igt_assert(cycles > 0);
> +	assert_within_epsilon(idle - prev, ts[1] - ts[0], 5);
> +
> +	close(fd);
> +
> +	*done = 1;
> +	igt_waitchildren();
> +}
> +
>  igt_main
>  {
>  	unsigned int rc6_enabled = 0;
>  	unsigned int devid = 0;
> +	int i915 = -1;
>  
>  	/* Use drm_open_driver to verify device existence */
>  	igt_fixture {
> -		int fd;
> -
> -		fd = drm_open_driver(DRIVER_INTEL);
> -		devid = intel_get_drm_devid(fd);
> -		sysfs = igt_sysfs_open(fd);
> +		i915 = drm_open_driver(DRIVER_INTEL);
> +		devid = intel_get_drm_devid(i915);
> +		sysfs = igt_sysfs_open(i915);
>  
>  		igt_require(has_rc6_residency("rc6"));
>  
>  		/* Make sure rc6 counters are running */
> -		igt_drop_caches_set(fd, DROP_IDLE);
> +		igt_drop_caches_set(i915, DROP_IDLE);
>  		igt_require(wait_for_rc6());
>  
> -		close(fd);
> -
>  		rc6_enabled = get_rc6_enabled_mask();
>  		igt_require(rc6_enabled & RC6_ENABLED);
>  	}
>  
> +	igt_subtest("rc6-perf") {
> +		igt_require_gem(i915);
> +		gem_quiescent_gpu(i915);
> +
> +		rc6_perf(i915);
> +	}
> +
>  	igt_subtest("rc6-accuracy") {
>  		struct residencies res;
>  
> @@ -235,4 +385,8 @@ igt_main
>  		measure_residencies(devid, rc6_enabled, &res);
>  		residency_accuracy(res.media_rc6, res.duration, "media_rc6");
>  	}
> +
> +	igt_fixture
> +		close(i915);
> +
>  }
> diff --git a/tests/meson.build b/tests/meson.build
> index 570de5459..a79d22ba1 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -232,7 +232,6 @@ i915_progs = [
>  	'i915_module_load',
>  	'i915_pm_backlight',
>  	'i915_pm_lpsp',
> -	'i915_pm_rc6_residency',
>  	'i915_pm_rpm',
>  	'i915_pm_dc',
>  	'i915_pm_rps',
> @@ -336,6 +335,14 @@ test_executables += executable('gem_mmap_offset',
>  	   install : true)
>  test_list += 'gem_mmap_offset'
>  
> +test_executables += executable('i915_pm_rc6_residency',
> +	   join_paths('i915', 'i915_pm_rc6_residency.c'),
> +	   dependencies : test_deps + [ lib_igt_perf ],
> +	   install_dir : libexecdir,
> +	   install_rpath : libexecdir_rpathdir,
> +	   install : true)
> +test_list += 'i915_pm_rc6_residency'
> +
>  test_executables += executable('perf_pmu', 'perf_pmu.c',
>  	   dependencies : test_deps + [ lib_igt_perf ],
>  	   install_dir : libexecdir,
> -- 
> 2.25.0.rc2
>
Chris Wilson Jan. 10, 2020, 4:26 p.m. UTC | #2
Quoting Imre Deak (2020-01-10 16:16:27)
> On Thu, Jan 09, 2020 at 10:23:00PM +0000, Chris Wilson wrote:
> > Long ago, we would only approach runtime-suspend if the GPU had been
> > idle (no userspace submissions) for a second or two. However, since
> > disabling automatic HW RC6 such a relaxed approach to runtime-suspend
> > caused us to never enter RC6 on the desktop and consume vast quantities
> > of power. Surmise this behaviour by setting up a background load that is
> > only active for ~1% of the time (so equivalent to a compositor that is
> > updating the clock every 50ms or so) and verify that we do continue to
> > enter RC6 between the GPU pulses.
> > 
> > References: https://gitlab.freedesktop.org/drm/intel/issues/614
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Imre Deak <imre.deak@intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >  tests/Makefile.am                  |   1 +
> >  tests/i915/i915_pm_rc6_residency.c | 174 +++++++++++++++++++++++++++--
> >  tests/meson.build                  |   9 +-
> >  3 files changed, 173 insertions(+), 11 deletions(-)
> > 
> > diff --git a/tests/Makefile.am b/tests/Makefile.am
> > index 9a320bc23..fc3052475 100644
> > --- a/tests/Makefile.am
> > +++ b/tests/Makefile.am
> > @@ -122,6 +122,7 @@ gem_threaded_access_tiled_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> >  gem_threaded_access_tiled_LDADD = $(LDADD) -lpthread
> >  gem_tiled_swapping_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> >  gem_tiled_swapping_LDADD = $(LDADD) -lpthread
> > +i915_pm_rc6_residency_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
> >  prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> >  prime_self_import_LDADD = $(LDADD) -lpthread
> >  gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> > diff --git a/tests/i915/i915_pm_rc6_residency.c b/tests/i915/i915_pm_rc6_residency.c
> > index 1b39c870e..a5bcb084b 100644
> > --- a/tests/i915/i915_pm_rc6_residency.c
> > +++ b/tests/i915/i915_pm_rc6_residency.c
> > @@ -25,8 +25,6 @@
> >   *
> >   */
> >  
> > -#include "igt.h"
> > -#include "igt_sysfs.h"
> >  #include <stdio.h>
> >  #include <stdlib.h>
> >  #include <string.h>
> > @@ -34,6 +32,9 @@
> >  #include <errno.h>
> >  #include <time.h>
> >  
> > +#include "igt.h"
> > +#include "igt_perf.h"
> > +#include "igt_sysfs.h"
> >  
> >  #define SLEEP_DURATION 3 /* in seconds */
> >  
> > @@ -195,31 +196,180 @@ static bool wait_for_rc6(void)
> >       return false;
> >  }
> >  
> > +static uint64_t __pmu_read_single(int fd, uint64_t *ts)
> > +{
> > +     uint64_t data[2];
> > +
> > +     igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
> > +
> > +     if (ts)
> > +             *ts = data[1];
> > +
> > +     return data[0];
> > +}
> > +
> > +static uint64_t pmu_read_single(int fd)
> > +{
> > +     return __pmu_read_single(fd, NULL);
> > +}
> > +
> > +#define __assert_within_epsilon(x, ref, tol_up, tol_down) \
> > +     igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
> > +                  (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
> > +                  "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n",\
> > +                  #x, #ref, (double)(x), \
> > +                  (tol_up) * 100.0, (tol_down) * 100.0, \
> > +                  (double)(ref))
> > +
> > +#define assert_within_epsilon(x, ref, tolerance) \
> > +     __assert_within_epsilon(x, ref, tolerance, tolerance)
> > +
> > +static bool __pmu_wait_for_rc6(int fd)
> > +{
> > +     struct timespec tv = {};
> > +     uint64_t start, now;
> > +
> > +     /* First wait for roughly an RC6 Evaluation Interval */
> > +     usleep(160 * 1000);
> > +
> > +     /* Then poll for RC6 to start ticking */
> > +     now = pmu_read_single(fd);
> > +     do {
> > +             start = now;
> > +             usleep(5000);
> > +             now = pmu_read_single(fd);
> > +             if (now - start > 1e6)
> > +                     return true;
> > +     } while (!igt_seconds_elapsed(&tv));
> > +
> > +     return false;
> > +}
> > +
> > +static unsigned int measured_usleep(unsigned int usec)
> > +{
> > +     struct timespec ts = { };
> > +     unsigned int slept;
> > +
> > +     slept = igt_nsec_elapsed(&ts);
> > +     igt_assert(slept == 0);
> > +     do {
> > +             usleep(usec - slept);
> > +             slept = igt_nsec_elapsed(&ts) / 1000;
> > +     } while (slept < usec);
> > +
> > +     return igt_nsec_elapsed(&ts);
> > +}
> > +
> > +static uint32_t batch_create(int fd)
> > +{
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     uint32_t handle;
> > +
> > +     handle = gem_create(fd, 4096);
> > +     gem_write(fd, handle, 0, &bbe, sizeof(bbe));
> > +
> > +     return handle;
> > +}
> > +
> > +static int open_pmu(int i915, uint64_t config)
> > +{
> > +     int fd;
> > +
> > +     fd = perf_i915_open(config);
> > +     igt_skip_on(fd < 0 && errno == ENODEV);
> > +     igt_assert(fd >= 0);
> > +
> > +     return fd;
> > +}
> > +
> > +static void rc6_perf(int i915)
> > +{
> > +     const int64_t duration_ns = 2e9;
> > +     uint64_t idle, prev, ts[2];
> > +     unsigned long slept, cycles;
> > +     unsigned long *done;
> > +     int fd;
> > +
> > +     fd = open_pmu(i915, I915_PMU_RC6_RESIDENCY);
> > +     igt_require(__pmu_wait_for_rc6(fd));
> > +
> > +     /* While idle check full RC6. */
> > +     prev = __pmu_read_single(fd, &ts[0]);
> > +     slept = measured_usleep(duration_ns / 1000);
> > +     idle = __pmu_read_single(fd, &ts[1]);
> > +     igt_debug("slept=%lu perf=%"PRIu64"\n", slept, ts[1] - ts[0]);
> > +     assert_within_epsilon(idle - prev, ts[1] - ts[0], 5);
> > +
> > +     /* Setup up a very light load */
> > +     done = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> > +     igt_fork(child, 1) {
> > +             struct drm_i915_gem_exec_object2 obj = {
> > +                     .handle = batch_create(i915),
> > +             };
> > +             struct drm_i915_gem_execbuffer2 execbuf = {
> > +                     .buffers_ptr = to_user_pointer(&obj),
> > +                     .buffer_count = 1,
> > +             };
> > +
> > +             do {
> > +                     struct timespec tv = {};
> > +
> > +                     igt_seconds_elapsed(&tv);
> > +
> > +                     gem_execbuf(i915, &execbuf);
> > +                     gem_sync(i915, obj.handle);
> > +                     done[1]++;
> > +
> > +                     usleep(igt_seconds_elapsed(&tv) / 10); /* => 1% busy */
> 
> igt_nsec_elapsed()?

That's what I thought I wrote. How bizarre!

> Not too familiar with the perf interface but I assume event[0] read is
> the RC6 residency, while event[1] is the duration while the event was
> enabled (so the duration since the event file was opened?). Looks ok:

Yup, event[1] is the timestamp, as measured from the start of perf_open.
We use PERF_FORMAT_TOTAL_TIME_ENABLED in perf_i915_open().
 
> Reviewed-by: Imre Deak <imre.deak@intel.com>

Thanks,
-Chris
diff mbox series

Patch

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 9a320bc23..fc3052475 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -122,6 +122,7 @@  gem_threaded_access_tiled_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_threaded_access_tiled_LDADD = $(LDADD) -lpthread
 gem_tiled_swapping_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_tiled_swapping_LDADD = $(LDADD) -lpthread
+i915_pm_rc6_residency_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
 prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 prime_self_import_LDADD = $(LDADD) -lpthread
 gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
diff --git a/tests/i915/i915_pm_rc6_residency.c b/tests/i915/i915_pm_rc6_residency.c
index 1b39c870e..a5bcb084b 100644
--- a/tests/i915/i915_pm_rc6_residency.c
+++ b/tests/i915/i915_pm_rc6_residency.c
@@ -25,8 +25,6 @@ 
  *
  */
 
-#include "igt.h"
-#include "igt_sysfs.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -34,6 +32,9 @@ 
 #include <errno.h>
 #include <time.h>
 
+#include "igt.h"
+#include "igt_perf.h"
+#include "igt_sysfs.h"
 
 #define SLEEP_DURATION 3 /* in seconds */
 
@@ -195,31 +196,180 @@  static bool wait_for_rc6(void)
 	return false;
 }
 
+static uint64_t __pmu_read_single(int fd, uint64_t *ts)
+{
+	uint64_t data[2];
+
+	igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
+
+	if (ts)
+		*ts = data[1];
+
+	return data[0];
+}
+
+static uint64_t pmu_read_single(int fd)
+{
+	return __pmu_read_single(fd, NULL);
+}
+
+#define __assert_within_epsilon(x, ref, tol_up, tol_down) \
+	igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
+		     (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
+		     "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n",\
+		     #x, #ref, (double)(x), \
+		     (tol_up) * 100.0, (tol_down) * 100.0, \
+		     (double)(ref))
+
+#define assert_within_epsilon(x, ref, tolerance) \
+	__assert_within_epsilon(x, ref, tolerance, tolerance)
+
+static bool __pmu_wait_for_rc6(int fd)
+{
+	struct timespec tv = {};
+	uint64_t start, now;
+
+	/* First wait for roughly an RC6 Evaluation Interval */
+	usleep(160 * 1000);
+
+	/* Then poll for RC6 to start ticking */
+	now = pmu_read_single(fd);
+	do {
+		start = now;
+		usleep(5000);
+		now = pmu_read_single(fd);
+		if (now - start > 1e6)
+			return true;
+	} while (!igt_seconds_elapsed(&tv));
+
+	return false;
+}
+
+static unsigned int measured_usleep(unsigned int usec)
+{
+	struct timespec ts = { };
+	unsigned int slept;
+
+	slept = igt_nsec_elapsed(&ts);
+	igt_assert(slept == 0);
+	do {
+		usleep(usec - slept);
+		slept = igt_nsec_elapsed(&ts) / 1000;
+	} while (slept < usec);
+
+	return igt_nsec_elapsed(&ts);
+}
+
+static uint32_t batch_create(int fd)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(fd, 4096);
+	gem_write(fd, handle, 0, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+static int open_pmu(int i915, uint64_t config)
+{
+	int fd;
+
+	fd = perf_i915_open(config);
+	igt_skip_on(fd < 0 && errno == ENODEV);
+	igt_assert(fd >= 0);
+
+	return fd;
+}
+
+static void rc6_perf(int i915)
+{
+	const int64_t duration_ns = 2e9;
+	uint64_t idle, prev, ts[2];
+	unsigned long slept, cycles;
+	unsigned long *done;
+	int fd;
+
+	fd = open_pmu(i915, I915_PMU_RC6_RESIDENCY);
+	igt_require(__pmu_wait_for_rc6(fd));
+
+	/* While idle check full RC6. */
+	prev = __pmu_read_single(fd, &ts[0]);
+	slept = measured_usleep(duration_ns / 1000);
+	idle = __pmu_read_single(fd, &ts[1]);
+	igt_debug("slept=%lu perf=%"PRIu64"\n", slept, ts[1] - ts[0]);
+	assert_within_epsilon(idle - prev, ts[1] - ts[0], 5);
+
+	/* Setup up a very light load */
+	done = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_fork(child, 1) {
+		struct drm_i915_gem_exec_object2 obj = {
+			.handle = batch_create(i915),
+		};
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&obj),
+			.buffer_count = 1,
+		};
+
+		do {
+			struct timespec tv = {};
+
+			igt_seconds_elapsed(&tv);
+
+			gem_execbuf(i915, &execbuf);
+			gem_sync(i915, obj.handle);
+			done[1]++;
+
+			usleep(igt_seconds_elapsed(&tv) / 10); /* => 1% busy */
+		} while (!*done);
+	}
+
+	/* While very nearly idle (idle to within tolerance), except full RC6 */
+	cycles = -done[1];
+	prev = __pmu_read_single(fd, &ts[0]);
+	slept = measured_usleep(duration_ns / 1000);
+	idle = __pmu_read_single(fd, &ts[1]);
+	cycles += done[1];
+	igt_debug("slept=%lu perf=%"PRIu64", cycles=%lu\n",
+		  slept, ts[1] - ts[0], cycles);
+	igt_assert(cycles > 0);
+	assert_within_epsilon(idle - prev, ts[1] - ts[0], 5);
+
+	close(fd);
+
+	*done = 1;
+	igt_waitchildren();
+}
+
 igt_main
 {
 	unsigned int rc6_enabled = 0;
 	unsigned int devid = 0;
+	int i915 = -1;
 
 	/* Use drm_open_driver to verify device existence */
 	igt_fixture {
-		int fd;
-
-		fd = drm_open_driver(DRIVER_INTEL);
-		devid = intel_get_drm_devid(fd);
-		sysfs = igt_sysfs_open(fd);
+		i915 = drm_open_driver(DRIVER_INTEL);
+		devid = intel_get_drm_devid(i915);
+		sysfs = igt_sysfs_open(i915);
 
 		igt_require(has_rc6_residency("rc6"));
 
 		/* Make sure rc6 counters are running */
-		igt_drop_caches_set(fd, DROP_IDLE);
+		igt_drop_caches_set(i915, DROP_IDLE);
 		igt_require(wait_for_rc6());
 
-		close(fd);
-
 		rc6_enabled = get_rc6_enabled_mask();
 		igt_require(rc6_enabled & RC6_ENABLED);
 	}
 
+	igt_subtest("rc6-perf") {
+		igt_require_gem(i915);
+		gem_quiescent_gpu(i915);
+
+		rc6_perf(i915);
+	}
+
 	igt_subtest("rc6-accuracy") {
 		struct residencies res;
 
@@ -235,4 +385,8 @@  igt_main
 		measure_residencies(devid, rc6_enabled, &res);
 		residency_accuracy(res.media_rc6, res.duration, "media_rc6");
 	}
+
+	igt_fixture
+		close(i915);
+
 }
diff --git a/tests/meson.build b/tests/meson.build
index 570de5459..a79d22ba1 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -232,7 +232,6 @@  i915_progs = [
 	'i915_module_load',
 	'i915_pm_backlight',
 	'i915_pm_lpsp',
-	'i915_pm_rc6_residency',
 	'i915_pm_rpm',
 	'i915_pm_dc',
 	'i915_pm_rps',
@@ -336,6 +335,14 @@  test_executables += executable('gem_mmap_offset',
 	   install : true)
 test_list += 'gem_mmap_offset'
 
+test_executables += executable('i915_pm_rc6_residency',
+	   join_paths('i915', 'i915_pm_rc6_residency.c'),
+	   dependencies : test_deps + [ lib_igt_perf ],
+	   install_dir : libexecdir,
+	   install_rpath : libexecdir_rpathdir,
+	   install : true)
+test_list += 'i915_pm_rc6_residency'
+
 test_executables += executable('perf_pmu', 'perf_pmu.c',
 	   dependencies : test_deps + [ lib_igt_perf ],
 	   install_dir : libexecdir,