Message ID | 20190308181129.15562-18-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [i-g-t,01/19] i915/gem_ppgtt: Estimate resource usage and bail if it means swapping! | expand |
On 08/03/2019 18:11, Chris Wilson wrote: > Exercise the in-kernel load balancer checking that we can distribute > batches across the set of ctx->engines to avoid load. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > tests/Makefile.am | 1 + > tests/Makefile.sources | 1 + > tests/i915/gem_exec_balancer.c | 627 +++++++++++++++++++++++++++++++++ > tests/meson.build | 7 + > 4 files changed, 636 insertions(+) > create mode 100644 tests/i915/gem_exec_balancer.c > > diff --git a/tests/Makefile.am b/tests/Makefile.am > index 289249b42..68a9c14bf 100644 > --- a/tests/Makefile.am > +++ b/tests/Makefile.am > @@ -102,6 +102,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread > gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > gem_ctx_thrash_LDADD = $(LDADD) -lpthread > gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la > +i915_gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la > gem_exec_capture_LDADD = $(LDADD) -lz > gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > gem_exec_parallel_LDADD = $(LDADD) -lpthread > diff --git a/tests/Makefile.sources b/tests/Makefile.sources > index 41e756f15..f6c21a1aa 100644 > --- a/tests/Makefile.sources > +++ b/tests/Makefile.sources > @@ -23,6 +23,7 @@ TESTS_progs = \ > drm_read \ > i915/gem_ctx_engines \ > i915/gem_ctx_shared \ > + i915/gem_exec_balancer \ > kms_3d \ > kms_addfb_basic \ > kms_atomic \ > diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c > new file mode 100644 > index 000000000..d9fdffe67 > --- /dev/null > +++ b/tests/i915/gem_exec_balancer.c > @@ -0,0 +1,627 @@ > +/* > + * Copyright © 2018 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or 
sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include <sched.h> > + > +#include "igt.h" > +#include "igt_perf.h" > +#include "i915/gem_ring.h" > +#include "sw_sync.h" > + > +IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing"); > + > +struct class_instance { > + uint16_t class; > + uint16_t instance; > +}; > +#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS) > + > +static bool has_class_instance(int i915, uint16_t class, uint16_t instance) > +{ > + int fd; > + > + fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance)); > + if (fd != -1) { > + close(fd); > + return true; > + } > + > + return false; > +} > + > +static struct class_instance * > +list_engines(int i915, uint32_t class_mask, unsigned int *out) > +{ > + unsigned int count = 0, size = 64; > + struct class_instance *engines; > + > + engines = malloc(size * sizeof(*engines)); > + if (!engines) { > + *out = 0; > + return NULL; > + } > + > + for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER; > + class_mask; > + class++, class_mask >>= 1) { > + if (!(class_mask & 1)) > + continue; > + > + for (unsigned int instance = 0; > + instance < INSTANCE_COUNT; > + instance++) { > + if 
(!has_class_instance(i915, class, instance)) > + continue; > + > + if (count == size) { > + struct class_instance *e; > + > + size *= 2; > + e = realloc(engines, size*sizeof(*engines)); > + if (!e) { > + *out = count; > + return engines; > + } > + > + engines = e; > + } > + > + engines[count++] = (struct class_instance){ > + .class = class, > + .instance = instance, > + }; > + } > + } > + > + if (!count) { > + free(engines); > + engines = NULL; > + } > + > + *out = count; > + return engines; > +} > + > +static int __set_load_balancer(int i915, uint32_t ctx, > + const struct class_instance *ci, > + unsigned int count) > +{ > + struct i915_context_engines_load_balance balancer = { > + { .name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE }, > + .engines_mask = ~0ull, > + }; > + I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, count + 1); > + struct drm_i915_gem_context_param p = { > + .ctx_id = ctx, > + .param = I915_CONTEXT_PARAM_ENGINES, > + .size = sizeof(&engines), sizeof(engines) Regards, Tvrtko > + .value = to_user_pointer(&engines) > + }; > + > + engines.extensions = to_user_pointer(&balancer), > + engines.class_instance[0].engine_class = I915_ENGINE_CLASS_INVALID; > + engines.class_instance[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE; > + memcpy(engines.class_instance + 1, ci, count * sizeof(uint32_t)); > + > + return __gem_context_set_param(i915, &p); > +} > + > +static void set_load_balancer(int i915, uint32_t ctx, > + const struct class_instance *ci, > + unsigned int count) > +{ > + igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0); > +} > + > +static uint32_t load_balancer_create(int i915, > + const struct class_instance *ci, > + unsigned int count) > +{ > + uint32_t ctx; > + > + ctx = gem_context_create(i915); > + set_load_balancer(i915, ctx, ci, count); > + > + return ctx; > +} > + > +static void kick_kthreads(int period_us) > +{ > + sched_yield(); > + usleep(period_us); > +} > + > +static double measure_load(int pmu, int period_us) > +{ > + 
uint64_t data[2]; > + uint64_t d_t, d_v; > + > + kick_kthreads(period_us); > + > + igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data)); > + d_v = -data[0]; > + d_t = -data[1]; > + > + usleep(period_us); > + > + igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data)); > + d_v += data[0]; > + d_t += data[1]; > + > + return d_v / (double)d_t; > +} > + > +static double measure_min_load(int pmu, unsigned int num, int period_us) > +{ > + uint64_t data[2 + num]; > + uint64_t d_t, d_v[num]; > + uint64_t min = -1, max = 0; > + > + kick_kthreads(period_us); > + > + igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data)); > + for (unsigned int n = 0; n < num; n++) > + d_v[n] = -data[2 + n]; > + d_t = -data[1]; > + > + usleep(period_us); > + > + igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data)); > + > + d_t += data[1]; > + for (unsigned int n = 0; n < num; n++) { > + d_v[n] += data[2 + n]; > + igt_debug("engine[%d]: %.1f%%\n", > + n, d_v[n] / (double)d_t * 100); > + if (d_v[n] < min) > + min = d_v[n]; > + if (d_v[n] > max) > + max = d_v[n]; > + } > + > + igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n", > + d_t, min / (double)d_t * 100, max / (double)d_t * 100); > + > + return min / (double)d_t; > +} > + > +static void check_individual_engine(int i915, > + uint32_t ctx, > + const struct class_instance *ci, > + int idx) > +{ > + igt_spin_t *spin; > + double load; > + int pmu; > + > + pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].class, > + ci[idx].instance)); > + > + spin = igt_spin_batch_new(i915, .ctx = ctx, .engine = idx + 1); > + load = measure_load(pmu, 10000); > + igt_spin_batch_free(i915, spin); > + > + close(pmu); > + > + igt_assert_f(load > 0.90, > + "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n", > + idx, ci[idx].class, ci[idx].instance, load*100); > +} > + > +static void individual(int i915) > +{ > + uint32_t ctx; > + > + /* > + * I915_CONTEXT_PARAM_ENGINE allows us to index into the user > + * supplied array 
from gem_execbuf(). Our check is to build the > + * ctx->engine[] with various different engine classes, feed in > + * a spinner and then ask pmu to confirm it the expected engine > + * was busy. > + */ > + > + ctx = gem_context_create(i915); > + > + for (int mask = 0; mask < 32; mask++) { > + struct class_instance *ci; > + unsigned int count; > + > + ci = list_engines(i915, 1u << mask, &count); > + if (!ci) > + continue; > + > + igt_debug("Found %d engines of class %d\n", count, mask); > + > + for (int pass = 0; pass < count; pass++) { /* approx. count! */ > + igt_permute_array(ci, count, igt_exchange_int64); > + set_load_balancer(i915, ctx, ci, count); > + for (unsigned int n = 0; n < count; n++) > + check_individual_engine(i915, ctx, ci, n); > + } > + > + free(ci); > + } > + > + gem_context_destroy(i915, ctx); > +} > + > +static int add_pmu(int pmu, const struct class_instance *ci) > +{ > + return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->class, > + ci->instance), > + pmu); > +} > + > +static uint32_t __batch_create(int i915, uint32_t offset) > +{ > + const uint32_t bbe = MI_BATCH_BUFFER_END; > + uint32_t handle; > + > + handle = gem_create(i915, ALIGN(offset + 4, 4096)); > + gem_write(i915, handle, offset, &bbe, sizeof(bbe)); > + > + return handle; > +} > + > +static uint32_t batch_create(int i915) > +{ > + return __batch_create(i915, 0); > +} > + > +static void full(int i915, unsigned int flags) > +#define PULSE 0x1 > +#define LATE 0x2 > +{ > + struct drm_i915_gem_exec_object2 batch = { > + .handle = batch_create(i915), > + }; > + > + if (flags & LATE) > + igt_require_sw_sync(); > + > + /* > + * I915_CONTEXT_PARAM_ENGINE changes the meaning of I915_EXEC_DEFAULT > + * to provide an automatic selection from the ctx->engine[]. It > + * employs load-balancing to evenly distribute the workload the > + * array. 
If we submit N spinners, we expect them to be simultaneously > + * running across N engines and use PMU to confirm that the entire > + * set of engines are busy. > + * > + * We complicate matters by interpersing shortlived tasks to challenge > + * the kernel to search for space in which to insert new batches. > + */ > + > + > + for (int mask = 0; mask < 32; mask++) { > + struct class_instance *ci; > + igt_spin_t *spin = NULL; > + unsigned int count; > + IGT_CORK_FENCE(cork); > + double load; > + int fence = -1; > + int *pmu; > + > + ci = list_engines(i915, 1u << mask, &count); > + if (!ci) > + continue; > + > + igt_debug("Found %d engines of class %d\n", count, mask); > + > + pmu = malloc(sizeof(*pmu) * count); > + igt_assert(pmu); > + > + if (flags & LATE) > + fence = igt_cork_plug(&cork, i915); > + > + pmu[0] = -1; > + for (unsigned int n = 0; n < count; n++) { > + uint32_t ctx; > + > + pmu[n] = add_pmu(pmu[0], &ci[n]); > + > + if (flags & PULSE) { > + struct drm_i915_gem_execbuffer2 eb = { > + .buffers_ptr = to_user_pointer(&batch), > + .buffer_count = 1, > + .rsvd2 = fence, > + .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0, > + }; > + > + gem_execbuf(i915, &eb); > + } > + > + /* > + * Each spinner needs to be one a new timeline, > + * otherwise they will just sit in the single queue > + * and not run concurrently. > + */ > + ctx = load_balancer_create(i915, ci, count); > + > + if (spin == NULL) { > + spin = __igt_spin_batch_new(i915, ctx, 0, 0); > + } else { > + struct drm_i915_gem_exec_object2 obj = { > + .handle = spin->handle, > + }; > + struct drm_i915_gem_execbuffer2 eb = { > + .buffers_ptr = to_user_pointer(&obj), > + .buffer_count = 1, > + .rsvd1 = ctx, > + .rsvd2 = fence, > + .flags = flags & LATE ? 
I915_EXEC_FENCE_IN : 0, > + }; > + > + gem_execbuf(i915, &eb); > + } > + > + gem_context_destroy(i915, ctx); > + } > + > + if (flags & LATE) { > + igt_cork_unplug(&cork); > + close(fence); > + } > + > + load = measure_min_load(pmu[0], count, 10000); > + igt_spin_batch_free(i915, spin); > + > + close(pmu[0]); > + free(pmu); > + > + free(ci); > + > + igt_assert_f(load > 0.90, > + "minimum load for %d x class:%d was found to be only %.1f%% busy\n", > + count, mask, load*100); > + } > + > + gem_close(i915, batch.handle); > +} > + > +static void ping(int i915, uint32_t ctx, unsigned int engine) > +{ > + struct drm_i915_gem_exec_object2 obj = { > + .handle = batch_create(i915), > + }; > + struct drm_i915_gem_execbuffer2 execbuf = { > + .buffers_ptr = to_user_pointer(&obj), > + .buffer_count = 1, > + .flags = engine, > + .rsvd1 = ctx, > + }; > + gem_execbuf(i915, &execbuf); > + gem_sync(i915, obj.handle); > + gem_close(i915, obj.handle); > +} > + > +static void semaphore(int i915) > +{ > + uint32_t block[2], scratch; > + igt_spin_t *spin[3]; > + > + /* > + * If we are using HW semaphores to launch serialised requests > + * on different engine concurrently, we want to verify that real > + * work is unimpeded. 
> + */ > + igt_require(gem_scheduler_has_preemption(i915)); > + > + block[0] = gem_context_create(i915); > + block[1] = gem_context_create(i915); > + > + scratch = gem_create(i915, 4096); > + spin[2] = igt_spin_batch_new(i915, .dependency = scratch); > + for (int mask = 1; mask < 32; mask++) { > + struct class_instance *ci; > + unsigned int count; > + uint32_t vip; > + > + ci = list_engines(i915, 1u << mask, &count); > + if (!ci) > + continue; > + > + if (count < ARRAY_SIZE(block)) > + continue; > + > + /* Ensure that we completely occupy all engines in this group */ > + count = ARRAY_SIZE(block); > + > + for (int i = 0; i < count; i++) { > + set_load_balancer(i915, block[i], ci, count); > + spin[i] = __igt_spin_batch_new(i915, > + .ctx = block[i], > + .dependency = scratch); > + } > + > + /* > + * Either we haven't blocked both engines with semaphores, > + * or we let the vip through. If not, we hang. > + */ > + vip = gem_context_create(i915); > + set_load_balancer(i915, vip, ci, count); > + ping(i915, vip, 0); > + gem_context_destroy(i915, vip); > + > + for (int i = 0; i < count; i++) > + igt_spin_batch_free(i915, spin[i]); > + > + free(ci); > + } > + igt_spin_batch_free(i915, spin[2]); > + gem_close(i915, scratch); > + > + gem_context_destroy(i915, block[1]); > + gem_context_destroy(i915, block[0]); > +} > + > +static void smoketest(int i915, int timeout) > +{ > + struct drm_i915_gem_exec_object2 batch[2] = { > + { .handle = __batch_create(i915, 16380) } > + }; > + unsigned int ncontext = 0; > + uint32_t *contexts = NULL; > + uint32_t *handles = NULL; > + > + igt_require_sw_sync(); > + > + for (int mask = 0; mask < 32; mask++) { > + struct class_instance *ci; > + unsigned int count = 0; > + > + ci = list_engines(i915, 1u << mask, &count); > + if (!ci || count < 2) { > + free(ci); > + continue; > + } > + > + igt_debug("Found %d engines of class %d\n", count, mask); > + > + ncontext += 128; > + contexts = realloc(contexts, sizeof(*contexts) * ncontext); > + 
igt_assert(contexts); > + > + for (unsigned int n = ncontext - 128; n < ncontext; n++) { > + contexts[n] = load_balancer_create(i915, ci, count); > + igt_assert(contexts[n]); > + } > + > + free(ci); > + } > + igt_debug("Created %d virtual engines (one per context)\n", ncontext); > + igt_require(ncontext); > + > + contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4); > + igt_assert(contexts); > + memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts)); > + ncontext *= 2; > + memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts)); > + ncontext *= 2; > + > + handles = malloc(sizeof(*handles) * ncontext); > + igt_assert(handles); > + for (unsigned int n = 0; n < ncontext; n++) > + handles[n] = gem_create(i915, 4096); > + > + igt_until_timeout(timeout) { > + unsigned int count = 1 + (rand() % (ncontext - 1)); > + IGT_CORK_FENCE(cork); > + int fence = igt_cork_plug(&cork, i915); > + > + for (unsigned int n = 0; n < count; n++) { > + struct drm_i915_gem_execbuffer2 eb = { > + .buffers_ptr = to_user_pointer(batch), > + .buffer_count = ARRAY_SIZE(batch), > + .rsvd1 = contexts[n], > + .rsvd2 = fence, > + .flags = I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_IN, > + }; > + batch[1].handle = handles[n]; > + gem_execbuf(i915, &eb); > + } > + igt_permute_array(handles, count, igt_exchange_int); > + > + igt_cork_unplug(&cork); > + for (unsigned int n = 0; n < count; n++) > + gem_sync(i915, handles[n]); > + > + close(fence); > + } > + > + for (unsigned int n = 0; n < ncontext; n++) { > + gem_close(i915, handles[n]); > + __gem_context_destroy(i915, contexts[n]); > + } > + free(handles); > + free(contexts); > + gem_close(i915, batch[0].handle); > +} > + > +static bool has_context_engines(int i915) > +{ > + struct drm_i915_gem_context_param p = { > + .param = I915_CONTEXT_PARAM_ENGINES, > + }; > + > + return __gem_context_set_param(i915, &p) == 0; > +} > + > +static bool has_load_balancer(int i915) > +{ > + struct class_instance ci = {}; > + uint32_t 
ctx; > + int err; > + > + ctx = gem_context_create(i915); > + err = __set_load_balancer(i915, ctx, &ci, 1); > + gem_context_destroy(i915, ctx); > + > + return err == 0; > +} > + > +igt_main > +{ > + int i915 = -1; > + > + igt_skip_on_simulation(); > + > + igt_fixture { > + i915 = drm_open_driver(DRIVER_INTEL); > + igt_require_gem(i915); > + > + gem_require_contexts(i915); > + igt_require(has_context_engines(i915)); > + igt_require(has_load_balancer(i915)); > + > + igt_fork_hang_detector(i915); > + } > + > + igt_subtest("individual") > + individual(i915); > + > + igt_subtest_group { > + static const struct { > + const char *name; > + unsigned int flags; > + } phases[] = { > + { "", 0 }, > + { "-pulse", PULSE }, > + { "-late", LATE }, > + { "-late-pulse", PULSE | LATE }, > + { } > + }; > + for (typeof(*phases) *p = phases; p->name; p++) > + igt_subtest_f("full%s", p->name) > + full(i915, p->flags); > + } > + > + igt_subtest("semaphore") > + semaphore(i915); > + > + igt_subtest("smoke") > + smoketest(i915, 20); > + > + igt_fixture { > + igt_stop_hang_detector(); > + } > +} > diff --git a/tests/meson.build b/tests/meson.build > index 0539c20c8..bd2db825e 100644 > --- a/tests/meson.build > +++ b/tests/meson.build > @@ -293,6 +293,13 @@ test_executables += executable('gem_eio', > install : true) > test_list += 'gem_eio' > > +test_executables += executable('gem_exec_balancer', 'i915/gem_exec_balancer.c', > + dependencies : test_deps + [ lib_igt_perf ], > + install_dir : libexecdir, > + install_rpath : libexecdir_rpathdir, > + install : true) > +test_progs += 'gem_exec_balancer' > + > test_executables += executable('gem_mocs_settings', > join_paths('i915', 'gem_mocs_settings.c'), > dependencies : test_deps + [ lib_igt_perf ], >
Quoting Tvrtko Ursulin (2019-03-12 10:23:12) > > On 08/03/2019 18:11, Chris Wilson wrote: > > Exercise the in-kernel load balancer checking that we can distribute > > batches across the set of ctx->engines to avoid load. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > tests/Makefile.am | 1 + > > tests/Makefile.sources | 1 + > > tests/i915/gem_exec_balancer.c | 627 +++++++++++++++++++++++++++++++++ > > tests/meson.build | 7 + > > 4 files changed, 636 insertions(+) > > create mode 100644 tests/i915/gem_exec_balancer.c > > > > diff --git a/tests/Makefile.am b/tests/Makefile.am > > index 289249b42..68a9c14bf 100644 > > --- a/tests/Makefile.am > > +++ b/tests/Makefile.am > > @@ -102,6 +102,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread > > gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > > gem_ctx_thrash_LDADD = $(LDADD) -lpthread > > gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la > > +i915_gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la > > gem_exec_capture_LDADD = $(LDADD) -lz > > gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > > gem_exec_parallel_LDADD = $(LDADD) -lpthread > > diff --git a/tests/Makefile.sources b/tests/Makefile.sources > > index 41e756f15..f6c21a1aa 100644 > > --- a/tests/Makefile.sources > > +++ b/tests/Makefile.sources > > @@ -23,6 +23,7 @@ TESTS_progs = \ > > drm_read \ > > i915/gem_ctx_engines \ > > i915/gem_ctx_shared \ > > + i915/gem_exec_balancer \ > > kms_3d \ > > kms_addfb_basic \ > > kms_atomic \ > > diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c > > new file mode 100644 > > index 000000000..d9fdffe67 > > --- /dev/null > > +++ b/tests/i915/gem_exec_balancer.c > > @@ -0,0 +1,627 @@ > > +/* > > + * Copyright © 2018 Intel Corporation > > + * > > + * Permission is hereby granted, free of charge, to any person obtaining a > > + * copy of this software and associated documentation files (the "Software"), > > + * to deal in the 
Software without restriction, including without limitation > > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > > + * and/or sell copies of the Software, and to permit persons to whom the > > + * Software is furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice (including the next > > + * paragraph) shall be included in all copies or substantial portions of the > > + * Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > > + * IN THE SOFTWARE. 
> > + */ > > + > > +#include <sched.h> > > + > > +#include "igt.h" > > +#include "igt_perf.h" > > +#include "i915/gem_ring.h" > > +#include "sw_sync.h" > > + > > +IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing"); > > + > > +struct class_instance { > > + uint16_t class; > > + uint16_t instance; > > +}; > > +#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS) > > + > > +static bool has_class_instance(int i915, uint16_t class, uint16_t instance) > > +{ > > + int fd; > > + > > + fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance)); > > + if (fd != -1) { > > + close(fd); > > + return true; > > + } > > + > > + return false; > > +} > > + > > +static struct class_instance * > > +list_engines(int i915, uint32_t class_mask, unsigned int *out) > > +{ > > + unsigned int count = 0, size = 64; > > + struct class_instance *engines; > > + > > + engines = malloc(size * sizeof(*engines)); > > + if (!engines) { > > + *out = 0; > > + return NULL; > > + } > > + > > + for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER; > > + class_mask; > > + class++, class_mask >>= 1) { > > + if (!(class_mask & 1)) > > + continue; > > + > > + for (unsigned int instance = 0; > > + instance < INSTANCE_COUNT; > > + instance++) { > > + if (!has_class_instance(i915, class, instance)) > > + continue; > > + > > + if (count == size) { > > + struct class_instance *e; > > + > > + size *= 2; > > + e = realloc(engines, size*sizeof(*engines)); > > + if (!e) { > > + *out = count; > > + return engines; > > + } > > + > > + engines = e; > > + } > > + > > + engines[count++] = (struct class_instance){ > > + .class = class, > > + .instance = instance, > > + }; > > + } > > + } > > + > > + if (!count) { > > + free(engines); > > + engines = NULL; > > + } > > + > > + *out = count; > > + return engines; > > +} > > + > > +static int __set_load_balancer(int i915, uint32_t ctx, > > + const struct class_instance *ci, > > + unsigned int count) > > +{ > > + struct 
i915_context_engines_load_balance balancer = { > > + { .name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE }, > > + .engines_mask = ~0ull, > > + }; > > + I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, count + 1); > > + struct drm_i915_gem_context_param p = { > > + .ctx_id = ctx, > > + .param = I915_CONTEXT_PARAM_ENGINES, > > + .size = sizeof(&engines), > > sizeof(engines) Ta. Don't look at the bonding test -- that still has all the +1 offset baked in. ;) -Chris
diff --git a/tests/Makefile.am b/tests/Makefile.am index 289249b42..68a9c14bf 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -102,6 +102,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_ctx_thrash_LDADD = $(LDADD) -lpthread gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la +i915_gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la gem_exec_capture_LDADD = $(LDADD) -lz gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_exec_parallel_LDADD = $(LDADD) -lpthread diff --git a/tests/Makefile.sources b/tests/Makefile.sources index 41e756f15..f6c21a1aa 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -23,6 +23,7 @@ TESTS_progs = \ drm_read \ i915/gem_ctx_engines \ i915/gem_ctx_shared \ + i915/gem_exec_balancer \ kms_3d \ kms_addfb_basic \ kms_atomic \ diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c new file mode 100644 index 000000000..d9fdffe67 --- /dev/null +++ b/tests/i915/gem_exec_balancer.c @@ -0,0 +1,627 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <sched.h> + +#include "igt.h" +#include "igt_perf.h" +#include "i915/gem_ring.h" +#include "sw_sync.h" + +IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing"); + +struct class_instance { + uint16_t class; + uint16_t instance; +}; +#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS) + +static bool has_class_instance(int i915, uint16_t class, uint16_t instance) +{ + int fd; + + fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance)); + if (fd != -1) { + close(fd); + return true; + } + + return false; +} + +static struct class_instance * +list_engines(int i915, uint32_t class_mask, unsigned int *out) +{ + unsigned int count = 0, size = 64; + struct class_instance *engines; + + engines = malloc(size * sizeof(*engines)); + if (!engines) { + *out = 0; + return NULL; + } + + for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER; + class_mask; + class++, class_mask >>= 1) { + if (!(class_mask & 1)) + continue; + + for (unsigned int instance = 0; + instance < INSTANCE_COUNT; + instance++) { + if (!has_class_instance(i915, class, instance)) + continue; + + if (count == size) { + struct class_instance *e; + + size *= 2; + e = realloc(engines, size*sizeof(*engines)); + if (!e) { + *out = count; + return engines; + } + + engines = e; + } + + engines[count++] = (struct class_instance){ + .class = class, + .instance = instance, + }; + } + } + + if (!count) { + free(engines); + engines = NULL; + } + + *out = count; + return engines; +} + +static int __set_load_balancer(int i915, uint32_t ctx, + const struct class_instance *ci, + unsigned int count) +{ + struct i915_context_engines_load_balance balancer = { + { .name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE }, + 
.engines_mask = ~0ull, + }; + I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, count + 1); + struct drm_i915_gem_context_param p = { + .ctx_id = ctx, + .param = I915_CONTEXT_PARAM_ENGINES, + .size = sizeof(&engines), + .value = to_user_pointer(&engines) + }; + + engines.extensions = to_user_pointer(&balancer), + engines.class_instance[0].engine_class = I915_ENGINE_CLASS_INVALID; + engines.class_instance[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE; + memcpy(engines.class_instance + 1, ci, count * sizeof(uint32_t)); + + return __gem_context_set_param(i915, &p); +} + +static void set_load_balancer(int i915, uint32_t ctx, + const struct class_instance *ci, + unsigned int count) +{ + igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0); +} + +static uint32_t load_balancer_create(int i915, + const struct class_instance *ci, + unsigned int count) +{ + uint32_t ctx; + + ctx = gem_context_create(i915); + set_load_balancer(i915, ctx, ci, count); + + return ctx; +} + +static void kick_kthreads(int period_us) +{ + sched_yield(); + usleep(period_us); +} + +static double measure_load(int pmu, int period_us) +{ + uint64_t data[2]; + uint64_t d_t, d_v; + + kick_kthreads(period_us); + + igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data)); + d_v = -data[0]; + d_t = -data[1]; + + usleep(period_us); + + igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data)); + d_v += data[0]; + d_t += data[1]; + + return d_v / (double)d_t; +} + +static double measure_min_load(int pmu, unsigned int num, int period_us) +{ + uint64_t data[2 + num]; + uint64_t d_t, d_v[num]; + uint64_t min = -1, max = 0; + + kick_kthreads(period_us); + + igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data)); + for (unsigned int n = 0; n < num; n++) + d_v[n] = -data[2 + n]; + d_t = -data[1]; + + usleep(period_us); + + igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data)); + + d_t += data[1]; + for (unsigned int n = 0; n < num; n++) { + d_v[n] += data[2 + n]; + igt_debug("engine[%d]: %.1f%%\n", 
+ n, d_v[n] / (double)d_t * 100); + if (d_v[n] < min) + min = d_v[n]; + if (d_v[n] > max) + max = d_v[n]; + } + + igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n", + d_t, min / (double)d_t * 100, max / (double)d_t * 100); + + return min / (double)d_t; +} + +static void check_individual_engine(int i915, + uint32_t ctx, + const struct class_instance *ci, + int idx) +{ + igt_spin_t *spin; + double load; + int pmu; + + pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].class, + ci[idx].instance)); + + spin = igt_spin_batch_new(i915, .ctx = ctx, .engine = idx + 1); + load = measure_load(pmu, 10000); + igt_spin_batch_free(i915, spin); + + close(pmu); + + igt_assert_f(load > 0.90, + "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n", + idx, ci[idx].class, ci[idx].instance, load*100); +} + +static void individual(int i915) +{ + uint32_t ctx; + + /* + * I915_CONTEXT_PARAM_ENGINE allows us to index into the user + * supplied array from gem_execbuf(). Our check is to build the + * ctx->engine[] with various different engine classes, feed in + * a spinner and then ask pmu to confirm it the expected engine + * was busy. + */ + + ctx = gem_context_create(i915); + + for (int mask = 0; mask < 32; mask++) { + struct class_instance *ci; + unsigned int count; + + ci = list_engines(i915, 1u << mask, &count); + if (!ci) + continue; + + igt_debug("Found %d engines of class %d\n", count, mask); + + for (int pass = 0; pass < count; pass++) { /* approx. count! 
*/ + igt_permute_array(ci, count, igt_exchange_int64); + set_load_balancer(i915, ctx, ci, count); + for (unsigned int n = 0; n < count; n++) + check_individual_engine(i915, ctx, ci, n); + } + + free(ci); + } + + gem_context_destroy(i915, ctx); +} + +static int add_pmu(int pmu, const struct class_instance *ci) +{ + return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->class, + ci->instance), + pmu); +} + +static uint32_t __batch_create(int i915, uint32_t offset) +{ + const uint32_t bbe = MI_BATCH_BUFFER_END; + uint32_t handle; + + handle = gem_create(i915, ALIGN(offset + 4, 4096)); + gem_write(i915, handle, offset, &bbe, sizeof(bbe)); + + return handle; +} + +static uint32_t batch_create(int i915) +{ + return __batch_create(i915, 0); +} + +static void full(int i915, unsigned int flags) +#define PULSE 0x1 +#define LATE 0x2 +{ + struct drm_i915_gem_exec_object2 batch = { + .handle = batch_create(i915), + }; + + if (flags & LATE) + igt_require_sw_sync(); + + /* + * I915_CONTEXT_PARAM_ENGINE changes the meaning of I915_EXEC_DEFAULT + * to provide an automatic selection from the ctx->engine[]. It + * employs load-balancing to evenly distribute the workload the + * array. If we submit N spinners, we expect them to be simultaneously + * running across N engines and use PMU to confirm that the entire + * set of engines are busy. + * + * We complicate matters by interpersing shortlived tasks to challenge + * the kernel to search for space in which to insert new batches. 
+ */ + + + for (int mask = 0; mask < 32; mask++) { + struct class_instance *ci; + igt_spin_t *spin = NULL; + unsigned int count; + IGT_CORK_FENCE(cork); + double load; + int fence = -1; + int *pmu; + + ci = list_engines(i915, 1u << mask, &count); + if (!ci) + continue; + + igt_debug("Found %d engines of class %d\n", count, mask); + + pmu = malloc(sizeof(*pmu) * count); + igt_assert(pmu); + + if (flags & LATE) + fence = igt_cork_plug(&cork, i915); + + pmu[0] = -1; + for (unsigned int n = 0; n < count; n++) { + uint32_t ctx; + + pmu[n] = add_pmu(pmu[0], &ci[n]); + + if (flags & PULSE) { + struct drm_i915_gem_execbuffer2 eb = { + .buffers_ptr = to_user_pointer(&batch), + .buffer_count = 1, + .rsvd2 = fence, + .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0, + }; + + gem_execbuf(i915, &eb); + } + + /* + * Each spinner needs to be one a new timeline, + * otherwise they will just sit in the single queue + * and not run concurrently. + */ + ctx = load_balancer_create(i915, ci, count); + + if (spin == NULL) { + spin = __igt_spin_batch_new(i915, ctx, 0, 0); + } else { + struct drm_i915_gem_exec_object2 obj = { + .handle = spin->handle, + }; + struct drm_i915_gem_execbuffer2 eb = { + .buffers_ptr = to_user_pointer(&obj), + .buffer_count = 1, + .rsvd1 = ctx, + .rsvd2 = fence, + .flags = flags & LATE ? 
I915_EXEC_FENCE_IN : 0, + }; + + gem_execbuf(i915, &eb); + } + + gem_context_destroy(i915, ctx); + } + + if (flags & LATE) { + igt_cork_unplug(&cork); + close(fence); + } + + load = measure_min_load(pmu[0], count, 10000); + igt_spin_batch_free(i915, spin); + + close(pmu[0]); + free(pmu); + + free(ci); + + igt_assert_f(load > 0.90, + "minimum load for %d x class:%d was found to be only %.1f%% busy\n", + count, mask, load*100); + } + + gem_close(i915, batch.handle); +} + +static void ping(int i915, uint32_t ctx, unsigned int engine) +{ + struct drm_i915_gem_exec_object2 obj = { + .handle = batch_create(i915), + }; + struct drm_i915_gem_execbuffer2 execbuf = { + .buffers_ptr = to_user_pointer(&obj), + .buffer_count = 1, + .flags = engine, + .rsvd1 = ctx, + }; + gem_execbuf(i915, &execbuf); + gem_sync(i915, obj.handle); + gem_close(i915, obj.handle); +} + +static void semaphore(int i915) +{ + uint32_t block[2], scratch; + igt_spin_t *spin[3]; + + /* + * If we are using HW semaphores to launch serialised requests + * on different engine concurrently, we want to verify that real + * work is unimpeded. + */ + igt_require(gem_scheduler_has_preemption(i915)); + + block[0] = gem_context_create(i915); + block[1] = gem_context_create(i915); + + scratch = gem_create(i915, 4096); + spin[2] = igt_spin_batch_new(i915, .dependency = scratch); + for (int mask = 1; mask < 32; mask++) { + struct class_instance *ci; + unsigned int count; + uint32_t vip; + + ci = list_engines(i915, 1u << mask, &count); + if (!ci) + continue; + + if (count < ARRAY_SIZE(block)) + continue; + + /* Ensure that we completely occupy all engines in this group */ + count = ARRAY_SIZE(block); + + for (int i = 0; i < count; i++) { + set_load_balancer(i915, block[i], ci, count); + spin[i] = __igt_spin_batch_new(i915, + .ctx = block[i], + .dependency = scratch); + } + + /* + * Either we haven't blocked both engines with semaphores, + * or we let the vip through. If not, we hang. 
		 */
		vip = gem_context_create(i915);
		set_load_balancer(i915, vip, ci, count);
		/* engine 0 presumably selects the balanced engine -- TODO confirm */
		ping(i915, vip, 0);
		gem_context_destroy(i915, vip);

		for (int i = 0; i < count; i++)
			igt_spin_batch_free(i915, spin[i]);

		free(ci);
	}
	igt_spin_batch_free(i915, spin[2]);
	gem_close(i915, scratch);

	gem_context_destroy(i915, block[1]);
	gem_context_destroy(i915, block[0]);
}

/*
 * Random-load smoke: build 128 load-balanced contexts per capable engine
 * class, duplicate the set 4x, then for @timeout seconds repeatedly submit
 * a random number of corked batches across the contexts, unplug, and sync.
 */
static void smoketest(int i915, int timeout)
{
	struct drm_i915_gem_exec_object2 batch[2] = {
		/* BBE at offset 16380: the tail of a 16KiB batch object */
		{ .handle = __batch_create(i915, 16380) }
	};
	unsigned int ncontext = 0;
	uint32_t *contexts = NULL;
	uint32_t *handles = NULL;

	igt_require_sw_sync();

	for (int mask = 0; mask < 32; mask++) {
		struct class_instance *ci;
		unsigned int count = 0;

		/* Balancing needs at least two engines of a class. */
		ci = list_engines(i915, 1u << mask, &count);
		if (!ci || count < 2) {
			free(ci);
			continue;
		}

		igt_debug("Found %d engines of class %d\n", count, mask);

		ncontext += 128;
		contexts = realloc(contexts, sizeof(*contexts) * ncontext);
		igt_assert(contexts);

		for (unsigned int n = ncontext - 128; n < ncontext; n++) {
			contexts[n] = load_balancer_create(i915, ci, count);
			igt_assert(contexts[n]);
		}

		free(ci);
	}
	igt_debug("Created %d virtual engines (one per context)\n", ncontext);
	igt_require(ncontext);

	/*
	 * Quadruple the context array by duplication, so each context id
	 * appears four times; destruction below tolerates the repeats via
	 * __gem_context_destroy().
	 */
	contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4);
	igt_assert(contexts);
	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
	ncontext *= 2;
	memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
	ncontext *= 2;

	handles = malloc(sizeof(*handles) * ncontext);
	igt_assert(handles);
	for (unsigned int n = 0; n < ncontext; n++)
		handles[n] = gem_create(i915, 4096);

	igt_until_timeout(timeout) {
		unsigned int count = 1 + (rand() % (ncontext - 1));
		IGT_CORK_FENCE(cork);
		int fence = igt_cork_plug(&cork, i915);

		/* Queue everything behind the cork, then release at once. */
		for (unsigned int n = 0; n < count; n++) {
			struct drm_i915_gem_execbuffer2 eb = {
				.buffers_ptr = to_user_pointer(batch),
				.buffer_count = ARRAY_SIZE(batch),
				.rsvd1 = contexts[n],
				.rsvd2 = fence,
				.flags = I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_IN,
			};
			batch[1].handle = handles[n];
			gem_execbuf(i915, &eb);
		}
		igt_permute_array(handles, count, igt_exchange_int);

		igt_cork_unplug(&cork);
		for (unsigned int n = 0; n < count; n++)
			gem_sync(i915, handles[n]);

		close(fence);
	}

	for (unsigned int n = 0; n < ncontext; n++) {
		gem_close(i915, handles[n]);
		__gem_context_destroy(i915, contexts[n]);
	}
	free(handles);
	free(contexts);
	gem_close(i915, batch[0].handle);
}

/*
 * Probe kernel support for I915_CONTEXT_PARAM_ENGINES by setting an
 * empty engine map on the default context (size 0 restores the default).
 */
static bool has_context_engines(int i915)
{
	struct drm_i915_gem_context_param p = {
		.param = I915_CONTEXT_PARAM_ENGINES,
	};

	return __gem_context_set_param(i915, &p) == 0;
}

/* Probe support for the load-balancing engine extension. */
static bool has_load_balancer(int i915)
{
	struct class_instance ci = {};
	uint32_t ctx;
	int err;

	ctx = gem_context_create(i915);
	err = __set_load_balancer(i915, ctx, &ci, 1);
	gem_context_destroy(i915, ctx);

	return err == 0;
}

igt_main
{
	int i915 = -1;

	igt_skip_on_simulation();

	igt_fixture {
		i915 = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(i915);

		gem_require_contexts(i915);
		igt_require(has_context_engines(i915));
		igt_require(has_load_balancer(i915));

		igt_fork_hang_detector(i915);
	}

	igt_subtest("individual")
		individual(i915);

	igt_subtest_group {
		static const struct {
			const char *name;
			unsigned int flags;
		} phases[] = {
			{ "", 0 },
			{ "-pulse", PULSE },
			{ "-late", LATE },
			{ "-late-pulse", PULSE | LATE },
			{ }
		};
		for (typeof(*phases) *p = phases; p->name; p++)
			igt_subtest_f("full%s", p->name)
				full(i915, p->flags);
	}

	igt_subtest("semaphore")
		semaphore(i915);

	igt_subtest("smoke")
		smoketest(i915, 20);

	igt_fixture {
		igt_stop_hang_detector();
	}
}
diff --git a/tests/meson.build
index 0539c20c8..bd2db825e 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -293,6 +293,13 @@
test_executables += executable('gem_eio', install : true) test_list += 'gem_eio' +test_executables += executable('gem_exec_balancer', 'i915/gem_exec_balancer.c', + dependencies : test_deps + [ lib_igt_perf ], + install_dir : libexecdir, + install_rpath : libexecdir_rpathdir, + install : true) +test_progs += 'gem_exec_balancer' + test_executables += executable('gem_mocs_settings', join_paths('i915', 'gem_mocs_settings.c'), dependencies : test_deps + [ lib_igt_perf ],
Exercise the in-kernel load balancer checking that we can distribute batches across the set of ctx->engines to avoid load. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- tests/Makefile.am | 1 + tests/Makefile.sources | 1 + tests/i915/gem_exec_balancer.c | 627 +++++++++++++++++++++++++++++++++ tests/meson.build | 7 + 4 files changed, 636 insertions(+) create mode 100644 tests/i915/gem_exec_balancer.c