Message ID | 20250203153007.63400-4-tvrtko.ursulin@igalia.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | DRM scheduler kunit tests | expand |
Am 03.02.25 um 16:30 schrieb Tvrtko Ursulin: > Add a very simple TDR test which submits a single job and verifies that > the TDR handling will run if the backend failed to complete the job in > time. I think I said it before but I strongly suggest to not use TDR as name in the scheduler at all. What the scheduler provides is a simple timeout while waiting for the HW fence to signal. That is fundamentally different to the TDR functionality Windows provide and we already had people confusing this. Apart from that "yes, please". Those tests are desperately needed. Regards, Christian. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> > Cc: Christian König <christian.koenig@amd.com> > Cc: Danilo Krummrich <dakr@kernel.org> > Cc: Matthew Brost <matthew.brost@intel.com> > Cc: Philipp Stanner <phasta@kernel.org> > --- > .../drm/scheduler/tests/drm_mock_scheduler.c | 12 +++- > .../gpu/drm/scheduler/tests/drm_sched_tests.h | 6 +- > .../scheduler/tests/drm_sched_tests_basic.c | 64 ++++++++++++++++++- > 3 files changed, 76 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c > index f1985900a6ba..79b6193ce920 100644 > --- a/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c > +++ b/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c > @@ -160,7 +160,11 @@ static struct dma_fence *mock_sched_run_job(struct drm_sched_job *sched_job) > static enum drm_gpu_sched_stat > mock_sched_timedout_job(struct drm_sched_job *sched_job) > { > - return DRM_GPU_SCHED_STAT_ENODEV; > + struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); > + > + job->flags |= DRM_MOCK_SCHED_JOB_TIMEDOUT; > + > + return DRM_GPU_SCHED_STAT_NOMINAL; > } > > static void mock_sched_free_job(struct drm_sched_job *sched_job) > @@ -174,7 +178,9 @@ static const struct drm_sched_backend_ops drm_mock_scheduler_ops = { > .free_job = mock_sched_free_job > }; > > -struct drm_mock_scheduler 
*drm_mock_new_scheduler(struct kunit *test) > +struct drm_mock_scheduler * > +drm_mock_new_scheduler(struct kunit *test, > + long timeout) > { > struct drm_mock_scheduler *sched; > int ret; > @@ -188,7 +194,7 @@ struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test) > DRM_SCHED_PRIORITY_COUNT, > U32_MAX, /* max credits */ > UINT_MAX, /* hang limit */ > - MAX_SCHEDULE_TIMEOUT, /* timeout */ > + timeout, > NULL, /* timeout wq */ > NULL, /* score */ > "drm-mock-scheduler", > diff --git a/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h b/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h > index 421ee2712985..20695f55e453 100644 > --- a/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h > +++ b/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h > @@ -35,6 +35,9 @@ struct drm_mock_sched_entity { > struct drm_mock_sched_job { > struct drm_sched_job base; > > +#define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x1 > + unsigned long flags; > + > struct list_head link; > struct hrtimer timer; > > @@ -65,7 +68,8 @@ drm_sched_job_to_mock_job(struct drm_sched_job *sched_job) > return container_of(sched_job, struct drm_mock_sched_job, base); > }; > > -struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test); > +struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test, > + long timeout); > void drm_mock_scheduler_fini(struct drm_mock_scheduler *sched); > unsigned int drm_mock_sched_advance(struct drm_mock_scheduler *sched, > unsigned int num); > diff --git a/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c b/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c > index 6fd39bea95b1..eb0d54d00f21 100644 > --- a/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c > +++ b/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c > @@ -3,7 +3,7 @@ > > static int drm_sched_basic_init(struct kunit *test) > { > - test->priv = drm_mock_new_scheduler(test); > + test->priv = drm_mock_new_scheduler(test, MAX_SCHEDULE_TIMEOUT); > > return 0; > } > @@ -15,6 
+15,13 @@ static void drm_sched_basic_exit(struct kunit *test) > drm_mock_scheduler_fini(sched); > } > > +static int drm_sched_tdr_init(struct kunit *test) > +{ > + test->priv = drm_mock_new_scheduler(test, HZ); > + > + return 0; > +} > + > static void drm_sched_basic_submit(struct kunit *test) > { > struct drm_mock_scheduler *sched = test->priv; > @@ -244,4 +251,57 @@ static struct kunit_suite drm_sched_basic = { > .test_cases = drm_sched_basic_tests, > }; > > -kunit_test_suite(drm_sched_basic); > +static void drm_sched_basic_tdr(struct kunit *test) > +{ > + struct drm_mock_scheduler *sched = test->priv; > + struct drm_mock_sched_entity *entity; > + struct drm_mock_sched_job *job; > + bool done; > + > + /* > + * Submit a single job against a scheduler with the timeout configured > + * and verify that the timeout handling will run if the backend fails > + * to complete it in time. > + */ > + > + entity = drm_mock_new_sched_entity(test, > + DRM_SCHED_PRIORITY_NORMAL, > + sched); > + job = drm_mock_new_sched_job(test, entity); > + > + drm_mock_sched_job_submit(job); > + > + done = drm_mock_sched_job_wait_scheduled(job, HZ); > + KUNIT_ASSERT_EQ(test, done, true); > + > + done = drm_mock_sched_job_wait_finished(job, HZ / 2); > + KUNIT_ASSERT_EQ(test, done, false); > + > + KUNIT_ASSERT_EQ(test, > + job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT, > + 0); > + > + done = drm_mock_sched_job_wait_finished(job, HZ); > + KUNIT_ASSERT_EQ(test, done, false); > + > + KUNIT_ASSERT_EQ(test, > + job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT, > + DRM_MOCK_SCHED_JOB_TIMEDOUT); > + > + drm_mock_sched_entity_free(entity); > +} > + > +static struct kunit_case drm_sched_tdr_tests[] = { > + KUNIT_CASE(drm_sched_basic_tdr), > + {} > +}; > + > +static struct kunit_suite drm_sched_tdr = { > + .name = "drm_sched_basic_tdr_tests", > + .init = drm_sched_tdr_init, > + .exit = drm_sched_basic_exit, > + .test_cases = drm_sched_tdr_tests, > +}; > + > +kunit_test_suites(&drm_sched_basic, > + &drm_sched_tdr);
diff --git a/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c index f1985900a6ba..79b6193ce920 100644 --- a/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c +++ b/drivers/gpu/drm/scheduler/tests/drm_mock_scheduler.c @@ -160,7 +160,11 @@ static struct dma_fence *mock_sched_run_job(struct drm_sched_job *sched_job) static enum drm_gpu_sched_stat mock_sched_timedout_job(struct drm_sched_job *sched_job) { - return DRM_GPU_SCHED_STAT_ENODEV; + struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); + + job->flags |= DRM_MOCK_SCHED_JOB_TIMEDOUT; + + return DRM_GPU_SCHED_STAT_NOMINAL; } static void mock_sched_free_job(struct drm_sched_job *sched_job) @@ -174,7 +178,9 @@ static const struct drm_sched_backend_ops drm_mock_scheduler_ops = { .free_job = mock_sched_free_job }; -struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test) +struct drm_mock_scheduler * +drm_mock_new_scheduler(struct kunit *test, + long timeout) { struct drm_mock_scheduler *sched; int ret; @@ -188,7 +194,7 @@ struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test) DRM_SCHED_PRIORITY_COUNT, U32_MAX, /* max credits */ UINT_MAX, /* hang limit */ - MAX_SCHEDULE_TIMEOUT, /* timeout */ + timeout, NULL, /* timeout wq */ NULL, /* score */ "drm-mock-scheduler", diff --git a/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h b/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h index 421ee2712985..20695f55e453 100644 --- a/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h +++ b/drivers/gpu/drm/scheduler/tests/drm_sched_tests.h @@ -35,6 +35,9 @@ struct drm_mock_sched_entity { struct drm_mock_sched_job { struct drm_sched_job base; +#define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x1 + unsigned long flags; + struct list_head link; struct hrtimer timer; @@ -65,7 +68,8 @@ drm_sched_job_to_mock_job(struct drm_sched_job *sched_job) return container_of(sched_job, struct drm_mock_sched_job, base); }; -struct drm_mock_scheduler 
*drm_mock_new_scheduler(struct kunit *test); +struct drm_mock_scheduler *drm_mock_new_scheduler(struct kunit *test, + long timeout); void drm_mock_scheduler_fini(struct drm_mock_scheduler *sched); unsigned int drm_mock_sched_advance(struct drm_mock_scheduler *sched, unsigned int num); diff --git a/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c b/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c index 6fd39bea95b1..eb0d54d00f21 100644 --- a/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c +++ b/drivers/gpu/drm/scheduler/tests/drm_sched_tests_basic.c @@ -3,7 +3,7 @@ static int drm_sched_basic_init(struct kunit *test) { - test->priv = drm_mock_new_scheduler(test); + test->priv = drm_mock_new_scheduler(test, MAX_SCHEDULE_TIMEOUT); return 0; } @@ -15,6 +15,13 @@ static void drm_sched_basic_exit(struct kunit *test) drm_mock_scheduler_fini(sched); } +static int drm_sched_tdr_init(struct kunit *test) +{ + test->priv = drm_mock_new_scheduler(test, HZ); + + return 0; +} + static void drm_sched_basic_submit(struct kunit *test) { struct drm_mock_scheduler *sched = test->priv; @@ -244,4 +251,57 @@ static struct kunit_suite drm_sched_basic = { .test_cases = drm_sched_basic_tests, }; -kunit_test_suite(drm_sched_basic); +static void drm_sched_basic_tdr(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity; + struct drm_mock_sched_job *job; + bool done; + + /* + * Submit a single job against a scheduler with the timeout configured + * and verify that the timeout handling will run if the backend fails + * to complete it in time. 
+ */ + + entity = drm_mock_new_sched_entity(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + job = drm_mock_new_sched_job(test, entity); + + drm_mock_sched_job_submit(job); + + done = drm_mock_sched_job_wait_scheduled(job, HZ); + KUNIT_ASSERT_EQ(test, done, true); + + done = drm_mock_sched_job_wait_finished(job, HZ / 2); + KUNIT_ASSERT_EQ(test, done, false); + + KUNIT_ASSERT_EQ(test, + job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT, + 0); + + done = drm_mock_sched_job_wait_finished(job, HZ); + KUNIT_ASSERT_EQ(test, done, false); + + KUNIT_ASSERT_EQ(test, + job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT, + DRM_MOCK_SCHED_JOB_TIMEDOUT); + + drm_mock_sched_entity_free(entity); +} + +static struct kunit_case drm_sched_tdr_tests[] = { + KUNIT_CASE(drm_sched_basic_tdr), + {} +}; + +static struct kunit_suite drm_sched_tdr = { + .name = "drm_sched_basic_tdr_tests", + .init = drm_sched_tdr_init, + .exit = drm_sched_basic_exit, + .test_cases = drm_sched_tdr_tests, +}; + +kunit_test_suites(&drm_sched_basic, + &drm_sched_tdr);
Add a very simple TDR test which submits a single job and verifies that the TDR handling will run if the backend fails to complete the job in time. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> Cc: Christian König <christian.koenig@amd.com> Cc: Danilo Krummrich <dakr@kernel.org> Cc: Matthew Brost <matthew.brost@intel.com> Cc: Philipp Stanner <phasta@kernel.org> --- .../drm/scheduler/tests/drm_mock_scheduler.c | 12 +++- .../gpu/drm/scheduler/tests/drm_sched_tests.h | 6 +- .../scheduler/tests/drm_sched_tests_basic.c | 64 ++++++++++++++++++- 3 files changed, 76 insertions(+), 6 deletions(-)