Message ID | 20180702090727.7721-5-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 02/07/2018 10:07, Chris Wilson wrote: > When using the pollable spinner, we often want to use it as a means of > ensuring the task is running on the GPU before switching to something > else. In which case we don't want to add extra delay inside the spinner, > but the current 1000 NOPs add on order of 5us, which is often larger > than the target latency. > > v2: Don't change perf_pmu as that is sensitive to the extra CPU latency > from a tight GPU spinner. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com> #v1 > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> #v1 > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > lib/igt_dummyload.c | 3 ++- > lib/igt_dummyload.h | 1 + > tests/gem_ctx_isolation.c | 1 + > tests/gem_eio.c | 1 + > tests/gem_exec_latency.c | 4 ++-- > 5 files changed, 7 insertions(+), 3 deletions(-) > > diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c > index 94efdf745..7beb66244 100644 > --- a/lib/igt_dummyload.c > +++ b/lib/igt_dummyload.c > @@ -199,7 +199,8 @@ emit_recursive_batch(igt_spin_t *spin, > * between function calls, that appears enough to keep SNB out of > * trouble. See https://bugs.freedesktop.org/show_bug.cgi?id=102262 > */ > - batch += 1000; > + if (!(opts->flags & IGT_SPIN_FAST)) > + batch += 1000; igt_require(!snb) or something, given the comment whose last two lines can be seen in the diff above? Regards, Tvrtko > > /* recurse */ > r = &relocs[obj[BATCH].relocation_count++]; > diff --git a/lib/igt_dummyload.h b/lib/igt_dummyload.h > index c794f2544..e80a12451 100644 > --- a/lib/igt_dummyload.h > +++ b/lib/igt_dummyload.h > @@ -52,6 +52,7 @@ struct igt_spin_factory { > > #define IGT_SPIN_FENCE_OUT (1 << 0) > #define IGT_SPIN_POLL_RUN (1 << 1) > +#define IGT_SPIN_FAST (1 << 2) > > igt_spin_t * > __igt_spin_batch_factory(int fd, const struct igt_spin_factory *opts); > diff --git a/tests/gem_ctx_isolation.c b/tests/gem_ctx_isolation.c > index 2e19e8c03..4325e1c28 100644 > --- a/tests/gem_ctx_isolation.c > +++ b/tests/gem_ctx_isolation.c > @@ -560,6 +560,7 @@ static void inject_reset_context(int fd, unsigned int engine) > struct igt_spin_factory opts = { > .ctx = gem_context_create(fd), > .engine = engine, > + .flags = IGT_SPIN_FAST, > }; > igt_spin_t *spin; > > diff --git a/tests/gem_eio.c b/tests/gem_eio.c > index 0ec1aaec9..3162a3170 100644 > --- a/tests/gem_eio.c > +++ b/tests/gem_eio.c > @@ -160,6 +160,7 @@ static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags) > struct igt_spin_factory opts = { > .ctx = ctx, > .engine = flags, > + .flags = IGT_SPIN_FAST, > }; > > if (gem_can_store_dword(fd, opts.engine)) > diff --git a/tests/gem_exec_latency.c b/tests/gem_exec_latency.c > index 75811f325..de16322a6 100644 > --- a/tests/gem_exec_latency.c > +++ b/tests/gem_exec_latency.c > @@ -65,7 +65,7 @@ poll_ring(int fd, unsigned ring, const char *name) > { > const struct igt_spin_factory opts = { > .engine = ring, > - .flags = IGT_SPIN_POLL_RUN, > + .flags = IGT_SPIN_POLL_RUN | IGT_SPIN_FAST, > }; > struct timespec tv = {}; > unsigned long cycles; > @@ -464,7 +464,7 @@ rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned in > #define MMAP_SZ (64 << 10) > const struct igt_spin_factory opts = { > .engine = engine, > - .flags = IGT_SPIN_POLL_RUN, > + .flags = IGT_SPIN_POLL_RUN | IGT_SPIN_FAST, > }; > struct rt_pkt *results; > unsigned int engines[16]; >
Quoting Tvrtko Ursulin (2018-07-02 16:36:28) > > On 02/07/2018 10:07, Chris Wilson wrote: > > When using the pollable spinner, we often want to use it as a means of > > ensuring the task is running on the GPU before switching to something > > else. In which case we don't want to add extra delay inside the spinner, > > but the current 1000 NOPs add on order of 5us, which is often larger > > than the target latency. > > > > v2: Don't change perf_pmu as that is sensitive to the extra CPU latency > > from a tight GPU spinner. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com> #v1 > > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> #v1 > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > --- > > lib/igt_dummyload.c | 3 ++- > > lib/igt_dummyload.h | 1 + > > tests/gem_ctx_isolation.c | 1 + > > tests/gem_eio.c | 1 + > > tests/gem_exec_latency.c | 4 ++-- > > 5 files changed, 7 insertions(+), 3 deletions(-) > > > > diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c > > index 94efdf745..7beb66244 100644 > > --- a/lib/igt_dummyload.c > > +++ b/lib/igt_dummyload.c > > @@ -199,7 +199,8 @@ emit_recursive_batch(igt_spin_t *spin, > > * between function calls, that appears enough to keep SNB out of > > * trouble. See https://bugs.freedesktop.org/show_bug.cgi?id=102262 > > */ > > - batch += 1000; > > + if (!(opts->flags & IGT_SPIN_FAST)) > > + batch += 1000; > > igt_require(!snb) or something, given the comment whose last two lines > can be seen in the diff above? It's not a machine killer since we have the required fix in the kernel, it just has interesting system latency. That latency is not specific to snb, and whether we want to trade system latency vs gpu latency is the reason we punt the decision to the caller. -Chris
On 05/07/2018 12:23, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-07-02 16:36:28) >> >> On 02/07/2018 10:07, Chris Wilson wrote: >>> When using the pollable spinner, we often want to use it as a means of >>> ensuring the task is running on the GPU before switching to something >>> else. In which case we don't want to add extra delay inside the spinner, >>> but the current 1000 NOPs add on order of 5us, which is often larger >>> than the target latency. >>> >>> v2: Don't change perf_pmu as that is sensitive to the extra CPU latency >>> from a tight GPU spinner. >>> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>> Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com> #v1 >>> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> #v1 >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>> --- >>> lib/igt_dummyload.c | 3 ++- >>> lib/igt_dummyload.h | 1 + >>> tests/gem_ctx_isolation.c | 1 + >>> tests/gem_eio.c | 1 + >>> tests/gem_exec_latency.c | 4 ++-- >>> 5 files changed, 7 insertions(+), 3 deletions(-) >>> >>> diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c >>> index 94efdf745..7beb66244 100644 >>> --- a/lib/igt_dummyload.c >>> +++ b/lib/igt_dummyload.c >>> @@ -199,7 +199,8 @@ emit_recursive_batch(igt_spin_t *spin, >>> * between function calls, that appears enough to keep SNB out of >>> * trouble. See https://bugs.freedesktop.org/show_bug.cgi?id=102262 >>> */ >>> - batch += 1000; >>> + if (!(opts->flags & IGT_SPIN_FAST)) >>> + batch += 1000; >> >> igt_require(!snb) or something, given the comment whose last two lines >> can be seen in the diff above? > > It's not a machine killer since we have the required fix in the kernel, > it just has interesting system latency. That latency is not specific to > snb, and whether we want to trade system latency vs gpu latency is the > reason we punt the decision to the caller. I am not sure if the comment and linked BZ is then still relevant? If it is, it is not nice to punt the responsibility to the caller. We are exporting new API and it is hard to think callers will dig into the implementation to find it (this comment). So a info/warn/skip when using the feature on a platform where it doesn't work well is I think much friendlier approach. If the comment is not relevant, or needs to be adjusted to reflect the changed reality (bugfix you mention), then either removed or adjusted. Regards, Tvrtko
Quoting Tvrtko Ursulin (2018-07-05 13:33:55) > > On 05/07/2018 12:23, Chris Wilson wrote: > > Quoting Tvrtko Ursulin (2018-07-02 16:36:28) > >> > >> On 02/07/2018 10:07, Chris Wilson wrote: > >>> When using the pollable spinner, we often want to use it as a means of > >>> ensuring the task is running on the GPU before switching to something > >>> else. In which case we don't want to add extra delay inside the spinner, > >>> but the current 1000 NOPs add on order of 5us, which is often larger > >>> than the target latency. > >>> > >>> v2: Don't change perf_pmu as that is sensitive to the extra CPU latency > >>> from a tight GPU spinner. > >>> > >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > >>> Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com> #v1 > >>> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> #v1 > >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >>> --- > >>> lib/igt_dummyload.c | 3 ++- > >>> lib/igt_dummyload.h | 1 + > >>> tests/gem_ctx_isolation.c | 1 + > >>> tests/gem_eio.c | 1 + > >>> tests/gem_exec_latency.c | 4 ++-- > >>> 5 files changed, 7 insertions(+), 3 deletions(-) > >>> > >>> diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c > >>> index 94efdf745..7beb66244 100644 > >>> --- a/lib/igt_dummyload.c > >>> +++ b/lib/igt_dummyload.c > >>> @@ -199,7 +199,8 @@ emit_recursive_batch(igt_spin_t *spin, > >>> * between function calls, that appears enough to keep SNB out of > >>> * trouble. See https://bugs.freedesktop.org/show_bug.cgi?id=102262 > >>> */ > >>> - batch += 1000; > >>> + if (!(opts->flags & IGT_SPIN_FAST)) > >>> + batch += 1000; > >> > >> igt_require(!snb) or something, given the comment whose last two lines > >> can be seen in the diff above? > > > > It's not a machine killer since we have the required fix in the kernel, > > it just has interesting system latency. That latency is not specific to > > snb, and whether we want to trade system latency vs gpu latency is the > > reason we punt the decision to the caller. > > I am not sure if the comment and linked BZ is then still relevant? It is, it's just too specific, and it particularly affects modesetting as it doesn't defend itself against CPU starvation / timer latency (imo). > If it is, it is not nice to punt the responsibility to the caller. We > are exporting new API and it is hard to think callers will dig into the > implementation to find it (this comment). Why not? Is not the sordid history of our hw supposed to be our specialist topic? :) For what has once happened before is sure to happen again. (Corollary to George Santayana.) > So a info/warn/skip when using the feature on a platform where it > doesn't work well is I think much friendlier approach. But for those who want to use it, it is just noise. -Chris
On 05/07/2018 13:42, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-07-05 13:33:55) >> >> On 05/07/2018 12:23, Chris Wilson wrote: >>> Quoting Tvrtko Ursulin (2018-07-02 16:36:28) >>>> >>>> On 02/07/2018 10:07, Chris Wilson wrote: >>>>> When using the pollable spinner, we often want to use it as a means of >>>>> ensuring the task is running on the GPU before switching to something >>>>> else. In which case we don't want to add extra delay inside the spinner, >>>>> but the current 1000 NOPs add on order of 5us, which is often larger >>>>> than the target latency. >>>>> >>>>> v2: Don't change perf_pmu as that is sensitive to the extra CPU latency >>>>> from a tight GPU spinner. >>>>> >>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>>>> Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com> #v1 >>>>> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> #v1 >>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>>>> --- >>>>> lib/igt_dummyload.c | 3 ++- >>>>> lib/igt_dummyload.h | 1 + >>>>> tests/gem_ctx_isolation.c | 1 + >>>>> tests/gem_eio.c | 1 + >>>>> tests/gem_exec_latency.c | 4 ++-- >>>>> 5 files changed, 7 insertions(+), 3 deletions(-) >>>>> >>>>> diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c >>>>> index 94efdf745..7beb66244 100644 >>>>> --- a/lib/igt_dummyload.c >>>>> +++ b/lib/igt_dummyload.c >>>>> @@ -199,7 +199,8 @@ emit_recursive_batch(igt_spin_t *spin, >>>>> * between function calls, that appears enough to keep SNB out of >>>>> * trouble. See https://bugs.freedesktop.org/show_bug.cgi?id=102262 >>>>> */ >>>>> - batch += 1000; >>>>> + if (!(opts->flags & IGT_SPIN_FAST)) >>>>> + batch += 1000; >>>> >>>> igt_require(!snb) or something, given the comment whose last two lines >>>> can be seen in the diff above? >>> >>> It's not a machine killer since we have the required fix in the kernel, >>> it just has interesting system latency. That latency is not specific to >>> snb, and whether we want to trade system latency vs gpu latency is the >>> reason we punt the decision to the caller. >> >> I am not sure if the comment and linked BZ is then still relevant? > > It is, it's just too specific, and it particularly affects modesetting as > it doesn't defend itself against CPU starvation / timer latency (imo). > >> If it is, it is not nice to punt the responsibility to the caller. We >> are exporting new API and it is hard to think callers will dig into the >> implementation to find it (this comment). > > Why not? Is not the sordid history of our hw supposed to be our > specialist topic? :) For what has once happened before is sure to happen > again. (Corollary to George Santayana.) > >> So a info/warn/skip when using the feature on a platform where it >> doesn't work well is I think much friendlier approach. > > But for those who want to use it, it is just noise. For current users yes, for new users maybe not - maybe they need to have a way to notice they might be doing things unknowingly suboptimally. What would accomplish that goal? igt_info too much noise? igt_debug too low key? Blah.. Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
Quoting Tvrtko Ursulin (2018-07-05 16:29:32) > > On 05/07/2018 13:42, Chris Wilson wrote: > > Quoting Tvrtko Ursulin (2018-07-05 13:33:55) > >> > >> On 05/07/2018 12:23, Chris Wilson wrote: > >>> Quoting Tvrtko Ursulin (2018-07-02 16:36:28) > >>>> > >>>> On 02/07/2018 10:07, Chris Wilson wrote: > >>>>> When using the pollable spinner, we often want to use it as a means of > >>>>> ensuring the task is running on the GPU before switching to something > >>>>> else. In which case we don't want to add extra delay inside the spinner, > >>>>> but the current 1000 NOPs add on order of 5us, which is often larger > >>>>> than the target latency. > >>>>> > >>>>> v2: Don't change perf_pmu as that is sensitive to the extra CPU latency > >>>>> from a tight GPU spinner. > >>>>> > >>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > >>>>> Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com> #v1 > >>>>> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> #v1 > >>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >>>>> --- > >>>>> lib/igt_dummyload.c | 3 ++- > >>>>> lib/igt_dummyload.h | 1 + > >>>>> tests/gem_ctx_isolation.c | 1 + > >>>>> tests/gem_eio.c | 1 + > >>>>> tests/gem_exec_latency.c | 4 ++-- > >>>>> 5 files changed, 7 insertions(+), 3 deletions(-) > >>>>> > >>>>> diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c > >>>>> index 94efdf745..7beb66244 100644 > >>>>> --- a/lib/igt_dummyload.c > >>>>> +++ b/lib/igt_dummyload.c > >>>>> @@ -199,7 +199,8 @@ emit_recursive_batch(igt_spin_t *spin, > >>>>> * between function calls, that appears enough to keep SNB out of > >>>>> * trouble. See https://bugs.freedesktop.org/show_bug.cgi?id=102262 > >>>>> */ > >>>>> - batch += 1000; > >>>>> + if (!(opts->flags & IGT_SPIN_FAST)) > >>>>> + batch += 1000; > >>>> > >>>> igt_require(!snb) or something, given the comment whose last two lines > >>>> can be seen in the diff above? > >>> > >>> It's not a machine killer since we have the required fix in the kernel, > >>> it just has interesting system latency. That latency is not specific to > >>> snb, and whether we want to trade system latency vs gpu latency is the > >>> reason we punt the decision to the caller. > >> > >> I am not sure if the comment and linked BZ is then still relevant? > > > > It is, it's just too specific, and it particularly affects modesetting as > > it doesn't defend itself against CPU starvation / timer latency (imo). > > > >> If it is, it is not nice to punt the responsibility to the caller. We > >> are exporting new API and it is hard to think callers will dig into the > >> implementation to find it (this comment). > > > > Why not? Is not the sordid history of our hw supposed to be our > > specialist topic? :) For what has once happened before is sure to happen > > again. (Corollary to George Santayana.) > > > >> So a info/warn/skip when using the feature on a platform where it > >> doesn't work well is I think much friendlier approach. > > > > But for those who want to use it, it is just noise. > > For current users yes, for new users maybe not - maybe they need to have > a way to notice they might be doing things unknowingly suboptimally. > What would accomplish that goal? igt_info too much noise? igt_debug too > low key? Blah.. Rather, as I see it unless it _impacts_ their test, ignorance is bliss. Having opted into using SPIN_FAST and their test failing on the farm on some machines, I don't expect it to be hard for a novice to debug. -Chris
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c index 94efdf745..7beb66244 100644 --- a/lib/igt_dummyload.c +++ b/lib/igt_dummyload.c @@ -199,7 +199,8 @@ emit_recursive_batch(igt_spin_t *spin, * between function calls, that appears enough to keep SNB out of * trouble. See https://bugs.freedesktop.org/show_bug.cgi?id=102262 */ - batch += 1000; + if (!(opts->flags & IGT_SPIN_FAST)) + batch += 1000; /* recurse */ r = &relocs[obj[BATCH].relocation_count++]; diff --git a/lib/igt_dummyload.h b/lib/igt_dummyload.h index c794f2544..e80a12451 100644 --- a/lib/igt_dummyload.h +++ b/lib/igt_dummyload.h @@ -52,6 +52,7 @@ struct igt_spin_factory { #define IGT_SPIN_FENCE_OUT (1 << 0) #define IGT_SPIN_POLL_RUN (1 << 1) +#define IGT_SPIN_FAST (1 << 2) igt_spin_t * __igt_spin_batch_factory(int fd, const struct igt_spin_factory *opts); diff --git a/tests/gem_ctx_isolation.c b/tests/gem_ctx_isolation.c index 2e19e8c03..4325e1c28 100644 --- a/tests/gem_ctx_isolation.c +++ b/tests/gem_ctx_isolation.c @@ -560,6 +560,7 @@ static void inject_reset_context(int fd, unsigned int engine) struct igt_spin_factory opts = { .ctx = gem_context_create(fd), .engine = engine, + .flags = IGT_SPIN_FAST, }; igt_spin_t *spin; diff --git a/tests/gem_eio.c b/tests/gem_eio.c index 0ec1aaec9..3162a3170 100644 --- a/tests/gem_eio.c +++ b/tests/gem_eio.c @@ -160,6 +160,7 @@ static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags) struct igt_spin_factory opts = { .ctx = ctx, .engine = flags, + .flags = IGT_SPIN_FAST, }; if (gem_can_store_dword(fd, opts.engine)) diff --git a/tests/gem_exec_latency.c b/tests/gem_exec_latency.c index 75811f325..de16322a6 100644 --- a/tests/gem_exec_latency.c +++ b/tests/gem_exec_latency.c @@ -65,7 +65,7 @@ poll_ring(int fd, unsigned ring, const char *name) { const struct igt_spin_factory opts = { .engine = ring, - .flags = IGT_SPIN_POLL_RUN, + .flags = IGT_SPIN_POLL_RUN | IGT_SPIN_FAST, }; struct timespec tv = {}; unsigned long cycles; @@ -464,7 +464,7 @@ rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned in #define MMAP_SZ (64 << 10) const struct igt_spin_factory opts = { .engine = engine, - .flags = IGT_SPIN_POLL_RUN, + .flags = IGT_SPIN_POLL_RUN | IGT_SPIN_FAST, }; struct rt_pkt *results; unsigned int engines[16];