Message ID | 20190701100502.15639-4-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/12] drm/i915/guc: Avoid reclaim locks during reset | expand |
Chris Wilson <chris@chris-wilson.co.uk> writes: > Daniele pointed out that the CSB status information will change with > Tigerlake and suggested that we could rearrange our state machine to > hide the differences in generation. gcc also prefers the explicit state > machine, so make it so: > > process_csb 1980 1967 -13 > > Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > --- > drivers/gpu/drm/i915/gt/intel_lrc.c | 64 ++++++++++++++++++++--------- > 1 file changed, 44 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 471e134de186..953b3938a85f 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -1279,6 +1279,30 @@ reset_in_progress(const struct intel_engine_execlists *execlists) > return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); > } > > +enum csb_step { > + CSB_NOP, > + CSB_PROMOTE, > + CSB_PREEMPT, > + CSB_COMPLETE, > +}; > + > +static inline enum csb_step > +csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) > +{ > + unsigned int status = *csb; Could be const u32 aswell (stylistic). Just makes me ponder why you want to read csb in here and not in the callsite. 
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > + > + if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) > + return CSB_PROMOTE; > + > + if (status & GEN8_CTX_STATUS_PREEMPTED) > + return CSB_PREEMPT; > + > + if (*execlists->active) > + return CSB_COMPLETE; > + > + return CSB_NOP; > +} > + > static void process_csb(struct intel_engine_cs *engine) > { > struct intel_engine_execlists * const execlists = &engine->execlists; > @@ -1316,8 +1340,6 @@ static void process_csb(struct intel_engine_cs *engine) > rmb(); > > do { > - unsigned int status; > - > if (++head == num_entries) > head = 0; > > @@ -1343,10 +1365,16 @@ static void process_csb(struct intel_engine_cs *engine) > engine->name, head, > buf[2 * head + 0], buf[2 * head + 1]); > > - status = buf[2 * head]; > - if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) { > + switch (csb_parse(execlists, buf + 2 * head)) { > + case CSB_PREEMPT: /* cancel old inflight, prepare for switch */ > + trace_ports(execlists, "preempted", execlists->active); > + > + while (*execlists->active) > + execlists_schedule_out(*execlists->active++); > + > + /* fallthrough */ > + case CSB_PROMOTE: /* switch pending to inflight */ > GEM_BUG_ON(*execlists->active); > -promote: > GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); > execlists->active = > memcpy(execlists->inflight, > @@ -1355,25 +1383,17 @@ static void process_csb(struct intel_engine_cs *engine) > sizeof(*execlists->pending)); > execlists->pending[0] = NULL; > > + trace_ports(execlists, "promoted", execlists->active); > + > if (enable_timeslice(engine)) > mod_timer(&execlists->timer, jiffies + 1); > > if (!inject_preempt_hang(execlists)) > ring_set_paused(engine, 0); > - } else if (status & GEN8_CTX_STATUS_PREEMPTED) { > - struct i915_request * const *port = execlists->active; > - > - trace_ports(execlists, "preempted", execlists->active); > - > - while (*port) > - execlists_schedule_out(*port++); > - > - goto promote; > - } else if (*execlists->active) { > - struct 
i915_request *rq = *execlists->active++; > + break; > > - trace_ports(execlists, "completed", > - execlists->active - 1); > + case CSB_COMPLETE: /* port0 completed, advanced to port1 */ > + trace_ports(execlists, "completed", execlists->active); > > /* > * We rely on the hardware being strongly > @@ -1381,11 +1401,15 @@ static void process_csb(struct intel_engine_cs *engine) > * coherent (visible from the CPU) before the > * user interrupt and CSB is processed. > */ > - GEM_BUG_ON(!i915_request_completed(rq)); > - execlists_schedule_out(rq); > + GEM_BUG_ON(!i915_request_completed(*execlists->active)); > + execlists_schedule_out(*execlists->active++); > > GEM_BUG_ON(execlists->active - execlists->inflight > > execlists_num_ports(execlists)); > + break; > + > + case CSB_NOP: > + break; > } > } while (head != tail); > > -- > 2.20.1
Quoting Mika Kuoppala (2019-07-01 12:49:48) > Chris Wilson <chris@chris-wilson.co.uk> writes: > > > Daniele pointed out that the CSB status information will change with > > Tigerlake and suggested that we could rearrange our state machine to > > hide the differences in generation. gcc also prefers the explicit state > > machine, so make it so: > > > > process_csb 1980 1967 -13 > > > > Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> > > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > > --- > > drivers/gpu/drm/i915/gt/intel_lrc.c | 64 ++++++++++++++++++++--------- > > 1 file changed, 44 insertions(+), 20 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > > index 471e134de186..953b3938a85f 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > @@ -1279,6 +1279,30 @@ reset_in_progress(const struct intel_engine_execlists *execlists) > > return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); > > } > > > > +enum csb_step { > > + CSB_NOP, > > + CSB_PROMOTE, > > + CSB_PREEMPT, > > + CSB_COMPLETE, > > +}; > > + > > +static inline enum csb_step > > +csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) > > +{ > > + unsigned int status = *csb; > > Could be const u32 aswell (stylistic). No need to specify here, local register is fine, so left it as natural. > Just makes me ponder why you want to read csb in here > and not in the callsite. Whatever gcc prefers when there is multiple csb_parsers. :) -Chris
On 7/1/19 3:04 AM, Chris Wilson wrote: > Daniele pointed out that the CSB status information will change with > Tigerlake and suggested that we could rearrange our state machine to > hide the differences in generation. gcc also prefers the explicit state > machine, so make it so: > > process_csb 1980 1967 -13 > > Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> To be fair the suggestion came from you... > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > --- > drivers/gpu/drm/i915/gt/intel_lrc.c | 64 ++++++++++++++++++++--------- > 1 file changed, 44 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 471e134de186..953b3938a85f 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -1279,6 +1279,30 @@ reset_in_progress(const struct intel_engine_execlists *execlists) > return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); > } > > +enum csb_step { > + CSB_NOP, > + CSB_PROMOTE, > + CSB_PREEMPT, > + CSB_COMPLETE, > +}; > + > +static inline enum csb_step > +csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) > +{ > + unsigned int status = *csb; > + > + if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) > + return CSB_PROMOTE; > + > + if (status & GEN8_CTX_STATUS_PREEMPTED) > + return CSB_PREEMPT; > + > + if (*execlists->active) > + return CSB_COMPLETE; I think the CSB_COMPLETE case is going to be the same across the various csb parsers since we don't even look at the complete bit in the CSB, but I'm undecided if it'd indeed be cleaner to have it outside or not, e.g.: switch (csb_parse(...)) { case CSB_PREEMPT: [...] case CSB_PROMOTE: [...] default: if (!*execlists->active) break; [...] we can reconsider when the TGL parser is added. 
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > + > + return CSB_NOP; > +} > + > static void process_csb(struct intel_engine_cs *engine) > { > struct intel_engine_execlists * const execlists = &engine->execlists; > @@ -1316,8 +1340,6 @@ static void process_csb(struct intel_engine_cs *engine) > rmb(); > > do { > - unsigned int status; > - > if (++head == num_entries) > head = 0; > > @@ -1343,10 +1365,16 @@ static void process_csb(struct intel_engine_cs *engine) > engine->name, head, > buf[2 * head + 0], buf[2 * head + 1]); > > - status = buf[2 * head]; > - if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) { > + switch (csb_parse(execlists, buf + 2 * head)) { > + case CSB_PREEMPT: /* cancel old inflight, prepare for switch */ > + trace_ports(execlists, "preempted", execlists->active); > + > + while (*execlists->active) > + execlists_schedule_out(*execlists->active++); > + > + /* fallthrough */ > + case CSB_PROMOTE: /* switch pending to inflight */ > GEM_BUG_ON(*execlists->active); > -promote: > GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); > execlists->active = > memcpy(execlists->inflight, > @@ -1355,25 +1383,17 @@ static void process_csb(struct intel_engine_cs *engine) > sizeof(*execlists->pending)); > execlists->pending[0] = NULL; > > + trace_ports(execlists, "promoted", execlists->active); > + > if (enable_timeslice(engine)) > mod_timer(&execlists->timer, jiffies + 1); > > if (!inject_preempt_hang(execlists)) > ring_set_paused(engine, 0); > - } else if (status & GEN8_CTX_STATUS_PREEMPTED) { > - struct i915_request * const *port = execlists->active; > - > - trace_ports(execlists, "preempted", execlists->active); > - > - while (*port) > - execlists_schedule_out(*port++); > - > - goto promote; > - } else if (*execlists->active) { > - struct i915_request *rq = *execlists->active++; > + break; > > - trace_ports(execlists, "completed", > - execlists->active - 1); > + case CSB_COMPLETE: /* port0 completed, advanced to port1 */ > + 
trace_ports(execlists, "completed", execlists->active); > > /* > * We rely on the hardware being strongly > @@ -1381,11 +1401,15 @@ static void process_csb(struct intel_engine_cs *engine) > * coherent (visible from the CPU) before the > * user interrupt and CSB is processed. > */ > - GEM_BUG_ON(!i915_request_completed(rq)); > - execlists_schedule_out(rq); > + GEM_BUG_ON(!i915_request_completed(*execlists->active)); > + execlists_schedule_out(*execlists->active++); > > GEM_BUG_ON(execlists->active - execlists->inflight > > execlists_num_ports(execlists)); > + break; > + > + case CSB_NOP: > + break; > } > } while (head != tail); > >
Chris Wilson <chris@chris-wilson.co.uk> writes: > Quoting Mika Kuoppala (2019-07-01 12:49:48) >> Chris Wilson <chris@chris-wilson.co.uk> writes: >> >> > Daniele pointed out that the CSB status information will change with >> > Tigerlake and suggested that we could rearrange our state machine to >> > hide the differences in generation. gcc also prefers the explicit state >> > machine, so make it so: >> > >> > process_csb 1980 1967 -13 >> > >> > Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> >> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> >> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> >> > --- >> > drivers/gpu/drm/i915/gt/intel_lrc.c | 64 ++++++++++++++++++++--------- >> > 1 file changed, 44 insertions(+), 20 deletions(-) >> > >> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c >> > index 471e134de186..953b3938a85f 100644 >> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c >> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c >> > @@ -1279,6 +1279,30 @@ reset_in_progress(const struct intel_engine_execlists *execlists) >> > return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); >> > } >> > >> > +enum csb_step { >> > + CSB_NOP, >> > + CSB_PROMOTE, >> > + CSB_PREEMPT, >> > + CSB_COMPLETE, >> > +}; >> > + >> > +static inline enum csb_step >> > +csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) >> > +{ >> > + unsigned int status = *csb; >> >> Could be const u32 aswell (stylistic). > > No need to specify here, local register is fine, so left it as natural. > In this case the function is small and obvious, which is why I called it stylistic. But for a more complex one, it eases the reviewer's burden: once you have read something as const, the complexity tree you need to manage between your ears shrinks as you read further down. 
I can also remember at least a few cases where it has prevented an unwanted accidental write from propagating past the compiler. >> Just makes me ponder why you want to read csb in here >> and not in the callsite. > > Whatever gcc prefers when there is multiple csb_parsers. :) It produces better code? Surely reason enough. -Mika
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 471e134de186..953b3938a85f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1279,6 +1279,30 @@ reset_in_progress(const struct intel_engine_execlists *execlists) return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); } +enum csb_step { + CSB_NOP, + CSB_PROMOTE, + CSB_PREEMPT, + CSB_COMPLETE, +}; + +static inline enum csb_step +csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) +{ + unsigned int status = *csb; + + if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) + return CSB_PROMOTE; + + if (status & GEN8_CTX_STATUS_PREEMPTED) + return CSB_PREEMPT; + + if (*execlists->active) + return CSB_COMPLETE; + + return CSB_NOP; +} + static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1316,8 +1340,6 @@ static void process_csb(struct intel_engine_cs *engine) rmb(); do { - unsigned int status; - if (++head == num_entries) head = 0; @@ -1343,10 +1365,16 @@ static void process_csb(struct intel_engine_cs *engine) engine->name, head, buf[2 * head + 0], buf[2 * head + 1]); - status = buf[2 * head]; - if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) { + switch (csb_parse(execlists, buf + 2 * head)) { + case CSB_PREEMPT: /* cancel old inflight, prepare for switch */ + trace_ports(execlists, "preempted", execlists->active); + + while (*execlists->active) + execlists_schedule_out(*execlists->active++); + + /* fallthrough */ + case CSB_PROMOTE: /* switch pending to inflight */ GEM_BUG_ON(*execlists->active); -promote: GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); execlists->active = memcpy(execlists->inflight, @@ -1355,25 +1383,17 @@ static void process_csb(struct intel_engine_cs *engine) sizeof(*execlists->pending)); execlists->pending[0] = NULL; + trace_ports(execlists, "promoted", execlists->active); + if (enable_timeslice(engine)) 
mod_timer(&execlists->timer, jiffies + 1); if (!inject_preempt_hang(execlists)) ring_set_paused(engine, 0); - } else if (status & GEN8_CTX_STATUS_PREEMPTED) { - struct i915_request * const *port = execlists->active; - - trace_ports(execlists, "preempted", execlists->active); - - while (*port) - execlists_schedule_out(*port++); - - goto promote; - } else if (*execlists->active) { - struct i915_request *rq = *execlists->active++; + break; - trace_ports(execlists, "completed", - execlists->active - 1); + case CSB_COMPLETE: /* port0 completed, advanced to port1 */ + trace_ports(execlists, "completed", execlists->active); /* * We rely on the hardware being strongly @@ -1381,11 +1401,15 @@ static void process_csb(struct intel_engine_cs *engine) * coherent (visible from the CPU) before the * user interrupt and CSB is processed. */ - GEM_BUG_ON(!i915_request_completed(rq)); - execlists_schedule_out(rq); + GEM_BUG_ON(!i915_request_completed(*execlists->active)); + execlists_schedule_out(*execlists->active++); GEM_BUG_ON(execlists->active - execlists->inflight > execlists_num_ports(execlists)); + break; + + case CSB_NOP: + break; } } while (head != tail);
Daniele pointed out that the CSB status information will change with Tigerlake and suggested that we could rearrange our state machine to hide the differences in generation. gcc also prefers the explicit state machine, so make it so: process_csb 1980 1967 -13 Suggested-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> --- drivers/gpu/drm/i915/gt/intel_lrc.c | 64 ++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 20 deletions(-)