diff mbox series

[18/46] drm/i915: Replace global_seqno with a hangcheck heartbeat seqno

Message ID 20190206130356.18771-19-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/46] drm/i915: Hack and slash, throttle execbuffer hogs | expand

Commit Message

Chris Wilson Feb. 6, 2019, 1:03 p.m. UTC
To determine whether an engine has 'stuck', we simply check whether or
not is still on the same seqno for several seconds. To keep this simple
mechanism intact over the loss of a global seqno, we can simply add a
new global heartbeat seqno instead. As we cannot know the sequence in
which requests will then be completed, we use a primitive random number
generator instead (with a cycle long enough to not matter over an
interval of a few thousand requests between hangcheck samples).

The alternative to using a dedicated seqno on every request is to issue
a heartbeat request and query its progress through the system. Sadly
this requires us to reduce struct_mutex so that we can issue requests
without requiring that bkl.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  7 ++---
 drivers/gpu/drm/i915/intel_engine_cs.c  |  5 ++--
 drivers/gpu/drm/i915/intel_hangcheck.c  |  6 ++---
 drivers/gpu/drm/i915/intel_lrc.c        | 15 +++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 36 +++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h | 19 ++++++++++++-
 6 files changed, 77 insertions(+), 11 deletions(-)

Comments

Tvrtko Ursulin Feb. 11, 2019, 12:40 p.m. UTC | #1
On 06/02/2019 13:03, Chris Wilson wrote:
> To determine whether an engine has 'stuck', we simply check whether or
> not is still on the same seqno for several seconds. To keep this simple
> mechanism intact over the loss of a global seqno, we can simply add a
> new global heartbeat seqno instead. As we cannot know the sequence in
> which requests will then be completed, we use a primitive random number
> generator instead (with a cycle long enough to not matter over an
> interval of a few thousand requests between hangcheck samples).

We couldn't keep the global seqno just for hangcheck puposes? I mean as 
long as it is unique, which would be guaranteed by obtaining an 
increment on every submission to hw and storing it in atomic_t 
i915->hangcheck_global_seqno / rq->hangcheck_global_seqno, hangcheck 
does not care about the order of execution, no?

Regards,

Tvrtko


> The alternative to using a dedicated seqno on every request is to issue
> a heartbeat request and query its progress through the system. Sadly
> this requires us to reduce struct_mutex so that we can issue requests
> without requiring that bkl.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c     |  7 ++---
>   drivers/gpu/drm/i915/intel_engine_cs.c  |  5 ++--
>   drivers/gpu/drm/i915/intel_hangcheck.c  |  6 ++---
>   drivers/gpu/drm/i915/intel_lrc.c        | 15 +++++++++++
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 36 +++++++++++++++++++++++--
>   drivers/gpu/drm/i915/intel_ringbuffer.h | 19 ++++++++++++-
>   6 files changed, 77 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index af53a2d07f6b..846bd0de3cfa 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1295,7 +1295,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>   	with_intel_runtime_pm(dev_priv, wakeref) {
>   		for_each_engine(engine, dev_priv, id) {
>   			acthd[id] = intel_engine_get_active_head(engine);
> -			seqno[id] = intel_engine_get_seqno(engine);
> +			seqno[id] = intel_engine_get_hangcheck_seqno(engine);
>   		}
>   
>   		intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
> @@ -1315,8 +1315,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>   	for_each_engine(engine, dev_priv, id) {
>   		seq_printf(m, "%s:\n", engine->name);
>   		seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",
> -			   engine->hangcheck.seqno, seqno[id],
> -			   intel_engine_last_submit(engine),
> +			   engine->hangcheck.last_seqno,
> +			   seqno[id],
> +			   engine->hangcheck.next_seqno,
>   			   jiffies_to_msecs(jiffies -
>   					    engine->hangcheck.action_timestamp));
>   
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 57cfc4c551c9..e1e54b7448b4 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1538,10 +1538,11 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	if (i915_terminally_wedged(&engine->i915->gpu_error))
>   		drm_printf(m, "*** WEDGED ***\n");
>   
> -	drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n",
> +	drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x/%x [%d ms]\n",
>   		   intel_engine_get_seqno(engine),
>   		   intel_engine_last_submit(engine),
> -		   engine->hangcheck.seqno,
> +		   engine->hangcheck.last_seqno,
> +		   engine->hangcheck.next_seqno,
>   		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
>   	drm_printf(m, "\tReset count: %d (global %d)\n",
>   		   i915_reset_engine_count(error, engine),
> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
> index a219c796e56d..e04b2560369e 100644
> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
> @@ -133,21 +133,21 @@ static void hangcheck_load_sample(struct intel_engine_cs *engine,
>   				  struct hangcheck *hc)
>   {
>   	hc->acthd = intel_engine_get_active_head(engine);
> -	hc->seqno = intel_engine_get_seqno(engine);
> +	hc->seqno = intel_engine_get_hangcheck_seqno(engine);
>   }
>   
>   static void hangcheck_store_sample(struct intel_engine_cs *engine,
>   				   const struct hangcheck *hc)
>   {
>   	engine->hangcheck.acthd = hc->acthd;
> -	engine->hangcheck.seqno = hc->seqno;
> +	engine->hangcheck.last_seqno = hc->seqno;
>   }
>   
>   static enum intel_engine_hangcheck_action
>   hangcheck_get_action(struct intel_engine_cs *engine,
>   		     const struct hangcheck *hc)
>   {
> -	if (engine->hangcheck.seqno != hc->seqno)
> +	if (engine->hangcheck.last_seqno != hc->seqno)
>   		return ENGINE_ACTIVE_SEQNO;
>   
>   	if (intel_engine_is_idle(engine))
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d105187070d4..342d3a91be03 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -180,6 +180,12 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
>   		I915_GEM_HWS_INDEX_ADDR);
>   }
>   
> +static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine)
> +{
> +	return (i915_ggtt_offset(engine->status_page.vma) +
> +		I915_GEM_HWS_HANGCHECK_ADDR);
> +}
> +
>   static inline struct i915_priolist *to_priolist(struct rb_node *rb)
>   {
>   	return rb_entry(rb, struct i915_priolist, node);
> @@ -2235,6 +2241,10 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
>   				  request->fence.seqno,
>   				  request->timeline->hwsp_offset);
>   
> +	cs = gen8_emit_ggtt_write(cs,
> +				  intel_engine_next_hangcheck_seqno(request->engine),
> +				  intel_hws_hangcheck_address(request->engine));
> +
>   	cs = gen8_emit_ggtt_write(cs,
>   				  request->global_seqno,
>   				  intel_hws_seqno_address(request->engine));
> @@ -2259,6 +2269,11 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>   				      PIPE_CONTROL_FLUSH_ENABLE |
>   				      PIPE_CONTROL_CS_STALL);
>   
> +	cs = gen8_emit_ggtt_write_rcs(cs,
> +				      intel_engine_next_hangcheck_seqno(request->engine),
> +				      intel_hws_hangcheck_address(request->engine),
> +				      PIPE_CONTROL_CS_STALL);
> +
>   	cs = gen8_emit_ggtt_write_rcs(cs,
>   				      request->global_seqno,
>   				      intel_hws_seqno_address(request->engine),
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7f841dba87b3..870184bbd169 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -43,6 +43,12 @@
>    */
>   #define LEGACY_REQUEST_SIZE 200
>   
> +static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine)
> +{
> +	return (i915_ggtt_offset(engine->status_page.vma) +
> +		I915_GEM_HWS_HANGCHECK_ADDR);
> +}
> +
>   static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
>   {
>   	return (i915_ggtt_offset(engine->status_page.vma) +
> @@ -316,6 +322,11 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   	*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
>   	*cs++ = rq->fence.seqno;
>   
> +	*cs++ = GFX_OP_PIPE_CONTROL(4);
> +	*cs++ = PIPE_CONTROL_QW_WRITE;
> +	*cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
> +	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
> +
>   	*cs++ = GFX_OP_PIPE_CONTROL(4);
>   	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
>   	*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
> @@ -422,6 +433,11 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   	*cs++ = rq->timeline->hwsp_offset;
>   	*cs++ = rq->fence.seqno;
>   
> +	*cs++ = GFX_OP_PIPE_CONTROL(4);
> +	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
> +	*cs++ = hws_hangcheck_address(rq->engine);
> +	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
> +
>   	*cs++ = GFX_OP_PIPE_CONTROL(4);
>   	*cs++ = (PIPE_CONTROL_QW_WRITE |
>   		 PIPE_CONTROL_GLOBAL_GTT_IVB |
> @@ -447,12 +463,15 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
>   	*cs++ = rq->fence.seqno;
>   
> +	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
> +	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
> +	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
> +
>   	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
>   	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
>   	*cs++ = rq->global_seqno;
>   
>   	*cs++ = MI_USER_INTERRUPT;
> -	*cs++ = MI_NOOP;
>   
>   	rq->tail = intel_ring_offset(rq, cs);
>   	assert_ring_tail_valid(rq->ring, rq->tail);
> @@ -472,6 +491,10 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
>   	*cs++ = rq->fence.seqno;
>   
> +	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
> +	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
> +	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
> +
>   	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
>   	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
>   	*cs++ = rq->global_seqno;
> @@ -487,6 +510,7 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   	*cs++ = 0;
>   
>   	*cs++ = MI_USER_INTERRUPT;
> +	*cs++ = MI_NOOP;
>   
>   	rq->tail = intel_ring_offset(rq, cs);
>   	assert_ring_tail_valid(rq->ring, rq->tail);
> @@ -930,11 +954,16 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
>   	*cs++ = rq->fence.seqno;
>   
> +	*cs++ = MI_STORE_DWORD_INDEX;
> +	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
> +	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
> +
>   	*cs++ = MI_STORE_DWORD_INDEX;
>   	*cs++ = I915_GEM_HWS_INDEX_ADDR;
>   	*cs++ = rq->global_seqno;
>   
>   	*cs++ = MI_USER_INTERRUPT;
> +	*cs++ = MI_NOOP;
>   
>   	rq->tail = intel_ring_offset(rq, cs);
>   	assert_ring_tail_valid(rq->ring, rq->tail);
> @@ -956,6 +985,10 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
>   	*cs++ = rq->fence.seqno;
>   
> +	*cs++ = MI_STORE_DWORD_INDEX;
> +	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
> +	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
> +
>   	BUILD_BUG_ON(GEN5_WA_STORES < 1);
>   	for (i = 0; i < GEN5_WA_STORES; i++) {
>   		*cs++ = MI_STORE_DWORD_INDEX;
> @@ -964,7 +997,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>   	}
>   
>   	*cs++ = MI_USER_INTERRUPT;
> -	*cs++ = MI_NOOP;
>   
>   	rq->tail = intel_ring_offset(rq, cs);
>   	assert_ring_tail_valid(rq->ring, rq->tail);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 493a72ed01af..b30c37ac55a3 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -6,6 +6,7 @@
>   
>   #include <linux/hashtable.h>
>   #include <linux/irq_work.h>
> +#include <linux/random.h>
>   #include <linux/seqlock.h>
>   
>   #include "i915_gem_batch_pool.h"
> @@ -119,7 +120,8 @@ struct intel_instdone {
>   
>   struct intel_engine_hangcheck {
>   	u64 acthd;
> -	u32 seqno;
> +	u32 last_seqno;
> +	u32 next_seqno;
>   	unsigned long action_timestamp;
>   	struct intel_instdone instdone;
>   };
> @@ -718,6 +720,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
>   #define I915_GEM_HWS_INDEX_ADDR		(I915_GEM_HWS_INDEX * sizeof(u32))
>   #define I915_GEM_HWS_PREEMPT		0x32
>   #define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32))
> +#define I915_GEM_HWS_HANGCHECK		0x34
> +#define I915_GEM_HWS_HANGCHECK_ADDR	(I915_GEM_HWS_HANGCHECK * sizeof(u32))
>   #define I915_GEM_HWS_SEQNO		0x40
>   #define I915_GEM_HWS_SEQNO_ADDR		(I915_GEM_HWS_SEQNO * sizeof(u32))
>   #define I915_GEM_HWS_SCRATCH		0x80
> @@ -1078,4 +1082,17 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
>   
>   #endif
>   
> +static inline u32
> +intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine)
> +{
> +	return engine->hangcheck.next_seqno =
> +		next_pseudo_random32(engine->hangcheck.next_seqno);
> +}
> +
> +static inline u32
> +intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
> +{
> +	return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
> +}
> +
>   #endif /* _INTEL_RINGBUFFER_H_ */
>
Chris Wilson Feb. 11, 2019, 12:44 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-02-11 12:40:07)
> 
> On 06/02/2019 13:03, Chris Wilson wrote:
> > To determine whether an engine has 'stuck', we simply check whether or
> > not is still on the same seqno for several seconds. To keep this simple
> > mechanism intact over the loss of a global seqno, we can simply add a
> > new global heartbeat seqno instead. As we cannot know the sequence in
> > which requests will then be completed, we use a primitive random number
> > generator instead (with a cycle long enough to not matter over an
> > interval of a few thousand requests between hangcheck samples).
> 
> We couldn't keep the global seqno just for hangcheck puposes? I mean as 
> long as it is unique, which would be guaranteed by obtaining an 
> increment on every submission to hw and storing it in atomic_t 
> i915->hangcheck_global_seqno / rq->hangcheck_global_seqno, hangcheck 
> does not care about the order of execution, no?

s/global_seqno/hangcheck_seqno/ ?

(a) the goal is to kill off global_seqno entirely so we are all sure
there is no such seqno or ordering anymore
(b) this is a temporary patch and we kill off hangcheck_seqno, just as
soon as I can submit requests without struct_mutex
-Chris
Tvrtko Ursulin Feb. 11, 2019, 4:56 p.m. UTC | #3
On 11/02/2019 12:44, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-02-11 12:40:07)
>>
>> On 06/02/2019 13:03, Chris Wilson wrote:
>>> To determine whether an engine has 'stuck', we simply check whether or
>>> not is still on the same seqno for several seconds. To keep this simple
>>> mechanism intact over the loss of a global seqno, we can simply add a
>>> new global heartbeat seqno instead. As we cannot know the sequence in
>>> which requests will then be completed, we use a primitive random number
>>> generator instead (with a cycle long enough to not matter over an
>>> interval of a few thousand requests between hangcheck samples).
>>
>> We couldn't keep the global seqno just for hangcheck puposes? I mean as
>> long as it is unique, which would be guaranteed by obtaining an
>> increment on every submission to hw and storing it in atomic_t
>> i915->hangcheck_global_seqno / rq->hangcheck_global_seqno, hangcheck
>> does not care about the order of execution, no?
> 
> s/global_seqno/hangcheck_seqno/ ?

Yes sure, I was just trying to express the idea that a "globally" unique 
number is all that I thought we need. Like:

     rq->hangcheck_seqno = atomic_inc_return(&i915->hangcheck_seqno);

Did I get that right then? That we don't really need the pseudo random 
number solution? We could even avoid calling it a seqno if desired. 
rq->unique, wait.. we possibly had this name for something in the past..

> (a) the goal is to kill off global_seqno entirely so we are all sure
> there is no such seqno or ordering anymore
> (b) this is a temporary patch and we kill off hangcheck_seqno, just as
> soon as I can submit requests without struct_mutex

The heartbeat request solution? Is that better than the hangcheck seqno?

Regards,

Tvrtko
Chris Wilson Feb. 12, 2019, 1:36 p.m. UTC | #4
Quoting Tvrtko Ursulin (2019-02-11 16:56:03)
> 
> On 11/02/2019 12:44, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-02-11 12:40:07)
> >>
> >> On 06/02/2019 13:03, Chris Wilson wrote:
> >>> To determine whether an engine has 'stuck', we simply check whether or
> >>> not is still on the same seqno for several seconds. To keep this simple
> >>> mechanism intact over the loss of a global seqno, we can simply add a
> >>> new global heartbeat seqno instead. As we cannot know the sequence in
> >>> which requests will then be completed, we use a primitive random number
> >>> generator instead (with a cycle long enough to not matter over an
> >>> interval of a few thousand requests between hangcheck samples).
> >>
> >> We couldn't keep the global seqno just for hangcheck puposes? I mean as
> >> long as it is unique, which would be guaranteed by obtaining an
> >> increment on every submission to hw and storing it in atomic_t
> >> i915->hangcheck_global_seqno / rq->hangcheck_global_seqno, hangcheck
> >> does not care about the order of execution, no?
> > 
> > s/global_seqno/hangcheck_seqno/ ?
> 
> Yes sure, I was just trying to express the idea that a "globally" unique 
> number is all that I thought we need. Like:
> 
>      rq->hangcheck_seqno = atomic_inc_return(&i915->hangcheck_seqno);
> 
> Did I get that right then? That we don't really need the pseudo random 
> number solution? We could even avoid calling it a seqno if desired. 
> rq->unique, wait.. we possibly had this name for something in the past..

We don't need it to be random, I just picked the psuedo-random number so
we got used to not expecting it to be sequential and to be sure we
didn't make the mistake of assuming it was.
 
> > (a) the goal is to kill off global_seqno entirely so we are all sure
> > there is no such seqno or ordering anymore
> > (b) this is a temporary patch and we kill off hangcheck_seqno, just as
> > soon as I can submit requests without struct_mutex
> 
> The heartbeat request solution? Is that better than the hangcheck seqno?

Yes. We don't need an extra seqno every request and handles preemptible
OpenCL persistent kernels, as well as any other long running compute
batch (thinking some of the WebGL tests, they both expect hangcheck and
expect that isn't too quick afair).
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index af53a2d07f6b..846bd0de3cfa 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1295,7 +1295,7 @@  static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	with_intel_runtime_pm(dev_priv, wakeref) {
 		for_each_engine(engine, dev_priv, id) {
 			acthd[id] = intel_engine_get_active_head(engine);
-			seqno[id] = intel_engine_get_seqno(engine);
+			seqno[id] = intel_engine_get_hangcheck_seqno(engine);
 		}
 
 		intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
@@ -1315,8 +1315,9 @@  static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	for_each_engine(engine, dev_priv, id) {
 		seq_printf(m, "%s:\n", engine->name);
 		seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",
-			   engine->hangcheck.seqno, seqno[id],
-			   intel_engine_last_submit(engine),
+			   engine->hangcheck.last_seqno,
+			   seqno[id],
+			   engine->hangcheck.next_seqno,
 			   jiffies_to_msecs(jiffies -
 					    engine->hangcheck.action_timestamp));
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 57cfc4c551c9..e1e54b7448b4 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1538,10 +1538,11 @@  void intel_engine_dump(struct intel_engine_cs *engine,
 	if (i915_terminally_wedged(&engine->i915->gpu_error))
 		drm_printf(m, "*** WEDGED ***\n");
 
-	drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n",
+	drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x/%x [%d ms]\n",
 		   intel_engine_get_seqno(engine),
 		   intel_engine_last_submit(engine),
-		   engine->hangcheck.seqno,
+		   engine->hangcheck.last_seqno,
+		   engine->hangcheck.next_seqno,
 		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
 	drm_printf(m, "\tReset count: %d (global %d)\n",
 		   i915_reset_engine_count(error, engine),
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index a219c796e56d..e04b2560369e 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -133,21 +133,21 @@  static void hangcheck_load_sample(struct intel_engine_cs *engine,
 				  struct hangcheck *hc)
 {
 	hc->acthd = intel_engine_get_active_head(engine);
-	hc->seqno = intel_engine_get_seqno(engine);
+	hc->seqno = intel_engine_get_hangcheck_seqno(engine);
 }
 
 static void hangcheck_store_sample(struct intel_engine_cs *engine,
 				   const struct hangcheck *hc)
 {
 	engine->hangcheck.acthd = hc->acthd;
-	engine->hangcheck.seqno = hc->seqno;
+	engine->hangcheck.last_seqno = hc->seqno;
 }
 
 static enum intel_engine_hangcheck_action
 hangcheck_get_action(struct intel_engine_cs *engine,
 		     const struct hangcheck *hc)
 {
-	if (engine->hangcheck.seqno != hc->seqno)
+	if (engine->hangcheck.last_seqno != hc->seqno)
 		return ENGINE_ACTIVE_SEQNO;
 
 	if (intel_engine_is_idle(engine))
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d105187070d4..342d3a91be03 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -180,6 +180,12 @@  static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 		I915_GEM_HWS_INDEX_ADDR);
 }
 
+static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine)
+{
+	return (i915_ggtt_offset(engine->status_page.vma) +
+		I915_GEM_HWS_HANGCHECK_ADDR);
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
@@ -2235,6 +2241,10 @@  static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 				  request->fence.seqno,
 				  request->timeline->hwsp_offset);
 
+	cs = gen8_emit_ggtt_write(cs,
+				  intel_engine_next_hangcheck_seqno(request->engine),
+				  intel_hws_hangcheck_address(request->engine));
+
 	cs = gen8_emit_ggtt_write(cs,
 				  request->global_seqno,
 				  intel_hws_seqno_address(request->engine));
@@ -2259,6 +2269,11 @@  static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 				      PIPE_CONTROL_FLUSH_ENABLE |
 				      PIPE_CONTROL_CS_STALL);
 
+	cs = gen8_emit_ggtt_write_rcs(cs,
+				      intel_engine_next_hangcheck_seqno(request->engine),
+				      intel_hws_hangcheck_address(request->engine),
+				      PIPE_CONTROL_CS_STALL);
+
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      request->global_seqno,
 				      intel_hws_seqno_address(request->engine),
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7f841dba87b3..870184bbd169 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -43,6 +43,12 @@ 
  */
 #define LEGACY_REQUEST_SIZE 200
 
+static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine)
+{
+	return (i915_ggtt_offset(engine->status_page.vma) +
+		I915_GEM_HWS_HANGCHECK_ADDR);
+}
+
 static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 {
 	return (i915_ggtt_offset(engine->status_page.vma) +
@@ -316,6 +322,11 @@  static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_QW_WRITE;
+	*cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
 	*cs++ = GFX_OP_PIPE_CONTROL(4);
 	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
@@ -422,6 +433,11 @@  static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = rq->timeline->hwsp_offset;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
+	*cs++ = hws_hangcheck_address(rq->engine);
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
 	*cs++ = GFX_OP_PIPE_CONTROL(4);
 	*cs++ = (PIPE_CONTROL_QW_WRITE |
 		 PIPE_CONTROL_GLOBAL_GTT_IVB |
@@ -447,12 +463,15 @@  static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
 	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
 	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->global_seqno;
 
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -472,6 +491,10 @@  static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
 	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
 	*cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->global_seqno;
@@ -487,6 +510,7 @@  static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = 0;
 
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -930,11 +954,16 @@  static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
 	*cs++ = MI_STORE_DWORD_INDEX;
 	*cs++ = I915_GEM_HWS_INDEX_ADDR;
 	*cs++ = rq->global_seqno;
 
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -956,6 +985,10 @@  static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
 	*cs++ = rq->fence.seqno;
 
+	*cs++ = MI_STORE_DWORD_INDEX;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
+	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
 	BUILD_BUG_ON(GEN5_WA_STORES < 1);
 	for (i = 0; i < GEN5_WA_STORES; i++) {
 		*cs++ = MI_STORE_DWORD_INDEX;
@@ -964,7 +997,6 @@  static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	}
 
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 493a72ed01af..b30c37ac55a3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -6,6 +6,7 @@ 
 
 #include <linux/hashtable.h>
 #include <linux/irq_work.h>
+#include <linux/random.h>
 #include <linux/seqlock.h>
 
 #include "i915_gem_batch_pool.h"
@@ -119,7 +120,8 @@  struct intel_instdone {
 
 struct intel_engine_hangcheck {
 	u64 acthd;
-	u32 seqno;
+	u32 last_seqno;
+	u32 next_seqno;
 	unsigned long action_timestamp;
 	struct intel_instdone instdone;
 };
@@ -718,6 +720,8 @@  intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 #define I915_GEM_HWS_INDEX_ADDR		(I915_GEM_HWS_INDEX * sizeof(u32))
 #define I915_GEM_HWS_PREEMPT		0x32
 #define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32))
+#define I915_GEM_HWS_HANGCHECK		0x34
+#define I915_GEM_HWS_HANGCHECK_ADDR	(I915_GEM_HWS_HANGCHECK * sizeof(u32))
 #define I915_GEM_HWS_SEQNO		0x40
 #define I915_GEM_HWS_SEQNO_ADDR		(I915_GEM_HWS_SEQNO * sizeof(u32))
 #define I915_GEM_HWS_SCRATCH		0x80
@@ -1078,4 +1082,17 @@  static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
 
 #endif
 
+static inline u32
+intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine)
+{
+	return engine->hangcheck.next_seqno =
+		next_pseudo_random32(engine->hangcheck.next_seqno);
+}
+
+static inline u32
+intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
+{
+	return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
+}
+
 #endif /* _INTEL_RINGBUFFER_H_ */