[08/11] drm/i915/execlists: Force preemption via reset on timeout

Message ID 20180326115044.2505-9-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson March 26, 2018, 11:50 a.m. UTC
Install a timer when trying to preempt on behalf of an important
context such that if the active context does not honour the preemption
request within the desired timeout, then we reset the GPU to allow the
important context to run.

(Open: should not the timer be started from receiving the high priority
request...)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 53 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |  8 +++++
 2 files changed, 61 insertions(+)

Comments

Tvrtko Ursulin March 27, 2018, 8:51 a.m. UTC | #1
On 26/03/2018 12:50, Chris Wilson wrote:
> Install a timer when trying to preempt on behalf of an important
> context such that if the active context does not honour the preemption
> request within the desired timeout, then we reset the GPU to allow the
> important context to run.

I suggest renaming patch title to "Implement optional preemption delay 
timeout", or "upper bound", or something, as long as it is not "force 
preemption". :)

> (Open: should not the timer be started from receiving the high priority
> request...)

If you think receiving as in execbuf I think not - that would be 
something else and not preempt timeout.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c        | 53 +++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  8 +++++
>   2 files changed, 61 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 50688fc889d9..6da816d23cb3 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -533,6 +533,47 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>   
>   	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
>   	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> +
> +	/* Set a timer to force preemption vs hostile userspace */
> +	if (execlists->queue_preempt_timeout) {
> +		GEM_TRACE("%s timeout=%uns\n",

preempt-timeout ?

> +			  engine->name, execlists->queue_preempt_timeout);
> +		hrtimer_start(&execlists->preempt_timer,
> +			      ktime_set(0, execlists->queue_preempt_timeout),
> +			      HRTIMER_MODE_REL);
> +	}
> +}
> +
> +static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
> +{
> +	struct intel_engine_execlists *execlists =
> +		container_of(hrtimer, typeof(*execlists), preempt_timer);
> +
> +	GEM_TRACE("%s\n",
> +		  container_of(execlists,
> +			       struct intel_engine_cs,
> +			       execlists)->name);
> +
> +	queue_work(system_highpri_wq, &execlists->preempt_reset);

I suppose indirection from hrtimer to worker is for better than jiffie 
timeout granularity? But then queue_work might introduce some delay to 
defeat that.

I am wondering if simply schedule_delayed_work directly wouldn't be good 
enough. I suppose it is a question for the product group. But it is also 
implementation detail.
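
A minimal sketch of the schedule_delayed_work() alternative raised here, for illustration only: it is not part of the posted patch, and preempt_reset_dwork is a hypothetical field standing in for the hrtimer/work pair. Timeout resolution is then bounded by the jiffy.

	/* Hypothetical: a single delayed_work replaces preempt_timer +
	 * preempt_reset.  preempt_reset() would then receive &dwork->work
	 * and need its container_of() adjusted accordingly. */
	INIT_DELAYED_WORK(&execlists->preempt_reset_dwork, preempt_reset);

	/* in inject_preempt_context(), instead of hrtimer_start() */
	if (execlists->queue_preempt_timeout)
		schedule_delayed_work(&execlists->preempt_reset_dwork,
				      nsecs_to_jiffies(execlists->queue_preempt_timeout));

	/* in complete_preempt_context(), instead of hrtimer_try_to_cancel() */
	cancel_delayed_work(&execlists->preempt_reset_dwork);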

> +	return HRTIMER_NORESTART;
> +}
> +
> +static void preempt_reset(struct work_struct *work)
> +{
> +	struct intel_engine_cs *engine =
> +		container_of(work, typeof(*engine), execlists.preempt_reset);
> +
> +	GEM_TRACE("%s\n", engine->name);
> +
> +	tasklet_disable(&engine->execlists.tasklet);
> +
> +	engine->execlists.tasklet.func(engine->execlists.tasklet.data);

Comment on why calling the tasklet directly.

> +
> +	if (execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> +		i915_handle_error(engine->i915, BIT(engine->id), 0,
> +				  "preemption timed out on %s", engine->name);

Can this race with the normal reset and we end up with i915_handle_error 
twice simultaneously?

> +
> +	tasklet_enable(&engine->execlists.tasklet);
>   }
>   
>   static void complete_preempt_context(struct intel_engine_execlists *execlists)
> @@ -542,6 +583,10 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
>   	execlists_cancel_port_requests(execlists);
>   	execlists_unwind_incomplete_requests(execlists);
>   
> +	/* If the timer already fired, complete the reset */
> +	if (hrtimer_try_to_cancel(&execlists->preempt_timer) < 0)
> +		return;

What about timer which had already fired and queued the worker? 
hrtimer_try_to_cancel will return zero for that case I think.

> +
>   	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
>   }
>   
> @@ -708,6 +753,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   			kmem_cache_free(engine->i915->priorities, p);
>   	}
>   done:
> +	execlists->queue_preempt_timeout = 0; /* preemption point passed */
>   	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
>   	execlists->first = rb;
>   	if (submit)
> @@ -864,6 +910,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   
>   	/* Remaining _unready_ requests will be nop'ed when submitted */
>   
> +	execlists->queue_preempt_timeout = 0;
>   	execlists->queue_priority = INT_MIN;
>   	execlists->queue = RB_ROOT;
>   	execlists->first = NULL;
> @@ -1080,6 +1127,7 @@ static void queue_request(struct intel_engine_cs *engine,
>   static void __submit_queue(struct intel_engine_cs *engine, int prio)
>   {
>   		engine->execlists.queue_priority = prio;
> +		engine->execlists.queue_preempt_timeout = 0;
>   		tasklet_hi_schedule(&engine->execlists.tasklet);
>   }
>   
> @@ -2270,6 +2318,11 @@ logical_ring_setup(struct intel_engine_cs *engine)
>   	tasklet_init(&engine->execlists.tasklet,
>   		     execlists_submission_tasklet, (unsigned long)engine);
>   
> +	INIT_WORK(&engine->execlists.preempt_reset, preempt_reset);
> +	hrtimer_init(&engine->execlists.preempt_timer,
> +		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	engine->execlists.preempt_timer.function = preempt_timeout;
> +
>   	logical_ring_default_vfuncs(engine);
>   	logical_ring_default_irqs(engine);
>   }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 4c71dcdc722b..7166f47c8489 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -284,6 +284,11 @@ struct intel_engine_execlists {
>   	 */
>   	int queue_priority;
>   
> +	/**
> +	 * @queue_preempt_timeout: Timeout in ns before forcing preemption.
> +	 */
> +	unsigned int queue_preempt_timeout;
> +
>   	/**
>   	 * @queue: queue of requests, in priority lists
>   	 */
> @@ -313,6 +318,9 @@ struct intel_engine_execlists {
>   	 * @preempt_complete_status: expected CSB upon completing preemption
>   	 */
>   	u32 preempt_complete_status;
> +
> +	struct hrtimer preempt_timer;
> +	struct work_struct preempt_reset;
>   };
>   
>   #define INTEL_ENGINE_CS_MAX_NAME 8
> 

Regards,

Tvrtko
Chris Wilson March 27, 2018, 9:10 a.m. UTC | #2
Quoting Tvrtko Ursulin (2018-03-27 09:51:20)
> 
> On 26/03/2018 12:50, Chris Wilson wrote:
> > Install a timer when trying to preempt on behalf of an important
> > context such that if the active context does not honour the preemption
> > request within the desired timeout, then we reset the GPU to allow the
> > important context to run.
> 
> I suggest renaming patch title to "Implement optional preemption delay 
> timeout", or "upper bound", or something, as long as it is not "force 
> preemption". :)
> 
> > (Open: should not the timer be started from receiving the high priority
> > request...)
> 
> If you think receiving as in execbuf I think not - that would be 
> something else and not preempt timeout.

I'm thinking of submit_request -> [insert huge delay] -> ksoftirqd.

Actually I think it's better just to solve the ksoftirqd adding random
delays since it affects us all over the shop.

The tricky part is that I expect any request to improve (i.e. avoid)
ksoftirqd for our use case to end up as "use threaded-irq",
with which we always incur a scheduler delay as we then do not have our
irq_exit hook. I currently hack kernel/softirq.c to always run at least
one loop for HI_SOFTIRQ before deferring to ksoftirqd.
-Chris
Chris Wilson March 27, 2018, 9:23 a.m. UTC | #3
Quoting Tvrtko Ursulin (2018-03-27 09:51:20)
> 
> On 26/03/2018 12:50, Chris Wilson wrote:
> > +static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
> > +{
> > +     struct intel_engine_execlists *execlists =
> > +             container_of(hrtimer, typeof(*execlists), preempt_timer);
> > +
> > +     GEM_TRACE("%s\n",
> > +               container_of(execlists,
> > +                            struct intel_engine_cs,
> > +                            execlists)->name);
> > +
> > +     queue_work(system_highpri_wq, &execlists->preempt_reset);
> 
> I suppose indirection from hrtimer to worker is for better than jiffie 
> timeout granularity? But then queue_work might introduce some delay to 
> defeat that.

Yes. It's betting on highpri_wq being just that. We can do better with
our own RT kthread and a wakeup from the hrtimer if required.

Hmm, the original plan (watchdog TDR) was to avoid using the global
reset entirely. The per-engine reset (although it needs serialising with
itself) doesn't need struct_mutex, so we should be able to do that from
the timer directly, and just kick off the global reset on failure.

That sounds a whole lot better, let's dust off that code and see what
breaks.

> I am wondering if simply schedule_delayed_work directly wouldn't be good 
> enough. I suppose it is a question for the product group. But it is also 
> implementation detail.
> 
> > +     return HRTIMER_NORESTART;
> > +}
> > +
> > +static void preempt_reset(struct work_struct *work)
> > +{
> > +     struct intel_engine_cs *engine =
> > +             container_of(work, typeof(*engine), execlists.preempt_reset);
> > +
> > +     GEM_TRACE("%s\n", engine->name);
> > +
> > +     tasklet_disable(&engine->execlists.tasklet);
> > +
> > +     engine->execlists.tasklet.func(engine->execlists.tasklet.data);
> 
> Comment on why calling the tasklet directly.

/* ksoftirqd hates me; I hate ksoftirqd. */
 
> > +
> > +     if (execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> > +             i915_handle_error(engine->i915, BIT(engine->id), 0,
> > +                               "preemption timed out on %s", engine->name);
> 
> Can this race with the normal reset and we end up with i915_handle_error 
> twice simultaneously?

i915_handle_error() serialises itself against multiple calls to the same
set of engines and against a global reset.

There's a window for us to try and reset the same request twice (in
quick succession) though. That's not good.

Also I think we need some more hand holding here to provoke the right
reset. Hmm. We may need to break i915_handle_error() down quite a bit.

(Ok decided to try and not use i915_handle_error() here, at least not on
the immediate path.)

Hmm, going back to that discussion, we will have to dig through the
details of when exactly the hrtimer callback runs from softirq, or else we
may deadlock waiting on the tasklet. Hmm indeed.

> > +     tasklet_enable(&engine->execlists.tasklet);
> >   }
> >   
> >   static void complete_preempt_context(struct intel_engine_execlists *execlists)
> > @@ -542,6 +583,10 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
> >       execlists_cancel_port_requests(execlists);
> >       execlists_unwind_incomplete_requests(execlists);
> >   
> > +     /* If the timer already fired, complete the reset */
> > +     if (hrtimer_try_to_cancel(&execlists->preempt_timer) < 0)
> > +             return;
> 
> What about timer which had already fired and queued the worker? 
> hrtimer_try_to_cancel will return zero for that case I think.

That's -1. 0 is the timer already fired and executed. 1 is pending.

If the timer fired, executed its callback and then we execute, we
clear the pending flag and proceed as normal (also when we get called
from inside the worker after the hrtimer). The worker sees the cleared
flag in that case and skips the reset.
-Chris
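
For reference, a sketch of how the three hrtimer_try_to_cancel() return values map onto complete_preempt_context() in the patch; this is illustrative only, the posted code simply tests for a negative return:

	switch (hrtimer_try_to_cancel(&execlists->preempt_timer)) {
	case 1:
		/* Timer was still pending and is now cancelled. */
		break;
	case 0:
		/* Timer already fired and its callback ran; preempt_reset()
		 * may be queued, but it sees EXECLISTS_ACTIVE_PREEMPT
		 * cleared below and skips the reset. */
		break;
	case -1:
		/* Callback is running right now; leave the PREEMPT flag
		 * set and let the reset complete. */
		return;
	}
	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);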
jeff.mcgee@intel.com March 27, 2018, 3:40 p.m. UTC | #4
On Tue, Mar 27, 2018 at 09:51:20AM +0100, Tvrtko Ursulin wrote:
> 
> On 26/03/2018 12:50, Chris Wilson wrote:
> >Install a timer when trying to preempt on behalf of an important
> >context such that if the active context does not honour the preemption
> >request within the desired timeout, then we reset the GPU to allow the
> >important context to run.
> 
> I suggest renaming patch title to "Implement optional preemption
> delay timeout", or "upper bound", or something, as long as it is not
> "force preemption". :)
> 
> >(Open: should not the timer be started from receiving the high priority
> >request...)
> 
> If you think receiving as in execbuf I think not - that would be
> something else and not preempt timeout.
> 
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/intel_lrc.c        | 53 +++++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |  8 +++++
> >  2 files changed, 61 insertions(+)
> >
> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index 50688fc889d9..6da816d23cb3 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -533,6 +533,47 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> >  	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
> >  	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> >+
> >+	/* Set a timer to force preemption vs hostile userspace */
> >+	if (execlists->queue_preempt_timeout) {
> >+		GEM_TRACE("%s timeout=%uns\n",
> 
> preempt-timeout ?
> 
> >+			  engine->name, execlists->queue_preempt_timeout);
> >+		hrtimer_start(&execlists->preempt_timer,
> >+			      ktime_set(0, execlists->queue_preempt_timeout),
> >+			      HRTIMER_MODE_REL);
> >+	}
> >+}
> >+
> >+static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
> >+{
> >+	struct intel_engine_execlists *execlists =
> >+		container_of(hrtimer, typeof(*execlists), preempt_timer);
> >+
> >+	GEM_TRACE("%s\n",
> >+		  container_of(execlists,
> >+			       struct intel_engine_cs,
> >+			       execlists)->name);
> >+
> >+	queue_work(system_highpri_wq, &execlists->preempt_reset);
> 
> I suppose indirection from hrtimer to worker is for better than
> jiffie timeout granularity? But then queue_work might introduce some
> delay to defeat that.
> 
> I am wondering if simply schedule_delayed_work directly wouldn't be
> good enough. I suppose it is a question for the product group. But
> it is also implementation detail.
> 
I started with schedule_delayed_work in my implementation hoping for
at least consistent msec accuracy, but it was all over the place.
We need msec granularity for the automotive use cases.
-Jeff

> >+	return HRTIMER_NORESTART;
> >+}
> >+
> >+static void preempt_reset(struct work_struct *work)
> >+{
> >+	struct intel_engine_cs *engine =
> >+		container_of(work, typeof(*engine), execlists.preempt_reset);
> >+
> >+	GEM_TRACE("%s\n", engine->name);
> >+
> >+	tasklet_disable(&engine->execlists.tasklet);
> >+
> >+	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
> 
> Comment on why calling the tasklet directly.
> 
> >+
> >+	if (execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> >+		i915_handle_error(engine->i915, BIT(engine->id), 0,
> >+				  "preemption timed out on %s", engine->name);
> 
> Can this race with the normal reset and we end up with
> i915_handle_error twice simultaneously?
> 
> >+
> >+	tasklet_enable(&engine->execlists.tasklet);
> >  }
> >  static void complete_preempt_context(struct intel_engine_execlists *execlists)
> >@@ -542,6 +583,10 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
> >  	execlists_cancel_port_requests(execlists);
> >  	execlists_unwind_incomplete_requests(execlists);
> >+	/* If the timer already fired, complete the reset */
> >+	if (hrtimer_try_to_cancel(&execlists->preempt_timer) < 0)
> >+		return;
> 
> What about timer which had already fired and queued the worker?
> hrtimer_try_to_cancel will return zero for that case I think.
> 
> >+
> >  	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> >  }
> >@@ -708,6 +753,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >  			kmem_cache_free(engine->i915->priorities, p);
> >  	}
> >  done:
> >+	execlists->queue_preempt_timeout = 0; /* preemption point passed */
> >  	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
> >  	execlists->first = rb;
> >  	if (submit)
> >@@ -864,6 +910,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> >  	/* Remaining _unready_ requests will be nop'ed when submitted */
> >+	execlists->queue_preempt_timeout = 0;
> >  	execlists->queue_priority = INT_MIN;
> >  	execlists->queue = RB_ROOT;
> >  	execlists->first = NULL;
> >@@ -1080,6 +1127,7 @@ static void queue_request(struct intel_engine_cs *engine,
> >  static void __submit_queue(struct intel_engine_cs *engine, int prio)
> >  {
> >  		engine->execlists.queue_priority = prio;
> >+		engine->execlists.queue_preempt_timeout = 0;
> >  		tasklet_hi_schedule(&engine->execlists.tasklet);
> >  }
> >@@ -2270,6 +2318,11 @@ logical_ring_setup(struct intel_engine_cs *engine)
> >  	tasklet_init(&engine->execlists.tasklet,
> >  		     execlists_submission_tasklet, (unsigned long)engine);
> >+	INIT_WORK(&engine->execlists.preempt_reset, preempt_reset);
> >+	hrtimer_init(&engine->execlists.preempt_timer,
> >+		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> >+	engine->execlists.preempt_timer.function = preempt_timeout;
> >+
> >  	logical_ring_default_vfuncs(engine);
> >  	logical_ring_default_irqs(engine);
> >  }
> >diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >index 4c71dcdc722b..7166f47c8489 100644
> >--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> >+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >@@ -284,6 +284,11 @@ struct intel_engine_execlists {
> >  	 */
> >  	int queue_priority;
> >+	/**
> >+	 * @queue_preempt_timeout: Timeout in ns before forcing preemption.
> >+	 */
> >+	unsigned int queue_preempt_timeout;
> >+
> >  	/**
> >  	 * @queue: queue of requests, in priority lists
> >  	 */
> >@@ -313,6 +318,9 @@ struct intel_engine_execlists {
> >  	 * @preempt_complete_status: expected CSB upon completing preemption
> >  	 */
> >  	u32 preempt_complete_status;
> >+
> >+	struct hrtimer preempt_timer;
> >+	struct work_struct preempt_reset;
> >  };
> >  #define INTEL_ENGINE_CS_MAX_NAME 8
> >
> 
> Regards,
> 
> Tvrtko
jeff.mcgee@intel.com March 27, 2018, 3:50 p.m. UTC | #5
On Mon, Mar 26, 2018 at 12:50:41PM +0100, Chris Wilson wrote:
> Install a timer when trying to preempt on behalf of an important
> context such that if the active context does not honour the preemption
> request within the desired timeout, then we reset the GPU to allow the
> important context to run.
> 
> (Open: should not the timer be started from receiving the high priority
> request...)
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c        | 53 +++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  8 +++++
>  2 files changed, 61 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 50688fc889d9..6da816d23cb3 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -533,6 +533,47 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>  
>  	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
>  	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> +
> +	/* Set a timer to force preemption vs hostile userspace */
> +	if (execlists->queue_preempt_timeout) {
> +		GEM_TRACE("%s timeout=%uns\n",
> +			  engine->name, execlists->queue_preempt_timeout);
> +		hrtimer_start(&execlists->preempt_timer,
> +			      ktime_set(0, execlists->queue_preempt_timeout),
> +			      HRTIMER_MODE_REL);
> +	}
> +}
> +
> +static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
> +{
> +	struct intel_engine_execlists *execlists =
> +		container_of(hrtimer, typeof(*execlists), preempt_timer);
> +
> +	GEM_TRACE("%s\n",
> +		  container_of(execlists,
> +			       struct intel_engine_cs,
> +			       execlists)->name);
> +
> +	queue_work(system_highpri_wq, &execlists->preempt_reset);
> +	return HRTIMER_NORESTART;
> +}
> +
> +static void preempt_reset(struct work_struct *work)
> +{
> +	struct intel_engine_cs *engine =
> +		container_of(work, typeof(*engine), execlists.preempt_reset);
> +
> +	GEM_TRACE("%s\n", engine->name);
> +
> +	tasklet_disable(&engine->execlists.tasklet);
> +
> +	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
This seems redundant with the execlists_reset_prepare already doing a process_csb
at a later time to decide whether to execute or abort the reset. In your
previous proposal you suggested tasklet disable and kill here.

> +
> +	if (execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> +		i915_handle_error(engine->i915, BIT(engine->id), 0,
> +				  "preemption timed out on %s", engine->name);
> +
> +	tasklet_enable(&engine->execlists.tasklet);
>  }
>  
>  static void complete_preempt_context(struct intel_engine_execlists *execlists)
> @@ -542,6 +583,10 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
>  	execlists_cancel_port_requests(execlists);
>  	execlists_unwind_incomplete_requests(execlists);
>  
> +	/* If the timer already fired, complete the reset */
> +	if (hrtimer_try_to_cancel(&execlists->preempt_timer) < 0)
> +		return;
> +
>  	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
>  }
>  
> @@ -708,6 +753,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  			kmem_cache_free(engine->i915->priorities, p);
>  	}
>  done:
> +	execlists->queue_preempt_timeout = 0; /* preemption point passed */
>  	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
>  	execlists->first = rb;
>  	if (submit)
> @@ -864,6 +910,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  
>  	/* Remaining _unready_ requests will be nop'ed when submitted */
>  
> +	execlists->queue_preempt_timeout = 0;
>  	execlists->queue_priority = INT_MIN;
>  	execlists->queue = RB_ROOT;
>  	execlists->first = NULL;
> @@ -1080,6 +1127,7 @@ static void queue_request(struct intel_engine_cs *engine,
>  static void __submit_queue(struct intel_engine_cs *engine, int prio)
>  {
>  		engine->execlists.queue_priority = prio;
> +		engine->execlists.queue_preempt_timeout = 0;
>  		tasklet_hi_schedule(&engine->execlists.tasklet);
>  }
>  
> @@ -2270,6 +2318,11 @@ logical_ring_setup(struct intel_engine_cs *engine)
>  	tasklet_init(&engine->execlists.tasklet,
>  		     execlists_submission_tasklet, (unsigned long)engine);
>  
> +	INIT_WORK(&engine->execlists.preempt_reset, preempt_reset);
> +	hrtimer_init(&engine->execlists.preempt_timer,
> +		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	engine->execlists.preempt_timer.function = preempt_timeout;
> +
>  	logical_ring_default_vfuncs(engine);
>  	logical_ring_default_irqs(engine);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 4c71dcdc722b..7166f47c8489 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -284,6 +284,11 @@ struct intel_engine_execlists {
>  	 */
>  	int queue_priority;
>  
> +	/**
> +	 * @queue_preempt_timeout: Timeout in ns before forcing preemption.
> +	 */
> +	unsigned int queue_preempt_timeout;
> +
>  	/**
>  	 * @queue: queue of requests, in priority lists
>  	 */
> @@ -313,6 +318,9 @@ struct intel_engine_execlists {
>  	 * @preempt_complete_status: expected CSB upon completing preemption
>  	 */
>  	u32 preempt_complete_status;
> +
> +	struct hrtimer preempt_timer;
> +	struct work_struct preempt_reset;
>  };
>  
>  #define INTEL_ENGINE_CS_MAX_NAME 8
> -- 
> 2.16.3
>
Chris Wilson March 28, 2018, 8:59 a.m. UTC | #6
Quoting Chris Wilson (2018-03-27 10:23:26)
> Quoting Tvrtko Ursulin (2018-03-27 09:51:20)
> > 
> > On 26/03/2018 12:50, Chris Wilson wrote:
> > > +static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
> > > +{
> > > +     struct intel_engine_execlists *execlists =
> > > +             container_of(hrtimer, typeof(*execlists), preempt_timer);
> > > +
> > > +     GEM_TRACE("%s\n",
> > > +               container_of(execlists,
> > > +                            struct intel_engine_cs,
> > > +                            execlists)->name);
> > > +
> > > +     queue_work(system_highpri_wq, &execlists->preempt_reset);
> > 
> > I suppose indirection from hrtimer to worker is for better than jiffie 
> > timeout granularity? But then queue_work might introduce some delay to 
> > defeat that.
> 
> Yes. It's betting on highpri_wq being just that. We can do better with
> our own RT kthread and a wakeup from the hrtimer if required.
> 
> Hmm, the original plan (watchdog TDR) was to avoid using the global
> reset entirely. The per-engine reset (although it needs serialising with
> itself) doesn't need struct_mutex, so we should be able to do that from
> the timer directly, and just kick off the global reset on failure.
> 
> That sounds a whole lot better, let's dust off that code and see what
> breaks.

So I think something along the lines of

+static int try_execlists_reset(struct intel_engine_execlists *execlists)
+{
+       struct intel_engine_cs *engine =
+               container_of(execlists, typeof(*engine), execlists);
+       int err = -EBUSY;
+
+       if (!test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted) &&
+           tasklet_trylock(&execlists->tasklet)) {
+               unsigned long *lock = &engine->i915->gpu_error.flags;
+               unsigned int bit = I915_RESET_ENGINE + engine->id;
+
+               if (!test_and_set_bit(bit, lock)) {
+                       tasklet_disable(&engine->execlists.tasklet);
+                       err = i915_reset_engine(engine,
+                                               "preemption timeout");
+                       tasklet_enable(&engine->execlists.tasklet);
+
+                       clear_bit(bit, lock);
+                       wake_up_bit(lock, bit);
+               }
+
+               tasklet_unlock(&execlists->tasklet);
+       }
+
+       return err;
+}

should do the trick, with a fallback to worker+i915_handle_error for the
cases where we can't ensure softirq safety.
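
As an illustration of that fallback (not part of the posted series; try_execlists_reset() refers to the sketch above), the hrtimer callback might then look roughly like:

static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
{
	struct intel_engine_execlists *execlists =
		container_of(hrtimer, typeof(*execlists), preempt_timer);

	/* Try the per-engine reset directly from the timer; if the tasklet
	 * or the engine-reset bit is contended, fall back to the worker,
	 * which ends up in i915_handle_error() as in the original patch. */
	if (try_execlists_reset(execlists) == -EBUSY)
		queue_work(system_highpri_wq, &execlists->preempt_reset);

	return HRTIMER_NORESTART;
}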

There's still the issue of two resets in quick succession being treated
as a failure. That's also an issue for the current per-engine failover
to whole-device reset.
-Chris
Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 50688fc889d9..6da816d23cb3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -533,6 +533,47 @@  static void inject_preempt_context(struct intel_engine_cs *engine)
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+
+	/* Set a timer to force preemption vs hostile userspace */
+	if (execlists->queue_preempt_timeout) {
+		GEM_TRACE("%s timeout=%uns\n",
+			  engine->name, execlists->queue_preempt_timeout);
+		hrtimer_start(&execlists->preempt_timer,
+			      ktime_set(0, execlists->queue_preempt_timeout),
+			      HRTIMER_MODE_REL);
+	}
+}
+
+static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
+{
+	struct intel_engine_execlists *execlists =
+		container_of(hrtimer, typeof(*execlists), preempt_timer);
+
+	GEM_TRACE("%s\n",
+		  container_of(execlists,
+			       struct intel_engine_cs,
+			       execlists)->name);
+
+	queue_work(system_highpri_wq, &execlists->preempt_reset);
+	return HRTIMER_NORESTART;
+}
+
+static void preempt_reset(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+		container_of(work, typeof(*engine), execlists.preempt_reset);
+
+	GEM_TRACE("%s\n", engine->name);
+
+	tasklet_disable(&engine->execlists.tasklet);
+
+	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
+
+	if (execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+		i915_handle_error(engine->i915, BIT(engine->id), 0,
+				  "preemption timed out on %s", engine->name);
+
+	tasklet_enable(&engine->execlists.tasklet);
 }
 
 static void complete_preempt_context(struct intel_engine_execlists *execlists)
@@ -542,6 +583,10 @@  static void complete_preempt_context(struct intel_engine_execlists *execlists)
 	execlists_cancel_port_requests(execlists);
 	execlists_unwind_incomplete_requests(execlists);
 
+	/* If the timer already fired, complete the reset */
+	if (hrtimer_try_to_cancel(&execlists->preempt_timer) < 0)
+		return;
+
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
@@ -708,6 +753,7 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
 done:
+	execlists->queue_preempt_timeout = 0; /* preemption point passed */
 	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
 	execlists->first = rb;
 	if (submit)
@@ -864,6 +910,7 @@  static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
+	execlists->queue_preempt_timeout = 0;
 	execlists->queue_priority = INT_MIN;
 	execlists->queue = RB_ROOT;
 	execlists->first = NULL;
@@ -1080,6 +1127,7 @@  static void queue_request(struct intel_engine_cs *engine,
 static void __submit_queue(struct intel_engine_cs *engine, int prio)
 {
 		engine->execlists.queue_priority = prio;
+		engine->execlists.queue_preempt_timeout = 0;
 		tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
@@ -2270,6 +2318,11 @@  logical_ring_setup(struct intel_engine_cs *engine)
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
 
+	INIT_WORK(&engine->execlists.preempt_reset, preempt_reset);
+	hrtimer_init(&engine->execlists.preempt_timer,
+		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	engine->execlists.preempt_timer.function = preempt_timeout;
+
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4c71dcdc722b..7166f47c8489 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -284,6 +284,11 @@  struct intel_engine_execlists {
 	 */
 	int queue_priority;
 
+	/**
+	 * @queue_preempt_timeout: Timeout in ns before forcing preemption.
+	 */
+	unsigned int queue_preempt_timeout;
+
 	/**
 	 * @queue: queue of requests, in priority lists
 	 */
@@ -313,6 +318,9 @@  struct intel_engine_execlists {
 	 * @preempt_complete_status: expected CSB upon completing preemption
 	 */
 	u32 preempt_complete_status;
+
+	struct hrtimer preempt_timer;
+	struct work_struct preempt_reset;
 };
 
 #define INTEL_ENGINE_CS_MAX_NAME 8