diff mbox

[v6,11/34] drm/i915: Added deferred work handler for scheduler

Message ID 1461172435-4256-12-git-send-email-John.C.Harrison@Intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

John Harrison April 20, 2016, 5:13 p.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

The scheduler needs to do interrupt triggered work that is too complex
to do in the interrupt handler. Thus it requires a deferred work
handler to process such tasks asynchronously.

v2: Updated to reduce mutex lock usage. The lock is now only held for
the minimum time within the remove function rather than for the whole
of the worker thread's operation.

v5: Removed objectionable white space and added some documentation.
[Joonas Lahtinen]

v6: Updated to newer nightly (lots of ring -> engine renaming).

Added an i915_scheduler_destroy() function instead of doing explicit
clean up of scheduler internals from i915_driver_unload().
[review feedback from Joonas Lahtinen]

For: VIZ-1587
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       | 10 ++++++++++
 drivers/gpu/drm/i915/i915_gem.c       |  2 ++
 drivers/gpu/drm/i915/i915_scheduler.c | 28 ++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_scheduler.h |  1 +
 4 files changed, 39 insertions(+), 2 deletions(-)

Comments

Tvrtko Ursulin June 10, 2016, 4:29 p.m. UTC | #1
Hi,

More random comments.

On 20/04/16 18:13, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> The scheduler needs to do interrupt triggered work that is too complex
> to do in the interrupt handler. Thus it requires a deferred work
> handler to process such tasks asynchronously.
>
> v2: Updated to reduce mutex lock usage. The lock is now only held for
> the minimum time within the remove function rather than for the whole
> of the worker thread's operation.
>
> v5: Removed objectionable white space and added some documentation.
> [Joonas Lahtinen]
>
> v6: Updated to newer nightly (lots of ring -> engine renaming).
>
> Added an i915_scheduler_destroy() function instead of doing explicit
> clean up of scheduler internals from i915_driver_unload().
> [review feedback from Joonas Lahtinen]
>
> For: VIZ-1587
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h       | 10 ++++++++++
>   drivers/gpu/drm/i915/i915_gem.c       |  2 ++
>   drivers/gpu/drm/i915/i915_scheduler.c | 28 ++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/i915_scheduler.h |  1 +
>   4 files changed, 39 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7b62e2c..ed9d829 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1296,6 +1296,16 @@ struct i915_gem_mm {
>   	struct delayed_work retire_work;
>
>   	/**
> +	 * New scheme is to get an interrupt after every work packet
> +	 * in order to allow the low latency scheduling of pending
> +	 * packets. The idea behind adding new packets to a pending
> +	 * queue rather than directly into the hardware ring buffer
> +	 * is to allow high priority packets to overtake low priority
> +	 * ones.
> +	 */
> +	struct work_struct scheduler_work;
> +
> +	/**
>   	 * When we detect an idle GPU, we want to turn on
>   	 * powersaving features. So once we see that there
>   	 * are no more requests outstanding and no more
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 14dc641..50c45f3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5546,6 +5546,8 @@ i915_gem_load_init(struct drm_device *dev)
>   			  i915_gem_retire_work_handler);
>   	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
>   			  i915_gem_idle_work_handler);
> +	INIT_WORK(&dev_priv->mm.scheduler_work,
> +				i915_scheduler_work_handler);
>   	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
>
>   	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 6dd9838..2dc5597 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -95,6 +95,8 @@ void i915_scheduler_destroy(struct drm_i915_private *dev_priv)
>   	if (!scheduler)
>   		return;
>
> +	cancel_work_sync(&dev_priv->mm.scheduler_work);
> +
>   	for (e = 0; e < I915_NUM_ENGINES; e++)
>   		WARN(!list_empty(&scheduler->node_queue[e]), "Destroying with list entries on engine %d!", e);
>
> @@ -738,7 +740,9 @@ static int i915_scheduler_remove_dependent(struct i915_scheduler *scheduler,
>    */
>   void i915_scheduler_wakeup(struct drm_device *dev)
>   {
> -	/* XXX: Need to call i915_scheduler_remove() via work handler. */
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +
> +	queue_work(dev_priv->wq, &dev_priv->mm.scheduler_work);

As I commented in person, sharing this wq with the rest of the driver 
could introduce scheduling latency since it is an ordered (one work item 
at a time) queue.

It would probably be good to create a dedicated wq for the scheduler, 
maybe even a WQ_HIGHPRI one.

>   }
>
>   /**
> @@ -820,7 +824,7 @@ static bool i915_scheduler_remove(struct i915_scheduler *scheduler,
>   	return do_submit;
>   }
>
> -void i915_scheduler_process_work(struct intel_engine_cs *engine)
> +static void i915_scheduler_process_work(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(engine->dev);
>   	struct i915_scheduler *scheduler = dev_priv->scheduler;
> @@ -867,6 +871,26 @@ void i915_scheduler_process_work(struct intel_engine_cs *engine)
>   }
>
>   /**
> + * i915_scheduler_work_handler - scheduler's work handler callback.
> + * @work: Work structure
> + * A lot of the scheduler's work must be done asynchronously in response to
> + * an interrupt or other event. However, that work cannot be done at
> + * interrupt time or in the context of the event signaller (which might in
> + * fact be an interrupt). Thus a worker thread is required. This function
> + * will cause the thread to wake up and do its processing.
> + */
> +void i915_scheduler_work_handler(struct work_struct *work)
> +{
> +	struct intel_engine_cs *engine;
> +	struct drm_i915_private *dev_priv;
> +
> +	dev_priv = container_of(work, struct drm_i915_private, mm.scheduler_work);
> +
> +	for_each_engine(engine, dev_priv)
> +		i915_scheduler_process_work(engine);

I wonder how easy or hard it would be to refactor things a bit so that 
i915_scheduler_process_work does not have to grab and drop the 
global scheduler->lock multiple times. Maybe splitting 
i915_scheduler_process_work into stages could help, for example 
separating the remove step from the submit step.

> +}
> +
> +/**
>    * i915_scheduler_closefile - notify the scheduler that a DRM file handle
>    * has been closed.
>    * @dev: DRM device
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 40398bb..b8d4a343 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -110,5 +110,6 @@ void i915_scheduler_clean_node(struct i915_scheduler_queue_entry *node);
>   int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
>   bool i915_scheduler_notify_request(struct drm_i915_gem_request *req);
>   void i915_scheduler_wakeup(struct drm_device *dev);
> +void i915_scheduler_work_handler(struct work_struct *work);
>
>   #endif  /* _I915_SCHEDULER_H_ */
>

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7b62e2c..ed9d829 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1296,6 +1296,16 @@  struct i915_gem_mm {
 	struct delayed_work retire_work;
 
 	/**
+	 * New scheme is to get an interrupt after every work packet
+	 * in order to allow the low latency scheduling of pending
+	 * packets. The idea behind adding new packets to a pending
+	 * queue rather than directly into the hardware ring buffer
+	 * is to allow high priority packets to overtake low priority
+	 * ones.
+	 */
+	struct work_struct scheduler_work;
+
+	/**
 	 * When we detect an idle GPU, we want to turn on
 	 * powersaving features. So once we see that there
 	 * are no more requests outstanding and no more
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 14dc641..50c45f3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5546,6 +5546,8 @@  i915_gem_load_init(struct drm_device *dev)
 			  i915_gem_retire_work_handler);
 	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
 			  i915_gem_idle_work_handler);
+	INIT_WORK(&dev_priv->mm.scheduler_work,
+				i915_scheduler_work_handler);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 
 	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 6dd9838..2dc5597 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -95,6 +95,8 @@  void i915_scheduler_destroy(struct drm_i915_private *dev_priv)
 	if (!scheduler)
 		return;
 
+	cancel_work_sync(&dev_priv->mm.scheduler_work);
+
 	for (e = 0; e < I915_NUM_ENGINES; e++)
 		WARN(!list_empty(&scheduler->node_queue[e]), "Destroying with list entries on engine %d!", e);
 
@@ -738,7 +740,9 @@  static int i915_scheduler_remove_dependent(struct i915_scheduler *scheduler,
  */
 void i915_scheduler_wakeup(struct drm_device *dev)
 {
-	/* XXX: Need to call i915_scheduler_remove() via work handler. */
+	struct drm_i915_private *dev_priv = to_i915(dev);
+
+	queue_work(dev_priv->wq, &dev_priv->mm.scheduler_work);
 }
 
 /**
@@ -820,7 +824,7 @@  static bool i915_scheduler_remove(struct i915_scheduler *scheduler,
 	return do_submit;
 }
 
-void i915_scheduler_process_work(struct intel_engine_cs *engine)
+static void i915_scheduler_process_work(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = to_i915(engine->dev);
 	struct i915_scheduler *scheduler = dev_priv->scheduler;
@@ -867,6 +871,26 @@  void i915_scheduler_process_work(struct intel_engine_cs *engine)
 }
 
 /**
+ * i915_scheduler_work_handler - scheduler's work handler callback.
+ * @work: Work structure
+ * A lot of the scheduler's work must be done asynchronously in response to
+ * an interrupt or other event. However, that work cannot be done at
+ * interrupt time or in the context of the event signaller (which might in
+ * fact be an interrupt). Thus a worker thread is required. This function
+ * will cause the thread to wake up and do its processing.
+ */
+void i915_scheduler_work_handler(struct work_struct *work)
+{
+	struct intel_engine_cs *engine;
+	struct drm_i915_private *dev_priv;
+
+	dev_priv = container_of(work, struct drm_i915_private, mm.scheduler_work);
+
+	for_each_engine(engine, dev_priv)
+		i915_scheduler_process_work(engine);
+}
+
+/**
  * i915_scheduler_closefile - notify the scheduler that a DRM file handle
  * has been closed.
  * @dev: DRM device
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 40398bb..b8d4a343 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -110,5 +110,6 @@  void i915_scheduler_clean_node(struct i915_scheduler_queue_entry *node);
 int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
 bool i915_scheduler_notify_request(struct drm_i915_gem_request *req);
 void i915_scheduler_wakeup(struct drm_device *dev);
+void i915_scheduler_work_handler(struct work_struct *work);
 
 #endif  /* _I915_SCHEDULER_H_ */