diff mbox

[v5,10/35] drm/i915: Added scheduler hook when closing DRM file handles

Message ID 1455805644-6450-11-git-send-email-John.C.Harrison@Intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

John Harrison Feb. 18, 2016, 2:26 p.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

The scheduler decouples the submission of batch buffers to the driver
with submission of batch buffers to the hardware. Thus it is possible
for an application to close its DRM file handle while there is still
work outstanding. That means the scheduler needs to know about file
close events so it can remove the file pointer from such orphaned
batch buffers and not attempt to dereference it later.

v3: Updated to not wait for outstanding work to complete but merely
remove the file handle reference. The wait was getting excessively
complicated with inter-ring dependencies, pre-emption, and other such
issues.

v4: Changed some white space to keep the style checker happy.

v5: Added function documentation and removed apparently objectionable
white space. [Joonas Lahtinen]

Used lighter weight spinlocks.

For: VIZ-1587
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c       |  3 +++
 drivers/gpu/drm/i915/i915_scheduler.c | 48 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h |  2 ++
 3 files changed, 53 insertions(+)

Comments

Joonas Lahtinen March 1, 2016, 8:59 a.m. UTC | #1
On to, 2016-02-18 at 14:26 +0000, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
> 
> The scheduler decouples the submission of batch buffers to the driver
> with submission of batch buffers to the hardware. Thus it is possible
> for an application to close its DRM file handle while there is still
> work outstanding. That means the scheduler needs to know about file
> close events so it can remove the file pointer from such orphaned
> batch buffers and not attempt to dereference it later.
> 
> v3: Updated to not wait for outstanding work to complete but merely
> remove the file handle reference. The wait was getting excessively
> complicated with inter-ring dependencies, pre-emption, and other such
> issues.
> 
> v4: Changed some white space to keep the style checker happy.
> 
> v5: Added function documentation and removed apparently objectionable
> white space. [Joonas Lahtinen]
> 
> Used lighter weight spinlocks.
> 
> For: VIZ-1587
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_dma.c       |  3 +++
>  drivers/gpu/drm/i915/i915_scheduler.c | 48 +++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_scheduler.h |  2 ++
>  3 files changed, 53 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index a0f5659..678adc7 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -46,6 +46,7 @@
>  #include 
>  #include 
>  #include 
> +#include "i915_scheduler.h"
>  #include 
>  #include 
>  #include 
> @@ -1258,6 +1259,8 @@ void i915_driver_lastclose(struct drm_device *dev)
>  
>  void i915_driver_preclose(struct drm_device *dev, struct drm_file *file)
>  {
> +	i915_scheduler_closefile(dev, file);

Any reason not to put this inside the struct_mutex lock?

> +
>  	mutex_lock(&dev->struct_mutex);
>  	i915_gem_context_close(dev, file);
>  	i915_gem_release(dev, file);
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index fc23ee7..ab5007a 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -872,3 +872,51 @@ void i915_scheduler_process_work(struct intel_engine_cs *ring)
>  	if (do_submit)
>  		intel_runtime_pm_put(dev_priv);
>  }
> +
> +/**
> + * i915_scheduler_closefile - notify the scheduler that a DRM file handle
> + * has been closed.
> + * @dev: DRM device
> + * @file: file being closed
> + *
> + * Goes through the scheduler's queues and removes all connections to the
> + * disappearing file handle that still exist. There is an argument to say
> + * that this should also flush such outstanding work through the hardware.
> + * However, with pre-emption, TDR and other such complications doing so
> + * becomes a locking nightmare. So instead, just warn with a debug message
> + * if the application is leaking uncompleted work and make sure a null
> + * pointer dereference will not follow.
> + */
> +int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file)

Return value not used, should be void if this can not fail.

Other than that,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

> +{
> +	struct i915_scheduler_queue_entry *node;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct i915_scheduler *scheduler = dev_priv->scheduler;
> +	struct intel_engine_cs *ring;
> +	int i;
> +
> +	if (!scheduler)
> +		return 0;
> +
> +	spin_lock_irq(&scheduler->lock);
> +
> +	for_each_ring(ring, dev_priv, i) {
> +		list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
> +			if (node->params.file != file)
> +				continue;
> +
> +			if (!I915_SQS_IS_COMPLETE(node))
> +				DRM_DEBUG_DRIVER("Closing file handle with outstanding work: %d:%d/%d on %s\n",
> +						 node->params.request->uniq,
> +						 node->params.request->seqno,
> +						 node->status,
> +						 ring->name);
> +
> +			node->params.file = NULL;
> +		}
> +	}
> +
> +	spin_unlock_irq(&scheduler->lock);
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 415fec8..0e8b6a9 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -87,6 +87,8 @@ enum {
>  
>  bool i915_scheduler_is_enabled(struct drm_device *dev);
>  int i915_scheduler_init(struct drm_device *dev);
> +int i915_scheduler_closefile(struct drm_device *dev,
> +			     struct drm_file *file);
>  void i915_scheduler_clean_node(struct i915_scheduler_queue_entry *node);
>  int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
>  bool i915_scheduler_notify_request(struct drm_i915_gem_request *req);
John Harrison March 1, 2016, 2:52 p.m. UTC | #2
On 01/03/2016 08:59, Joonas Lahtinen wrote:
> On to, 2016-02-18 at 14:26 +0000, John.C.Harrison@Intel.com wrote:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> The scheduler decouples the submission of batch buffers to the driver
>> with submission of batch buffers to the hardware. Thus it is possible
>> for an application to close its DRM file handle while there is still
>> work outstanding. That means the scheduler needs to know about file
>> close events so it can remove the file pointer from such orphaned
>> batch buffers and not attempt to dereference it later.
>>
>> v3: Updated to not wait for outstanding work to complete but merely
>> remove the file handle reference. The wait was getting excessively
>> complicated with inter-ring dependencies, pre-emption, and other such
>> issues.
>>
>> v4: Changed some white space to keep the style checker happy.
>>
>> v5: Added function documentation and removed apparently objectionable
>> white space. [Joonas Lahtinen]
>>
>> Used lighter weight spinlocks.
>>
>> For: VIZ-1587
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_dma.c       |  3 +++
>>   drivers/gpu/drm/i915/i915_scheduler.c | 48 +++++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_scheduler.h |  2 ++
>>   3 files changed, 53 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
>> index a0f5659..678adc7 100644
>> --- a/drivers/gpu/drm/i915/i915_dma.c
>> +++ b/drivers/gpu/drm/i915/i915_dma.c
>> @@ -46,6 +46,7 @@
>>   #include
>>   #include
>>   #include
>> +#include "i915_scheduler.h"
>>   #include
>>   #include
>>   #include
>> @@ -1258,6 +1259,8 @@ void i915_driver_lastclose(struct drm_device *dev)
>>   
>>   void i915_driver_preclose(struct drm_device *dev, struct drm_file *file)
>>   {
>> +	i915_scheduler_closefile(dev, file);
> Any reason not to put this inside the struct_mutex lock?
Any reason to put it inside? The smaller the processing done while 
holding any given lock the better. The implementation only works on 
scheduler internal data structures so while it does require the 
scheduler's lock, it does not require the driver lock.

>
>> +
>>   	mutex_lock(&dev->struct_mutex);
>>   	i915_gem_context_close(dev, file);
>>   	i915_gem_release(dev, file);
>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
>> index fc23ee7..ab5007a 100644
>> --- a/drivers/gpu/drm/i915/i915_scheduler.c
>> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
>> @@ -872,3 +872,51 @@ void i915_scheduler_process_work(struct intel_engine_cs *ring)
>>   	if (do_submit)
>>   		intel_runtime_pm_put(dev_priv);
>>   }
>> +
>> +/**
>> + * i915_scheduler_closefile - notify the scheduler that a DRM file handle
>> + * has been closed.
>> + * @dev: DRM device
>> + * @file: file being closed
>> + *
>> + * Goes through the scheduler's queues and removes all connections to the
>> + * disappearing file handle that still exist. There is an argument to say
>> + * that this should also flush such outstanding work through the hardware.
>> + * However, with pre-emption, TDR and other such complications doing so
>> + * becomes a locking nightmare. So instead, just warn with a debug message
>> + * if the application is leaking uncompleted work and make sure a null
>> + * pointer dereference will not follow.
>> + */
>> +int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file)
> Return value not used, should be void if this can not fail.
Left over from a previous implementation that was much more complicated.

> Other than that,
>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>
>> +{
>> +	struct i915_scheduler_queue_entry *node;
>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>> +	struct i915_scheduler *scheduler = dev_priv->scheduler;
>> +	struct intel_engine_cs *ring;
>> +	int i;
>> +
>> +	if (!scheduler)
>> +		return 0;
>> +
>> +	spin_lock_irq(&scheduler->lock);
>> +
>> +	for_each_ring(ring, dev_priv, i) {
>> +		list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
>> +			if (node->params.file != file)
>> +				continue;
>> +
>> +			if (!I915_SQS_IS_COMPLETE(node))
>> +				DRM_DEBUG_DRIVER("Closing file handle with outstanding work: %d:%d/%d on %s\n",
>> +						 node->params.request->uniq,
>> +						 node->params.request->seqno,
>> +						 node->status,
>> +						 ring->name);
>> +
>> +			node->params.file = NULL;
>> +		}
>> +	}
>> +
>> +	spin_unlock_irq(&scheduler->lock);
>> +
>> +	return 0;
>> +}
>> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
>> index 415fec8..0e8b6a9 100644
>> --- a/drivers/gpu/drm/i915/i915_scheduler.h
>> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
>> @@ -87,6 +87,8 @@ enum {
>>   
>>   bool i915_scheduler_is_enabled(struct drm_device *dev);
>>   int i915_scheduler_init(struct drm_device *dev);
>> +int i915_scheduler_closefile(struct drm_device *dev,
>> +			     struct drm_file *file);
>>   void i915_scheduler_clean_node(struct i915_scheduler_queue_entry *node);
>>   int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
>>   bool i915_scheduler_notify_request(struct drm_i915_gem_request *req);
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a0f5659..678adc7 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -46,6 +46,7 @@ 
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
 #include <acpi/video.h>
+#include "i915_scheduler.h"
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
 #include <linux/oom.h>
@@ -1258,6 +1259,8 @@  void i915_driver_lastclose(struct drm_device *dev)
 
 void i915_driver_preclose(struct drm_device *dev, struct drm_file *file)
 {
+	i915_scheduler_closefile(dev, file);
+
 	mutex_lock(&dev->struct_mutex);
 	i915_gem_context_close(dev, file);
 	i915_gem_release(dev, file);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index fc23ee7..ab5007a 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -872,3 +872,51 @@  void i915_scheduler_process_work(struct intel_engine_cs *ring)
 	if (do_submit)
 		intel_runtime_pm_put(dev_priv);
 }
+
+/**
+ * i915_scheduler_closefile - notify the scheduler that a DRM file handle
+ * has been closed.
+ * @dev: DRM device
+ * @file: file being closed
+ *
+ * Goes through the scheduler's queues and removes all connections to the
+ * disappearing file handle that still exist. There is an argument to say
+ * that this should also flush such outstanding work through the hardware.
+ * However, with pre-emption, TDR and other such complications doing so
+ * becomes a locking nightmare. So instead, just warn with a debug message
+ * if the application is leaking uncompleted work and make sure a null
+ * pointer dereference will not follow.
+ */
+int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file)
+{
+	struct i915_scheduler_queue_entry *node;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_scheduler *scheduler = dev_priv->scheduler;
+	struct intel_engine_cs *ring;
+	int i;
+
+	if (!scheduler)
+		return 0;
+
+	spin_lock_irq(&scheduler->lock);
+
+	for_each_ring(ring, dev_priv, i) {
+		list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
+			if (node->params.file != file)
+				continue;
+
+			if (!I915_SQS_IS_COMPLETE(node))
+				DRM_DEBUG_DRIVER("Closing file handle with outstanding work: %d:%d/%d on %s\n",
+						 node->params.request->uniq,
+						 node->params.request->seqno,
+						 node->status,
+						 ring->name);
+
+			node->params.file = NULL;
+		}
+	}
+
+	spin_unlock_irq(&scheduler->lock);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 415fec8..0e8b6a9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -87,6 +87,8 @@  enum {
 
 bool i915_scheduler_is_enabled(struct drm_device *dev);
 int i915_scheduler_init(struct drm_device *dev);
+int i915_scheduler_closefile(struct drm_device *dev,
+			     struct drm_file *file);
 void i915_scheduler_clean_node(struct i915_scheduler_queue_entry *node);
 int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
 bool i915_scheduler_notify_request(struct drm_i915_gem_request *req);