[RFC,17/44] drm/i915: Prelude to splitting i915_gem_do_execbuffer in two

Message ID 1403803475-16337-18-git-send-email-John.C.Harrison@Intel.com (mailing list archive)
State New, archived

Commit Message

John Harrison June 26, 2014, 5:24 p.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

The scheduler decouples the submission of batch buffers to the driver from their
submission to the hardware. This basically means splitting the execbuffer()
function in half. This change rearranges some code in preparation for that split.
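
A rough sketch of where this is heading (illustrative names only; the actual
split is done by a later patch in the series): the front half runs at
submission to the driver and can still fail back to userspace, the back half
runs when the scheduler eventually submits to the hardware.

struct execbuffer_state;                /* illustrative placeholder only */

int i915_gem_do_execbuffer_prepare(struct execbuffer_state *s);
int i915_gem_do_execbuffer_final(struct execbuffer_state *s);

/* What the ioctl path would reduce to once the split has happened: */
int i915_gem_do_execbuffer(struct execbuffer_state *s)
{
        int ret;

        /* Submission to the driver: argument checking, object lookup,
         * reservation/relocation and move_to_active().  Errors here can
         * still be returned to userspace. */
        ret = i915_gem_do_execbuffer_prepare(s);
        if (ret)
                return ret;

        /* Submission to the hardware: runtime PM, cache invalidation,
         * i915_switch_context() and the dispatch.  With the scheduler this
         * half is deferred and may be reordered against other batches. */
        return i915_gem_do_execbuffer_final(s);
}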
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

Comments

Jesse Barnes July 2, 2014, 6:34 p.m. UTC | #1
On Thu, 26 Jun 2014 18:24:08 +0100
John.C.Harrison@Intel.com wrote:

> From: John Harrison <John.C.Harrison@Intel.com>
> 
> The scheduler decouples the submission of batch buffers to the driver with their
> submission to the hardware. This basically means splitting the execbuffer()
> function in half. This change rearranges some code ready for the split to occur.
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   23 ++++++++++++++++-------
>  1 file changed, 16 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index ec274ef..fda9187 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -32,6 +32,7 @@
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  #include <linux/dma_remapping.h>
> +#include "i915_scheduler.h"
>  
>  #define  __EXEC_OBJECT_HAS_PIN (1<<31)
>  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> @@ -874,10 +875,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>  	if (flush_domains & I915_GEM_DOMAIN_GTT)
>  		wmb();
>  
> -	/* Unconditionally invalidate gpu caches and ensure that we do flush
> -	 * any residual writes from the previous batch.
> -	 */
> -	return intel_ring_invalidate_all_caches(ring);
> +	return 0;
>  }
>  
>  static bool
> @@ -1219,8 +1217,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  		}
>  	}
>  
> -	intel_runtime_pm_get(dev_priv);
> -
>  	ret = i915_mutex_lock_interruptible(dev);
>  	if (ret)
>  		goto pre_mutex_err;
> @@ -1331,6 +1327,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	if (ret)
>  		goto err;
>  
> +	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
> +
> +	/* To be split into two functions here... */
> +
> +	intel_runtime_pm_get(dev_priv);
> +
> +	/* Unconditionally invalidate gpu caches and ensure that we do flush
> +	 * any residual writes from the previous batch.
> +	 */
> +	ret = intel_ring_invalidate_all_caches(ring);
> +	if (ret)
> +		goto err;
> +
> +	/* Switch to the correct context for the batch */
>  	ret = i915_switch_context(ring, ctx);
>  	if (ret)
>  		goto err;
> @@ -1381,7 +1391,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  
>  	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
>  
> -	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
>  	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
>  
>  err:

I'd like Chris to take a look too, but it looks safe afaict.

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Daniel Vetter July 7, 2014, 7:21 p.m. UTC | #2
On Wed, Jul 02, 2014 at 11:34:23AM -0700, Jesse Barnes wrote:
> On Thu, 26 Jun 2014 18:24:08 +0100
> John.C.Harrison@Intel.com wrote:
> 
> > From: John Harrison <John.C.Harrison@Intel.com>
> > 
> > The scheduler decouples the submission of batch buffers to the driver with their
> > submission to the hardware. This basically means splitting the execbuffer()
> > function in half. This change rearranges some code ready for the split to occur.
> > ---
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   23 ++++++++++++++++-------
> >  1 file changed, 16 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index ec274ef..fda9187 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -32,6 +32,7 @@
> >  #include "i915_trace.h"
> >  #include "intel_drv.h"
> >  #include <linux/dma_remapping.h>
> > +#include "i915_scheduler.h"
> >  
> >  #define  __EXEC_OBJECT_HAS_PIN (1<<31)
> >  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> > @@ -874,10 +875,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
> >  	if (flush_domains & I915_GEM_DOMAIN_GTT)
> >  		wmb();
> >  
> > -	/* Unconditionally invalidate gpu caches and ensure that we do flush
> > -	 * any residual writes from the previous batch.
> > -	 */
> > -	return intel_ring_invalidate_all_caches(ring);
> > +	return 0;
> >  }
> >  
> >  static bool
> > @@ -1219,8 +1217,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> >  		}
> >  	}
> >  
> > -	intel_runtime_pm_get(dev_priv);
> > -
> >  	ret = i915_mutex_lock_interruptible(dev);
> >  	if (ret)
> >  		goto pre_mutex_err;
> > @@ -1331,6 +1327,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> >  	if (ret)
> >  		goto err;
> >  
> > +	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
> > +
> > +	/* To be split into two functions here... */
> > +
> > +	intel_runtime_pm_get(dev_priv);
> > +
> > +	/* Unconditionally invalidate gpu caches and ensure that we do flush
> > +	 * any residual writes from the previous batch.
> > +	 */
> > +	ret = intel_ring_invalidate_all_caches(ring);
> > +	if (ret)
> > +		goto err;
> > +
> > +	/* Switch to the correct context for the batch */
> >  	ret = i915_switch_context(ring, ctx);
> >  	if (ret)
> >  		goto err;
> > @@ -1381,7 +1391,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> >  
> >  	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
> >  
> > -	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
> >  	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
> >  
> >  err:
> 
> I'd like Chris to take a look too, but it looks safe afaict.
> 
> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>

switch_context can fail with EINTR so we really can't move stuff to the
active list before that point. Or we need to make sure that all the stuff
between the old and new move_to_active callsite can't fail.

Or we need to track this and tell userspace, with an EIO and adjusted
reset stats, that something failed after our point of no return, where
the kernel had already committed to executing the batch.

Or we need to unroll move_to_active (which is currently not really
possible).
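
To make the window concrete, here is a toy model (stand-in names, nothing to
do with the real i915 code) of what the reordered function looks like once a
signal shows up:

#include <errno.h>

static int objects_on_active_list;

static void move_to_active(void)
{
        objects_on_active_list = 1;     /* GEM now treats the objects as busy */
}

static int switch_context(int signal_pending)
{
        /* The easy-to-hit failure: a pending signal becomes -EINTR. */
        return signal_pending ? -EINTR : 0;
}

int do_execbuffer(int signal_pending)
{
        int ret;

        move_to_active();               /* new callsite: before anything can fail */

        ret = switch_context(signal_pending);
        if (ret)
                return ret;             /* batch never dispatched, but there is no
                                         * move_to_inactive() to undo the above */

        /* ... dispatch the batch to the hardware ... */
        return 0;
}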
-Daniel
John Harrison July 23, 2014, 4:33 p.m. UTC | #3
On 07/07/2014 20:21, Daniel Vetter wrote:
> On Wed, Jul 02, 2014 at 11:34:23AM -0700, Jesse Barnes wrote:
>> On Thu, 26 Jun 2014 18:24:08 +0100
>> John.C.Harrison@Intel.com wrote:
>>
>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>
>>> The scheduler decouples the submission of batch buffers to the driver with their
>>> submission to the hardware. This basically means splitting the execbuffer()
>>> function in half. This change rearranges some code ready for the split to occur.
>>> ---
>>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c |   23 ++++++++++++++++-------
>>>   1 file changed, 16 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> index ec274ef..fda9187 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> @@ -32,6 +32,7 @@
>>>   #include "i915_trace.h"
>>>   #include "intel_drv.h"
>>>   #include <linux/dma_remapping.h>
>>> +#include "i915_scheduler.h"
>>>   
>>>   #define  __EXEC_OBJECT_HAS_PIN (1<<31)
>>>   #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
>>> @@ -874,10 +875,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
>>>   	if (flush_domains & I915_GEM_DOMAIN_GTT)
>>>   		wmb();
>>>   
>>> -	/* Unconditionally invalidate gpu caches and ensure that we do flush
>>> -	 * any residual writes from the previous batch.
>>> -	 */
>>> -	return intel_ring_invalidate_all_caches(ring);
>>> +	return 0;
>>>   }
>>>   
>>>   static bool
>>> @@ -1219,8 +1217,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>>   		}
>>>   	}
>>>   
>>> -	intel_runtime_pm_get(dev_priv);
>>> -
>>>   	ret = i915_mutex_lock_interruptible(dev);
>>>   	if (ret)
>>>   		goto pre_mutex_err;
>>> @@ -1331,6 +1327,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>>   	if (ret)
>>>   		goto err;
>>>   
>>> +	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
>>> +
>>> +	/* To be split into two functions here... */
>>> +
>>> +	intel_runtime_pm_get(dev_priv);
>>> +
>>> +	/* Unconditionally invalidate gpu caches and ensure that we do flush
>>> +	 * any residual writes from the previous batch.
>>> +	 */
>>> +	ret = intel_ring_invalidate_all_caches(ring);
>>> +	if (ret)
>>> +		goto err;
>>> +
>>> +	/* Switch to the correct context for the batch */
>>>   	ret = i915_switch_context(ring, ctx);
>>>   	if (ret)
>>>   		goto err;
>>> @@ -1381,7 +1391,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>>   
>>>   	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
>>>   
>>> -	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
>>>   	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
>>>   
>>>   err:
>> I'd like Chris to take a look too, but it looks safe afaict.
>>
>> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> switch_context can fail with EINTR so we really can't move stuff to the
> active list before that point. Or we need to make sure that all the stuff
> between the old and new move_to_active callsite can't fail.
>
> Or we need to track this and tell userspace with an EIO and adjusted reset
> stats that something between our point of no return where the kernel
> committed to executing the batch failed.
>
> Or we need to unrol move_to_active (which is currently not really
> possible).
> -Daniel

switch_context can fail with quite a lot of different error codes. Is 
there anything particularly special about EINTR? I can't spot that 
particular code path at the moment.

The context switch is done at the point of submission to the hardware. 
As batch buffers can be re-ordered between submission to driver and 
submission to hardware, there is no point choosing a context any 
earlier. Whereas the move to active needs to be done at the point of 
submission to the driver. The object needs to be marked as in use even 
though the batch buffer that actually uses it might not be executed for 
some time. From the software viewpoint, the object is in use and all the 
synchronisation code needs to know that.

The scheduler makes the batch buffer execution asynchronous to its 
submission to the driver. There is no way to communicate back a return 
code to user land. Instead, it is up to the scheduler to check the 
return codes from all the execution paths and to retry later if 
something fails for a temporary reason. Or to discard the buffer if it 
is truly toast.
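
As a purely illustrative sketch (made-up names, not the actual
i915_scheduler code), the decision the scheduler has to make when the
deferred submission path reports an error looks something like:

#define MAX_RETRIES 3

struct queued_batch {
        int retries;
};

/* The deferred back half of execbuffer(): 0 on success, negative errno
 * on failure, just like the synchronous path. */
extern int submit_to_hardware(struct queued_batch *qb);
extern void requeue_for_later(struct queued_batch *qb);
extern void discard_batch(struct queued_batch *qb);

void scheduler_process(struct queued_batch *qb)
{
        int ret = submit_to_hardware(qb);

        if (ret == 0)
                return;                         /* on its way to the hardware */

        if (qb->retries++ < MAX_RETRIES)
                requeue_for_later(qb);          /* temporary failure: try again later */
        else
                discard_batch(qb);              /* truly toast: throw it away */
}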

John.
Daniel Vetter July 23, 2014, 6:14 p.m. UTC | #4
On Wed, Jul 23, 2014 at 05:33:42PM +0100, John Harrison wrote:
> 
> On 07/07/2014 20:21, Daniel Vetter wrote:
> >On Wed, Jul 02, 2014 at 11:34:23AM -0700, Jesse Barnes wrote:
> >>On Thu, 26 Jun 2014 18:24:08 +0100
> >>John.C.Harrison@Intel.com wrote:
> >>
> >>>From: John Harrison <John.C.Harrison@Intel.com>
> >>>
> >>>The scheduler decouples the submission of batch buffers to the driver with their
> >>>submission to the hardware. This basically means splitting the execbuffer()
> >>>function in half. This change rearranges some code ready for the split to occur.
> >>>---
> >>>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   23 ++++++++++++++++-------
> >>>  1 file changed, 16 insertions(+), 7 deletions(-)
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> >>>index ec274ef..fda9187 100644
> >>>--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> >>>+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> >>>@@ -32,6 +32,7 @@
> >>>  #include "i915_trace.h"
> >>>  #include "intel_drv.h"
> >>>  #include <linux/dma_remapping.h>
> >>>+#include "i915_scheduler.h"
> >>>  #define  __EXEC_OBJECT_HAS_PIN (1<<31)
> >>>  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> >>>@@ -874,10 +875,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
> >>>  	if (flush_domains & I915_GEM_DOMAIN_GTT)
> >>>  		wmb();
> >>>-	/* Unconditionally invalidate gpu caches and ensure that we do flush
> >>>-	 * any residual writes from the previous batch.
> >>>-	 */
> >>>-	return intel_ring_invalidate_all_caches(ring);
> >>>+	return 0;
> >>>  }
> >>>  static bool
> >>>@@ -1219,8 +1217,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> >>>  		}
> >>>  	}
> >>>-	intel_runtime_pm_get(dev_priv);
> >>>-
> >>>  	ret = i915_mutex_lock_interruptible(dev);
> >>>  	if (ret)
> >>>  		goto pre_mutex_err;
> >>>@@ -1331,6 +1327,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> >>>  	if (ret)
> >>>  		goto err;
> >>>+	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
> >>>+
> >>>+	/* To be split into two functions here... */
> >>>+
> >>>+	intel_runtime_pm_get(dev_priv);
> >>>+
> >>>+	/* Unconditionally invalidate gpu caches and ensure that we do flush
> >>>+	 * any residual writes from the previous batch.
> >>>+	 */
> >>>+	ret = intel_ring_invalidate_all_caches(ring);
> >>>+	if (ret)
> >>>+		goto err;
> >>>+
> >>>+	/* Switch to the correct context for the batch */
> >>>  	ret = i915_switch_context(ring, ctx);
> >>>  	if (ret)
> >>>  		goto err;
> >>>@@ -1381,7 +1391,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> >>>  	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
> >>>-	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
> >>>  	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
> >>>  err:
> >>I'd like Chris to take a look too, but it looks safe afaict.
> >>
> >>Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> >switch_context can fail with EINTR so we really can't move stuff to the
> >active list before that point. Or we need to make sure that all the stuff
> >between the old and new move_to_active callsite can't fail.
> >
> >Or we need to track this and tell userspace with an EIO and adjusted reset
> >stats that something between our point of no return where the kernel
> >committed to executing the batch failed.
> >
> >Or we need to unrol move_to_active (which is currently not really
> >possible).
> >-Daniel
> 
> switch_context can fail with quite a lot of different error codes. Is there
> anything particularly special about EINTR? I can't spot that particular code
> path at the moment.
> 
> The context switch is done at the point of submission to the hardware. As
> batch buffers can be re-ordered between submission to driver and submission
> to hardware, there is no point choosing a context any earlier. Whereas the
> move to active needs to be done at the point of submission to the driver.
> The object needs to be marked as in use even though the batch buffer that
> actually uses it might not be executed for some time. From the software
> viewpoint, the object is in use and all the syncrhonisation code needs to
> know that.
> 
> The scheduler makes the batch buffer execution asynchronous to its
> submission to the driver. There is no way to communicate back a return code
> to user land. Instead, it is up to the scheduler to check the return codes
> from all the execution paths and to retry later if something fails for a
> temporary reason. Or to discard the buffer if it is truly toast.

EINTR is simply really easy to test and hit since you can provoke it with
signals, and X uses signals excessively. One point where EINTR might
happen is in intel_ring_begin, the other when we try to pin the context
into the ggtt. The other error codes are true exceptions and will be much
harder to hit.
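
The usual userspace answer is simply to retry the ioctl (roughly what
libdrm's drmIoctl() does), but that contract only holds as long as the
kernel hasn't yet committed to executing the batch:

#include <errno.h>
#include <sys/ioctl.h>

/* Retry the ioctl while it keeps getting interrupted by a signal. */
int retrying_ioctl(int fd, unsigned long request, void *arg)
{
        int ret;

        do {
                ret = ioctl(fd, request, arg);
        } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

        return ret;
}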
-Daniel

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ec274ef..fda9187 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -32,6 +32,7 @@ 
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
+#include "i915_scheduler.h"
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
@@ -874,10 +875,7 @@  i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 	if (flush_domains & I915_GEM_DOMAIN_GTT)
 		wmb();
 
-	/* Unconditionally invalidate gpu caches and ensure that we do flush
-	 * any residual writes from the previous batch.
-	 */
-	return intel_ring_invalidate_all_caches(ring);
+	return 0;
 }
 
 static bool
@@ -1219,8 +1217,6 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		}
 	}
 
-	intel_runtime_pm_get(dev_priv);
-
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		goto pre_mutex_err;
@@ -1331,6 +1327,20 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (ret)
 		goto err;
 
+	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
+
+	/* To be split into two functions here... */
+
+	intel_runtime_pm_get(dev_priv);
+
+	/* Unconditionally invalidate gpu caches and ensure that we do flush
+	 * any residual writes from the previous batch.
+	 */
+	ret = intel_ring_invalidate_all_caches(ring);
+	if (ret)
+		goto err;
+
+	/* Switch to the correct context for the batch */
 	ret = i915_switch_context(ring, ctx);
 	if (ret)
 		goto err;
@@ -1381,7 +1391,6 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
 
-	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
 
 err: