
[6/8] drm/i915: Add global barrier support

Message ID: 20180814144058.19286-7-tvrtko.ursulin@linux.intel.com (mailing list archive)
State: New, archived
Series: Per context dynamic (sub)slice power-gating

Commit Message

Tvrtko Ursulin Aug. 14, 2018, 2:40 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

The global barrier is a facility for serializing submission between
different timelines.

After i915_gem_set_global_barrier has been called on a request, all
following submissions on any engine are set up to depend on this global
barrier. Once the barrier request has completed, the barrier automatically
clears itself (the i915_gem_active tracker drops the request when it is
retired) and submission continues as normal.

This facility will be used by the upcoming context SSEU code.
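
For illustration, a hypothetical caller might emit a request carrying,
say, a global reconfiguration and then mark it as the barrier, after
which every new request on every engine will await it before being
submitted. The helper below is only a sketch and not part of this patch:

static int example_reconfigure(struct intel_engine_cs *engine,
			       struct i915_gem_context *ctx)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_request *rq;

	lockdep_assert_held(&i915->drm.struct_mutex);

	rq = i915_request_alloc(engine, ctx);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* ... emit the reconfiguration commands into rq here ... */

	/*
	 * All requests allocated from now on, on any engine, will await
	 * rq before being submitted to the GPU. The barrier clears itself
	 * once rq is retired.
	 */
	i915_gem_set_global_barrier(i915, rq);

	i915_request_add(rq);

	return 0;
}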

-------------------------------------------------------------------------
This code was part of the larger SSEU patch but I have extracted it into a
separate patch for ease of review and clarity. I think it originates from
Chris Wilson, so, permission pending, I will change the author and add the
appropriate S-o-B.
-------------------------------------------------------------------------

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h     | 27 +++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem.c     |  2 ++
 drivers/gpu/drm/i915/i915_request.c | 16 ++++++++++++++++
 3 files changed, 45 insertions(+)

Comments

Tvrtko Ursulin Aug. 14, 2018, 2:59 p.m. UTC | #1
Chris, for this one please let me know if it is okay to give you 
authorship and to add your S-o-B.

Tvrtko


Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c12d2676435..643089ba01b9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2098,6 +2098,16 @@ struct drm_i915_private {
 		u32 active_requests;
 		u32 request_serial;
 
+		/**
+		 * Global barrier used to serialize ordering between
+		 * different timelines.
+		 *
+		 * Users can call i915_gem_set_global_barrier which will make
+		 * all subsequent submissions execute only after this barrier
+		 * has been completed.
+		 */
+		struct i915_gem_active global_barrier;
+
 		/**
 		 * Is the GPU currently considered idle, or busy executing
 		 * userspace requests? Whilst idle, we allow runtime power
@@ -3230,6 +3240,23 @@ i915_vm_to_ppgtt(struct i915_address_space *vm)
 	return container_of(vm, struct i915_hw_ppgtt, vm);
 }
 
+/**
+ * i915_gem_set_global_barrier - orders submission on different timelines
+ * @i915: i915 device private
+ * @rq: request after which new submissions can proceed
+ *
+ * Sets the passed-in request as the serialization point for all subsequent
+ * submissions, regardless of the engine/timeline. Subsequent requests will not
+ * be submitted to the GPU until the global barrier has been completed.
+ */
+static inline void
+i915_gem_set_global_barrier(struct drm_i915_private *i915,
+			    struct i915_request *rq)
+{
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	i915_gem_active_set(&i915->gt.global_barrier, rq);
+}
+
 /* i915_gem_fence_reg.c */
 struct drm_i915_fence_reg *
 i915_reserve_fence(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0453eb42a1a3..be462ef65786 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5752,6 +5752,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 	if (!dev_priv->priorities)
 		goto err_dependencies;
 
+	init_request_active(&dev_priv->gt.global_barrier, NULL);
+
 	INIT_LIST_HEAD(&dev_priv->gt.timelines);
 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 09ed48833b54..8b45f74dc748 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -644,6 +644,18 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int add_global_barrier(struct i915_request *rq)
+{
+	struct i915_request *barrier;
+
+	barrier = i915_gem_active_raw(&rq->i915->gt.global_barrier,
+				      &rq->i915->drm.struct_mutex);
+	if (barrier)
+		return i915_request_await_dma_fence(rq, &barrier->fence);
+
+	return 0;
+}
+
 /**
  * i915_request_alloc - allocate a request structure
  *
@@ -806,6 +818,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
+	ret = add_global_barrier(rq);
+	if (ret)
+		goto err_unwind;
+
 	/* Unconditionally invalidate GPU caches and TLBs. */
 	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 	if (ret)