diff mbox series

[2/5] drm/i915: Introduce i915_timeline.mutex

Message ID 20190301110547.14758-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/5] drm/i915/execlists: Suppress redundant preemption | expand

Commit Message

Chris Wilson March 1, 2019, 11:05 a.m. UTC
A simple mutex used for guarding the flow of requests in and out of the
timeline. In the short-term, it will be used only to guard the addition
of requests into the timeline, taken on alloc and released on commit so
that only one caller can construct a request into the timeline
(important as the seqno and ring pointers must be serialised). This will
be used by observers to ensure that the seqno/hwsp is stable. Later,
when we have reduced retiring to only operate on a single timeline at a
time, we can then use the mutex as the sole guard required for retiring.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c            | 6 +++++-
 drivers/gpu/drm/i915/i915_timeline.c           | 1 +
 drivers/gpu/drm/i915/i915_timeline.h           | 2 ++
 drivers/gpu/drm/i915/selftests/i915_request.c  | 4 +---
 drivers/gpu/drm/i915/selftests/mock_timeline.c | 1 +
 5 files changed, 10 insertions(+), 4 deletions(-)

Comments

Tvrtko Ursulin March 1, 2019, 11:38 a.m. UTC | #1
On 01/03/2019 11:05, Chris Wilson wrote:
> A simple mutex used for guarding the flow of requests in and out of the
> timeline. In the short-term, it will be used only to guard the addition
> of requests into the timeline, taken on alloc and released on commit so
> that only one caller can construct a request into the timeline
> (important as the seqno and ring pointers must be serialised). This will
> be used by observers to ensure that the seqno/hwsp is stable. Later,
> when we have reduced retiring to only operate on a single timeline at a
> time, we can then use the mutex as the sole guard required for retiring.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_request.c            | 6 +++++-
>   drivers/gpu/drm/i915/i915_timeline.c           | 1 +
>   drivers/gpu/drm/i915/i915_timeline.h           | 2 ++
>   drivers/gpu/drm/i915/selftests/i915_request.c  | 4 +---
>   drivers/gpu/drm/i915/selftests/mock_timeline.c | 1 +
>   5 files changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index c65f6c990fdd..719d1a5ab082 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -563,6 +563,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   		return ERR_CAST(ce);
>   
>   	reserve_gt(i915);
> +	mutex_lock(&ce->ring->timeline->mutex);
>   
>   	/* Move our oldest request to the slab-cache (if not in use!) */
>   	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
> @@ -688,6 +689,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   
>   	kmem_cache_free(global.slab_requests, rq);
>   err_unreserve:
> +	mutex_unlock(&ce->ring->timeline->mutex);
>   	unreserve_gt(i915);
>   	intel_context_unpin(ce);
>   	return ERR_PTR(ret);
> @@ -880,7 +882,7 @@ void i915_request_add(struct i915_request *request)
>   	GEM_TRACE("%s fence %llx:%lld\n",
>   		  engine->name, request->fence.context, request->fence.seqno);
>   
> -	lockdep_assert_held(&request->i915->drm.struct_mutex);
> +	lockdep_assert_held(&request->timeline->mutex);
>   	trace_i915_request_add(request);
>   
>   	/*
> @@ -991,6 +993,8 @@ void i915_request_add(struct i915_request *request)
>   	 */
>   	if (prev && i915_request_completed(prev))
>   		i915_request_retire_upto(prev);
> +
> +	mutex_unlock(&request->timeline->mutex);
>   }
>   
>   static unsigned long local_clock_us(unsigned int *cpu)
> diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> index b2202d2e58a2..87a80558da28 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_timeline.c
> @@ -162,6 +162,7 @@ int i915_timeline_init(struct drm_i915_private *i915,
>   	timeline->fence_context = dma_fence_context_alloc(1);
>   
>   	spin_lock_init(&timeline->lock);
> +	mutex_init(&timeline->mutex);
>   
>   	INIT_ACTIVE_REQUEST(&timeline->barrier);
>   	INIT_ACTIVE_REQUEST(&timeline->last_request);
> diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
> index 7bec7d2e45bf..36c3849f7108 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_timeline.h
> @@ -44,6 +44,8 @@ struct i915_timeline {
>   #define TIMELINE_CLIENT 0 /* default subclass */
>   #define TIMELINE_ENGINE 1
>   
> +	struct mutex mutex; /* protects the flow of requests */
> +
>   	unsigned int pin_count;
>   	const u32 *hwsp_seqno;
>   	struct i915_vma *hwsp_ggtt;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index 7da52e3d67af..7e1b65b8eb19 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -141,14 +141,12 @@ static int igt_fence_wait(void *arg)
>   		err = -ENOMEM;
>   		goto out_locked;
>   	}
> -	mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */
>   
>   	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
>   		pr_err("fence wait success before submit (expected timeout)!\n");
> -		goto out_device;
> +		goto out_locked;
>   	}
>   
> -	mutex_lock(&i915->drm.struct_mutex);
>   	i915_request_add(request);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   
> diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
> index d2de9ece2118..416d85233263 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
> @@ -14,6 +14,7 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
>   	timeline->fence_context = context;
>   
>   	spin_lock_init(&timeline->lock);
> +	mutex_init(&timeline->mutex);
>   
>   	INIT_ACTIVE_REQUEST(&timeline->barrier);
>   	INIT_ACTIVE_REQUEST(&timeline->last_request);
> 

So hard to predict if this will be okay in the future and keep a mental 
image of the endgame in ones head.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Chris Wilson March 1, 2019, 11:48 a.m. UTC | #2
Quoting Tvrtko Ursulin (2019-03-01 11:38:11)
> 
> On 01/03/2019 11:05, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
> > index d2de9ece2118..416d85233263 100644
> > --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
> > +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
> > @@ -14,6 +14,7 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
> >       timeline->fence_context = context;
> >   
> >       spin_lock_init(&timeline->lock);
> > +     mutex_init(&timeline->mutex);
> >   
> >       INIT_ACTIVE_REQUEST(&timeline->barrier);
> >       INIT_ACTIVE_REQUEST(&timeline->last_request);
> > 
> 
> So hard to predict if this will be okay in the future and keep a mental 
> image of the endgame in ones head.

Yeah, it'll change anyway as soon as we meet the enemy.

At the moment, it seems a fundamental mutex that we can't avoid, the
timeline must be serialised (for access to the ring and seqno, request
construct must be sequential) and from there we extend it to cover
retirement and activity tracking. It may be that we can split the mutex
further (allow for parallel construction and retirement? but that seems
an unlikely win since timeline == client and so we expect to parcel work
up into timeline/client bundles), but for now timeline->mutex seems like
a solid plan of action. (At least it gets us as far as allowing clients
to submit requests independently, after that my crystal ball is cloudy.)
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c65f6c990fdd..719d1a5ab082 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -563,6 +563,7 @@  i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 		return ERR_CAST(ce);
 
 	reserve_gt(i915);
+	mutex_lock(&ce->ring->timeline->mutex);
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
 	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
@@ -688,6 +689,7 @@  i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 
 	kmem_cache_free(global.slab_requests, rq);
 err_unreserve:
+	mutex_unlock(&ce->ring->timeline->mutex);
 	unreserve_gt(i915);
 	intel_context_unpin(ce);
 	return ERR_PTR(ret);
@@ -880,7 +882,7 @@  void i915_request_add(struct i915_request *request)
 	GEM_TRACE("%s fence %llx:%lld\n",
 		  engine->name, request->fence.context, request->fence.seqno);
 
-	lockdep_assert_held(&request->i915->drm.struct_mutex);
+	lockdep_assert_held(&request->timeline->mutex);
 	trace_i915_request_add(request);
 
 	/*
@@ -991,6 +993,8 @@  void i915_request_add(struct i915_request *request)
 	 */
 	if (prev && i915_request_completed(prev))
 		i915_request_retire_upto(prev);
+
+	mutex_unlock(&request->timeline->mutex);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index b2202d2e58a2..87a80558da28 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -162,6 +162,7 @@  int i915_timeline_init(struct drm_i915_private *i915,
 	timeline->fence_context = dma_fence_context_alloc(1);
 
 	spin_lock_init(&timeline->lock);
+	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->barrier);
 	INIT_ACTIVE_REQUEST(&timeline->last_request);
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 7bec7d2e45bf..36c3849f7108 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -44,6 +44,8 @@  struct i915_timeline {
 #define TIMELINE_CLIENT 0 /* default subclass */
 #define TIMELINE_ENGINE 1
 
+	struct mutex mutex; /* protects the flow of requests */
+
 	unsigned int pin_count;
 	const u32 *hwsp_seqno;
 	struct i915_vma *hwsp_ggtt;
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 7da52e3d67af..7e1b65b8eb19 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -141,14 +141,12 @@  static int igt_fence_wait(void *arg)
 		err = -ENOMEM;
 		goto out_locked;
 	}
-	mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
 		pr_err("fence wait success before submit (expected timeout)!\n");
-		goto out_device;
+		goto out_locked;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	i915_request_add(request);
 	mutex_unlock(&i915->drm.struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index d2de9ece2118..416d85233263 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -14,6 +14,7 @@  void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 	timeline->fence_context = context;
 
 	spin_lock_init(&timeline->lock);
+	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->barrier);
 	INIT_ACTIVE_REQUEST(&timeline->last_request);