[RFC,10/44] drm/i915: Prepare retire_requests to handle out-of-order seqnos

Message ID 1403803475-16337-11-git-send-email-John.C.Harrison@Intel.com (mailing list archive)
State New, archived

Commit Message

John Harrison June 26, 2014, 5:24 p.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

A major point of the GPU scheduler is that it re-orders batch buffers after they
have been submitted to the driver. Rather than attempting to re-assign seqno
values, it is much simpler to have each batch buffer keep its initially assigned
number and modify the rest of the driver to cope with seqnos being returned out
of order. In practice, very little code actually needs updating to cope.

One such place is the retire request handler. Rather than stopping as soon as an
uncompleted seqno is found, it must now keep iterating through the requests in
case later seqnos have completed. There is also a problem with freeing a
request before its objects have been moved to the inactive list. Thus the
requests are now moved to a temporary list first, then the objects are
de-activated, and finally the requests on the temporary list are freed.
---
 drivers/gpu/drm/i915/i915_gem.c |   60 +++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 28 deletions(-)
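
For reference, both retire loops decide completion with the driver's
wraparound-safe seqno comparison, which in the i915_drv.h of this era is:

	static inline bool
	i915_seqno_passed(uint32_t seq1, uint32_t seq2)
	{
		/* Signed subtraction handles 32-bit seqno wraparound:
		 * seq1 has passed seq2 iff (seq1 - seq2) is non-negative
		 * as a signed value.
		 */
		return (int32_t)(seq1 - seq2) >= 0;
	}

With out-of-order completion, a request failing this test no longer implies
that every later entry in the list fails it too, which is why the reworked
loops 'continue' instead of 'break'.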

Comments

Jesse Barnes July 2, 2014, 6:11 p.m. UTC | #1
On Thu, 26 Jun 2014 18:24:01 +0100
John.C.Harrison@Intel.com wrote:

> From: John Harrison <John.C.Harrison@Intel.com>
> 
> A major point of the GPU scheduler is that it re-orders batch buffers after they
> have been submitted to the driver. Rather than attempting to re-assign seqno
> values, it is much simpler to have each batch buffer keep its initially assigned
> number and modify the rest of the driver to cope with seqnos being returned out
> of order. In practice, very little code actually needs updating to cope.
> 
> One such place is the retire request handler. Rather than stopping as soon as an
> uncompleted seqno is found, it must now keep iterating through the requests in
> case later seqnos have completed. There is also a problem with freeing a
> request before its objects have been moved to the inactive list. Thus the
> requests are now moved to a temporary list first, then the objects are
> de-activated, and finally the requests on the temporary list are freed.
> ---
>  drivers/gpu/drm/i915/i915_gem.c |   60 +++++++++++++++++++++------------------
>  1 file changed, 32 insertions(+), 28 deletions(-)
> 
> [patch body snipped; see the full diff in the Patch section below]

I think this looks ok, but I don't look at this code much...  Seems
like it should be fine to go in as-is, though I do worry a little about
the additional time we'll spend walking the list if we have lots of
outstanding requests.  But since this is just called in a work queue,
maybe that's fine.

Going forward, I guess we might want per-context seqno tracking
instead, with more limited preemption within a context (or maybe
none?), which might make things easier.  But that would require a bit
more restructuring...

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Daniel Vetter July 7, 2014, 7:05 p.m. UTC | #2
On Thu, Jun 26, 2014 at 06:24:01PM +0100, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
> 
> A major point of the GPU scheduler is that it re-orders batch buffers after they
> have been submitted to the driver. Rather than attempting to re-assign seqno
> values, it is much simpler to have each batch buffer keep its initially assigned
> number and modify the rest of the driver to cope with seqnos being returned out
> of order. In practice, very little code actually needs updating to cope.
> 
> One such place is the retire request handler. Rather than stopping as soon as an
> uncompleted seqno is found, it must now keep iterating through the requests in
> case later seqnos have completed. There is also a problem with freeing a
> request before its objects have been moved to the inactive list. Thus the
> requests are now moved to a temporary list first, then the objects are
> de-activated, and finally the requests on the temporary list are freed.

I still hold that we should track requests, not seqno+ring pairs. At least
the plan with Maarten's fencing patches is to embed the generic struct
fence into our i915_gem_request structure. And struct fence will also be
the kernel-internal representation of an Android native sync fence.

So splattering ring+seqno->request/fence lookups all over the place isn't a
good way forward. It's ok for bring-up, but for merging we should do that
kind of large-scale refactoring upfront to reduce rebase churn. Oscar
knows how this works.
-Daniel
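
A rough sketch of the direction described above, assuming the 3.17-era
<linux/fence.h> API; the request layout and the i915_fence_ops table are
illustrative, not taken from this series:

	#include <linux/fence.h>

	/* Hypothetical layout: the generic fence is embedded in the
	 * request, so the request's lifetime is governed by the fence's
	 * refcount rather than by a bare ring+seqno pair.
	 */
	struct drm_i915_gem_request {
		struct fence fence;
		struct intel_engine_cs *ring;
		u32 seqno;
		u32 tail;
		struct list_head list;
	};

	extern const struct fence_ops i915_fence_ops;	/* assumed driver ops */

	static void i915_gem_request_init_fence(struct drm_i915_gem_request *req,
						spinlock_t *lock, unsigned context)
	{
		/* 'context' is a per-ring fence context; 'seqno' orders
		 * fences within that context (3.17 fence_init signature).
		 */
		fence_init(&req->fence, &i915_fence_ops, lock, context, req->seqno);
	}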

> ---
>  drivers/gpu/drm/i915/i915_gem.c |   60 +++++++++++++++++++++------------------
>  1 file changed, 32 insertions(+), 28 deletions(-)
> 
> [patch body snipped; see the full diff in the Patch section below]
Daniel Vetter July 9, 2014, 2:08 p.m. UTC | #3
On Mon, Jul 07, 2014 at 09:05:50PM +0200, Daniel Vetter wrote:
> On Thu, Jun 26, 2014 at 06:24:01PM +0100, John.C.Harrison@Intel.com wrote:
> > [commit message snipped]
> 
> I still hold that we should track requests, not seqno+ring pairs. At least
> the plan with Maarten's fencing patches is to embed the generic struct
> fence into our i915_gem_request structure. And struct fence will also be
> the kernel-internal representation of an Android native sync fence.
> 
> So splattering ring+seqno->request/fence lookups all over the place isn't a
> good way forward. It's ok for bring-up, but for merging we should do that
> kind of large-scale refactoring upfront to reduce rebase churn. Oscar
> knows how this works.

Aside: Maarten's driver core patches to add a struct fence have been
merged for 3.17, so it is now possible to go directly to the right
solution: embed struct fence into i915_gem_request and start to
refcount pointers properly, without intermediate hacks that add a
struct kref directly to i915_gem_request.
-Daniel
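
Since struct fence already embeds a struct kref, embedding it in the request
gives the proper refcounting for free. A minimal sketch of the wrappers this
implies (names assumed, not from this series):

	/* Take a reference for each new holder of a request pointer. */
	static inline struct drm_i915_gem_request *
	i915_gem_request_reference(struct drm_i915_gem_request *req)
	{
		fence_get(&req->fence);
		return req;
	}

	/* Drop a reference; the fence's ->release() hook frees the request
	 * once the last holder is gone.
	 */
	static inline void
	i915_gem_request_unreference(struct drm_i915_gem_request *req)
	{
		fence_put(&req->fence);
	}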

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b784eb2..7e53446 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2602,7 +2602,10 @@  void i915_gem_reset(struct drm_device *dev)
 void
 i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 {
+	struct drm_i915_gem_object *obj, *obj_next;
+	struct drm_i915_gem_request *req, *req_next;
 	uint32_t seqno;
+	LIST_HEAD(deferred_request_free);
 
 	if (list_empty(&ring->request_list))
 		return;
@@ -2611,43 +2614,35 @@  i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 
 	seqno = ring->get_seqno(ring, true);
 
-	/* Move any buffers on the active list that are no longer referenced
-	 * by the ringbuffer to the flushing/inactive lists as appropriate,
-	 * before we free the context associated with the requests.
+	/* Note that seqno values might be out of order due to rescheduling and
+	 * pre-emption. Thus both lists must be processed in their entirety
+	 * rather than stopping at the first 'non-passed' entry.
 	 */
-	while (!list_empty(&ring->active_list)) {
-		struct drm_i915_gem_object *obj;
-
-		obj = list_first_entry(&ring->active_list,
-				      struct drm_i915_gem_object,
-				      ring_list);
-
-		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
-			break;
 
-		i915_gem_object_move_to_inactive(obj);
-	}
-
-
-	while (!list_empty(&ring->request_list)) {
-		struct drm_i915_gem_request *request;
-
-		request = list_first_entry(&ring->request_list,
-					   struct drm_i915_gem_request,
-					   list);
-
-		if (!i915_seqno_passed(seqno, request->seqno))
-			break;
+	list_for_each_entry_safe(req, req_next, &ring->request_list, list) {
+		if (!i915_seqno_passed(seqno, req->seqno))
+			continue;
 
-		trace_i915_gem_request_retire(ring, request->seqno);
+		trace_i915_gem_request_retire(ring, req->seqno);
 		/* We know the GPU must have read the request to have
 		 * sent us the seqno + interrupt, so use the position
 		 * of tail of the request to update the last known position
 		 * of the GPU head.
 		 */
-		ring->buffer->last_retired_head = request->tail;
+		ring->buffer->last_retired_head = req->tail;
 
-		i915_gem_free_request(request);
+		list_move_tail(&req->list, &deferred_request_free);
+	}
+
+	/* Move any buffers on the active list that are no longer referenced
+	 * by the ringbuffer to the flushing/inactive lists as appropriate,
+	 * before we free the context associated with the requests.
+	 */
+	list_for_each_entry_safe(obj, obj_next, &ring->active_list, ring_list) {
+		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
+			continue;
+
+		i915_gem_object_move_to_inactive(obj);
 	}
 
 	if (unlikely(ring->trace_irq_seqno &&
@@ -2656,6 +2651,15 @@  i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 		ring->trace_irq_seqno = 0;
 	}
 
+	/* Finish processing active list before freeing request */
+	while (!list_empty(&deferred_request_free)) {
+		req = list_first_entry(&deferred_request_free,
+	                               struct drm_i915_gem_request,
+	                               list);
+
+		i915_gem_free_request(req);
+	}
+
 	WARN_ON(i915_verify_lists(ring->dev));
 }
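
One subtlety in the deferred-free loop above: it terminates only because
i915_gem_free_request() unlinks the request from whatever list holds it
before freeing it. The implementation current at the time was roughly:

	static void i915_gem_free_request(struct drm_i915_gem_request *request)
	{
		/* Unlinking here is what lets the while loop over
		 * deferred_request_free make progress each iteration.
		 */
		list_del(&request->list);
		i915_gem_request_remove_from_client(request);

		if (request->ctx)
			i915_gem_context_unreference(request->ctx);

		kfree(request);
	}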