
drm/i915/guc: Fix a false alert of memory leak when freeing an LRC

Message ID 1445452063-9286-1-git-send-email-yu.dai@intel.com (mailing list archive)
State New, archived

Commit Message

yu.dai@intel.com Oct. 21, 2015, 6:27 p.m. UTC
From: Alex Dai <yu.dai@intel.com>

There is a memory leak warning message from i915_gem_context_clean
when GuC submission is enabled. The reason is that the gem_request
(and thus the LRC associated with it) is freed earlier than the vma
list is moved to the inactive list.

We do not see this in execlist (non-GuC) mode because there the
gem_request is tracked by execlist_retired_req_list. The management
of that queue, and therefore the freeing of the LRC, happens after
the vma list has been retired. This patch uses the same gem_request
management for GuC submission. Because the context switch interrupt
is handled by the firmware, intel_guc_retire_requests() is
introduced to move retired gem_requests onto
execlist_retired_req_list, from which they are released later by the
retire workqueue.

Signed-off-by: Alex Dai <yu.dai@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c            |  1 +
 drivers/gpu/drm/i915/i915_guc_submission.c | 35 +++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_guc.h           |  2 ++
 drivers/gpu/drm/i915/intel_lrc.c           | 10 ++++-----
 4 files changed, 42 insertions(+), 6 deletions(-)
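
The "released later by the retire workqueue" step in the commit message refers
to the existing execlists retirement path that the GuC path now shares:
requests parked on execlist_retired_req_list by intel_guc_retire_requests()
are unpinned and unreferenced later from the retire worker, after their VMAs
have been moved to the inactive list. The sketch below is illustrative only
and is not part of the posted patch; the helper name is invented and the body
only approximates intel_execlists_retire_requests() in the driver of that era.

/*
 * Illustrative sketch only -- not code from this thread. Roughly what
 * the deferred release looks like when it runs from the retire worker:
 * requests parked on execlist_retired_req_list are unpinned and
 * unreferenced here, which is what avoids the false leak warning from
 * i915_gem_context_clean().
 */
static void retire_parked_requests(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *req, *tmp;
	struct list_head retired_list;

	INIT_LIST_HEAD(&retired_list);

	/* Detach the whole retired list under the lock, process it outside. */
	spin_lock_irq(&ring->execlist_lock);
	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
	spin_unlock_irq(&ring->execlist_lock);

	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
		if (req->ctx != ring->default_context)
			intel_lr_context_unpin(req);	/* drop the LRC pin */

		list_del(&req->execlist_link);
		i915_gem_request_unreference(req);	/* may free the request and LRC */
	}
}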

Comments

Dave Gordon Oct. 23, 2015, 9:40 p.m. UTC | #1
On 21/10/15 19:27, yu.dai@intel.com wrote:
> From: Alex Dai <yu.dai@intel.com>
>
> There is a memory leak warning message from i915_gem_context_clean
> when GuC submission is enabled. The reason is that the gem_request
> (and thus the LRC associated with it) is freed earlier than the vma
> list is moved to the inactive list.
>
> We do not see this in execlist (non-GuC) mode because there the
> gem_request is tracked by execlist_retired_req_list. The management
> of that queue, and therefore the freeing of the LRC, happens after
> the vma list has been retired. This patch uses the same gem_request
> management for GuC submission. Because the context switch interrupt
> is handled by the firmware, intel_guc_retire_requests() is
> introduced to move retired gem_requests onto
> execlist_retired_req_list, from which they are released later by the
> retire workqueue.
>
> Signed-off-by: Alex Dai <yu.dai@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c            |  1 +
>   drivers/gpu/drm/i915/i915_guc_submission.c | 35 +++++++++++++++++++++++++++++-
>   drivers/gpu/drm/i915/intel_guc.h           |  2 ++
>   drivers/gpu/drm/i915/intel_lrc.c           | 10 ++++-----
>   4 files changed, 42 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7d6b0c8..6d8a0f1 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2879,6 +2879,7 @@ i915_gem_retire_requests(struct drm_device *dev)
>   			idle &= list_empty(&ring->execlist_queue);
>   			spin_unlock_irqrestore(&ring->execlist_lock, flags);
>
> +			intel_guc_retire_requests(ring);
>   			intel_execlists_retire_requests(ring);
>   		}
>   	}
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 737b4f5..a35cfee 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -597,7 +597,8 @@ int i915_guc_submit(struct i915_guc_client *client,
>   		    struct drm_i915_gem_request *rq)
>   {
>   	struct intel_guc *guc = client->guc;
> -	enum intel_ring_id ring_id = rq->ring->id;
> +	struct intel_engine_cs *ring = rq->ring;
> +	enum intel_ring_id ring_id = ring->id;
>   	unsigned long flags;
>   	int q_ret, b_ret;
>
> @@ -628,9 +629,41 @@ int i915_guc_submit(struct i915_guc_client *client,
>   	guc->last_seqno[ring_id] = rq->seqno;
>   	spin_unlock(&guc->host2guc_lock);
>
> +	spin_lock_irq(&ring->execlist_lock);
> +	list_add_tail(&rq->execlist_link, &ring->execlist_queue);
> +	spin_unlock_irq(&ring->execlist_lock);
> +
>   	return q_ret;
>   }
>
> +void intel_guc_retire_requests(struct intel_engine_cs *ring)
> +{
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +
> +	if (!dev_priv->guc.execbuf_client)
> +		return;
> +
> +	spin_lock_irq(&ring->execlist_lock);
> +
> +	while (!list_empty(&ring->execlist_queue)) {
> +		struct drm_i915_gem_request *request;
> +
> +		request = list_first_entry(&ring->execlist_queue,
> +				struct drm_i915_gem_request,
> +				execlist_link);
> +
> +		if (!i915_gem_request_completed(request, true))
> +			break;
> +
> +		list_del(&request->execlist_link);
> +		list_add_tail(&request->execlist_link,
> +			&ring->execlist_retired_req_list);
> +
> +	}
> +
> +	spin_unlock_irq(&ring->execlist_lock);
> +}
> +
>   /*
>    * Everything below here is concerned with setup & teardown, and is
>    * therefore not part of the somewhat time-critical batch-submission
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 8c5f82f..4c647b9 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -129,4 +129,6 @@ int i915_guc_submit(struct i915_guc_client *client,
>   void i915_guc_submission_disable(struct drm_device *dev);
>   void i915_guc_submission_fini(struct drm_device *dev);
>
> +void intel_guc_retire_requests(struct intel_engine_cs *ring);
> +
>   #endif
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 98389bd..05620f3 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -566,11 +566,6 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
>   	struct drm_i915_gem_request *cursor;
>   	int num_elements = 0;
>
> -	if (request->ctx != ring->default_context)
> -		intel_lr_context_pin(request);
> -
> -	i915_gem_request_reference(request);
> -
>   	spin_lock_irq(&ring->execlist_lock);
>
>   	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
> @@ -732,6 +727,11 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>   	if (intel_ring_stopped(ring))
>   		return;
>
> +	if (request->ctx != ring->default_context)
> +		intel_lr_context_pin(request);
> +
> +	i915_gem_request_reference(request);
> +
>   	if (dev_priv->guc.execbuf_client)
>   		i915_guc_submit(dev_priv->guc.execbuf_client, request);
>   	else

Not entirely happy about this. If an extra ref is needed for both 
execlist and GuC mode (of which I'm not convinced) what about legacy 
ringbuffer mode? I thought execlists needed the extra ref only because 
it added an extra queue to hold work not yet submitted to hardware.
That queue isn't needed in ringbuffer OR GuC mode. OTOH if an extra ref 
is needed in ALL modes it should be in common core code, not replicated 
into all submission paths :)

You've got intel_logical_ring_advance_and_submit() taking the extra 
reference, but each of the execlist and GuC paths adding the request to 
the "execlist_queue" :( And then two separate but very similar functions 
reversing these two operations :(

Surely the VMA should not be freed while there are any outstanding 
(not-yet-retired) requests referring to it? Perhaps 
i915_gem_context_clean() is called in the wrong place (too early?). 
Shouldn't we have waited for all requests to complete and be retired 
BEFORE freeing the context they're using?

.Dave.
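
[One possible shape of the common helper Dave describes above -- purely
illustrative, not proposed anywhere in the thread, and simplified (the real
execlists path also dispatches work to the hardware under the same lock).
Both submission paths would take the reference/pin and queue the request in
one place, so a single retirement function can undo both.]

/*
 * Illustrative only -- not code from this thread. A single helper,
 * shared by the execlists and GuC submission paths, pairing the extra
 * reference/pin with the queueing onto execlist_queue.
 */
static void i915_queue_request_for_retire(struct drm_i915_gem_request *rq)
{
	struct intel_engine_cs *ring = rq->ring;

	if (rq->ctx != ring->default_context)
		intel_lr_context_pin(rq);	/* keep the LRC resident */
	i915_gem_request_reference(rq);		/* dropped again at retirement */

	spin_lock_irq(&ring->execlist_lock);
	list_add_tail(&rq->execlist_link, &ring->execlist_queue);
	spin_unlock_irq(&ring->execlist_lock);
}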
Chris Wilson Oct. 24, 2015, 8:52 a.m. UTC | #2
On Fri, Oct 23, 2015 at 10:40:11PM +0100, Dave Gordon wrote:
> >@@ -732,6 +727,11 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
> >  	if (intel_ring_stopped(ring))
> >  		return;
> >
> >+	if (request->ctx != ring->default_context)
> >+		intel_lr_context_pin(request);
> >+
> >+	i915_gem_request_reference(request);
> >+
> >  	if (dev_priv->guc.execbuf_client)
> >  		i915_guc_submit(dev_priv->guc.execbuf_client, request);
> >  	else
> 
> Not entirely happy about this. If an extra ref is needed for both
> execlist and GuC mode (of which I'm not convinced) what about legacy
> ringbuffer mode? I thought execlists needed the extra ref only
> because it added an extra queue to hold work not yet submitted to
> hardware.
> That queue isn't needed in ringbuffer OR GuC mode. OTOH if an extra
> ref is needed in ALL modes it should be in common core code, not
> replicated into all submission paths :)

Indeed that extra ref here is bogus.
 
> You've got intel_logical_ring_advance_and_submit() taking the extra
> reference, but each of the execlist and GuC paths adding the request
> to the "execlist_queue" :( And then two separate but very similar
> functions reversing these two operations :(
> 
> Surely the VMA should not be freed while there are any outstanding
> (not-yet-retired) requests referring to it? Perhaps
> i915_gem_context_clean() is called in the wrong place (too early?).
> Shouldn't we have waited for all requests to complete and be retired
> BEFORE freeing the context they're using?

The issue is that the context_clean() is being called whilst the
bookkeeping is in an inconsistent state. My preferred solution here is
to revert that bogus patch as it doesn't fix the issue it was purported
to.
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7d6b0c8..6d8a0f1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2879,6 +2879,7 @@  i915_gem_retire_requests(struct drm_device *dev)
 			idle &= list_empty(&ring->execlist_queue);
 			spin_unlock_irqrestore(&ring->execlist_lock, flags);
 
+			intel_guc_retire_requests(ring);
 			intel_execlists_retire_requests(ring);
 		}
 	}
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 737b4f5..a35cfee 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -597,7 +597,8 @@  int i915_guc_submit(struct i915_guc_client *client,
 		    struct drm_i915_gem_request *rq)
 {
 	struct intel_guc *guc = client->guc;
-	enum intel_ring_id ring_id = rq->ring->id;
+	struct intel_engine_cs *ring = rq->ring;
+	enum intel_ring_id ring_id = ring->id;
 	unsigned long flags;
 	int q_ret, b_ret;
 
@@ -628,9 +629,41 @@  int i915_guc_submit(struct i915_guc_client *client,
 	guc->last_seqno[ring_id] = rq->seqno;
 	spin_unlock(&guc->host2guc_lock);
 
+	spin_lock_irq(&ring->execlist_lock);
+	list_add_tail(&rq->execlist_link, &ring->execlist_queue);
+	spin_unlock_irq(&ring->execlist_lock);
+
 	return q_ret;
 }
 
+void intel_guc_retire_requests(struct intel_engine_cs *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+
+	if (!dev_priv->guc.execbuf_client)
+		return;
+
+	spin_lock_irq(&ring->execlist_lock);
+
+	while (!list_empty(&ring->execlist_queue)) {
+		struct drm_i915_gem_request *request;
+
+		request = list_first_entry(&ring->execlist_queue,
+				struct drm_i915_gem_request,
+				execlist_link);
+
+		if (!i915_gem_request_completed(request, true))
+			break;
+
+		list_del(&request->execlist_link);
+		list_add_tail(&request->execlist_link,
+			&ring->execlist_retired_req_list);
+
+	}
+
+	spin_unlock_irq(&ring->execlist_lock);
+}
+
 /*
  * Everything below here is concerned with setup & teardown, and is
  * therefore not part of the somewhat time-critical batch-submission
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 8c5f82f..4c647b9 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -129,4 +129,6 @@  int i915_guc_submit(struct i915_guc_client *client,
 void i915_guc_submission_disable(struct drm_device *dev);
 void i915_guc_submission_fini(struct drm_device *dev);
 
+void intel_guc_retire_requests(struct intel_engine_cs *ring);
+
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 98389bd..05620f3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -566,11 +566,6 @@  static int execlists_context_queue(struct drm_i915_gem_request *request)
 	struct drm_i915_gem_request *cursor;
 	int num_elements = 0;
 
-	if (request->ctx != ring->default_context)
-		intel_lr_context_pin(request);
-
-	i915_gem_request_reference(request);
-
 	spin_lock_irq(&ring->execlist_lock);
 
 	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
@@ -732,6 +727,11 @@  intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 	if (intel_ring_stopped(ring))
 		return;
 
+	if (request->ctx != ring->default_context)
+		intel_lr_context_pin(request);
+
+	i915_gem_request_reference(request);
+
 	if (dev_priv->guc.execbuf_client)
 		i915_guc_submit(dev_priv->guc.execbuf_client, request);
 	else