diff mbox

drm/i915: Do not leak objects after capturing error state

Message ID 1426785077-7889-1-git-send-email-michel.thierry@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michel Thierry March 19, 2015, 5:11 p.m. UTC
While running kmemleak chasing a different memleak, I saw that the
capture_error_state function was leaking some objects, for example:

unreferenced object 0xffff8800a9b72148 (size 8192):
  comm "kworker/u16:0", pid 1499, jiffies 4295201243 (age 990.096s)
  hex dump (first 32 bytes):
    00 00 04 00 00 00 00 00 5d f4 ff ff 00 00 00 00  ........].......
    00 30 b0 01 00 00 00 00 37 00 00 00 00 00 00 00  .0......7.......
  backtrace:
    [<ffffffff811e5ae4>] create_object+0x104/0x2c0
    [<ffffffff8178f50a>] kmemleak_alloc+0x7a/0xc0
    [<ffffffff811cde4b>] __kmalloc+0xeb/0x220
    [<ffffffffa038f1d9>] kcalloc.constprop.12+0x2d/0x2f [i915]
    [<ffffffffa0316064>] i915_capture_error_state+0x3f4/0x1660 [i915]
    [<ffffffffa03207df>] i915_handle_error+0x7f/0x660 [i915]
    [<ffffffffa03210f7>] i915_hangcheck_elapsed+0x2e7/0x470 [i915]
    [<ffffffff8108d574>] process_one_work+0x144/0x490
    [<ffffffff8108dfbd>] worker_thread+0x11d/0x530
    [<ffffffff81094079>] kthread+0xc9/0xe0
    [<ffffffff817a2398>] ret_from_fork+0x58/0x90
    [<ffffffffffffffff>] 0xffffffffffffffff

The following objects are allocated in i915_gem_capture_buffers, but not
released in i915_error_state_free:
  - error->active_bo_count
  - error->pinned_bo
  - error->pinned_bo_count
  - error->active_bo[vm_count] (allocated in i915_gem_capture_vm).

Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

Comments

Chris Wilson March 19, 2015, 5:18 p.m. UTC | #1
On Thu, Mar 19, 2015 at 05:11:17PM +0000, Michel Thierry wrote:
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index bbf25d0..18f7a2a 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -545,7 +545,7 @@ static void i915_error_state_free(struct kref *error_ref)
>  {
>  	struct drm_i915_error_state *error = container_of(error_ref,
>  							  typeof(*error), ref);
> -	int i;
> +	int i, j;

No need for a new iterator.
> 
>  	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
>  		i915_error_object_free(error->ring[i].batchbuffer);
> @@ -556,7 +556,14 @@ static void i915_error_state_free(struct kref *error_ref)
>  	}
>  
>  	i915_error_object_free(error->semaphore_obj);
> +
> +	for (i = 0; i < error->vm_count; i++)
> +		kfree(error->active_bo[i]);
		kfree(error->pinned_bo[i]);

Pinned_bo is also an overlooked array of pointers.
-Chris
Michel Thierry March 19, 2015, 5:49 p.m. UTC | #2
On 3/19/2015 5:18 PM, Chris Wilson wrote:
> On Thu, Mar 19, 2015 at 05:11:17PM +0000, Michel Thierry wrote:
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>> index bbf25d0..18f7a2a 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> @@ -545,7 +545,7 @@ static void i915_error_state_free(struct kref *error_ref)
>>   {
>>   	struct drm_i915_error_state *error = container_of(error_ref,
>>   							  typeof(*error), ref);
>> -	int i;
>> +	int i, j;
> No need for a new iterator.
Ok, I'll reuse 'i'.
>>   	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
>>   		i915_error_object_free(error->ring[i].batchbuffer);
>> @@ -556,7 +556,14 @@ static void i915_error_state_free(struct kref *error_ref)
>>   	}
>>   
>>   	i915_error_object_free(error->semaphore_obj);
>> +
>> +	for (i = 0; i < error->vm_count; i++)
>> +		kfree(error->active_bo[i]);
> 		kfree(error->pinned_bo[i]);
>
> Pinned_bo is also an overlooked array of pointers.
But pinned_bo elements were not explicitly allocated with kcalloc.
I'd get warnings that they are already freed.

Thanks
> -Chris
>
Chris Wilson March 19, 2015, 9:02 p.m. UTC | #3
On Thu, Mar 19, 2015 at 05:49:44PM +0000, Michel Thierry wrote:
> On 3/19/2015 5:18 PM, Chris Wilson wrote:
> >On Thu, Mar 19, 2015 at 05:11:17PM +0000, Michel Thierry wrote:
> >>diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> >>index bbf25d0..18f7a2a 100644
> >>--- a/drivers/gpu/drm/i915/i915_gpu_error.c
> >>+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> >>@@ -545,7 +545,7 @@ static void i915_error_state_free(struct kref *error_ref)
> >>  {
> >>  	struct drm_i915_error_state *error = container_of(error_ref,
> >>  							  typeof(*error), ref);
> >>-	int i;
> >>+	int i, j;
> >No need for a new iterator.
> Ok, I'll reuse 'i'.
> >>  	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
> >>  		i915_error_object_free(error->ring[i].batchbuffer);
> >>@@ -556,7 +556,14 @@ static void i915_error_state_free(struct kref *error_ref)
> >>  	}
> >>  	i915_error_object_free(error->semaphore_obj);
> >>+
> >>+	for (i = 0; i < error->vm_count; i++)
> >>+		kfree(error->active_bo[i]);
> >		kfree(error->pinned_bo[i]);
> >
> >Pinned_bo is also an overlooked array of pointers.
> But pinned_bo elements were not explicitly allocated with kcalloc.
> I'd get warnings that they are already freed.

I was wrong. I thought we allocated a single array for both, then
managed to convince myself we had two allocs (one for the pinned_bo and
one for the active_bo).

FWIW, as penance, the regression is from

commit 95f5301dd880da2dea2c9a9c29750064536d426a
Author: Ben Widawsky <ben@bwidawsk.net>
Date:   Wed Jul 31 17:00:15 2013 -0700

    drm/i915: Update error capture for VMs
-Chris
Shuang He March 20, 2015, 5:41 a.m. UTC | #4
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6011
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
PNV                 -2              274/274              272/274
ILK                                  303/303              303/303
SNB                                  303/303              303/303
IVB                 -1              342/342              341/342
BYT                                  287/287              287/287
HSW                 -1              362/362              361/362
BDW                                  308/308              308/308
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
*PNV  igt_gem_userptr_blits_minor-sync-interruptible      PASS(1)      DMESG_WARN(1)PASS(1)
*PNV  igt_gen3_render_linear_blits      PASS(1)      CRASH(1)PASS(1)
*IVB  igt_gem_pwrite_pread_snooped-copy-performance      PASS(1)      DMESG_WARN(1)PASS(1)
*HSW  igt_gem_pwrite_pread_snooped-pwrite-blt-cpu_mmap-performance      PASS(1)      DMESG_WARN(1)PASS(1)
Note: You need to pay more attention to line start with '*'
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index bbf25d0..18f7a2a 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -545,7 +545,7 @@  static void i915_error_state_free(struct kref *error_ref)
 {
 	struct drm_i915_error_state *error = container_of(error_ref,
 							  typeof(*error), ref);
-	int i;
+	int i, j;
 
 	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
 		i915_error_object_free(error->ring[i].batchbuffer);
@@ -556,7 +556,14 @@  static void i915_error_state_free(struct kref *error_ref)
 	}
 
 	i915_error_object_free(error->semaphore_obj);
+
+	for (j = 0; j < error->vm_count; j++)
+		kfree(error->active_bo[j]);
+
 	kfree(error->active_bo);
+	kfree(error->active_bo_count);
+	kfree(error->pinned_bo);
+	kfree(error->pinned_bo_count);
 	kfree(error->overlay);
 	kfree(error->display);
 	kfree(error);