diff mbox series

drm/i915: Fix vGPU kernel context kmemleak

Message ID 20191217071354.20006-1-zhenyuw@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: Fix vGPU kernel context kmemleak | expand

Commit Message

Zhenyu Wang Dec. 17, 2019, 7:13 a.m. UTC
Currently GVT allocates a kernel context as the vGPU submission context.
On vGPU destroy, the kernel context needs to be closed and then released,
otherwise the context's vm ppgtt resource is leaked, as kmemleak reports
below. This adds a new helper to destroy the kernel context for that.

unreferenced object 0xffff8ff6a3b46400 (size 1024):
  comm "type.sh", pid 2078, jiffies 4294909812 (age 89732.440s)
  hex dump (first 32 bytes):
    01 00 00 00 00 00 00 00 e0 ff ff ff 0f 00 00 00  ................
    10 64 b4 a3 f6 8f ff ff 10 64 b4 a3 f6 8f ff ff  .d.......d......
  backtrace:
    [<000000009d45297c>] kmem_cache_alloc_trace+0x131/0x240
    [<00000000a6e3cc58>] gen8_ppgtt_create+0x2f/0x580 [i915]
    [<0000000054e72785>] i915_ppgtt_create+0x1a/0x80 [i915]
    [<000000003bc414cd>] i915_gem_create_context+0x296/0x390 [i915]
    [<0000000017a40875>] i915_gem_context_create_kernel+0x13/0x40 [i915]
    [<00000000124aecff>] intel_vgpu_setup_submission+0x2d/0x370 [i915]
    [<00000000b6be72ee>] intel_gvt_create_vgpu+0x298/0x360 [i915]
    [<00000000527d88ad>] intel_vgpu_create+0x5d/0x130 [kvmgt]
    [<000000000d912742>] mdev_device_create+0x1cd/0x290 [mdev]
    [<000000001231e5d2>] create_store+0x92/0xd0 [mdev]
    [<00000000408c3c74>] mdev_type_attr_store+0x1e/0x30 [mdev]
    [<0000000035d8e9cd>] sysfs_kf_write+0x3c/0x50
    [<0000000084ce74a8>] kernfs_fop_write+0x125/0x1a0
    [<0000000019db1653>] __vfs_write+0x1b/0x40
    [<00000000a72bac26>] vfs_write+0xb1/0x1a0
    [<0000000036135673>] ksys_write+0xa7/0xe0

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 5 +++++
 drivers/gpu/drm/i915/gem/i915_gem_context.h | 1 +
 drivers/gpu/drm/i915/gvt/gvt.h              | 1 +
 drivers/gpu/drm/i915/gvt/scheduler.c        | 5 +++--
 4 files changed, 10 insertions(+), 2 deletions(-)

Comments

Chris Wilson Dec. 17, 2019, 8:26 a.m. UTC | #1
Quoting Zhenyu Wang (2019-12-17 07:13:54)
> Current GVT allocates kernel context as vGPU submission context.
> For vGPU destroy, the kernel context needs to be close then released,
> otherwise context's vm ppgtt resource would cause memleak issue like
> below. This trys to add new helper to destroy kernel context for that.

There's only been patches to remove the last of the kernel context for
that last year, after which this is moot.
-Chris
Zhenyu Wang Dec. 18, 2019, 3:22 a.m. UTC | #2
On 2019.12.17 08:26:16 +0000, Chris Wilson wrote:
> Quoting Zhenyu Wang (2019-12-17 07:13:54)
> > Current GVT allocates kernel context as vGPU submission context.
> > For vGPU destroy, the kernel context needs to be close then released,
> > otherwise context's vm ppgtt resource would cause memleak issue like
> > below. This trys to add new helper to destroy kernel context for that.
> 
> There's only been patches to remove the last of the kernel context for
> that last year, after which this is moot.

Could you point me to the patch?

thanks
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 46b4d1d643f8..de1d753731c6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -742,6 +742,11 @@  i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	return ctx;
 }
 
+void i915_gem_context_destroy_kernel(struct i915_gem_context *ctx)
+{
+	destroy_kernel_context(&ctx);
+}
+
 static void init_contexts(struct i915_gem_contexts *gc)
 {
 	spin_lock_init(&gc->lock);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 18e50a769a6e..611754744437 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -180,6 +180,7 @@  int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
 
 struct i915_gem_context *
 i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
+void i915_gem_context_destroy_kernel(struct i915_gem_context *ctx);
 
 static inline struct i915_gem_context *
 i915_gem_context_get(struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 0081b051d3e0..ece22a199551 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -149,6 +149,7 @@  struct intel_vgpu_submission_ops {
 struct intel_vgpu_submission {
 	struct intel_vgpu_execlist execlist[I915_NUM_ENGINES];
 	struct list_head workload_q_head[I915_NUM_ENGINES];
+	struct i915_gem_context *ctx;
 	struct intel_context *shadow[I915_NUM_ENGINES];
 	struct kmem_cache *workloads;
 	atomic_t running_workload_num;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 5b2a7d072ec9..4f5d8f48de28 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1166,6 +1166,7 @@  void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
 	for_each_engine(engine, vgpu->gvt->dev_priv, id)
 		intel_context_unpin(s->shadow[id]);
 
+	i915_gem_context_destroy_kernel(s->ctx);
 	kmem_cache_destroy(s->workloads);
 }
 
@@ -1276,12 +1277,12 @@  int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		ret = -ENOMEM;
 		goto out_shadow_ctx;
 	}
-
+	s->ctx = ctx;
 	atomic_set(&s->running_workload_num, 0);
 	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
 
 	i915_vm_put(&ppgtt->vm);
-	i915_gem_context_put(ctx);
+
 	return 0;
 
 out_shadow_ctx: