Message ID | 20190523064933.23604-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915/gtt: Always acquire struct_mutex for gen6_ppgtt_cleanup | expand |
On 23/05/2019 07:49, Chris Wilson wrote: > We rearranged the vm_destroy_ioctl to avoid taking struct_mutex, little > realising that buried underneath the gen6 ppgtt release path was a > struct_mutex requirement (to remove its GGTT vma). Until that > struct_mutex is vanquished, take a detour in gen6_ppgtt_cleanup to do > the i915_vma_destroy from inside a worker under the struct_mutex. > > <4> [257.740160] WARN_ON(debug_locks && !lock_is_held(&(&vma->vm->i915->drm.struct_mutex)->dep_map)) > <4> [257.740213] WARNING: CPU: 3 PID: 1507 at drivers/gpu/drm/i915/i915_vma.c:841 i915_vma_destroy+0x1ae/0x3a0 [i915] > <4> [257.740214] Modules linked in: snd_hda_codec_hdmi i915 x86_pkg_temp_thermal mei_hdcp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core r8169 realtek snd_pcm mei_me mei prime_numbers lpc_ich > <4> [257.740224] CPU: 3 PID: 1507 Comm: gem_vm_create Tainted: G U 5.2.0-rc1-CI-CI_DRM_6118+ #1 > <4> [257.740225] Hardware name: MSI MS-7924/Z97M-G43(MS-7924), BIOS V1.12 02/15/2016 > <4> [257.740249] RIP: 0010:i915_vma_destroy+0x1ae/0x3a0 [i915] > <4> [257.740250] Code: 00 00 00 48 81 c7 c8 00 00 00 e8 ed 08 f0 e0 85 c0 0f 85 78 fe ff ff 48 c7 c6 e8 ec 30 a0 48 c7 c7 da 55 33 a0 e8 42 8c e9 e0 <0f> 0b 8b 83 40 01 00 00 85 c0 0f 84 63 fe ff ff 48 c7 c1 c1 58 33 > <4> [257.740251] RSP: 0018:ffffc90000aafc68 EFLAGS: 00010282 > <4> [257.740252] RAX: 0000000000000000 RBX: ffff8883f7957840 RCX: 0000000000000003 > <4> [257.740253] RDX: 0000000000000046 RSI: 0000000000000006 RDI: ffffffff8212d1b9 > <4> [257.740254] RBP: ffffc90000aafcc8 R08: 0000000000000000 R09: 0000000000000000 > <4> [257.740255] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8883f4d5c2a8 > <4> [257.740256] R13: ffff8883f4d5d680 R14: ffff8883f4d5c668 R15: ffff8883f4d5c2f0 > <4> [257.740257] FS: 00007f777fa8fe40(0000) GS:ffff88840f780000(0000) knlGS:0000000000000000 > <4> [257.740258] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > <4> [257.740259] CR2: 00007f777f6522b0 CR3: 00000003c612a006 CR4: 00000000001606e0 > <4> [257.740260] Call Trace: > <4> [257.740283] gen6_ppgtt_cleanup+0x25/0x60 [i915] > <4> [257.740306] i915_ppgtt_release+0x102/0x290 [i915] > <4> [257.740330] i915_gem_vm_destroy_ioctl+0x7c/0xa0 [i915] > <4> [257.740376] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] > <4> [257.740379] drm_ioctl_kernel+0x83/0xf0 > <4> [257.740382] drm_ioctl+0x2f3/0x3b0 > <4> [257.740422] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] > <4> [257.740426] ? _raw_spin_unlock_irqrestore+0x39/0x60 > <4> [257.740430] do_vfs_ioctl+0xa0/0x6e0 > <4> [257.740433] ? lock_acquire+0xa6/0x1c0 > <4> [257.740436] ? __task_pid_nr_ns+0xb9/0x1f0 > <4> [257.740439] ksys_ioctl+0x35/0x60 > <4> [257.740441] __x64_sys_ioctl+0x11/0x20 > <4> [257.740443] do_syscall_64+0x55/0x1c0 > <4> [257.740445] entry_SYSCALL_64_after_hwframe+0x49/0xbe > > References: e0695db7298e ("drm/i915: Create/destroy VM (ppGTT) for use with contexts") > Fixes: 7f3f317a66ca ("drm/i915: Restore control over ppgtt for context creation ABI") > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 32 +++++++++++++++++++++++++++-- > drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ > 2 files changed, 32 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 9ed41aefb456..5a350251766a 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -1828,11 +1828,33 @@ static void gen6_ppgtt_free_pd(struct gen6_hw_ppgtt *ppgtt) > free_pt(&ppgtt->base.vm, pt); > } > > +struct gen6_ppgtt_cleanup_work { > + struct work_struct base; > + struct i915_vma *vma; > +}; > + > +static void gen6_ppgtt_cleanup_work(struct work_struct *wrk) > +{ > + struct gen6_ppgtt_cleanup_work *work = > + container_of(wrk, typeof(*work), base); > + struct drm_i915_private *i915 = work->vma->vm->i915; Does the sequence need an extra reference on ppgtt for dereferencing the vm here? Alternatively storing i915 in the work struct could work around that. Regards, Tvrtko > + > + mutex_lock(&i915->drm.struct_mutex); > + i915_vma_destroy(work->vma); > + mutex_unlock(&i915->drm.struct_mutex); > + > + kfree(work); > +} > + > static void gen6_ppgtt_cleanup(struct i915_address_space *vm) > { > struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); > + struct gen6_ppgtt_cleanup_work *work = ppgtt->work; > > - i915_vma_destroy(ppgtt->vma); > + /* FIXME remove the struct_mutex to bring the locking under control */ > + INIT_WORK(&work->base, gen6_ppgtt_cleanup_work); > + work->vma = ppgtt->vma; > + schedule_work(&work->base); > > gen6_ppgtt_free_pd(ppgtt); > gen6_ppgtt_free_scratch(vm); > @@ -2011,9 +2033,13 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) > > ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; > > + ppgtt->work = kmalloc(sizeof(*ppgtt->work), GFP_KERNEL); > + if (!ppgtt->work) > + goto err_free; > + > err = gen6_ppgtt_init_scratch(ppgtt); > if (err) > - goto err_free; > + goto err_work; > > ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); > if (IS_ERR(ppgtt->vma)) { > @@ -2025,6 +2051,8 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) > > err_scratch: > gen6_ppgtt_free_scratch(&ppgtt->base.vm); > +err_work: > + kfree(ppgtt->work); > err_free: > kfree(ppgtt); > return ERR_PTR(err); > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index 98fc71053f7c..38496039456b 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -424,6 +424,8 @@ struct gen6_hw_ppgtt { > > unsigned int pin_count; > bool scan_for_unused_pt; > + > + struct gen6_ppgtt_cleanup_work *work; > }; > > #define __to_gen6_ppgtt(base) container_of(base, struct gen6_hw_ppgtt, base) >
On 23/05/2019 10:21, Tvrtko Ursulin wrote: > > On 23/05/2019 07:49, Chris Wilson wrote: >> We rearranged the vm_destroy_ioctl to avoid taking struct_mutex, little >> realising that buried underneath the gen6 ppgtt release path was a >> struct_mutex requirement (to remove its GGTT vma). Until that >> struct_mutex is vanquished, take a detour in gen6_ppgtt_cleanup to do >> the i915_vma_destroy from inside a worker under the struct_mutex. >> >> <4> [257.740160] WARN_ON(debug_locks && >> !lock_is_held(&(&vma->vm->i915->drm.struct_mutex)->dep_map)) >> <4> [257.740213] WARNING: CPU: 3 PID: 1507 at >> drivers/gpu/drm/i915/i915_vma.c:841 i915_vma_destroy+0x1ae/0x3a0 [i915] >> <4> [257.740214] Modules linked in: snd_hda_codec_hdmi i915 >> x86_pkg_temp_thermal mei_hdcp coretemp crct10dif_pclmul crc32_pclmul >> ghash_clmulni_intel snd_hda_codec_realtek snd_hda_codec_generic >> snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core r8169 realtek >> snd_pcm mei_me mei prime_numbers lpc_ich >> <4> [257.740224] CPU: 3 PID: 1507 Comm: gem_vm_create Tainted: G >> U 5.2.0-rc1-CI-CI_DRM_6118+ #1 >> <4> [257.740225] Hardware name: MSI MS-7924/Z97M-G43(MS-7924), BIOS >> V1.12 02/15/2016 >> <4> [257.740249] RIP: 0010:i915_vma_destroy+0x1ae/0x3a0 [i915] >> <4> [257.740250] Code: 00 00 00 48 81 c7 c8 00 00 00 e8 ed 08 f0 e0 85 >> c0 0f 85 78 fe ff ff 48 c7 c6 e8 ec 30 a0 48 c7 c7 da 55 33 a0 e8 42 >> 8c e9 e0 <0f> 0b 8b 83 40 01 00 00 85 c0 0f 84 63 fe ff ff 48 c7 c1 c1 >> 58 33 >> <4> [257.740251] RSP: 0018:ffffc90000aafc68 EFLAGS: 00010282 >> <4> [257.740252] RAX: 0000000000000000 RBX: ffff8883f7957840 RCX: >> 0000000000000003 >> <4> [257.740253] RDX: 0000000000000046 RSI: 0000000000000006 RDI: >> ffffffff8212d1b9 >> <4> [257.740254] RBP: ffffc90000aafcc8 R08: 0000000000000000 R09: >> 0000000000000000 >> <4> [257.740255] R10: 0000000000000000 R11: 0000000000000000 R12: >> ffff8883f4d5c2a8 >> <4> [257.740256] R13: ffff8883f4d5d680 R14: ffff8883f4d5c668 R15: >> ffff8883f4d5c2f0 >> <4> [257.740257] FS: 00007f777fa8fe40(0000) GS:ffff88840f780000(0000) >> knlGS:0000000000000000 >> <4> [257.740258] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >> <4> [257.740259] CR2: 00007f777f6522b0 CR3: 00000003c612a006 CR4: >> 00000000001606e0 >> <4> [257.740260] Call Trace: >> <4> [257.740283] gen6_ppgtt_cleanup+0x25/0x60 [i915] >> <4> [257.740306] i915_ppgtt_release+0x102/0x290 [i915] >> <4> [257.740330] i915_gem_vm_destroy_ioctl+0x7c/0xa0 [i915] >> <4> [257.740376] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] >> <4> [257.740379] drm_ioctl_kernel+0x83/0xf0 >> <4> [257.740382] drm_ioctl+0x2f3/0x3b0 >> <4> [257.740422] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] >> <4> [257.740426] ? _raw_spin_unlock_irqrestore+0x39/0x60 >> <4> [257.740430] do_vfs_ioctl+0xa0/0x6e0 >> <4> [257.740433] ? lock_acquire+0xa6/0x1c0 >> <4> [257.740436] ? __task_pid_nr_ns+0xb9/0x1f0 >> <4> [257.740439] ksys_ioctl+0x35/0x60 >> <4> [257.740441] __x64_sys_ioctl+0x11/0x20 >> <4> [257.740443] do_syscall_64+0x55/0x1c0 >> <4> [257.740445] entry_SYSCALL_64_after_hwframe+0x49/0xbe >> >> References: e0695db7298e ("drm/i915: Create/destroy VM (ppGTT) for use >> with contexts") >> Fixes: 7f3f317a66ca ("drm/i915: Restore control over ppgtt for context >> creation ABI") >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> --- >> drivers/gpu/drm/i915/i915_gem_gtt.c | 32 +++++++++++++++++++++++++++-- >> drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ >> 2 files changed, 32 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c >> b/drivers/gpu/drm/i915/i915_gem_gtt.c >> index 9ed41aefb456..5a350251766a 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >> @@ -1828,11 +1828,33 @@ static void gen6_ppgtt_free_pd(struct >> gen6_hw_ppgtt *ppgtt) >> free_pt(&ppgtt->base.vm, pt); >> } >> +struct gen6_ppgtt_cleanup_work { >> + struct work_struct base; >> + struct i915_vma *vma; >> +}; >> + >> +static void gen6_ppgtt_cleanup_work(struct work_struct *wrk) >> +{ >> + struct gen6_ppgtt_cleanup_work *work = >> + container_of(wrk, typeof(*work), base); >> + struct drm_i915_private *i915 = work->vma->vm->i915; > > Does the sequence need an extra reference on ppgtt for dereferencing the > vm here? Alternatively storing i915 in the work struct could work around > that. Nope - vma has a pointer to vm as well which it will dereference. So if reference is needed it's needed - it looks like it is to me, since only contexts take them, which vma's still rely on, right? Regards, Tvrtko
Quoting Tvrtko Ursulin (2019-05-23 11:01:09) > > On 23/05/2019 10:21, Tvrtko Ursulin wrote: > > > > On 23/05/2019 07:49, Chris Wilson wrote: > >> We rearranged the vm_destroy_ioctl to avoid taking struct_mutex, little > >> realising that buried underneath the gen6 ppgtt release path was a > >> struct_mutex requirement (to remove its GGTT vma). Until that > >> struct_mutex is vanquished, take a detour in gen6_ppgtt_cleanup to do > >> the i915_vma_destroy from inside a worker under the struct_mutex. > >> > >> <4> [257.740160] WARN_ON(debug_locks && > >> !lock_is_held(&(&vma->vm->i915->drm.struct_mutex)->dep_map)) > >> <4> [257.740213] WARNING: CPU: 3 PID: 1507 at > >> drivers/gpu/drm/i915/i915_vma.c:841 i915_vma_destroy+0x1ae/0x3a0 [i915] > >> <4> [257.740214] Modules linked in: snd_hda_codec_hdmi i915 > >> x86_pkg_temp_thermal mei_hdcp coretemp crct10dif_pclmul crc32_pclmul > >> ghash_clmulni_intel snd_hda_codec_realtek snd_hda_codec_generic > >> snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core r8169 realtek > >> snd_pcm mei_me mei prime_numbers lpc_ich > >> <4> [257.740224] CPU: 3 PID: 1507 Comm: gem_vm_create Tainted: G > >> U 5.2.0-rc1-CI-CI_DRM_6118+ #1 > >> <4> [257.740225] Hardware name: MSI MS-7924/Z97M-G43(MS-7924), BIOS > >> V1.12 02/15/2016 > >> <4> [257.740249] RIP: 0010:i915_vma_destroy+0x1ae/0x3a0 [i915] > >> <4> [257.740250] Code: 00 00 00 48 81 c7 c8 00 00 00 e8 ed 08 f0 e0 85 > >> c0 0f 85 78 fe ff ff 48 c7 c6 e8 ec 30 a0 48 c7 c7 da 55 33 a0 e8 42 > >> 8c e9 e0 <0f> 0b 8b 83 40 01 00 00 85 c0 0f 84 63 fe ff ff 48 c7 c1 c1 > >> 58 33 > >> <4> [257.740251] RSP: 0018:ffffc90000aafc68 EFLAGS: 00010282 > >> <4> [257.740252] RAX: 0000000000000000 RBX: ffff8883f7957840 RCX: > >> 0000000000000003 > >> <4> [257.740253] RDX: 0000000000000046 RSI: 0000000000000006 RDI: > >> ffffffff8212d1b9 > >> <4> [257.740254] RBP: ffffc90000aafcc8 R08: 0000000000000000 R09: > >> 0000000000000000 > >> <4> [257.740255] R10: 0000000000000000 R11: 0000000000000000 R12: > >> ffff8883f4d5c2a8 > >> <4> [257.740256] R13: ffff8883f4d5d680 R14: ffff8883f4d5c668 R15: > >> ffff8883f4d5c2f0 > >> <4> [257.740257] FS: 00007f777fa8fe40(0000) GS:ffff88840f780000(0000) > >> knlGS:0000000000000000 > >> <4> [257.740258] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > >> <4> [257.740259] CR2: 00007f777f6522b0 CR3: 00000003c612a006 CR4: > >> 00000000001606e0 > >> <4> [257.740260] Call Trace: > >> <4> [257.740283] gen6_ppgtt_cleanup+0x25/0x60 [i915] > >> <4> [257.740306] i915_ppgtt_release+0x102/0x290 [i915] > >> <4> [257.740330] i915_gem_vm_destroy_ioctl+0x7c/0xa0 [i915] > >> <4> [257.740376] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] > >> <4> [257.740379] drm_ioctl_kernel+0x83/0xf0 > >> <4> [257.740382] drm_ioctl+0x2f3/0x3b0 > >> <4> [257.740422] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] > >> <4> [257.740426] ? _raw_spin_unlock_irqrestore+0x39/0x60 > >> <4> [257.740430] do_vfs_ioctl+0xa0/0x6e0 > >> <4> [257.740433] ? lock_acquire+0xa6/0x1c0 > >> <4> [257.740436] ? __task_pid_nr_ns+0xb9/0x1f0 > >> <4> [257.740439] ksys_ioctl+0x35/0x60 > >> <4> [257.740441] __x64_sys_ioctl+0x11/0x20 > >> <4> [257.740443] do_syscall_64+0x55/0x1c0 > >> <4> [257.740445] entry_SYSCALL_64_after_hwframe+0x49/0xbe > >> > >> References: e0695db7298e ("drm/i915: Create/destroy VM (ppGTT) for use > >> with contexts") > >> Fixes: 7f3f317a66ca ("drm/i915: Restore control over ppgtt for context > >> creation ABI") > >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > >> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >> --- > >> drivers/gpu/drm/i915/i915_gem_gtt.c | 32 +++++++++++++++++++++++++++-- > >> drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ > >> 2 files changed, 32 insertions(+), 2 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c > >> b/drivers/gpu/drm/i915/i915_gem_gtt.c > >> index 9ed41aefb456..5a350251766a 100644 > >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > >> @@ -1828,11 +1828,33 @@ static void gen6_ppgtt_free_pd(struct > >> gen6_hw_ppgtt *ppgtt) > >> free_pt(&ppgtt->base.vm, pt); > >> } > >> +struct gen6_ppgtt_cleanup_work { > >> + struct work_struct base; > >> + struct i915_vma *vma; > >> +}; > >> + > >> +static void gen6_ppgtt_cleanup_work(struct work_struct *wrk) > >> +{ > >> + struct gen6_ppgtt_cleanup_work *work = > >> + container_of(wrk, typeof(*work), base); > >> + struct drm_i915_private *i915 = work->vma->vm->i915; > > > > Does the sequence need an extra reference on ppgtt for dereferencing the > > vm here? Alternatively storing i915 in the work struct could work around > > that. > > Nope - vma has a pointer to vm as well which it will dereference. So if > reference is needed it's needed - it looks like it is to me, since only > contexts take them, which vma's still rely on, right? Nah, vm is decidedly dead at this point, we just need to stuff the i915 pointer into the cleanup_work. -Chris
Quoting Chris Wilson (2019-05-23 11:07:50) > Quoting Tvrtko Ursulin (2019-05-23 11:01:09) > > > > On 23/05/2019 10:21, Tvrtko Ursulin wrote: > > > > > > On 23/05/2019 07:49, Chris Wilson wrote: > > >> We rearranged the vm_destroy_ioctl to avoid taking struct_mutex, little > > >> realising that buried underneath the gen6 ppgtt release path was a > > >> struct_mutex requirement (to remove its GGTT vma). Until that > > >> struct_mutex is vanquished, take a detour in gen6_ppgtt_cleanup to do > > >> the i915_vma_destroy from inside a worker under the struct_mutex. > > >> > > >> <4> [257.740160] WARN_ON(debug_locks && > > >> !lock_is_held(&(&vma->vm->i915->drm.struct_mutex)->dep_map)) > > >> <4> [257.740213] WARNING: CPU: 3 PID: 1507 at > > >> drivers/gpu/drm/i915/i915_vma.c:841 i915_vma_destroy+0x1ae/0x3a0 [i915] > > >> <4> [257.740214] Modules linked in: snd_hda_codec_hdmi i915 > > >> x86_pkg_temp_thermal mei_hdcp coretemp crct10dif_pclmul crc32_pclmul > > >> ghash_clmulni_intel snd_hda_codec_realtek snd_hda_codec_generic > > >> snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core r8169 realtek > > >> snd_pcm mei_me mei prime_numbers lpc_ich > > >> <4> [257.740224] CPU: 3 PID: 1507 Comm: gem_vm_create Tainted: G > > >> U 5.2.0-rc1-CI-CI_DRM_6118+ #1 > > >> <4> [257.740225] Hardware name: MSI MS-7924/Z97M-G43(MS-7924), BIOS > > >> V1.12 02/15/2016 > > >> <4> [257.740249] RIP: 0010:i915_vma_destroy+0x1ae/0x3a0 [i915] > > >> <4> [257.740250] Code: 00 00 00 48 81 c7 c8 00 00 00 e8 ed 08 f0 e0 85 > > >> c0 0f 85 78 fe ff ff 48 c7 c6 e8 ec 30 a0 48 c7 c7 da 55 33 a0 e8 42 > > >> 8c e9 e0 <0f> 0b 8b 83 40 01 00 00 85 c0 0f 84 63 fe ff ff 48 c7 c1 c1 > > >> 58 33 > > >> <4> [257.740251] RSP: 0018:ffffc90000aafc68 EFLAGS: 00010282 > > >> <4> [257.740252] RAX: 0000000000000000 RBX: ffff8883f7957840 RCX: > > >> 0000000000000003 > > >> <4> [257.740253] RDX: 0000000000000046 RSI: 0000000000000006 RDI: > > >> ffffffff8212d1b9 > > >> <4> [257.740254] RBP: ffffc90000aafcc8 R08: 0000000000000000 R09: > > >> 0000000000000000 > > >> <4> [257.740255] R10: 0000000000000000 R11: 0000000000000000 R12: > > >> ffff8883f4d5c2a8 > > >> <4> [257.740256] R13: ffff8883f4d5d680 R14: ffff8883f4d5c668 R15: > > >> ffff8883f4d5c2f0 > > >> <4> [257.740257] FS: 00007f777fa8fe40(0000) GS:ffff88840f780000(0000) > > >> knlGS:0000000000000000 > > >> <4> [257.740258] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > > >> <4> [257.740259] CR2: 00007f777f6522b0 CR3: 00000003c612a006 CR4: > > >> 00000000001606e0 > > >> <4> [257.740260] Call Trace: > > >> <4> [257.740283] gen6_ppgtt_cleanup+0x25/0x60 [i915] > > >> <4> [257.740306] i915_ppgtt_release+0x102/0x290 [i915] > > >> <4> [257.740330] i915_gem_vm_destroy_ioctl+0x7c/0xa0 [i915] > > >> <4> [257.740376] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] > > >> <4> [257.740379] drm_ioctl_kernel+0x83/0xf0 > > >> <4> [257.740382] drm_ioctl+0x2f3/0x3b0 > > >> <4> [257.740422] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] > > >> <4> [257.740426] ? _raw_spin_unlock_irqrestore+0x39/0x60 > > >> <4> [257.740430] do_vfs_ioctl+0xa0/0x6e0 > > >> <4> [257.740433] ? lock_acquire+0xa6/0x1c0 > > >> <4> [257.740436] ? __task_pid_nr_ns+0xb9/0x1f0 > > >> <4> [257.740439] ksys_ioctl+0x35/0x60 > > >> <4> [257.740441] __x64_sys_ioctl+0x11/0x20 > > >> <4> [257.740443] do_syscall_64+0x55/0x1c0 > > >> <4> [257.740445] entry_SYSCALL_64_after_hwframe+0x49/0xbe > > >> > > >> References: e0695db7298e ("drm/i915: Create/destroy VM (ppGTT) for use > > >> with contexts") > > >> Fixes: 7f3f317a66ca ("drm/i915: Restore control over ppgtt for context > > >> creation ABI") > > >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > >> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > >> --- > > >> drivers/gpu/drm/i915/i915_gem_gtt.c | 32 +++++++++++++++++++++++++++-- > > >> drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ > > >> 2 files changed, 32 insertions(+), 2 deletions(-) > > >> > > >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c > > >> b/drivers/gpu/drm/i915/i915_gem_gtt.c > > >> index 9ed41aefb456..5a350251766a 100644 > > >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > > >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > > >> @@ -1828,11 +1828,33 @@ static void gen6_ppgtt_free_pd(struct > > >> gen6_hw_ppgtt *ppgtt) > > >> free_pt(&ppgtt->base.vm, pt); > > >> } > > >> +struct gen6_ppgtt_cleanup_work { > > >> + struct work_struct base; > > >> + struct i915_vma *vma; > > >> +}; > > >> + > > >> +static void gen6_ppgtt_cleanup_work(struct work_struct *wrk) > > >> +{ > > >> + struct gen6_ppgtt_cleanup_work *work = > > >> + container_of(wrk, typeof(*work), base); > > >> + struct drm_i915_private *i915 = work->vma->vm->i915; > > > > > > Does the sequence need an extra reference on ppgtt for dereferencing the > > > vm here? Alternatively storing i915 in the work struct could work around > > > that. > > > > Nope - vma has a pointer to vm as well which it will dereference. So if > > reference is needed it's needed - it looks like it is to me, since only > > contexts take them, which vma's still rely on, right? > > Nah, vm is decidedly dead at this point, we just need to stuff the i915 > pointer into the cleanup_work. Argh. vma is in ggtt, so vma->vm is alive and kicking -- not the vm we just destroyed. -Chris
On 23/05/2019 11:14, Chris Wilson wrote: > Quoting Chris Wilson (2019-05-23 11:07:50) >> Quoting Tvrtko Ursulin (2019-05-23 11:01:09) >>> >>> On 23/05/2019 10:21, Tvrtko Ursulin wrote: >>>> >>>> On 23/05/2019 07:49, Chris Wilson wrote: >>>>> We rearranged the vm_destroy_ioctl to avoid taking struct_mutex, little >>>>> realising that buried underneath the gen6 ppgtt release path was a >>>>> struct_mutex requirement (to remove its GGTT vma). Until that >>>>> struct_mutex is vanquished, take a detour in gen6_ppgtt_cleanup to do >>>>> the i915_vma_destroy from inside a worker under the struct_mutex. >>>>> >>>>> <4> [257.740160] WARN_ON(debug_locks && >>>>> !lock_is_held(&(&vma->vm->i915->drm.struct_mutex)->dep_map)) >>>>> <4> [257.740213] WARNING: CPU: 3 PID: 1507 at >>>>> drivers/gpu/drm/i915/i915_vma.c:841 i915_vma_destroy+0x1ae/0x3a0 [i915] >>>>> <4> [257.740214] Modules linked in: snd_hda_codec_hdmi i915 >>>>> x86_pkg_temp_thermal mei_hdcp coretemp crct10dif_pclmul crc32_pclmul >>>>> ghash_clmulni_intel snd_hda_codec_realtek snd_hda_codec_generic >>>>> snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core r8169 realtek >>>>> snd_pcm mei_me mei prime_numbers lpc_ich >>>>> <4> [257.740224] CPU: 3 PID: 1507 Comm: gem_vm_create Tainted: G >>>>> U 5.2.0-rc1-CI-CI_DRM_6118+ #1 >>>>> <4> [257.740225] Hardware name: MSI MS-7924/Z97M-G43(MS-7924), BIOS >>>>> V1.12 02/15/2016 >>>>> <4> [257.740249] RIP: 0010:i915_vma_destroy+0x1ae/0x3a0 [i915] >>>>> <4> [257.740250] Code: 00 00 00 48 81 c7 c8 00 00 00 e8 ed 08 f0 e0 85 >>>>> c0 0f 85 78 fe ff ff 48 c7 c6 e8 ec 30 a0 48 c7 c7 da 55 33 a0 e8 42 >>>>> 8c e9 e0 <0f> 0b 8b 83 40 01 00 00 85 c0 0f 84 63 fe ff ff 48 c7 c1 c1 >>>>> 58 33 >>>>> <4> [257.740251] RSP: 0018:ffffc90000aafc68 EFLAGS: 00010282 >>>>> <4> [257.740252] RAX: 0000000000000000 RBX: ffff8883f7957840 RCX: >>>>> 0000000000000003 >>>>> <4> [257.740253] RDX: 0000000000000046 RSI: 0000000000000006 RDI: >>>>> ffffffff8212d1b9 >>>>> <4> [257.740254] RBP: ffffc90000aafcc8 R08: 0000000000000000 R09: >>>>> 0000000000000000 >>>>> <4> [257.740255] R10: 0000000000000000 R11: 0000000000000000 R12: >>>>> ffff8883f4d5c2a8 >>>>> <4> [257.740256] R13: ffff8883f4d5d680 R14: ffff8883f4d5c668 R15: >>>>> ffff8883f4d5c2f0 >>>>> <4> [257.740257] FS: 00007f777fa8fe40(0000) GS:ffff88840f780000(0000) >>>>> knlGS:0000000000000000 >>>>> <4> [257.740258] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >>>>> <4> [257.740259] CR2: 00007f777f6522b0 CR3: 00000003c612a006 CR4: >>>>> 00000000001606e0 >>>>> <4> [257.740260] Call Trace: >>>>> <4> [257.740283] gen6_ppgtt_cleanup+0x25/0x60 [i915] >>>>> <4> [257.740306] i915_ppgtt_release+0x102/0x290 [i915] >>>>> <4> [257.740330] i915_gem_vm_destroy_ioctl+0x7c/0xa0 [i915] >>>>> <4> [257.740376] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] >>>>> <4> [257.740379] drm_ioctl_kernel+0x83/0xf0 >>>>> <4> [257.740382] drm_ioctl+0x2f3/0x3b0 >>>>> <4> [257.740422] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] >>>>> <4> [257.740426] ? _raw_spin_unlock_irqrestore+0x39/0x60 >>>>> <4> [257.740430] do_vfs_ioctl+0xa0/0x6e0 >>>>> <4> [257.740433] ? lock_acquire+0xa6/0x1c0 >>>>> <4> [257.740436] ? __task_pid_nr_ns+0xb9/0x1f0 >>>>> <4> [257.740439] ksys_ioctl+0x35/0x60 >>>>> <4> [257.740441] __x64_sys_ioctl+0x11/0x20 >>>>> <4> [257.740443] do_syscall_64+0x55/0x1c0 >>>>> <4> [257.740445] entry_SYSCALL_64_after_hwframe+0x49/0xbe >>>>> >>>>> References: e0695db7298e ("drm/i915: Create/destroy VM (ppGTT) for use >>>>> with contexts") >>>>> Fixes: 7f3f317a66ca ("drm/i915: Restore control over ppgtt for context >>>>> creation ABI") >>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>>>> --- >>>>> drivers/gpu/drm/i915/i915_gem_gtt.c | 32 +++++++++++++++++++++++++++-- >>>>> drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ >>>>> 2 files changed, 32 insertions(+), 2 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c >>>>> b/drivers/gpu/drm/i915/i915_gem_gtt.c >>>>> index 9ed41aefb456..5a350251766a 100644 >>>>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >>>>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >>>>> @@ -1828,11 +1828,33 @@ static void gen6_ppgtt_free_pd(struct >>>>> gen6_hw_ppgtt *ppgtt) >>>>> free_pt(&ppgtt->base.vm, pt); >>>>> } >>>>> +struct gen6_ppgtt_cleanup_work { >>>>> + struct work_struct base; >>>>> + struct i915_vma *vma; >>>>> +}; >>>>> + >>>>> +static void gen6_ppgtt_cleanup_work(struct work_struct *wrk) >>>>> +{ >>>>> + struct gen6_ppgtt_cleanup_work *work = >>>>> + container_of(wrk, typeof(*work), base); >>>>> + struct drm_i915_private *i915 = work->vma->vm->i915; >>>> >>>> Does the sequence need an extra reference on ppgtt for dereferencing the >>>> vm here? Alternatively storing i915 in the work struct could work around >>>> that. >>> >>> Nope - vma has a pointer to vm as well which it will dereference. So if >>> reference is needed it's needed - it looks like it is to me, since only >>> contexts take them, which vma's still rely on, right? >> >> Nah, vm is decidedly dead at this point, we just need to stuff the i915 >> pointer into the cleanup_work. > > Argh. vma is in ggtt, so vma->vm is alive and kicking -- not the vm we > just destroyed. Why is this an argh? :) Doesn't it mean you were right - just storing a pointer to i915 in work struct should work. I missed the fact they are two separate VMs. Regards, Tvrtko
On 23/05/2019 11:19, Tvrtko Ursulin wrote: > > On 23/05/2019 11:14, Chris Wilson wrote: >> Quoting Chris Wilson (2019-05-23 11:07:50) >>> Quoting Tvrtko Ursulin (2019-05-23 11:01:09) >>>> >>>> On 23/05/2019 10:21, Tvrtko Ursulin wrote: >>>>> >>>>> On 23/05/2019 07:49, Chris Wilson wrote: >>>>>> We rearranged the vm_destroy_ioctl to avoid taking struct_mutex, >>>>>> little >>>>>> realising that buried underneath the gen6 ppgtt release path was a >>>>>> struct_mutex requirement (to remove its GGTT vma). Until that >>>>>> struct_mutex is vanquished, take a detour in gen6_ppgtt_cleanup to do >>>>>> the i915_vma_destroy from inside a worker under the struct_mutex. >>>>>> >>>>>> <4> [257.740160] WARN_ON(debug_locks && >>>>>> !lock_is_held(&(&vma->vm->i915->drm.struct_mutex)->dep_map)) >>>>>> <4> [257.740213] WARNING: CPU: 3 PID: 1507 at >>>>>> drivers/gpu/drm/i915/i915_vma.c:841 i915_vma_destroy+0x1ae/0x3a0 >>>>>> [i915] >>>>>> <4> [257.740214] Modules linked in: snd_hda_codec_hdmi i915 >>>>>> x86_pkg_temp_thermal mei_hdcp coretemp crct10dif_pclmul crc32_pclmul >>>>>> ghash_clmulni_intel snd_hda_codec_realtek snd_hda_codec_generic >>>>>> snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core r8169 realtek >>>>>> snd_pcm mei_me mei prime_numbers lpc_ich >>>>>> <4> [257.740224] CPU: 3 PID: 1507 Comm: gem_vm_create Tainted: G >>>>>> U 5.2.0-rc1-CI-CI_DRM_6118+ #1 >>>>>> <4> [257.740225] Hardware name: MSI MS-7924/Z97M-G43(MS-7924), BIOS >>>>>> V1.12 02/15/2016 >>>>>> <4> [257.740249] RIP: 0010:i915_vma_destroy+0x1ae/0x3a0 [i915] >>>>>> <4> [257.740250] Code: 00 00 00 48 81 c7 c8 00 00 00 e8 ed 08 f0 >>>>>> e0 85 >>>>>> c0 0f 85 78 fe ff ff 48 c7 c6 e8 ec 30 a0 48 c7 c7 da 55 33 a0 e8 42 >>>>>> 8c e9 e0 <0f> 0b 8b 83 40 01 00 00 85 c0 0f 84 63 fe ff ff 48 c7 >>>>>> c1 c1 >>>>>> 58 33 >>>>>> <4> [257.740251] RSP: 0018:ffffc90000aafc68 EFLAGS: 00010282 >>>>>> <4> [257.740252] RAX: 0000000000000000 RBX: ffff8883f7957840 RCX: >>>>>> 0000000000000003 >>>>>> <4> [257.740253] RDX: 0000000000000046 RSI: 0000000000000006 RDI: >>>>>> ffffffff8212d1b9 >>>>>> <4> [257.740254] RBP: ffffc90000aafcc8 R08: 0000000000000000 R09: >>>>>> 0000000000000000 >>>>>> <4> [257.740255] R10: 0000000000000000 R11: 0000000000000000 R12: >>>>>> ffff8883f4d5c2a8 >>>>>> <4> [257.740256] R13: ffff8883f4d5d680 R14: ffff8883f4d5c668 R15: >>>>>> ffff8883f4d5c2f0 >>>>>> <4> [257.740257] FS: 00007f777fa8fe40(0000) >>>>>> GS:ffff88840f780000(0000) >>>>>> knlGS:0000000000000000 >>>>>> <4> [257.740258] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >>>>>> <4> [257.740259] CR2: 00007f777f6522b0 CR3: 00000003c612a006 CR4: >>>>>> 00000000001606e0 >>>>>> <4> [257.740260] Call Trace: >>>>>> <4> [257.740283] gen6_ppgtt_cleanup+0x25/0x60 [i915] >>>>>> <4> [257.740306] i915_ppgtt_release+0x102/0x290 [i915] >>>>>> <4> [257.740330] i915_gem_vm_destroy_ioctl+0x7c/0xa0 [i915] >>>>>> <4> [257.740376] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] >>>>>> <4> [257.740379] drm_ioctl_kernel+0x83/0xf0 >>>>>> <4> [257.740382] drm_ioctl+0x2f3/0x3b0 >>>>>> <4> [257.740422] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] >>>>>> <4> [257.740426] ? _raw_spin_unlock_irqrestore+0x39/0x60 >>>>>> <4> [257.740430] do_vfs_ioctl+0xa0/0x6e0 >>>>>> <4> [257.740433] ? lock_acquire+0xa6/0x1c0 >>>>>> <4> [257.740436] ? __task_pid_nr_ns+0xb9/0x1f0 >>>>>> <4> [257.740439] ksys_ioctl+0x35/0x60 >>>>>> <4> [257.740441] __x64_sys_ioctl+0x11/0x20 >>>>>> <4> [257.740443] do_syscall_64+0x55/0x1c0 >>>>>> <4> [257.740445] entry_SYSCALL_64_after_hwframe+0x49/0xbe >>>>>> >>>>>> References: e0695db7298e ("drm/i915: Create/destroy VM (ppGTT) for >>>>>> use >>>>>> with contexts") >>>>>> Fixes: 7f3f317a66ca ("drm/i915: Restore control over ppgtt for >>>>>> context >>>>>> creation ABI") >>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>>>>> --- >>>>>> drivers/gpu/drm/i915/i915_gem_gtt.c | 32 >>>>>> +++++++++++++++++++++++++++-- >>>>>> drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ >>>>>> 2 files changed, 32 insertions(+), 2 deletions(-) >>>>>> >>>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c >>>>>> b/drivers/gpu/drm/i915/i915_gem_gtt.c >>>>>> index 9ed41aefb456..5a350251766a 100644 >>>>>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >>>>>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >>>>>> @@ -1828,11 +1828,33 @@ static void gen6_ppgtt_free_pd(struct >>>>>> gen6_hw_ppgtt *ppgtt) >>>>>> free_pt(&ppgtt->base.vm, pt); >>>>>> } >>>>>> +struct gen6_ppgtt_cleanup_work { >>>>>> + struct work_struct base; >>>>>> + struct i915_vma *vma; >>>>>> +}; >>>>>> + >>>>>> +static void gen6_ppgtt_cleanup_work(struct work_struct *wrk) >>>>>> +{ >>>>>> + struct gen6_ppgtt_cleanup_work *work = >>>>>> + container_of(wrk, typeof(*work), base); >>>>>> + struct drm_i915_private *i915 = work->vma->vm->i915; >>>>> >>>>> Does the sequence need an extra reference on ppgtt for >>>>> dereferencing the >>>>> vm here? Alternatively storing i915 in the work struct could work >>>>> around >>>>> that. >>>> >>>> Nope - vma has a pointer to vm as well which it will dereference. So if >>>> reference is needed it's needed - it looks like it is to me, since only >>>> contexts take them, which vma's still rely on, right? >>> >>> Nah, vm is decidedly dead at this point, we just need to stuff the i915 >>> pointer into the cleanup_work. >> >> Argh. vma is in ggtt, so vma->vm is alive and kicking -- not the vm we >> just destroyed. > > Why is this an argh? :) Doesn't it mean you were right - just storing a > pointer to i915 in work struct should work. I missed the fact they are > two separate VMs. Apart from the locally dereferenced vma being the same ggtt one... :)) Okay.. Is it worth a comment? Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
Quoting Tvrtko Ursulin (2019-05-23 11:19:30) > > On 23/05/2019 11:14, Chris Wilson wrote: > > Quoting Chris Wilson (2019-05-23 11:07:50) > >> Quoting Tvrtko Ursulin (2019-05-23 11:01:09) > >>> > >>> On 23/05/2019 10:21, Tvrtko Ursulin wrote: > >>>> > >>>> On 23/05/2019 07:49, Chris Wilson wrote: > >>>>> We rearranged the vm_destroy_ioctl to avoid taking struct_mutex, little > >>>>> realising that buried underneath the gen6 ppgtt release path was a > >>>>> struct_mutex requirement (to remove its GGTT vma). Until that > >>>>> struct_mutex is vanquished, take a detour in gen6_ppgtt_cleanup to do > >>>>> the i915_vma_destroy from inside a worker under the struct_mutex. > >>>>> > >>>>> <4> [257.740160] WARN_ON(debug_locks && > >>>>> !lock_is_held(&(&vma->vm->i915->drm.struct_mutex)->dep_map)) > >>>>> <4> [257.740213] WARNING: CPU: 3 PID: 1507 at > >>>>> drivers/gpu/drm/i915/i915_vma.c:841 i915_vma_destroy+0x1ae/0x3a0 [i915] > >>>>> <4> [257.740214] Modules linked in: snd_hda_codec_hdmi i915 > >>>>> x86_pkg_temp_thermal mei_hdcp coretemp crct10dif_pclmul crc32_pclmul > >>>>> ghash_clmulni_intel snd_hda_codec_realtek snd_hda_codec_generic > >>>>> snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core r8169 realtek > >>>>> snd_pcm mei_me mei prime_numbers lpc_ich > >>>>> <4> [257.740224] CPU: 3 PID: 1507 Comm: gem_vm_create Tainted: G > >>>>> U 5.2.0-rc1-CI-CI_DRM_6118+ #1 > >>>>> <4> [257.740225] Hardware name: MSI MS-7924/Z97M-G43(MS-7924), BIOS > >>>>> V1.12 02/15/2016 > >>>>> <4> [257.740249] RIP: 0010:i915_vma_destroy+0x1ae/0x3a0 [i915] > >>>>> <4> [257.740250] Code: 00 00 00 48 81 c7 c8 00 00 00 e8 ed 08 f0 e0 85 > >>>>> c0 0f 85 78 fe ff ff 48 c7 c6 e8 ec 30 a0 48 c7 c7 da 55 33 a0 e8 42 > >>>>> 8c e9 e0 <0f> 0b 8b 83 40 01 00 00 85 c0 0f 84 63 fe ff ff 48 c7 c1 c1 > >>>>> 58 33 > >>>>> <4> [257.740251] RSP: 0018:ffffc90000aafc68 EFLAGS: 00010282 > >>>>> <4> [257.740252] RAX: 0000000000000000 RBX: ffff8883f7957840 RCX: > >>>>> 0000000000000003 > >>>>> <4> [257.740253] RDX: 0000000000000046 RSI: 0000000000000006 RDI: > >>>>> ffffffff8212d1b9 > >>>>> <4> [257.740254] RBP: ffffc90000aafcc8 R08: 0000000000000000 R09: > >>>>> 0000000000000000 > >>>>> <4> [257.740255] R10: 0000000000000000 R11: 0000000000000000 R12: > >>>>> ffff8883f4d5c2a8 > >>>>> <4> [257.740256] R13: ffff8883f4d5d680 R14: ffff8883f4d5c668 R15: > >>>>> ffff8883f4d5c2f0 > >>>>> <4> [257.740257] FS: 00007f777fa8fe40(0000) GS:ffff88840f780000(0000) > >>>>> knlGS:0000000000000000 > >>>>> <4> [257.740258] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > >>>>> <4> [257.740259] CR2: 00007f777f6522b0 CR3: 00000003c612a006 CR4: > >>>>> 00000000001606e0 > >>>>> <4> [257.740260] Call Trace: > >>>>> <4> [257.740283] gen6_ppgtt_cleanup+0x25/0x60 [i915] > >>>>> <4> [257.740306] i915_ppgtt_release+0x102/0x290 [i915] > >>>>> <4> [257.740330] i915_gem_vm_destroy_ioctl+0x7c/0xa0 [i915] > >>>>> <4> [257.740376] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] > >>>>> <4> [257.740379] drm_ioctl_kernel+0x83/0xf0 > >>>>> <4> [257.740382] drm_ioctl+0x2f3/0x3b0 > >>>>> <4> [257.740422] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] > >>>>> <4> [257.740426] ? _raw_spin_unlock_irqrestore+0x39/0x60 > >>>>> <4> [257.740430] do_vfs_ioctl+0xa0/0x6e0 > >>>>> <4> [257.740433] ? lock_acquire+0xa6/0x1c0 > >>>>> <4> [257.740436] ? __task_pid_nr_ns+0xb9/0x1f0 > >>>>> <4> [257.740439] ksys_ioctl+0x35/0x60 > >>>>> <4> [257.740441] __x64_sys_ioctl+0x11/0x20 > >>>>> <4> [257.740443] do_syscall_64+0x55/0x1c0 > >>>>> <4> [257.740445] entry_SYSCALL_64_after_hwframe+0x49/0xbe > >>>>> > >>>>> References: e0695db7298e ("drm/i915: Create/destroy VM (ppGTT) for use > >>>>> with contexts") > >>>>> Fixes: 7f3f317a66ca ("drm/i915: Restore control over ppgtt for context > >>>>> creation ABI") > >>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > >>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >>>>> --- > >>>>> drivers/gpu/drm/i915/i915_gem_gtt.c | 32 +++++++++++++++++++++++++++-- > >>>>> drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ > >>>>> 2 files changed, 32 insertions(+), 2 deletions(-) > >>>>> > >>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c > >>>>> b/drivers/gpu/drm/i915/i915_gem_gtt.c > >>>>> index 9ed41aefb456..5a350251766a 100644 > >>>>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > >>>>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > >>>>> @@ -1828,11 +1828,33 @@ static void gen6_ppgtt_free_pd(struct > >>>>> gen6_hw_ppgtt *ppgtt) > >>>>> free_pt(&ppgtt->base.vm, pt); > >>>>> } > >>>>> +struct gen6_ppgtt_cleanup_work { > >>>>> + struct work_struct base; > >>>>> + struct i915_vma *vma; > >>>>> +}; > >>>>> + > >>>>> +static void gen6_ppgtt_cleanup_work(struct work_struct *wrk) > >>>>> +{ > >>>>> + struct gen6_ppgtt_cleanup_work *work = > >>>>> + container_of(wrk, typeof(*work), base); > >>>>> + struct drm_i915_private *i915 = work->vma->vm->i915; > >>>> > >>>> Does the sequence need an extra reference on ppgtt for dereferencing the > >>>> vm here? Alternatively storing i915 in the work struct could work around > >>>> that. > >>> > >>> Nope - vma has a pointer to vm as well which it will dereference. So if > >>> reference is needed it's needed - it looks like it is to me, since only > >>> contexts take them, which vma's still rely on, right? > >> > >> Nah, vm is decidedly dead at this point, we just need to stuff the i915 > >> pointer into the cleanup_work. > > > > Argh. vma is in ggtt, so vma->vm is alive and kicking -- not the vm we > > just destroyed. > > Why is this an argh? :) Doesn't it mean you were right - just storing a > pointer to i915 in work struct should work. I missed the fact they are > two separate VMs. We don't need to store an extra pointer as vma->vm is valid as i915->ggtt isn't destroyed until after the worker is flushed. -Chris
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9ed41aefb456..5a350251766a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1828,11 +1828,33 @@ static void gen6_ppgtt_free_pd(struct gen6_hw_ppgtt *ppgtt) free_pt(&ppgtt->base.vm, pt); } +struct gen6_ppgtt_cleanup_work { + struct work_struct base; + struct i915_vma *vma; +}; + +static void gen6_ppgtt_cleanup_work(struct work_struct *wrk) +{ + struct gen6_ppgtt_cleanup_work *work = + container_of(wrk, typeof(*work), base); + struct drm_i915_private *i915 = work->vma->vm->i915; + + mutex_lock(&i915->drm.struct_mutex); + i915_vma_destroy(work->vma); + mutex_unlock(&i915->drm.struct_mutex); + + kfree(work); +} + static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + struct gen6_ppgtt_cleanup_work *work = ppgtt->work; - i915_vma_destroy(ppgtt->vma); + /* FIXME remove the struct_mutex to bring the locking under control */ + INIT_WORK(&work->base, gen6_ppgtt_cleanup_work); + work->vma = ppgtt->vma; + schedule_work(&work->base); gen6_ppgtt_free_pd(ppgtt); gen6_ppgtt_free_scratch(vm); @@ -2011,9 +2033,13 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; + ppgtt->work = kmalloc(sizeof(*ppgtt->work), GFP_KERNEL); + if (!ppgtt->work) + goto err_free; + err = gen6_ppgtt_init_scratch(ppgtt); if (err) - goto err_free; + goto err_work; ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); if (IS_ERR(ppgtt->vma)) { @@ -2025,6 +2051,8 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) err_scratch: gen6_ppgtt_free_scratch(&ppgtt->base.vm); +err_work: + kfree(ppgtt->work); err_free: kfree(ppgtt); return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 98fc71053f7c..38496039456b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -424,6 +424,8 @@ struct gen6_hw_ppgtt { unsigned int pin_count; bool scan_for_unused_pt; + + struct gen6_ppgtt_cleanup_work *work; }; #define __to_gen6_ppgtt(base) container_of(base, struct gen6_hw_ppgtt, base)
We rearranged the vm_destroy_ioctl to avoid taking struct_mutex, little realising that buried underneath the gen6 ppgtt release path was a struct_mutex requirement (to remove its GGTT vma). Until that struct_mutex is vanquished, take a detour in gen6_ppgtt_cleanup to do the i915_vma_destroy from inside a worker under the struct_mutex. <4> [257.740160] WARN_ON(debug_locks && !lock_is_held(&(&vma->vm->i915->drm.struct_mutex)->dep_map)) <4> [257.740213] WARNING: CPU: 3 PID: 1507 at drivers/gpu/drm/i915/i915_vma.c:841 i915_vma_destroy+0x1ae/0x3a0 [i915] <4> [257.740214] Modules linked in: snd_hda_codec_hdmi i915 x86_pkg_temp_thermal mei_hdcp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core r8169 realtek snd_pcm mei_me mei prime_numbers lpc_ich <4> [257.740224] CPU: 3 PID: 1507 Comm: gem_vm_create Tainted: G U 5.2.0-rc1-CI-CI_DRM_6118+ #1 <4> [257.740225] Hardware name: MSI MS-7924/Z97M-G43(MS-7924), BIOS V1.12 02/15/2016 <4> [257.740249] RIP: 0010:i915_vma_destroy+0x1ae/0x3a0 [i915] <4> [257.740250] Code: 00 00 00 48 81 c7 c8 00 00 00 e8 ed 08 f0 e0 85 c0 0f 85 78 fe ff ff 48 c7 c6 e8 ec 30 a0 48 c7 c7 da 55 33 a0 e8 42 8c e9 e0 <0f> 0b 8b 83 40 01 00 00 85 c0 0f 84 63 fe ff ff 48 c7 c1 c1 58 33 <4> [257.740251] RSP: 0018:ffffc90000aafc68 EFLAGS: 00010282 <4> [257.740252] RAX: 0000000000000000 RBX: ffff8883f7957840 RCX: 0000000000000003 <4> [257.740253] RDX: 0000000000000046 RSI: 0000000000000006 RDI: ffffffff8212d1b9 <4> [257.740254] RBP: ffffc90000aafcc8 R08: 0000000000000000 R09: 0000000000000000 <4> [257.740255] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8883f4d5c2a8 <4> [257.740256] R13: ffff8883f4d5d680 R14: ffff8883f4d5c668 R15: ffff8883f4d5c2f0 <4> [257.740257] FS: 00007f777fa8fe40(0000) GS:ffff88840f780000(0000) knlGS:0000000000000000 <4> [257.740258] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [257.740259] CR2: 00007f777f6522b0 CR3: 00000003c612a006 CR4: 00000000001606e0 <4> [257.740260] Call Trace: <4> [257.740283] gen6_ppgtt_cleanup+0x25/0x60 [i915] <4> [257.740306] i915_ppgtt_release+0x102/0x290 [i915] <4> [257.740330] i915_gem_vm_destroy_ioctl+0x7c/0xa0 [i915] <4> [257.740376] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] <4> [257.740379] drm_ioctl_kernel+0x83/0xf0 <4> [257.740382] drm_ioctl+0x2f3/0x3b0 <4> [257.740422] ? i915_gem_vm_create_ioctl+0x160/0x160 [i915] <4> [257.740426] ? _raw_spin_unlock_irqrestore+0x39/0x60 <4> [257.740430] do_vfs_ioctl+0xa0/0x6e0 <4> [257.740433] ? lock_acquire+0xa6/0x1c0 <4> [257.740436] ? __task_pid_nr_ns+0xb9/0x1f0 <4> [257.740439] ksys_ioctl+0x35/0x60 <4> [257.740441] __x64_sys_ioctl+0x11/0x20 <4> [257.740443] do_syscall_64+0x55/0x1c0 <4> [257.740445] entry_SYSCALL_64_after_hwframe+0x49/0xbe References: e0695db7298e ("drm/i915: Create/destroy VM (ppGTT) for use with contexts") Fixes: 7f3f317a66ca ("drm/i915: Restore control over ppgtt for context creation ABI") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 32 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ 2 files changed, 32 insertions(+), 2 deletions(-)