
[v2] RFC drm/i915: Mark runtime_pm as a special class of lock

Message ID 20180712083633.32235-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson July 12, 2018, 8:36 a.m. UTC
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c         |  5 +++++
 drivers/gpu/drm/i915/i915_drv.h         |  1 +
 drivers/gpu/drm/i915/intel_runtime_pm.c | 11 +++++++++++
 3 files changed, 17 insertions(+)

Comments

Chris Wilson July 12, 2018, 8:41 a.m. UTC | #1
Quoting Chris Wilson (2018-07-12 09:36:33)
> [snip]
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 3eba3d1ab5b8..2e6d3259f6d0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -2603,6 +2603,7 @@ static int intel_runtime_suspend(struct device *kdev)
>         DRM_DEBUG_KMS("Suspending device\n");
>  
>         disable_rpm_wakeref_asserts(dev_priv);
> +       lock_map_acquire(&dev_priv->runtime_pm.lock);
>  
>         /*
>          * We are safe here against re-faults, since the fault handler takes
> @@ -2637,11 +2638,13 @@ static int intel_runtime_suspend(struct device *kdev)
>                 i915_gem_init_swizzling(dev_priv);
>                 i915_gem_restore_fences(dev_priv);
>  
> +               lock_map_release(&dev_priv->runtime_pm.lock);
>                 enable_rpm_wakeref_asserts(dev_priv);
>  
>                 return ret;
>         }
>  
> +       lock_map_release(&dev_priv->runtime_pm.lock);

What happens if we don't release the lock here? I think that's what we
want... While suspended we are not allowed to perform any action that
would ordinarily require a wakeref. However that scares me, both for
being incredibly broad, and because I think lockdep is process-centric
and so doesn't track locks in this manner?
-Chris
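
For illustration, a minimal sketch of the variant being asked about, with
a hypothetical rpm_map standing in for dev_priv->runtime_pm.lock (the
fake_* names are made up for this sketch, not part of the patch):

#include <linux/lockdep.h>

static struct lockdep_map rpm_map;

/* runs in one task, e.g. the pm workqueue */
static int fake_runtime_suspend(void)
{
	lock_map_acquire(&rpm_map);
	/* deliberately not released: "held" for the whole suspended period */
	return 0;
}

/* typically runs later, in a different task... */
static int fake_runtime_resume(void)
{
	/* ...so lockdep sees a release without a matching acquire here */
	lock_map_release(&rpm_map);
	return 0;
}

Which is exactly the process-centric limitation raised above: stock
lockdep pairs each acquire with a release in the same task, so a lock
held across the suspended period cannot be expressed this way.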
Daniel Vetter July 12, 2018, 12:58 p.m. UTC | #2
On Thu, Jul 12, 2018 at 09:41:07AM +0100, Chris Wilson wrote:
> Quoting Chris Wilson (2018-07-12 09:36:33)
> > [snip]
> 
> What happens if we don't release the lock here? I think that's what we
> want... While suspended we are not allowed to perform any action that
> would ordinarily require a wakeref. However that scares me, both for
> being incredibly broad, and because I think lockdep is process-centric
> and so doesn't track locks in this manner?

Lockdep requires that acquire & release happen in the same process
context. For dependencies crossing context boundaries we want
cross-release. And yes, I think a cross-release dependency between our
rpm_suspend and rpm_get is required for full annotation. But since
cross-release is stuck in limbo due to meltdown/spectre, that's still a
way off :-/

Also I think if this all works out we should propose it as a patch to core
rpm code (maybe once the cross-release stuff has landed too).
-Daniel
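
To make the same-context constraint concrete, a minimal sketch of the
annotation pattern lockdep supports today (fake_lock is a hypothetical
pseudo-lock; this mirrors what the patch does around suspend/resume):

#include <linux/lockdep.h>

static struct lockdep_map fake_lock_map;

static void fake_lock_init(void)
{
	static struct lock_class_key key;

	lockdep_init_map(&fake_lock_map, "fake_lock", &key, 0);
}

static void fake_critical_section(void)
{
	/* tell lockdep we "hold" the pseudo-lock from here... */
	lock_map_acquire(&fake_lock_map);

	/* any real lock taken in between is recorded as nesting inside it */

	/* ...to here, in the same task context */
	lock_map_release(&fake_lock_map);
}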
Chris Wilson July 13, 2018, 1:29 p.m. UTC | #3
Quoting Daniel Vetter (2018-07-12 13:58:11)
> On Thu, Jul 12, 2018 at 09:41:07AM +0100, Chris Wilson wrote:
> > [snip]
> 
> Lockdep requires that acquire & release happen in the same process
> context. For dependencies crossing context boundaries we want
> cross-release. And yes, I think a cross-release dependency between our
> rpm_suspend and rpm_get is required for full annotation. But since
> cross-release is stuck in limbo due to meltdown/spectre, that's still a
> way off :-/

Bah, we can't do it without cross-release as we pass our wakelock around
a lot. We start off with an unbalanced lock and never recover. Drat, I
was hoping this would make verifying the vm.mutex vs runtime_pm more
convincing.
-Chris
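
A sketch of the handover that defeats the annotation (struct retire_work
and the helpers are hypothetical; i915 moves wakerefs between tasks like
this in many places):

#include <linux/slab.h>
#include <linux/workqueue.h>

struct retire_work {
	struct work_struct base;
	struct drm_i915_private *i915;
};

/* task B: drops a "lock" this task never acquired */
static void retire_worker(struct work_struct *work)
{
	struct retire_work *rw = container_of(work, struct retire_work, base);

	intel_runtime_pm_put(rw->i915); /* lockdep: release without acquire */
	kfree(rw);
}

/* task A: takes the wakeref, which read-acquires runtime_pm.lock */
static void submit(struct drm_i915_private *i915)
{
	struct retire_work *rw = kzalloc(sizeof(*rw), GFP_KERNEL);

	if (!rw)
		return;

	rw->i915 = i915;
	INIT_WORK(&rw->base, retire_worker);

	intel_runtime_pm_get(i915);
	queue_work(system_wq, &rw->base); /* wakeref ownership moves to the worker */
}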
Daniel Vetter Aug. 6, 2018, 3:58 p.m. UTC | #4
On Fri, Jul 13, 2018 at 02:29:58PM +0100, Chris Wilson wrote:
> Quoting Daniel Vetter (2018-07-12 13:58:11)
> > [snip]
> 
> Bah, we can't do it without cross-release as we pass our wakelock around
> a lot. We start off with an unbalanced lock and never recover. Drat, I
> was hoping this would make verifying the vm.mutex vs runtime_pm more
> convincing.

Yes, rpm_get/put is essentially a full rwsemaphore which can also move
between processes. It's the most evil of locks, and cross-release would
help a lot.

But given how hard a time cross-release is having with just the minimal
waitqueue annotations, and how much fun everyone has making rpm not
deadlock too much, I'm not really holding out for proper cross-release
annotations for rpm in upstream. And we really need them in upstream or
we'll spend 200% of our time fixing everyone else's bugs :-/
-Daniel
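
If wakerefs never changed hands, that rwsemaphore picture would map onto
an actual rwsem, roughly (rpm_sem and the *_like helpers are
hypothetical, for illustration only):

#include <linux/rwsem.h>

static DECLARE_RWSEM(rpm_sem);

/* every wakeref holder is a reader: many may coexist */
static void rpm_get_like(void)  { down_read(&rpm_sem); }
static void rpm_put_like(void)  { up_read(&rpm_sem); }

/* the suspended period is the write-side critical section */
static void runtime_suspend_like(void) { down_write(&rpm_sem); }
static void runtime_resume_like(void)  { up_write(&rpm_sem); }

In reality both the reader and writer sections start and end in
different tasks, which a real rwsem (and today's lockdep) cannot
express; that is the cross-release gap.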

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3eba3d1ab5b8..2e6d3259f6d0 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2603,6 +2603,7 @@  static int intel_runtime_suspend(struct device *kdev)
 	DRM_DEBUG_KMS("Suspending device\n");
 
 	disable_rpm_wakeref_asserts(dev_priv);
+	lock_map_acquire(&dev_priv->runtime_pm.lock);
 
 	/*
 	 * We are safe here against re-faults, since the fault handler takes
@@ -2637,11 +2638,13 @@  static int intel_runtime_suspend(struct device *kdev)
 		i915_gem_init_swizzling(dev_priv);
 		i915_gem_restore_fences(dev_priv);
 
+		lock_map_release(&dev_priv->runtime_pm.lock);
 		enable_rpm_wakeref_asserts(dev_priv);
 
 		return ret;
 	}
 
+	lock_map_release(&dev_priv->runtime_pm.lock);
 	enable_rpm_wakeref_asserts(dev_priv);
 	WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count));
 
@@ -2696,6 +2699,7 @@  static int intel_runtime_resume(struct device *kdev)
 
 	WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count));
 	disable_rpm_wakeref_asserts(dev_priv);
+	lock_map_acquire(&dev_priv->runtime_pm.lock);
 
 	intel_opregion_notify_adapter(dev_priv, PCI_D0);
 	dev_priv->runtime_pm.suspended = false;
@@ -2737,6 +2741,7 @@  static int intel_runtime_resume(struct device *kdev)
 
 	intel_enable_ipc(dev_priv);
 
+	lock_map_release(&dev_priv->runtime_pm.lock);
 	enable_rpm_wakeref_asserts(dev_priv);
 
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 01dd29837233..be50a0e6d8c9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1251,6 +1251,7 @@  struct skl_wm_params {
  * For more, read the Documentation/power/runtime_pm.txt.
  */
 struct i915_runtime_pm {
+	struct lockdep_map lock;
 	atomic_t wakeref_count;
 	bool suspended;
 	bool irqs_enabled;
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 6b5aa3b074ec..dc76a3bab1e3 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -3697,6 +3697,8 @@  void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
 	ret = pm_runtime_get_sync(kdev);
 	WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
 
+	lock_map_acquire_read(&dev_priv->runtime_pm.lock);
+
 	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
 	assert_rpm_wakelock_held(dev_priv);
 }
@@ -3730,6 +3732,8 @@  bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
 			return false;
 	}
 
+	lock_map_acquire_read(&dev_priv->runtime_pm.lock);
+
 	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
 	assert_rpm_wakelock_held(dev_priv);
 
@@ -3761,6 +3765,8 @@  void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
 	assert_rpm_wakelock_held(dev_priv);
 	pm_runtime_get_noresume(kdev);
 
+	lock_map_acquire_read(&dev_priv->runtime_pm.lock);
+
 	atomic_inc(&dev_priv->runtime_pm.wakeref_count);
 }
 
@@ -3780,6 +3786,8 @@  void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
 	assert_rpm_wakelock_held(dev_priv);
 	atomic_dec(&dev_priv->runtime_pm.wakeref_count);
 
+	lock_map_release(&dev_priv->runtime_pm.lock);
+
 	pm_runtime_mark_last_busy(kdev);
 	pm_runtime_put_autosuspend(kdev);
 }
@@ -3796,9 +3804,12 @@  void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
  */
 void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
 {
+	static struct lock_class_key lock_key;
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	struct device *kdev = &pdev->dev;
 
+	lockdep_init_map(&dev_priv->runtime_pm.lock,
+			 "i915->runtime_pm", &lock_key, 0);
 	pm_runtime_set_autosuspend_delay(kdev, 10000); /* 10s */
 	pm_runtime_mark_last_busy(kdev);