
[2/3] drm/i915: Use RPM as the barrier for controlling user mmap access

Message ID 20161011143759.28052-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Oct. 11, 2016, 2:37 p.m. UTC
We can remove the false coupling between RPM and struct_mutex by the
observation that we can use the RPM wakeref as the barrier around user
mmap access. That is, since we tear down the user's PTEs atomically from
within rpm suspend, and faulting in new PTEs requires the rpm wakeref,
no user access is possible through those PTEs without RPM being awake.
Having made that observation, we can then remove the presumption of
having to take rpm outside of struct_mutex and so allow fine-grained
acquisition of a wakeref around hw access rather than having to remember
to acquire the wakeref early on.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c    | 22 ----------------------
 drivers/gpu/drm/i915/i915_drv.c        | 19 -------------------
 drivers/gpu/drm/i915/i915_gem.c        | 14 +++++---------
 drivers/gpu/drm/i915/i915_gem_gtt.c    | 17 +++++++++++++----
 drivers/gpu/drm/i915/i915_gem_tiling.c |  4 ----
 drivers/gpu/drm/i915/intel_uncore.c    |  6 +++---
 6 files changed, 21 insertions(+), 61 deletions(-)
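
For context, a minimal sketch of the suspend/fault ordering the message
relies on. The helper names (kdev_to_i915(), to_intel_bo(), the
mm.userfault_list) are taken from the driver as it stood in this series;
the bodies are heavily simplified and hypothetical, not the real code.

/* Sketch only: simplified from the i915 paths touched by this patch. */

static int intel_runtime_suspend(struct device *kdev)
{
	struct drm_i915_private *dev_priv = kdev_to_i915(kdev);

	/*
	 * Tear down every user PTE pointing at the GGTT. Any further
	 * user access must go back through i915_gem_fault(), so no
	 * access can happen while the device is asleep.
	 */
	i915_gem_release_all_mmaps(dev_priv);

	/* ... power the device down ... */
	return 0;
}

int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
{
	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);

	/* Rebuilding the PTE requires the device to be awake ... */
	intel_runtime_pm_get(dev_priv);

	/* ... pin the object into the GGTT and insert the PTE ... */

	/*
	 * Only objects with live PTEs sit on the userfault_list (taken
	 * under mm.userfault_lock in the real code), and a live PTE
	 * implies the wakeref was held when it was inserted.
	 */
	assert_rpm_wakelock_held(dev_priv);
	list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);

	intel_runtime_pm_put(dev_priv);
	return VM_FAULT_NOPAGE;
}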

Comments

Daniel Vetter Oct. 13, 2016, 2:44 p.m. UTC | #1
On Tue, Oct 11, 2016 at 03:37:58PM +0100, Chris Wilson wrote:
> We can remove the false coupling between RPM and struct_mutex by the
> observation that we can use the RPM wakeref as the barrier around user
> mmap access. That is, since we tear down the user's PTEs atomically from
> within rpm suspend, and faulting in new PTEs requires the rpm wakeref,
> no user access is possible through those PTEs without RPM being awake.
> Having made that observation, we can then remove the presumption of
> having to take rpm outside of struct_mutex and so allow fine-grained
> acquisition of a wakeref around hw access rather than having to remember
> to acquire the wakeref early on.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Imre Deak <imre.deak@intel.com>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c    | 22 ----------------------
>  drivers/gpu/drm/i915/i915_drv.c        | 19 -------------------
>  drivers/gpu/drm/i915/i915_gem.c        | 14 +++++---------
>  drivers/gpu/drm/i915/i915_gem_gtt.c    | 17 +++++++++++++----
>  drivers/gpu/drm/i915/i915_gem_tiling.c |  4 ----
>  drivers/gpu/drm/i915/intel_uncore.c    |  6 +++---
>  6 files changed, 21 insertions(+), 61 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d4ecc5283c2a..d4779abd89e3 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1400,14 +1400,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  static int ironlake_drpc_info(struct seq_file *m)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> -	struct drm_device *dev = &dev_priv->drm;
>  	u32 rgvmodectl, rstdbyctl;
>  	u16 crstandvid;
> -	int ret;
>  
> -	ret = mutex_lock_interruptible(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
>  	intel_runtime_pm_get(dev_priv);
>  
>  	rgvmodectl = I915_READ(MEMMODECTL);
> @@ -1415,7 +1410,6 @@ static int ironlake_drpc_info(struct seq_file *m)
>  	crstandvid = I915_READ16(CRSTANDVID);
>  
>  	intel_runtime_pm_put(dev_priv);
> -	mutex_unlock(&dev->struct_mutex);
>  
>  	seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
>  	seq_printf(m, "Boost freq: %d\n",
> @@ -2093,12 +2087,7 @@ static const char *swizzle_string(unsigned swizzle)
>  static int i915_swizzle_info(struct seq_file *m, void *data)
>  {
>  	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> -	struct drm_device *dev = &dev_priv->drm;
> -	int ret;
>  
> -	ret = mutex_lock_interruptible(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
>  	intel_runtime_pm_get(dev_priv);
>  
>  	seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
> @@ -2138,7 +2127,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
>  		seq_puts(m, "L-shaped memory detected\n");
>  
>  	intel_runtime_pm_put(dev_priv);
> -	mutex_unlock(&dev->struct_mutex);
>  
>  	return 0;
>  }
> @@ -4797,13 +4785,9 @@ i915_wedged_set(void *data, u64 val)
>  	if (i915_reset_in_progress(&dev_priv->gpu_error))
>  		return -EAGAIN;
>  
> -	intel_runtime_pm_get(dev_priv);
> -
>  	i915_handle_error(dev_priv, val,
>  			  "Manually setting wedged to %llu", val);
>  
> -	intel_runtime_pm_put(dev_priv);
> -
>  	return 0;
>  }
>  
> @@ -5038,22 +5022,16 @@ static int
>  i915_cache_sharing_get(void *data, u64 *val)
>  {
>  	struct drm_i915_private *dev_priv = data;
> -	struct drm_device *dev = &dev_priv->drm;
>  	u32 snpcr;
> -	int ret;
>  
>  	if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
>  		return -ENODEV;
>  
> -	ret = mutex_lock_interruptible(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
>  	intel_runtime_pm_get(dev_priv);
>  
>  	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
>  
>  	intel_runtime_pm_put(dev_priv);
> -	mutex_unlock(&dev->struct_mutex);
>  
>  	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 89d322215c84..31eee32fcf6d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -2313,24 +2313,6 @@ static int intel_runtime_suspend(struct device *kdev)
>  
>  	DRM_DEBUG_KMS("Suspending device\n");
>  
> -	/*
> -	 * We could deadlock here in case another thread holding struct_mutex
> -	 * calls RPM suspend concurrently, since the RPM suspend will wait
> -	 * first for this RPM suspend to finish. In this case the concurrent
> -	 * RPM resume will be followed by its RPM suspend counterpart. Still
> -	 * for consistency return -EAGAIN, which will reschedule this suspend.
> -	 */
> -	if (!mutex_trylock(&dev->struct_mutex)) {
> -		DRM_DEBUG_KMS("device lock contention, deffering suspend\n");
> -		/*
> -		 * Bump the expiration timestamp, otherwise the suspend won't
> -		 * be rescheduled.
> -		 */
> -		pm_runtime_mark_last_busy(kdev);
> -
> -		return -EAGAIN;
> -	}
> -
>  	disable_rpm_wakeref_asserts(dev_priv);
>  
>  	/*
> @@ -2338,7 +2320,6 @@ static int intel_runtime_suspend(struct device *kdev)
>  	 * an RPM reference.
>  	 */
>  	i915_gem_release_all_mmaps(dev_priv);
> -	mutex_unlock(&dev->struct_mutex);
>  
>  	intel_guc_suspend(dev);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 91910ffe0964..587a91af5a3f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1469,7 +1469,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  	 */
>  	if (!i915_gem_object_has_struct_page(obj) ||
>  	    cpu_write_needs_clflush(obj)) {
> +		intel_runtime_pm_get(dev_priv);
>  		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
> +		intel_runtime_pm_put(dev_priv);

I'd move the rpm_get/put into gtt_pwrite_fast - there's only one caller,
and it's in the spirit of this patch of moving the rpm get/put closer to
where we really need it.
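
(For illustration, a hedged sketch of that placement -- the body is
elided and the signature abbreviated from the call site above, so this
is not the actual function:)

static int
i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	int ret = 0;

	/* The GGTT write path touches hardware, so hold the wakeref
	 * for exactly the duration of the copy. */
	intel_runtime_pm_get(i915);

	/* ... pin obj into the GGTT aperture and copy from user ... */

	intel_runtime_pm_put(i915);
	return ret;
}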

>  		/* Note that the gtt paths might fail with non-page-backed user
>  		 * pointers (e.g. gtt mappings when moving data between
>  		 * textures). Fallback to the shmem path in that case. */
> @@ -1850,6 +1852,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
>  	if (ret)
>  		goto err_unpin;
>  
> +	assert_rpm_wakelock_held(dev_priv);
>  	spin_lock(&dev_priv->mm.userfault_lock);
>  	list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
>  	spin_unlock(&dev_priv->mm.userfault_lock);
> @@ -1932,7 +1935,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
>  	 * requirement that operations to the GGTT be made holding the RPM
>  	 * wakeref.
>  	 */
> -	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	assert_rpm_wakelock_held(i915);
>  
>  	spin_lock(&i915->mm.userfault_lock);
>  	if (!list_empty(&obj->userfault_link)) {
> @@ -3469,7 +3472,6 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
>  			       struct drm_file *file)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_i915_gem_caching *args = data;
>  	struct drm_i915_gem_object *obj;
>  	enum i915_cache_level level;
> @@ -3498,11 +3500,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
>  		return -EINVAL;
>  	}
>  
> -	intel_runtime_pm_get(dev_priv);
> -
>  	ret = i915_mutex_lock_interruptible(dev);
>  	if (ret)
> -		goto rpm_put;
> +		return ret;
>  
>  	obj = i915_gem_object_lookup(file, args->handle);
>  	if (!obj) {
> @@ -3511,13 +3511,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
>  	}
>  
>  	ret = i915_gem_object_set_cache_level(obj, level);
> -
>  	i915_gem_object_put(obj);
>  unlock:
>  	mutex_unlock(&dev->struct_mutex);
> -rpm_put:
> -	intel_runtime_pm_put(dev_priv);
> -
>  	return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 2d846aa39ca5..c0a4ba478309 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2596,6 +2596,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
>  			 enum i915_cache_level cache_level,
>  			 u32 flags)
>  {
> +	struct drm_i915_private *i915 = to_i915(vma->vm->dev);
>  	struct drm_i915_gem_object *obj = vma->obj;
>  	u32 pte_flags = 0;
>  	int ret;
> @@ -2608,8 +2609,10 @@ static int ggtt_bind_vma(struct i915_vma *vma,
>  	if (obj->gt_ro)
>  		pte_flags |= PTE_READ_ONLY;
>  
> +	intel_runtime_pm_get(i915);
>  	vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
>  				cache_level, pte_flags);
> +	intel_runtime_pm_put(i915);
>  
>  	/*
>  	 * Without aliasing PPGTT there's no difference between
> @@ -2625,6 +2628,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
>  				 enum i915_cache_level cache_level,
>  				 u32 flags)
>  {
> +	struct drm_i915_private *i915 = to_i915(vma->vm->dev);
>  	u32 pte_flags;
>  	int ret;
>  
> @@ -2639,14 +2643,15 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
>  
>  
>  	if (flags & I915_VMA_GLOBAL_BIND) {
> +		intel_runtime_pm_get(i915);
>  		vma->vm->insert_entries(vma->vm,
>  					vma->pages, vma->node.start,
>  					cache_level, pte_flags);
> +		intel_runtime_pm_put(i915);
>  	}
>  
>  	if (flags & I915_VMA_LOCAL_BIND) {
> -		struct i915_hw_ppgtt *appgtt =
> -			to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
> +		struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
>  		appgtt->base.insert_entries(&appgtt->base,
>  					    vma->pages, vma->node.start,
>  					    cache_level, pte_flags);
> @@ -2657,13 +2662,17 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
>  
>  static void ggtt_unbind_vma(struct i915_vma *vma)
>  {
> -	struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
> +	struct drm_i915_private *i915 = to_i915(vma->vm->dev);
> +	struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
>  	const u64 size = min(vma->size, vma->node.size);
>  
> -	if (vma->flags & I915_VMA_GLOBAL_BIND)
> +	if (vma->flags & I915_VMA_GLOBAL_BIND) {
> +		intel_runtime_pm_get(i915);
>  		vma->vm->clear_range(vma->vm,
>  				     vma->node.start, size,
>  				     true);
> +		intel_runtime_pm_put(i915);
> +	}
>  
>  	if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
>  		appgtt->base.clear_range(&appgtt->base,
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index a14b1e3d4c78..08f796a4f5f6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -204,8 +204,6 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
>  		return -EINVAL;
>  	}
>  
> -	intel_runtime_pm_get(dev_priv);
> -
>  	mutex_lock(&dev->struct_mutex);
>  	if (obj->pin_display || obj->framebuffer_references) {
>  		err = -EBUSY;
> @@ -301,8 +299,6 @@ err:
>  	i915_gem_object_put(obj);
>  	mutex_unlock(&dev->struct_mutex);
>  
> -	intel_runtime_pm_put(dev_priv);
> -
>  	return err;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index e2b188dcf908..e0c3bd941e38 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1364,7 +1364,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  	struct register_whitelist const *entry = whitelist;
>  	unsigned size;
>  	i915_reg_t offset_ldw, offset_udw;
> -	int i, ret = 0;
> +	int i, ret;
>  
>  	for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
>  		if (i915_mmio_reg_offset(entry->offset_ldw) == (reg->offset & -entry->size) &&
> @@ -1386,6 +1386,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  
>  	intel_runtime_pm_get(dev_priv);
>  
> +	ret = 0;
>  	switch (size) {
>  	case 8 | 1:
>  		reg->val = I915_READ64_2x32(offset_ldw, offset_udw);
> @@ -1404,10 +1405,9 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  		break;
>  	default:
>  		ret = -EINVAL;
> -		goto out;
> +		break;
>  	}
>  
> -out:
>  	intel_runtime_pm_put(dev_priv);
>  	return ret;

Spurious hunks in i915_reg_read_ioctl, please remove them from the patch.
With the userfault_list/lock I'm happy with this now, with the 2 nits
addressed.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

>  }
> -- 
> 2.9.3
>
Chris Wilson Oct. 13, 2016, 3:15 p.m. UTC | #2
On Thu, Oct 13, 2016 at 04:44:23PM +0200, Daniel Vetter wrote:
> On Tue, Oct 11, 2016 at 03:37:58PM +0100, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 91910ffe0964..587a91af5a3f 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1469,7 +1469,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >  	 */
> >  	if (!i915_gem_object_has_struct_page(obj) ||
> >  	    cpu_write_needs_clflush(obj)) {
> > +		intel_runtime_pm_get(dev_priv);
> >  		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
> > +		intel_runtime_pm_put(dev_priv);
> 
> I'd move the rpm_get/put into gtt_pwrite_fast - there's only one caller,
> and it's in the spirit of this patch of moving the rpm get/put closer to
> where we really need it.

What I've also done is move rpm_get/rpm_put into i915_gem_release_mmap()
and split out the RPM-suspend only i915_gem_release_all_mmaps() (if I
can think of a good name to better distinguish the two I'll do that as
well). The benefit is that instead of simply asserting in one that we
hold the rpm wakeref, we take it.

i915_gem_runtime_suspend__mmaps() ?
-Chris
Daniel Vetter Oct. 13, 2016, 3:25 p.m. UTC | #3
On Thu, Oct 13, 2016 at 04:15:23PM +0100, Chris Wilson wrote:
> On Thu, Oct 13, 2016 at 04:44:23PM +0200, Daniel Vetter wrote:
> > On Tue, Oct 11, 2016 at 03:37:58PM +0100, Chris Wilson wrote:
> > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > > index 91910ffe0964..587a91af5a3f 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > @@ -1469,7 +1469,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> > >  	 */
> > >  	if (!i915_gem_object_has_struct_page(obj) ||
> > >  	    cpu_write_needs_clflush(obj)) {
> > > +		intel_runtime_pm_get(dev_priv);
> > >  		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
> > > +		intel_runtime_pm_put(dev_priv);
> > 
> > I'd move the rpm_get/put into gtt_pwrite_fast - there's only one caller,
> > and it's in the spirit of this patch of moving the rpm get/put closer to
> > where we really need it.
> 
> What I've also done is move rpm_get/rpm_put into i915_gem_release_mmap()
> and split out the RPM-suspend only i915_gem_release_all_mmaps() (if I
> can think of a good name to better distinguish the two I'll do that as
> well). The benefit being is that instead of simply asserting in one that
> we hold the rpm-wakeref we take it.
> 
> i915_gem_runtime_suspend__mmaps() ?

Or maybe throw a ggtt in there for giggles, but yeah.
-Daniel
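
For illustration, a hedged sketch of the follow-up Chris describes:
taking the wakeref inside i915_gem_release_mmap() and splitting out a
runtime-suspend-only helper. The helper name is only the one floated in
the thread (with Daniel's "ggtt" suggestion), and the bodies are
simplified, not the final upstream code.

void i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	/* Take the wakeref here instead of merely asserting it is held. */
	intel_runtime_pm_get(i915);

	/* ... zap this object's user PTEs and unlink it from
	 *     mm.userfault_list under mm.userfault_lock ... */

	intel_runtime_pm_put(i915);
}

/*
 * Runtime-suspend-only variant: called from intel_runtime_suspend(),
 * which already holds the device awake, so no get/put here and no
 * struct_mutex either.
 */
void i915_gem_runtime_suspend_ggtt_mmaps(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj, *on;

	list_for_each_entry_safe(obj, on,
				 &i915->mm.userfault_list, userfault_link) {
		/* ... zap the user PTEs for obj ... */
	}
}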

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d4ecc5283c2a..d4779abd89e3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1400,14 +1400,9 @@  static int i915_hangcheck_info(struct seq_file *m, void *unused)
 static int ironlake_drpc_info(struct seq_file *m)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
 	u32 rgvmodectl, rstdbyctl;
 	u16 crstandvid;
-	int ret;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
 	intel_runtime_pm_get(dev_priv);
 
 	rgvmodectl = I915_READ(MEMMODECTL);
@@ -1415,7 +1410,6 @@  static int ironlake_drpc_info(struct seq_file *m)
 	crstandvid = I915_READ16(CRSTANDVID);
 
 	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
 	seq_printf(m, "Boost freq: %d\n",
@@ -2093,12 +2087,7 @@  static const char *swizzle_string(unsigned swizzle)
 static int i915_swizzle_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	int ret;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
 	intel_runtime_pm_get(dev_priv);
 
 	seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
@@ -2138,7 +2127,6 @@  static int i915_swizzle_info(struct seq_file *m, void *data)
 		seq_puts(m, "L-shaped memory detected\n");
 
 	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
@@ -4797,13 +4785,9 @@  i915_wedged_set(void *data, u64 val)
 	if (i915_reset_in_progress(&dev_priv->gpu_error))
 		return -EAGAIN;
 
-	intel_runtime_pm_get(dev_priv);
-
 	i915_handle_error(dev_priv, val,
 			  "Manually setting wedged to %llu", val);
 
-	intel_runtime_pm_put(dev_priv);
-
 	return 0;
 }
 
@@ -5038,22 +5022,16 @@  static int
 i915_cache_sharing_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;
-	struct drm_device *dev = &dev_priv->drm;
 	u32 snpcr;
-	int ret;
 
 	if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
 		return -ENODEV;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
 	intel_runtime_pm_get(dev_priv);
 
 	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
 
 	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 89d322215c84..31eee32fcf6d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2313,24 +2313,6 @@  static int intel_runtime_suspend(struct device *kdev)
 
 	DRM_DEBUG_KMS("Suspending device\n");
 
-	/*
-	 * We could deadlock here in case another thread holding struct_mutex
-	 * calls RPM suspend concurrently, since the RPM suspend will wait
-	 * first for this RPM suspend to finish. In this case the concurrent
-	 * RPM resume will be followed by its RPM suspend counterpart. Still
-	 * for consistency return -EAGAIN, which will reschedule this suspend.
-	 */
-	if (!mutex_trylock(&dev->struct_mutex)) {
-		DRM_DEBUG_KMS("device lock contention, deffering suspend\n");
-		/*
-		 * Bump the expiration timestamp, otherwise the suspend won't
-		 * be rescheduled.
-		 */
-		pm_runtime_mark_last_busy(kdev);
-
-		return -EAGAIN;
-	}
-
 	disable_rpm_wakeref_asserts(dev_priv);
 
 	/*
@@ -2338,7 +2320,6 @@  static int intel_runtime_suspend(struct device *kdev)
 	 * an RPM reference.
 	 */
 	i915_gem_release_all_mmaps(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	intel_guc_suspend(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 91910ffe0964..587a91af5a3f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1469,7 +1469,9 @@  i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 */
 	if (!i915_gem_object_has_struct_page(obj) ||
 	    cpu_write_needs_clflush(obj)) {
+		intel_runtime_pm_get(dev_priv);
 		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
+		intel_runtime_pm_put(dev_priv);
 		/* Note that the gtt paths might fail with non-page-backed user
 		 * pointers (e.g. gtt mappings when moving data between
 		 * textures). Fallback to the shmem path in that case. */
@@ -1850,6 +1852,7 @@  int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	if (ret)
 		goto err_unpin;
 
+	assert_rpm_wakelock_held(dev_priv);
 	spin_lock(&dev_priv->mm.userfault_lock);
 	list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
 	spin_unlock(&dev_priv->mm.userfault_lock);
@@ -1932,7 +1935,7 @@  i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	 * requirement that operations to the GGTT be made holding the RPM
 	 * wakeref.
 	 */
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_rpm_wakelock_held(i915);
 
 	spin_lock(&i915->mm.userfault_lock);
 	if (!list_empty(&obj->userfault_link)) {
@@ -3469,7 +3472,6 @@  int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_caching *args = data;
 	struct drm_i915_gem_object *obj;
 	enum i915_cache_level level;
@@ -3498,11 +3500,9 @@  int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	intel_runtime_pm_get(dev_priv);
-
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
-		goto rpm_put;
+		return ret;
 
 	obj = i915_gem_object_lookup(file, args->handle);
 	if (!obj) {
@@ -3511,13 +3511,9 @@  int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	}
 
 	ret = i915_gem_object_set_cache_level(obj, level);
-
 	i915_gem_object_put(obj);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
-rpm_put:
-	intel_runtime_pm_put(dev_priv);
-
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2d846aa39ca5..c0a4ba478309 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2596,6 +2596,7 @@  static int ggtt_bind_vma(struct i915_vma *vma,
 			 enum i915_cache_level cache_level,
 			 u32 flags)
 {
+	struct drm_i915_private *i915 = to_i915(vma->vm->dev);
 	struct drm_i915_gem_object *obj = vma->obj;
 	u32 pte_flags = 0;
 	int ret;
@@ -2608,8 +2609,10 @@  static int ggtt_bind_vma(struct i915_vma *vma,
 	if (obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
 
+	intel_runtime_pm_get(i915);
 	vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
 				cache_level, pte_flags);
+	intel_runtime_pm_put(i915);
 
 	/*
 	 * Without aliasing PPGTT there's no difference between
@@ -2625,6 +2628,7 @@  static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 				 enum i915_cache_level cache_level,
 				 u32 flags)
 {
+	struct drm_i915_private *i915 = to_i915(vma->vm->dev);
 	u32 pte_flags;
 	int ret;
 
@@ -2639,14 +2643,15 @@  static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 
 
 	if (flags & I915_VMA_GLOBAL_BIND) {
+		intel_runtime_pm_get(i915);
 		vma->vm->insert_entries(vma->vm,
 					vma->pages, vma->node.start,
 					cache_level, pte_flags);
+		intel_runtime_pm_put(i915);
 	}
 
 	if (flags & I915_VMA_LOCAL_BIND) {
-		struct i915_hw_ppgtt *appgtt =
-			to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
+		struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
 		appgtt->base.insert_entries(&appgtt->base,
 					    vma->pages, vma->node.start,
 					    cache_level, pte_flags);
@@ -2657,13 +2662,17 @@  static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 
 static void ggtt_unbind_vma(struct i915_vma *vma)
 {
-	struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
+	struct drm_i915_private *i915 = to_i915(vma->vm->dev);
+	struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
 	const u64 size = min(vma->size, vma->node.size);
 
-	if (vma->flags & I915_VMA_GLOBAL_BIND)
+	if (vma->flags & I915_VMA_GLOBAL_BIND) {
+		intel_runtime_pm_get(i915);
 		vma->vm->clear_range(vma->vm,
 				     vma->node.start, size,
 				     true);
+		intel_runtime_pm_put(i915);
+	}
 
 	if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
 		appgtt->base.clear_range(&appgtt->base,
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index a14b1e3d4c78..08f796a4f5f6 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -204,8 +204,6 @@  i915_gem_set_tiling(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	intel_runtime_pm_get(dev_priv);
-
 	mutex_lock(&dev->struct_mutex);
 	if (obj->pin_display || obj->framebuffer_references) {
 		err = -EBUSY;
@@ -301,8 +299,6 @@  err:
 	i915_gem_object_put(obj);
 	mutex_unlock(&dev->struct_mutex);
 
-	intel_runtime_pm_put(dev_priv);
-
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index e2b188dcf908..e0c3bd941e38 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1364,7 +1364,7 @@  int i915_reg_read_ioctl(struct drm_device *dev,
 	struct register_whitelist const *entry = whitelist;
 	unsigned size;
 	i915_reg_t offset_ldw, offset_udw;
-	int i, ret = 0;
+	int i, ret;
 
 	for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
 		if (i915_mmio_reg_offset(entry->offset_ldw) == (reg->offset & -entry->size) &&
@@ -1386,6 +1386,7 @@  int i915_reg_read_ioctl(struct drm_device *dev,
 
 	intel_runtime_pm_get(dev_priv);
 
+	ret = 0;
 	switch (size) {
 	case 8 | 1:
 		reg->val = I915_READ64_2x32(offset_ldw, offset_udw);
@@ -1404,10 +1405,9 @@  int i915_reg_read_ioctl(struct drm_device *dev,
 		break;
 	default:
 		ret = -EINVAL;
-		goto out;
+		break;
 	}
 
-out:
 	intel_runtime_pm_put(dev_priv);
 	return ret;
 }