diff mbox

[v12,2/4] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device

Message ID 20180708173413.1965-3-vivek.gautam@codeaurora.org (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Vivek Gautam July 8, 2018, 5:34 p.m. UTC
From: Sricharan R <sricharan@codeaurora.org>

The smmu device probe/remove and add/remove master device callbacks
get called when the smmu is not linked to its master, that is, without
the context of the master device. So call the runtime PM APIs in those
places separately.

Signed-off-by: Sricharan R <sricharan@codeaurora.org>
[vivek: Cleanup pm runtime calls]
Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Reviewed-by: Tomasz Figa <tfiga@chromium.org>
---

 - Changes since v11
   * Replaced pm_runtime_disable() with pm_runtime_force_suspend()
     to avoid the warning about "Unpreparing enabled clock".
     The full warning text is given in the cover letter.

 drivers/iommu/arm-smmu.c | 92 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 84 insertions(+), 8 deletions(-)

Comments

Rafael J. Wysocki July 11, 2018, 9:51 a.m. UTC | #1
On Sunday, July 8, 2018 7:34:11 PM CEST Vivek Gautam wrote:
> From: Sricharan R <sricharan@codeaurora.org>
> 
> The smmu device probe/remove and add/remove master device callbacks
> gets called when the smmu is not linked to its master, that is without
> the context of the master device. So calling runtime apis in those places
> separately.
> 
> Signed-off-by: Sricharan R <sricharan@codeaurora.org>
> [vivek: Cleanup pm runtime calls]
> Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
> Reviewed-by: Tomasz Figa <tfiga@chromium.org>
> ---
> 
>  - Change since v11
>    * Replaced pm_runtime_disable() with pm_runtime_force_suspend()
>      to avoid warning about " Unpreparing enabled clock".
>      Full warning text mentioned in cover patch.
> 
>  drivers/iommu/arm-smmu.c | 92 +++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 84 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index a01d0dde21dd..09265e206e2d 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -268,6 +268,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
>  	{ 0, NULL},
>  };
>  
> +static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
> +{
> +	if (pm_runtime_enabled(smmu->dev))

Why do you need the pm_runtime_enabled() checks here and below?

pm_runtime_get_sync() and pm_runtime_put() should work just fine if
runtime PM is not enabled.

> +		return pm_runtime_get_sync(smmu->dev);
> +
> +	return 0;
> +}
> +
> +static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
> +{
> +	if (pm_runtime_enabled(smmu->dev))
> +		pm_runtime_put(smmu->dev);
> +}
> +
>  static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
>  {
>  	return container_of(dom, struct arm_smmu_domain, domain);
> @@ -913,11 +927,15 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
>  	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>  	struct arm_smmu_device *smmu = smmu_domain->smmu;
>  	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> -	int irq;
> +	int ret, irq;
>  
>  	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
>  		return;
>  
> +	ret = arm_smmu_rpm_get(smmu);
> +	if (ret < 0)
> +		return;
> +
>  	/*
>  	 * Disable the context bank and free the page tables before freeing
>  	 * it.
> @@ -932,6 +950,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
>  
>  	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
>  	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
> +
> +	arm_smmu_rpm_put(smmu);
>  }
>  
>  static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
> @@ -1213,10 +1233,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>  		return -ENODEV;
>  
>  	smmu = fwspec_smmu(fwspec);
> +
> +	ret = arm_smmu_rpm_get(smmu);
> +	if (ret < 0)
> +		return ret;
> +
>  	/* Ensure that the domain is finalised */
>  	ret = arm_smmu_init_domain_context(domain, smmu);
>  	if (ret < 0)
> -		return ret;
> +		goto rpm_put;
>  
>  	/*
>  	 * Sanity check the domain. We don't support domains across
> @@ -1226,33 +1251,50 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>  		dev_err(dev,
>  			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
>  			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
> -		return -EINVAL;
> +		ret = -EINVAL;
> +		goto rpm_put;
>  	}
>  
>  	/* Looks ok, so add the device to the domain */
> -	return arm_smmu_domain_add_master(smmu_domain, fwspec);
> +	ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
> +
> +rpm_put:
> +	arm_smmu_rpm_put(smmu);
> +	return ret;
>  }
>  
>  static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
>  			phys_addr_t paddr, size_t size, int prot)
>  {
>  	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
> +	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
> +	int ret;
>  
>  	if (!ops)
>  		return -ENODEV;
>  
> -	return ops->map(ops, iova, paddr, size, prot);
> +	arm_smmu_rpm_get(smmu);
> +	ret = ops->map(ops, iova, paddr, size, prot);
> +	arm_smmu_rpm_put(smmu);
> +
> +	return ret;
>  }
>  
>  static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
>  			     size_t size)
>  {
>  	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
> +	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
> +	size_t ret;
>  
>  	if (!ops)
>  		return 0;
>  
> -	return ops->unmap(ops, iova, size);
> +	arm_smmu_rpm_get(smmu);
> +	ret = ops->unmap(ops, iova, size);
> +	arm_smmu_rpm_put(smmu);
> +
> +	return ret;
>  }
>  
>  static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
> @@ -1407,7 +1449,13 @@ static int arm_smmu_add_device(struct device *dev)
>  	while (i--)
>  		cfg->smendx[i] = INVALID_SMENDX;
>  
> +	ret = arm_smmu_rpm_get(smmu);
> +	if (ret < 0)
> +		goto out_cfg_free;
> +
>  	ret = arm_smmu_master_alloc_smes(dev);
> +	arm_smmu_rpm_put(smmu);
> +
>  	if (ret)
>  		goto out_cfg_free;
>  
> @@ -1427,7 +1475,7 @@ static void arm_smmu_remove_device(struct device *dev)
>  	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
>  	struct arm_smmu_master_cfg *cfg;
>  	struct arm_smmu_device *smmu;
> -
> +	int ret;
>  
>  	if (!fwspec || fwspec->ops != &arm_smmu_ops)
>  		return;
> @@ -1435,8 +1483,15 @@ static void arm_smmu_remove_device(struct device *dev)
>  	cfg  = fwspec->iommu_priv;
>  	smmu = cfg->smmu;
>  
> +	ret = arm_smmu_rpm_get(smmu);
> +	if (ret < 0)
> +		return;
> +
>  	iommu_device_unlink(&smmu->iommu, dev);
>  	arm_smmu_master_free_smes(fwspec);
> +
> +	arm_smmu_rpm_put(smmu);
> +
>  	iommu_group_remove_device(dev);
>  	kfree(fwspec->iommu_priv);
>  	iommu_fwspec_free(dev);
> @@ -2124,6 +2179,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>  		smmu->irqs[i] = irq;
>  	}
>  
> +	platform_set_drvdata(pdev, smmu);
> +
>  	err = devm_clk_bulk_get(smmu->dev, smmu->num_clks, smmu->clks);
>  	if (err)
>  		return err;
> @@ -2132,6 +2189,19 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>  	if (err)
>  		return err;
>  
> +	/*
> +	 * We want to avoid touching dev->power.lock in fastpaths unless
> +	 * it's really going to do something useful - pm_runtime_enabled()
> +	 * can serve as an ideal proxy for that decision. So, conditionally
> +	 * enable pm_runtime.
> +	 */
> +	if (dev->pm_domain)
> +		pm_runtime_enable(dev);
> +
> +	err = arm_smmu_rpm_get(smmu);
> +	if (err < 0)
> +		return err;
> +
>  	err = arm_smmu_device_cfg_probe(smmu);
>  	if (err)
>  		return err;
> @@ -2173,10 +2243,11 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>  		return err;
>  	}
>  
> -	platform_set_drvdata(pdev, smmu);
>  	arm_smmu_device_reset(smmu);
>  	arm_smmu_test_smr_masks(smmu);
>  
> +	arm_smmu_rpm_put(smmu);
> +
>  	/*
>  	 * For ACPI and generic DT bindings, an SMMU will be probed before
>  	 * any device which might need it, so we want the bus ops in place
> @@ -2212,8 +2283,13 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
>  	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
>  		dev_err(&pdev->dev, "removing device with active domains!\n");
>  
> +	arm_smmu_rpm_get(smmu);
>  	/* Turn the thing off */
>  	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
> +	arm_smmu_rpm_put(smmu);
> +
> +	if (pm_runtime_enabled(smmu->dev))
> +		pm_runtime_force_suspend(smmu->dev);
>  
>  	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
>  
>
Tomasz Figa July 11, 2018, 10:05 a.m. UTC | #2
Hi Rafael,

Thanks for review.

On Wed, Jul 11, 2018 at 6:53 PM Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
>
> On Sunday, July 8, 2018 7:34:11 PM CEST Vivek Gautam wrote:
> > From: Sricharan R <sricharan@codeaurora.org>
> >
> > The smmu device probe/remove and add/remove master device callbacks
> > gets called when the smmu is not linked to its master, that is without
> > the context of the master device. So calling runtime apis in those places
> > separately.
> >
> > Signed-off-by: Sricharan R <sricharan@codeaurora.org>
> > [vivek: Cleanup pm runtime calls]
> > Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
> > Reviewed-by: Tomasz Figa <tfiga@chromium.org>
> > ---
> >
> >  - Change since v11
> >    * Replaced pm_runtime_disable() with pm_runtime_force_suspend()
> >      to avoid warning about " Unpreparing enabled clock".
> >      Full warning text mentioned in cover patch.
> >
> >  drivers/iommu/arm-smmu.c | 92 +++++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 84 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> > index a01d0dde21dd..09265e206e2d 100644
> > --- a/drivers/iommu/arm-smmu.c
> > +++ b/drivers/iommu/arm-smmu.c
> > @@ -268,6 +268,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
> >       { 0, NULL},
> >  };
> >
> > +static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
> > +{
> > +     if (pm_runtime_enabled(smmu->dev))
>
> Why do you need the pm_runtime_enabled() checks here and below?
>
> pm_runtime_get_sync() and pm_runtime_put() should work just fine if
> runtime PM is not enabled.

Because pm_runtime_get_sync() acquires a spin lock, even if only for
the short time it takes to check whether runtime PM is enabled, and the
SMMU driver maintainers didn't want any spin locks in certain IOMMU API
code paths on hardware implementations that don't need runtime PM,
while we still need to be able to control runtime PM there on hardware
implementations that do need it.

Best regards,
Tomasz
Rafael J. Wysocki July 11, 2018, 10:59 a.m. UTC | #3
On Wed, Jul 11, 2018 at 12:05 PM, Tomasz Figa <tfiga@chromium.org> wrote:
> Hi Rafael,
>
> Thanks for review.
>
> On Wed, Jul 11, 2018 at 6:53 PM Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
>>
>> On Sunday, July 8, 2018 7:34:11 PM CEST Vivek Gautam wrote:
>> > From: Sricharan R <sricharan@codeaurora.org>
>> >
>> > The smmu device probe/remove and add/remove master device callbacks
>> > gets called when the smmu is not linked to its master, that is without
>> > the context of the master device. So calling runtime apis in those places
>> > separately.
>> >
>> > Signed-off-by: Sricharan R <sricharan@codeaurora.org>
>> > [vivek: Cleanup pm runtime calls]
>> > Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
>> > Reviewed-by: Tomasz Figa <tfiga@chromium.org>
>> > ---
>> >
>> >  - Change since v11
>> >    * Replaced pm_runtime_disable() with pm_runtime_force_suspend()
>> >      to avoid warning about " Unpreparing enabled clock".
>> >      Full warning text mentioned in cover patch.
>> >
>> >  drivers/iommu/arm-smmu.c | 92 +++++++++++++++++++++++++++++++++++++++++++-----
>> >  1 file changed, 84 insertions(+), 8 deletions(-)
>> >
>> > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
>> > index a01d0dde21dd..09265e206e2d 100644
>> > --- a/drivers/iommu/arm-smmu.c
>> > +++ b/drivers/iommu/arm-smmu.c
>> > @@ -268,6 +268,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
>> >       { 0, NULL},
>> >  };
>> >
>> > +static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
>> > +{
>> > +     if (pm_runtime_enabled(smmu->dev))
>>
>> Why do you need the pm_runtime_enabled() checks here and below?
>>
>> pm_runtime_get_sync() and pm_runtime_put() should work just fine if
>> runtime PM is not enabled.
>
> Because pm_runtime_get_sync() acquires a spin lock, even if only for
> the short time of checking if runtime PM is enabled and SMMU driver
> maintainers didn't want any spin locks in certain IOMMU API code paths
> on hardware implementations that don't need runtime PM, while we still
> need to be able to control runtime PM there on hardware
> implementations that need so.

OK, so it is an optimization.  It would be good to put a comment in
there to that effect.
Vivek Gautam July 11, 2018, 11:30 a.m. UTC | #4
On 7/11/2018 4:29 PM, Rafael J. Wysocki wrote:
> On Wed, Jul 11, 2018 at 12:05 PM, Tomasz Figa <tfiga@chromium.org> wrote:
>> Hi Rafael,
>>
>> Thanks for review.
>>
>> On Wed, Jul 11, 2018 at 6:53 PM Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
>>> On Sunday, July 8, 2018 7:34:11 PM CEST Vivek Gautam wrote:
>>>> From: Sricharan R <sricharan@codeaurora.org>
>>>>
>>>> The smmu device probe/remove and add/remove master device callbacks
>>>> gets called when the smmu is not linked to its master, that is without
>>>> the context of the master device. So calling runtime apis in those places
>>>> separately.
>>>>
>>>> Signed-off-by: Sricharan R <sricharan@codeaurora.org>
>>>> [vivek: Cleanup pm runtime calls]
>>>> Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
>>>> Reviewed-by: Tomasz Figa <tfiga@chromium.org>
>>>> ---
>>>>
>>>>   - Change since v11
>>>>     * Replaced pm_runtime_disable() with pm_runtime_force_suspend()
>>>>       to avoid warning about " Unpreparing enabled clock".
>>>>       Full warning text mentioned in cover patch.
>>>>
>>>>   drivers/iommu/arm-smmu.c | 92 +++++++++++++++++++++++++++++++++++++++++++-----
>>>>   1 file changed, 84 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
>>>> index a01d0dde21dd..09265e206e2d 100644
>>>> --- a/drivers/iommu/arm-smmu.c
>>>> +++ b/drivers/iommu/arm-smmu.c
>>>> @@ -268,6 +268,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
>>>>        { 0, NULL},
>>>>   };
>>>>
>>>> +static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
>>>> +{
>>>> +     if (pm_runtime_enabled(smmu->dev))
>>> Why do you need the pm_runtime_enabled() checks here and below?
>>>
>>> pm_runtime_get_sync() and pm_runtime_put() should work just fine if
>>> runtime PM is not enabled.
>> Because pm_runtime_get_sync() acquires a spin lock, even if only for
>> the short time of checking if runtime PM is enabled and SMMU driver
>> maintainers didn't want any spin locks in certain IOMMU API code paths
>> on hardware implementations that don't need runtime PM, while we still
>> need to be able to control runtime PM there on hardware
>> implementations that need so.
> OK, so it is an optimization.  It would be good to put a comment in
> there to that effect.

Yes, actually there's a comment in arm_smmu_device_probe() where
runtime PM is conditionally enabled.
I can add comments for these wrappers too if you would like.

Thanks & Regards
Vivek
diff mbox

Patch

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index a01d0dde21dd..09265e206e2d 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -268,6 +268,20 @@  static struct arm_smmu_option_prop arm_smmu_options[] = {
 	{ 0, NULL},
 };
 
+static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
+{
+	if (pm_runtime_enabled(smmu->dev))
+		return pm_runtime_get_sync(smmu->dev);
+
+	return 0;
+}
+
+static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
+{
+	if (pm_runtime_enabled(smmu->dev))
+		pm_runtime_put(smmu->dev);
+}
+
 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
 {
 	return container_of(dom, struct arm_smmu_domain, domain);
@@ -913,11 +927,15 @@  static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
-	int irq;
+	int ret, irq;
 
 	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
 		return;
 
+	ret = arm_smmu_rpm_get(smmu);
+	if (ret < 0)
+		return;
+
 	/*
 	 * Disable the context bank and free the page tables before freeing
 	 * it.
@@ -932,6 +950,8 @@  static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
 
 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
+
+	arm_smmu_rpm_put(smmu);
 }
 
 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
@@ -1213,10 +1233,15 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		return -ENODEV;
 
 	smmu = fwspec_smmu(fwspec);
+
+	ret = arm_smmu_rpm_get(smmu);
+	if (ret < 0)
+		return ret;
+
 	/* Ensure that the domain is finalised */
 	ret = arm_smmu_init_domain_context(domain, smmu);
 	if (ret < 0)
-		return ret;
+		goto rpm_put;
 
 	/*
 	 * Sanity check the domain. We don't support domains across
@@ -1226,33 +1251,50 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		dev_err(dev,
 			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
 			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
-		return -EINVAL;
+		ret = -EINVAL;
+		goto rpm_put;
 	}
 
 	/* Looks ok, so add the device to the domain */
-	return arm_smmu_domain_add_master(smmu_domain, fwspec);
+	ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
+
+rpm_put:
+	arm_smmu_rpm_put(smmu);
+	return ret;
 }
 
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 			phys_addr_t paddr, size_t size, int prot)
 {
 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+	int ret;
 
 	if (!ops)
 		return -ENODEV;
 
-	return ops->map(ops, iova, paddr, size, prot);
+	arm_smmu_rpm_get(smmu);
+	ret = ops->map(ops, iova, paddr, size, prot);
+	arm_smmu_rpm_put(smmu);
+
+	return ret;
 }
 
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 			     size_t size)
 {
 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+	size_t ret;
 
 	if (!ops)
 		return 0;
 
-	return ops->unmap(ops, iova, size);
+	arm_smmu_rpm_get(smmu);
+	ret = ops->unmap(ops, iova, size);
+	arm_smmu_rpm_put(smmu);
+
+	return ret;
 }
 
 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
@@ -1407,7 +1449,13 @@  static int arm_smmu_add_device(struct device *dev)
 	while (i--)
 		cfg->smendx[i] = INVALID_SMENDX;
 
+	ret = arm_smmu_rpm_get(smmu);
+	if (ret < 0)
+		goto out_cfg_free;
+
 	ret = arm_smmu_master_alloc_smes(dev);
+	arm_smmu_rpm_put(smmu);
+
 	if (ret)
 		goto out_cfg_free;
 
@@ -1427,7 +1475,7 @@  static void arm_smmu_remove_device(struct device *dev)
 	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
 	struct arm_smmu_master_cfg *cfg;
 	struct arm_smmu_device *smmu;
-
+	int ret;
 
 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
 		return;
@@ -1435,8 +1483,15 @@  static void arm_smmu_remove_device(struct device *dev)
 	cfg  = fwspec->iommu_priv;
 	smmu = cfg->smmu;
 
+	ret = arm_smmu_rpm_get(smmu);
+	if (ret < 0)
+		return;
+
 	iommu_device_unlink(&smmu->iommu, dev);
 	arm_smmu_master_free_smes(fwspec);
+
+	arm_smmu_rpm_put(smmu);
+
 	iommu_group_remove_device(dev);
 	kfree(fwspec->iommu_priv);
 	iommu_fwspec_free(dev);
@@ -2124,6 +2179,8 @@  static int arm_smmu_device_probe(struct platform_device *pdev)
 		smmu->irqs[i] = irq;
 	}
 
+	platform_set_drvdata(pdev, smmu);
+
 	err = devm_clk_bulk_get(smmu->dev, smmu->num_clks, smmu->clks);
 	if (err)
 		return err;
@@ -2132,6 +2189,19 @@  static int arm_smmu_device_probe(struct platform_device *pdev)
 	if (err)
 		return err;
 
+	/*
+	 * We want to avoid touching dev->power.lock in fastpaths unless
+	 * it's really going to do something useful - pm_runtime_enabled()
+	 * can serve as an ideal proxy for that decision. So, conditionally
+	 * enable pm_runtime.
+	 */
+	if (dev->pm_domain)
+		pm_runtime_enable(dev);
+
+	err = arm_smmu_rpm_get(smmu);
+	if (err < 0)
+		return err;
+
 	err = arm_smmu_device_cfg_probe(smmu);
 	if (err)
 		return err;
@@ -2173,10 +2243,11 @@  static int arm_smmu_device_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	platform_set_drvdata(pdev, smmu);
 	arm_smmu_device_reset(smmu);
 	arm_smmu_test_smr_masks(smmu);
 
+	arm_smmu_rpm_put(smmu);
+
 	/*
 	 * For ACPI and generic DT bindings, an SMMU will be probed before
 	 * any device which might need it, so we want the bus ops in place
@@ -2212,8 +2283,13 @@  static int arm_smmu_device_remove(struct platform_device *pdev)
 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
 		dev_err(&pdev->dev, "removing device with active domains!\n");
 
+	arm_smmu_rpm_get(smmu);
 	/* Turn the thing off */
 	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+	arm_smmu_rpm_put(smmu);
+
+	if (pm_runtime_enabled(smmu->dev))
+		pm_runtime_force_suspend(smmu->dev);
 
 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);