diff mbox series

[RESEND,v17,2/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device

Message ID 20181116112430.31248-3-vivek.gautam@codeaurora.org (mailing list archive)
State Not Applicable, archived
Headers show
Series iommu/arm-smmu: Add runtime pm/sleep support | expand

Commit Message

Vivek Gautam Nov. 16, 2018, 11:24 a.m. UTC
From: Sricharan R <sricharan@codeaurora.org>

The SMMU device probe/remove and master device add/remove callbacks
get called when the SMMU is not linked to its master, that is, without
the context of the master device. So the runtime PM APIs are called
explicitly in those code paths.
The global locks are also initialized before enabling runtime PM, as
runtime_resume() calls device_reset(), which does tlb_sync_global()
and ultimately requires the locks to be initialized.

Signed-off-by: Sricharan R <sricharan@codeaurora.org>
[vivek: Cleanup pm runtime calls]
Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Reviewed-by: Tomasz Figa <tfiga@chromium.org>
Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/iommu/arm-smmu.c | 101 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 91 insertions(+), 10 deletions(-)
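
For reference, the runtime_resume() path the commit message refers to is added
elsewhere in this series, not in this patch. A minimal sketch of it, assuming
the callback name and clock fields used in the rest of the series, looks
roughly like this (illustrative only):

static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	int ret;

	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	/*
	 * device_reset() ends with tlb_sync_global(), which takes the
	 * global sync lock - hence the locks must already be initialized
	 * by the time runtime PM is enabled in probe.
	 */
	arm_smmu_device_reset(smmu);

	return 0;
}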

Comments

Will Deacon Nov. 21, 2018, 5:37 p.m. UTC | #1
On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote:
> From: Sricharan R <sricharan@codeaurora.org>
> 
> The smmu device probe/remove and add/remove master device callbacks
> gets called when the smmu is not linked to its master, that is without
> the context of the master device. So calling runtime apis in those places
> separately.
> Global locks are also initialized before enabling runtime pm as the
> runtime_resume() calls device_reset() which does tlb_sync_global()
> that ultimately requires locks to be initialized.
> 
> Signed-off-by: Sricharan R <sricharan@codeaurora.org>
> [vivek: Cleanup pm runtime calls]
> Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
> Reviewed-by: Tomasz Figa <tfiga@chromium.org>
> Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
> Reviewed-by: Robin Murphy <robin.murphy@arm.com>
> ---
>  drivers/iommu/arm-smmu.c | 101 ++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 91 insertions(+), 10 deletions(-)

Given that you're doing the get/put in the TLBI ops unconditionally:

>  static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
>  {
>  	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct arm_smmu_device *smmu = smmu_domain->smmu;
>  
> -	if (smmu_domain->tlb_ops)
> +	if (smmu_domain->tlb_ops) {
> +		arm_smmu_rpm_get(smmu);
>  		smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
> +		arm_smmu_rpm_put(smmu);
> +	}
>  }
>  
>  static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
>  {
>  	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct arm_smmu_device *smmu = smmu_domain->smmu;
>  
> -	if (smmu_domain->tlb_ops)
> +	if (smmu_domain->tlb_ops) {
> +		arm_smmu_rpm_get(smmu);
>  		smmu_domain->tlb_ops->tlb_sync(smmu_domain);
> +		arm_smmu_rpm_put(smmu);
> +	}

Why do you need them around the map/unmap calls as well?

Will
Vivek Gautam Nov. 22, 2018, 12:02 p.m. UTC | #2
Hi Will,

On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <will.deacon@arm.com> wrote:
>
> On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote:
> > From: Sricharan R <sricharan@codeaurora.org>
> >
> > The smmu device probe/remove and add/remove master device callbacks
> > gets called when the smmu is not linked to its master, that is without
> > the context of the master device. So calling runtime apis in those places
> > separately.
> > Global locks are also initialized before enabling runtime pm as the
> > runtime_resume() calls device_reset() which does tlb_sync_global()
> > that ultimately requires locks to be initialized.
> >
> > Signed-off-by: Sricharan R <sricharan@codeaurora.org>
> > [vivek: Cleanup pm runtime calls]
> > Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
> > Reviewed-by: Tomasz Figa <tfiga@chromium.org>
> > Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
> > Reviewed-by: Robin Murphy <robin.murphy@arm.com>
> > ---
> >  drivers/iommu/arm-smmu.c | 101 ++++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 91 insertions(+), 10 deletions(-)
>
> Given that you're doing the get/put in the TLBI ops unconditionally:
>
> >  static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
> >  {
> >       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > +     struct arm_smmu_device *smmu = smmu_domain->smmu;
> >
> > -     if (smmu_domain->tlb_ops)
> > +     if (smmu_domain->tlb_ops) {
> > +             arm_smmu_rpm_get(smmu);
> >               smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
> > +             arm_smmu_rpm_put(smmu);
> > +     }
> >  }
> >
> >  static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
> >  {
> >       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > +     struct arm_smmu_device *smmu = smmu_domain->smmu;
> >
> > -     if (smmu_domain->tlb_ops)
> > +     if (smmu_domain->tlb_ops) {
> > +             arm_smmu_rpm_get(smmu);
> >               smmu_domain->tlb_ops->tlb_sync(smmu_domain);
> > +             arm_smmu_rpm_put(smmu);
> > +     }
>
> Why do you need them around the map/unmap calls as well?

We still have .tlb_add_flush path?

Thanks
Vivek
>
> Will
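
[The point here is that .map/.unmap go through the io-pgtable code, which can
call back into the driver's TLB callbacks - MMIO writes that need the SMMU
powered. An illustrative call chain; names other than the driver callbacks are
taken from the io-pgtable code of this era and should be treated as
approximate:

/*
 * arm_smmu_unmap()
 *   -> ops->unmap()                              // io-pgtable-arm
 *        -> io_pgtable_tlb_add_flush()           // invalidation for the range
 *             -> arm_smmu_tlb_inv_range_nosync() // MMIO writes to TLBI regs
 *        -> io_pgtable_tlb_sync()
 *             -> smmu_domain->tlb_ops->tlb_sync()// polls TLB sync status reg
 *
 * So the SMMU must be powered for the whole of map/unmap, not only the
 * explicit flush_iotlb_all()/iotlb_sync() entry points.
 */
]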
Will Deacon Nov. 23, 2018, 6:36 p.m. UTC | #3
On Thu, Nov 22, 2018 at 05:32:24PM +0530, Vivek Gautam wrote:
> Hi Will,
> 
> On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <will.deacon@arm.com> wrote:
> >
> > On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote:
> > > From: Sricharan R <sricharan@codeaurora.org>
> > >
> > > The smmu device probe/remove and add/remove master device callbacks
> > > gets called when the smmu is not linked to its master, that is without
> > > the context of the master device. So calling runtime apis in those places
> > > separately.
> > > Global locks are also initialized before enabling runtime pm as the
> > > runtime_resume() calls device_reset() which does tlb_sync_global()
> > > that ultimately requires locks to be initialized.
> > >
> > > Signed-off-by: Sricharan R <sricharan@codeaurora.org>
> > > [vivek: Cleanup pm runtime calls]
> > > Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
> > > Reviewed-by: Tomasz Figa <tfiga@chromium.org>
> > > Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
> > > Reviewed-by: Robin Murphy <robin.murphy@arm.com>
> > > ---
> > >  drivers/iommu/arm-smmu.c | 101 ++++++++++++++++++++++++++++++++++++++++++-----
> > >  1 file changed, 91 insertions(+), 10 deletions(-)
> >
> > Given that you're doing the get/put in the TLBI ops unconditionally:
> >
> > >  static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
> > >  {
> > >       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > > +     struct arm_smmu_device *smmu = smmu_domain->smmu;
> > >
> > > -     if (smmu_domain->tlb_ops)
> > > +     if (smmu_domain->tlb_ops) {
> > > +             arm_smmu_rpm_get(smmu);
> > >               smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
> > > +             arm_smmu_rpm_put(smmu);
> > > +     }
> > >  }
> > >
> > >  static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
> > >  {
> > >       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > > +     struct arm_smmu_device *smmu = smmu_domain->smmu;
> > >
> > > -     if (smmu_domain->tlb_ops)
> > > +     if (smmu_domain->tlb_ops) {
> > > +             arm_smmu_rpm_get(smmu);
> > >               smmu_domain->tlb_ops->tlb_sync(smmu_domain);
> > > +             arm_smmu_rpm_put(smmu);
> > > +     }
> >
> > Why do you need them around the map/unmap calls as well?
> 
> We still have .tlb_add_flush path?

Ok, so we could add the ops around that as well. Right now, we've got
the runtime pm hooks crossing two parts of the API.

Will
Vivek Gautam Nov. 26, 2018, 6:03 a.m. UTC | #4
On 11/24/2018 12:06 AM, Will Deacon wrote:
> On Thu, Nov 22, 2018 at 05:32:24PM +0530, Vivek Gautam wrote:
>> Hi Will,
>>
>> On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <will.deacon@arm.com> wrote:
>>> On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote:
>>>> From: Sricharan R <sricharan@codeaurora.org>
>>>>
>>>> The smmu device probe/remove and add/remove master device callbacks
>>>> gets called when the smmu is not linked to its master, that is without
>>>> the context of the master device. So calling runtime apis in those places
>>>> separately.
>>>> Global locks are also initialized before enabling runtime pm as the
>>>> runtime_resume() calls device_reset() which does tlb_sync_global()
>>>> that ultimately requires locks to be initialized.
>>>>
>>>> Signed-off-by: Sricharan R <sricharan@codeaurora.org>
>>>> [vivek: Cleanup pm runtime calls]
>>>> Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
>>>> Reviewed-by: Tomasz Figa <tfiga@chromium.org>
>>>> Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
>>>> Reviewed-by: Robin Murphy <robin.murphy@arm.com>
>>>> ---
>>>>   drivers/iommu/arm-smmu.c | 101 ++++++++++++++++++++++++++++++++++++++++++-----
>>>>   1 file changed, 91 insertions(+), 10 deletions(-)
>>> Given that you're doing the get/put in the TLBI ops unconditionally:
>>>
>>>>   static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
>>>>   {
>>>>        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>>> +     struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>
>>>> -     if (smmu_domain->tlb_ops)
>>>> +     if (smmu_domain->tlb_ops) {
>>>> +             arm_smmu_rpm_get(smmu);
>>>>                smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
>>>> +             arm_smmu_rpm_put(smmu);
>>>> +     }
>>>>   }
>>>>
>>>>   static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
>>>>   {
>>>>        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>>> +     struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>
>>>> -     if (smmu_domain->tlb_ops)
>>>> +     if (smmu_domain->tlb_ops) {
>>>> +             arm_smmu_rpm_get(smmu);
>>>>                smmu_domain->tlb_ops->tlb_sync(smmu_domain);
>>>> +             arm_smmu_rpm_put(smmu);
>>>> +     }
>>> Why do you need them around the map/unmap calls as well?
>> We still have .tlb_add_flush path?
> Ok, so we could add the ops around that as well. Right now, we've got
> the runtime pm hooks crossing two parts of the API.

Sure, will do that then, and remove the runtime pm hooks from map/unmap.

Thanks
Vivek
>
> Will
Vivek Gautam Nov. 26, 2018, 11:26 a.m. UTC | #5
On 11/26/2018 11:33 AM, Vivek Gautam wrote:
>
>
> On 11/24/2018 12:06 AM, Will Deacon wrote:
>> On Thu, Nov 22, 2018 at 05:32:24PM +0530, Vivek Gautam wrote:
>>> Hi Will,
>>>
>>> On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <will.deacon@arm.com> 
>>> wrote:
>>>> On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote:
>>>>> From: Sricharan R <sricharan@codeaurora.org>
>>>>>
>>>>> The smmu device probe/remove and add/remove master device callbacks
>>>>> gets called when the smmu is not linked to its master, that is 
>>>>> without
>>>>> the context of the master device. So calling runtime apis in those 
>>>>> places
>>>>> separately.
>>>>> Global locks are also initialized before enabling runtime pm as the
>>>>> runtime_resume() calls device_reset() which does tlb_sync_global()
>>>>> that ultimately requires locks to be initialized.
>>>>>
>>>>> Signed-off-by: Sricharan R <sricharan@codeaurora.org>
>>>>> [vivek: Cleanup pm runtime calls]
>>>>> Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
>>>>> Reviewed-by: Tomasz Figa <tfiga@chromium.org>
>>>>> Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
>>>>> Reviewed-by: Robin Murphy <robin.murphy@arm.com>
>>>>> ---
>>>>>   drivers/iommu/arm-smmu.c | 101 ++++++++++++++++++++++++++++++++++++++++++-----
>>>>>   1 file changed, 91 insertions(+), 10 deletions(-)
>>>> Given that you're doing the get/put in the TLBI ops unconditionally:
>>>>
>>>>>   static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
>>>>>   {
>>>>>        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>>>> +     struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>
>>>>> -     if (smmu_domain->tlb_ops)
>>>>> +     if (smmu_domain->tlb_ops) {
>>>>> +             arm_smmu_rpm_get(smmu);
>>>>>                smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
>>>>> +             arm_smmu_rpm_put(smmu);
>>>>> +     }
>>>>>   }
>>>>>
>>>>>   static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
>>>>>   {
>>>>>        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>>>> +     struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>
>>>>> -     if (smmu_domain->tlb_ops)
>>>>> +     if (smmu_domain->tlb_ops) {
>>>>> +             arm_smmu_rpm_get(smmu);
>>>>>                smmu_domain->tlb_ops->tlb_sync(smmu_domain);
>>>>> +             arm_smmu_rpm_put(smmu);
>>>>> +     }
>>>> Why do you need them around the map/unmap calls as well?
>>> We still have .tlb_add_flush path?
>> Ok, so we could add the ops around that as well. Right now, we've got
>> the runtime pm hooks crossing two parts of the API.
>
> Sure, will do that then, and remove the runtime pm hooks from map/unmap.

I missed this earlier -
We are adding runtime pm hooks in the 'iommu_ops' callbacks and not really
to 'tlb_ops'. So how are the runtime pm hooks crossing the paths?
'.map/.unmap' iommu_ops don't call '.flush_iotlb_all' or '.iotlb_sync'
iommu_ops anywhere.

E.g., the only callers of domain->ops->flush_iotlb_all() are
iommu_dma_flush_iotlb_all() and iommu_flush_tlb_all(), which are not in
the map/unmap paths.

Regards
Vivek

>
> Thanks
> Vivek
>>
>> Will
>
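
[For context, the core helper Vivek mentions is roughly the following,
reconstructed from the iommu core of that time (treat the exact body as
approximate); it is indeed a separate entry point from map/unmap:

/* include/linux/iommu.h (approximate) */
static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
{
	if (domain->ops->flush_iotlb_all)
		domain->ops->flush_iotlb_all(domain);
}
]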
Will Deacon Nov. 26, 2018, 7:31 p.m. UTC | #6
On Mon, Nov 26, 2018 at 04:56:42PM +0530, Vivek Gautam wrote:
> On 11/26/2018 11:33 AM, Vivek Gautam wrote:
> >On 11/24/2018 12:06 AM, Will Deacon wrote:
> >>On Thu, Nov 22, 2018 at 05:32:24PM +0530, Vivek Gautam wrote:
> >>>On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <will.deacon@arm.com>
> >>>wrote:
> >>>>On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote:
> >>>>>From: Sricharan R <sricharan@codeaurora.org>
> >>>>>
> >>>>>The smmu device probe/remove and add/remove master device callbacks
> >>>>>gets called when the smmu is not linked to its master, that is
> >>>>>without
> >>>>>the context of the master device. So calling runtime apis in those
> >>>>>places
> >>>>>separately.
> >>>>>Global locks are also initialized before enabling runtime pm as the
> >>>>>runtime_resume() calls device_reset() which does tlb_sync_global()
> >>>>>that ultimately requires locks to be initialized.
> >>>>>
> >>>>>Signed-off-by: Sricharan R <sricharan@codeaurora.org>
> >>>>>[vivek: Cleanup pm runtime calls]
> >>>>>Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
> >>>>>Reviewed-by: Tomasz Figa <tfiga@chromium.org>
> >>>>>Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
> >>>>>Reviewed-by: Robin Murphy <robin.murphy@arm.com>
> >>>>>---
> >>>>>  drivers/iommu/arm-smmu.c | 101 ++++++++++++++++++++++++++++++++++++++++++-----
> >>>>>  1 file changed, 91 insertions(+), 10 deletions(-)
> >>>>Given that you're doing the get/put in the TLBI ops unconditionally:
> >>>>
> >>>>>  static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
> >>>>>  {
> >>>>>       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> >>>>>+     struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>>>
> >>>>>-     if (smmu_domain->tlb_ops)
> >>>>>+     if (smmu_domain->tlb_ops) {
> >>>>>+             arm_smmu_rpm_get(smmu);
> >>>>>               smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
> >>>>>+             arm_smmu_rpm_put(smmu);
> >>>>>+     }
> >>>>>  }
> >>>>>
> >>>>>  static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
> >>>>>  {
> >>>>>       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> >>>>>+     struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>>>
> >>>>>-     if (smmu_domain->tlb_ops)
> >>>>>+     if (smmu_domain->tlb_ops) {
> >>>>>+             arm_smmu_rpm_get(smmu);
> >>>>>               smmu_domain->tlb_ops->tlb_sync(smmu_domain);
> >>>>>+             arm_smmu_rpm_put(smmu);
> >>>>>+     }
> >>>>Why do you need them around the map/unmap calls as well?
> >>>We still have .tlb_add_flush path?
> >>Ok, so we could add the ops around that as well. Right now, we've got
> >>the runtime pm hooks crossing two parts of the API.
> >
> >Sure, will do that then, and remove the runtime pm hooks from map/unmap.
> 
> I missed this earlier -
> We are adding runtime pm hooks in the 'iommu_ops' callbacks and not really
> to 'tlb_ops'. So how are the runtime pm hooks crossing the paths?
> '.map/.unmap' iommu_ops don't call '.flush_iotlb_all' or '.iotlb_sync'
> iommu_ops anywhere.
> 
> E.g., the only callers of domain->ops->flush_iotlb_all() are
> iommu_dma_flush_iotlb_all() and iommu_flush_tlb_all(), which are not in
> the map/unmap paths.

Yes, sorry, I got confused here and completely misled you. In which case,
your original patch is ok because it intercepts the core IOMMU API via
iommu_ops. Apologies.

At that level, should we also annotate arm_smmu_iova_to_phys_hard()
for the iova_to_phys() implementation?

With that detail and clock bits sorted out, we should be able to get this
queued at last.

Will
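
[A sketch of the annotation Will suggests, based on the existing
arm_smmu_iova_to_phys() and the helpers added by this patch; this is not part
of the posted diff and the final placement may differ:

static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	phys_addr_t ret;

	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (!ops)
		return 0;

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		/* Hardware ATOS translation touches SMMU registers */
		arm_smmu_rpm_get(smmu_domain->smmu);
		ret = arm_smmu_iova_to_phys_hard(domain, iova);
		arm_smmu_rpm_put(smmu_domain->smmu);
		return ret;
	}

	return ops->iova_to_phys(ops, iova);
}
]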
diff mbox series

Patch

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f7ab7ce87a94..cae88c9f83ca 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -270,6 +270,20 @@  static struct arm_smmu_option_prop arm_smmu_options[] = {
 	{ 0, NULL},
 };
 
+static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
+{
+	if (pm_runtime_enabled(smmu->dev))
+		return pm_runtime_get_sync(smmu->dev);
+
+	return 0;
+}
+
+static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
+{
+	if (pm_runtime_enabled(smmu->dev))
+		pm_runtime_put(smmu->dev);
+}
+
 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
 {
 	return container_of(dom, struct arm_smmu_domain, domain);
@@ -929,11 +943,15 @@  static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
-	int irq;
+	int ret, irq;
 
 	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
 		return;
 
+	ret = arm_smmu_rpm_get(smmu);
+	if (ret < 0)
+		return;
+
 	/*
 	 * Disable the context bank and free the page tables before freeing
 	 * it.
@@ -948,6 +966,8 @@  static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
 
 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
+
+	arm_smmu_rpm_put(smmu);
 }
 
 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
@@ -1229,10 +1249,15 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		return -ENODEV;
 
 	smmu = fwspec_smmu(fwspec);
+
+	ret = arm_smmu_rpm_get(smmu);
+	if (ret < 0)
+		return ret;
+
 	/* Ensure that the domain is finalised */
 	ret = arm_smmu_init_domain_context(domain, smmu);
 	if (ret < 0)
-		return ret;
+		goto rpm_put;
 
 	/*
 	 * Sanity check the domain. We don't support domains across
@@ -1242,49 +1267,74 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		dev_err(dev,
 			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
 			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
-		return -EINVAL;
+		ret = -EINVAL;
+		goto rpm_put;
 	}
 
 	/* Looks ok, so add the device to the domain */
-	return arm_smmu_domain_add_master(smmu_domain, fwspec);
+	ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
+
+rpm_put:
+	arm_smmu_rpm_put(smmu);
+	return ret;
 }
 
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 			phys_addr_t paddr, size_t size, int prot)
 {
 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+	int ret;
 
 	if (!ops)
 		return -ENODEV;
 
-	return ops->map(ops, iova, paddr, size, prot);
+	arm_smmu_rpm_get(smmu);
+	ret = ops->map(ops, iova, paddr, size, prot);
+	arm_smmu_rpm_put(smmu);
+
+	return ret;
 }
 
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 			     size_t size)
 {
 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+	size_t ret;
 
 	if (!ops)
 		return 0;
 
-	return ops->unmap(ops, iova, size);
+	arm_smmu_rpm_get(smmu);
+	ret = ops->unmap(ops, iova, size);
+	arm_smmu_rpm_put(smmu);
+
+	return ret;
 }
 
 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
 
-	if (smmu_domain->tlb_ops)
+	if (smmu_domain->tlb_ops) {
+		arm_smmu_rpm_get(smmu);
 		smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
+		arm_smmu_rpm_put(smmu);
+	}
 }
 
 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
 
-	if (smmu_domain->tlb_ops)
+	if (smmu_domain->tlb_ops) {
+		arm_smmu_rpm_get(smmu);
 		smmu_domain->tlb_ops->tlb_sync(smmu_domain);
+		arm_smmu_rpm_put(smmu);
+	}
 }
 
 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
@@ -1431,7 +1481,13 @@  static int arm_smmu_add_device(struct device *dev)
 	while (i--)
 		cfg->smendx[i] = INVALID_SMENDX;
 
+	ret = arm_smmu_rpm_get(smmu);
+	if (ret < 0)
+		goto out_cfg_free;
+
 	ret = arm_smmu_master_alloc_smes(dev);
+	arm_smmu_rpm_put(smmu);
+
 	if (ret)
 		goto out_cfg_free;
 
@@ -1451,7 +1507,7 @@  static void arm_smmu_remove_device(struct device *dev)
 	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
 	struct arm_smmu_master_cfg *cfg;
 	struct arm_smmu_device *smmu;
-
+	int ret;
 
 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
 		return;
@@ -1459,8 +1515,15 @@  static void arm_smmu_remove_device(struct device *dev)
 	cfg  = fwspec->iommu_priv;
 	smmu = cfg->smmu;
 
+	ret = arm_smmu_rpm_get(smmu);
+	if (ret < 0)
+		return;
+
 	iommu_device_unlink(&smmu->iommu, dev);
 	arm_smmu_master_free_smes(fwspec);
+
+	arm_smmu_rpm_put(smmu);
+
 	iommu_group_remove_device(dev);
 	kfree(fwspec->iommu_priv);
 	iommu_fwspec_free(dev);
@@ -2232,6 +2295,17 @@  static int arm_smmu_device_probe(struct platform_device *pdev)
 	arm_smmu_device_reset(smmu);
 	arm_smmu_test_smr_masks(smmu);
 
+	/*
+	 * We want to avoid touching dev->power.lock in fastpaths unless
+	 * it's really going to do something useful - pm_runtime_enabled()
+	 * can serve as an ideal proxy for that decision. So, conditionally
+	 * enable pm_runtime.
+	 */
+	if (dev->pm_domain) {
+		pm_runtime_set_active(dev);
+		pm_runtime_enable(dev);
+	}
+
 	/*
 	 * For ACPI and generic DT bindings, an SMMU will be probed before
 	 * any device which might need it, so we want the bus ops in place
@@ -2267,10 +2341,17 @@  static int arm_smmu_device_remove(struct platform_device *pdev)
 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
 		dev_err(&pdev->dev, "removing device with active domains!\n");
 
+	arm_smmu_rpm_get(smmu);
 	/* Turn the thing off */
 	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+	arm_smmu_rpm_put(smmu);
+
+	if (pm_runtime_enabled(smmu->dev))
+		pm_runtime_force_suspend(smmu->dev);
+	else
+		clk_bulk_disable(smmu->num_clks, smmu->clks);
 
-	clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks);
+	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
 
 	return 0;
 }
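
For completeness, the runtime PM callbacks this patch relies on are expected to
be wired up via dev_pm_ops elsewhere in the series. A plausible sketch of that
wiring, with the callback names assumed from the series rather than shown in
this patch:

static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name		= "arm-smmu",
		.of_match_table	= of_match_ptr(arm_smmu_of_match),
		.pm		= &arm_smmu_pm_ops,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
};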