diff mbox

[v5,08/13] iommu/rockchip: Control clocks needed to access the IOMMU

Message ID 20180124103516.2571-9-jeffy.chen@rock-chips.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jeffy Chen Jan. 24, 2018, 10:35 a.m. UTC
From: Tomasz Figa <tfiga@chromium.org>

Current code relies on master driver enabling necessary clocks before
IOMMU is accessed, however there are cases when the IOMMU should be
accessed while the master is not running yet, for example allocating
V4L2 videobuf2 buffers, which is done by the VB2 framework using DMA
mapping API and doesn't engage the master driver at all.

This patch fixes the problem by letting clocks needed for IOMMU
operation to be listed in Device Tree and making the driver enable them
for the time of accessing the hardware.

Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
Signed-off-by: Tomasz Figa <tfiga@chromium.org>
---

Changes in v5:
Use clk_bulk APIs.

Changes in v4: None
Changes in v3: None
Changes in v2: None

 .../devicetree/bindings/iommu/rockchip,iommu.txt   |  8 +++
 drivers/iommu/rockchip-iommu.c                     | 74 ++++++++++++++++++++--
 2 files changed, 76 insertions(+), 6 deletions(-)

Comments

Robin Murphy Jan. 24, 2018, 1:49 p.m. UTC | #1
On 24/01/18 10:35, Jeffy Chen wrote:
> From: Tomasz Figa <tfiga@chromium.org>
> 
> Current code relies on master driver enabling necessary clocks before
> IOMMU is accessed, however there are cases when the IOMMU should be
> accessed while the master is not running yet, for example allocating
> V4L2 videobuf2 buffers, which is done by the VB2 framework using DMA
> mapping API and doesn't engage the master driver at all.
> 
> This patch fixes the problem by letting clocks needed for IOMMU
> operation to be listed in Device Tree and making the driver enable them
> for the time of accessing the hardware.
> 
> Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
> Signed-off-by: Tomasz Figa <tfiga@chromium.org>
> ---
> 
> Changes in v5:
> Use clk_bulk APIs.
> 
> Changes in v4: None
> Changes in v3: None
> Changes in v2: None
> 
>   .../devicetree/bindings/iommu/rockchip,iommu.txt   |  8 +++
>   drivers/iommu/rockchip-iommu.c                     | 74 ++++++++++++++++++++--
>   2 files changed, 76 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
> index 2098f7732264..33dd853359fa 100644
> --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
> +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
> @@ -14,6 +14,13 @@ Required properties:
>                       "single-master" device, and needs no additional information
>                       to associate with its master device.  See:
>                       Documentation/devicetree/bindings/iommu/iommu.txt
> +Optional properties:
> +- clocks : A list of master clocks requires for the IOMMU to be accessible

s/requires/required/

> +           by the host CPU. The number of clocks depends on the master
> +           block and might as well be zero. See [1] for generic clock

Oops, some subtleties of English here :)

To say "the number of clocks ... might as well be zero" effectively 
implies "there's no point ever specifying any clocks". I guess what you 
really mean here is "...might well be...", i.e. it is both valid and 
reasonably likely to require zero clocks.

> +           bindings description.
> +
> +[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
>   
>   Optional properties:
>   - rockchip,disable-mmu-reset : Don't use the mmu reset operation.
> @@ -27,5 +34,6 @@ Example:
>   		reg = <0xff940300 0x100>;
>   		interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
>   		interrupt-names = "vopl_mmu";
> +		clocks = <&cru ACLK_VOP1>, <&cru DCLK_VOP1>, <&cru HCLK_VOP1>;
>   		#iommu-cells = <0>;
>   	};
> diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
> index c4131ca792e0..8a5e2a659b67 100644
> --- a/drivers/iommu/rockchip-iommu.c
> +++ b/drivers/iommu/rockchip-iommu.c
> @@ -4,6 +4,7 @@
>    * published by the Free Software Foundation.
>    */
>   
> +#include <linux/clk.h>
>   #include <linux/compiler.h>
>   #include <linux/delay.h>
>   #include <linux/device.h>
> @@ -91,6 +92,8 @@ struct rk_iommu {
>   	struct device *dev;
>   	void __iomem **bases;
>   	int num_mmu;
> +	struct clk_bulk_data *clocks;
> +	int num_clocks;
>   	bool reset_disabled;
>   	struct iommu_device iommu;
>   	struct list_head node; /* entry in rk_iommu_domain.iommus */
> @@ -450,6 +453,38 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
>   	return 0;
>   }
>   
> +static int rk_iommu_of_get_clocks(struct rk_iommu *iommu)
> +{
> +	struct device_node *np = iommu->dev->of_node;
> +	int ret;
> +	int i;
> +
> +	ret = of_count_phandle_with_args(np, "clocks", "#clock-cells");
> +	if (ret == -ENOENT)
> +		return 0;
> +	else if (ret < 0)
> +		return ret;
> +
> +	iommu->num_clocks = ret;
> +	iommu->clocks = devm_kcalloc(iommu->dev, iommu->num_clocks,
> +				     sizeof(*iommu->clocks), GFP_KERNEL);
> +	if (!iommu->clocks)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < iommu->num_clocks; ++i) {
> +		iommu->clocks[i].clk = of_clk_get(np, i);
> +		if (IS_ERR(iommu->clocks[i].clk)) {
> +			ret = PTR_ERR(iommu->clocks[i].clk);
> +			goto err_clk_put;
> +		}
> +	}

Just to confirm my understanding from a quick scan through the code, the 
reason we can't use clk_bulk_get() here is that currently, clocks[i].id 
being NULL means we'd end up just getting the first clock multiple 
times, right?

I guess there could be other users who also want "just get whatever 
clocks I have" functionality, so it might be worth proposing that for 
the core API as a separate/follow-up patch, but it definitely doesn't 
need to be part of this series.

I really don't know enough about correct clk API usage, but modulo the 
binding comments it certainly looks nice and tidy now;

Acked-by: Robin Murphy <robin.murphy@arm.com>

Thanks,
Robin.

> +
> +	return 0;
> +err_clk_put:
> +	clk_bulk_put(i, iommu->clocks);
> +	return ret;
> +}
> +
>   static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
>   {
>   	void __iomem *base = iommu->bases[index];
> @@ -506,6 +541,8 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
>   	irqreturn_t ret = IRQ_NONE;
>   	int i;
>   
> +	WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks));
> +
>   	for (i = 0; i < iommu->num_mmu; i++) {
>   		int_status = rk_iommu_read(iommu->bases[i], RK_MMU_INT_STATUS);
>   		if (int_status == 0)
> @@ -552,6 +589,8 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
>   		rk_iommu_write(iommu->bases[i], RK_MMU_INT_CLEAR, int_status);
>   	}
>   
> +	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
> +
>   	return ret;
>   }
>   
> @@ -594,7 +633,9 @@ static void rk_iommu_zap_iova(struct rk_iommu_domain *rk_domain,
>   	list_for_each(pos, &rk_domain->iommus) {
>   		struct rk_iommu *iommu;
>   		iommu = list_entry(pos, struct rk_iommu, node);
> +		WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks));
>   		rk_iommu_zap_lines(iommu, iova, size);
> +		clk_bulk_disable(iommu->num_clocks, iommu->clocks);
>   	}
>   	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
>   }
> @@ -823,10 +864,14 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
>   	if (!iommu)
>   		return 0;
>   
> -	ret = rk_iommu_enable_stall(iommu);
> +	ret = clk_bulk_enable(iommu->num_clocks, iommu->clocks);
>   	if (ret)
>   		return ret;
>   
> +	ret = rk_iommu_enable_stall(iommu);
> +	if (ret)
> +		goto out_disable_clocks;
> +
>   	ret = rk_iommu_force_reset(iommu);
>   	if (ret)
>   		goto out_disable_stall;
> @@ -852,6 +897,8 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
>   
>   out_disable_stall:
>   	rk_iommu_disable_stall(iommu);
> +out_disable_clocks:
> +	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
>   	return ret;
>   }
>   
> @@ -873,6 +920,7 @@ static void rk_iommu_detach_device(struct iommu_domain *domain,
>   	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
>   
>   	/* Ignore error while disabling, just keep going */
> +	WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks));
>   	rk_iommu_enable_stall(iommu);
>   	rk_iommu_disable_paging(iommu);
>   	for (i = 0; i < iommu->num_mmu; i++) {
> @@ -880,6 +928,7 @@ static void rk_iommu_detach_device(struct iommu_domain *domain,
>   		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, 0);
>   	}
>   	rk_iommu_disable_stall(iommu);
> +	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
>   
>   	iommu->domain = NULL;
>   
> @@ -1172,18 +1221,31 @@ static int rk_iommu_probe(struct platform_device *pdev)
>   	iommu->reset_disabled = device_property_read_bool(dev,
>   					"rockchip,disable-mmu-reset");
>   
> -	err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
> +	err = rk_iommu_of_get_clocks(iommu);
>   	if (err)
>   		return err;
>   
> +	err = clk_bulk_prepare(iommu->num_clocks, iommu->clocks);
> +	if (err)
> +		goto err_put_clocks;
> +
> +	err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
> +	if (err)
> +		goto err_unprepare_clocks;
> +
>   	iommu_device_set_ops(&iommu->iommu, &rk_iommu_ops);
>   	err = iommu_device_register(&iommu->iommu);
> -	if (err) {
> -		iommu_device_sysfs_remove(&iommu->iommu);
> -		return err;
> -	}
> +	if (err)
> +		goto err_remove_sysfs;
>   
>   	return 0;
> +err_remove_sysfs:
> +	iommu_device_sysfs_remove(&iommu->iommu);
> +err_unprepare_clocks:
> +	clk_bulk_unprepare(iommu->num_clocks, iommu->clocks);
> +err_put_clocks:
> +	clk_bulk_put(iommu->num_clocks, iommu->clocks);
> +	return err;
>   }
>   
>   static const struct of_device_id rk_iommu_dt_ids[] = {
>
Jeffy Chen Jan. 26, 2018, 9:45 a.m. UTC | #2
Hi Robin,

Thanks for your reply.

On 01/24/2018 09:49 PM, Robin Murphy wrote:
>>
>> +Optional properties:
>> +- clocks : A list of master clocks requires for the IOMMU to be
>> accessible
>
> s/requires/required/
ok
>
>> +           by the host CPU. The number of clocks depends on the master
>> +           block and might as well be zero. See [1] for generic clock
>
> Oops, some subtleties of English here :)
>
> To say "the number of clocks ... might as well be zero" effectively
> implies "there's no point ever specifying any clocks". I guess what you
> really mean here is "...might well be...", i.e. it is both valid and
> reasonably likely to require zero clocks.
ok
>
>> +           bindings description.
>> +
>> +[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
>>   Optional properties:
>>   - rockchip,disable-mmu-reset : Don't use the mmu reset operation.
>> @@ -27,5 +34,6 @@ Example:
>>           reg = <0xff940300 0x100>;
>>           interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
>>           interrupt-names = "vopl_mmu";
>> +        clocks = <&cru ACLK_VOP1>, <&cru DCLK_VOP1>, <&cru HCLK_VOP1>;
>>           #iommu-cells = <0>;
>>       };
>> diff --git a/drivers/iommu/rockchip-iommu.c
>> b/drivers/iommu/rockchip-iommu.c
>> index c4131ca792e0..8a5e2a659b67 100644
>> --- a/drivers/iommu/rockchip-iommu.c
>> +++ b/drivers/iommu/rockchip-iommu.c
>> @@ -4,6 +4,7 @@
>>    * published by the Free Software Foundation.
>>    */
>> +#include <linux/clk.h>
>>   #include <linux/compiler.h>
>>   #include <linux/delay.h>
>>   #include <linux/device.h>
>> @@ -91,6 +92,8 @@ struct rk_iommu {
>>       struct device *dev;
>>       void __iomem **bases;
>>       int num_mmu;
>> +    struct clk_bulk_data *clocks;
>> +    int num_clocks;
>>       bool reset_disabled;
>>       struct iommu_device iommu;
>>       struct list_head node; /* entry in rk_iommu_domain.iommus */
>> @@ -450,6 +453,38 @@ static int rk_iommu_force_reset(struct rk_iommu
>> *iommu)
>>       return 0;
>>   }
>> +static int rk_iommu_of_get_clocks(struct rk_iommu *iommu)
>> +{
>> +    struct device_node *np = iommu->dev->of_node;
>> +    int ret;
>> +    int i;
>> +
>> +    ret = of_count_phandle_with_args(np, "clocks", "#clock-cells");
>> +    if (ret == -ENOENT)
>> +        return 0;
>> +    else if (ret < 0)
>> +        return ret;
>> +
>> +    iommu->num_clocks = ret;
>> +    iommu->clocks = devm_kcalloc(iommu->dev, iommu->num_clocks,
>> +                     sizeof(*iommu->clocks), GFP_KERNEL);
>> +    if (!iommu->clocks)
>> +        return -ENOMEM;
>> +
>> +    for (i = 0; i < iommu->num_clocks; ++i) {
>> +        iommu->clocks[i].clk = of_clk_get(np, i);
>> +        if (IS_ERR(iommu->clocks[i].clk)) {
>> +            ret = PTR_ERR(iommu->clocks[i].clk);
>> +            goto err_clk_put;
>> +        }
>> +    }
>
> Just to confirm my understanding from a quick scan through the code, the
> reason we can't use clk_bulk_get() here is that currently, clocks[i].id
> being NULL means we'd end up just getting the first clock multiple
> times, right?
right, without a valid name, it would return the first clock.

     /* Walk up the tree of devices looking for a clock that matches */
     while (np) {
         int index = 0;

         /*
          * For named clocks, first look up the name in the
          * "clock-names" property.  If it cannot be found, then
          * index will be an error code, and of_clk_get() will fail.
          */
         if (name)
             index = of_property_match_string(np, "clock-names", name);
         clk = __of_clk_get(np, index, dev_id, name);


>
> I guess there could be other users who also want "just get whatever
> clocks I have" functionality, so it might be worth proposing that for
> the core API as a separate/follow-up patch, but it definitely doesn't
> need to be part of this series.
right, i can try to do it later :)
>
> I really don't know enough about correct clk API usage, but modulo the
> binding comments it certainly looks nice and tidy now;
>
> Acked-by: Robin Murphy <robin.murphy@arm.com>
thanks.
>
> Thanks,
> Robin.
Rob Herring Jan. 30, 2018, 5:05 p.m. UTC | #3
On Wed, Jan 24, 2018 at 06:35:11PM +0800, Jeffy Chen wrote:
> From: Tomasz Figa <tfiga@chromium.org>
> 
> Current code relies on master driver enabling necessary clocks before
> IOMMU is accessed, however there are cases when the IOMMU should be
> accessed while the master is not running yet, for example allocating
> V4L2 videobuf2 buffers, which is done by the VB2 framework using DMA
> mapping API and doesn't engage the master driver at all.
> 
> This patch fixes the problem by letting clocks needed for IOMMU
> operation to be listed in Device Tree and making the driver enable them
> for the time of accessing the hardware.
> 
> Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
> Signed-off-by: Tomasz Figa <tfiga@chromium.org>
> ---
> 
> Changes in v5:
> Use clk_bulk APIs.
> 
> Changes in v4: None
> Changes in v3: None
> Changes in v2: None
> 
>  .../devicetree/bindings/iommu/rockchip,iommu.txt   |  8 +++

Please split binding patches to a separate patch.

>  drivers/iommu/rockchip-iommu.c                     | 74 ++++++++++++++++++++--
>  2 files changed, 76 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
> index 2098f7732264..33dd853359fa 100644
> --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
> +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
> @@ -14,6 +14,13 @@ Required properties:
>                      "single-master" device, and needs no additional information
>                      to associate with its master device.  See:
>                      Documentation/devicetree/bindings/iommu/iommu.txt
> +Optional properties:
> +- clocks : A list of master clocks requires for the IOMMU to be accessible
> +           by the host CPU. The number of clocks depends on the master
> +           block and might as well be zero. See [1] for generic clock
> +           bindings description.

Hardware blocks don't have a variable number of clock connections. This 
needs to be a defined number of clocks (per compatible string if there 
are different implementations with different # of clocks).

> +
> +[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
>  
>  Optional properties:
>  - rockchip,disable-mmu-reset : Don't use the mmu reset operation.
> @@ -27,5 +34,6 @@ Example:
>  		reg = <0xff940300 0x100>;
>  		interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
>  		interrupt-names = "vopl_mmu";
> +		clocks = <&cru ACLK_VOP1>, <&cru DCLK_VOP1>, <&cru HCLK_VOP1>;
>  		#iommu-cells = <0>;
>  	};
Tomasz Figa Jan. 31, 2018, 7:52 a.m. UTC | #4
Hi Rob,

On Wed, Jan 31, 2018 at 2:05 AM, Rob Herring <robh@kernel.org> wrote:
> On Wed, Jan 24, 2018 at 06:35:11PM +0800, Jeffy Chen wrote:
>> From: Tomasz Figa <tfiga@chromium.org>
>>
>> Current code relies on master driver enabling necessary clocks before
>> IOMMU is accessed, however there are cases when the IOMMU should be
>> accessed while the master is not running yet, for example allocating
>> V4L2 videobuf2 buffers, which is done by the VB2 framework using DMA
>> mapping API and doesn't engage the master driver at all.
>>
>> This patch fixes the problem by letting clocks needed for IOMMU
>> operation to be listed in Device Tree and making the driver enable them
>> for the time of accessing the hardware.
>>
>> Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
>> Signed-off-by: Tomasz Figa <tfiga@chromium.org>
>> ---
>>
>> Changes in v5:
>> Use clk_bulk APIs.
>>
>> Changes in v4: None
>> Changes in v3: None
>> Changes in v2: None
>>
>>  .../devicetree/bindings/iommu/rockchip,iommu.txt   |  8 +++
>
> Please split binding patches to a separate patch.
>
>>  drivers/iommu/rockchip-iommu.c                     | 74 ++++++++++++++++++++--
>>  2 files changed, 76 insertions(+), 6 deletions(-)
>>
>> diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
>> index 2098f7732264..33dd853359fa 100644
>> --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
>> +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
>> @@ -14,6 +14,13 @@ Required properties:
>>                      "single-master" device, and needs no additional information
>>                      to associate with its master device.  See:
>>                      Documentation/devicetree/bindings/iommu/iommu.txt
>> +Optional properties:
>> +- clocks : A list of master clocks requires for the IOMMU to be accessible
>> +           by the host CPU. The number of clocks depends on the master
>> +           block and might as well be zero. See [1] for generic clock
>> +           bindings description.
>
> Hardware blocks don't have a variable number of clock connections.

I think you underestimate the imagination of hardware designers. :)

For Rockchip IOMMU, there is a set of clocks, which all need to be
enabled for IOMMU register access to succeed. The clocks are not
directly fed to the IOMMU, but they are needed for the various buses
and intermediate blocks on the way to the IOMMU to work.

And the set varies based on next to which master block the IOMMU block
is located, because the hierarchy of buses and intermediate blocks is
different.

Best regards,
Tomasz
Rob Herring Jan. 31, 2018, 1:50 p.m. UTC | #5
On Wed, Jan 31, 2018 at 1:52 AM, Tomasz Figa <tfiga@chromium.org> wrote:
> Hi Rob,
>
> On Wed, Jan 31, 2018 at 2:05 AM, Rob Herring <robh@kernel.org> wrote:
>> On Wed, Jan 24, 2018 at 06:35:11PM +0800, Jeffy Chen wrote:
>>> From: Tomasz Figa <tfiga@chromium.org>
>>>
>>> Current code relies on master driver enabling necessary clocks before
>>> IOMMU is accessed, however there are cases when the IOMMU should be
>>> accessed while the master is not running yet, for example allocating
>>> V4L2 videobuf2 buffers, which is done by the VB2 framework using DMA
>>> mapping API and doesn't engage the master driver at all.
>>>
>>> This patch fixes the problem by letting clocks needed for IOMMU
>>> operation to be listed in Device Tree and making the driver enable them
>>> for the time of accessing the hardware.
>>>
>>> Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
>>> Signed-off-by: Tomasz Figa <tfiga@chromium.org>
>>> ---
>>>
>>> Changes in v5:
>>> Use clk_bulk APIs.
>>>
>>> Changes in v4: None
>>> Changes in v3: None
>>> Changes in v2: None
>>>
>>>  .../devicetree/bindings/iommu/rockchip,iommu.txt   |  8 +++
>>
>> Please split binding patches to a separate patch.
>>
>>>  drivers/iommu/rockchip-iommu.c                     | 74 ++++++++++++++++++++--
>>>  2 files changed, 76 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
>>> index 2098f7732264..33dd853359fa 100644
>>> --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
>>> +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
>>> @@ -14,6 +14,13 @@ Required properties:
>>>                      "single-master" device, and needs no additional information
>>>                      to associate with its master device.  See:
>>>                      Documentation/devicetree/bindings/iommu/iommu.txt
>>> +Optional properties:
>>> +- clocks : A list of master clocks requires for the IOMMU to be accessible
>>> +           by the host CPU. The number of clocks depends on the master
>>> +           block and might as well be zero. See [1] for generic clock
>>> +           bindings description.
>>
>> Hardware blocks don't have a variable number of clock connections.
>
> I think you underestimate the imagination of hardware designers. :)

Learned long ago to never do that. If there are 2 ways to do
something, they will find a 3rd way.

> For Rockchip IOMMU, there is a set of clocks, which all need to be
> enabled for IOMMU register access to succeed. The clocks are not
> directly fed to the IOMMU, but they are needed for the various buses
> and intermediate blocks on the way to the IOMMU to work.

The binding should describe the clock connections, not what clocks a
driver needs (currently). It sounds like a lack of managing bus clocks
to me.

In any case, the binding must be written so it can be verified. If you
can have any number of clocks with any names, there's no point in
documenting.

Rob
Vivek Gautam Feb. 14, 2018, 10:03 a.m. UTC | #6
On 1/24/2018 7:19 PM, Robin Murphy wrote:
> On 24/01/18 10:35, Jeffy Chen wrote:
>> From: Tomasz Figa <tfiga@chromium.org>
>>
>> Current code relies on master driver enabling necessary clocks before
>> IOMMU is accessed, however there are cases when the IOMMU should be
>> accessed while the master is not running yet, for example allocating
>> V4L2 videobuf2 buffers, which is done by the VB2 framework using DMA
>> mapping API and doesn't engage the master driver at all.
>>
>> This patch fixes the problem by letting clocks needed for IOMMU
>> operation to be listed in Device Tree and making the driver enable them
>> for the time of accessing the hardware.
>>
>> Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
>> Signed-off-by: Tomasz Figa <tfiga@chromium.org>
>> ---
>>
>> Changes in v5:
>> Use clk_bulk APIs.
>>
>> Changes in v4: None
>> Changes in v3: None
>> Changes in v2: None

[snip]

>>   +static int rk_iommu_of_get_clocks(struct rk_iommu *iommu)
>> +{
>> +    struct device_node *np = iommu->dev->of_node;
>> +    int ret;
>> +    int i;
>> +
>> +    ret = of_count_phandle_with_args(np, "clocks", "#clock-cells");
>> +    if (ret == -ENOENT)
>> +        return 0;
>> +    else if (ret < 0)
>> +        return ret;
>> +
>> +    iommu->num_clocks = ret;
>> +    iommu->clocks = devm_kcalloc(iommu->dev, iommu->num_clocks,
>> +                     sizeof(*iommu->clocks), GFP_KERNEL);
>> +    if (!iommu->clocks)
>> +        return -ENOMEM;
>> +
>> +    for (i = 0; i < iommu->num_clocks; ++i) {
>> +        iommu->clocks[i].clk = of_clk_get(np, i);
>> +        if (IS_ERR(iommu->clocks[i].clk)) {
>> +            ret = PTR_ERR(iommu->clocks[i].clk);
>> +            goto err_clk_put;
>> +        }
>> +    }
>
> Just to confirm my understanding from a quick scan through the code, 
> the reason we can't use clk_bulk_get() here is that currently, 
> clocks[i].id being NULL means we'd end up just getting the first clock 
> multiple times, right?
>
> I guess there could be other users who also want "just get whatever 
> clocks I have" functionality, so it might be worth proposing that for 
> the core API as a separate/follow-up patch, but it definitely doesn't 
> need to be part of this series.

Just to understand. Is it okay to make the driver "just get whatever 
clocks device node gives"?
Doesn't the driver need to be aware of which all clocks are supposed to 
be obtained and enabled
  It's should good for debug to let the world know which clock we failed 
to get.

regards
Vivek

>
> I really don't know enough about correct clk API usage, but modulo the 
> binding comments it certainly looks nice and tidy now;
>
> Acked-by: Robin Murphy <robin.murphy@arm.com>
>
> Thanks,
> Robin.
[snip]

> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
Tomasz Figa Feb. 14, 2018, 11:27 a.m. UTC | #7
On Wed, Feb 14, 2018 at 7:03 PM, Vivek Gautam
<vivek.gautam@codeaurora.org> wrote:
>
>
> On 1/24/2018 7:19 PM, Robin Murphy wrote:
>>
>> On 24/01/18 10:35, Jeffy Chen wrote:
>>>
>>> From: Tomasz Figa <tfiga@chromium.org>
>>>
>>> Current code relies on master driver enabling necessary clocks before
>>> IOMMU is accessed, however there are cases when the IOMMU should be
>>> accessed while the master is not running yet, for example allocating
>>> V4L2 videobuf2 buffers, which is done by the VB2 framework using DMA
>>> mapping API and doesn't engage the master driver at all.
>>>
>>> This patch fixes the problem by letting clocks needed for IOMMU
>>> operation to be listed in Device Tree and making the driver enable them
>>> for the time of accessing the hardware.
>>>
>>> Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
>>> Signed-off-by: Tomasz Figa <tfiga@chromium.org>
>>> ---
>>>
>>> Changes in v5:
>>> Use clk_bulk APIs.
>>>
>>> Changes in v4: None
>>> Changes in v3: None
>>> Changes in v2: None
>
>
> [snip]
>
>
>>>   +static int rk_iommu_of_get_clocks(struct rk_iommu *iommu)
>>> +{
>>> +    struct device_node *np = iommu->dev->of_node;
>>> +    int ret;
>>> +    int i;
>>> +
>>> +    ret = of_count_phandle_with_args(np, "clocks", "#clock-cells");
>>> +    if (ret == -ENOENT)
>>> +        return 0;
>>> +    else if (ret < 0)
>>> +        return ret;
>>> +
>>> +    iommu->num_clocks = ret;
>>> +    iommu->clocks = devm_kcalloc(iommu->dev, iommu->num_clocks,
>>> +                     sizeof(*iommu->clocks), GFP_KERNEL);
>>> +    if (!iommu->clocks)
>>> +        return -ENOMEM;
>>> +
>>> +    for (i = 0; i < iommu->num_clocks; ++i) {
>>> +        iommu->clocks[i].clk = of_clk_get(np, i);
>>> +        if (IS_ERR(iommu->clocks[i].clk)) {
>>> +            ret = PTR_ERR(iommu->clocks[i].clk);
>>> +            goto err_clk_put;
>>> +        }
>>> +    }
>>
>>
>> Just to confirm my understanding from a quick scan through the code, the
>> reason we can't use clk_bulk_get() here is that currently, clocks[i].id
>> being NULL means we'd end up just getting the first clock multiple times,
>> right?
>>
>> I guess there could be other users who also want "just get whatever clocks
>> I have" functionality, so it might be worth proposing that for the core API
>> as a separate/follow-up patch, but it definitely doesn't need to be part of
>> this series.
>
>
> Just to understand. Is it okay to make the driver "just get whatever clocks
> device node gives"?
> Doesn't the driver need to be aware of which all clocks are supposed to be
> obtained and enabled
>  It's should good for debug to let the world know which clock we failed to
> get.

Yeah, in general that's desired. However, it is at least impractical
to specify all the clocks in Rockchip case, because it's different for
each block and depends on the master next to which it is located.

Best regards,
Tomasz
Robin Murphy Feb. 27, 2018, 4:59 p.m. UTC | #8
On 23/02/18 10:24, JeffyChen wrote:
> Hi guys,
> 
> On 02/01/2018 07:19 PM, JeffyChen wrote:
>>>>>>
>>>>>>
>>>>>> diff --git
>>>>>> a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
>>>>>> b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
>>>>>> index 2098f7732264..33dd853359fa 100644
>>>>>> --- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
>>>>>> +++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
>>>>>> @@ -14,6 +14,13 @@ Required properties:
>>>>>>                       "single-master" device, and needs no
>>>>>> additional information
>>>>>>                       to associate with its master device.  See:
>>>>>>
>>>>>> Documentation/devicetree/bindings/iommu/iommu.txt
>>>>>> +Optional properties:
>>>>>> +- clocks : A list of master clocks requires for the IOMMU to be
>>>>>> accessible
>>>>>> +           by the host CPU. The number of clocks depends on the
>>>>>> master
>>>>>> +           block and might as well be zero. See [1] for generic 
>>>>>> clock
>>>>>> +           bindings description.
>>>>>
>>>>> Hardware blocks don't have a variable number of clock connections.
>>>>
>>>> I think you underestimate the imagination of hardware designers. :)
>>>
>>> Learned long ago to never do that. If there are 2 ways to do
>>> something, they will find a 3rd way.
>>>
>>>> For Rockchip IOMMU, there is a set of clocks, which all need to be
>>>> enabled for IOMMU register access to succeed. The clocks are not
>>>> directly fed to the IOMMU, but they are needed for the various buses
>>>> and intermediate blocks on the way to the IOMMU to work.
>>>
>>> The binding should describe the clock connections, not what clocks a
>>> driver needs (currently). It sounds like a lack of managing bus clocks
>>> to me.
>>>
>>> In any case, the binding must be written so it can be verified. If you
>>> can have any number of clocks with any names, there's no point in
>>> documenting.
>>
>> the rockchip IOMMU is part of the master block in hardware, so it needs
>> to control the master's power domain and some of the master's clocks
>> when access it's registers.
>>
>> and the number of clocks needed here, might be different between each
>> IOMMUs(according to which master block it belongs), it's a little like
>> our power domain:
>> https://elixir.free-electrons.com/linux/latest/source/arch/arm64/boot/dts/rockchip/rk3399.dtsi#L935 
>>
>>
>>
>> i'm not sure how to describe this correctly, is it ok use something like
>> "the same as it's master block"?
> 
> would it make sense to add a property to specify the master who owns the 
> iommu, and we can get all clocks(only some of those clocks are actually 
> needed) from it in the of_xlate()? and we can also reuse the clock-names 
> of that master to build clk_bulk_data and log errors in clk_bulk_get.

I'm inclined to agree with Rob here - if we're to add anything to the 
binding, it should only be whatever clock inputs are defined for the 
IOMMU IP block itself. If Linux doesn't properly handle the interconnect 
clock hierarchy external to a particular integration, that's a separate 
issue and it's not the binding's problem.

I actually quite like the hack of "borrowing" the clocks from 
dev->of_node in of_xlate() - you shouldn't need any DT changes for that, 
because you already know that each IOMMU instance only has the one 
master device anyway.

Robin.
Jeffy Chen Feb. 28, 2018, 1 p.m. UTC | #9
Hi Robin,

Thanks for your reply.

On 02/28/2018 12:59 AM, Robin Murphy wrote:
>>> the rockchip IOMMU is part of the master block in hardware, so it needs
>>> to control the master's power domain and some of the master's clocks
>>> when access it's registers.
>>>
>>> and the number of clocks needed here, might be different between each
>>> IOMMUs(according to which master block it belongs), it's a little like
>>> our power domain:
>>> https://elixir.free-electrons.com/linux/latest/source/arch/arm64/boot/dts/rockchip/rk3399.dtsi#L935
>>>
>>>
>>>
>>> i'm not sure how to describe this correctly, is it ok use something like
>>> "the same as it's master block"?
>>
>> would it make sense to add a property to specify the master who owns
>> the iommu, and we can get all clocks(only some of those clocks are
>> actually needed) from it in the of_xlate()? and we can also reuse the
>> clock-names of that master to build clk_bulk_data and log errors in
>> clk_bulk_get.
>
> I'm inclined to agree with Rob here - if we're to add anything to the
> binding, it should only be whatever clock inputs are defined for the
> IOMMU IP block itself. If Linux doesn't properly handle the interconnect
> clock hierarchy external to a particular integration, that's a separate
> issue and it's not the binding's problem.
>
> I actually quite like the hack of "borrowing" the clocks from
> dev->of_node in of_xlate() - you shouldn't need any DT changes for that,
> because you already know that each IOMMU instance only has the one
> master device anyway.

Thanks:) but actually we are going to support sharing IOMMU between 
multiple masters(one of them is the main master i think) in the newer 
chips(not yet supported on upstream kernel)...

So we might have to get all clocks from all masters, or find a way to 
specify the main master...and for the multiple masters case, do it in 
of_xlate() turns out to be a little racy...maybe we can add a property 
to specify main master, and get it's clocks in probe()?

>
> Robin.
Robin Murphy Feb. 28, 2018, 3:06 p.m. UTC | #10
On 28/02/18 13:00, JeffyChen wrote:
> Hi Robin,
> 
> Thanks for your reply.
> 
> On 02/28/2018 12:59 AM, Robin Murphy wrote:
>>>> the rockchip IOMMU is part of the master block in hardware, so it needs
>>>> to control the master's power domain and some of the master's clocks
>>>> when access it's registers.
>>>>
>>>> and the number of clocks needed here, might be different between each
>>>> IOMMUs(according to which master block it belongs), it's a little like
>>>> our power domain:
>>>> https://elixir.free-electrons.com/linux/latest/source/arch/arm64/boot/dts/rockchip/rk3399.dtsi#L935 
>>>>
>>>>
>>>>
>>>>
>>>> i'm not sure how to describe this correctly, is it ok use something 
>>>> like
>>>> "the same as it's master block"?
>>>
>>> would it make sense to add a property to specify the master who owns
>>> the iommu, and we can get all clocks(only some of those clocks are
>>> actually needed) from it in the of_xlate()? and we can also reuse the
>>> clock-names of that master to build clk_bulk_data and log errors in
>>> clk_bulk_get.
>>
>> I'm inclined to agree with Rob here - if we're to add anything to the
>> binding, it should only be whatever clock inputs are defined for the
>> IOMMU IP block itself. If Linux doesn't properly handle the interconnect
>> clock hierarchy external to a particular integration, that's a separate
>> issue and it's not the binding's problem.
>>
>> I actually quite like the hack of "borrowing" the clocks from
>> dev->of_node in of_xlate() - you shouldn't need any DT changes for that,
>> because you already know that each IOMMU instance only has the one
>> master device anyway.
> 
> Thanks:) but actually we are going to support sharing IOMMU between 
> multiple masters(one of them is the main master i think) in the newer 
> chips(not yet supported on upstream kernel)...

Ha! OK, fair enough, back to the first point then...

> So we might have to get all clocks from all masters, or find a way to 
> specify the main master...and for the multiple masters case, do it in 
> of_xlate() turns out to be a little racy...maybe we can add a property 
> to specify main master, and get it's clocks in probe()?

I notice that the 4.4 BSP kernel consistently specifies "aclk" and 
"hclk" for the IOMMU instances - it feels unusual to say "why don't we 
follow the downstream binding?", but it does look a lot like what I 
would expect (I'd guess at one for the register slave interface and one 
for the master interface/general operation?)

If we can implement conceptually-correct clock handling based on an 
accurate binding, which should cover most cases, and *then* look at 
hacking around those where it doesn't quite work in practice due to 
shortcomings elsewhere, that would be ideal, and of course a lot nicer 
than just jumping straight into piles of hacks.

Robin.
Jeffy Chen March 1, 2018, 1:37 a.m. UTC | #11
Hi Robin,

Thanks for your reply.

On 02/28/2018 11:06 PM, Robin Murphy wrote:
> On 28/02/18 13:00, JeffyChen wrote:
>> Hi Robin,
>>
>> Thanks for your reply.
>>
>> On 02/28/2018 12:59 AM, Robin Murphy wrote:
>>>>> the rockchip IOMMU is part of the master block in hardware, so it
>>>>> needs
>>>>> to control the master's power domain and some of the master's clocks
>>>>> when access it's registers.
>>>>>
>>>>> and the number of clocks needed here, might be different between each
>>>>> IOMMUs(according to which master block it belongs), it's a little like
>>>>> our power domain:
>>>>> https://elixir.free-electrons.com/linux/latest/source/arch/arm64/boot/dts/rockchip/rk3399.dtsi#L935
>>>>>
>>>>>
>>>>>
>>>>>
>>>>> i'm not sure how to describe this correctly, is it ok use something
>>>>> like
>>>>> "the same as it's master block"?
>>>>
>>>> would it make sense to add a property to specify the master who owns
>>>> the iommu, and we can get all clocks(only some of those clocks are
>>>> actually needed) from it in the of_xlate()? and we can also reuse the
>>>> clock-names of that master to build clk_bulk_data and log errors in
>>>> clk_bulk_get.
>>>
>>> I'm inclined to agree with Rob here - if we're to add anything to the
>>> binding, it should only be whatever clock inputs are defined for the
>>> IOMMU IP block itself. If Linux doesn't properly handle the interconnect
>>> clock hierarchy external to a particular integration, that's a separate
>>> issue and it's not the binding's problem.
>>>
>>> I actually quite like the hack of "borrowing" the clocks from
>>> dev->of_node in of_xlate() - you shouldn't need any DT changes for that,
>>> because you already know that each IOMMU instance only has the one
>>> master device anyway.
>>
>> Thanks:) but actually we are going to support sharing IOMMU between
>> multiple masters(one of them is the main master i think) in the newer
>> chips(not yet supported on upstream kernel)...
>
> Ha! OK, fair enough, back to the first point then...
>
>> So we might have to get all clocks from all masters, or find a way to
>> specify the main master...and for the multiple masters case, do it in
>> of_xlate() turns out to be a little racy...maybe we can add a property
>> to specify main master, and get it's clocks in probe()?
>
> I notice that the 4.4 BSP kernel consistently specifies "aclk" and
> "hclk" for the IOMMU instances - it feels unusual to say "why don't we
> follow the downstream binding?", but it does look a lot like what I
> would expect (I'd guess at one for the register slave interface and one
> for the master interface/general operation?)
huh, right.
i did noticed that, but there's a hevc_mmu with ("aclk", "hclk", 
"clk_core", "clk_cabac") confused me.

so confirmed with Simon, that hevc_mmu is wrong. currently all IOMMUs 
should only have 2 clks, either aclk+hclk or aclk+pclk (depends on the 
clk tree)

so it seems to be a good idea to do so, will send patches soon, thanks :)
>
> If we can implement conceptually-correct clock handling based on an
> accurate binding, which should cover most cases, and *then* look at
> hacking around those where it doesn't quite work in practice due to
> shortcomings elsewhere, that would be ideal, and of course a lot nicer
> than just jumping straight into piles of hacks.
>
> Robin.
>
>
>
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
index 2098f7732264..33dd853359fa 100644
--- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
+++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
@@ -14,6 +14,13 @@  Required properties:
                     "single-master" device, and needs no additional information
                     to associate with its master device.  See:
                     Documentation/devicetree/bindings/iommu/iommu.txt
+Optional properties:
+- clocks : A list of master clocks requires for the IOMMU to be accessible
+           by the host CPU. The number of clocks depends on the master
+           block and might as well be zero. See [1] for generic clock
+           bindings description.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
 
 Optional properties:
 - rockchip,disable-mmu-reset : Don't use the mmu reset operation.
@@ -27,5 +34,6 @@  Example:
 		reg = <0xff940300 0x100>;
 		interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
 		interrupt-names = "vopl_mmu";
+		clocks = <&cru ACLK_VOP1>, <&cru DCLK_VOP1>, <&cru HCLK_VOP1>;
 		#iommu-cells = <0>;
 	};
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index c4131ca792e0..8a5e2a659b67 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -4,6 +4,7 @@ 
  * published by the Free Software Foundation.
  */
 
+#include <linux/clk.h>
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/device.h>
@@ -91,6 +92,8 @@  struct rk_iommu {
 	struct device *dev;
 	void __iomem **bases;
 	int num_mmu;
+	struct clk_bulk_data *clocks;
+	int num_clocks;
 	bool reset_disabled;
 	struct iommu_device iommu;
 	struct list_head node; /* entry in rk_iommu_domain.iommus */
@@ -450,6 +453,38 @@  static int rk_iommu_force_reset(struct rk_iommu *iommu)
 	return 0;
 }
 
+static int rk_iommu_of_get_clocks(struct rk_iommu *iommu)
+{
+	struct device_node *np = iommu->dev->of_node;
+	int ret;
+	int i;
+
+	ret = of_count_phandle_with_args(np, "clocks", "#clock-cells");
+	if (ret == -ENOENT)
+		return 0;
+	else if (ret < 0)
+		return ret;
+
+	iommu->num_clocks = ret;
+	iommu->clocks = devm_kcalloc(iommu->dev, iommu->num_clocks,
+				     sizeof(*iommu->clocks), GFP_KERNEL);
+	if (!iommu->clocks)
+		return -ENOMEM;
+
+	for (i = 0; i < iommu->num_clocks; ++i) {
+		iommu->clocks[i].clk = of_clk_get(np, i);
+		if (IS_ERR(iommu->clocks[i].clk)) {
+			ret = PTR_ERR(iommu->clocks[i].clk);
+			goto err_clk_put;
+		}
+	}
+
+	return 0;
+err_clk_put:
+	clk_bulk_put(i, iommu->clocks);
+	return ret;
+}
+
 static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
 {
 	void __iomem *base = iommu->bases[index];
@@ -506,6 +541,8 @@  static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
 	irqreturn_t ret = IRQ_NONE;
 	int i;
 
+	WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks));
+
 	for (i = 0; i < iommu->num_mmu; i++) {
 		int_status = rk_iommu_read(iommu->bases[i], RK_MMU_INT_STATUS);
 		if (int_status == 0)
@@ -552,6 +589,8 @@  static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
 		rk_iommu_write(iommu->bases[i], RK_MMU_INT_CLEAR, int_status);
 	}
 
+	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
+
 	return ret;
 }
 
@@ -594,7 +633,9 @@  static void rk_iommu_zap_iova(struct rk_iommu_domain *rk_domain,
 	list_for_each(pos, &rk_domain->iommus) {
 		struct rk_iommu *iommu;
 		iommu = list_entry(pos, struct rk_iommu, node);
+		WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks));
 		rk_iommu_zap_lines(iommu, iova, size);
+		clk_bulk_disable(iommu->num_clocks, iommu->clocks);
 	}
 	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
 }
@@ -823,10 +864,14 @@  static int rk_iommu_attach_device(struct iommu_domain *domain,
 	if (!iommu)
 		return 0;
 
-	ret = rk_iommu_enable_stall(iommu);
+	ret = clk_bulk_enable(iommu->num_clocks, iommu->clocks);
 	if (ret)
 		return ret;
 
+	ret = rk_iommu_enable_stall(iommu);
+	if (ret)
+		goto out_disable_clocks;
+
 	ret = rk_iommu_force_reset(iommu);
 	if (ret)
 		goto out_disable_stall;
@@ -852,6 +897,8 @@  static int rk_iommu_attach_device(struct iommu_domain *domain,
 
 out_disable_stall:
 	rk_iommu_disable_stall(iommu);
+out_disable_clocks:
+	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
 	return ret;
 }
 
@@ -873,6 +920,7 @@  static void rk_iommu_detach_device(struct iommu_domain *domain,
 	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
 
 	/* Ignore error while disabling, just keep going */
+	WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks));
 	rk_iommu_enable_stall(iommu);
 	rk_iommu_disable_paging(iommu);
 	for (i = 0; i < iommu->num_mmu; i++) {
@@ -880,6 +928,7 @@  static void rk_iommu_detach_device(struct iommu_domain *domain,
 		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, 0);
 	}
 	rk_iommu_disable_stall(iommu);
+	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
 
 	iommu->domain = NULL;
 
@@ -1172,18 +1221,31 @@  static int rk_iommu_probe(struct platform_device *pdev)
 	iommu->reset_disabled = device_property_read_bool(dev,
 					"rockchip,disable-mmu-reset");
 
-	err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
+	err = rk_iommu_of_get_clocks(iommu);
 	if (err)
 		return err;
 
+	err = clk_bulk_prepare(iommu->num_clocks, iommu->clocks);
+	if (err)
+		goto err_put_clocks;
+
+	err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
+	if (err)
+		goto err_unprepare_clocks;
+
 	iommu_device_set_ops(&iommu->iommu, &rk_iommu_ops);
 	err = iommu_device_register(&iommu->iommu);
-	if (err) {
-		iommu_device_sysfs_remove(&iommu->iommu);
-		return err;
-	}
+	if (err)
+		goto err_remove_sysfs;
 
 	return 0;
+err_remove_sysfs:
+	iommu_device_sysfs_remove(&iommu->iommu);
+err_unprepare_clocks:
+	clk_bulk_unprepare(iommu->num_clocks, iommu->clocks);
+err_put_clocks:
+	clk_bulk_put(iommu->num_clocks, iommu->clocks);
+	return err;
 }
 
 static const struct of_device_id rk_iommu_dt_ids[] = {