diff mbox

[v6,4/5] iommu/mediatek: Add mt8173 IOMMU driver

Message ID 1449568153-15643-5-git-send-email-yong.wu@mediatek.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yong Wu (吴勇) Dec. 8, 2015, 9:49 a.m. UTC
This patch adds support for mediatek m4u (MultiMedia Memory Management
Unit).

Signed-off-by: Yong Wu <yong.wu@mediatek.com>
---
 drivers/iommu/Kconfig     |  15 +
 drivers/iommu/Makefile    |   1 +
 drivers/iommu/mtk_iommu.c | 752 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 768 insertions(+)
 create mode 100644 drivers/iommu/mtk_iommu.c

Comments

kernel test robot Dec. 8, 2015, 10:32 a.m. UTC | #1
Hi Yong,

[auto build test ERROR on tegra/for-next]
[also build test ERROR on v4.4-rc4 next-20151208]

url:    https://github.com/0day-ci/linux/commits/Yong-Wu/MT8173-IOMMU-SUPPORT/20151208-175252
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux for-next
config: x86_64-allmodconfig (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All error/warnings (new ones prefixed by >>):

   drivers/iommu/dma-iommu.c: In function '__iommu_dma_alloc_pages':
>> drivers/iommu/dma-iommu.c:198:11: error: implicit declaration of function 'vzalloc' [-Werror=implicit-function-declaration]
      pages = vzalloc(array_size);
              ^
>> drivers/iommu/dma-iommu.c:198:9: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
      pages = vzalloc(array_size);
            ^
   cc1: some warnings being treated as errors
--
>> drivers/iommu/mtk_iommu.c:176:19: warning: initialization from incompatible pointer type [-Wincompatible-pointer-types]
     .tlb_add_flush = mtk_iommu_tlb_add_flush_nosync,
                      ^
   drivers/iommu/mtk_iommu.c:176:19: note: (near initialization for 'mtk_iommu_gather_ops.tlb_add_flush')
   drivers/iommu/mtk_iommu.c: In function 'mtk_iommu_domain_finalise':
>> drivers/iommu/mtk_iommu.c:239:4: error: 'IO_PGTABLE_QUIRK_NO_PERMS' undeclared (first use in this function)
       IO_PGTABLE_QUIRK_NO_PERMS |
       ^
   drivers/iommu/mtk_iommu.c:239:4: note: each undeclared identifier is reported only once for each function it appears in
>> drivers/iommu/mtk_iommu.c:240:4: error: 'IO_PGTABLE_QUIRK_TLBI_ON_MAP' undeclared (first use in this function)
       IO_PGTABLE_QUIRK_TLBI_ON_MAP,
       ^
>> drivers/iommu/mtk_iommu.c:248:34: error: 'ARM_V7S' undeclared (first use in this function)
     dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
                                     ^
   In file included from arch/x86/include/asm/realmode.h:5:0,
                    from arch/x86/include/asm/acpi.h:33,
                    from arch/x86/include/asm/fixmap.h:19,
                    from arch/x86/include/asm/apic.h:12,
                    from arch/x86/include/asm/smp.h:12,
                    from arch/x86/include/asm/mmzone_64.h:10,
                    from arch/x86/include/asm/mmzone.h:4,
                    from include/linux/mmzone.h:856,
                    from include/linux/topology.h:32,
                    from include/linux/of.h:24,
                    from include/linux/iommu.h:24,
                    from include/linux/dma-iommu.h:23,
                    from drivers/iommu/mtk_iommu.c:16:
>> drivers/iommu/mtk_iommu.c:257:35: error: 'struct io_pgtable_cfg' has no member named 'arm_v7s_cfg'
     writel_relaxed(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
                                      ^
   arch/x86/include/asm/io.h:81:39: note: in definition of macro 'writel_relaxed'
    #define writel_relaxed(v, a) __writel(v, a)
                                          ^
   drivers/iommu/mtk_iommu.c: In function 'mtk_iommu_resume':
   drivers/iommu/mtk_iommu.c:702:35: error: 'struct io_pgtable_cfg' has no member named 'arm_v7s_cfg'
     writel_relaxed(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
                                      ^
   arch/x86/include/asm/io.h:81:39: note: in definition of macro 'writel_relaxed'
    #define writel_relaxed(v, a) __writel(v, a)
                                          ^

vim +/IO_PGTABLE_QUIRK_NO_PERMS +239 drivers/iommu/mtk_iommu.c

   170		/* Clear the CPE status */
   171		writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
   172	}
   173	
   174	static const struct iommu_gather_ops mtk_iommu_gather_ops = {
   175		.tlb_flush_all = mtk_iommu_tlb_flush_all,
 > 176		.tlb_add_flush = mtk_iommu_tlb_add_flush_nosync,
   177		.tlb_sync = mtk_iommu_tlb_sync,
   178	};
   179	
   180	static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
   181	{
   182		struct mtk_iommu_data *data = dev_id;
   183		struct mtk_iommu_domain *dom = data->m4u_dom;
   184		u32 int_state, regval, fault_iova, fault_pa;
   185		unsigned int fault_larb, fault_port;
   186		bool layer, write;
   187	
   188		/* Read error info from registers */
   189		int_state = readl_relaxed(data->base + REG_MMU_FAULT_ST1);
   190		fault_iova = readl_relaxed(data->base + REG_MMU_FAULT_VA);
   191		layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT;
   192		write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT;
   193		fault_iova &= F_MMU_FAULT_VA_MSK;
   194		fault_pa = readl_relaxed(data->base + REG_MMU_INVLD_PA);
   195		regval = readl_relaxed(data->base + REG_MMU_INT_ID);
   196		fault_larb = F_MMU0_INT_ID_LARB_ID(regval);
   197		fault_port = F_MMU0_INT_ID_PORT_ID(regval);
   198	
   199		if (report_iommu_fault(&dom->domain, data->dev, fault_iova,
   200				       write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) {
   201			dev_err_ratelimited(
   202				data->dev,
   203				"fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d layer=%d %s\n",
   204				int_state, fault_iova, fault_pa, fault_larb, fault_port,
   205				layer, write ? "write" : "read");
   206		}
   207	
   208		/* Interrupt clear */
   209		regval = readl_relaxed(data->base + REG_MMU_INT_CONTROL0);
   210		regval |= F_INT_CLR_BIT;
   211		writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL0);
   212	
   213		mtk_iommu_tlb_flush_all(data);
   214	
   215		return IRQ_HANDLED;
   216	}
   217	
   218	static void mtk_iommu_config(struct mtk_iommu_data *data,
   219				     struct device *dev, bool enable)
   220	{
   221		struct mtk_iommu_client_priv *head, *cur, *next;
   222	
   223		head = dev->archdata.iommu;
   224		list_for_each_entry_safe(cur, next, &head->client, client) {
   225			mtk_smi_config_port(
   226				data->larbdev[MTK_M4U_TO_LARB(cur->mtk_m4u_id)],
   227				MTK_M4U_TO_PORT(cur->mtk_m4u_id), enable);
   228		}
   229	}
   230	
   231	static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data)
   232	{
   233		struct mtk_iommu_domain *dom = data->m4u_dom;
   234	
   235		spin_lock_init(&dom->pgtlock);
   236	
   237		dom->cfg = (struct io_pgtable_cfg) {
   238			.quirks = IO_PGTABLE_QUIRK_ARM_NS |
 > 239				IO_PGTABLE_QUIRK_NO_PERMS |
 > 240				IO_PGTABLE_QUIRK_TLBI_ON_MAP,
   241			.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
   242			.ias = 32,
   243			.oas = 32,
   244			.tlb = &mtk_iommu_gather_ops,
   245			.iommu_dev = data->dev,
   246		};
   247	
 > 248		dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
   249		if (!dom->iop) {
   250			dev_err(data->dev, "Failed to alloc io pgtable\n");
   251			return -EINVAL;
   252		}
   253	
   254		/* Update our support page sizes bitmap */
   255		mtk_iommu_ops.pgsize_bitmap = dom->cfg.pgsize_bitmap;
   256	
 > 257		writel_relaxed(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
   258			       data->base + REG_MMU_PT_BASE_ADDR);
   259		return 0;
   260	}

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Joerg Roedel Dec. 14, 2015, 2:16 p.m. UTC | #2
On Tue, Dec 08, 2015 at 05:49:12PM +0800, Yong Wu wrote:
> +static int mtk_iommu_attach_device(struct iommu_domain *domain,
> +				   struct device *dev)
> +{
> +	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
> +	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
> +	struct mtk_iommu_data *data;
> +	int ret;
> +
> +	if (!priv)
> +		return -ENODEV;
> +
> +	data = dev_get_drvdata(priv->m4udev);
> +	if (!data) {
> +		/*
> +		 * The DMA core will run earlier than this probe, and it will
> +		 * create a default iommu domain for each a iommu device.
> +		 * But here there is only one domain called the m4u domain
> +		 * which all the multimedia HW share.
> +		 * The default domain isn't needed here.
> +		 */

The iommu core creates one domain per iommu-group. In your case this
means one default domain per iommu in the system.

> +		iommu_domain_free(domain);

This function is not supposed to free the domain passed to it.

> +static int mtk_iommu_add_device(struct device *dev)
> +{
> +	struct iommu_group *group;
> +
> +	if (!dev->archdata.iommu) /* Not a iommu client device */
> +		return -ENODEV;
> +
> +	group = iommu_group_get_for_dev(dev);
> +	if (IS_ERR(group))
> +		return PTR_ERR(group);
> +
> +	iommu_group_put(group);
> +	return 0;
> +}

[...]

> +static struct iommu_group *mtk_iommu_device_group(struct device *dev)
> +{
> +	struct mtk_iommu_data *data;
> +	struct mtk_iommu_client_priv *priv;
> +
> +	priv = dev->archdata.iommu;
> +	if (!priv)
> +		return ERR_PTR(-ENODEV);
> +
> +	/* All the client devices are in the same m4u iommu-group */
> +	data = dev_get_drvdata(priv->m4udev);
> +	if (!data->m4u_group) {
> +		data->m4u_group = iommu_group_alloc();
> +		if (IS_ERR(data->m4u_group))
> +			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
> +	}
> +	return data->m4u_group;
> +}

This looks much better than before, thanks.
Matthias Brugger Dec. 14, 2015, 6:19 p.m. UTC | #3
On 08/12/15 10:49, Yong Wu wrote:
> This patch adds support for mediatek m4u (MultiMedia Memory Management
> Unit).
>
> Signed-off-by: Yong Wu <yong.wu@mediatek.com>
> ---
>   drivers/iommu/Kconfig     |  15 +
>   drivers/iommu/Makefile    |   1 +
>   drivers/iommu/mtk_iommu.c | 752 ++++++++++++++++++++++++++++++++++++++++++++++
>   3 files changed, 768 insertions(+)
>   create mode 100644 drivers/iommu/mtk_iommu.c
>
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index b9094e9..aab942f 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -393,4 +393,19 @@ config S390_IOMMU
>   	help
>   	  Support for the IOMMU API for s390 PCI devices.
>
> +config MTK_IOMMU
> +	bool "MTK IOMMU Support"
> +	depends on ARCH_MEDIATEK || COMPILE_TEST
> +	select IOMMU_API
> +	select IOMMU_DMA
> +	select IOMMU_IO_PGTABLE_ARMV7S
> +	select MEMORY
> +	select MTK_SMI
> +	help
> +	  Support for the M4U on certain Mediatek SOCs. M4U is MultiMedia
> +	  Memory Management Unit. This option enables remapping of DMA memory
> +	  accesses for the multimedia subsystem.
> +
> +	  If unsure, say N here.
> +
>   endif # IOMMU_SUPPORT
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index 68faca02..02887bc 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -22,6 +22,7 @@ obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
>   obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
>   obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
>   obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
> +obj-$(CONFIG_MTK_IOMMU) += mtk_iommu.o
>   obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o
>   obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o
>   obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> new file mode 100644
> index 0000000..6a21e70
> --- /dev/null
> +++ b/drivers/iommu/mtk_iommu.c
> @@ -0,0 +1,752 @@
> +/*
> + * Copyright (c) 2014-2015 MediaTek Inc.
> + * Author: Yong Wu <yong.wu@mediatek.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +#include <linux/clk.h>
> +#include <linux/component.h>
> +#include <linux/dma-iommu.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/iommu.h>
> +#include <linux/iopoll.h>
> +#include <linux/list.h>
> +#include <linux/module.h>
> +#include <linux/of_address.h>
> +#include <linux/of_iommu.h>
> +#include <linux/of_irq.h>
> +#include <linux/of_platform.h>
> +#include <linux/platform_device.h>
> +#include <soc/mediatek/smi.h>
> +#include <dt-bindings/memory/mt8173-larb-port.h>
> +
> +#include "io-pgtable.h"
> +
> +#define REG_MMU_PT_BASE_ADDR			0x000
> +
> +#define REG_MMU_INVALIDATE			0x020
> +#define F_ALL_INVLD				0x2
> +#define F_MMU_INV_RANGE				0x1
> +
> +#define REG_MMU_INVLD_START_A			0x024
> +#define REG_MMU_INVLD_END_A			0x028
> +
> +#define REG_MMU_INV_SEL				0x038
> +#define F_INVLD_EN0				BIT(0)
> +#define F_INVLD_EN1				BIT(1)
> +
> +#define REG_MMU_STANDARD_AXI_MODE		0x048
> +#define REG_MMU_DCM_DIS				0x050
> +
> +#define REG_MMU_CTRL_REG			0x110
> +#define F_MMU_PREFETCH_RT_REPLACE_MOD		BIT(4)
> +#define F_MMU_TF_PROTECT_SEL(prot)		(((prot) & 0x3) << 5)
> +#define F_COHERENCE_EN				BIT(8)
> +
> +#define REG_MMU_IVRP_PADDR			0x114
> +#define F_MMU_IVRP_PA_SET(pa)			((pa) >> 1)
> +
> +#define REG_MMU_INT_CONTROL0			0x120
> +#define F_L2_MULIT_HIT_EN			BIT(0)
> +#define F_TABLE_WALK_FAULT_INT_EN		BIT(1)
> +#define F_PREETCH_FIFO_OVERFLOW_INT_EN		BIT(2)
> +#define F_MISS_FIFO_OVERFLOW_INT_EN		BIT(3)
> +#define F_PREFETCH_FIFO_ERR_INT_EN		BIT(5)
> +#define F_MISS_FIFO_ERR_INT_EN			BIT(6)
> +#define F_INT_CLR_BIT				BIT(12)
> +
> +#define REG_MMU_INT_MAIN_CONTROL		0x124
> +#define F_INT_TRANSLATION_FAULT			BIT(0)
> +#define F_INT_MAIN_MULTI_HIT_FAULT		BIT(1)
> +#define F_INT_INVALID_PA_FAULT			BIT(2)
> +#define F_INT_ENTRY_REPLACEMENT_FAULT		BIT(3)
> +#define F_INT_TLB_MISS_FAULT			BIT(4)
> +#define F_INT_MISS_TRANSATION_FIFO_FAULT	BIT(5)
> +#define F_INT_PRETETCH_TRANSATION_FIFO_FAULT	BIT(6)
> +
> +#define REG_MMU_CPE_DONE			0x12C
> +
> +#define REG_MMU_FAULT_ST1			0x134
> +
> +#define REG_MMU_FAULT_VA			0x13c
> +#define F_MMU_FAULT_VA_MSK			0xfffff000
> +#define F_MMU_FAULT_VA_WRITE_BIT		BIT(1)
> +#define F_MMU_FAULT_VA_LAYER_BIT		BIT(0)
> +
> +#define REG_MMU_INVLD_PA			0x140
> +#define REG_MMU_INT_ID				0x150
> +#define F_MMU0_INT_ID_LARB_ID(a)		(((a) >> 7) & 0x7)
> +#define F_MMU0_INT_ID_PORT_ID(a)		(((a) >> 2) & 0x1f)
> +
> +#define MTK_PROTECT_PA_ALIGN			128
> +#define MTK_IOMMU_LARB_MAX_NR			8
> +
> +struct mtk_iommu_suspend_reg {
> +	u32				standard_axi_mode;
> +	u32				dcm_dis;
> +	u32				ctrl_reg;
> +	u32				int_control0;
> +	u32				int_main_control;
> +};
> +
> +struct mtk_iommu_client_priv {
> +	struct list_head		client;
> +	unsigned int			mtk_m4u_id;
> +	struct device			*m4udev;
> +};
> +
> +struct mtk_iommu_domain {
> +	spinlock_t			pgtlock; /* lock for page table */
> +
> +	struct io_pgtable_cfg		cfg;
> +	struct io_pgtable_ops		*iop;
> +
> +	struct iommu_domain		domain;
> +};
> +
> +struct mtk_iommu_data {
> +	void __iomem			*base;
> +	int				irq;
> +	struct device			*dev;
> +	struct device			*larbdev[MTK_IOMMU_LARB_MAX_NR];
> +	struct clk			*bclk;
> +	phys_addr_t			protect_base; /* protect memory base */
> +	int				larb_nr;      /* local arbiter number */
> +	struct mtk_iommu_suspend_reg	reg;
> +	struct mtk_iommu_domain		*m4u_dom;
> +	struct iommu_group		*m4u_group;
> +};
> +
> +static struct iommu_ops mtk_iommu_ops;
> +
> +static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom)
> +{
> +	return container_of(dom, struct mtk_iommu_domain, domain);
> +}
> +
> +static void mtk_iommu_tlb_flush_all(void *cookie)
> +{
> +	struct mtk_iommu_data *data = cookie;
> +
> +	writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + REG_MMU_INV_SEL);
> +	writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE);
> +	wmb(); /* Make sure the tlb flush all done */
> +}
> +
> +static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size,
> +					   size_t granule, bool leaf,
> +					   void *cookie)
> +{
> +	struct mtk_iommu_data *data = cookie;
> +
> +	writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + REG_MMU_INV_SEL);
> +
> +	writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
> +	writel_relaxed(iova + size - 1, data->base + REG_MMU_INVLD_END_A);
> +	writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);
> +}
> +
> +static void mtk_iommu_tlb_sync(void *cookie)
> +{
> +	struct mtk_iommu_data *data = cookie;
> +	int ret;
> +	u32 tmp;
> +
> +	ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, tmp,
> +					tmp != 0, 10, 100000);
> +	if (ret) {
> +		dev_warn(data->dev,
> +			 "Partial TLB flush timed out, falling back to full flush\n");
> +		mtk_iommu_tlb_flush_all(cookie);
> +	}
> +	/* Clear the CPE status */
> +	writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
> +}
> +
> +static const struct iommu_gather_ops mtk_iommu_gather_ops = {
> +	.tlb_flush_all = mtk_iommu_tlb_flush_all,
> +	.tlb_add_flush = mtk_iommu_tlb_add_flush_nosync,
> +	.tlb_sync = mtk_iommu_tlb_sync,
> +};
> +
> +static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
> +{
> +	struct mtk_iommu_data *data = dev_id;
> +	struct mtk_iommu_domain *dom = data->m4u_dom;
> +	u32 int_state, regval, fault_iova, fault_pa;
> +	unsigned int fault_larb, fault_port;
> +	bool layer, write;
> +
> +	/* Read error info from registers */
> +	int_state = readl_relaxed(data->base + REG_MMU_FAULT_ST1);
> +	fault_iova = readl_relaxed(data->base + REG_MMU_FAULT_VA);
> +	layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT;
> +	write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT;
> +	fault_iova &= F_MMU_FAULT_VA_MSK;
> +	fault_pa = readl_relaxed(data->base + REG_MMU_INVLD_PA);
> +	regval = readl_relaxed(data->base + REG_MMU_INT_ID);
> +	fault_larb = F_MMU0_INT_ID_LARB_ID(regval);
> +	fault_port = F_MMU0_INT_ID_PORT_ID(regval);
> +
> +	if (report_iommu_fault(&dom->domain, data->dev, fault_iova,
> +			       write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) {
> +		dev_err_ratelimited(
> +			data->dev,
> +			"fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d layer=%d %s\n",
> +			int_state, fault_iova, fault_pa, fault_larb, fault_port,
> +			layer, write ? "write" : "read");
> +	}
> +
> +	/* Interrupt clear */
> +	regval = readl_relaxed(data->base + REG_MMU_INT_CONTROL0);
> +	regval |= F_INT_CLR_BIT;
> +	writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL0);
> +
> +	mtk_iommu_tlb_flush_all(data);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static void mtk_iommu_config(struct mtk_iommu_data *data,
> +			     struct device *dev, bool enable)
> +{
> +	struct mtk_iommu_client_priv *head, *cur, *next;
> +
> +	head = dev->archdata.iommu;
> +	list_for_each_entry_safe(cur, next, &head->client, client) {
> +		mtk_smi_config_port(
> +			data->larbdev[MTK_M4U_TO_LARB(cur->mtk_m4u_id)],
> +			MTK_M4U_TO_PORT(cur->mtk_m4u_id), enable);

Use an extra variable for MTK_M4U_TO_LARB(cur->mtk-m4u_id), this makes 
the code easier to read.

Regards,
Matthias

> +	}
> +}
> +
> +static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data)
> +{
> +	struct mtk_iommu_domain *dom = data->m4u_dom;
> +
> +	spin_lock_init(&dom->pgtlock);
> +
> +	dom->cfg = (struct io_pgtable_cfg) {
> +		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
> +			IO_PGTABLE_QUIRK_NO_PERMS |
> +			IO_PGTABLE_QUIRK_TLBI_ON_MAP,
> +		.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
> +		.ias = 32,
> +		.oas = 32,
> +		.tlb = &mtk_iommu_gather_ops,
> +		.iommu_dev = data->dev,
> +	};
> +
> +	dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
> +	if (!dom->iop) {
> +		dev_err(data->dev, "Failed to alloc io pgtable\n");
> +		return -EINVAL;
> +	}
> +
> +	/* Update our support page sizes bitmap */
> +	mtk_iommu_ops.pgsize_bitmap = dom->cfg.pgsize_bitmap;
> +
> +	writel_relaxed(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
> +		       data->base + REG_MMU_PT_BASE_ADDR);
> +	return 0;
> +}
> +
> +static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
> +{
> +	struct mtk_iommu_domain *dom;
> +
> +	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
> +		return NULL;
> +
> +	dom = kzalloc(sizeof(*dom), GFP_KERNEL);
> +	if (!dom)
> +		return NULL;
> +
> +	if (type == IOMMU_DOMAIN_DMA && iommu_get_dma_cookie(&dom->domain)) {
> +		kfree(dom);
> +		return NULL;
> +	}
> +
> +	dom->domain.geometry.aperture_start = 0;
> +	dom->domain.geometry.aperture_end = DMA_BIT_MASK(32);
> +	dom->domain.geometry.force_aperture = true;
> +
> +	return &dom->domain;
> +}
> +
> +static void mtk_iommu_domain_free(struct iommu_domain *domain)
> +{
> +	if (domain->type == IOMMU_DOMAIN_DMA)
> +		iommu_put_dma_cookie(domain);
> +	kfree(to_mtk_domain(domain));
> +}
> +
> +static int mtk_iommu_attach_device(struct iommu_domain *domain,
> +				   struct device *dev)
> +{
> +	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
> +	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
> +	struct mtk_iommu_data *data;
> +	int ret;
> +
> +	if (!priv)
> +		return -ENODEV;
> +
> +	data = dev_get_drvdata(priv->m4udev);
> +	if (!data) {
> +		/*
> +		 * The DMA core will run earlier than this probe, and it will
> +		 * create a default iommu domain for each a iommu device.
> +		 * But here there is only one domain called the m4u domain
> +		 * which all the multimedia HW share.
> +		 * The default domain isn't needed here.
> +		 */
> +		iommu_domain_free(domain);
> +
> +		/* Return 0 for DMA to update dev->archdata.dma_ops */
> +		return 0;
> +	} else if (!data->m4u_dom) {
> +		/*
> +		 * While a device is added into a iommu group, the iommu core
> +		 * will create a default domain for each a iommu group.
> +		 * This default domain is reserved as the m4u domain and is
> +		 * initiated here.
> +		 */
> +		data->m4u_dom = dom;
> +
> +		if (domain->type == IOMMU_DOMAIN_DMA) {
> +			ret = iommu_dma_init_domain(domain, 0,
> +						    DMA_BIT_MASK(32));
> +			if (ret)
> +				goto err_uninit_dom;
> +		}
> +
> +		ret = mtk_iommu_domain_finalise(data);
> +		if (ret)
> +			goto err_uninit_dom;
> +	} else if (data->m4u_dom != dom) {
> +		/* All the client devices should be in the same m4u domain */
> +		dev_err(dev, "try to attach into the error iommu domain\n");
> +		return -EPERM;
> +	}
> +
> +	mtk_iommu_config(data, dev, true);
> +	return 0;
> +
> +err_uninit_dom:
> +	data->m4u_dom = NULL;
> +	return ret;
> +}
> +
> +static void mtk_iommu_detach_device(struct iommu_domain *domain,
> +				    struct device *dev)
> +{
> +	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
> +	struct mtk_iommu_data *data;
> +
> +	if (!priv)
> +		return;
> +
> +	data = dev_get_drvdata(priv->m4udev);
> +	if (data)
> +		mtk_iommu_config(data, dev, false);
> +}
> +
> +static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
> +			 phys_addr_t paddr, size_t size, int prot)
> +{
> +	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
> +	unsigned long flags;
> +	int ret;
> +
> +	spin_lock_irqsave(&dom->pgtlock, flags);
> +	ret = dom->iop->map(dom->iop, iova, paddr, size, prot);
> +	spin_unlock_irqrestore(&dom->pgtlock, flags);
> +
> +	return ret;
> +}
> +
> +static size_t mtk_iommu_unmap(struct iommu_domain *domain,
> +			      unsigned long iova, size_t size)
> +{
> +	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
> +	unsigned long flags;
> +	size_t unmapsz;
> +
> +	spin_lock_irqsave(&dom->pgtlock, flags);
> +	unmapsz = dom->iop->unmap(dom->iop, iova, size);
> +	spin_unlock_irqrestore(&dom->pgtlock, flags);
> +
> +	return unmapsz;
> +}
> +
> +static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
> +					  dma_addr_t iova)
> +{
> +	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
> +	unsigned long flags;
> +	phys_addr_t pa;
> +
> +	spin_lock_irqsave(&dom->pgtlock, flags);
> +	pa = dom->iop->iova_to_phys(dom->iop, iova);
> +	spin_unlock_irqrestore(&dom->pgtlock, flags);
> +
> +	return pa;
> +}
> +
> +static int mtk_iommu_add_device(struct device *dev)
> +{
> +	struct iommu_group *group;
> +
> +	if (!dev->archdata.iommu) /* Not a iommu client device */
> +		return -ENODEV;
> +
> +	group = iommu_group_get_for_dev(dev);
> +	if (IS_ERR(group))
> +		return PTR_ERR(group);
> +
> +	iommu_group_put(group);
> +	return 0;
> +}
> +
> +static void mtk_iommu_remove_device(struct device *dev)
> +{
> +	struct mtk_iommu_client_priv *head, *cur, *next;
> +
> +	head = dev->archdata.iommu;
> +	if (!head)
> +		return;
> +
> +	list_for_each_entry_safe(cur, next, &head->client, client) {
> +		list_del(&cur->client);
> +		kfree(cur);
> +	}
> +	kfree(head);
> +	dev->archdata.iommu = NULL;
> +
> +	iommu_group_remove_device(dev);
> +}
> +
> +static struct iommu_group *mtk_iommu_device_group(struct device *dev)
> +{
> +	struct mtk_iommu_data *data;
> +	struct mtk_iommu_client_priv *priv;
> +
> +	priv = dev->archdata.iommu;
> +	if (!priv)
> +		return ERR_PTR(-ENODEV);
> +
> +	/* All the client devices are in the same m4u iommu-group */
> +	data = dev_get_drvdata(priv->m4udev);
> +	if (!data->m4u_group) {
> +		data->m4u_group = iommu_group_alloc();
> +		if (IS_ERR(data->m4u_group))
> +			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
> +	}
> +	return data->m4u_group;
> +}
> +
> +static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
> +{
> +	struct mtk_iommu_client_priv *head, *priv, *next;
> +	struct platform_device *m4updev;
> +
> +	if (args->args_count != 1) {
> +		dev_err(dev, "invalid #iommu-cells(%d) property for IOMMU\n",
> +			args->args_count);
> +		return -EINVAL;
> +	}
> +
> +	if (!dev->archdata.iommu) {
> +		/* Get the m4u device */
> +		m4updev = of_find_device_by_node(args->np);
> +		of_node_put(args->np);
> +		if (WARN_ON(!m4updev))
> +			return -EINVAL;
> +
> +		head = kzalloc(sizeof(*head), GFP_KERNEL);
> +		if (!head)
> +			return -ENOMEM;
> +
> +		dev->archdata.iommu = head;
> +		INIT_LIST_HEAD(&head->client);
> +		head->m4udev = &m4updev->dev;
> +	} else {
> +		head = dev->archdata.iommu;
> +	}
> +
> +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		goto err_free_mem;
> +
> +	priv->mtk_m4u_id = args->args[0];
> +	list_add_tail(&priv->client, &head->client);
> +
> +	return 0;
> +
> +err_free_mem:
> +	list_for_each_entry_safe(priv, next, &head->client, client)
> +		kfree(priv);
> +	kfree(head);
> +	dev->archdata.iommu = NULL;
> +	return -ENOMEM;
> +}
> +
> +static struct iommu_ops mtk_iommu_ops = {
> +	.domain_alloc	= mtk_iommu_domain_alloc,
> +	.domain_free	= mtk_iommu_domain_free,
> +	.attach_dev	= mtk_iommu_attach_device,
> +	.detach_dev	= mtk_iommu_detach_device,
> +	.map		= mtk_iommu_map,
> +	.unmap		= mtk_iommu_unmap,
> +	.map_sg		= default_iommu_map_sg,
> +	.iova_to_phys	= mtk_iommu_iova_to_phys,
> +	.add_device	= mtk_iommu_add_device,
> +	.remove_device	= mtk_iommu_remove_device,
> +	.device_group	= mtk_iommu_device_group,
> +	.of_xlate	= mtk_iommu_of_xlate,
> +	.pgsize_bitmap	= SZ_4K | SZ_64K | SZ_1M | SZ_16M,
> +};
> +
> +static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
> +{
> +	u32 regval;
> +	int ret;
> +
> +	ret = clk_prepare_enable(data->bclk);
> +	if (ret) {
> +		dev_err(data->dev, "Failed to enable iommu bclk(%d)\n", ret);
> +		return ret;
> +	}
> +
> +	regval = F_MMU_PREFETCH_RT_REPLACE_MOD |
> +		F_MMU_TF_PROTECT_SEL(2) |
> +		F_COHERENCE_EN;
> +	writel_relaxed(regval, data->base + REG_MMU_CTRL_REG);
> +
> +	regval = F_L2_MULIT_HIT_EN |
> +		F_TABLE_WALK_FAULT_INT_EN |
> +		F_PREETCH_FIFO_OVERFLOW_INT_EN |
> +		F_MISS_FIFO_OVERFLOW_INT_EN |
> +		F_PREFETCH_FIFO_ERR_INT_EN |
> +		F_MISS_FIFO_ERR_INT_EN;
> +	writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL0);
> +
> +	regval = F_INT_TRANSLATION_FAULT |
> +		F_INT_MAIN_MULTI_HIT_FAULT |
> +		F_INT_INVALID_PA_FAULT |
> +		F_INT_ENTRY_REPLACEMENT_FAULT |
> +		F_INT_TLB_MISS_FAULT |
> +		F_INT_MISS_TRANSATION_FIFO_FAULT |
> +		F_INT_PRETETCH_TRANSATION_FIFO_FAULT;
> +	writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL);
> +
> +	regval = F_MMU_IVRP_PA_SET(data->protect_base);
> +	writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR);
> +
> +	writel_relaxed(0, data->base + REG_MMU_DCM_DIS);
> +	writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE);
> +
> +	if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0,
> +			     dev_name(data->dev), (void *)data)) {
> +		writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR);
> +		clk_disable_unprepare(data->bclk);
> +		dev_err(data->dev, "Failed @ IRQ-%d Request\n", data->irq);
> +		return -ENODEV;
> +	}
> +
> +	return 0;
> +}
> +
> +static int compare_of(struct device *dev, void *data)
> +{
> +	return dev->of_node == data;
> +}
> +
> +static int mtk_iommu_bind(struct device *dev)
> +{
> +	struct mtk_iommu_data *data = dev_get_drvdata(dev);
> +	void *protect;
> +	int ret;
> +
> +	/* Protect memory. HW will access here while translation fault.*/
> +	protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2, GFP_KERNEL);
> +	if (!protect)
> +		return -ENOMEM;
> +	data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN);
> +
> +	ret = component_bind_all(dev, NULL);
> +	if (ret)
> +		return ret;
> +
> +	ret = mtk_iommu_hw_init(data);
> +	if (ret) {
> +		component_unbind_all(dev, NULL);
> +		return ret;
> +	}
> +
> +	if (!iommu_present(&platform_bus_type))
> +		bus_set_iommu(&platform_bus_type, &mtk_iommu_ops);
> +
> +	return 0;
> +}
> +
> +static void mtk_iommu_unbind(struct device *dev)
> +{
> +	struct mtk_iommu_data *data = dev_get_drvdata(dev);
> +
> +	if (iommu_present(&platform_bus_type))
> +		bus_set_iommu(&platform_bus_type, NULL);
> +
> +	component_unbind_all(dev, NULL);
> +	free_io_pgtable_ops(data->m4u_dom->iop);
> +	clk_disable_unprepare(data->bclk);
> +	devm_free_irq(dev, data->irq, data);
> +}
> +
> +static const struct component_master_ops mtk_iommu_com_ops = {
> +	.bind		= mtk_iommu_bind,
> +	.unbind		= mtk_iommu_unbind,
> +};
> +
> +static int mtk_iommu_probe(struct platform_device *pdev)
> +{
> +	struct mtk_iommu_data   *data;
> +	struct device           *dev = &pdev->dev;
> +	struct resource         *res;
> +	struct component_match  *match = NULL;
> +	int                     i;
> +
> +	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
> +	if (!data)
> +		return -ENOMEM;
> +	data->dev = dev;
> +
> +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	data->base = devm_ioremap_resource(dev, res);
> +	if (IS_ERR(data->base))
> +		return PTR_ERR(data->base);
> +
> +	data->irq = platform_get_irq(pdev, 0);
> +	if (data->irq < 0)
> +		return data->irq;
> +
> +	data->bclk = devm_clk_get(dev, "bclk");
> +	if (IS_ERR(data->bclk))
> +		return PTR_ERR(data->bclk);
> +
> +	data->larb_nr = of_count_phandle_with_args(
> +					dev->of_node, "mediatek,larbs", NULL);
> +	if (data->larb_nr < 0)
> +		return data->larb_nr;
> +
> +	for (i = 0; i < data->larb_nr; i++) {
> +		struct device_node *larbnode;
> +		struct platform_device *plarbdev;
> +
> +		larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i);
> +		if (!larbnode)
> +			return -EINVAL;
> +
> +		if (!of_device_is_available(larbnode))
> +			continue;
> +
> +		plarbdev = of_find_device_by_node(larbnode);
> +		of_node_put(larbnode);
> +		if (!plarbdev)
> +			return -EPROBE_DEFER;
> +		data->larbdev[i] = &plarbdev->dev;
> +
> +		component_match_add(dev, &match, compare_of, larbnode);
> +	}
> +
> +	dev_set_drvdata(dev, data);
> +	return component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
> +}
> +
> +static int mtk_iommu_remove(struct platform_device *pdev)
> +{
> +	component_master_del(&pdev->dev, &mtk_iommu_com_ops);
> +	return 0;
> +}
> +
> +static int mtk_iommu_suspend(struct device *dev)
> +{
> +	struct mtk_iommu_data *data = dev_get_drvdata(dev);
> +	struct mtk_iommu_suspend_reg *reg = &data->reg;
> +	void __iomem *base = data->base;
> +
> +	reg->standard_axi_mode = readl_relaxed(base +
> +					       REG_MMU_STANDARD_AXI_MODE);
> +	reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM_DIS);
> +	reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG);
> +	reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0);
> +	reg->int_main_control = readl_relaxed(base + REG_MMU_INT_MAIN_CONTROL);
> +	return 0;
> +}
> +
> +static int mtk_iommu_resume(struct device *dev)
> +{
> +	struct mtk_iommu_data *data = dev_get_drvdata(dev);
> +	struct mtk_iommu_suspend_reg *reg = &data->reg;
> +	void __iomem *base = data->base;
> +
> +	writel_relaxed(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
> +		       base + REG_MMU_PT_BASE_ADDR);
> +	writel_relaxed(reg->standard_axi_mode,
> +		       base + REG_MMU_STANDARD_AXI_MODE);
> +	writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS);
> +	writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG);
> +	writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0);
> +	writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL);
> +	writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base),
> +		       base + REG_MMU_IVRP_PADDR);
> +	return 0;
> +}
> +
> +const struct dev_pm_ops mtk_iommu_pm_ops = {
> +	SET_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume)
> +};
> +
> +static const struct of_device_id mtk_iommu_of_ids[] = {
> +	{ .compatible = "mediatek,mt8173-m4u", },
> +	{}
> +};
> +
> +static struct platform_driver mtk_iommu_driver = {
> +	.probe	= mtk_iommu_probe,
> +	.remove	= mtk_iommu_remove,
> +	.driver	= {
> +		.name = "mtk-iommu",
> +		.of_match_table = mtk_iommu_of_ids,
> +		.pm = &mtk_iommu_pm_ops,
> +	}
> +};
> +
> +static int __init mtk_iommu_init(void)
> +{
> +	return platform_driver_register(&mtk_iommu_driver);
> +}
> +subsys_initcall(mtk_iommu_init);
> +
> +static int mtk_iommu_init_fn(struct device_node *np)
> +{
> +	struct platform_device *pdev;
> +
> +	pdev = of_platform_device_create(np, NULL, platform_bus_type.dev_root);
> +	if (IS_ERR(pdev))
> +		return PTR_ERR(pdev);
> +
> +	of_iommu_set_ops(np, &mtk_iommu_ops);
> +	return 0;
> +}
> +
> +IOMMU_OF_DECLARE(mtkm4u, "mediatek,mt8173-m4u", mtk_iommu_init_fn);
>
Yong Wu (吴勇) Dec. 15, 2015, 2:40 a.m. UTC | #4
On Mon, 2015-12-14 at 19:19 +0100, Matthias Brugger wrote:
> 
> On 08/12/15 10:49, Yong Wu wrote:
> > This patch adds support for mediatek m4u (MultiMedia Memory Management
> > Unit).
> >
> > Signed-off-by: Yong Wu <yong.wu@mediatek.com>
> > ---
[...]
> > +static void mtk_iommu_config(struct mtk_iommu_data *data,
> > +			     struct device *dev, bool enable)
> > +{
> > +	struct mtk_iommu_client_priv *head, *cur, *next;
> > +
> > +	head = dev->archdata.iommu;
> > +	list_for_each_entry_safe(cur, next, &head->client, client) {
> > +		mtk_smi_config_port(
> > +			data->larbdev[MTK_M4U_TO_LARB(cur->mtk_m4u_id)],
> > +			MTK_M4U_TO_PORT(cur->mtk_m4u_id), enable);
> 
> Use an extra variable for MTK_M4U_TO_LARB(cur->mtk-m4u_id), this makes 
> the code easier to read.

OK. Thanks.
I will fix it in next version.

> 
> Regards,
> Matthias
> 
> > +	}
> > +}
> > +
[...]
Yong Wu (吴勇) Dec. 15, 2015, 3:28 a.m. UTC | #5
On Mon, 2015-12-14 at 15:16 +0100, Joerg Roedel wrote:
> On Tue, Dec 08, 2015 at 05:49:12PM +0800, Yong Wu wrote:
> > +static int mtk_iommu_attach_device(struct iommu_domain *domain,
> > +				   struct device *dev)
> > +{
> > +	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
> > +	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
> > +	struct mtk_iommu_data *data;
> > +	int ret;
> > +
> > +	if (!priv)
> > +		return -ENODEV;
> > +
> > +	data = dev_get_drvdata(priv->m4udev);
> > +	if (!data) {
> > +		/*
> > +		 * The DMA core will run earlier than this probe, and it will
> > +		 * create a default iommu domain for each a iommu device.
> > +		 * But here there is only one domain called the m4u domain
> > +		 * which all the multimedia HW share.
> > +		 * The default domain isn't needed here.
> > +		 */
> 
> The iommu core creates one domain per iommu-group. In your case this
> means one default domain per iommu in the system.

Yes. The iommu core will create one domain per iommu-group.
see the next "if" here.

But the domain here is created by the current DMA64. It's from this
function do_iommu_attach which will be called too early and will help
create a default domain for each a iommu device.(my codebase is
v4.4-rc1).


//=====the next "if"===========
} else if (!data->m4u_dom) {
	/*
	 * While a device is added into a iommu group, the iommu core
	 * will create a default domain for each a iommu group.
	 * This default domain is reserved as the m4u domain and is
	 * initiated here.
	 */
	data->m4u_dom = dom;
	if (domain->type == IOMMU_DOMAIN_DMA) {
		ret = iommu_dma_init_domain(domain, 0,
					    DMA_BIT_MASK(32));
		if (ret)
			goto err_uninit_dom;
	}

	ret = mtk_iommu_domain_finalise(data);
	if (ret)
		goto err_uninit_dom;
} 
//======================

> 
> > +		iommu_domain_free(domain);
> 
> This function is not supposed to free the domain passed to it.

As above this domain is created in the do_iommu_attach which will help
create a default domain for each a iommu device.
We don't need this default domain!

If we don't free it here, there will be a memory leak.

From Robin's comment, He will improve the sequence of the
__iommu_setup_dma_ops in the future.

/*
 * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
 * everything it needs to - the device is only partially created and the
 * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
 * need this delayed attachment dance. Once IOMMU probe ordering is
sorted
 * to move the arch_setup_dma_ops() call later, all the notifier bits
below
 * become unnecessary, and will go away.
 */

/*
 * Best case: The device is either part of a group which was
 * already attached to a domain in a previous call, or it's
 * been put in a default DMA domain by the IOMMU core.
 */

   But there is no this patch currently, so I add iommu_domain_free
here.
 
   "free the domain" here looks really not good. Then I delete the
iommu_domain_free here(allow this memory leak right now), is it ok?
(It will also works after Robin's change in the future.)

> 
> > +static int mtk_iommu_add_device(struct device *dev)
> > +{
> > +	struct iommu_group *group;
> > +
> > +	if (!dev->archdata.iommu) /* Not a iommu client device */
> > +		return -ENODEV;
> > +
> > +	group = iommu_group_get_for_dev(dev);
> > +	if (IS_ERR(group))
> > +		return PTR_ERR(group);
> > +
> > +	iommu_group_put(group);
> > +	return 0;
> > +}
> 
> [...]
> 
> > +static struct iommu_group *mtk_iommu_device_group(struct device *dev)
> > +{
> > +	struct mtk_iommu_data *data;
> > +	struct mtk_iommu_client_priv *priv;
> > +
> > +	priv = dev->archdata.iommu;
> > +	if (!priv)
> > +		return ERR_PTR(-ENODEV);
> > +
> > +	/* All the client devices are in the same m4u iommu-group */
> > +	data = dev_get_drvdata(priv->m4udev);
> > +	if (!data->m4u_group) {
> > +		data->m4u_group = iommu_group_alloc();
> > +		if (IS_ERR(data->m4u_group))
> > +			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
> > +	}
> > +	return data->m4u_group;
> > +}
> 
> This looks much better than before, thanks.

Thanks.

>
Robin Murphy Dec. 15, 2015, 12:37 p.m. UTC | #6
On 15/12/15 03:28, Yong Wu wrote:
> On Mon, 2015-12-14 at 15:16 +0100, Joerg Roedel wrote:
>> On Tue, Dec 08, 2015 at 05:49:12PM +0800, Yong Wu wrote:
>>> +static int mtk_iommu_attach_device(struct iommu_domain *domain,
>>> +				   struct device *dev)
>>> +{
>>> +	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
>>> +	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
>>> +	struct mtk_iommu_data *data;
>>> +	int ret;
>>> +
>>> +	if (!priv)
>>> +		return -ENODEV;
>>> +
>>> +	data = dev_get_drvdata(priv->m4udev);
>>> +	if (!data) {
>>> +		/*
>>> +		 * The DMA core will run earlier than this probe, and it will
>>> +		 * create a default iommu domain for each a iommu device.
>>> +		 * But here there is only one domain called the m4u domain
>>> +		 * which all the multimedia HW share.
>>> +		 * The default domain isn't needed here.
>>> +		 */
>>
>> The iommu core creates one domain per iommu-group. In your case this
>> means one default domain per iommu in the system.
>
> Yes. The iommu core will create one domain per iommu-group.
> see the next "if" here.
>
> But the domain here is created by the current DMA64. It's from this
> function do_iommu_attach which will be called too early and will help
> create a default domain for each a iommu device.(my codebase is
> v4.4-rc1).

I still don't really understand the problem here. The M4U device is 
created early in mtk_iommu_init_fn, so it should be probed and ready to 
go long before anything else. Indeed, for your mtk_iommu_device_group() 
callback to work then everything must already be happening in the right 
order - add_device will only call iommu_group_get_for_dev() if of_xlate 
has populated dev->archdata.iommu, and of_xlate will only do that if the 
M4U was up and running before the client device started probing. 
Furthermore, if mtk_iommu_device_group() *does* work, then the IOMMU 
core will go ahead and allocate the default domain there and then, which 
the arch code should find and use later.

The potential issue I *do* see, looking more closely, is that 
iommu_group_get_for_dev() is setting group->domain but not calling the 
attach_dev callback, which looks wrong...

>
> //=====the next "if"===========
> } else if (!data->m4u_dom) {
> 	/*
> 	 * While a device is added into a iommu group, the iommu core
> 	 * will create a default domain for each a iommu group.
> 	 * This default domain is reserved as the m4u domain and is
> 	 * initiated here.
> 	 */
> 	data->m4u_dom = dom;
> 	if (domain->type == IOMMU_DOMAIN_DMA) {
> 		ret = iommu_dma_init_domain(domain, 0,
> 					    DMA_BIT_MASK(32));
> 		if (ret)
> 			goto err_uninit_dom;
> 	}
>
> 	ret = mtk_iommu_domain_finalise(data);
> 	if (ret)
> 		goto err_uninit_dom;
> }
> //======================
>
>>
>>> +		iommu_domain_free(domain);
>>
>> This function is not supposed to free the domain passed to it.
>
> As above this domain is created in the do_iommu_attach which will help
> create a default domain for each a iommu device.
> We don't need this default domain!
>
> If we don't free it here, there will be a memory leak.
>
>  From Robin's comment, He will improve the sequence of the
> __iommu_setup_dma_ops in the future.

That already happened. The final version of the arm64 code which was 
merged makes sure that the IOMMU driver always sees the callbacks in the 
desired of_xlate -> add_device -> attach_dev order. The whole point of 
the comment below is that the driver itself *doesn't* have to care about 
the awkward way in which that is currently achieved.

> /*
>   * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
>   * everything it needs to - the device is only partially created and the
>   * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
>   * need this delayed attachment dance. Once IOMMU probe ordering is
> sorted
>   * to move the arch_setup_dma_ops() call later, all the notifier bits
> below
>   * become unnecessary, and will go away.
>   */
>
> /*
>   * Best case: The device is either part of a group which was
>   * already attached to a domain in a previous call, or it's
>   * been put in a default DMA domain by the IOMMU core.
>   */

That was before Joerg made the device_group changes which enabled proper 
default domains for platform devices - with those, we should be now be 
hitting the "best case" behaviour every time. In fact I think the "fake 
default domain" workaround shouldn't be needed at all any more, so I 
will add removing it to my giant list of things to do.

>     But there is no this patch currently, so I add iommu_domain_free
> here.
>
>     "free the domain" here looks really not good. Then I delete the
> iommu_domain_free here(allow this memory leak right now), is it ok?
> (It will also works after Robin's change in the future.)
>
>>
>>> +static int mtk_iommu_add_device(struct device *dev)
>>> +{
>>> +	struct iommu_group *group;
>>> +
>>> +	if (!dev->archdata.iommu) /* Not a iommu client device */
>>> +		return -ENODEV;
>>> +
>>> +	group = iommu_group_get_for_dev(dev);
>>> +	if (IS_ERR(group))
>>> +		return PTR_ERR(group);
>>> +
>>> +	iommu_group_put(group);
>>> +	return 0;
>>> +}
>>
>> [...]
>>
>>> +static struct iommu_group *mtk_iommu_device_group(struct device *dev)
>>> +{
>>> +	struct mtk_iommu_data *data;
>>> +	struct mtk_iommu_client_priv *priv;
>>> +
>>> +	priv = dev->archdata.iommu;
>>> +	if (!priv)
>>> +		return ERR_PTR(-ENODEV);
>>> +
>>> +	/* All the client devices are in the same m4u iommu-group */
>>> +	data = dev_get_drvdata(priv->m4udev);
>>> +	if (!data->m4u_group) {
>>> +		data->m4u_group = iommu_group_alloc();
>>> +		if (IS_ERR(data->m4u_group))
>>> +			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
>>> +	}
>>> +	return data->m4u_group;
>>> +}

As long as this works as expected, then AFAICS you shouldn't have to 
have *any* special-case behaviour or tracking of domains at all.

Robin.
Yong Wu (吴勇) Dec. 16, 2015, 5:59 a.m. UTC | #7
On Tue, 2015-12-15 at 12:37 +0000, Robin Murphy wrote:
> On 15/12/15 03:28, Yong Wu wrote:
> > On Mon, 2015-12-14 at 15:16 +0100, Joerg Roedel wrote:
> >> On Tue, Dec 08, 2015 at 05:49:12PM +0800, Yong Wu wrote:
> >>> +static int mtk_iommu_attach_device(struct iommu_domain *domain,
> >>> +				   struct device *dev)
> >>> +{
> >>> +	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
> >>> +	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
> >>> +	struct mtk_iommu_data *data;
> >>> +	int ret;
> >>> +
> >>> +	if (!priv)
> >>> +		return -ENODEV;
> >>> +
> >>> +	data = dev_get_drvdata(priv->m4udev);
> >>> +	if (!data) {
> >>> +		/*
> >>> +		 * The DMA core will run earlier than this probe, and it will
> >>> +		 * create a default iommu domain for each a iommu device.
> >>> +		 * But here there is only one domain called the m4u domain
> >>> +		 * which all the multimedia HW share.
> >>> +		 * The default domain isn't needed here.
> >>> +		 */
> >>
> >> The iommu core creates one domain per iommu-group. In your case this
> >> means one default domain per iommu in the system.
> >
> > Yes. The iommu core will create one domain per iommu-group.
> > see the next "if" here.
> >
> > But the domain here is created by the current DMA64. It's from this
> > function do_iommu_attach which will be called too early and will help
> > create a default domain for each a iommu device.(my codebase is
> > v4.4-rc1).
> 
> I still don't really understand the problem here. The M4U device is 
> created early in mtk_iommu_init_fn, so it should be probed and ready to 
> go long before anything else. Indeed, for your mtk_iommu_device_group() 
> callback to work then everything must already be happening in the right 
> order - add_device will only call iommu_group_get_for_dev() if of_xlate 
> has populated dev->archdata.iommu, and of_xlate will only do that if the 
> M4U was up and running before the client device started probing. 
> Furthermore, if mtk_iommu_device_group() *does* work, then the IOMMU 
> core will go ahead and allocate the default domain there and then, which 
> the arch code should find and use later.

Thanks. This is very helpful.

I understand your confuse right now and your expectant flow.

Our IOMMU probe was PROBE_DEFER by our SMI device, so currently it probe
was delayed, then have to add the workaround code.

Following your comment above, I test as below. Then the flows seems meet
the "best case" that the iommu core will help create default DMA domain.

@@ -664,19 +636,41 @@ static int mtk_iommu_probe(struct platform_device
*pdev)
for (i = 0; i < larb_nr; i++) {
		struct device_node *larbnode;
		struct platform_device *plarbdev;

		larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i);
		if (!larbnode)
			return -EINVAL;

                plarbdev = of_find_device_by_node(larbnode);
                of_node_put(larbnode);
-               if (!plarbdev)
-                       return -EPROBE_DEFER;
+               if (!plarbdev) {
+                       plarbdev = of_platform_device_create(larbnode,
NULL, platform_bus_type.dev_root);
+                       if (IS_ERR(pdev))
+                               return -EPROBE_DEFER;
+               }
}

I only add of_platform_device_create for the SMI local arbiter devices
here.

This is a big improvement for us. If this is ok, I will send a quick
next version for this.

> 
> The potential issue I *do* see, looking more closely, is that 
> iommu_group_get_for_dev() is setting group->domain but not calling the 
> attach_dev callback, which looks wrong...

This is the backtrace,

(151216_09:58:05.207)Call trace:
(151216_09:58:05.207)[<ffffffc000400668>] mtk_iommu_attach_device
+0xb8/0x178
(151216_09:58:05.207)[<ffffffc0003fc55c>] iommu_group_add_device
+0x1d8/0x31c
(151216_09:58:05.207)[<ffffffc0003fc988>] iommu_group_get_for_dev
+0x88/0x108
(151216_09:58:05.207)[<ffffffc0003ffcfc>] mtk_iommu_add_device+0x14/0x34
(151216_09:58:05.207)[<ffffffc0003fb280>] add_iommu_group+0x20/0x44
(151216_09:58:05.207)[<ffffffc000406cec>] bus_for_each_dev+0x58/0x98
(151216_09:58:05.207)[<ffffffc0003fbe8c>] bus_set_iommu+0x9c/0xf8

If I change like above, I will delete the workaround code..

> 
> >
> > //=====the next "if"===========
> > } else if (!data->m4u_dom) {
> > 	/*
> > 	 * While a device is added into a iommu group, the iommu core
> > 	 * will create a default domain for each a iommu group.
> > 	 * This default domain is reserved as the m4u domain and is
> > 	 * initiated here.
> > 	 */
> > 	data->m4u_dom = dom;
> > 	if (domain->type == IOMMU_DOMAIN_DMA) {
> > 		ret = iommu_dma_init_domain(domain, 0,
> > 					    DMA_BIT_MASK(32));
> > 		if (ret)
> > 			goto err_uninit_dom;
> > 	}
> >
> > 	ret = mtk_iommu_domain_finalise(data);
> > 	if (ret)
> > 		goto err_uninit_dom;
> > }
> > //======================
> >
> >>
> >>> +		iommu_domain_free(domain);
> >>
> >> This function is not supposed to free the domain passed to it.
> >
> > As above this domain is created in the do_iommu_attach which will help
> > create a default domain for each a iommu device.
> > We don't need this default domain!
> >
> > If we don't free it here, there will be a memory leak.
> >
> >  From Robin's comment, He will improve the sequence of the
> > __iommu_setup_dma_ops in the future.
> 
> That already happened. The final version of the arm64 code which was 
> merged makes sure that the IOMMU driver always sees the callbacks in the 
> desired of_xlate -> add_device -> attach_dev order. The whole point of 
> the comment below is that the driver itself *doesn't* have to care about 
> the awkward way in which that is currently achieved.
> 
> > /*
> >   * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
> >   * everything it needs to - the device is only partially created and the
> >   * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
> >   * need this delayed attachment dance. Once IOMMU probe ordering is
> > sorted
> >   * to move the arch_setup_dma_ops() call later, all the notifier bits
> > below
> >   * become unnecessary, and will go away.
> >   */
> >
> > /*
> >   * Best case: The device is either part of a group which was
> >   * already attached to a domain in a previous call, or it's
> >   * been put in a default DMA domain by the IOMMU core.
> >   */
> 
> That was before Joerg made the device_group changes which enabled proper 
> default domains for platform devices - with those, we should be now be 
> hitting the "best case" behaviour every time. In fact I think the "fake 
> default domain" workaround shouldn't be needed at all any more, so I 
> will add removing it to my giant list of things to do.
> 
> >     But there is no this patch currently, so I add iommu_domain_free
> > here.
> >
> >     "free the domain" here looks really not good. Then I delete the
> > iommu_domain_free here(allow this memory leak right now), is it ok?
> > (It will also works after Robin's change in the future.)
> >
> >>
> >>> +static int mtk_iommu_add_device(struct device *dev)
> >>> +{
> >>> +	struct iommu_group *group;
> >>> +
> >>> +	if (!dev->archdata.iommu) /* Not a iommu client device */
> >>> +		return -ENODEV;
> >>> +
> >>> +	group = iommu_group_get_for_dev(dev);
> >>> +	if (IS_ERR(group))
> >>> +		return PTR_ERR(group);
> >>> +
> >>> +	iommu_group_put(group);
> >>> +	return 0;
> >>> +}
> >>
> >> [...]
> >>
> >>> +static struct iommu_group *mtk_iommu_device_group(struct device *dev)
> >>> +{
> >>> +	struct mtk_iommu_data *data;
> >>> +	struct mtk_iommu_client_priv *priv;
> >>> +
> >>> +	priv = dev->archdata.iommu;
> >>> +	if (!priv)
> >>> +		return ERR_PTR(-ENODEV);
> >>> +
> >>> +	/* All the client devices are in the same m4u iommu-group */
> >>> +	data = dev_get_drvdata(priv->m4udev);
> >>> +	if (!data->m4u_group) {
> >>> +		data->m4u_group = iommu_group_alloc();
> >>> +		if (IS_ERR(data->m4u_group))
> >>> +			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
> >>> +	}
> >>> +	return data->m4u_group;
> >>> +}
> 
> As long as this works as expected, then AFAICS you shouldn't have to 
> have *any* special-case behaviour or tracking of domains at all.

We only need one iommu-group, one iommu domain here.

What's the special-case behavior, how can we track of domains.
Could you help give me a example?

> 
> Robin.
Robin Murphy Dec. 16, 2015, 12:48 p.m. UTC | #8
On 16/12/15 05:59, Yong Wu wrote:
> On Tue, 2015-12-15 at 12:37 +0000, Robin Murphy wrote:
>> On 15/12/15 03:28, Yong Wu wrote:
>>> On Mon, 2015-12-14 at 15:16 +0100, Joerg Roedel wrote:
>>>> On Tue, Dec 08, 2015 at 05:49:12PM +0800, Yong Wu wrote:
>>>>> +static int mtk_iommu_attach_device(struct iommu_domain *domain,
>>>>> +				   struct device *dev)
>>>>> +{
>>>>> +	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
>>>>> +	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
>>>>> +	struct mtk_iommu_data *data;
>>>>> +	int ret;
>>>>> +
>>>>> +	if (!priv)
>>>>> +		return -ENODEV;
>>>>> +
>>>>> +	data = dev_get_drvdata(priv->m4udev);
>>>>> +	if (!data) {
>>>>> +		/*
>>>>> +		 * The DMA core will run earlier than this probe, and it will
>>>>> +		 * create a default iommu domain for each a iommu device.
>>>>> +		 * But here there is only one domain called the m4u domain
>>>>> +		 * which all the multimedia HW share.
>>>>> +		 * The default domain isn't needed here.
>>>>> +		 */
>>>>
>>>> The iommu core creates one domain per iommu-group. In your case this
>>>> means one default domain per iommu in the system.
>>>
>>> Yes. The iommu core will create one domain per iommu-group.
>>> see the next "if" here.
>>>
>>> But the domain here is created by the current DMA64. It's from this
>>> function do_iommu_attach which will be called too early and will help
>>> create a default domain for each a iommu device.(my codebase is
>>> v4.4-rc1).
>>
>> I still don't really understand the problem here. The M4U device is
>> created early in mtk_iommu_init_fn, so it should be probed and ready to
>> go long before anything else. Indeed, for your mtk_iommu_device_group()
>> callback to work then everything must already be happening in the right
>> order - add_device will only call iommu_group_get_for_dev() if of_xlate
>> has populated dev->archdata.iommu, and of_xlate will only do that if the
>> M4U was up and running before the client device started probing.
>> Furthermore, if mtk_iommu_device_group() *does* work, then the IOMMU
>> core will go ahead and allocate the default domain there and then, which
>> the arch code should find and use later.
>
> Thanks. This is very helpful.
>
> I understand your confuse right now and your expectant flow.
>
> Our IOMMU probe was PROBE_DEFER by our SMI device, so currently it probe
> was delayed, then have to add the workaround code.

Aha, I hadn't twigged that there was a dependency on another device that 
could delay the M4U probe, thanks for the clarification. That'll be a 
good case to bear in mind when I eventually get back to the IOMMU probe 
deferral stuff.

> Following your comment above, I test as below. Then the flows seems meet
> the "best case" that the iommu core will help create default DMA domain.
>
> @@ -664,19 +636,41 @@ static int mtk_iommu_probe(struct platform_device
> *pdev)
> for (i = 0; i < larb_nr; i++) {
> 		struct device_node *larbnode;
> 		struct platform_device *plarbdev;
>
> 		larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i);
> 		if (!larbnode)
> 			return -EINVAL;
>
>                  plarbdev = of_find_device_by_node(larbnode);
>                  of_node_put(larbnode);
> -               if (!plarbdev)
> -                       return -EPROBE_DEFER;
> +               if (!plarbdev) {
> +                       plarbdev = of_platform_device_create(larbnode,
> NULL, platform_bus_type.dev_root);
> +                       if (IS_ERR(pdev))
> +                               return -EPROBE_DEFER;
> +               }
> }
>
> I only add of_platform_device_create for the SMI local arbiter devices
> here.
>
> This is a big improvement for us. If this is ok, I will send a quick
> next version for this.

In my opinion it's reasonable - we need the whole "IOMMU" to be ready, 
so if we already have to short-cut the creation of the M4U part it only 
seems fair to do the same for the SMI part. That said, would it work to 
just unconditionally poke the larbs in mtk_iommu_init_fn() before you 
poke the M4U itself? It would be nice to keep all that stuff together in 
the same place.

>> The potential issue I *do* see, looking more closely, is that
>> iommu_group_get_for_dev() is setting group->domain but not calling the
>> attach_dev callback, which looks wrong...
>
> This is the backtrace,
>
> (151216_09:58:05.207)Call trace:
> (151216_09:58:05.207)[<ffffffc000400668>] mtk_iommu_attach_device
> +0xb8/0x178
> (151216_09:58:05.207)[<ffffffc0003fc55c>] iommu_group_add_device
> +0x1d8/0x31c
> (151216_09:58:05.207)[<ffffffc0003fc988>] iommu_group_get_for_dev
> +0x88/0x108
> (151216_09:58:05.207)[<ffffffc0003ffcfc>] mtk_iommu_add_device+0x14/0x34
> (151216_09:58:05.207)[<ffffffc0003fb280>] add_iommu_group+0x20/0x44
> (151216_09:58:05.207)[<ffffffc000406cec>] bus_for_each_dev+0x58/0x98
> (151216_09:58:05.207)[<ffffffc0003fbe8c>] bus_set_iommu+0x9c/0xf8

Urgh, now I recall that this isn't even the first time I've been 
confused by the attach being hidden elsewhere. Oh well, problem averted!

> If I change like above, I will delete the workaround code..
>
>>
>>>
>>> //=====the next "if"===========
>>> } else if (!data->m4u_dom) {
>>> 	/*
>>> 	 * While a device is added into a iommu group, the iommu core
>>> 	 * will create a default domain for each a iommu group.
>>> 	 * This default domain is reserved as the m4u domain and is
>>> 	 * initiated here.
>>> 	 */
>>> 	data->m4u_dom = dom;
>>> 	if (domain->type == IOMMU_DOMAIN_DMA) {
>>> 		ret = iommu_dma_init_domain(domain, 0,
>>> 					    DMA_BIT_MASK(32));
>>> 		if (ret)
>>> 			goto err_uninit_dom;
>>> 	}
>>>
>>> 	ret = mtk_iommu_domain_finalise(data);
>>> 	if (ret)
>>> 		goto err_uninit_dom;
>>> }
>>> //======================
>>>
>>>>
>>>>> +		iommu_domain_free(domain);
>>>>
>>>> This function is not supposed to free the domain passed to it.
>>>
>>> As above this domain is created in the do_iommu_attach which will help
>>> create a default domain for each a iommu device.
>>> We don't need this default domain!
>>>
>>> If we don't free it here, there will be a memory leak.
>>>
>>>   From Robin's comment, He will improve the sequence of the
>>> __iommu_setup_dma_ops in the future.
>>
>> That already happened. The final version of the arm64 code which was
>> merged makes sure that the IOMMU driver always sees the callbacks in the
>> desired of_xlate -> add_device -> attach_dev order. The whole point of
>> the comment below is that the driver itself *doesn't* have to care about
>> the awkward way in which that is currently achieved.
>>
>>> /*
>>>    * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
>>>    * everything it needs to - the device is only partially created and the
>>>    * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
>>>    * need this delayed attachment dance. Once IOMMU probe ordering is
>>> sorted
>>>    * to move the arch_setup_dma_ops() call later, all the notifier bits
>>> below
>>>    * become unnecessary, and will go away.
>>>    */
>>>
>>> /*
>>>    * Best case: The device is either part of a group which was
>>>    * already attached to a domain in a previous call, or it's
>>>    * been put in a default DMA domain by the IOMMU core.
>>>    */
>>
>> That was before Joerg made the device_group changes which enabled proper
>> default domains for platform devices - with those, we should be now be
>> hitting the "best case" behaviour every time. In fact I think the "fake
>> default domain" workaround shouldn't be needed at all any more, so I
>> will add removing it to my giant list of things to do.
>>
>>>      But there is no this patch currently, so I add iommu_domain_free
>>> here.
>>>
>>>      "free the domain" here looks really not good. Then I delete the
>>> iommu_domain_free here(allow this memory leak right now), is it ok?
>>> (It will also works after Robin's change in the future.)
>>>
>>>>
>>>>> +static int mtk_iommu_add_device(struct device *dev)
>>>>> +{
>>>>> +	struct iommu_group *group;
>>>>> +
>>>>> +	if (!dev->archdata.iommu) /* Not a iommu client device */
>>>>> +		return -ENODEV;
>>>>> +
>>>>> +	group = iommu_group_get_for_dev(dev);
>>>>> +	if (IS_ERR(group))
>>>>> +		return PTR_ERR(group);
>>>>> +
>>>>> +	iommu_group_put(group);
>>>>> +	return 0;
>>>>> +}
>>>>
>>>> [...]
>>>>
>>>>> +static struct iommu_group *mtk_iommu_device_group(struct device *dev)
>>>>> +{
>>>>> +	struct mtk_iommu_data *data;
>>>>> +	struct mtk_iommu_client_priv *priv;
>>>>> +
>>>>> +	priv = dev->archdata.iommu;
>>>>> +	if (!priv)
>>>>> +		return ERR_PTR(-ENODEV);
>>>>> +
>>>>> +	/* All the client devices are in the same m4u iommu-group */
>>>>> +	data = dev_get_drvdata(priv->m4udev);
>>>>> +	if (!data->m4u_group) {
>>>>> +		data->m4u_group = iommu_group_alloc();
>>>>> +		if (IS_ERR(data->m4u_group))
>>>>> +			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
>>>>> +	}
>>>>> +	return data->m4u_group;
>>>>> +}
>>
>> As long as this works as expected, then AFAICS you shouldn't have to
>> have *any* special-case behaviour or tracking of domains at all.
>
> We only need one iommu-group, one iommu domain here.
>
> What's the special-case behavior, how can we track of domains.
> Could you help give me a example?

The beauty of it is that you don't need to. If you guarantee all of an 
IOMMU's client devices are in the same group, you know you've only got 
one thing which can be attached to that IOMMU's domains. Therefore, you 
can freely allow as many domains as you like to *exist*, because there 
can never be more than one *active* at any given time - the core code 
enforces that the group is detached from one domain before being 
attached to another, and the driver's attach and detach calls just 
become responsible for switching the given domain's page table in and 
out of the actual hardware. I think it's pretty neat.

Robin.
Joerg Roedel Dec. 16, 2015, 3:15 p.m. UTC | #9
On Tue, Dec 15, 2015 at 12:37:34PM +0000, Robin Murphy wrote:
> The potential issue I *do* see, looking more closely, is that
> iommu_group_get_for_dev() is setting group->domain but not calling
> the attach_dev callback, which looks wrong...

Attaching the device happens from iommu_group_add_device(), which is
called from iommu_group_get_for_dev() right after the domains are
allocated/initialized.


	Joerg
Yong Wu (吴勇) Dec. 17, 2015, 3:12 a.m. UTC | #10
On Wed, 2015-12-16 at 12:48 +0000, Robin Murphy wrote:
> On 16/12/15 05:59, Yong Wu wrote:
> > On Tue, 2015-12-15 at 12:37 +0000, Robin Murphy wrote:
> >> On 15/12/15 03:28, Yong Wu wrote:
> >>> On Mon, 2015-12-14 at 15:16 +0100, Joerg Roedel wrote:
> >>>> On Tue, Dec 08, 2015 at 05:49:12PM +0800, Yong Wu wrote:
[...]
> > Following your comment above, I test as below. Then the flows seems meet
> > the "best case" that the iommu core will help create default DMA domain.
> >
> > @@ -664,19 +636,41 @@ static int mtk_iommu_probe(struct platform_device
> > *pdev)
> > for (i = 0; i < larb_nr; i++) {
> > 		struct device_node *larbnode;
> > 		struct platform_device *plarbdev;
> >
> > 		larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i);
> > 		if (!larbnode)
> > 			return -EINVAL;
> >
> >                  plarbdev = of_find_device_by_node(larbnode);
> >                  of_node_put(larbnode);
> > -               if (!plarbdev)
> > -                       return -EPROBE_DEFER;
> > +               if (!plarbdev) {
> > +                       plarbdev = of_platform_device_create(larbnode,
> > NULL, platform_bus_type.dev_root);
> > +                       if (IS_ERR(pdev))
> > +                               return -EPROBE_DEFER;
> > +               }
> > }
> >
> > I only add of_platform_device_create for the SMI local arbiter devices
> > here.
> >
> > This is a big improvement for us. If this is ok, I will send a quick
> > next version for this.
> 
> In my opinion it's reasonable - we need the whole "IOMMU" to be ready, 

  Thanks.

> so if we already have to short-cut the creation of the M4U part it only 
> seems fair to do the same for the SMI part. That said, would it work to 
> just unconditionally poke the larbs in mtk_iommu_init_fn() before you 
> poke the M4U itself? It would be nice to keep all that stuff together in 
> the same place.

    mtk_iommu_init_fn don't have the larb's "struct device_node". So I
cann't create its platform_device directly.

    I have tried 2 method:
   a) add a mtk_smi_larb_init_fn in the SMI patch.

static int mtk_smi_larb_init_fn(struct device_node *np)
{
	struct platform_device *pdev;

	pdev = of_platform_device_create(np, NULL, platform_bus_type.dev_root);

	return IS_ERR(pdev) ? PTR_ERR(pdev) :  0;
}
IOMMU_OF_DECLARE(mtk_smi_larb, "mediatek,mt8173-smi-larb",
mtk_smi_larb_init_fn);

   This don't work. It will run after mtk_iommu_init_fn. then the larb's
platform_device also don't exist while m4u's probe.

  b) Copy the code below to mtk_iommu_init_fn.

   for (i = 0; i < larb_nr; i++) {
      xxx
      plarbdev = of_platform_device_create(larbnode,
                      NULL, platform_bus_type.dev_root);
   }

   It works. But then there are 2 same code of parsing the SMI local
arbiter(one is in mtk_iommu_init_fn, the other is in mtk_iommu_init_fn).
It looks not good. I think that the one I wrote in the previous mail is
better, It only add 3 lines, What's your opinion?

> >>>>> +static struct iommu_group *mtk_iommu_device_group(struct device *dev)
> >>>>> +{
> >>>>> +	struct mtk_iommu_data *data;
> >>>>> +	struct mtk_iommu_client_priv *priv;
> >>>>> +
> >>>>> +	priv = dev->archdata.iommu;
> >>>>> +	if (!priv)
> >>>>> +		return ERR_PTR(-ENODEV);
> >>>>> +
> >>>>> +	/* All the client devices are in the same m4u iommu-group */
> >>>>> +	data = dev_get_drvdata(priv->m4udev);
> >>>>> +	if (!data->m4u_group) {
> >>>>> +		data->m4u_group = iommu_group_alloc();
> >>>>> +		if (IS_ERR(data->m4u_group))
> >>>>> +			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
> >>>>> +	}
> >>>>> +	return data->m4u_group;
> >>>>> +}
> >>
> >> As long as this works as expected, then AFAICS you shouldn't have to
> >> have *any* special-case behaviour or tracking of domains at all.
> >
> > We only need one iommu-group, one iommu domain here.
> >
> > What's the special-case behavior, how can we track of domains.
> > Could you help give me a example?
> 
> The beauty of it is that you don't need to. If you guarantee all of an 
> IOMMU's client devices are in the same group, you know you've only got 
> one thing which can be attached to that IOMMU's domains. Therefore, you 
> can freely allow as many domains as you like to *exist*, because there 
> can never be more than one *active* at any given time - the core code 
> enforces that the group is detached from one domain before being 
> attached to another, and the driver's attach and detach calls just 
> become responsible for switching the given domain's page table in and 
> out of the actual hardware. I think it's pretty neat.

It seems that mtk-iommu can not detach/attach dynamically. the iommu
core don't support iommu_detach_device/iommu_attach_device whose
iommu-group have many devices.(Normally there is only one device in a
iommu-group). 

So currently we only iommu_attach_device while probe, it will never
attach/detach again.

All our multimedia modules are in the m4u'domain and share m4u's
pagetable, They won't change pagetable.

> 
> Robin.
diff mbox

Patch

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index b9094e9..aab942f 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -393,4 +393,19 @@  config S390_IOMMU
 	help
 	  Support for the IOMMU API for s390 PCI devices.
 
+config MTK_IOMMU
+	bool "MTK IOMMU Support"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	select IOMMU_API
+	select IOMMU_DMA
+	select IOMMU_IO_PGTABLE_ARMV7S
+	select MEMORY
+	select MTK_SMI
+	help
+	  Support for the M4U on certain Mediatek SOCs. M4U is MultiMedia
+	  Memory Management Unit. This option enables remapping of DMA memory
+	  accesses for the multimedia subsystem.
+
+	  If unsure, say N here.
+
 endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 68faca02..02887bc 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -22,6 +22,7 @@  obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
 obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
 obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
+obj-$(CONFIG_MTK_IOMMU) += mtk_iommu.o
 obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o
 obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o
 obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
new file mode 100644
index 0000000..6a21e70
--- /dev/null
+++ b/drivers/iommu/mtk_iommu.c
@@ -0,0 +1,752 @@ 
+/*
+ * Copyright (c) 2014-2015 MediaTek Inc.
+ * Author: Yong Wu <yong.wu@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/dma-iommu.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_iommu.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <soc/mediatek/smi.h>
+#include <dt-bindings/memory/mt8173-larb-port.h>
+
+#include "io-pgtable.h"
+
+#define REG_MMU_PT_BASE_ADDR			0x000
+
+#define REG_MMU_INVALIDATE			0x020
+#define F_ALL_INVLD				0x2
+#define F_MMU_INV_RANGE				0x1
+
+#define REG_MMU_INVLD_START_A			0x024
+#define REG_MMU_INVLD_END_A			0x028
+
+#define REG_MMU_INV_SEL				0x038
+#define F_INVLD_EN0				BIT(0)
+#define F_INVLD_EN1				BIT(1)
+
+#define REG_MMU_STANDARD_AXI_MODE		0x048
+#define REG_MMU_DCM_DIS				0x050
+
+#define REG_MMU_CTRL_REG			0x110
+#define F_MMU_PREFETCH_RT_REPLACE_MOD		BIT(4)
+#define F_MMU_TF_PROTECT_SEL(prot)		(((prot) & 0x3) << 5)
+#define F_COHERENCE_EN				BIT(8)
+
+#define REG_MMU_IVRP_PADDR			0x114
+#define F_MMU_IVRP_PA_SET(pa)			((pa) >> 1)
+
+#define REG_MMU_INT_CONTROL0			0x120
+#define F_L2_MULIT_HIT_EN			BIT(0)
+#define F_TABLE_WALK_FAULT_INT_EN		BIT(1)
+#define F_PREETCH_FIFO_OVERFLOW_INT_EN		BIT(2)
+#define F_MISS_FIFO_OVERFLOW_INT_EN		BIT(3)
+#define F_PREFETCH_FIFO_ERR_INT_EN		BIT(5)
+#define F_MISS_FIFO_ERR_INT_EN			BIT(6)
+#define F_INT_CLR_BIT				BIT(12)
+
+#define REG_MMU_INT_MAIN_CONTROL		0x124
+#define F_INT_TRANSLATION_FAULT			BIT(0)
+#define F_INT_MAIN_MULTI_HIT_FAULT		BIT(1)
+#define F_INT_INVALID_PA_FAULT			BIT(2)
+#define F_INT_ENTRY_REPLACEMENT_FAULT		BIT(3)
+#define F_INT_TLB_MISS_FAULT			BIT(4)
+#define F_INT_MISS_TRANSATION_FIFO_FAULT	BIT(5)
+#define F_INT_PRETETCH_TRANSATION_FIFO_FAULT	BIT(6)
+
+#define REG_MMU_CPE_DONE			0x12C
+
+#define REG_MMU_FAULT_ST1			0x134
+
+#define REG_MMU_FAULT_VA			0x13c
+#define F_MMU_FAULT_VA_MSK			0xfffff000
+#define F_MMU_FAULT_VA_WRITE_BIT		BIT(1)
+#define F_MMU_FAULT_VA_LAYER_BIT		BIT(0)
+
+#define REG_MMU_INVLD_PA			0x140
+#define REG_MMU_INT_ID				0x150
+#define F_MMU0_INT_ID_LARB_ID(a)		(((a) >> 7) & 0x7)
+#define F_MMU0_INT_ID_PORT_ID(a)		(((a) >> 2) & 0x1f)
+
+#define MTK_PROTECT_PA_ALIGN			128
+#define MTK_IOMMU_LARB_MAX_NR			8
+
+struct mtk_iommu_suspend_reg {
+	u32				standard_axi_mode;
+	u32				dcm_dis;
+	u32				ctrl_reg;
+	u32				int_control0;
+	u32				int_main_control;
+};
+
+struct mtk_iommu_client_priv {
+	struct list_head		client;
+	unsigned int			mtk_m4u_id;
+	struct device			*m4udev;
+};
+
+struct mtk_iommu_domain {
+	spinlock_t			pgtlock; /* lock for page table */
+
+	struct io_pgtable_cfg		cfg;
+	struct io_pgtable_ops		*iop;
+
+	struct iommu_domain		domain;
+};
+
+struct mtk_iommu_data {
+	void __iomem			*base;
+	int				irq;
+	struct device			*dev;
+	struct device			*larbdev[MTK_IOMMU_LARB_MAX_NR];
+	struct clk			*bclk;
+	phys_addr_t			protect_base; /* protect memory base */
+	int				larb_nr;      /* local arbiter number */
+	struct mtk_iommu_suspend_reg	reg;
+	struct mtk_iommu_domain		*m4u_dom;
+	struct iommu_group		*m4u_group;
+};
+
+static struct iommu_ops mtk_iommu_ops;
+
+static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom)
+{
+	return container_of(dom, struct mtk_iommu_domain, domain);
+}
+
+static void mtk_iommu_tlb_flush_all(void *cookie)
+{
+	struct mtk_iommu_data *data = cookie;
+
+	writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + REG_MMU_INV_SEL);
+	writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE);
+	wmb(); /* Make sure the tlb flush all done */
+}
+
+static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size,
+					   size_t granule, bool leaf,
+					   void *cookie)
+{
+	struct mtk_iommu_data *data = cookie;
+
+	writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + REG_MMU_INV_SEL);
+
+	writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
+	writel_relaxed(iova + size - 1, data->base + REG_MMU_INVLD_END_A);
+	writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);
+}
+
+static void mtk_iommu_tlb_sync(void *cookie)
+{
+	struct mtk_iommu_data *data = cookie;
+	int ret;
+	u32 tmp;
+
+	ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, tmp,
+					tmp != 0, 10, 100000);
+	if (ret) {
+		dev_warn(data->dev,
+			 "Partial TLB flush timed out, falling back to full flush\n");
+		mtk_iommu_tlb_flush_all(cookie);
+	}
+	/* Clear the CPE status */
+	writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
+}
+
+static const struct iommu_gather_ops mtk_iommu_gather_ops = {
+	.tlb_flush_all = mtk_iommu_tlb_flush_all,
+	.tlb_add_flush = mtk_iommu_tlb_add_flush_nosync,
+	.tlb_sync = mtk_iommu_tlb_sync,
+};
+
+static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
+{
+	struct mtk_iommu_data *data = dev_id;
+	struct mtk_iommu_domain *dom = data->m4u_dom;
+	u32 int_state, regval, fault_iova, fault_pa;
+	unsigned int fault_larb, fault_port;
+	bool layer, write;
+
+	/* Read error info from registers */
+	int_state = readl_relaxed(data->base + REG_MMU_FAULT_ST1);
+	fault_iova = readl_relaxed(data->base + REG_MMU_FAULT_VA);
+	layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT;
+	write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT;
+	fault_iova &= F_MMU_FAULT_VA_MSK;
+	fault_pa = readl_relaxed(data->base + REG_MMU_INVLD_PA);
+	regval = readl_relaxed(data->base + REG_MMU_INT_ID);
+	fault_larb = F_MMU0_INT_ID_LARB_ID(regval);
+	fault_port = F_MMU0_INT_ID_PORT_ID(regval);
+
+	if (report_iommu_fault(&dom->domain, data->dev, fault_iova,
+			       write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) {
+		dev_err_ratelimited(
+			data->dev,
+			"fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d layer=%d %s\n",
+			int_state, fault_iova, fault_pa, fault_larb, fault_port,
+			layer, write ? "write" : "read");
+	}
+
+	/* Interrupt clear */
+	regval = readl_relaxed(data->base + REG_MMU_INT_CONTROL0);
+	regval |= F_INT_CLR_BIT;
+	writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL0);
+
+	mtk_iommu_tlb_flush_all(data);
+
+	return IRQ_HANDLED;
+}
+
+static void mtk_iommu_config(struct mtk_iommu_data *data,
+			     struct device *dev, bool enable)
+{
+	struct mtk_iommu_client_priv *head, *cur, *next;
+
+	head = dev->archdata.iommu;
+	list_for_each_entry_safe(cur, next, &head->client, client) {
+		mtk_smi_config_port(
+			data->larbdev[MTK_M4U_TO_LARB(cur->mtk_m4u_id)],
+			MTK_M4U_TO_PORT(cur->mtk_m4u_id), enable);
+	}
+}
+
+static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data)
+{
+	struct mtk_iommu_domain *dom = data->m4u_dom;
+
+	spin_lock_init(&dom->pgtlock);
+
+	dom->cfg = (struct io_pgtable_cfg) {
+		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
+			IO_PGTABLE_QUIRK_NO_PERMS |
+			IO_PGTABLE_QUIRK_TLBI_ON_MAP,
+		.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
+		.ias = 32,
+		.oas = 32,
+		.tlb = &mtk_iommu_gather_ops,
+		.iommu_dev = data->dev,
+	};
+
+	dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
+	if (!dom->iop) {
+		dev_err(data->dev, "Failed to alloc io pgtable\n");
+		return -EINVAL;
+	}
+
+	/* Update our support page sizes bitmap */
+	mtk_iommu_ops.pgsize_bitmap = dom->cfg.pgsize_bitmap;
+
+	writel_relaxed(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
+		       data->base + REG_MMU_PT_BASE_ADDR);
+	return 0;
+}
+
+static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
+{
+	struct mtk_iommu_domain *dom;
+
+	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
+		return NULL;
+
+	dom = kzalloc(sizeof(*dom), GFP_KERNEL);
+	if (!dom)
+		return NULL;
+
+	if (type == IOMMU_DOMAIN_DMA && iommu_get_dma_cookie(&dom->domain)) {
+		kfree(dom);
+		return NULL;
+	}
+
+	dom->domain.geometry.aperture_start = 0;
+	dom->domain.geometry.aperture_end = DMA_BIT_MASK(32);
+	dom->domain.geometry.force_aperture = true;
+
+	return &dom->domain;
+}
+
+static void mtk_iommu_domain_free(struct iommu_domain *domain)
+{
+	if (domain->type == IOMMU_DOMAIN_DMA)
+		iommu_put_dma_cookie(domain);
+	kfree(to_mtk_domain(domain));
+}
+
+static int mtk_iommu_attach_device(struct iommu_domain *domain,
+				   struct device *dev)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
+	struct mtk_iommu_data *data;
+	int ret;
+
+	if (!priv)
+		return -ENODEV;
+
+	data = dev_get_drvdata(priv->m4udev);
+	if (!data) {
+		/*
+		 * The DMA core will run earlier than this probe, and it will
+		 * create a default iommu domain for each a iommu device.
+		 * But here there is only one domain called the m4u domain
+		 * which all the multimedia HW share.
+		 * The default domain isn't needed here.
+		 */
+		iommu_domain_free(domain);
+
+		/* Return 0 for DMA to update dev->archdata.dma_ops */
+		return 0;
+	} else if (!data->m4u_dom) {
+		/*
+		 * While a device is added into a iommu group, the iommu core
+		 * will create a default domain for each a iommu group.
+		 * This default domain is reserved as the m4u domain and is
+		 * initiated here.
+		 */
+		data->m4u_dom = dom;
+
+		if (domain->type == IOMMU_DOMAIN_DMA) {
+			ret = iommu_dma_init_domain(domain, 0,
+						    DMA_BIT_MASK(32));
+			if (ret)
+				goto err_uninit_dom;
+		}
+
+		ret = mtk_iommu_domain_finalise(data);
+		if (ret)
+			goto err_uninit_dom;
+	} else if (data->m4u_dom != dom) {
+		/* All the client devices should be in the same m4u domain */
+		dev_err(dev, "try to attach into the error iommu domain\n");
+		return -EPERM;
+	}
+
+	mtk_iommu_config(data, dev, true);
+	return 0;
+
+err_uninit_dom:
+	data->m4u_dom = NULL;
+	return ret;
+}
+
+static void mtk_iommu_detach_device(struct iommu_domain *domain,
+				    struct device *dev)
+{
+	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
+	struct mtk_iommu_data *data;
+
+	if (!priv)
+		return;
+
+	data = dev_get_drvdata(priv->m4udev);
+	if (data)
+		mtk_iommu_config(data, dev, false);
+}
+
+static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
+			 phys_addr_t paddr, size_t size, int prot)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dom->pgtlock, flags);
+	ret = dom->iop->map(dom->iop, iova, paddr, size, prot);
+	spin_unlock_irqrestore(&dom->pgtlock, flags);
+
+	return ret;
+}
+
+static size_t mtk_iommu_unmap(struct iommu_domain *domain,
+			      unsigned long iova, size_t size)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	unsigned long flags;
+	size_t unmapsz;
+
+	spin_lock_irqsave(&dom->pgtlock, flags);
+	unmapsz = dom->iop->unmap(dom->iop, iova, size);
+	spin_unlock_irqrestore(&dom->pgtlock, flags);
+
+	return unmapsz;
+}
+
+static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
+					  dma_addr_t iova)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	unsigned long flags;
+	phys_addr_t pa;
+
+	spin_lock_irqsave(&dom->pgtlock, flags);
+	pa = dom->iop->iova_to_phys(dom->iop, iova);
+	spin_unlock_irqrestore(&dom->pgtlock, flags);
+
+	return pa;
+}
+
+static int mtk_iommu_add_device(struct device *dev)
+{
+	struct iommu_group *group;
+
+	if (!dev->archdata.iommu) /* Not a iommu client device */
+		return -ENODEV;
+
+	group = iommu_group_get_for_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	iommu_group_put(group);
+	return 0;
+}
+
+static void mtk_iommu_remove_device(struct device *dev)
+{
+	struct mtk_iommu_client_priv *head, *cur, *next;
+
+	head = dev->archdata.iommu;
+	if (!head)
+		return;
+
+	list_for_each_entry_safe(cur, next, &head->client, client) {
+		list_del(&cur->client);
+		kfree(cur);
+	}
+	kfree(head);
+	dev->archdata.iommu = NULL;
+
+	iommu_group_remove_device(dev);
+}
+
+static struct iommu_group *mtk_iommu_device_group(struct device *dev)
+{
+	struct mtk_iommu_data *data;
+	struct mtk_iommu_client_priv *priv;
+
+	priv = dev->archdata.iommu;
+	if (!priv)
+		return ERR_PTR(-ENODEV);
+
+	/* All the client devices are in the same m4u iommu-group */
+	data = dev_get_drvdata(priv->m4udev);
+	if (!data->m4u_group) {
+		data->m4u_group = iommu_group_alloc();
+		if (IS_ERR(data->m4u_group))
+			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
+	}
+	return data->m4u_group;
+}
+
+static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+	struct mtk_iommu_client_priv *head, *priv, *next;
+	struct platform_device *m4updev;
+
+	if (args->args_count != 1) {
+		dev_err(dev, "invalid #iommu-cells(%d) property for IOMMU\n",
+			args->args_count);
+		return -EINVAL;
+	}
+
+	if (!dev->archdata.iommu) {
+		/* Get the m4u device */
+		m4updev = of_find_device_by_node(args->np);
+		of_node_put(args->np);
+		if (WARN_ON(!m4updev))
+			return -EINVAL;
+
+		head = kzalloc(sizeof(*head), GFP_KERNEL);
+		if (!head)
+			return -ENOMEM;
+
+		dev->archdata.iommu = head;
+		INIT_LIST_HEAD(&head->client);
+		head->m4udev = &m4updev->dev;
+	} else {
+		head = dev->archdata.iommu;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		goto err_free_mem;
+
+	priv->mtk_m4u_id = args->args[0];
+	list_add_tail(&priv->client, &head->client);
+
+	return 0;
+
+err_free_mem:
+	list_for_each_entry_safe(priv, next, &head->client, client)
+		kfree(priv);
+	kfree(head);
+	dev->archdata.iommu = NULL;
+	return -ENOMEM;
+}
+
+static struct iommu_ops mtk_iommu_ops = {
+	.domain_alloc	= mtk_iommu_domain_alloc,
+	.domain_free	= mtk_iommu_domain_free,
+	.attach_dev	= mtk_iommu_attach_device,
+	.detach_dev	= mtk_iommu_detach_device,
+	.map		= mtk_iommu_map,
+	.unmap		= mtk_iommu_unmap,
+	.map_sg		= default_iommu_map_sg,
+	.iova_to_phys	= mtk_iommu_iova_to_phys,
+	.add_device	= mtk_iommu_add_device,
+	.remove_device	= mtk_iommu_remove_device,
+	.device_group	= mtk_iommu_device_group,
+	.of_xlate	= mtk_iommu_of_xlate,
+	.pgsize_bitmap	= SZ_4K | SZ_64K | SZ_1M | SZ_16M,
+};
+
+static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
+{
+	u32 regval;
+	int ret;
+
+	ret = clk_prepare_enable(data->bclk);
+	if (ret) {
+		dev_err(data->dev, "Failed to enable iommu bclk(%d)\n", ret);
+		return ret;
+	}
+
+	regval = F_MMU_PREFETCH_RT_REPLACE_MOD |
+		F_MMU_TF_PROTECT_SEL(2) |
+		F_COHERENCE_EN;
+	writel_relaxed(regval, data->base + REG_MMU_CTRL_REG);
+
+	regval = F_L2_MULIT_HIT_EN |
+		F_TABLE_WALK_FAULT_INT_EN |
+		F_PREETCH_FIFO_OVERFLOW_INT_EN |
+		F_MISS_FIFO_OVERFLOW_INT_EN |
+		F_PREFETCH_FIFO_ERR_INT_EN |
+		F_MISS_FIFO_ERR_INT_EN;
+	writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL0);
+
+	regval = F_INT_TRANSLATION_FAULT |
+		F_INT_MAIN_MULTI_HIT_FAULT |
+		F_INT_INVALID_PA_FAULT |
+		F_INT_ENTRY_REPLACEMENT_FAULT |
+		F_INT_TLB_MISS_FAULT |
+		F_INT_MISS_TRANSATION_FIFO_FAULT |
+		F_INT_PRETETCH_TRANSATION_FIFO_FAULT;
+	writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL);
+
+	regval = F_MMU_IVRP_PA_SET(data->protect_base);
+	writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR);
+
+	writel_relaxed(0, data->base + REG_MMU_DCM_DIS);
+	writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE);
+
+	if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0,
+			     dev_name(data->dev), (void *)data)) {
+		writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR);
+		clk_disable_unprepare(data->bclk);
+		dev_err(data->dev, "Failed @ IRQ-%d Request\n", data->irq);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int compare_of(struct device *dev, void *data)
+{
+	return dev->of_node == data;
+}
+
+static int mtk_iommu_bind(struct device *dev)
+{
+	struct mtk_iommu_data *data = dev_get_drvdata(dev);
+	void *protect;
+	int ret;
+
+	/* Protect memory. HW will access here while translation fault.*/
+	protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2, GFP_KERNEL);
+	if (!protect)
+		return -ENOMEM;
+	data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN);
+
+	ret = component_bind_all(dev, NULL);
+	if (ret)
+		return ret;
+
+	ret = mtk_iommu_hw_init(data);
+	if (ret) {
+		component_unbind_all(dev, NULL);
+		return ret;
+	}
+
+	if (!iommu_present(&platform_bus_type))
+		bus_set_iommu(&platform_bus_type, &mtk_iommu_ops);
+
+	return 0;
+}
+
+static void mtk_iommu_unbind(struct device *dev)
+{
+	struct mtk_iommu_data *data = dev_get_drvdata(dev);
+
+	if (iommu_present(&platform_bus_type))
+		bus_set_iommu(&platform_bus_type, NULL);
+
+	component_unbind_all(dev, NULL);
+	free_io_pgtable_ops(data->m4u_dom->iop);
+	clk_disable_unprepare(data->bclk);
+	devm_free_irq(dev, data->irq, data);
+}
+
+static const struct component_master_ops mtk_iommu_com_ops = {
+	.bind		= mtk_iommu_bind,
+	.unbind		= mtk_iommu_unbind,
+};
+
+static int mtk_iommu_probe(struct platform_device *pdev)
+{
+	struct mtk_iommu_data   *data;
+	struct device           *dev = &pdev->dev;
+	struct resource         *res;
+	struct component_match  *match = NULL;
+	int                     i;
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	data->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	data->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(data->base))
+		return PTR_ERR(data->base);
+
+	data->irq = platform_get_irq(pdev, 0);
+	if (data->irq < 0)
+		return data->irq;
+
+	data->bclk = devm_clk_get(dev, "bclk");
+	if (IS_ERR(data->bclk))
+		return PTR_ERR(data->bclk);
+
+	data->larb_nr = of_count_phandle_with_args(
+					dev->of_node, "mediatek,larbs", NULL);
+	if (data->larb_nr < 0)
+		return data->larb_nr;
+
+	for (i = 0; i < data->larb_nr; i++) {
+		struct device_node *larbnode;
+		struct platform_device *plarbdev;
+
+		larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i);
+		if (!larbnode)
+			return -EINVAL;
+
+		if (!of_device_is_available(larbnode))
+			continue;
+
+		plarbdev = of_find_device_by_node(larbnode);
+		of_node_put(larbnode);
+		if (!plarbdev)
+			return -EPROBE_DEFER;
+		data->larbdev[i] = &plarbdev->dev;
+
+		component_match_add(dev, &match, compare_of, larbnode);
+	}
+
+	dev_set_drvdata(dev, data);
+	return component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
+}
+
+static int mtk_iommu_remove(struct platform_device *pdev)
+{
+	component_master_del(&pdev->dev, &mtk_iommu_com_ops);
+	return 0;
+}
+
+static int mtk_iommu_suspend(struct device *dev)
+{
+	struct mtk_iommu_data *data = dev_get_drvdata(dev);
+	struct mtk_iommu_suspend_reg *reg = &data->reg;
+	void __iomem *base = data->base;
+
+	reg->standard_axi_mode = readl_relaxed(base +
+					       REG_MMU_STANDARD_AXI_MODE);
+	reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM_DIS);
+	reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG);
+	reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0);
+	reg->int_main_control = readl_relaxed(base + REG_MMU_INT_MAIN_CONTROL);
+	return 0;
+}
+
+static int mtk_iommu_resume(struct device *dev)
+{
+	struct mtk_iommu_data *data = dev_get_drvdata(dev);
+	struct mtk_iommu_suspend_reg *reg = &data->reg;
+	void __iomem *base = data->base;
+
+	writel_relaxed(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
+		       base + REG_MMU_PT_BASE_ADDR);
+	writel_relaxed(reg->standard_axi_mode,
+		       base + REG_MMU_STANDARD_AXI_MODE);
+	writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS);
+	writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG);
+	writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0);
+	writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL);
+	writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base),
+		       base + REG_MMU_IVRP_PADDR);
+	return 0;
+}
+
+const struct dev_pm_ops mtk_iommu_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume)
+};
+
+static const struct of_device_id mtk_iommu_of_ids[] = {
+	{ .compatible = "mediatek,mt8173-m4u", },
+	{}
+};
+
+static struct platform_driver mtk_iommu_driver = {
+	.probe	= mtk_iommu_probe,
+	.remove	= mtk_iommu_remove,
+	.driver	= {
+		.name = "mtk-iommu",
+		.of_match_table = mtk_iommu_of_ids,
+		.pm = &mtk_iommu_pm_ops,
+	}
+};
+
+static int __init mtk_iommu_init(void)
+{
+	return platform_driver_register(&mtk_iommu_driver);
+}
+subsys_initcall(mtk_iommu_init);
+
+static int mtk_iommu_init_fn(struct device_node *np)
+{
+	struct platform_device *pdev;
+
+	pdev = of_platform_device_create(np, NULL, platform_bus_type.dev_root);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	of_iommu_set_ops(np, &mtk_iommu_ops);
+	return 0;
+}
+
+IOMMU_OF_DECLARE(mtkm4u, "mediatek,mt8173-m4u", mtk_iommu_init_fn);