diff mbox

[v11,17/27] iommu/exynos: remove calls to Runtime PM API functions

Message ID 20140314140843.ba055f28dd7ed59c46088029@samsung.com (mailing list archive)
State New, archived
Headers show

Commit Message

Cho KyongHo March 14, 2014, 5:08 a.m. UTC
Runtime power management by exynos-iommu driver independently from
master H/W's runtime pm is not useful for power saving since attaching
master H/W in probing time turns on its local power endlessly.
Thus this removes runtime pm API calls.
Runtime PM support is added in the following commits to exynos-iommu
driver.

Signed-off-by: Cho KyongHo <pullip.cho@samsung.com>
---
 drivers/iommu/exynos-iommu.c |  369 +++++++++++++++++++++++++++---------------
 1 file changed, 238 insertions(+), 131 deletions(-)

Comments

Tomasz Figa March 14, 2014, 12:59 p.m. UTC | #1
Hi KyongHo,

On 14.03.2014 06:08, Cho KyongHo wrote:
> Runtime power management by exynos-iommu driver independently from
> master H/W's runtime pm is not useful for power saving since attaching
> master H/W in probing time turns on its local power endlessly.
> Thus this removes runtime pm API calls.
> Runtime PM support is added in the following commits to exynos-iommu
> driver.

The patch seems to be doing something completely different than the 
commit description suggests. Please rewrite the description to describe 
the patch correctly.

>
> Signed-off-by: Cho KyongHo <pullip.cho@samsung.com>
> ---
>   drivers/iommu/exynos-iommu.c |  369 +++++++++++++++++++++++++++---------------
>   1 file changed, 238 insertions(+), 131 deletions(-)
>
> diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
> index 3458349..6834556 100644
> --- a/drivers/iommu/exynos-iommu.c
> +++ b/drivers/iommu/exynos-iommu.c
> @@ -27,6 +27,8 @@
>   #include <linux/memblock.h>
>   #include <linux/export.h>
>   #include <linux/of.h>
> +#include <linux/of_platform.h>
> +#include <linux/notifier.h>
>
>   #include <asm/cacheflush.h>
>   #include <asm/pgtable.h>
> @@ -111,6 +113,8 @@
>   #define __master_clk_enable(data)	__clk_gate_ctrl(data, clk_master, en)
>   #define __master_clk_disable(data)	__clk_gate_ctrl(data, clk_master, dis)
>
> +#define has_sysmmu(dev)		(dev->archdata.iommu != NULL)
> +
>   static struct kmem_cache *lv2table_kmem_cache;
>
>   static unsigned long *section_entry(unsigned long *pgtable, unsigned long iova)
> @@ -159,6 +163,16 @@ static char *sysmmu_fault_name[SYSMMU_FAULTS_NUM] = {
>   	"UNKNOWN FAULT"
>   };
>
> +/* attached to dev.archdata.iommu of the master device */
> +struct exynos_iommu_owner {
> +	struct list_head client; /* entry of exynos_iommu_domain.clients */
> +	struct device *dev;
> +	struct device *sysmmu;
> +	struct iommu_domain *domain;
> +	void *vmm_data;         /* IO virtual memory manager's data */
> +	spinlock_t lock;        /* Lock to preserve consistency of System MMU */

Please don't use spaces for alignment.

> +};
> +
>   struct exynos_iommu_domain {
>   	struct list_head clients; /* list of sysmmu_drvdata.node */
>   	unsigned long *pgtable; /* lv1 page table, 16KB */
> @@ -168,9 +182,8 @@ struct exynos_iommu_domain {
>   };
>
>   struct sysmmu_drvdata {
> -	struct list_head node; /* entry of exynos_iommu_domain.clients */
>   	struct device *sysmmu;	/* System MMU's device descriptor */
> -	struct device *dev;	/* Owner of system MMU */
> +	struct device *master;	/* Owner of system MMU */
>   	void __iomem *sfrbase;
>   	struct clk *clk;
>   	struct clk *clk_master;
> @@ -239,7 +252,6 @@ static void __sysmmu_tlb_invalidate_entry(void __iomem *sfrbase,
>   static void __sysmmu_set_ptbase(void __iomem *sfrbase,
>   				       unsigned long pgd)
>   {
> -	__raw_writel(0x1, sfrbase + REG_MMU_CFG); /* 16KB LV1, LRU */
>   	__raw_writel(pgd, sfrbase + REG_PT_BASE_ADDR);
>
>   	__sysmmu_tlb_invalidate(sfrbase);
> @@ -299,7 +311,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
>   					itype, base, addr);
>   		if (data->domain)
>   			ret = report_iommu_fault(data->domain,
> -					data->dev, addr, itype);
> +					data->master, addr, itype);
>   	}
>
>   	/* fault is not recovered by fault handler */
> @@ -316,116 +328,148 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
>   	return IRQ_HANDLED;
>   }
>
> -static bool __exynos_sysmmu_disable(struct sysmmu_drvdata *data)
> +static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data)

If you are changing the names anyway, it would be probably a good idea 
to reduce code obfuscation a bit and drop the underscores from 
beginnings of function names. Also I'd suggest keeping the "exynos_" prefix.

>   {
> -	unsigned long flags;
> -	bool disabled = false;
> -
> -	write_lock_irqsave(&data->lock, flags);
> -
> -	if (!set_sysmmu_inactive(data))
> -		goto finish;
> -
> -	__master_clk_enable(data);
> +	clk_enable(data->clk_master);
>
>   	__raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL);
> +	__raw_writel(0, data->sfrbase + REG_MMU_CFG);
>
>   	__sysmmu_clk_disable(data);
>   	__master_clk_disable(data);
> +}
>
> -	disabled = true;
> -	data->pgtable = 0;
> -	data->domain = NULL;
> -finish:
> -	write_unlock_irqrestore(&data->lock, flags);
> +static bool __sysmmu_disable(struct sysmmu_drvdata *data)
> +{
> +	bool disabled;
> +	unsigned long flags;
> +
> +	write_lock_irqsave(&data->lock, flags);
> +
> +	disabled = set_sysmmu_inactive(data);
> +
> +	if (disabled) {
> +		data->pgtable = 0;
> +		data->domain = NULL;
> +
> +		__sysmmu_disable_nocount(data);
>
> -	if (disabled)
>   		dev_dbg(data->sysmmu, "Disabled\n");
> -	else
> -		dev_dbg(data->sysmmu, "%d times left to be disabled\n",
> +	} else  {
> +		dev_dbg(data->sysmmu, "%d times left to disable\n",
>   					data->activations);
> +	}
> +
> +	write_unlock_irqrestore(&data->lock, flags);
>
>   	return disabled;
>   }
>
> -/* __exynos_sysmmu_enable: Enables System MMU
> - *
> - * returns -error if an error occurred and System MMU is not enabled,
> - * 0 if the System MMU has been just enabled and 1 if System MMU was already
> - * enabled before.
> - */
> -static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data,
> +static void __sysmmu_init_config(struct sysmmu_drvdata *data)
> +{
> +	unsigned long cfg = 0;
> +
> +	__raw_writel(cfg, data->sfrbase + REG_MMU_CFG);
> +}
> +
> +static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data)
> +{
> +	__master_clk_enable(data);
> +	__sysmmu_clk_enable(data);
> +
> +	__raw_writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL);
> +
> +	__sysmmu_init_config(data);
> +
> +	__sysmmu_set_ptbase(data->sfrbase, data->pgtable);
> +
> +	__raw_writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
> +
> +	__master_clk_disable(data);
> +}
> +
> +static int __sysmmu_enable(struct sysmmu_drvdata *data,
>   			unsigned long pgtable, struct iommu_domain *domain)
>   {
>   	int ret = 0;
>   	unsigned long flags;
>
>   	write_lock_irqsave(&data->lock, flags);
> +	if (set_sysmmu_active(data)) {
> +		data->pgtable = pgtable;
> +		data->domain = domain;
>
> -	if (!set_sysmmu_active(data)) {
> -		if (WARN_ON(pgtable != data->pgtable)) {
> -			ret = -EBUSY;
> -			set_sysmmu_inactive(data);
> -		} else {
> -			ret = 1;
> -		}
> +		__sysmmu_enable_nocount(data);
>
> -		dev_dbg(data->sysmmu, "Already enabled\n");
> -		goto finish;
> +		dev_dbg(data->sysmmu, "Enabled\n");
> +	} else {
> +		ret = (pgtable == data->pgtable) ? 1 : -EBUSY;
> +
> +		dev_dbg(data->sysmmu, "already enabled\n");
>   	}
>
> -	data->pgtable = pgtable;
> +	if (WARN_ON(ret < 0))
> +		set_sysmmu_inactive(data); /* decrement count */
>
> -	__master_clk_enable(data);
> -	__sysmmu_clk_enable(data);
> +	write_unlock_irqrestore(&data->lock, flags);
>
> -	__sysmmu_set_ptbase(data->sfrbase, pgtable);
> +	return ret;
> +}
>
> -	__raw_writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
> +/* __exynos_sysmmu_enable: Enables System MMU
> + *
> + * returns -error if an error occurred and System MMU is not enabled,
> + * 0 if the System MMU has been just enabled and 1 if System MMU was already
> + * enabled before.
> + */
> +static int __exynos_sysmmu_enable(struct device *dev, unsigned long pgtable,
> +				  struct iommu_domain *domain)
> +{
> +	int ret = 0;
> +	unsigned long flags;
> +	struct exynos_iommu_owner *owner = dev->archdata.iommu;
> +	struct sysmmu_drvdata *data;
>
> -	__master_clk_disable(data);
> +	BUG_ON(!has_sysmmu(dev));
>
> -	data->domain = domain;
> +	spin_lock_irqsave(&owner->lock, flags);
>
> -	dev_dbg(data->sysmmu, "Enabled\n");
> -finish:
> -	write_unlock_irqrestore(&data->lock, flags);
> +	data = dev_get_drvdata(owner->sysmmu);
> +
> +	ret = __sysmmu_enable(data, pgtable, domain);
> +	if (ret >= 0)
> +		data->master = dev;
> +
> +	spin_unlock_irqrestore(&owner->lock, flags);
>
>   	return ret;
>   }
>
>   int exynos_sysmmu_enable(struct device *dev, unsigned long pgtable)
>   {
> -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
> -	int ret;
> -
>   	BUG_ON(!memblock_is_memory(pgtable));
>
> -	ret = pm_runtime_get_sync(data->sysmmu);
> -	if (ret < 0) {
> -		dev_dbg(data->sysmmu, "Failed to enable\n");
> -		return ret;
> -	}
> -
> -	ret = __exynos_sysmmu_enable(data, pgtable, NULL);
> -	if (WARN_ON(ret < 0)) {
> -		pm_runtime_put(data->sysmmu);
> -		dev_err(data->sysmmu, "Already enabled with page table %#lx\n",
> -			data->pgtable);
> -	} else {
> -		data->dev = dev;
> -	}
> -
> -	return ret;
> +	return __exynos_sysmmu_enable(dev, pgtable, NULL);
>   }
>
>   static bool exynos_sysmmu_disable(struct device *dev)
>   {
> -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
> -	bool disabled;
> +	unsigned long flags;
> +	bool disabled = true;
> +	struct exynos_iommu_owner *owner = dev->archdata.iommu;
> +	struct sysmmu_drvdata *data;
> +
> +	BUG_ON(!has_sysmmu(dev));
>
> -	disabled = __exynos_sysmmu_disable(data);
> -	pm_runtime_put(data->sysmmu);
> +	spin_lock_irqsave(&owner->lock, flags);
> +
> +	data = dev_get_drvdata(owner->sysmmu);
> +
> +	disabled = __sysmmu_disable(data);
> +	if (disabled)
> +		data->master = NULL;
> +
> +	spin_unlock_irqrestore(&owner->lock, flags);
>
>   	return disabled;
>   }
> @@ -433,11 +477,13 @@ static bool exynos_sysmmu_disable(struct device *dev)
>   static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova,
>   					size_t size)
>   {
> +	struct exynos_iommu_owner *owner = dev->archdata.iommu;
>   	unsigned long flags;
> -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
> +	struct sysmmu_drvdata *data;
>
> -	read_lock_irqsave(&data->lock, flags);
> +	data = dev_get_drvdata(owner->sysmmu);
>
> +	read_lock_irqsave(&data->lock, flags);
>   	if (is_sysmmu_active(data)) {
>   		unsigned int maj;
>   		unsigned int num_inv = 1;
> @@ -465,19 +511,21 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova,
>   		}
>   		__master_clk_disable(data);
>   	} else {
> -		dev_dbg(data->sysmmu, "Disabled. Skipping invalidating TLB.\n");
> +		dev_dbg(dev, "disabled. Skipping TLB invalidation @ %#lx\n",
> +			iova);
>   	}
> -
>   	read_unlock_irqrestore(&data->lock, flags);
>   }
>
>   void exynos_sysmmu_tlb_invalidate(struct device *dev)
>   {
> +	struct exynos_iommu_owner *owner = dev->archdata.iommu;
>   	unsigned long flags;
> -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
> +	struct sysmmu_drvdata *data;
>
> -	read_lock_irqsave(&data->lock, flags);
> +	data = dev_get_drvdata(owner->sysmmu);
>
> +	read_lock_irqsave(&data->lock, flags);
>   	if (is_sysmmu_active(data)) {
>   		__master_clk_enable(data);
>   		if (sysmmu_block(data->sfrbase)) {
> @@ -486,9 +534,8 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev)
>   		}
>   		__master_clk_disable(data);
>   	} else {
> -		dev_dbg(data->sysmmu, "Disabled. Skipping invalidating TLB.\n");
> +		dev_dbg(dev, "disabled. Skipping TLB invalidation\n");
>   	}
> -
>   	read_unlock_irqrestore(&data->lock, flags);
>   }
>
> @@ -498,6 +545,7 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev)
>   	struct device *dev = &pdev->dev;
>   	struct sysmmu_drvdata *data;
>   	struct resource *res;
> +	struct device_node *node;
>
>   	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
>   	if (!data)
> @@ -549,9 +597,32 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev)
>   		return ret;
>   	}
>
> +	/* Relation between master and System MMU is 1:1. */
> +	node = of_parse_phandle(dev->of_node, "mmu-masters", 0);
> +	if (node) {
> +		struct platform_device *master = of_find_device_by_node(node);
> +
> +		if (!master) {
> +			dev_err(dev, "%s: mmu-master '%s' not found\n",
> +				__func__, node->name);
> +			return -EINVAL;
> +		}
> +
> +		if (master->dev.archdata.iommu != NULL) {
> +			dev_err(dev, "%s: '%s' is master of other MMU\n",
> +				__func__, node->name);
> +			return -EINVAL;
> +		}
> +
> +		/*
> +		 * archdata.iommu will be initialized with exynos_iommu_client
> +		 * in sysmmu_hook_driver_register().
> +		 */
> +		master->dev.archdata.iommu = dev;
> +	}
> +
>   	data->sysmmu = dev;
>   	rwlock_init(&data->lock);
> -	INIT_LIST_HEAD(&data->node);
>
>   	platform_set_drvdata(pdev, data);
>
> @@ -629,7 +700,7 @@ err_pgtable:
>   static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
>   {
>   	struct exynos_iommu_domain *priv = domain->priv;
> -	struct sysmmu_drvdata *data;
> +	struct exynos_iommu_owner *owner;
>   	unsigned long flags;
>   	int i;
>
> @@ -637,11 +708,14 @@ static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
>
>   	spin_lock_irqsave(&priv->lock, flags);
>
> -	list_for_each_entry(data, &priv->clients, node) {
> -		while (!exynos_sysmmu_disable(data->dev))
> +	list_for_each_entry(owner, &priv->clients, client) {
> +		while (!exynos_sysmmu_disable(owner->dev))
>   			; /* until System MMU is actually disabled */

What about using list_for_each_entry_safe() and calling list_del_init() 
here directly?

>   	}
>
> +	while (!list_empty(&priv->clients))
> +		list_del_init(priv->clients.next);
> +
>   	spin_unlock_irqrestore(&priv->lock, flags);
>
>   	for (i = 0; i < NUM_LV1ENTRIES; i++)
> @@ -658,41 +732,28 @@ static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
>   static int exynos_iommu_attach_device(struct iommu_domain *domain,
>   				   struct device *dev)
>   {
> -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
> +	struct exynos_iommu_owner *owner = dev->archdata.iommu;
>   	struct exynos_iommu_domain *priv = domain->priv;
>   	unsigned long flags;
>   	int ret;
>
> -	ret = pm_runtime_get_sync(data->sysmmu);
> -	if (ret < 0)
> -		return ret;
> -
> -	ret = 0;
> -
>   	spin_lock_irqsave(&priv->lock, flags);
>
> -	ret = __exynos_sysmmu_enable(data, __pa(priv->pgtable), domain);
> -
> +	ret = __exynos_sysmmu_enable(dev, __pa(priv->pgtable), domain);
>   	if (ret == 0) {
> -		/* 'data->node' must not be appeared in priv->clients */
> -		BUG_ON(!list_empty(&data->node));
> -		data->dev = dev;
> -		list_add_tail(&data->node, &priv->clients);
> +		list_add_tail(&owner->client, &priv->clients);
> +		owner->domain = domain;
>   	}
>
>   	spin_unlock_irqrestore(&priv->lock, flags);
>
> -	if (ret < 0) {
> -		dev_err(dev, "%s: Failed to attach IOMMU with pgtable %#lx\n",
> +	if (ret < 0)
> +		dev_err(dev, "%s: Failed to attach IOMMU with pgtable %#x\n",
>   				__func__, __pa(priv->pgtable));
> -		pm_runtime_put(data->sysmmu);
> -	} else if (ret > 0) {
> -		dev_dbg(dev, "%s: IOMMU with pgtable 0x%lx already attached\n",
> -					__func__, __pa(priv->pgtable));
> -	} else {
> -		dev_dbg(dev, "%s: Attached new IOMMU with pgtable 0x%lx\n",
> -					__func__, __pa(priv->pgtable));
> -	}
> +	else
> +		dev_dbg(dev, "%s: Attached IOMMU with pgtable 0x%x%s\n",
> +					__func__, __pa(priv->pgtable),
> +					(ret == 0) ? "" : ", again");
>
>   	return ret;
>   }
> @@ -700,39 +761,29 @@ static int exynos_iommu_attach_device(struct iommu_domain *domain,
>   static void exynos_iommu_detach_device(struct iommu_domain *domain,
>   				    struct device *dev)
>   {
> -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
> +	struct exynos_iommu_owner *owner;
>   	struct exynos_iommu_domain *priv = domain->priv;
> -	struct list_head *pos;
>   	unsigned long flags;
> -	bool found = false;
>
>   	spin_lock_irqsave(&priv->lock, flags);
>
> -	list_for_each(pos, &priv->clients) {
> -		if (list_entry(pos, struct sysmmu_drvdata, node) == data) {
> -			found = true;
> +	list_for_each_entry(owner, &priv->clients, client) {
> +		if (owner == dev->archdata.iommu) {
> +			if (exynos_sysmmu_disable(dev)) {
> +				list_del_init(&owner->client);
> +				owner->domain = NULL;
> +			}
>   			break;
>   		}
>   	}
>
> -	if (!found)
> -		goto finish;
> -
> -	if (__exynos_sysmmu_disable(data)) {
> -		dev_dbg(dev, "%s: Detached IOMMU with pgtable %#lx\n",
> -					__func__, __pa(priv->pgtable));
> -		list_del_init(&data->node);
> -
> -	} else {
> -		dev_dbg(dev, "%s: Detaching IOMMU with pgtable %#lx delayed",
> -					__func__, __pa(priv->pgtable));
> -	}
> -
> -finish:
>   	spin_unlock_irqrestore(&priv->lock, flags);
>
> -	if (found)
> -		pm_runtime_put(data->sysmmu);
> +	if (owner == dev->archdata.iommu)
> +		dev_dbg(dev, "%s: Detached IOMMU with pgtable %#x\n",
> +					__func__, __pa(priv->pgtable));
> +	else
> +		dev_dbg(dev, "%s: No IOMMU is attached\n", __func__);
>   }
>
>   static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova,
> @@ -862,7 +913,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
>   					       unsigned long iova, size_t size)
>   {
>   	struct exynos_iommu_domain *priv = domain->priv;
> -	struct sysmmu_drvdata *data;
> +	struct exynos_iommu_owner *owner;
>   	unsigned long flags;
>   	unsigned long *ent;
>   	size_t err_pgsize;
> @@ -923,8 +974,8 @@ done:
>   	spin_unlock_irqrestore(&priv->pgtablelock, flags);
>
>   	spin_lock_irqsave(&priv->lock, flags);
> -	list_for_each_entry(data, &priv->clients, node)
> -		sysmmu_tlb_invalidate_entry(data->dev, iova, size);
> +	list_for_each_entry(owner, &priv->clients, client)
> +		sysmmu_tlb_invalidate_entry(owner->dev, iova, size);
>   	spin_unlock_irqrestore(&priv->lock, flags);
>
>   	return size;
> @@ -1009,3 +1060,59 @@ err_reg_driver:
>   	return ret;
>   }
>   subsys_initcall(exynos_iommu_init);
> +
> +static int sysmmu_hook_driver_register(struct notifier_block *nb,
> +					unsigned long val,
> +					void *p)
> +{
> +	struct device *dev = p;
> +
> +	switch (val) {
> +	case BUS_NOTIFY_BIND_DRIVER:
> +	{
> +		struct exynos_iommu_owner *owner;

Please move this variable to the top of the function and drop the braces 
around case blocks.

> +
> +		/* No System MMU assigned. See exynos_sysmmu_probe(). */
> +		if (dev->archdata.iommu == NULL)
> +			break;

This looks strange... (see below)

Also this looks racy. There are no guarantees about device probing 
order, so you may end up with master devices being probed before the 
IOMMUs. Deferred probing should be used to handle this correctly.

> +
> +		owner = devm_kzalloc(dev, sizeof(*owner), GFP_KERNEL);
> +		if (!owner) {
> +			dev_err(dev, "No Memory for exynos_iommu_owner\n");
> +			return -ENOMEM;
> +		}
> +
> +		owner->dev = dev;
> +		INIT_LIST_HEAD(&owner->client);
> +		owner->sysmmu = dev->archdata.iommu;
> +
> +		dev->archdata.iommu = owner;

I don't understand what is happening here in this case statement. There 
is already something assigned to dev->archdata.iommu, but this code 
reassigns something else to this field. This means that you attempt to 
use the same field to store pointers to objects of different types, 
which I believe is broken, because you have no way to check what kind of 
object is currently pointed by such (void *) pointer.

> +		break;
> +	}
> +	case BUS_NOTIFY_UNBOUND_DRIVER:
> +	{
> +		struct exynos_iommu_owner *owner = dev->archdata.iommu;
> +		if (owner) {
> +			struct device *sysmmu = owner->sysmmu;
> +			/* if still attached to an iommu_domain. */
> +			if (WARN_ON(!list_empty(&owner->client)))
> +				iommu_detach_device(owner->domain, owner->dev);
> +			devm_kfree(dev, owner);
> +			dev->archdata.iommu = sysmmu;
> +		}
> +		break;
> +	}
> +	} /* switch (val) */
> +
> +	return 0;
> +}
> +
> +static struct notifier_block sysmmu_notifier = {
> +	.notifier_call = &sysmmu_hook_driver_register,
> +};
> +
> +static int __init exynos_iommu_prepare(void)
> +{
> +	return bus_register_notifier(&platform_bus_type, &sysmmu_notifier);
> +}
> +arch_initcall(exynos_iommu_prepare);

I don't think it is a good idea to use such initcalls in drivers, as it 
is not multiplatform friendly. On a multiplatform kernel with 
exynos-iommu driver simply compiled in this notifier will register on 
any platform, not only on Exynos.

Best regards,
Tomasz
Cho KyongHo March 18, 2014, 9:56 a.m. UTC | #2
On Fri, 14 Mar 2014 13:59:00 +0100, Tomasz Figa wrote:
> Hi KyongHo,
> 
> On 14.03.2014 06:08, Cho KyongHo wrote:
> > Runtime power management by exynos-iommu driver independently from
> > master H/W's runtime pm is not useful for power saving since attaching
> > master H/W in probing time turns on its local power endlessly.
> > Thus this removes runtime pm API calls.
> > Runtime PM support is added in the following commits to exynos-iommu
> > driver.
> 
> The patch seems to be doing something completely different than the 
> commit description suggests. Please rewrite the description to describe 
> the patch correctly.
> 
I agree with your comment whatever the purpose of the change is.
The commit message will be updated.

> >
> > Signed-off-by: Cho KyongHo <pullip.cho@samsung.com>
> > ---
> >   drivers/iommu/exynos-iommu.c |  369 +++++++++++++++++++++++++++---------------
> >   1 file changed, 238 insertions(+), 131 deletions(-)
> >
> > diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
> > index 3458349..6834556 100644
> > --- a/drivers/iommu/exynos-iommu.c
> > +++ b/drivers/iommu/exynos-iommu.c
> > @@ -27,6 +27,8 @@
> >   #include <linux/memblock.h>
> >   #include <linux/export.h>
> >   #include <linux/of.h>
> > +#include <linux/of_platform.h>
> > +#include <linux/notifier.h>
> >
> >   #include <asm/cacheflush.h>
> >   #include <asm/pgtable.h>
> > @@ -111,6 +113,8 @@
> >   #define __master_clk_enable(data)	__clk_gate_ctrl(data, clk_master, en)
> >   #define __master_clk_disable(data)	__clk_gate_ctrl(data, clk_master, dis)
> >
> > +#define has_sysmmu(dev)		(dev->archdata.iommu != NULL)
> > +
> >   static struct kmem_cache *lv2table_kmem_cache;
> >
> >   static unsigned long *section_entry(unsigned long *pgtable, unsigned long iova)
> > @@ -159,6 +163,16 @@ static char *sysmmu_fault_name[SYSMMU_FAULTS_NUM] = {
> >   	"UNKNOWN FAULT"
> >   };
> >
> > +/* attached to dev.archdata.iommu of the master device */
> > +struct exynos_iommu_owner {
> > +	struct list_head client; /* entry of exynos_iommu_domain.clients */
> > +	struct device *dev;
> > +	struct device *sysmmu;
> > +	struct iommu_domain *domain;
> > +	void *vmm_data;         /* IO virtual memory manager's data */
> > +	spinlock_t lock;        /* Lock to preserve consistency of System MMU */
> 
> Please don't use spaces for alignment.
> 
OK.

> > +};
> > +
> >   struct exynos_iommu_domain {
> >   	struct list_head clients; /* list of sysmmu_drvdata.node */
> >   	unsigned long *pgtable; /* lv1 page table, 16KB */
> > @@ -168,9 +182,8 @@ struct exynos_iommu_domain {
> >   };
> >
> >   struct sysmmu_drvdata {
> > -	struct list_head node; /* entry of exynos_iommu_domain.clients */
> >   	struct device *sysmmu;	/* System MMU's device descriptor */
> > -	struct device *dev;	/* Owner of system MMU */
> > +	struct device *master;	/* Owner of system MMU */
> >   	void __iomem *sfrbase;
> >   	struct clk *clk;
> >   	struct clk *clk_master;
> > @@ -239,7 +252,6 @@ static void __sysmmu_tlb_invalidate_entry(void __iomem *sfrbase,
> >   static void __sysmmu_set_ptbase(void __iomem *sfrbase,
> >   				       unsigned long pgd)
> >   {
> > -	__raw_writel(0x1, sfrbase + REG_MMU_CFG); /* 16KB LV1, LRU */
> >   	__raw_writel(pgd, sfrbase + REG_PT_BASE_ADDR);
> >
> >   	__sysmmu_tlb_invalidate(sfrbase);
> > @@ -299,7 +311,7 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
> >   					itype, base, addr);
> >   		if (data->domain)
> >   			ret = report_iommu_fault(data->domain,
> > -					data->dev, addr, itype);
> > +					data->master, addr, itype);
> >   	}
> >
> >   	/* fault is not recovered by fault handler */
> > @@ -316,116 +328,148 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
> >   	return IRQ_HANDLED;
> >   }
> >
> > -static bool __exynos_sysmmu_disable(struct sysmmu_drvdata *data)
> > +static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data)
> 
> If you are changing the names anyway, it would be probably a good idea 
> to reduce code obfuscation a bit and drop the underscores from 
> beginnings of function names. Also I'd suggest keeping the "exynos_" prefix.

Thanks for the suggestion.
__exynos_sysmmu_disable is splitted into 2 functions: __sysmmu_disable
and __sysmmu_disable_nocount.
I agree with you that it is good idea to reduce code obfuscation but
I don't think dropping beginning underscores of function names reduces
obfuscation.

> 
> >   {
> > -	unsigned long flags;
> > -	bool disabled = false;
> > -
> > -	write_lock_irqsave(&data->lock, flags);
> > -
> > -	if (!set_sysmmu_inactive(data))
> > -		goto finish;
> > -
> > -	__master_clk_enable(data);
> > +	clk_enable(data->clk_master);
> >
> >   	__raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL);
> > +	__raw_writel(0, data->sfrbase + REG_MMU_CFG);
> >
> >   	__sysmmu_clk_disable(data);
> >   	__master_clk_disable(data);
> > +}
> >
> > -	disabled = true;
> > -	data->pgtable = 0;
> > -	data->domain = NULL;
> > -finish:
> > -	write_unlock_irqrestore(&data->lock, flags);
> > +static bool __sysmmu_disable(struct sysmmu_drvdata *data)
> > +{
> > +	bool disabled;
> > +	unsigned long flags;
> > +
> > +	write_lock_irqsave(&data->lock, flags);
> > +
> > +	disabled = set_sysmmu_inactive(data);
> > +
> > +	if (disabled) {
> > +		data->pgtable = 0;
> > +		data->domain = NULL;
> > +
> > +		__sysmmu_disable_nocount(data);
> >
> > -	if (disabled)
> >   		dev_dbg(data->sysmmu, "Disabled\n");
> > -	else
> > -		dev_dbg(data->sysmmu, "%d times left to be disabled\n",
> > +	} else  {
> > +		dev_dbg(data->sysmmu, "%d times left to disable\n",
> >   					data->activations);
> > +	}
> > +
> > +	write_unlock_irqrestore(&data->lock, flags);
> >
> >   	return disabled;
> >   }
> >
> > -/* __exynos_sysmmu_enable: Enables System MMU
> > - *
> > - * returns -error if an error occurred and System MMU is not enabled,
> > - * 0 if the System MMU has been just enabled and 1 if System MMU was already
> > - * enabled before.
> > - */
> > -static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data,
> > +static void __sysmmu_init_config(struct sysmmu_drvdata *data)
> > +{
> > +	unsigned long cfg = 0;
> > +
> > +	__raw_writel(cfg, data->sfrbase + REG_MMU_CFG);
> > +}
> > +
> > +static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data)
> > +{
> > +	__master_clk_enable(data);
> > +	__sysmmu_clk_enable(data);
> > +
> > +	__raw_writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL);
> > +
> > +	__sysmmu_init_config(data);
> > +
> > +	__sysmmu_set_ptbase(data->sfrbase, data->pgtable);
> > +
> > +	__raw_writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
> > +
> > +	__master_clk_disable(data);
> > +}
> > +
> > +static int __sysmmu_enable(struct sysmmu_drvdata *data,
> >   			unsigned long pgtable, struct iommu_domain *domain)
> >   {
> >   	int ret = 0;
> >   	unsigned long flags;
> >
> >   	write_lock_irqsave(&data->lock, flags);
> > +	if (set_sysmmu_active(data)) {
> > +		data->pgtable = pgtable;
> > +		data->domain = domain;
> >
> > -	if (!set_sysmmu_active(data)) {
> > -		if (WARN_ON(pgtable != data->pgtable)) {
> > -			ret = -EBUSY;
> > -			set_sysmmu_inactive(data);
> > -		} else {
> > -			ret = 1;
> > -		}
> > +		__sysmmu_enable_nocount(data);
> >
> > -		dev_dbg(data->sysmmu, "Already enabled\n");
> > -		goto finish;
> > +		dev_dbg(data->sysmmu, "Enabled\n");
> > +	} else {
> > +		ret = (pgtable == data->pgtable) ? 1 : -EBUSY;
> > +
> > +		dev_dbg(data->sysmmu, "already enabled\n");
> >   	}
> >
> > -	data->pgtable = pgtable;
> > +	if (WARN_ON(ret < 0))
> > +		set_sysmmu_inactive(data); /* decrement count */
> >
> > -	__master_clk_enable(data);
> > -	__sysmmu_clk_enable(data);
> > +	write_unlock_irqrestore(&data->lock, flags);
> >
> > -	__sysmmu_set_ptbase(data->sfrbase, pgtable);
> > +	return ret;
> > +}
> >
> > -	__raw_writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
> > +/* __exynos_sysmmu_enable: Enables System MMU
> > + *
> > + * returns -error if an error occurred and System MMU is not enabled,
> > + * 0 if the System MMU has been just enabled and 1 if System MMU was already
> > + * enabled before.
> > + */
> > +static int __exynos_sysmmu_enable(struct device *dev, unsigned long pgtable,
> > +				  struct iommu_domain *domain)
> > +{
> > +	int ret = 0;
> > +	unsigned long flags;
> > +	struct exynos_iommu_owner *owner = dev->archdata.iommu;
> > +	struct sysmmu_drvdata *data;
> >
> > -	__master_clk_disable(data);
> > +	BUG_ON(!has_sysmmu(dev));
> >
> > -	data->domain = domain;
> > +	spin_lock_irqsave(&owner->lock, flags);
> >
> > -	dev_dbg(data->sysmmu, "Enabled\n");
> > -finish:
> > -	write_unlock_irqrestore(&data->lock, flags);
> > +	data = dev_get_drvdata(owner->sysmmu);
> > +
> > +	ret = __sysmmu_enable(data, pgtable, domain);
> > +	if (ret >= 0)
> > +		data->master = dev;
> > +
> > +	spin_unlock_irqrestore(&owner->lock, flags);
> >
> >   	return ret;
> >   }
> >
> >   int exynos_sysmmu_enable(struct device *dev, unsigned long pgtable)
> >   {
> > -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
> > -	int ret;
> > -
> >   	BUG_ON(!memblock_is_memory(pgtable));
> >
> > -	ret = pm_runtime_get_sync(data->sysmmu);
> > -	if (ret < 0) {
> > -		dev_dbg(data->sysmmu, "Failed to enable\n");
> > -		return ret;
> > -	}
> > -
> > -	ret = __exynos_sysmmu_enable(data, pgtable, NULL);
> > -	if (WARN_ON(ret < 0)) {
> > -		pm_runtime_put(data->sysmmu);
> > -		dev_err(data->sysmmu, "Already enabled with page table %#lx\n",
> > -			data->pgtable);
> > -	} else {
> > -		data->dev = dev;
> > -	}
> > -
> > -	return ret;
> > +	return __exynos_sysmmu_enable(dev, pgtable, NULL);
> >   }
> >
> >   static bool exynos_sysmmu_disable(struct device *dev)
> >   {
> > -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
> > -	bool disabled;
> > +	unsigned long flags;
> > +	bool disabled = true;
> > +	struct exynos_iommu_owner *owner = dev->archdata.iommu;
> > +	struct sysmmu_drvdata *data;
> > +
> > +	BUG_ON(!has_sysmmu(dev));
> >
> > -	disabled = __exynos_sysmmu_disable(data);
> > -	pm_runtime_put(data->sysmmu);
> > +	spin_lock_irqsave(&owner->lock, flags);
> > +
> > +	data = dev_get_drvdata(owner->sysmmu);
> > +
> > +	disabled = __sysmmu_disable(data);
> > +	if (disabled)
> > +		data->master = NULL;
> > +
> > +	spin_unlock_irqrestore(&owner->lock, flags);
> >
> >   	return disabled;
> >   }
> > @@ -433,11 +477,13 @@ static bool exynos_sysmmu_disable(struct device *dev)
> >   static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova,
> >   					size_t size)
> >   {
> > +	struct exynos_iommu_owner *owner = dev->archdata.iommu;
> >   	unsigned long flags;
> > -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
> > +	struct sysmmu_drvdata *data;
> >
> > -	read_lock_irqsave(&data->lock, flags);
> > +	data = dev_get_drvdata(owner->sysmmu);
> >
> > +	read_lock_irqsave(&data->lock, flags);
> >   	if (is_sysmmu_active(data)) {
> >   		unsigned int maj;
> >   		unsigned int num_inv = 1;
> > @@ -465,19 +511,21 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova,
> >   		}
> >   		__master_clk_disable(data);
> >   	} else {
> > -		dev_dbg(data->sysmmu, "Disabled. Skipping invalidating TLB.\n");
> > +		dev_dbg(dev, "disabled. Skipping TLB invalidation @ %#lx\n",
> > +			iova);
> >   	}
> > -
> >   	read_unlock_irqrestore(&data->lock, flags);
> >   }
> >
> >   void exynos_sysmmu_tlb_invalidate(struct device *dev)
> >   {
> > +	struct exynos_iommu_owner *owner = dev->archdata.iommu;
> >   	unsigned long flags;
> > -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
> > +	struct sysmmu_drvdata *data;
> >
> > -	read_lock_irqsave(&data->lock, flags);
> > +	data = dev_get_drvdata(owner->sysmmu);
> >
> > +	read_lock_irqsave(&data->lock, flags);
> >   	if (is_sysmmu_active(data)) {
> >   		__master_clk_enable(data);
> >   		if (sysmmu_block(data->sfrbase)) {
> > @@ -486,9 +534,8 @@ void exynos_sysmmu_tlb_invalidate(struct device *dev)
> >   		}
> >   		__master_clk_disable(data);
> >   	} else {
> > -		dev_dbg(data->sysmmu, "Disabled. Skipping invalidating TLB.\n");
> > +		dev_dbg(dev, "disabled. Skipping TLB invalidation\n");
> >   	}
> > -
> >   	read_unlock_irqrestore(&data->lock, flags);
> >   }
> >
> > @@ -498,6 +545,7 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev)
> >   	struct device *dev = &pdev->dev;
> >   	struct sysmmu_drvdata *data;
> >   	struct resource *res;
> > +	struct device_node *node;
> >
> >   	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
> >   	if (!data)
> > @@ -549,9 +597,32 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev)
> >   		return ret;
> >   	}
> >
> > +	/* Relation between master and System MMU is 1:1. */
> > +	node = of_parse_phandle(dev->of_node, "mmu-masters", 0);
> > +	if (node) {
> > +		struct platform_device *master = of_find_device_by_node(node);
> > +
> > +		if (!master) {
> > +			dev_err(dev, "%s: mmu-master '%s' not found\n",
> > +				__func__, node->name);
> > +			return -EINVAL;
> > +		}
> > +
> > +		if (master->dev.archdata.iommu != NULL) {
> > +			dev_err(dev, "%s: '%s' is master of other MMU\n",
> > +				__func__, node->name);
> > +			return -EINVAL;
> > +		}
> > +
> > +		/*
> > +		 * archdata.iommu will be initialized with exynos_iommu_client
> > +		 * in sysmmu_hook_driver_register().
> > +		 */
> > +		master->dev.archdata.iommu = dev;
> > +	}
> > +
> >   	data->sysmmu = dev;
> >   	rwlock_init(&data->lock);
> > -	INIT_LIST_HEAD(&data->node);
> >
> >   	platform_set_drvdata(pdev, data);
> >
> > @@ -629,7 +700,7 @@ err_pgtable:
> >   static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
> >   {
> >   	struct exynos_iommu_domain *priv = domain->priv;
> > -	struct sysmmu_drvdata *data;
> > +	struct exynos_iommu_owner *owner;
> >   	unsigned long flags;
> >   	int i;
> >
> > @@ -637,11 +708,14 @@ static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
> >
> >   	spin_lock_irqsave(&priv->lock, flags);
> >
> > -	list_for_each_entry(data, &priv->clients, node) {
> > -		while (!exynos_sysmmu_disable(data->dev))
> > +	list_for_each_entry(owner, &priv->clients, client) {
> > +		while (!exynos_sysmmu_disable(owner->dev))
> >   			; /* until System MMU is actually disabled */
> 
> What about using list_for_each_entry_safe() and calling list_del_init() 
> here directly?
> 

That require another variable to be defined.
I just wanted to avoid that because I think it is prettier.
Moreover, list_del_init() below the empty while() clause may make
the source code readers misunderstood..

> >   	}
> >
> > +	while (!list_empty(&priv->clients))
> > +		list_del_init(priv->clients.next);
> > +
> >   	spin_unlock_irqrestore(&priv->lock, flags);
> >
> >   	for (i = 0; i < NUM_LV1ENTRIES; i++)
> > @@ -658,41 +732,28 @@ static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
> >   static int exynos_iommu_attach_device(struct iommu_domain *domain,
> >   				   struct device *dev)
> >   {
> > -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
> > +	struct exynos_iommu_owner *owner = dev->archdata.iommu;
> >   	struct exynos_iommu_domain *priv = domain->priv;
> >   	unsigned long flags;
> >   	int ret;
> >
> > -	ret = pm_runtime_get_sync(data->sysmmu);
> > -	if (ret < 0)
> > -		return ret;
> > -
> > -	ret = 0;
> > -
> >   	spin_lock_irqsave(&priv->lock, flags);
> >
> > -	ret = __exynos_sysmmu_enable(data, __pa(priv->pgtable), domain);
> > -
> > +	ret = __exynos_sysmmu_enable(dev, __pa(priv->pgtable), domain);
> >   	if (ret == 0) {
> > -		/* 'data->node' must not be appeared in priv->clients */
> > -		BUG_ON(!list_empty(&data->node));
> > -		data->dev = dev;
> > -		list_add_tail(&data->node, &priv->clients);
> > +		list_add_tail(&owner->client, &priv->clients);
> > +		owner->domain = domain;
> >   	}
> >
> >   	spin_unlock_irqrestore(&priv->lock, flags);
> >
> > -	if (ret < 0) {
> > -		dev_err(dev, "%s: Failed to attach IOMMU with pgtable %#lx\n",
> > +	if (ret < 0)
> > +		dev_err(dev, "%s: Failed to attach IOMMU with pgtable %#x\n",
> >   				__func__, __pa(priv->pgtable));
> > -		pm_runtime_put(data->sysmmu);
> > -	} else if (ret > 0) {
> > -		dev_dbg(dev, "%s: IOMMU with pgtable 0x%lx already attached\n",
> > -					__func__, __pa(priv->pgtable));
> > -	} else {
> > -		dev_dbg(dev, "%s: Attached new IOMMU with pgtable 0x%lx\n",
> > -					__func__, __pa(priv->pgtable));
> > -	}
> > +	else
> > +		dev_dbg(dev, "%s: Attached IOMMU with pgtable 0x%x%s\n",
> > +					__func__, __pa(priv->pgtable),
> > +					(ret == 0) ? "" : ", again");
> >
> >   	return ret;
> >   }
> > @@ -700,39 +761,29 @@ static int exynos_iommu_attach_device(struct iommu_domain *domain,
> >   static void exynos_iommu_detach_device(struct iommu_domain *domain,
> >   				    struct device *dev)
> >   {
> > -	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
> > +	struct exynos_iommu_owner *owner;
> >   	struct exynos_iommu_domain *priv = domain->priv;
> > -	struct list_head *pos;
> >   	unsigned long flags;
> > -	bool found = false;
> >
> >   	spin_lock_irqsave(&priv->lock, flags);
> >
> > -	list_for_each(pos, &priv->clients) {
> > -		if (list_entry(pos, struct sysmmu_drvdata, node) == data) {
> > -			found = true;
> > +	list_for_each_entry(owner, &priv->clients, client) {
> > +		if (owner == dev->archdata.iommu) {
> > +			if (exynos_sysmmu_disable(dev)) {
> > +				list_del_init(&owner->client);
> > +				owner->domain = NULL;
> > +			}
> >   			break;
> >   		}
> >   	}
> >
> > -	if (!found)
> > -		goto finish;
> > -
> > -	if (__exynos_sysmmu_disable(data)) {
> > -		dev_dbg(dev, "%s: Detached IOMMU with pgtable %#lx\n",
> > -					__func__, __pa(priv->pgtable));
> > -		list_del_init(&data->node);
> > -
> > -	} else {
> > -		dev_dbg(dev, "%s: Detaching IOMMU with pgtable %#lx delayed",
> > -					__func__, __pa(priv->pgtable));
> > -	}
> > -
> > -finish:
> >   	spin_unlock_irqrestore(&priv->lock, flags);
> >
> > -	if (found)
> > -		pm_runtime_put(data->sysmmu);
> > +	if (owner == dev->archdata.iommu)
> > +		dev_dbg(dev, "%s: Detached IOMMU with pgtable %#x\n",
> > +					__func__, __pa(priv->pgtable));
> > +	else
> > +		dev_dbg(dev, "%s: No IOMMU is attached\n", __func__);
> >   }
> >
> >   static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova,
> > @@ -862,7 +913,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
> >   					       unsigned long iova, size_t size)
> >   {
> >   	struct exynos_iommu_domain *priv = domain->priv;
> > -	struct sysmmu_drvdata *data;
> > +	struct exynos_iommu_owner *owner;
> >   	unsigned long flags;
> >   	unsigned long *ent;
> >   	size_t err_pgsize;
> > @@ -923,8 +974,8 @@ done:
> >   	spin_unlock_irqrestore(&priv->pgtablelock, flags);
> >
> >   	spin_lock_irqsave(&priv->lock, flags);
> > -	list_for_each_entry(data, &priv->clients, node)
> > -		sysmmu_tlb_invalidate_entry(data->dev, iova, size);
> > +	list_for_each_entry(owner, &priv->clients, client)
> > +		sysmmu_tlb_invalidate_entry(owner->dev, iova, size);
> >   	spin_unlock_irqrestore(&priv->lock, flags);
> >
> >   	return size;
> > @@ -1009,3 +1060,59 @@ err_reg_driver:
> >   	return ret;
> >   }
> >   subsys_initcall(exynos_iommu_init);
> > +
> > +static int sysmmu_hook_driver_register(struct notifier_block *nb,
> > +					unsigned long val,
> > +					void *p)
> > +{
> > +	struct device *dev = p;
> > +
> > +	switch (val) {
> > +	case BUS_NOTIFY_BIND_DRIVER:
> > +	{
> > +		struct exynos_iommu_owner *owner;
> 
> Please move this variable to the top of the function and drop the braces 
> around case blocks.

I don't think it is required because this function is modified
by the following patches.

> 
> > +
> > +		/* No System MMU assigned. See exynos_sysmmu_probe(). */
> > +		if (dev->archdata.iommu == NULL)
> > +			break;
> 
> This looks strange... (see below)
> 
> Also this looks racy. There are no guarantees about device probing 
> order, so you may end up with master devices being probed before the 
> IOMMUs. Deferred probing should be used to handle this correctly.

System MMU driver must be probed earlier than the drivers of master devices
because the drivers may want to use System MMU for their initial task.

> 
> > +
> > +		owner = devm_kzalloc(dev, sizeof(*owner), GFP_KERNEL);
> > +		if (!owner) {
> > +			dev_err(dev, "No Memory for exynos_iommu_owner\n");
> > +			return -ENOMEM;
> > +		}
> > +
> > +		owner->dev = dev;
> > +		INIT_LIST_HEAD(&owner->client);
> > +		owner->sysmmu = dev->archdata.iommu;
> > +
> > +		dev->archdata.iommu = owner;
> 
> I don't understand what is happening here in this case statement. There 
> is already something assigned to dev->archdata.iommu, but this code 
> reassigns something else to this field. This means that you attempt to 
> use the same field to store pointers to objects of different types, 
> which I believe is broken, because you have no way to check what kind of 
> object is currently pointed by such (void *) pointer.
> 

exynos_sysmmu_probe() assigns the device descriptor of the probing System MMU
to archdata.iommu of its master device. The assignment is just a marker that
the device is a master device of a System MMU.
If dev->archdata.iommu has a value in the case of BUS_NOTIFY_BIND_DRIVER,
the 'dev' is a master devices of a System MMU. Then sysmmu_hook_driver_register()
assigns the data structure to handle System MMU to dev->archdata.iommu.

> > +		break;
> > +	}
> > +	case BUS_NOTIFY_UNBOUND_DRIVER:
> > +	{
> > +		struct exynos_iommu_owner *owner = dev->archdata.iommu;
> > +		if (owner) {
> > +			struct device *sysmmu = owner->sysmmu;
> > +			/* if still attached to an iommu_domain. */
> > +			if (WARN_ON(!list_empty(&owner->client)))
> > +				iommu_detach_device(owner->domain, owner->dev);
> > +			devm_kfree(dev, owner);
> > +			dev->archdata.iommu = sysmmu;
> > +		}
> > +		break;
> > +	}
> > +	} /* switch (val) */
> > +
> > +	return 0;
> > +}
> > +
> > +static struct notifier_block sysmmu_notifier = {
> > +	.notifier_call = &sysmmu_hook_driver_register,
> > +};
> > +
> > +static int __init exynos_iommu_prepare(void)
> > +{
> > +	return bus_register_notifier(&platform_bus_type, &sysmmu_notifier);
> > +}
> > +arch_initcall(exynos_iommu_prepare);
> 
> I don't think it is a good idea to use such initcalls in drivers, as it 
> is not multiplatform friendly. On a multiplatform kernel with 
> exynos-iommu driver simply compiled in this notifier will register on 
> any platform, not only on Exynos.
> 
Ok.
I will change this function not to be called for the platforms
which does not have Exynos System MMU.

Thank you for your review.

KyongHo.
Tomasz Figa March 18, 2014, 3:09 p.m. UTC | #3
On 18.03.2014 10:56, Cho KyongHo wrote:
> On Fri, 14 Mar 2014 13:59:00 +0100, Tomasz Figa wrote:
>> Hi KyongHo,
>>
>> On 14.03.2014 06:08, Cho KyongHo wrote:

[snip]

>>> -static bool __exynos_sysmmu_disable(struct sysmmu_drvdata *data)
>>> +static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data)
>>
>> If you are changing the names anyway, it would be probably a good idea
>> to reduce code obfuscation a bit and drop the underscores from
>> beginnings of function names. Also I'd suggest keeping the "exynos_" prefix.
>
> Thanks for the suggestion.
> __exynos_sysmmu_disable is splitted into 2 functions: __sysmmu_disable
> and __sysmmu_disable_nocount.
> I agree with you that it is good idea to reduce code obfuscation but
> I don't think dropping beginning underscores of function names reduces
> obfuscation.
>

Well, if you are ending up with a function like 
__sysmmu_enable_nocount() below with every line starting with two 
underscores, do you think this improves code readability?

Of course this is a minor issue, but let's keep some code quality level 
in Linux kernel.

>>
>>>    {
>>> -	unsigned long flags;
>>> -	bool disabled = false;
>>> -
>>> -	write_lock_irqsave(&data->lock, flags);

[snip]

Here's the function mentioned above:

>>> +
>>> +static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data)
>>> +{
>>> +	__master_clk_enable(data);
>>> +	__sysmmu_clk_enable(data);
>>> +
>>> +	__raw_writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL);
>>> +
>>> +	__sysmmu_init_config(data);
>>> +
>>> +	__sysmmu_set_ptbase(data->sfrbase, data->pgtable);
>>> +
>>> +	__raw_writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
>>> +
>>> +	__master_clk_disable(data);
>>> +}
>>> +

[snip]

>>>
>>> @@ -629,7 +700,7 @@ err_pgtable:
>>>    static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
>>>    {
>>>    	struct exynos_iommu_domain *priv = domain->priv;
>>> -	struct sysmmu_drvdata *data;
>>> +	struct exynos_iommu_owner *owner;
>>>    	unsigned long flags;
>>>    	int i;
>>>
>>> @@ -637,11 +708,14 @@ static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
>>>
>>>    	spin_lock_irqsave(&priv->lock, flags);
>>>
>>> -	list_for_each_entry(data, &priv->clients, node) {
>>> -		while (!exynos_sysmmu_disable(data->dev))
>>> +	list_for_each_entry(owner, &priv->clients, client) {
>>> +		while (!exynos_sysmmu_disable(owner->dev))
>>>    			; /* until System MMU is actually disabled */
>>
>> What about using list_for_each_entry_safe() and calling list_del_init()
>> here directly?
>>
>
> That require another variable to be defined.

Is it a problem?

> I just wanted to avoid that because I think it is prettier.
> Moreover, list_del_init() below the empty while() clause may make
> the source code readers misunderstood..

This raises another question, why the loop above is even needed. 
exynos_sysmmu_disable() should make sure that SYSMMU is actually 
disabled, without any need for looping like this.

>>>    	}
>>>
>>> +	while (!list_empty(&priv->clients))
>>> +		list_del_init(priv->clients.next);
>>> +
>>>    	spin_unlock_irqrestore(&priv->lock, flags);
>>>
>>>    	for (i = 0; i < NUM_LV1ENTRIES; i++)

[snip]

>>> +static int sysmmu_hook_driver_register(struct notifier_block *nb,
>>> +					unsigned long val,
>>> +					void *p)
>>> +{
>>> +	struct device *dev = p;
>>> +
>>> +	switch (val) {
>>> +	case BUS_NOTIFY_BIND_DRIVER:
>>> +	{
>>> +		struct exynos_iommu_owner *owner;
>>
>> Please move this variable to the top of the function and drop the braces
>> around case blocks.
>
> I don't think it is required because this function is modified
> by the following patches.

OK, if so, and similar issue is not present after further patches.

>
>>
>>> +
>>> +		/* No System MMU assigned. See exynos_sysmmu_probe(). */
>>> +		if (dev->archdata.iommu == NULL)
>>> +			break;
>>
>> This looks strange... (see below)
>>
>> Also this looks racy. There are no guarantees about device probing
>> order, so you may end up with master devices being probed before the
>> IOMMUs. Deferred probing should be used to handle this correctly.
>
> System MMU driver must be probed earlier than the drivers of master devices
> because the drivers may want to use System MMU for their initial task.

As I said, there are no guarantees about platform device probe order in 
Linux kernel. Code must be designed to check whether required 
dependencies are met and if not, deferred probing must be used.

>>
>>> +
>>> +		owner = devm_kzalloc(dev, sizeof(*owner), GFP_KERNEL);
>>> +		if (!owner) {
>>> +			dev_err(dev, "No Memory for exynos_iommu_owner\n");
>>> +			return -ENOMEM;
>>> +		}
>>> +
>>> +		owner->dev = dev;
>>> +		INIT_LIST_HEAD(&owner->client);
>>> +		owner->sysmmu = dev->archdata.iommu;
>>> +
>>> +		dev->archdata.iommu = owner;
>>
>> I don't understand what is happening here in this case statement. There
>> is already something assigned to dev->archdata.iommu, but this code
>> reassigns something else to this field. This means that you attempt to
>> use the same field to store pointers to objects of different types,
>> which I believe is broken, because you have no way to check what kind of
>> object is currently pointed by such (void *) pointer.
>>
>
> exynos_sysmmu_probe() assigns the device descriptor of the probing System MMU
> to archdata.iommu of its master device. The assignment is just a marker that
> the device is a master device of a System MMU.
> If dev->archdata.iommu has a value in the case of BUS_NOTIFY_BIND_DRIVER,
> the 'dev' is a master devices of a System MMU. Then sysmmu_hook_driver_register()
> assigns the data structure to handle System MMU to dev->archdata.iommu.

Which is wrong (or even if you don't consider it wrong, it is ugly), 
because one (void *) pointer in the same object is used to store 
pointers to two completely different types.

Best regards,
Tomasz
Cho KyongHo March 19, 2014, 1:03 a.m. UTC | #4
On Tue, 18 Mar 2014 16:09:50 +0100, Tomasz Figa wrote:
> On 18.03.2014 10:56, Cho KyongHo wrote:
> > On Fri, 14 Mar 2014 13:59:00 +0100, Tomasz Figa wrote:
> >> Hi KyongHo,
> >>
> >> On 14.03.2014 06:08, Cho KyongHo wrote:
> 
> [snip]
> 
> >>> -static bool __exynos_sysmmu_disable(struct sysmmu_drvdata *data)
> >>> +static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data)
> >>
> >> If you are changing the names anyway, it would be probably a good idea
> >> to reduce code obfuscation a bit and drop the underscores from
> >> beginnings of function names. Also I'd suggest keeping the "exynos_" prefix.
> >
> > Thanks for the suggestion.
> > __exynos_sysmmu_disable is splitted into 2 functions: __sysmmu_disable
> > and __sysmmu_disable_nocount.
> > I agree with you that it is good idea to reduce code obfuscation but
> > I don't think dropping beginning underscores of function names reduces
> > obfuscation.
> >
> 
> Well, if you are ending up with a function like 
> __sysmmu_enable_nocount() below with every line starting with two 
> underscores, do you think this improves code readability?
> 
> Of course this is a minor issue, but let's keep some code quality level 
> in Linux kernel.
> 

Ok. understood what your are concerning about.

> >>
> >>>    {
> >>> -	unsigned long flags;
> >>> -	bool disabled = false;
> >>> -
> >>> -	write_lock_irqsave(&data->lock, flags);
> 
> [snip]
> 
> Here's the function mentioned above:
> 
> >>> +
> >>> +static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data)
> >>> +{
> >>> +	__master_clk_enable(data);
> >>> +	__sysmmu_clk_enable(data);
> >>> +
> >>> +	__raw_writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL);
> >>> +
> >>> +	__sysmmu_init_config(data);
> >>> +
> >>> +	__sysmmu_set_ptbase(data->sfrbase, data->pgtable);
> >>> +
> >>> +	__raw_writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
> >>> +
> >>> +	__master_clk_disable(data);
> >>> +}
> >>> +
> 
> [snip]
> 
> >>>
> >>> @@ -629,7 +700,7 @@ err_pgtable:
> >>>    static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
> >>>    {
> >>>    	struct exynos_iommu_domain *priv = domain->priv;
> >>> -	struct sysmmu_drvdata *data;
> >>> +	struct exynos_iommu_owner *owner;
> >>>    	unsigned long flags;
> >>>    	int i;
> >>>
> >>> @@ -637,11 +708,14 @@ static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
> >>>
> >>>    	spin_lock_irqsave(&priv->lock, flags);
> >>>
> >>> -	list_for_each_entry(data, &priv->clients, node) {
> >>> -		while (!exynos_sysmmu_disable(data->dev))
> >>> +	list_for_each_entry(owner, &priv->clients, client) {
> >>> +		while (!exynos_sysmmu_disable(owner->dev))
> >>>    			; /* until System MMU is actually disabled */
> >>
> >> What about using list_for_each_entry_safe() and calling list_del_init()
> >> here directly?
> >>
> >
> > That require another variable to be defined.
> 
> Is it a problem?
> 
That is not a problem.
But I think using list_for_each_entry() is not a problem likewise.

> > I just wanted to avoid that because I think it is prettier.
> > Moreover, list_del_init() below the empty while() clause may make
> > the source code readers misunderstood..
> 
> This raises another question, why the loop above is even needed. 
> exynos_sysmmu_disable() should make sure that SYSMMU is actually 
> disabled, without any need for looping like this.

Some driver needs enabling sysmmu to be counted due to its complex structure.
It can be also removed by the driver with an extra effort
but the reality is important.
Device driver is not only for the scholarship but also for the real use.

> >>>    	}
> >>>
> >>> +	while (!list_empty(&priv->clients))
> >>> +		list_del_init(priv->clients.next);
> >>> +
> >>>    	spin_unlock_irqrestore(&priv->lock, flags);
> >>>
> >>>    	for (i = 0; i < NUM_LV1ENTRIES; i++)
> 
> [snip]
> 
> >>> +static int sysmmu_hook_driver_register(struct notifier_block *nb,
> >>> +					unsigned long val,
> >>> +					void *p)
> >>> +{
> >>> +	struct device *dev = p;
> >>> +
> >>> +	switch (val) {
> >>> +	case BUS_NOTIFY_BIND_DRIVER:
> >>> +	{
> >>> +		struct exynos_iommu_owner *owner;
> >>
> >> Please move this variable to the top of the function and drop the braces
> >> around case blocks.
> >
> > I don't think it is required because this function is modified
> > by the following patches.
> 
> OK, if so, and similar issue is not present after further patches.
> 
> >
> >>
> >>> +
> >>> +		/* No System MMU assigned. See exynos_sysmmu_probe(). */
> >>> +		if (dev->archdata.iommu == NULL)
> >>> +			break;
> >>
> >> This looks strange... (see below)
> >>
> >> Also this looks racy. There are no guarantees about device probing
> >> order, so you may end up with master devices being probed before the
> >> IOMMUs. Deferred probing should be used to handle this correctly.
> >
> > System MMU driver must be probed earlier than the drivers of master devices
> > because the drivers may want to use System MMU for their initial task.
> 
> As I said, there are no guarantees about platform device probe order in 
> Linux kernel. Code must be designed to check whether required 
> dependencies are met and if not, deferred probing must be used.
> 

I told that System MMU driver must be probed earlier.
That's why exynos_iommu_init() is called by subsys_initcall level.
I also noticed that it must be promoted to arch_initcall for some driver.

> >>
> >>> +
> >>> +		owner = devm_kzalloc(dev, sizeof(*owner), GFP_KERNEL);
> >>> +		if (!owner) {
> >>> +			dev_err(dev, "No Memory for exynos_iommu_owner\n");
> >>> +			return -ENOMEM;
> >>> +		}
> >>> +
> >>> +		owner->dev = dev;
> >>> +		INIT_LIST_HEAD(&owner->client);
> >>> +		owner->sysmmu = dev->archdata.iommu;
> >>> +
> >>> +		dev->archdata.iommu = owner;
> >>
> >> I don't understand what is happening here in this case statement. There
> >> is already something assigned to dev->archdata.iommu, but this code
> >> reassigns something else to this field. This means that you attempt to
> >> use the same field to store pointers to objects of different types,
> >> which I believe is broken, because you have no way to check what kind of
> >> object is currently pointed by such (void *) pointer.
> >>
> >
> > exynos_sysmmu_probe() assigns the device descriptor of the probing System MMU
> > to archdata.iommu of its master device. The assignment is just a marker that
> > the device is a master device of a System MMU.
> > If dev->archdata.iommu has a value in the case of BUS_NOTIFY_BIND_DRIVER,
> > the 'dev' is a master devices of a System MMU. Then sysmmu_hook_driver_register()
> > assigns the data structure to handle System MMU to dev->archdata.iommu.
> 
> Which is wrong (or even if you don't consider it wrong, it is ugly), 
> because one (void *) pointer in the same object is used to store 
> pointers to two completely different types.

archdata.iommu has a pointer to sysmmu descriptor for a very short time
during booting time to build complete data structure to handle System MMU.
It is it also a matter?

Anyway, this style of initialization just maintained to make the driver work
with the old design. It is changed with the following patches.


Regards,

KyongHo
Tomasz Figa March 19, 2014, 1:12 p.m. UTC | #5
On 19.03.2014 02:03, Cho KyongHo wrote:
> On Tue, 18 Mar 2014 16:09:50 +0100, Tomasz Figa wrote:
>> On 18.03.2014 10:56, Cho KyongHo wrote:
>>> On Fri, 14 Mar 2014 13:59:00 +0100, Tomasz Figa wrote:
>>>> Hi KyongHo,
>>>>
>>>> On 14.03.2014 06:08, Cho KyongHo wrote:

[snip]

>>>>> @@ -637,11 +708,14 @@ static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
>>>>>
>>>>>     	spin_lock_irqsave(&priv->lock, flags);
>>>>>
>>>>> -	list_for_each_entry(data, &priv->clients, node) {
>>>>> -		while (!exynos_sysmmu_disable(data->dev))
>>>>> +	list_for_each_entry(owner, &priv->clients, client) {
>>>>> +		while (!exynos_sysmmu_disable(owner->dev))
>>>>>     			; /* until System MMU is actually disabled */
>>>>
>>>> What about using list_for_each_entry_safe() and calling list_del_init()
>>>> here directly?
>>>>
>>>
>>> That require another variable to be defined.
>>
>> Is it a problem?
>>
> That is not a problem.
> But I think using list_for_each_entry() is not a problem likewise.
>
>>> I just wanted to avoid that because I think it is prettier.
>>> Moreover, list_del_init() below the empty while() clause may make
>>> the source code readers misunderstood..
>>
>> This raises another question, why the loop above is even needed.
>> exynos_sysmmu_disable() should make sure that SYSMMU is actually
>> disabled, without any need for looping like this.
>
> Some driver needs enabling sysmmu to be counted due to its complex structure.
> It can be also removed by the driver with an extra effort
> but the reality is important.

My comment was not about the need to have reference counting, as this is 
a common design pattern, but rather that there should be a low level 
power control function, which simply disables the IOMMU, without the 
need to loop through existing references.

Actually there is already such function - __sysmmu_disable_nocount() - 
so the code could look like:

	list_for_each_entry_safe(owner, n, &priv->clients, client) {
		__sysmmu_disable_nocount(owner->dev);
		list_del_init(&owner->client);
	}

As for reference counting in this use case in general, as far as I'm 
looking through the whole driver code, there is no other usage of this 
reference counting than exynos_iommu_attach_device() and 
exynos_iommu_detach_device(). Assuming that there is just a single 
master for each SYSMMU, I don't see why reference counting is even 
needed, as you can't bind a device to IOMMU multiple times.

> Device driver is not only for the scholarship but also for the real use.

Huh? I'm not sure what kind of comment is this.

>
>>>>>     	}
>>>>>
>>>>> +	while (!list_empty(&priv->clients))
>>>>> +		list_del_init(priv->clients.next);
>>>>> +
>>>>>     	spin_unlock_irqrestore(&priv->lock, flags);
>>>>>
>>>>>     	for (i = 0; i < NUM_LV1ENTRIES; i++)
>>
>> [snip]
>>
>>>>> +static int sysmmu_hook_driver_register(struct notifier_block *nb,
>>>>> +					unsigned long val,
>>>>> +					void *p)
>>>>> +{
>>>>> +	struct device *dev = p;
>>>>> +
>>>>> +	switch (val) {
>>>>> +	case BUS_NOTIFY_BIND_DRIVER:
>>>>> +	{
>>>>> +		struct exynos_iommu_owner *owner;
>>>>
>>>> Please move this variable to the top of the function and drop the braces
>>>> around case blocks.
>>>
>>> I don't think it is required because this function is modified
>>> by the following patches.
>>
>> OK, if so, and similar issue is not present after further patches.
>>
>>>
>>>>
>>>>> +
>>>>> +		/* No System MMU assigned. See exynos_sysmmu_probe(). */
>>>>> +		if (dev->archdata.iommu == NULL)
>>>>> +			break;
>>>>
>>>> This looks strange... (see below)
>>>>
>>>> Also this looks racy. There are no guarantees about device probing
>>>> order, so you may end up with master devices being probed before the
>>>> IOMMUs. Deferred probing should be used to handle this correctly.
>>>
>>> System MMU driver must be probed earlier than the drivers of master devices
>>> because the drivers may want to use System MMU for their initial task.
>>
>> As I said, there are no guarantees about platform device probe order in
>> Linux kernel. Code must be designed to check whether required
>> dependencies are met and if not, deferred probing must be used.
>>
>
> I told that System MMU driver must be probed earlier.
> That's why exynos_iommu_init() is called by subsys_initcall level.
> I also noticed that it must be promoted to arch_initcall for some driver.
>

No. Proper Linux drivers must support deferred probing mechanism and 
there should be no assumptions about probing orders. Using other 
initcall level than module_initcall for particular drivers is strongly 
discouraged.

>>>>
>>>>> +
>>>>> +		owner = devm_kzalloc(dev, sizeof(*owner), GFP_KERNEL);
>>>>> +		if (!owner) {
>>>>> +			dev_err(dev, "No Memory for exynos_iommu_owner\n");
>>>>> +			return -ENOMEM;
>>>>> +		}
>>>>> +
>>>>> +		owner->dev = dev;
>>>>> +		INIT_LIST_HEAD(&owner->client);
>>>>> +		owner->sysmmu = dev->archdata.iommu;
>>>>> +
>>>>> +		dev->archdata.iommu = owner;
>>>>
>>>> I don't understand what is happening here in this case statement. There
>>>> is already something assigned to dev->archdata.iommu, but this code
>>>> reassigns something else to this field. This means that you attempt to
>>>> use the same field to store pointers to objects of different types,
>>>> which I believe is broken, because you have no way to check what kind of
>>>> object is currently pointed by such (void *) pointer.
>>>>
>>>
>>> exynos_sysmmu_probe() assigns the device descriptor of the probing System MMU
>>> to archdata.iommu of its master device. The assignment is just a marker that
>>> the device is a master device of a System MMU.
>>> If dev->archdata.iommu has a value in the case of BUS_NOTIFY_BIND_DRIVER,
>>> the 'dev' is a master devices of a System MMU. Then sysmmu_hook_driver_register()
>>> assigns the data structure to handle System MMU to dev->archdata.iommu.
>>
>> Which is wrong (or even if you don't consider it wrong, it is ugly),
>> because one (void *) pointer in the same object is used to store
>> pointers to two completely different types.
>
> archdata.iommu has a pointer to sysmmu descriptor for a very short time
> during booting time to build complete data structure to handle System MMU.
> It is it also a matter?

Yes. It is a serious issue from code readability point of view, as you 
don't know what kind of object pointed by this pointer to expect when 
reading the driver.

Keep in mind that readability is one of the key factors affecting driver 
maintainability which is important if the driver is intended to be kept 
in mainline for many Linux kernel releases, adding support for new 
hardware variants and keeping the driver in good shape over future 
kernel changes (e.g. large refactors of upper layers, such as IOMMU 
subsystem).

>
> Anyway, this style of initialization just maintained to make the driver work
> with the old design. It is changed with the following patches.

OK. If this is just a temporary step, then I guess we can ignore this 
issue. I just noted that such usage is a problem in general.

Best regards,
Tomasz
Grant Grundler March 19, 2014, 4:54 p.m. UTC | #6
On Wed, Mar 19, 2014 at 6:12 AM, Tomasz Figa <t.figa@samsung.com> wrote:
...
>> Device driver is not only for the scholarship but also for the real use.
>
> Huh? I'm not sure what kind of comment is this.

I'm guessing Cho meant: "This isn't an academic exercise - I have a
real use case that requires reference counting."

Cho needs to be more specific about his "Some driver needs enabling
sysmmu" example. Then others would understand why/when the reference
counting is needed. Ie walk through a real driver that exists today
that depends on reference counting.

cheers,
grant
Grant Grundler March 19, 2014, 5:03 p.m. UTC | #7
On Wed, Mar 19, 2014 at 6:12 AM, Tomasz Figa <t.figa@samsung.com> wrote:
...
> No. Proper Linux drivers must support deferred probing mechanism and there
> should be no assumptions about probing orders. Using other initcall level
> than module_initcall for particular drivers is strongly discouraged.

That's true for "end-point" devices. It's not true for
"infrastructure": Memory, CPU, DMA, Interrupt handling, etc. Those
need to be in place before "normal" drivers get called. This SysMMU
driver provides DMA services for "normal" device drivers. Or do I see
that wrong?

thanks,
grant

ps. I've written IOMMU support for four different IOMMUs on three
operating systems (See drivers/parisc for two linux examples). But I
still feel like I at best have 80% understanding of how this one is
organized/works. Abstract descriptions and convoluted code have been
handicapping me (and lack of time to dig further).
Tomasz Figa March 19, 2014, 5:30 p.m. UTC | #8
Hi Grant,

On 19.03.2014 18:03, Grant Grundler wrote:
> On Wed, Mar 19, 2014 at 6:12 AM, Tomasz Figa <t.figa@samsung.com> wrote:
> ...
>> No. Proper Linux drivers must support deferred probing mechanism and there
>> should be no assumptions about probing orders. Using other initcall level
>> than module_initcall for particular drivers is strongly discouraged.
>
> That's true for "end-point" devices. It's not true for
> "infrastructure": Memory, CPU, DMA, Interrupt handling, etc. Those
> need to be in place before "normal" drivers get called. This SysMMU
> driver provides DMA services for "normal" device drivers. Or do I see
> that wrong?

Of course using an early initcall level would give you some kind of 
guarantees, but it wouldn't guarantee that someone couldn't lower 
initcall level for some MMU client driver and break the ordering anyway.

As I said, AFAIK the trend is to get rid of ordering by initcalls and 
make sure that drivers can handle missing dependencies properly, even 
for "services" such as DMA, GPIO, clocks and so on, which after all are 
provided by normal drivers like other.

>
> thanks,
> grant
>
> ps. I've written IOMMU support for four different IOMMUs on three
> operating systems (See drivers/parisc for two linux examples). But I
> still feel like I at best have 80% understanding of how this one is
> organized/works. Abstract descriptions and convoluted code have been
> handicapping me (and lack of time to dig further).

Well, this is one of my concerns with this driver. It isn't easy to read 
(and so review, maintain, extend and debug found issues).

Best regards,
Tomasz
Grant Grundler March 19, 2014, 6:37 p.m. UTC | #9
On Wed, Mar 19, 2014 at 10:30 AM, Tomasz Figa <t.figa@samsung.com> wrote:
...
> As I said, AFAIK the trend is to get rid of ordering by initcalls and make
> sure that drivers can handle missing dependencies properly, even for
> "services" such as DMA, GPIO, clocks and so on, which after all are provided
> by normal drivers like other.

Ok - I'm not following the general kernel dev trends. initcall()
levels are easy to understand and implement. So I would not be in a
hurry to replace them.

>> ps. I've written IOMMU support for four different IOMMUs on three
>> operating systems (See drivers/parisc for two linux examples). But I
>> still feel like I at best have 80% understanding of how this one is
>> organized/works. Abstract descriptions and convoluted code have been
>> handicapping me (and lack of time to dig further).
>
>
> Well, this is one of my concerns with this driver. It isn't easy to read
> (and so review, maintain, extend and debug found issues).

My postscript comment was more to explain why I'm not confident in my
opinion - not a reason to reject the patch series.  I still consider
the whole series as a step forward. But I'm not the expert here.

Right now, with ~30 patches posted by the exynos iommu (official?)
maintainer, no one else who has a clue will attempt to fix or clean up
those kinds of problems.  i.e. it's useful to enable others to fix
what are essentially unspecified "design pattern" issues.

cheers,
grant
Tomasz Figa March 19, 2014, 6:51 p.m. UTC | #10
On 19.03.2014 19:37, Grant Grundler wrote:
> On Wed, Mar 19, 2014 at 10:30 AM, Tomasz Figa <t.figa@samsung.com> wrote:
> ...
>> As I said, AFAIK the trend is to get rid of ordering by initcalls and make
>> sure that drivers can handle missing dependencies properly, even for
>> "services" such as DMA, GPIO, clocks and so on, which after all are provided
>> by normal drivers like other.
>
> Ok - I'm not following the general kernel dev trends. initcall()
> levels are easy to understand and implement. So I would not be in a
> hurry to replace them.
>

Well, initcall level is still a way to satisfy most of dependencies, 
i.e. all client devices with higher initcall levels will probe 
successfully. However the other case needs to be handled as well - in 
this case the IOMMU binding code needs to defer probe of client driver 
if respective IOMMU is not yet available.

>>> ps. I've written IOMMU support for four different IOMMUs on three
>>> operating systems (See drivers/parisc for two linux examples). But I
>>> still feel like I at best have 80% understanding of how this one is
>>> organized/works. Abstract descriptions and convoluted code have been
>>> handicapping me (and lack of time to dig further).
>>
>>
>> Well, this is one of my concerns with this driver. It isn't easy to read
>> (and so review, maintain, extend and debug found issues).
>
> My postscript comment was more to explain why I'm not confident in my
> opinion - not a reason to reject the patch series.  I still consider
> the whole series as a step forward. But I'm not the expert here.

I fully agree with you. Other than the issues mentioned in review, the 
patches are definitely a step forward. I'd even say that all the patches 
that have nothing to do with device tree could be merged in their 
current form and the code refined later. It doesn't mean that patches 
shouldn't be reviewed now and issues spotted reported, even if they 
could be fixed later - this is for the IOMMU subsystem maintainer to decide.

As for patches related to DT support, more care needs to be taken, as 
bindings should be designed with stability in mind, so the refining 
process should happen at review stage.

> Right now, with ~30 patches posted by the exynos iommu (official?)
> maintainer, no one else who has a clue will attempt to fix or clean up
> those kinds of problems.  i.e. it's useful to enable others to fix
> what are essentially unspecified "design pattern" issues.

Agreed.

Best regards,
Tomasz
Cho KyongHo March 20, 2014, 11:55 a.m. UTC | #11
On Wed, 19 Mar 2014 09:54:39 -0700, Grant Grundler wrote:
> On Wed, Mar 19, 2014 at 6:12 AM, Tomasz Figa <t.figa@samsung.com> wrote:
> ...
> >> Device driver is not only for the scholarship but also for the real use.
> >
> > Huh? I'm not sure what kind of comment is this.
> 
> I'm guessing Cho meant: "This isn't an academic exercise - I have a
> real use case that requires reference counting."

That is what I meant; Sorry for my poor English :-)

> Cho needs to be more specific about his "Some driver needs enabling
> sysmmu" example. Then others would understand why/when the reference
> counting is needed. Ie walk through a real driver that exists today
> that depends on reference counting.
>

One of my recent experience is that a display controller (FIMD) driver
of a SoC manages two different context of power management:
One is turning on and off display screen (LCD) (which is as usual as previous SoCs)
and the other is gating its internal clock including System MMU very
frequently to reduce power consumption.
Because System MMU driver holds its clock ungated while it is enabled,
FIMD driver explicitely disable System MMU.

Yes, well designed FIMD driver must care about balancing of
disabling and enabling System MMU between different contexts.
But the design of some complex driver may be poor in few features due to
agressive development schedule sometimes.

Please let me think about the counting.

Now I also think the system mmu driver does not need to make
an extra effort for some special cases.

Regards,

KyongHo
Cho KyongHo March 20, 2014, 12:07 p.m. UTC | #12
On Wed, 19 Mar 2014 19:51:21 +0100, Tomasz Figa wrote:
> On 19.03.2014 19:37, Grant Grundler wrote:
> > On Wed, Mar 19, 2014 at 10:30 AM, Tomasz Figa <t.figa@samsung.com> wrote:
> > ...
> >> As I said, AFAIK the trend is to get rid of ordering by initcalls and make
> >> sure that drivers can handle missing dependencies properly, even for
> >> "services" such as DMA, GPIO, clocks and so on, which after all are provided
> >> by normal drivers like other.
> >
> > Ok - I'm not following the general kernel dev trends. initcall()
> > levels are easy to understand and implement. So I would not be in a
> > hurry to replace them.
> >
> 
> Well, initcall level is still a way to satisfy most of dependencies, 
> i.e. all client devices with higher initcall levels will probe 
> successfully. However the other case needs to be handled as well - in 
> this case the IOMMU binding code needs to defer probe of client driver 
> if respective IOMMU is not yet available.

I now understand what is deferred probing you mentioned.
However, I worry that many existing drivers are not ready
for deferred probing.

But still I wonder if System MMU driver need to be probed in the same
initcall level.

> >>> ps. I've written IOMMU support for four different IOMMUs on three
> >>> operating systems (See drivers/parisc for two linux examples). But I
> >>> still feel like I at best have 80% understanding of how this one is
> >>> organized/works. Abstract descriptions and convoluted code have been
> >>> handicapping me (and lack of time to dig further).
> >>
> >>
> >> Well, this is one of my concerns with this driver. It isn't easy to read
> >> (and so review, maintain, extend and debug found issues).
> >
> > My postscript comment was more to explain why I'm not confident in my
> > opinion - not a reason to reject the patch series.  I still consider
> > the whole series as a step forward. But I'm not the expert here.
> 
> I fully agree with you. Other than the issues mentioned in review, the 
> patches are definitely a step forward. I'd even say that all the patches 
> that have nothing to do with device tree could be merged in their 
> current form and the code refined later. It doesn't mean that patches 
> shouldn't be reviewed now and issues spotted reported, even if they 
> could be fixed later - this is for the IOMMU subsystem maintainer to decide.
> 
> As for patches related to DT support, more care needs to be taken, as 
> bindings should be designed with stability in mind, so the refining 
> process should happen at review stage.
> 
> > Right now, with ~30 patches posted by the exynos iommu (official?)
> > maintainer, no one else who has a clue will attempt to fix or clean up
> > those kinds of problems.  i.e. it's useful to enable others to fix
> > what are essentially unspecified "design pattern" issues.
> 
> Agreed.

Let me wait for the way of binding System MMU and its master developed by Marek.

Regards,

KyongHo
diff mbox

Patch

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 3458349..6834556 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -27,6 +27,8 @@ 
 #include <linux/memblock.h>
 #include <linux/export.h>
 #include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/notifier.h>
 
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
@@ -111,6 +113,8 @@ 
 #define __master_clk_enable(data)	__clk_gate_ctrl(data, clk_master, en)
 #define __master_clk_disable(data)	__clk_gate_ctrl(data, clk_master, dis)
 
+#define has_sysmmu(dev)		(dev->archdata.iommu != NULL)
+
 static struct kmem_cache *lv2table_kmem_cache;
 
 static unsigned long *section_entry(unsigned long *pgtable, unsigned long iova)
@@ -159,6 +163,16 @@  static char *sysmmu_fault_name[SYSMMU_FAULTS_NUM] = {
 	"UNKNOWN FAULT"
 };
 
+/* attached to dev.archdata.iommu of the master device */
+struct exynos_iommu_owner {
+	struct list_head client; /* entry of exynos_iommu_domain.clients */
+	struct device *dev;
+	struct device *sysmmu;
+	struct iommu_domain *domain;
+	void *vmm_data;         /* IO virtual memory manager's data */
+	spinlock_t lock;        /* Lock to preserve consistency of System MMU */
+};
+
 struct exynos_iommu_domain {
 	struct list_head clients; /* list of sysmmu_drvdata.node */
 	unsigned long *pgtable; /* lv1 page table, 16KB */
@@ -168,9 +182,8 @@  struct exynos_iommu_domain {
 };
 
 struct sysmmu_drvdata {
-	struct list_head node; /* entry of exynos_iommu_domain.clients */
 	struct device *sysmmu;	/* System MMU's device descriptor */
-	struct device *dev;	/* Owner of system MMU */
+	struct device *master;	/* Owner of system MMU */
 	void __iomem *sfrbase;
 	struct clk *clk;
 	struct clk *clk_master;
@@ -239,7 +252,6 @@  static void __sysmmu_tlb_invalidate_entry(void __iomem *sfrbase,
 static void __sysmmu_set_ptbase(void __iomem *sfrbase,
 				       unsigned long pgd)
 {
-	__raw_writel(0x1, sfrbase + REG_MMU_CFG); /* 16KB LV1, LRU */
 	__raw_writel(pgd, sfrbase + REG_PT_BASE_ADDR);
 
 	__sysmmu_tlb_invalidate(sfrbase);
@@ -299,7 +311,7 @@  static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
 					itype, base, addr);
 		if (data->domain)
 			ret = report_iommu_fault(data->domain,
-					data->dev, addr, itype);
+					data->master, addr, itype);
 	}
 
 	/* fault is not recovered by fault handler */
@@ -316,116 +328,148 @@  static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static bool __exynos_sysmmu_disable(struct sysmmu_drvdata *data)
+static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data)
 {
-	unsigned long flags;
-	bool disabled = false;
-
-	write_lock_irqsave(&data->lock, flags);
-
-	if (!set_sysmmu_inactive(data))
-		goto finish;
-
-	__master_clk_enable(data);
+	clk_enable(data->clk_master);
 
 	__raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL);
+	__raw_writel(0, data->sfrbase + REG_MMU_CFG);
 
 	__sysmmu_clk_disable(data);
 	__master_clk_disable(data);
+}
 
-	disabled = true;
-	data->pgtable = 0;
-	data->domain = NULL;
-finish:
-	write_unlock_irqrestore(&data->lock, flags);
+static bool __sysmmu_disable(struct sysmmu_drvdata *data)
+{
+	bool disabled;
+	unsigned long flags;
+
+	write_lock_irqsave(&data->lock, flags);
+
+	disabled = set_sysmmu_inactive(data);
+
+	if (disabled) {
+		data->pgtable = 0;
+		data->domain = NULL;
+
+		__sysmmu_disable_nocount(data);
 
-	if (disabled)
 		dev_dbg(data->sysmmu, "Disabled\n");
-	else
-		dev_dbg(data->sysmmu, "%d times left to be disabled\n",
+	} else  {
+		dev_dbg(data->sysmmu, "%d times left to disable\n",
 					data->activations);
+	}
+
+	write_unlock_irqrestore(&data->lock, flags);
 
 	return disabled;
 }
 
-/* __exynos_sysmmu_enable: Enables System MMU
- *
- * returns -error if an error occurred and System MMU is not enabled,
- * 0 if the System MMU has been just enabled and 1 if System MMU was already
- * enabled before.
- */
-static int __exynos_sysmmu_enable(struct sysmmu_drvdata *data,
+static void __sysmmu_init_config(struct sysmmu_drvdata *data)
+{
+	unsigned long cfg = 0;
+
+	__raw_writel(cfg, data->sfrbase + REG_MMU_CFG);
+}
+
+static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data)
+{
+	__master_clk_enable(data);
+	__sysmmu_clk_enable(data);
+
+	__raw_writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL);
+
+	__sysmmu_init_config(data);
+
+	__sysmmu_set_ptbase(data->sfrbase, data->pgtable);
+
+	__raw_writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
+
+	__master_clk_disable(data);
+}
+
+static int __sysmmu_enable(struct sysmmu_drvdata *data,
 			unsigned long pgtable, struct iommu_domain *domain)
 {
 	int ret = 0;
 	unsigned long flags;
 
 	write_lock_irqsave(&data->lock, flags);
+	if (set_sysmmu_active(data)) {
+		data->pgtable = pgtable;
+		data->domain = domain;
 
-	if (!set_sysmmu_active(data)) {
-		if (WARN_ON(pgtable != data->pgtable)) {
-			ret = -EBUSY;
-			set_sysmmu_inactive(data);
-		} else {
-			ret = 1;
-		}
+		__sysmmu_enable_nocount(data);
 
-		dev_dbg(data->sysmmu, "Already enabled\n");
-		goto finish;
+		dev_dbg(data->sysmmu, "Enabled\n");
+	} else {
+		ret = (pgtable == data->pgtable) ? 1 : -EBUSY;
+
+		dev_dbg(data->sysmmu, "already enabled\n");
 	}
 
-	data->pgtable = pgtable;
+	if (WARN_ON(ret < 0))
+		set_sysmmu_inactive(data); /* decrement count */
 
-	__master_clk_enable(data);
-	__sysmmu_clk_enable(data);
+	write_unlock_irqrestore(&data->lock, flags);
 
-	__sysmmu_set_ptbase(data->sfrbase, pgtable);
+	return ret;
+}
 
-	__raw_writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
+/* __exynos_sysmmu_enable: Enables System MMU
+ *
+ * returns -error if an error occurred and System MMU is not enabled,
+ * 0 if the System MMU has been just enabled and 1 if System MMU was already
+ * enabled before.
+ */
+static int __exynos_sysmmu_enable(struct device *dev, unsigned long pgtable,
+				  struct iommu_domain *domain)
+{
+	int ret = 0;
+	unsigned long flags;
+	struct exynos_iommu_owner *owner = dev->archdata.iommu;
+	struct sysmmu_drvdata *data;
 
-	__master_clk_disable(data);
+	BUG_ON(!has_sysmmu(dev));
 
-	data->domain = domain;
+	spin_lock_irqsave(&owner->lock, flags);
 
-	dev_dbg(data->sysmmu, "Enabled\n");
-finish:
-	write_unlock_irqrestore(&data->lock, flags);
+	data = dev_get_drvdata(owner->sysmmu);
+
+	ret = __sysmmu_enable(data, pgtable, domain);
+	if (ret >= 0)
+		data->master = dev;
+
+	spin_unlock_irqrestore(&owner->lock, flags);
 
 	return ret;
 }
 
 int exynos_sysmmu_enable(struct device *dev, unsigned long pgtable)
 {
-	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
-	int ret;
-
 	BUG_ON(!memblock_is_memory(pgtable));
 
-	ret = pm_runtime_get_sync(data->sysmmu);
-	if (ret < 0) {
-		dev_dbg(data->sysmmu, "Failed to enable\n");
-		return ret;
-	}
-
-	ret = __exynos_sysmmu_enable(data, pgtable, NULL);
-	if (WARN_ON(ret < 0)) {
-		pm_runtime_put(data->sysmmu);
-		dev_err(data->sysmmu, "Already enabled with page table %#lx\n",
-			data->pgtable);
-	} else {
-		data->dev = dev;
-	}
-
-	return ret;
+	return __exynos_sysmmu_enable(dev, pgtable, NULL);
 }
 
 static bool exynos_sysmmu_disable(struct device *dev)
 {
-	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
-	bool disabled;
+	unsigned long flags;
+	bool disabled = true;
+	struct exynos_iommu_owner *owner = dev->archdata.iommu;
+	struct sysmmu_drvdata *data;
+
+	BUG_ON(!has_sysmmu(dev));
 
-	disabled = __exynos_sysmmu_disable(data);
-	pm_runtime_put(data->sysmmu);
+	spin_lock_irqsave(&owner->lock, flags);
+
+	data = dev_get_drvdata(owner->sysmmu);
+
+	disabled = __sysmmu_disable(data);
+	if (disabled)
+		data->master = NULL;
+
+	spin_unlock_irqrestore(&owner->lock, flags);
 
 	return disabled;
 }
@@ -433,11 +477,13 @@  static bool exynos_sysmmu_disable(struct device *dev)
 static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova,
 					size_t size)
 {
+	struct exynos_iommu_owner *owner = dev->archdata.iommu;
 	unsigned long flags;
-	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
+	struct sysmmu_drvdata *data;
 
-	read_lock_irqsave(&data->lock, flags);
+	data = dev_get_drvdata(owner->sysmmu);
 
+	read_lock_irqsave(&data->lock, flags);
 	if (is_sysmmu_active(data)) {
 		unsigned int maj;
 		unsigned int num_inv = 1;
@@ -465,19 +511,21 @@  static void sysmmu_tlb_invalidate_entry(struct device *dev, unsigned long iova,
 		}
 		__master_clk_disable(data);
 	} else {
-		dev_dbg(data->sysmmu, "Disabled. Skipping invalidating TLB.\n");
+		dev_dbg(dev, "disabled. Skipping TLB invalidation @ %#lx\n",
+			iova);
 	}
-
 	read_unlock_irqrestore(&data->lock, flags);
 }
 
 void exynos_sysmmu_tlb_invalidate(struct device *dev)
 {
+	struct exynos_iommu_owner *owner = dev->archdata.iommu;
 	unsigned long flags;
-	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
+	struct sysmmu_drvdata *data;
 
-	read_lock_irqsave(&data->lock, flags);
+	data = dev_get_drvdata(owner->sysmmu);
 
+	read_lock_irqsave(&data->lock, flags);
 	if (is_sysmmu_active(data)) {
 		__master_clk_enable(data);
 		if (sysmmu_block(data->sfrbase)) {
@@ -486,9 +534,8 @@  void exynos_sysmmu_tlb_invalidate(struct device *dev)
 		}
 		__master_clk_disable(data);
 	} else {
-		dev_dbg(data->sysmmu, "Disabled. Skipping invalidating TLB.\n");
+		dev_dbg(dev, "disabled. Skipping TLB invalidation\n");
 	}
-
 	read_unlock_irqrestore(&data->lock, flags);
 }
 
@@ -498,6 +545,7 @@  static int __init exynos_sysmmu_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct sysmmu_drvdata *data;
 	struct resource *res;
+	struct device_node *node;
 
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
@@ -549,9 +597,32 @@  static int __init exynos_sysmmu_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	/* Relation between master and System MMU is 1:1. */
+	node = of_parse_phandle(dev->of_node, "mmu-masters", 0);
+	if (node) {
+		struct platform_device *master = of_find_device_by_node(node);
+
+		if (!master) {
+			dev_err(dev, "%s: mmu-master '%s' not found\n",
+				__func__, node->name);
+			return -EINVAL;
+		}
+
+		if (master->dev.archdata.iommu != NULL) {
+			dev_err(dev, "%s: '%s' is master of other MMU\n",
+				__func__, node->name);
+			return -EINVAL;
+		}
+
+		/*
+		 * archdata.iommu will be initialized with exynos_iommu_client
+		 * in sysmmu_hook_driver_register().
+		 */
+		master->dev.archdata.iommu = dev;
+	}
+
 	data->sysmmu = dev;
 	rwlock_init(&data->lock);
-	INIT_LIST_HEAD(&data->node);
 
 	platform_set_drvdata(pdev, data);
 
@@ -629,7 +700,7 @@  err_pgtable:
 static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
 {
 	struct exynos_iommu_domain *priv = domain->priv;
-	struct sysmmu_drvdata *data;
+	struct exynos_iommu_owner *owner;
 	unsigned long flags;
 	int i;
 
@@ -637,11 +708,14 @@  static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
 
 	spin_lock_irqsave(&priv->lock, flags);
 
-	list_for_each_entry(data, &priv->clients, node) {
-		while (!exynos_sysmmu_disable(data->dev))
+	list_for_each_entry(owner, &priv->clients, client) {
+		while (!exynos_sysmmu_disable(owner->dev))
 			; /* until System MMU is actually disabled */
 	}
 
+	while (!list_empty(&priv->clients))
+		list_del_init(priv->clients.next);
+
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	for (i = 0; i < NUM_LV1ENTRIES; i++)
@@ -658,41 +732,28 @@  static void exynos_iommu_domain_destroy(struct iommu_domain *domain)
 static int exynos_iommu_attach_device(struct iommu_domain *domain,
 				   struct device *dev)
 {
-	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
+	struct exynos_iommu_owner *owner = dev->archdata.iommu;
 	struct exynos_iommu_domain *priv = domain->priv;
 	unsigned long flags;
 	int ret;
 
-	ret = pm_runtime_get_sync(data->sysmmu);
-	if (ret < 0)
-		return ret;
-
-	ret = 0;
-
 	spin_lock_irqsave(&priv->lock, flags);
 
-	ret = __exynos_sysmmu_enable(data, __pa(priv->pgtable), domain);
-
+	ret = __exynos_sysmmu_enable(dev, __pa(priv->pgtable), domain);
 	if (ret == 0) {
-		/* 'data->node' must not be appeared in priv->clients */
-		BUG_ON(!list_empty(&data->node));
-		data->dev = dev;
-		list_add_tail(&data->node, &priv->clients);
+		list_add_tail(&owner->client, &priv->clients);
+		owner->domain = domain;
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	if (ret < 0) {
-		dev_err(dev, "%s: Failed to attach IOMMU with pgtable %#lx\n",
+	if (ret < 0)
+		dev_err(dev, "%s: Failed to attach IOMMU with pgtable %#x\n",
 				__func__, __pa(priv->pgtable));
-		pm_runtime_put(data->sysmmu);
-	} else if (ret > 0) {
-		dev_dbg(dev, "%s: IOMMU with pgtable 0x%lx already attached\n",
-					__func__, __pa(priv->pgtable));
-	} else {
-		dev_dbg(dev, "%s: Attached new IOMMU with pgtable 0x%lx\n",
-					__func__, __pa(priv->pgtable));
-	}
+	else
+		dev_dbg(dev, "%s: Attached IOMMU with pgtable 0x%x%s\n",
+					__func__, __pa(priv->pgtable),
+					(ret == 0) ? "" : ", again");
 
 	return ret;
 }
@@ -700,39 +761,29 @@  static int exynos_iommu_attach_device(struct iommu_domain *domain,
 static void exynos_iommu_detach_device(struct iommu_domain *domain,
 				    struct device *dev)
 {
-	struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
+	struct exynos_iommu_owner *owner;
 	struct exynos_iommu_domain *priv = domain->priv;
-	struct list_head *pos;
 	unsigned long flags;
-	bool found = false;
 
 	spin_lock_irqsave(&priv->lock, flags);
 
-	list_for_each(pos, &priv->clients) {
-		if (list_entry(pos, struct sysmmu_drvdata, node) == data) {
-			found = true;
+	list_for_each_entry(owner, &priv->clients, client) {
+		if (owner == dev->archdata.iommu) {
+			if (exynos_sysmmu_disable(dev)) {
+				list_del_init(&owner->client);
+				owner->domain = NULL;
+			}
 			break;
 		}
 	}
 
-	if (!found)
-		goto finish;
-
-	if (__exynos_sysmmu_disable(data)) {
-		dev_dbg(dev, "%s: Detached IOMMU with pgtable %#lx\n",
-					__func__, __pa(priv->pgtable));
-		list_del_init(&data->node);
-
-	} else {
-		dev_dbg(dev, "%s: Detaching IOMMU with pgtable %#lx delayed",
-					__func__, __pa(priv->pgtable));
-	}
-
-finish:
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	if (found)
-		pm_runtime_put(data->sysmmu);
+	if (owner == dev->archdata.iommu)
+		dev_dbg(dev, "%s: Detached IOMMU with pgtable %#x\n",
+					__func__, __pa(priv->pgtable));
+	else
+		dev_dbg(dev, "%s: No IOMMU is attached\n", __func__);
 }
 
 static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova,
@@ -862,7 +913,7 @@  static size_t exynos_iommu_unmap(struct iommu_domain *domain,
 					       unsigned long iova, size_t size)
 {
 	struct exynos_iommu_domain *priv = domain->priv;
-	struct sysmmu_drvdata *data;
+	struct exynos_iommu_owner *owner;
 	unsigned long flags;
 	unsigned long *ent;
 	size_t err_pgsize;
@@ -923,8 +974,8 @@  done:
 	spin_unlock_irqrestore(&priv->pgtablelock, flags);
 
 	spin_lock_irqsave(&priv->lock, flags);
-	list_for_each_entry(data, &priv->clients, node)
-		sysmmu_tlb_invalidate_entry(data->dev, iova, size);
+	list_for_each_entry(owner, &priv->clients, client)
+		sysmmu_tlb_invalidate_entry(owner->dev, iova, size);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return size;
@@ -1009,3 +1060,59 @@  err_reg_driver:
 	return ret;
 }
 subsys_initcall(exynos_iommu_init);
+
+static int sysmmu_hook_driver_register(struct notifier_block *nb,
+					unsigned long val,
+					void *p)
+{
+	struct device *dev = p;
+
+	switch (val) {
+	case BUS_NOTIFY_BIND_DRIVER:
+	{
+		struct exynos_iommu_owner *owner;
+
+		/* No System MMU assigned. See exynos_sysmmu_probe(). */
+		if (dev->archdata.iommu == NULL)
+			break;
+
+		owner = devm_kzalloc(dev, sizeof(*owner), GFP_KERNEL);
+		if (!owner) {
+			dev_err(dev, "No Memory for exynos_iommu_owner\n");
+			return -ENOMEM;
+		}
+
+		owner->dev = dev;
+		INIT_LIST_HEAD(&owner->client);
+		owner->sysmmu = dev->archdata.iommu;
+
+		dev->archdata.iommu = owner;
+		break;
+	}
+	case BUS_NOTIFY_UNBOUND_DRIVER:
+	{
+		struct exynos_iommu_owner *owner = dev->archdata.iommu;
+		if (owner) {
+			struct device *sysmmu = owner->sysmmu;
+			/* if still attached to an iommu_domain. */
+			if (WARN_ON(!list_empty(&owner->client)))
+				iommu_detach_device(owner->domain, owner->dev);
+			devm_kfree(dev, owner);
+			dev->archdata.iommu = sysmmu;
+		}
+		break;
+	}
+	} /* switch (val) */
+
+	return 0;
+}
+
+static struct notifier_block sysmmu_notifier = {
+	.notifier_call = &sysmmu_hook_driver_register,
+};
+
+static int __init exynos_iommu_prepare(void)
+{
+	return bus_register_notifier(&platform_bus_type, &sysmmu_notifier);
+}
+arch_initcall(exynos_iommu_prepare);