diff mbox

[RFC,v2,7/7] xen/iommu: smmu-v3: Add Xen specific code to enable the ported driver

Message ID 1505954230-18892-8-git-send-email-sgoel@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Goel, Sameer Sept. 21, 2017, 12:37 a.m. UTC
This driver follows an approach similar to smmu driver. The intent here
is to reuse as much Linux code as possible.
- Glue code has been introduced to bridge the API calls.
- Called Linux functions from the Xen IOMMU function calls.
- Xen modifications are preceded by /*Xen: comment */

Signed-off-by: Sameer Goel <sgoel@codeaurora.org>
---
 xen/drivers/passthrough/arm/Makefile  |   1 +
 xen/drivers/passthrough/arm/smmu-v3.c | 853 +++++++++++++++++++++++++++++-----
 2 files changed, 738 insertions(+), 116 deletions(-)

Comments

Goel, Sameer Sept. 26, 2017, 12:03 a.m. UTC | #1
On 9/20/2017 6:37 PM, Sameer Goel wrote:
> This driver follows an approach similar to smmu driver. The intent here
> is to reuse as much Linux code as possible.
> - Glue code has been introduced to bridge the API calls.
> - Called Linux functions from the Xen IOMMU function calls.
> - Xen modifications are preceded by /*Xen: comment */
> 
> Signed-off-by: Sameer Goel <sgoel@codeaurora.org>
> ---
>  xen/drivers/passthrough/arm/Makefile  |   1 +
>  xen/drivers/passthrough/arm/smmu-v3.c | 853 +++++++++++++++++++++++++++++-----
>  2 files changed, 738 insertions(+), 116 deletions(-)
> 
> diff --git a/xen/drivers/passthrough/arm/Makefile b/xen/drivers/passthrough/arm/Makefile
> index f4cd26e..57a6da6 100644
> --- a/xen/drivers/passthrough/arm/Makefile
> +++ b/xen/drivers/passthrough/arm/Makefile
> @@ -1,2 +1,3 @@
>  obj-y += iommu.o
>  obj-y += smmu.o
> +obj-y += smmu-v3.o
> diff --git a/xen/drivers/passthrough/arm/smmu-v3.c b/xen/drivers/passthrough/arm/smmu-v3.c
> index 380969a..8f3b43d 100644
> --- a/xen/drivers/passthrough/arm/smmu-v3.c
> +++ b/xen/drivers/passthrough/arm/smmu-v3.c
> @@ -18,28 +18,266 @@
>   * Author: Will Deacon <will.deacon@arm.com>
>   *
>   * This driver is powered by bad coffee and bombay mix.
> + *
> + *
> + * Based on Linux drivers/iommu/arm-smmu-v3.c
> + * => commit bdf95923086fb359ccb44c815724c3ace1611c90
> + *
> + * Xen modifications:
> + * Sameer Goel <sgoel@codeaurora.org>
> + * Copyright (C) 2017, The Linux Foundation, All rights reserved.
> + *
>   */
>  
> -#include <linux/acpi.h>
> -#include <linux/acpi_iort.h>
> -#include <linux/delay.h>
> -#include <linux/dma-iommu.h>
> -#include <linux/err.h>
> -#include <linux/interrupt.h>
> -#include <linux/iommu.h>
> -#include <linux/iopoll.h>
> -#include <linux/module.h>
> -#include <linux/msi.h>
> -#include <linux/of.h>
> -#include <linux/of_address.h>
> -#include <linux/of_iommu.h>
> -#include <linux/of_platform.h>
> -#include <linux/pci.h>
> -#include <linux/platform_device.h>
> -
> -#include <linux/amba/bus.h>
> -
> -#include "io-pgtable.h"
> +#include <xen/config.h>
> +#include <xen/delay.h>
> +#include <xen/errno.h>
> +#include <xen/err.h>
> +#include <xen/irq.h>
> +#include <xen/lib.h>
> +#include <xen/list.h>
> +#include <xen/mm.h>
> +#include <xen/vmap.h>
> +#include <xen/rbtree.h>
> +#include <xen/sched.h>
> +#include <xen/sizes.h>
> +#include <asm/atomic.h>
> +#include <asm/device.h>
> +#include <asm/io.h>
> +#include <asm/platform.h>
> +#include <xen/acpi.h>
> +
> +typedef paddr_t phys_addr_t;
> +typedef paddr_t dma_addr_t;
> +
> +/* Alias to Xen device tree helpers */
> +#define device_node dt_device_node
> +#define of_phandle_args dt_phandle_args
> +#define of_device_id dt_device_match
> +#define of_match_node dt_match_node
> +#define of_property_read_u32(np, pname, out) (!dt_property_read_u32(np, pname, out))
> +#define of_property_read_bool dt_property_read_bool
> +#define of_parse_phandle_with_args dt_parse_phandle_with_args
> +#define mutex spinlock_t
> +#define mutex_init spin_lock_init
> +#define mutex_lock spin_lock
> +#define mutex_unlock spin_unlock
> +
> +/* Xen: Helpers to get device MMIO and IRQs */
> +struct resource {
> +	u64 addr;
> +	u64 size;
> +	unsigned int type;
> +};
> +
> +#define resource_size(res) ((res)->size)
> +
> +#define platform_device device
> +
> +#define IORESOURCE_MEM 0
> +#define IORESOURCE_IRQ 1
> +
> +static struct resource *platform_get_resource(struct platform_device *pdev,
> +					      unsigned int type,
> +					      unsigned int num)
> +{
> +	/*
> +	 * The resource is only used between 2 calls of platform_get_resource.
> +	 * It's quite ugly but it's avoid to add too much code in the part
> +	 * imported from Linux
> +	 */
> +	static struct resource res;
> +	struct acpi_iort_node *iort_node;
> +	struct acpi_iort_smmu_v3 *node_smmu_data;
> +	int ret = 0;
> +
> +	res.type = type;
> +
> +	switch (type) {
> +	case IORESOURCE_MEM:
> +		if (pdev->type == DEV_ACPI) {
> +			ret = 1;
> +			iort_node = pdev->acpi_node;
> +			node_smmu_data =
> +				(struct acpi_iort_smmu_v3 *)iort_node->node_data;
> +
> +			if (node_smmu_data != NULL) {
> +				res.addr = node_smmu_data->base_address;
> +				res.size = SZ_128K;
> +				ret = 0;
> +			}
> +		} else {
> +			ret = dt_device_get_address(dev_to_dt(pdev), num,
> +						    &res.addr, &res.size);
> +		}
> +
> +		return ((ret) ? NULL : &res);
> +
> +	case IORESOURCE_IRQ:
> +		ret = platform_get_irq(dev_to_dt(pdev), num);
> +
> +		if (ret < 0)
> +			return NULL;
> +
> +		res.addr = ret;
> +		res.size = 1;
> +
> +		return &res;
> +
> +	default:
> +		return NULL;
> +	}
> +}
> +
> +static int platform_get_irq_byname(struct platform_device *pdev, const char *name)
> +{
> +	const struct dt_property *dtprop;
> +	struct acpi_iort_node *iort_node;
> +	struct acpi_iort_smmu_v3 *node_smmu_data;
> +	int ret = 0;
> +
> +	if (pdev->type == DEV_ACPI) {
> +		iort_node = pdev->acpi_node;
> +		node_smmu_data = (struct acpi_iort_smmu_v3 *)iort_node->node_data;
> +
> +		if (node_smmu_data != NULL) {
> +			if (!strcmp(name, "eventq"))
> +				ret = node_smmu_data->event_gsiv;
> +			else if (!strcmp(name, "priq"))
> +				ret = node_smmu_data->pri_gsiv;
> +			else if (!strcmp(name, "cmdq-sync"))
> +				ret = node_smmu_data->sync_gsiv;
> +			else if (!strcmp(name, "gerror"))
> +				ret = node_smmu_data->gerr_gsiv;
> +			else
> +				ret = -EINVAL;
> +		}
> +	} else {
> +		dtprop = dt_find_property(dev_to_dt(pdev), "interrupt-names", NULL);
> +		if (!dtprop)
> +			return -EINVAL;
> +
> +		if (!dtprop->value)
> +			return -ENODATA;
> +	}
> +
> +	return ret;
> +}
> +
> +#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \
> +({ \
> +	s_time_t deadline = NOW() + MICROSECS(timeout_us); \
> +	for (;;) { \
> +		(val) = op(addr); \
> +		if (cond) \
> +			break; \
> +		if (NOW() > deadline) { \
> +			(val) = op(addr); \
> +			break; \
> +		} \
> +		cpu_relax(); \
> +		udelay(sleep_us); \
> +	} \
> +	(cond) ? 0 : -ETIMEDOUT; \
> +})
> +
> +#define readl_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \
> +	readx_poll_timeout(readl_relaxed, addr, val, cond, delay_us, timeout_us)
> +
> +/* Xen: Helpers for IRQ functions */
> +#define request_irq(irq, func, flags, name, dev) request_irq(irq, flags, func, name, dev)
> +#define free_irq release_irq
> +
> +enum irqreturn {
> +	IRQ_NONE	= (0 << 0),
> +	IRQ_HANDLED	= (1 << 0),
> +};
> +
> +typedef enum irqreturn irqreturn_t;
> +
> +/* Device logger functions */
> +#define dev_print(dev, lvl, fmt, ...)						\
> +	 printk(lvl "smmu: " fmt, ## __VA_ARGS__)
> +
> +#define dev_dbg(dev, fmt, ...) dev_print(dev, XENLOG_DEBUG, fmt, ## __VA_ARGS__)
> +#define dev_notice(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
> +#define dev_warn(dev, fmt, ...) dev_print(dev, XENLOG_WARNING, fmt, ## __VA_ARGS__)
> +#define dev_err(dev, fmt, ...) dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
> +#define dev_info(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
> +
> +#define dev_err_ratelimited(dev, fmt, ...)					\
> +	 dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
> +
> +#define dev_name(dev) dt_node_full_name(dev_to_dt(dev))
> +
> +/* Alias to Xen allocation helpers */
> +#define kfree xfree
> +#define kmalloc(size, flags)		_xmalloc(size, sizeof(void *))
> +#define kzalloc(size, flags)		_xzalloc(size, sizeof(void *))
> +#define devm_kzalloc(dev, size, flags)	_xzalloc(size, sizeof(void *))
> +#define kmalloc_array(size, n, flags)	_xmalloc_array(size, sizeof(void *), n)
> +
> +/* Compatibility defines */
> +#undef WARN_ON
> +#define WARN_ON(cond) (!!cond)
> +#define WARN_ON_ONCE(cond) WARN_ON(cond)
> +
> +static void __iomem *devm_ioremap_resource(struct device *dev,
> +					   struct resource *res)
> +{
> +	void __iomem *ptr;
> +
> +	if (!res || res->type != IORESOURCE_MEM) {
> +		dev_err(dev, "Invalid resource\n");
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	ptr = ioremap_nocache(res->addr, res->size);
> +	if (!ptr) {
> +		dev_err(dev,
> +			"ioremap failed (addr 0x%"PRIx64" size 0x%"PRIx64")\n",
> +			res->addr, res->size);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	return ptr;
> +}
> +
> +/* Xen: Dummy iommu_domain */
> +struct iommu_domain {
> +	/* Runtime SMMU configuration for this iommu_domain */
> +	struct arm_smmu_domain		*priv;
> +	unsigned int			type;
> +
> +	atomic_t ref;
> +	/* Used to link iommu_domain contexts for a same domain.
> +	 * There is at least one per-SMMU to used by the domain.
> +	 */
> +	struct list_head		list;
> +};
> +/* Xen: Domain type definitions. Not really needed for Xen, defining to port
> + * Linux code as-is
> + */
> +#define IOMMU_DOMAIN_UNMANAGED 0
> +#define IOMMU_DOMAIN_DMA 1
> +#define IOMMU_DOMAIN_IDENTITY 2
> +
> +/* Xen: Describes information required for a Xen domain */
> +struct arm_smmu_xen_domain {
> +	spinlock_t			lock;
> +	/* List of iommu domains associated to this domain */
> +	struct list_head		iommu_domains;
> +};
> +
> +/*
> + * Xen: Information about each device stored in dev->archdata.iommu
> + *
> + * The dev->archdata.iommu stores the iommu_domain (runtime configuration of
> + * the SMMU).
> + */
> +struct arm_smmu_xen_device {
> +	struct iommu_domain *domain;
> +};
>  
>  /* MMIO registers */
>  #define ARM_SMMU_IDR0			0x0
> @@ -412,10 +650,12 @@
>  #define MSI_IOVA_BASE			0x8000000
>  #define MSI_IOVA_LENGTH			0x100000
>  
> +#if 0 /* Not applicable for Xen */
>  static bool disable_bypass;
>  module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
>  MODULE_PARM_DESC(disable_bypass,
>  	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
> +#endif
>  
>  enum pri_resp {
>  	PRI_RESP_DENY,
> @@ -423,6 +663,7 @@ enum pri_resp {
>  	PRI_RESP_SUCC,
>  };
>  
> +#if 0 /* Xen: No MSI support in this iteration */
>  enum arm_smmu_msi_index {
>  	EVTQ_MSI_INDEX,
>  	GERROR_MSI_INDEX,
> @@ -447,6 +688,7 @@ static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
>  		ARM_SMMU_PRIQ_IRQ_CFG2,
>  	},
>  };
> +#endif
>  
>  struct arm_smmu_cmdq_ent {
>  	/* Common fields */
> @@ -551,6 +793,8 @@ struct arm_smmu_s2_cfg {
>  	u16				vmid;
>  	u64				vttbr;
>  	u64				vtcr;
> +	/* Xen: Domain associated to this configuration */
> +	struct domain			*domain;
>  };
>  
>  struct arm_smmu_strtab_ent {
> @@ -623,9 +867,20 @@ struct arm_smmu_device {
>  	struct arm_smmu_strtab_cfg	strtab_cfg;
>  
>  	/* IOMMU core code handle */
> -	struct iommu_device		iommu;
> +	//struct iommu_device		iommu;
> +
> +	/* Xen: Need to keep a list of SMMU devices */
> +	struct list_head                devices;
>  };
>  
> +/* Xen: Keep a list of devices associated with this driver */
> +static DEFINE_SPINLOCK(arm_smmu_devices_lock);
> +static LIST_HEAD(arm_smmu_devices);
> +/* Xen: Helper for finding a device using fwnode */
> +static
> +struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode);
> +
> +
>  /* SMMU private data for each master */
>  struct arm_smmu_master_data {
>  	struct arm_smmu_device		*smmu;
> @@ -642,7 +897,7 @@ enum arm_smmu_domain_stage {
>  
>  struct arm_smmu_domain {
>  	struct arm_smmu_device		*smmu;
> -	struct mutex			init_mutex; /* Protects smmu pointer */
> +	mutex			init_mutex; /* Protects smmu pointer */
>  
>  	struct io_pgtable_ops		*pgtbl_ops;
>  	spinlock_t			pgtbl_lock;
> @@ -737,15 +992,16 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
>   */
>  static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>  {
> -	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
> +	s_time_t deadline = NOW() + MICROSECS(ARM_SMMU_POLL_TIMEOUT_US);
>  
>  	while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
> -		if (ktime_compare(ktime_get(), timeout) > 0)
> +
> +		if (NOW() > deadline)
>  			return -ETIMEDOUT;
>  
> -		if (wfe) {
> +		if (wfe)
>  			wfe();
> -		} else {
> +		else {
>  			cpu_relax();
>  			udelay(1);
>  		}
> @@ -931,7 +1187,7 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
>  		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
>  	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
>  }
> -
> +#if 0
>  /* Context descriptor manipulation functions */
>  static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
>  {
> @@ -974,7 +1230,7 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
>  
>  	cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
>  }
> -
> +#endif
>  /* Stream table manipulation functions */
>  static void
>  arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
> @@ -1044,7 +1300,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
>  			ste_live = true;
>  			break;
>  		case STRTAB_STE_0_CFG_ABORT:
> -			if (disable_bypass)
> +			//No bypass override for Xen
>  				break;
>  		default:
>  			BUG(); /* STE corruption */
> @@ -1056,7 +1312,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
>  
>  	/* Bypass/fault */
>  	if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
> -		if (!ste->assigned && disable_bypass)
> +		if (!ste->assigned)
>  			val |= STRTAB_STE_0_CFG_ABORT;
>  		else
>  			val |= STRTAB_STE_0_CFG_BYPASS;
> @@ -1135,16 +1391,20 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
>  	void *strtab;
>  	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
>  	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
> +	u32 alignment = 0;
>  
>  	if (desc->l2ptr)
>  		return 0;
>  
> -	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
> +	size = 1 << (STRTAB_SPLIT + LOG_2(STRTAB_STE_DWORDS) + 3);
>  	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
>  
>  	desc->span = STRTAB_SPLIT + 1;
> -	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
> -					  GFP_KERNEL | __GFP_ZERO);
> +
> +	alignment = 1 << ((5 + (desc->span - 1)));
> +	desc->l2ptr = _xzalloc(size, alignment);
> +	desc->l2ptr_dma = virt_to_maddr(desc->l2ptr);
> +
>  	if (!desc->l2ptr) {
>  		dev_err(smmu->dev,
>  			"failed to allocate l2 stream table for SID %u\n",
> @@ -1158,7 +1418,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
>  }
>  
>  /* IRQ and event handlers */
> -static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
> +static void arm_smmu_evtq_thread(int irq, void *dev, struct cpu_user_regs *regs)
>  {
>  	int i;
>  	struct arm_smmu_device *smmu = dev;
> @@ -1186,7 +1446,6 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>  
>  	/* Sync our overflow flag, as we believe we're up to speed */
>  	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
> -	return IRQ_HANDLED;
>  }
>  
>  static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
> @@ -1203,7 +1462,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
>  
>  	dev_info(smmu->dev, "unexpected PRI request received:\n");
>  	dev_info(smmu->dev,
> -		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
> +		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016lx\n",
>  		 sid, ssid, grpid, last ? "L" : "",
>  		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
>  		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
> @@ -1227,7 +1486,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
>  	}
>  }
>  
> -static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
> +static void arm_smmu_priq_thread(int irq, void *dev, struct cpu_user_regs *regs)
>  {
>  	struct arm_smmu_device *smmu = dev;
>  	struct arm_smmu_queue *q = &smmu->priq.q;
> @@ -1243,18 +1502,16 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
>  
>  	/* Sync our overflow flag, as we believe we're up to speed */
>  	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
> -	return IRQ_HANDLED;
>  }
>  
> -static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
> +static void arm_smmu_cmdq_sync_handler(int irq, void *dev, struct cpu_user_regs *regs)
>  {
>  	/* We don't actually use CMD_SYNC interrupts for anything */
> -	return IRQ_HANDLED;
>  }
>  
>  static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
>  
> -static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
> +static void arm_smmu_gerror_handler(int irq, void *dev, struct cpu_user_regs *regs)
>  {
>  	u32 gerror, gerrorn, active;
>  	struct arm_smmu_device *smmu = dev;
> @@ -1264,7 +1521,7 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
>  
>  	active = gerror ^ gerrorn;
>  	if (!(active & GERROR_ERR_MASK))
> -		return IRQ_NONE; /* No errors pending */
> +		return; /* No errors pending */
>  
>  	dev_warn(smmu->dev,
>  		 "unexpected global error reported (0x%08x), this could be serious\n",
> @@ -1286,7 +1543,7 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
>  
>  	if (active & GERROR_MSI_CMDQ_ABT_ERR) {
>  		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
> -		arm_smmu_cmdq_sync_handler(irq, smmu->dev);
> +		arm_smmu_cmdq_sync_handler(irq, smmu->dev, NULL);
>  	}
>  
>  	if (active & GERROR_PRIQ_ABT_ERR)
> @@ -1299,7 +1556,6 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
>  		arm_smmu_cmdq_skip_err(smmu);
>  
>  	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
> -	return IRQ_HANDLED;
>  }
>  
>  /* IO_PGTABLE API */
> @@ -1311,11 +1567,13 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
>  	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
>  }
>  
> +#if 0 /*Xen: Unused function */
>  static void arm_smmu_tlb_sync(void *cookie)
>  {
>  	struct arm_smmu_domain *smmu_domain = cookie;
>  	__arm_smmu_tlb_sync(smmu_domain->smmu);
>  }
> +#endif
>  
>  static void arm_smmu_tlb_inv_context(void *cookie)
>  {
> @@ -1336,6 +1594,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
>  	__arm_smmu_tlb_sync(smmu);
>  }
>  
> +#if 0 /*Xen: Unused functionality */
>  static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
>  					  size_t granule, bool leaf, void *cookie)
>  {
> @@ -1362,7 +1621,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
>  	} while (size -= granule);
>  }
>  
> -static const struct iommu_gather_ops arm_smmu_gather_ops = {
> +static struct iommu_gather_ops arm_smmu_gather_ops = {
>  	.tlb_flush_all	= arm_smmu_tlb_inv_context,
>  	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
>  	.tlb_sync	= arm_smmu_tlb_sync,
> @@ -1380,6 +1639,11 @@ static bool arm_smmu_capable(enum iommu_cap cap)
>  		return false;
>  	}
>  }
> +#endif
> +/* Xen: Stub out DMA domain related functions */
> +#define iommu_get_dma_cookie(dom) 0
> +#define iommu_put_dma_cookie(dom) 0
> +
>  
>  static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>  {
> @@ -1410,6 +1674,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>  	return &smmu_domain->domain;
>  }
>  
> +#if 0
>  static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
>  {
>  	int idx, size = 1 << span;
> @@ -1427,36 +1692,20 @@ static void arm_smmu_bitmap_free(unsigned long *map, int idx)
>  {
>  	clear_bit(idx, map);
>  }
> +#endif
>  
>  static void arm_smmu_domain_free(struct iommu_domain *domain)
>  {
>  	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> -	struct arm_smmu_device *smmu = smmu_domain->smmu;
> -
> -	iommu_put_dma_cookie(domain);
> -	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
> -
> -	/* Free the CD and ASID, if we allocated them */
> -	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
> -		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> -
> -		if (cfg->cdptr) {
> -			dmam_free_coherent(smmu_domain->smmu->dev,
> -					   CTXDESC_CD_DWORDS << 3,
> -					   cfg->cdptr,
> -					   cfg->cdptr_dma);
> -
> -			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
> -		}
> -	} else {
> -		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
> -		if (cfg->vmid)
> -			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
> -	}
> +	/*
> +	 * Xen: Remove the free functions that are not used and code related
> +	 * to S1 translation. We just need to free the domain here.
> +	 */
>  
>  	kfree(smmu_domain);
>  }
>  
> +#if 0
>  static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
>  				       struct io_pgtable_cfg *pgtbl_cfg)
>  {
> @@ -1488,33 +1737,30 @@ out_free_asid:
>  	arm_smmu_bitmap_free(smmu->asid_map, asid);
>  	return ret;
>  }
> +#endif
>  
> -static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
> -				       struct io_pgtable_cfg *pgtbl_cfg)
> +static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain)
>  {
> -	int vmid;
> -	struct arm_smmu_device *smmu = smmu_domain->smmu;
>  	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
>  
> -	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
> -	if (vmid < 0)
> -		return vmid;
> +	/* Xen: Set the values as needed */
>  
> -	cfg->vmid	= (u16)vmid;
> -	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
> -	cfg->vtcr	= pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
> +	cfg->vmid	= cfg->domain->arch.p2m.vmid;
> +	cfg->vttbr	= page_to_maddr(cfg->domain->arch.p2m.root);
> +	cfg->vtcr	= READ_SYSREG32(VTCR_EL2);
>  	return 0;
>  }
>  
>  static int arm_smmu_domain_finalise(struct iommu_domain *domain)
>  {
>  	int ret;
> +#if 0	/* Xen: pgtbl_cfg not needed. So modify the function as needed */
>  	unsigned long ias, oas;
>  	enum io_pgtable_fmt fmt;
>  	struct io_pgtable_cfg pgtbl_cfg;
>  	struct io_pgtable_ops *pgtbl_ops;
> -	int (*finalise_stage_fn)(struct arm_smmu_domain *,
> -				 struct io_pgtable_cfg *);
> +	int (*finalise_stage_fn)(struct arm_smmu_domain *);
> +#endif
>  	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>  	struct arm_smmu_device *smmu = smmu_domain->smmu;
>  
> @@ -1529,6 +1775,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
>  	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
>  		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
>  
> +#if 0
>  	switch (smmu_domain->stage) {
>  	case ARM_SMMU_DOMAIN_S1:
>  		ias = VA_BITS;
> @@ -1563,10 +1810,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
>  	domain->geometry.aperture_end = (1UL << ias) - 1;
>  	domain->geometry.force_aperture = true;
>  	smmu_domain->pgtbl_ops = pgtbl_ops;
> -
>  	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
>  	if (ret < 0)
>  		free_io_pgtable_ops(pgtbl_ops);
> +#endif
> +	ret = arm_smmu_domain_finalise_s2(smmu_domain);
>  
>  	return ret;
>  }
> @@ -1660,7 +1908,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>  	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
>  		ste->s1_cfg = &smmu_domain->s1_cfg;
>  		ste->s2_cfg = NULL;
> -		arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
> +		/*Xen: S1 configuratio not needed */
> +		//arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
>  	} else {
>  		ste->s1_cfg = NULL;
>  		ste->s2_cfg = &smmu_domain->s2_cfg;
> @@ -1672,6 +1921,7 @@ out_unlock:
>  	return ret;
>  }
>  
> +#if 0
>  static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
>  			phys_addr_t paddr, size_t size, int prot)
>  {
> @@ -1737,11 +1987,13 @@ static int arm_smmu_match_node(struct device *dev, void *data)
>  static
>  struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
>  {
> +	/* Xen; Function modified as needed for Xen.*/
>  	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
>  						fwnode, arm_smmu_match_node);
>  	put_device(dev);
>  	return dev ? dev_get_drvdata(dev) : NULL;
>  }
> +#endif
>  
>  static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
>  {
> @@ -1752,8 +2004,9 @@ static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
>  
>  	return sid < limit;
>  }
> -
> +#if 0
>  static struct iommu_ops arm_smmu_ops;
> +#endif
>  
>  static int arm_smmu_add_device(struct device *dev)
>  {
> @@ -1761,9 +2014,9 @@ static int arm_smmu_add_device(struct device *dev)
>  	struct arm_smmu_device *smmu;
>  	struct arm_smmu_master_data *master;
>  	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
> -	struct iommu_group *group;
>  
> -	if (!fwspec || fwspec->ops != &arm_smmu_ops)
> +	/* Xen: fwspec->ops are not needed */
> +	if (!fwspec)
>  		return -ENODEV;
>  	/*
>  	 * We _can_ actually withstand dodgy bus code re-calling add_device()
> @@ -1800,6 +2053,12 @@ static int arm_smmu_add_device(struct device *dev)
>  		}
>  	}
>  
> +#if 0
> +/*
> + * Xen: Do not need an iommu group as the stream data is carried by the SMMU
> + * master device object
> + */
> +
>  	group = iommu_group_get_for_dev(dev);
>  	if (!IS_ERR(group)) {
>  		iommu_group_put(group);
> @@ -1807,8 +2066,16 @@ static int arm_smmu_add_device(struct device *dev)
>  	}
>  
>  	return PTR_ERR_OR_ZERO(group);
> +#endif
> +	return 0;
>  }
>  
> +/*
> + * Xen: We can potentially support this function and destroy a device. This
> + * will be relevant for PCI hotplug. So, will be implemented as needed after
> + * passthrough support is available.
> + */
> +#if 0
>  static void arm_smmu_remove_device(struct device *dev)
>  {
>  	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
> @@ -1944,7 +2211,7 @@ static struct iommu_ops arm_smmu_ops = {
>  	.put_resv_regions	= arm_smmu_put_resv_regions,
>  	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
>  };
> -
> +#endif
>  /* Probing and initialisation functions */
>  static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
>  				   struct arm_smmu_queue *q,
> @@ -1954,7 +2221,12 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
>  {
>  	size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
>  
> -	q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
> +	/* The SMMU cache coherency property is always set. Since we are sharing the CPU translation tables
> +	 * just make a regular allocation.
> +	 */
> +	q->base = _xzalloc(qsz, sizeof(void *));
> +	q->base_dma = virt_to_maddr(q->base);
> +
>  	if (!q->base) {
>  		dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
>  			qsz);
> @@ -2025,10 +2297,11 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
>  	void *strtab;
>  	u64 reg;
>  	u32 size, l1size;
> +	u32 alignment;
>  	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
>  
>  	/* Calculate the L1 size, capped to the SIDSIZE. */
> -	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
> +	size = STRTAB_L1_SZ_SHIFT - (LOG_2(STRTAB_L1_DESC_DWORDS) + 3);
>  	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
>  	cfg->num_l1_ents = 1 << size;
>  
> @@ -2039,8 +2312,13 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
>  			 size, smmu->sid_bits);
>  
>  	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
> -	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
> -				     GFP_KERNEL | __GFP_ZERO);
> +
> +	alignment = max_t(u32, cfg->num_l1_ents, 64);
> +	//strtab = _xzalloc(l1size, sizeof(void *));
> +	strtab = _xzalloc(l1size, l1size);
> +	cfg->strtab_dma = virt_to_maddr(strtab);
> +
> +	//dsb(sy);
>  	if (!strtab) {
>  		dev_err(smmu->dev,
>  			"failed to allocate l1 stream table (%u bytes)\n",
> @@ -2068,8 +2346,9 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
>  	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
>  
>  	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
> -	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
> -				     GFP_KERNEL | __GFP_ZERO);
> +	strtab = _xzalloc(size, size);
> +	cfg->strtab_dma = virt_to_maddr(strtab);
> +
>  	if (!strtab) {
>  		dev_err(smmu->dev,
>  			"failed to allocate linear stream table (%u bytes)\n",
> @@ -2152,6 +2431,8 @@ static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
>  					  1, ARM_SMMU_POLL_TIMEOUT_US);
>  }
>  
> +/* Xen: There is no MSI support as yet */
> +#if 0
>  static void arm_smmu_free_msis(void *data)
>  {
>  	struct device *dev = data;
> @@ -2217,7 +2498,7 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
>  	/* Add callback to free MSIs on teardown */
>  	devm_add_action(dev, arm_smmu_free_msis, dev);
>  }
> -
> +#endif
>  static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
>  {
>  	int ret, irq;
> @@ -2231,31 +2512,31 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
>  		return ret;
>  	}
>  
> -	arm_smmu_setup_msis(smmu);
> +	//arm_smmu_setup_msis(smmu);
>  
>  	/* Request interrupt lines */
>  	irq = smmu->evtq.q.irq;
>  	if (irq) {
> -		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
> -						arm_smmu_evtq_thread,
> -						IRQF_ONESHOT,
> -						"arm-smmu-v3-evtq", smmu);
> +		irq_set_type(irq, IRQ_TYPE_EDGE_BOTH);
> +		ret = request_irq(irq, arm_smmu_evtq_thread,
> +						0, "arm-smmu-v3-evtq", smmu);
>  		if (ret < 0)
>  			dev_warn(smmu->dev, "failed to enable evtq irq\n");
>  	}
>  
>  	irq = smmu->cmdq.q.irq;
>  	if (irq) {
> -		ret = devm_request_irq(smmu->dev, irq,
> -				       arm_smmu_cmdq_sync_handler, 0,
> -				       "arm-smmu-v3-cmdq-sync", smmu);
> +		irq_set_type(irq, IRQ_TYPE_EDGE_BOTH);
> +		ret = request_irq(irq, arm_smmu_cmdq_sync_handler,
> +				0, "arm-smmu-v3-cmdq-sync", smmu);
>  		if (ret < 0)
>  			dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
>  	}
>  
>  	irq = smmu->gerr_irq;
>  	if (irq) {
> -		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
> +		irq_set_type(irq, IRQ_TYPE_EDGE_BOTH);
> +		ret = request_irq(irq, arm_smmu_gerror_handler,
>  				       0, "arm-smmu-v3-gerror", smmu);
>  		if (ret < 0)
>  			dev_warn(smmu->dev, "failed to enable gerror irq\n");
> @@ -2263,12 +2544,13 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
>  
>  	if (smmu->features & ARM_SMMU_FEAT_PRI) {
>  		irq = smmu->priq.q.irq;
> +		irq_set_type(irq, IRQ_TYPE_EDGE_BOTH);
>  		if (irq) {
> -			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
> -							arm_smmu_priq_thread,
> -							IRQF_ONESHOT,
> -							"arm-smmu-v3-priq",
> -							smmu);
> +			ret = request_irq(irq,
> +					  arm_smmu_priq_thread,
> +					  0,
> +					  "arm-smmu-v3-priq",
> +					  smmu);
>  			if (ret < 0)
>  				dev_warn(smmu->dev,
>  					 "failed to enable priq irq\n");
> @@ -2400,7 +2682,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
>  
>  
>  	/* Enable the SMMU interface, or ensure bypass */
> -	if (!bypass || disable_bypass) {
> +	if (!bypass) {
>  		enables |= CR0_SMMUEN;
>  	} else {
>  		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
> @@ -2488,8 +2770,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
>  		smmu->features |= ARM_SMMU_FEAT_STALLS;
>  	}
>  
> +#if 0/* Xen: Do not enable Stage 1 translations */
> +
>  	if (reg & IDR0_S1P)
>  		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
> +#endif
>  
>  	if (reg & IDR0_S2P)
>  		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
> @@ -2562,10 +2847,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
>  	if (reg & IDR5_GRAN4K)
>  		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
>  
> +#if 0 /* Xen: SMMU ops do not have a pgsize_bitmap member for Xen */
>  	if (arm_smmu_ops.pgsize_bitmap == -1UL)
>  		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
>  	else
>  		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
> +#endif
>  
>  	/* Output address size */
>  	switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
> @@ -2592,10 +2879,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
>  		smmu->oas = 48;
>  	}
>  
> +#if 0 /* Xen: There is no support for DMA mask */
>  	/* Set the DMA mask for our table walker */
>  	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
>  		dev_warn(smmu->dev,
>  			 "failed to set DMA mask for table walker\n");
> +#endif
>  
>  	smmu->ias = max(smmu->ias, smmu->oas);
>  
> @@ -2612,7 +2901,8 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
>  	struct device *dev = smmu->dev;
>  	struct acpi_iort_node *node;
>  
> -	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
> +	/* Xen: Modification to get iort_node */
> +	node = (struct acpi_iort_node *)dev->acpi_node;
>  
>  	/* Retrieve SMMUv3 specific data */
>  	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
> @@ -2633,7 +2923,7 @@ static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
>  static int arm_smmu_device_dt_probe(struct platform_device *pdev,
>  				    struct arm_smmu_device *smmu)
>  {
> -	struct device *dev = &pdev->dev;
> +	struct device *dev = pdev;
>  	u32 cells;
>  	int ret = -EINVAL;
>  
> @@ -2646,8 +2936,8 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
>  
>  	parse_driver_options(smmu);
>  
> -	if (of_dma_is_coherent(dev->of_node))
> -		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
> +	/* Xen: Set the COHERNECY feature */
> +	smmu->features |= ARM_SMMU_FEAT_COHERENCY;
>  
>  	return ret;
>  }
> @@ -2656,9 +2946,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>  {
>  	int irq, ret;
>  	struct resource *res;
> -	resource_size_t ioaddr;
> +	/*Xen: Do not need to setup sysfs */
> +	/* resource_size_t ioaddr;*/
>  	struct arm_smmu_device *smmu;
> -	struct device *dev = &pdev->dev;
> +	struct device *dev = pdev;/* Xen: dev is ignored */
>  	bool bypass;
>  
>  	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
> @@ -2674,7 +2965,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>  		dev_err(dev, "MMIO region too small (%pr)\n", res);
>  		return -EINVAL;
>  	}
> -	ioaddr = res->start;
> +	/*ioaddr = res->start;*/
>  
>  	smmu->base = devm_ioremap_resource(dev, res);
>  	if (IS_ERR(smmu->base))
> @@ -2719,13 +3010,15 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>  		return ret;
>  
>  	/* Record our private device structure */
> -	platform_set_drvdata(pdev, smmu);
> +	/* platform_set_drvdata(pdev, smmu); */
>  
>  	/* Reset the device */
>  	ret = arm_smmu_device_reset(smmu, bypass);
>  	if (ret)
>  		return ret;
>  
> +/* Xen: Not creating an IOMMU device list for Xen */
> +#if 0
>  	/* And we're up. Go go go! */
>  	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
>  				     "smmu3.%pa", &ioaddr);
> @@ -2757,9 +3050,18 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
>  		if (ret)
>  			return ret;
>  	}
> +#endif
> +	/*
> +	 * Xen: Keep a list of all probed devices. This will be used to query
> +	 * the smmu devices based on the fwnode.
> +	 */
> +	INIT_LIST_HEAD(&smmu->devices);
> +	spin_lock(&arm_smmu_devices_lock);
> +	list_add(&smmu->devices, &arm_smmu_devices);
> +	spin_unlock(&arm_smmu_devices_lock);
>  	return 0;
>  }
> -
> +#if 0
>  static int arm_smmu_device_remove(struct platform_device *pdev)
>  {
>  	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
> @@ -2767,6 +3069,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
>  	arm_smmu_device_disable(smmu);
>  	return 0;
>  }
> +#endif
> +#define MODULE_DEVICE_TABLE(type, name)
> +#define of_device_id dt_device_match
>  
>  static struct of_device_id arm_smmu_of_match[] = {
>  	{ .compatible = "arm,smmu-v3", },
> @@ -2774,6 +3079,7 @@ static struct of_device_id arm_smmu_of_match[] = {
>  };
>  MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
>  
> +#if 0
>  static struct platform_driver arm_smmu_driver = {
>  	.driver	= {
>  		.name		= "arm-smmu-v3",
> @@ -2789,3 +3095,318 @@ IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", NULL);
>  MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
>  MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
>  MODULE_LICENSE("GPL v2");
> +
> +#endif
> +/***** Start of Xen specific code *****/
> +
> +static int __must_check arm_smmu_iotlb_flush_all(struct domain *d)
> +{
> +	struct arm_smmu_xen_domain *smmu_domain = dom_iommu(d)->arch.priv;
> +	struct iommu_domain *cfg;
> +
> +	spin_lock(&smmu_domain->lock);
> +	list_for_each_entry(cfg, &smmu_domain->iommu_domains, list) {
> +		/*
> +		 * Only invalidate the context when SMMU is present.
> +		 * This is because the context initialization is delayed
> +		 * until a master has been added.
> +		 */
> +		if (unlikely(!ACCESS_ONCE(cfg->priv->smmu)))
> +			continue;
> +		arm_smmu_tlb_inv_context(cfg->priv);
> +	}
> +	spin_unlock(&smmu_domain->lock);
> +	return 0;
> +}
> +
> +static int __must_check arm_smmu_iotlb_flush(struct domain *d,
> +					     unsigned long gfn,
> +					     unsigned int page_count)
> +{
> +	return arm_smmu_iotlb_flush_all(d);
> +}
> +
> +static struct iommu_domain *arm_smmu_get_domain(struct domain *d,
> +						struct device *dev)
> +{
> +	struct iommu_domain *domain;
> +	struct arm_smmu_xen_domain *xen_domain;
> +	struct arm_smmu_device *smmu;
> +	struct arm_smmu_domain *smmu_domain;
> +
> +	xen_domain = dom_iommu(d)->arch.priv;
> +
> +	smmu = arm_smmu_get_by_fwnode(dev->iommu_fwspec->iommu_fwnode);
> +	if (!smmu)
> +		return NULL;
> +
> +	/*
> +	 * Loop through the &xen_domain->contexts to locate a context
> +	 * assigned to this SMMU
> +	 */
> +	list_for_each_entry(domain, &xen_domain->iommu_domains, list) {
> +		smmu_domain = to_smmu_domain(domain);
> +		if (smmu_domain->smmu == smmu)
> +			return domain;
> +	}
> +
> +	return NULL;
> +}
> +
> +static void arm_smmu_destroy_iommu_domain(struct iommu_domain *domain)
> +{
> +	list_del(&domain->list);
> +	xfree(domain);
> +}
The above function needs to call arm_smmu_domain_free(domain) instead of xfree(domain).

> +
> +static int arm_smmu_assign_dev(struct domain *d, u8 devfn,
> +			       struct device *dev, u32 flag)
> +{
> +	int ret = 0;
> +	struct iommu_domain *domain;
> +	struct arm_smmu_xen_domain *xen_domain;
> +	struct arm_smmu_domain *arm_smmu;
> +
> +	xen_domain = dom_iommu(d)->arch.priv;
> +
> +	if (!dev->archdata.iommu) {
> +		dev->archdata.iommu = xzalloc(struct arm_smmu_xen_device);
> +		if (!dev->archdata.iommu)
> +			return -ENOMEM;
> +	}
> +
> +	ret = arm_smmu_add_device(dev);
> +	if (ret)
> +		return ret;
> +
> +	spin_lock(&xen_domain->lock);
> +
> +	/*
> +	 * Check to see if an iommu_domain already exists for this xen domain
> +	 * under the same SMMU
> +	 */
> +	domain = arm_smmu_get_domain(d, dev);
> +	if (!domain) {
> +
> +		domain = arm_smmu_domain_alloc(IOMMU_DOMAIN_DMA);
> +		if (!domain) {
> +			ret = -ENOMEM;
> +			goto out;
> +		}
> +
> +		arm_smmu = to_smmu_domain(domain);
> +		arm_smmu->s2_cfg.domain = d;
> +
> +		/* Chain the new context to the domain */
> +		list_add(&domain->list, &xen_domain->iommu_domains);
> +
> +	}
> +
> +	ret = arm_smmu_attach_dev(domain, dev);
> +	if (ret) {
> +		if (domain->ref.counter == 0)
> +			arm_smmu_destroy_iommu_domain(domain);
> +	} else {
> +		atomic_inc(&domain->ref);
> +	}
> +
> +out:
> +	spin_unlock(&xen_domain->lock);
> +	return ret;
> +}
> +
> +static int arm_smmu_deassign_dev(struct domain *d, struct device *dev)
> +{
> +	struct iommu_domain *domain = arm_smmu_get_domain(d, dev);
> +	struct arm_smmu_xen_domain *xen_domain;
> +	struct arm_smmu_domain *arm_smmu = to_smmu_domain(domain);
> +
> +	xen_domain = dom_iommu(d)->arch.priv;
> +
> +	if (!arm_smmu || arm_smmu->s2_cfg.domain != d) {
> +		dev_err(dev, " not attached to domain %d\n", d->domain_id);
> +		return -ESRCH;
> +	}
> +
> +	spin_lock(&xen_domain->lock);
> +
> +	arm_smmu_detach_dev(dev);
> +	atomic_dec(&domain->ref);
> +
> +	if (domain->ref.counter == 0)
> +		arm_smmu_destroy_iommu_domain(domain);
> +
> +	spin_unlock(&xen_domain->lock);
> +
> +
> +
> +	return 0;
> +}
> +
> +static int arm_smmu_reassign_dev(struct domain *s, struct domain *t,
> +				 u8 devfn,  struct device *dev)
> +{
> +	int ret = 0;
> +
> +	/* Don't allow remapping on other domain than hwdom */
> +	if (t && t != hardware_domain)
> +		return -EPERM;
> +
> +	if (t == s)
> +		return 0;
> +
> +	ret = arm_smmu_deassign_dev(s, dev);
> +	if (ret)
> +		return ret;
> +
> +	if (t) {
> +		/* No flags are defined for ARM. */
> +		ret = arm_smmu_assign_dev(t, devfn, dev, 0);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int arm_smmu_iommu_domain_init(struct domain *d)
> +{
> +	struct arm_smmu_xen_domain *xen_domain;
> +
> +	xen_domain = xzalloc(struct arm_smmu_xen_domain);
> +	if (!xen_domain)
> +		return -ENOMEM;
> +
> +	spin_lock_init(&xen_domain->lock);
> +	INIT_LIST_HEAD(&xen_domain->iommu_domains);
> +
> +	dom_iommu(d)->arch.priv = xen_domain;
> +
> +	return 0;
> +}
> +
> +static void __hwdom_init arm_smmu_iommu_hwdom_init(struct domain *d)
> +{
> +}
> +
> +static void arm_smmu_iommu_domain_teardown(struct domain *d)
> +{
> +	struct arm_smmu_xen_domain *xen_domain = dom_iommu(d)->arch.priv;
> +
> +	ASSERT(list_empty(&xen_domain->iommu_domains));
> +	xfree(xen_domain);
> +}
> +
> +static int __must_check arm_smmu_map_page(struct domain *d, unsigned long gfn,
> +			unsigned long mfn, unsigned int flags)
> +{
> +	p2m_type_t t;
> +
> +	/*
> +	 * Grant mappings can be used for DMA requests. The dev_bus_addr
> +	 * returned by the hypercall is the MFN (not the IPA). For device
> +	 * protected by an IOMMU, Xen needs to add a 1:1 mapping in the domain
> +	 * p2m to allow DMA request to work.
> +	 * This is only valid when the domain is directed mapped. Hence this
> +	 * function should only be used by gnttab code with gfn == mfn.
> +	 */
> +	BUG_ON(!is_domain_direct_mapped(d));
> +	BUG_ON(mfn != gfn);
> +
> +	/* We only support readable and writable flags */
> +	if (!(flags & (IOMMUF_readable | IOMMUF_writable)))
> +		return -EINVAL;
> +
> +	t = (flags & IOMMUF_writable) ? p2m_iommu_map_rw : p2m_iommu_map_ro;
> +
> +	/*
> +	 * The function guest_physmap_add_entry replaces the current mapping
> +	 * if there is already one...
> +	 */
> +	return guest_physmap_add_entry(d, _gfn(gfn), _mfn(mfn), 0, t);
> +}
> +
> +static int __must_check arm_smmu_unmap_page(struct domain *d, unsigned long gfn)
> +{
> +	/*
> +	 * This function should only be used by gnttab code when the domain
> +	 * is direct mapped
> +	 */
> +	if (!is_domain_direct_mapped(d))
> +		return -EINVAL;
> +
> +	return guest_physmap_remove_page(d, _gfn(gfn), _mfn(gfn), 0);
> +}
> +
> +static const struct iommu_ops arm_smmu_iommu_ops = {
> +	.init = arm_smmu_iommu_domain_init,
> +	.hwdom_init = arm_smmu_iommu_hwdom_init,
> +	.teardown = arm_smmu_iommu_domain_teardown,
> +	.iotlb_flush = arm_smmu_iotlb_flush,
> +	.iotlb_flush_all = arm_smmu_iotlb_flush_all,
> +	.assign_device = arm_smmu_assign_dev,
> +	.reassign_device = arm_smmu_reassign_dev,
> +	.map_page = arm_smmu_map_page,
> +	.unmap_page = arm_smmu_unmap_page,
> +};
> +
> +static
> +struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
> +{
> +	struct arm_smmu_device *smmu = NULL;
> +
> +	spin_lock(&arm_smmu_devices_lock);
> +	list_for_each_entry(smmu, &arm_smmu_devices, devices) {
> +		if (smmu->dev->fwnode == fwnode)
> +			break;
> +	}
> +	spin_unlock(&arm_smmu_devices_lock);
> +
> +	return smmu;
> +}
> +
> +static __init int arm_smmu_dt_init(struct dt_device_node *dev,
> +				   const void *data)
> +{
> +	int rc;
> +
> +	/*
> +	 * Even if the device can't be initialized, we don't want to
> +	 * give the SMMU device to dom0.
> +	 */
> +	dt_device_set_used_by(dev, DOMID_XEN);
> +
> +	rc = arm_smmu_device_probe(dt_to_dev(dev));
> +	if (rc)
> +		return rc;
> +
> +	iommu_set_ops(&arm_smmu_iommu_ops);
> +
> +	return 0;
> +}
> +
> +DT_DEVICE_START(smmuv3, "ARM SMMU V3", DEVICE_IOMMU)
> +	.dt_match = arm_smmu_of_match,
> +	.init = arm_smmu_dt_init,
> +DT_DEVICE_END
> +
> +#ifdef CONFIG_ACPI
> +/* Set up the IOMMU */
> +static int __init arm_smmu_acpi_init(const void *data)
> +{
> +	int rc;
> +	rc = arm_smmu_device_probe((struct device *)data);
> +
> +	if (rc)
> +		return rc;
> +
> +	iommu_set_ops(&arm_smmu_iommu_ops);
> +	return 0;
> +}
> +
> +ACPI_DEVICE_START(asmmuv3, "ARM SMMU V3", DEVICE_IOMMU)
> +	.class_type = ACPI_IORT_NODE_SMMU_V3,
> +	.init = arm_smmu_acpi_init,
> +ACPI_DEVICE_END
> +
> +#endif
>
Julien Grall Oct. 12, 2017, 4:36 p.m. UTC | #2
Hi Sameer,

Given this is all Arm specific. I am not sure why people like Andrew, 
Jan have been added.

Please use scripts/get_maintainers to find the list of maintainers per 
patches and avoid to CC all of them on each patches.

On 21/09/17 01:37, Sameer Goel wrote:
> This driver follows an approach similar to smmu driver. The intent here
> is to reuse as much Linux code as possible.
> - Glue code has been introduced to bridge the API calls.
> - Called Linux functions from the Xen IOMMU function calls.
> - Xen modifications are preceded by /*Xen: comment */
> 
> Signed-off-by: Sameer Goel <sgoel@codeaurora.org>
> ---
>   xen/drivers/passthrough/arm/Makefile  |   1 +
>   xen/drivers/passthrough/arm/smmu-v3.c | 853 +++++++++++++++++++++++++++++-----

This is based on an old SMMUv3 version and I have been told there are 
some changes may benefits Xen (such as increasing the timeout for sync) 
and some optimisations also exist on the ML and will be queued soon.

So maybe you want to re-sync at least to master.

>   2 files changed, 738 insertions(+), 116 deletions(-)
> 
> diff --git a/xen/drivers/passthrough/arm/Makefile b/xen/drivers/passthrough/arm/Makefile
> index f4cd26e..57a6da6 100644
> --- a/xen/drivers/passthrough/arm/Makefile
> +++ b/xen/drivers/passthrough/arm/Makefile
> @@ -1,2 +1,3 @@
>   obj-y += iommu.o
>   obj-y += smmu.o
> +obj-y += smmu-v3.o

Do we want SMMUv3 to be built on Arm32? Maybe we should introduce a new 
Kconfig to let the user select.

> diff --git a/xen/drivers/passthrough/arm/smmu-v3.c b/xen/drivers/passthrough/arm/smmu-v3.c
> index 380969a..8f3b43d 100644
> --- a/xen/drivers/passthrough/arm/smmu-v3.c
> +++ b/xen/drivers/passthrough/arm/smmu-v3.c
> @@ -18,28 +18,266 @@
>    * Author: Will Deacon <will.deacon@arm.com>
>    *
>    * This driver is powered by bad coffee and bombay mix.
> + *
> + *
> + * Based on Linux drivers/iommu/arm-smmu-v3.c
> + * => commit bdf95923086fb359ccb44c815724c3ace1611c90
> + *
> + * Xen modifications:
> + * Sameer Goel <sgoel@codeaurora.org>
> + * Copyright (C) 2017, The Linux Foundation, All rights reserved.
> + *
>    */
>   
> -#include <linux/acpi.h>
> -#include <linux/acpi_iort.h>
> -#include <linux/delay.h>
> -#include <linux/dma-iommu.h>
> -#include <linux/err.h>
> -#include <linux/interrupt.h>
> -#include <linux/iommu.h>
> -#include <linux/iopoll.h>
> -#include <linux/module.h>
> -#include <linux/msi.h>
> -#include <linux/of.h>
> -#include <linux/of_address.h>
> -#include <linux/of_iommu.h>
> -#include <linux/of_platform.h>
> -#include <linux/pci.h>
> -#include <linux/platform_device.h>
> -
> -#include <linux/amba/bus.h>
> -
> -#include "io-pgtable.h"
> +#include <xen/config.h>

This is not necessary.

> +#include <xen/delay.h>
> +#include <xen/errno.h>
> +#include <xen/err.h>
> +#include <xen/irq.h>
> +#include <xen/lib.h>
> +#include <xen/list.h>
> +#include <xen/mm.h>
> +#include <xen/vmap.h>
> +#include <xen/rbtree.h>
> +#include <xen/sched.h>
> +#include <xen/sizes.h>
> +#include <asm/atomic.h>
> +#include <asm/device.h>
> +#include <asm/io.h>
> +#include <asm/platform.h>
> +#include <xen/acpi.h>

Please order the includes alphabetically with xen/* first then asm/*

> +
> +typedef paddr_t phys_addr_t;
> +typedef paddr_t dma_addr_t;
> +
> +/* Alias to Xen device tree helpers */
> +#define device_node dt_device_node
> +#define of_phandle_args dt_phandle_args
> +#define of_device_id dt_device_match
> +#define of_match_node dt_match_node
> +#define of_property_read_u32(np, pname, out) (!dt_property_read_u32(np, pname, out))
> +#define of_property_read_bool dt_property_read_bool
> +#define of_parse_phandle_with_args dt_parse_phandle_with_args
> +#define mutex spinlock_t
> +#define mutex_init spin_lock_init
> +#define mutex_lock spin_lock
> +#define mutex_unlock spin_unlock

mutex and spinlock are not the same. The former is sleeping whilst the 
later is not.

Can you please explain why this is fine and possibly add that in a comment?

> +
> +/* Xen: Helpers to get device MMIO and IRQs */
> +struct resource {
> +	u64 addr;
> +	u64 size;
> +	unsigned int type;
> +};

Likely we want a compat header for defining Linux helpers. This would 
avoid replicating it everywhere.

> +
> +#define resource_size(res) ((res)->size)
> +
> +#define platform_device device
> +
> +#define IORESOURCE_MEM 0
> +#define IORESOURCE_IRQ 1
> +
> +static struct resource *platform_get_resource(struct platform_device *pdev,
> +					      unsigned int type,
> +					      unsigned int num)
> +{
> +	/*
> +	 * The resource is only used between 2 calls of platform_get_resource.
> +	 * It's quite ugly but it's avoid to add too much code in the part
> +	 * imported from Linux
> +	 */
> +	static struct resource res;
> +	struct acpi_iort_node *iort_node;
> +	struct acpi_iort_smmu_v3 *node_smmu_data;
> +	int ret = 0;
> +
> +	res.type = type;
> +
> +	switch (type) {
> +	case IORESOURCE_MEM:
> +		if (pdev->type == DEV_ACPI) {
> +			ret = 1;
> +			iort_node = pdev->acpi_node;
> +			node_smmu_data =
> +				(struct acpi_iort_smmu_v3 *)iort_node->node_data;
> +
> +			if (node_smmu_data != NULL) {
> +				res.addr = node_smmu_data->base_address;
> +				res.size = SZ_128K;
> +				ret = 0;
> +			}
> +		} else {
> +			ret = dt_device_get_address(dev_to_dt(pdev), num,
> +						    &res.addr, &res.size);
> +		}
> +
> +		return ((ret) ? NULL : &res);
> +
> +	case IORESOURCE_IRQ:
> +		ret = platform_get_irq(dev_to_dt(pdev), num);

No IRQ for ACPI?

> +
> +		if (ret < 0)
> +			return NULL;
> +
> +		res.addr = ret;
> +		res.size = 1;
> +
> +		return &res;
> +
> +	default:
> +		return NULL;
> +	}
> +}
> +
> +static int platform_get_irq_byname(struct platform_device *pdev, const char *name)
> +{
> +	const struct dt_property *dtprop;
> +	struct acpi_iort_node *iort_node;
> +	struct acpi_iort_smmu_v3 *node_smmu_data;
> +	int ret = 0;
> +
> +	if (pdev->type == DEV_ACPI) {
> +		iort_node = pdev->acpi_node;
> +		node_smmu_data = (struct acpi_iort_smmu_v3 *)iort_node->node_data;
> +
> +		if (node_smmu_data != NULL) {
> +			if (!strcmp(name, "eventq"))
> +				ret = node_smmu_data->event_gsiv;
> +			else if (!strcmp(name, "priq"))
> +				ret = node_smmu_data->pri_gsiv;
> +			else if (!strcmp(name, "cmdq-sync"))
> +				ret = node_smmu_data->sync_gsiv;
> +			else if (!strcmp(name, "gerror"))
> +				ret = node_smmu_data->gerr_gsiv;
> +			else
> +				ret = -EINVAL;
> +		}
> +	} else {
> +		dtprop = dt_find_property(dev_to_dt(pdev), "interrupt-names", NULL);
> +		if (!dtprop)
> +			return -EINVAL;
> +
> +		if (!dtprop->value)
> +			return -ENODATA;
> +	}
> +
> +	return ret;
> +}
> +
> +#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \
> +({ \
> +	s_time_t deadline = NOW() + MICROSECS(timeout_us); \
> +	for (;;) { \
> +		(val) = op(addr); \
> +		if (cond) \
> +			break; \
> +		if (NOW() > deadline) { \
> +			(val) = op(addr); \
> +			break; \
> +		} \
> +		cpu_relax(); \

I don't think calling cpu_relax() is correct here.

> +		udelay(sleep_us); \
> +	} \
> +	(cond) ? 0 : -ETIMEDOUT; \
> +})
> +
> +#define readl_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \
> +	readx_poll_timeout(readl_relaxed, addr, val, cond, delay_us, timeout_us)
> +
> +/* Xen: Helpers for IRQ functions */
> +#define request_irq(irq, func, flags, name, dev) request_irq(irq, flags, func, name, dev)
> +#define free_irq release_irq
> +
> +enum irqreturn {
> +	IRQ_NONE	= (0 << 0),
> +	IRQ_HANDLED	= (1 << 0),
> +};
> +
> +typedef enum irqreturn irqreturn_t;
> +
> +/* Device logger functions */
> +#define dev_print(dev, lvl, fmt, ...)						\
> +	 printk(lvl "smmu: " fmt, ## __VA_ARGS__)
> +
> +#define dev_dbg(dev, fmt, ...) dev_print(dev, XENLOG_DEBUG, fmt, ## __VA_ARGS__)
> +#define dev_notice(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
> +#define dev_warn(dev, fmt, ...) dev_print(dev, XENLOG_WARNING, fmt, ## __VA_ARGS__)
> +#define dev_err(dev, fmt, ...) dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
> +#define dev_info(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
> +
> +#define dev_err_ratelimited(dev, fmt, ...)					\
> +	 dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
> +
> +#define dev_name(dev) dt_node_full_name(dev_to_dt(dev))
> +
> +/* Alias to Xen allocation helpers */
> +#define kfree xfree
> +#define kmalloc(size, flags)		_xmalloc(size, sizeof(void *))
> +#define kzalloc(size, flags)		_xzalloc(size, sizeof(void *))
> +#define devm_kzalloc(dev, size, flags)	_xzalloc(size, sizeof(void *))
> +#define kmalloc_array(size, n, flags)	_xmalloc_array(size, sizeof(void *), n)
> +
> +/* Compatibility defines */
> +#undef WARN_ON
> +#define WARN_ON(cond) (!!cond)

Why do you redefine WARN_ON?

> +#define WARN_ON_ONCE(cond) WARN_ON(cond)

Hmmm, can't we implement a common WARN_ON_ONCE?

> +
> +static void __iomem *devm_ioremap_resource(struct device *dev,
> +					   struct resource *res)
> +{
> +	void __iomem *ptr;
> +
> +	if (!res || res->type != IORESOURCE_MEM) {
> +		dev_err(dev, "Invalid resource\n");
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	ptr = ioremap_nocache(res->addr, res->size);
> +	if (!ptr) {
> +		dev_err(dev,
> +			"ioremap failed (addr 0x%"PRIx64" size 0x%"PRIx64")\n",
> +			res->addr, res->size);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	return ptr;
> +}
> +
> +/* Xen: Dummy iommu_domain */
> +struct iommu_domain {
> +	/* Runtime SMMU configuration for this iommu_domain */
> +	struct arm_smmu_domain		*priv;
> +	unsigned int			type;
> +
> +	atomic_t ref;
> +	/* Used to link iommu_domain contexts for a same domain.
> +	 * There is at least one per-SMMU to used by the domain.
> +	 */
> +	struct list_head		list;
> +};

This is very similar to the SMMU version. Could we share some bits?

> +/* Xen: Domain type definitions. Not really needed for Xen, defining to port
> + * Linux code as-is
> + */
> +#define IOMMU_DOMAIN_UNMANAGED 0
> +#define IOMMU_DOMAIN_DMA 1
> +#define IOMMU_DOMAIN_IDENTITY 2
> +
> +/* Xen: Describes information required for a Xen domain */
> +struct arm_smmu_xen_domain {
> +	spinlock_t			lock;
> +	/* List of iommu domains associated to this domain */
> +	struct list_head		iommu_domains;
> +};

Ditoo.

> +
> +/*
> + * Xen: Information about each device stored in dev->archdata.iommu
> + *
> + * The dev->archdata.iommu stores the iommu_domain (runtime configuration of
> + * the SMMU).
> + */
> +struct arm_smmu_xen_device {
> +	struct iommu_domain *domain;
> +};

Ditto.

>   
>   /* MMIO registers */
>   #define ARM_SMMU_IDR0			0x0
> @@ -412,10 +650,12 @@
>   #define MSI_IOVA_BASE			0x8000000
>   #define MSI_IOVA_LENGTH			0x100000
>   
> +#if 0 /* Not applicable for Xen */

While the module_param_name() is not applicable in Xen, I don't see any 
reason to remove the variable.

>   static bool disable_bypass; >   module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
>   MODULE_PARM_DESC(disable_bypass,
>   	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
> +#endif
>   
>   enum pri_resp {
>   	PRI_RESP_DENY,
> @@ -423,6 +663,7 @@ enum pri_resp {
>   	PRI_RESP_SUCC,
>   };
>   
> +#if 0 /* Xen: No MSI support in this iteration */
>   enum arm_smmu_msi_index {
>   	EVTQ_MSI_INDEX,
>   	GERROR_MSI_INDEX,
> @@ -447,6 +688,7 @@ static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
>   		ARM_SMMU_PRIQ_IRQ_CFG2,
>   	},
>   };
> +#endif
>   
>   struct arm_smmu_cmdq_ent {
>   	/* Common fields */
> @@ -551,6 +793,8 @@ struct arm_smmu_s2_cfg {
>   	u16				vmid;
>   	u64				vttbr;
>   	u64				vtcr;
> +	/* Xen: Domain associated to this configuration */
> +	struct domain			*domain;
>   };
>   
>   struct arm_smmu_strtab_ent {
> @@ -623,9 +867,20 @@ struct arm_smmu_device {
>   	struct arm_smmu_strtab_cfg	strtab_cfg;
>   
>   	/* IOMMU core code handle */
> -	struct iommu_device		iommu;
> +	//struct iommu_device		iommu;

#if 0 but no // please.

> +
> +	/* Xen: Need to keep a list of SMMU devices */
> +	struct list_head                devices;
>   };
>   
> +/* Xen: Keep a list of devices associated with this driver */
> +static DEFINE_SPINLOCK(arm_smmu_devices_lock);
> +static LIST_HEAD(arm_smmu_devices);
> +/* Xen: Helper for finding a device using fwnode */
> +static
> +struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode);
> +
> +
>   /* SMMU private data for each master */
>   struct arm_smmu_master_data {
>   	struct arm_smmu_device		*smmu;
> @@ -642,7 +897,7 @@ enum arm_smmu_domain_stage {
>   
>   struct arm_smmu_domain {
>   	struct arm_smmu_device		*smmu;
> -	struct mutex			init_mutex; /* Protects smmu pointer */
> +	mutex			init_mutex; /* Protects smmu pointer */
>   
>   	struct io_pgtable_ops		*pgtbl_ops;
>   	spinlock_t			pgtbl_lock;
> @@ -737,15 +992,16 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
>    */
>   static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>   {
> -	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
> +	s_time_t deadline = NOW() + MICROSECS(ARM_SMMU_POLL_TIMEOUT_US);

Please introduce proper wrappers to avoid the modification of the code.

>   
>   	while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
> -		if (ktime_compare(ktime_get(), timeout) > 0)
> +
> +		if (NOW() > deadline)

Ditto.

>   			return -ETIMEDOUT;
>   
> -		if (wfe) {
> +		if (wfe)

Please avoid to drop {

>   			wfe();
> -		} else {

Ditto.

> +		else {
>   			cpu_relax();

Hmmm I now see why you added cpu_relax() at the top. Well, on Xen 
cpu_relax is just a barrier. On Linux it is used to yield.

And that bit is worrying me. The Linux code will allow context switching 
to another tasks if the code is taking too much time.

Xen is not preemptible, so is it fine?

>   			udelay(1);
>   		}
> @@ -931,7 +1187,7 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
>   		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
>   	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
>   }
> -
> +#if 0

Please avoid dropping newline and explain why the #if 0.

>   /* Context descriptor manipulation functions */
>   static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
>   {
> @@ -974,7 +1230,7 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
>   
>   	cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
>   }
> -
> +#endif

Ditto for the newline.

>   /* Stream table manipulation functions */
>   static void
>   arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
> @@ -1044,7 +1300,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
>   			ste_live = true;
>   			break;
>   		case STRTAB_STE_0_CFG_ABORT:
> -			if (disable_bypass)
> +			//No bypass override for Xen

Why no leaving the variable on top with a comment. This would avoid such 
change.

>   				break;
>   		default:
>   			BUG(); /* STE corruption */
> @@ -1056,7 +1312,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
>   
>   	/* Bypass/fault */
>   	if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
> -		if (!ste->assigned && disable_bypass)
> +		if (!ste->assigned)

Ditto.

>   			val |= STRTAB_STE_0_CFG_ABORT;
>   		else
>   			val |= STRTAB_STE_0_CFG_BYPASS;
> @@ -1135,16 +1391,20 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
>   	void *strtab;
>   	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
>   	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
> +	u32 alignment = 0;
>   
>   	if (desc->l2ptr)
>   		return 0;
>   
> -	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
> +	size = 1 << (STRTAB_SPLIT + LOG_2(STRTAB_STE_DWORDS) + 3);

I would prefer if you introduce ilog2.

>   	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
>   
>   	desc->span = STRTAB_SPLIT + 1;
> -	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
> -					  GFP_KERNEL | __GFP_ZERO);
> +
> +	alignment = 1 << ((5 + (desc->span - 1)));

Do you mind explaining the 5? Also, does the shift will always be < 32?

> +	desc->l2ptr = _xzalloc(size, alignment);
> +	desc->l2ptr_dma = virt_to_maddr(desc->l2ptr);

_xzalloc can fail and virt_to_maddr will result to a panic.

> +
>   	if (!desc->l2ptr) {
>   		dev_err(smmu->dev,
>   			"failed to allocate l2 stream table for SID %u\n",
> @@ -1158,7 +1418,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
>   }
>   
>   /* IRQ and event handlers */
> -static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
> +static void arm_smmu_evtq_thread(int irq, void *dev, struct cpu_user_regs *regs)

Could you please introduce a wrapper instead as it was done in smmu.c?

>   {
>   	int i;
>   	struct arm_smmu_device *smmu = dev;
> @@ -1186,7 +1446,6 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>   
>   	/* Sync our overflow flag, as we believe we're up to speed */
>   	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
> -	return IRQ_HANDLED;
>   }
>   
>   static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
> @@ -1203,7 +1462,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
>   
>   	dev_info(smmu->dev, "unexpected PRI request received:\n");
>   	dev_info(smmu->dev,
> -		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
> +		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016lx\n",

Hmmm why?

>   		 sid, ssid, grpid, last ? "L" : "",
>   		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
>   		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
> @@ -1227,7 +1486,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
>   	}
>   }
>   
> -static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
> +static void arm_smmu_priq_thread(int irq, void *dev, struct cpu_user_regs *regs)

Ditto about the prototype.

>   {
>   	struct arm_smmu_device *smmu = dev;
>   	struct arm_smmu_queue *q = &smmu->priq.q;
> @@ -1243,18 +1502,16 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
>   
>   	/* Sync our overflow flag, as we believe we're up to speed */
>   	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
> -	return IRQ_HANDLED;
>   }
>   
> -static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
> +static void arm_smmu_cmdq_sync_handler(int irq, void *dev, struct cpu_user_regs *regs)

Ditto.

>   {
>   	/* We don't actually use CMD_SYNC interrupts for anything */
> -	return IRQ_HANDLED;
>   }
>   
>   static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
>   
> -static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
> +static void arm_smmu_gerror_handler(int irq, void *dev, struct cpu_user_regs *regs)

Ditto.

>   {
>   	u32 gerror, gerrorn, active;
>   	struct arm_smmu_device *smmu = dev;
> @@ -1264,7 +1521,7 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
>   
>   	active = gerror ^ gerrorn;
>   	if (!(active & GERROR_ERR_MASK))
> -		return IRQ_NONE; /* No errors pending */
> +		return; /* No errors pending */
>   
>   	dev_warn(smmu->dev,
>   		 "unexpected global error reported (0x%08x), this could be serious\n",
> @@ -1286,7 +1543,7 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
>   
>   	if (active & GERROR_MSI_CMDQ_ABT_ERR) {
>   		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
> -		arm_smmu_cmdq_sync_handler(irq, smmu->dev);
> +		arm_smmu_cmdq_sync_handler(irq, smmu->dev, NULL);
>   	}
>   
>   	if (active & GERROR_PRIQ_ABT_ERR)
> @@ -1299,7 +1556,6 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
>   		arm_smmu_cmdq_skip_err(smmu);
>   
>   	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
> -	return IRQ_HANDLED;
>   }
>   
>   /* IO_PGTABLE API */
> @@ -1311,11 +1567,13 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
>   	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
>   }
>   
> +#if 0 /*Xen: Unused function */
>   static void arm_smmu_tlb_sync(void *cookie)
>   {
>   	struct arm_smmu_domain *smmu_domain = cookie;
>   	__arm_smmu_tlb_sync(smmu_domain->smmu);
>   }
> +#endif
>   
>   static void arm_smmu_tlb_inv_context(void *cookie)
>   {
> @@ -1336,6 +1594,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
>   	__arm_smmu_tlb_sync(smmu);
>   }
>   
> +#if 0 /*Xen: Unused functionality */
>   static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
>   					  size_t granule, bool leaf, void *cookie)
>   {
> @@ -1362,7 +1621,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
>   	} while (size -= granule);
>   }
>   
> -static const struct iommu_gather_ops arm_smmu_gather_ops = {
> +static struct iommu_gather_ops arm_smmu_gather_ops = {
>   	.tlb_flush_all	= arm_smmu_tlb_inv_context,
>   	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
>   	.tlb_sync	= arm_smmu_tlb_sync,
> @@ -1380,6 +1639,11 @@ static bool arm_smmu_capable(enum iommu_cap cap)
>   		return false;
>   	}
>   }
> +#endif
> +/* Xen: Stub out DMA domain related functions */
> +#define iommu_get_dma_cookie(dom) 0
> +#define iommu_put_dma_cookie(dom) 0

Please stub them at the top of the file.

> +
>   
>   static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>   {
> @@ -1410,6 +1674,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>   	return &smmu_domain->domain;
>   }
>   
> +#if 0

Please explain the #if 0

>   static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
>   {
>   	int idx, size = 1 << span;
> @@ -1427,36 +1692,20 @@ static void arm_smmu_bitmap_free(unsigned long *map, int idx)
>   {
>   	clear_bit(idx, map);
>   }
> +#endif
>   
>   static void arm_smmu_domain_free(struct iommu_domain *domain)
>   {
>   	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> -	struct arm_smmu_device *smmu = smmu_domain->smmu;
> -
> -	iommu_put_dma_cookie(domain);
> -	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
> -
> -	/* Free the CD and ASID, if we allocated them */
> -	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
> -		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> -
> -		if (cfg->cdptr) {
> -			dmam_free_coherent(smmu_domain->smmu->dev,
> -					   CTXDESC_CD_DWORDS << 3,
> -					   cfg->cdptr,
> -					   cfg->cdptr_dma);
> -
> -			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
> -		}
> -	} else {
> -		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
> -		if (cfg->vmid)
> -			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
> -	}
> +	/*
> +	 * Xen: Remove the free functions that are not used and code related
> +	 * to S1 translation. We just need to free the domain here.
> +	 */

Please use #if 0 rather than removing the code + comment on top. But I 
am not sure why you drop the S2 free code. Shouldn't we allocate VMID 
from the SMMU?

>   
>   	kfree(smmu_domain);
>   }
>   
> +#if 0
>   static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
>   				       struct io_pgtable_cfg *pgtbl_cfg)
>   {
> @@ -1488,33 +1737,30 @@ out_free_asid:
>   	arm_smmu_bitmap_free(smmu->asid_map, asid);
>   	return ret;
>   }
> +#endif
>   
> -static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
> -				       struct io_pgtable_cfg *pgtbl_cfg)
> +static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain)
>   {
> -	int vmid;
> -	struct arm_smmu_device *smmu = smmu_domain->smmu;
>   	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
>   
> -	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
> -	if (vmid < 0)
> -		return vmid;
> +	/* Xen: Set the values as needed */
>   
> -	cfg->vmid	= (u16)vmid;
> -	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
> -	cfg->vtcr	= pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
> +	cfg->vmid	= cfg->domain->arch.p2m.vmid;

See my comment above.

> +	cfg->vttbr	= page_to_maddr(cfg->domain->arch.p2m.root);
> +	cfg->vtcr	= READ_SYSREG32(VTCR_EL2);

This looks a bit suspicious. Looking at the specs, the bits does not 
seem to correspond here.

I will look at the rest either tomorrow or Monday.

Cheers,
Goel, Sameer Nov. 19, 2017, 7:45 a.m. UTC | #3
On 10/12/2017 10:36 AM, Julien Grall wrote:
> Hi Sameer,
> 
> Given this is all Arm specific. I am not sure why people like Andrew, Jan have been added.
> 
> Please use scripts/get_maintainers to find the list of maintainers per patches and avoid to CC all of them on each patches.
Ok.

> 
> On 21/09/17 01:37, Sameer Goel wrote:
>> This driver follows an approach similar to smmu driver. The intent here
>> is to reuse as much Linux code as possible.
>> - Glue code has been introduced to bridge the API calls.
>> - Called Linux functions from the Xen IOMMU function calls.
>> - Xen modifications are preceded by /*Xen: comment */
>>
>> Signed-off-by: Sameer Goel <sgoel@codeaurora.org>
>> ---
>>   xen/drivers/passthrough/arm/Makefile  |   1 +
>>   xen/drivers/passthrough/arm/smmu-v3.c | 853 +++++++++++++++++++++++++++++-----
> 
> This is based on an old SMMUv3 version and I have been told there are some changes may benefits Xen (such as increasing the timeout for sync) and some optimisations also exist on the ML and will be queued soon.
> 
> So maybe you want to re-sync at least to master.
> 
>>   2 files changed, 738 insertions(+), 116 deletions(-)
>>
>> diff --git a/xen/drivers/passthrough/arm/Makefile b/xen/drivers/passthrough/arm/Makefile
>> index f4cd26e..57a6da6 100644
>> --- a/xen/drivers/passthrough/arm/Makefile
>> +++ b/xen/drivers/passthrough/arm/Makefile
>> @@ -1,2 +1,3 @@
>>   obj-y += iommu.o
>>   obj-y += smmu.o
>> +obj-y += smmu-v3.o
> 
> Do we want SMMUv3 to be built on Arm32? Maybe we should introduce a new Kconfig to let the user select.
Agreed. I will define a CONFIG_ARM_SMMU_V3.

> 
>> diff --git a/xen/drivers/passthrough/arm/smmu-v3.c b/xen/drivers/passthrough/arm/smmu-v3.c
>> index 380969a..8f3b43d 100644
>> --- a/xen/drivers/passthrough/arm/smmu-v3.c
>> +++ b/xen/drivers/passthrough/arm/smmu-v3.c
>> @@ -18,28 +18,266 @@
>>    * Author: Will Deacon <will.deacon@arm.com>
>>    *
>>    * This driver is powered by bad coffee and bombay mix.
>> + *
>> + *
>> + * Based on Linux drivers/iommu/arm-smmu-v3.c
>> + * => commit bdf95923086fb359ccb44c815724c3ace1611c90
>> + *
>> + * Xen modifications:
>> + * Sameer Goel <sgoel@codeaurora.org>
>> + * Copyright (C) 2017, The Linux Foundation, All rights reserved.
>> + *
>>    */
>>   -#include <linux/acpi.h>
>> -#include <linux/acpi_iort.h>
>> -#include <linux/delay.h>
>> -#include <linux/dma-iommu.h>
>> -#include <linux/err.h>
>> -#include <linux/interrupt.h>
>> -#include <linux/iommu.h>
>> -#include <linux/iopoll.h>
>> -#include <linux/module.h>
>> -#include <linux/msi.h>
>> -#include <linux/of.h>
>> -#include <linux/of_address.h>
>> -#include <linux/of_iommu.h>
>> -#include <linux/of_platform.h>
>> -#include <linux/pci.h>
>> -#include <linux/platform_device.h>
>> -
>> -#include <linux/amba/bus.h>
>> -
>> -#include "io-pgtable.h"
>> +#include <xen/config.h>
> 
> This is not necessary.
> 
>> +#include <xen/delay.h>
>> +#include <xen/errno.h>
>> +#include <xen/err.h>
>> +#include <xen/irq.h>
>> +#include <xen/lib.h>
>> +#include <xen/list.h>
>> +#include <xen/mm.h>
>> +#include <xen/vmap.h>
>> +#include <xen/rbtree.h>
>> +#include <xen/sched.h>
>> +#include <xen/sizes.h>
>> +#include <asm/atomic.h>
>> +#include <asm/device.h>
>> +#include <asm/io.h>
>> +#include <asm/platform.h>
>> +#include <xen/acpi.h>
> 
> Please order the includes alphabetically with xen/* first then asm/*

Done.
> 
>> +
>> +typedef paddr_t phys_addr_t;
>> +typedef paddr_t dma_addr_t;
>> +
>> +/* Alias to Xen device tree helpers */
>> +#define device_node dt_device_node
>> +#define of_phandle_args dt_phandle_args
>> +#define of_device_id dt_device_match
>> +#define of_match_node dt_match_node
>> +#define of_property_read_u32(np, pname, out) (!dt_property_read_u32(np, pname, out))
>> +#define of_property_read_bool dt_property_read_bool
>> +#define of_parse_phandle_with_args dt_parse_phandle_with_args
>> +#define mutex spinlock_t
>> +#define mutex_init spin_lock_init
>> +#define mutex_lock spin_lock
>> +#define mutex_unlock spin_unlock
> 
> mutex and spinlock are not the same. The former is sleeping whilst the later is not.
> 
> Can you please explain why this is fine and possibly add that in a comment?
> 
Mutex is used to protect the access to smmu device internal data structure when setting up the s2 config and installing stes for a given device in Linux. The ste programming  operation can be competitively long but in the current testing, I did not see this blocking for too long. I will put in a comment. 

>> +
>> +/* Xen: Helpers to get device MMIO and IRQs */
>> +struct resource {
>> +    u64 addr;
>> +    u64 size;
>> +    unsigned int type;
>> +};
> 
> Likely we want a compat header for defining Linux helpers. This would avoid replicating it everywhere.
Agreed.

> 
>> +
>> +#define resource_size(res) ((res)->size)
>> +
>> +#define platform_device device
>> +
>> +#define IORESOURCE_MEM 0
>> +#define IORESOURCE_IRQ 1
>> +
>> +static struct resource *platform_get_resource(struct platform_device *pdev,
>> +                          unsigned int type,
>> +                          unsigned int num)
>> +{
>> +    /*
>> +     * The resource is only used between 2 calls of platform_get_resource.
>> +     * It's quite ugly but it's avoid to add too much code in the part
>> +     * imported from Linux
>> +     */
>> +    static struct resource res;
>> +    struct acpi_iort_node *iort_node;
>> +    struct acpi_iort_smmu_v3 *node_smmu_data;
>> +    int ret = 0;
>> +
>> +    res.type = type;
>> +
>> +    switch (type) {
>> +    case IORESOURCE_MEM:
>> +        if (pdev->type == DEV_ACPI) {
>> +            ret = 1;
>> +            iort_node = pdev->acpi_node;
>> +            node_smmu_data =
>> +                (struct acpi_iort_smmu_v3 *)iort_node->node_data;
>> +
>> +            if (node_smmu_data != NULL) {
>> +                res.addr = node_smmu_data->base_address;
>> +                res.size = SZ_128K;
>> +                ret = 0;
>> +            }
>> +        } else {
>> +            ret = dt_device_get_address(dev_to_dt(pdev), num,
>> +                            &res.addr, &res.size);
>> +        }
>> +
>> +        return ((ret) ? NULL : &res);
>> +
>> +    case IORESOURCE_IRQ:
>> +        ret = platform_get_irq(dev_to_dt(pdev), num);
> 
> No IRQ for ACPI?
For IRQs the code calls platform_get_irq_byname. So, the IORESOURCE_IRQ implementation is not needed at all. (DT or ACPI)
> 
>> +
>> +        if (ret < 0)
>> +            return NULL;
>> +
>> +        res.addr = ret;
>> +        res.size = 1;
>> +
>> +        return &res;
>> +
>> +    default:
>> +        return NULL;
>> +    }
>> +}
>> +
>> +static int platform_get_irq_byname(struct platform_device *pdev, const char *name)
>> +{
>> +    const struct dt_property *dtprop;
>> +    struct acpi_iort_node *iort_node;
>> +    struct acpi_iort_smmu_v3 *node_smmu_data;
>> +    int ret = 0;
>> +
>> +    if (pdev->type == DEV_ACPI) {
>> +        iort_node = pdev->acpi_node;
>> +        node_smmu_data = (struct acpi_iort_smmu_v3 *)iort_node->node_data;
>> +
>> +        if (node_smmu_data != NULL) {
>> +            if (!strcmp(name, "eventq"))
>> +                ret = node_smmu_data->event_gsiv;
>> +            else if (!strcmp(name, "priq"))
>> +                ret = node_smmu_data->pri_gsiv;
>> +            else if (!strcmp(name, "cmdq-sync"))
>> +                ret = node_smmu_data->sync_gsiv;
>> +            else if (!strcmp(name, "gerror"))
>> +                ret = node_smmu_data->gerr_gsiv;
>> +            else
>> +                ret = -EINVAL;
>> +        }
>> +    } else {
>> +        dtprop = dt_find_property(dev_to_dt(pdev), "interrupt-names", NULL);
>> +        if (!dtprop)
>> +            return -EINVAL;
>> +
>> +        if (!dtprop->value)
>> +            return -ENODATA;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \
>> +({ \
>> +    s_time_t deadline = NOW() + MICROSECS(timeout_us); \
>> +    for (;;) { \
>> +        (val) = op(addr); \
>> +        if (cond) \
>> +            break; \
>> +        if (NOW() > deadline) { \
>> +            (val) = op(addr); \
>> +            break; \
>> +        } \
>> +        cpu_relax(); \
> 
> I don't think calling cpu_relax() is correct here.
I will remove cpu_relax() from here. It should not be needed.
> 
>> +        udelay(sleep_us); \
>> +    } \
>> +    (cond) ? 0 : -ETIMEDOUT; \
>> +})
>> +
>> +#define readl_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \
>> +    readx_poll_timeout(readl_relaxed, addr, val, cond, delay_us, timeout_us)
>> +
>> +/* Xen: Helpers for IRQ functions */
>> +#define request_irq(irq, func, flags, name, dev) request_irq(irq, flags, func, name, dev)
>> +#define free_irq release_irq
>> +
>> +enum irqreturn {
>> +    IRQ_NONE    = (0 << 0),
>> +    IRQ_HANDLED    = (1 << 0),
>> +};
>> +
>> +typedef enum irqreturn irqreturn_t;
>> +
>> +/* Device logger functions */
>> +#define dev_print(dev, lvl, fmt, ...)                        \
>> +     printk(lvl "smmu: " fmt, ## __VA_ARGS__)
>> +
>> +#define dev_dbg(dev, fmt, ...) dev_print(dev, XENLOG_DEBUG, fmt, ## __VA_ARGS__)
>> +#define dev_notice(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
>> +#define dev_warn(dev, fmt, ...) dev_print(dev, XENLOG_WARNING, fmt, ## __VA_ARGS__)
>> +#define dev_err(dev, fmt, ...) dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
>> +#define dev_info(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
>> +
>> +#define dev_err_ratelimited(dev, fmt, ...)                    \
>> +     dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
>> +
>> +#define dev_name(dev) dt_node_full_name(dev_to_dt(dev))
>> +
>> +/* Alias to Xen allocation helpers */
>> +#define kfree xfree
>> +#define kmalloc(size, flags)        _xmalloc(size, sizeof(void *))
>> +#define kzalloc(size, flags)        _xzalloc(size, sizeof(void *))
>> +#define devm_kzalloc(dev, size, flags)    _xzalloc(size, sizeof(void *))
>> +#define kmalloc_array(size, n, flags)    _xmalloc_array(size, sizeof(void *), n)
>> +
>> +/* Compatibility defines */
>> +#undef WARN_ON
>> +#define WARN_ON(cond) (!!cond)
> 
> Why do you redefine WARN_ON?Need it to be a scalar value. The Xen implementation is a do..while. Did not want a large impact hence the local define. I can try proposing a new common define.
> 
>> +#define WARN_ON_ONCE(cond) WARN_ON(cond)
>> Hmmm, can't we implement a common WARN_ON_ONCE?
Will not really be executed. Defining it to maintain compatibility. I think this file is the right place for this define. We can send it to a generic file if so needed.
> 
>> +
>> +static void __iomem *devm_ioremap_resource(struct device *dev,
>> +                       struct resource *res)
>> +{
>> +    void __iomem *ptr;
>> +
>> +    if (!res || res->type != IORESOURCE_MEM) {
>> +        dev_err(dev, "Invalid resource\n");
>> +        return ERR_PTR(-EINVAL);
>> +    }
>> +
>> +    ptr = ioremap_nocache(res->addr, res->size);
>> +    if (!ptr) {
>> +        dev_err(dev,
>> +            "ioremap failed (addr 0x%"PRIx64" size 0x%"PRIx64")\n",
>> +            res->addr, res->size);
>> +        return ERR_PTR(-ENOMEM);
>> +    }
>> +
>> +    return ptr;
>> +}
>> +
>> +/* Xen: Dummy iommu_domain */
>> +struct iommu_domain {
>> +    /* Runtime SMMU configuration for this iommu_domain */
>> +    struct arm_smmu_domain        *priv;
>> +    unsigned int            type;
>> +
>> +    atomic_t ref;
>> +    /* Used to link iommu_domain contexts for a same domain.
>> +     * There is at least one per-SMMU to used by the domain.
>> +     */
>> +    struct list_head        list;
>> +};
> 
> This is very similar to the SMMU version. Could we share some bits?
I will propose a common header for arm-smmu files.
> 
>> +/* Xen: Domain type definitions. Not really needed for Xen, defining to port
>> + * Linux code as-is
>> + */
>> +#define IOMMU_DOMAIN_UNMANAGED 0
>> +#define IOMMU_DOMAIN_DMA 1
>> +#define IOMMU_DOMAIN_IDENTITY 2
>> +
>> +/* Xen: Describes information required for a Xen domain */
>> +struct arm_smmu_xen_domain {
>> +    spinlock_t            lock;
>> +    /* List of iommu domains associated to this domain */
>> +    struct list_head        iommu_domains;
>> +};
> 
> Ditoo.
> 
>> +
>> +/*
>> + * Xen: Information about each device stored in dev->archdata.iommu
>> + *
>> + * The dev->archdata.iommu stores the iommu_domain (runtime configuration of
>> + * the SMMU).
>> + */
>> +struct arm_smmu_xen_device {
>> +    struct iommu_domain *domain;
>> +};
> 
> Ditto.
> 
>>     /* MMIO registers */
>>   #define ARM_SMMU_IDR0            0x0
>> @@ -412,10 +650,12 @@
>>   #define MSI_IOVA_BASE            0x8000000
>>   #define MSI_IOVA_LENGTH            0x100000
>>   +#if 0 /* Not applicable for Xen */
> 
> While the module_param_name() is not applicable in Xen, I don't see any reason to remove the variable.

Agreed.
> 
>>   static bool disable_bypass; >   module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
>>   MODULE_PARM_DESC(disable_bypass,
>>       "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
>> +#endif
>>     enum pri_resp {
>>       PRI_RESP_DENY,
>> @@ -423,6 +663,7 @@ enum pri_resp {
>>       PRI_RESP_SUCC,
>>   };
>>   +#if 0 /* Xen: No MSI support in this iteration */
>>   enum arm_smmu_msi_index {
>>       EVTQ_MSI_INDEX,
>>       GERROR_MSI_INDEX,
>> @@ -447,6 +688,7 @@ static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
>>           ARM_SMMU_PRIQ_IRQ_CFG2,
>>       },
>>   };
>> +#endif
>>     struct arm_smmu_cmdq_ent {
>>       /* Common fields */
>> @@ -551,6 +793,8 @@ struct arm_smmu_s2_cfg {
>>       u16                vmid;
>>       u64                vttbr;
>>       u64                vtcr;
>> +    /* Xen: Domain associated to this configuration */
>> +    struct domain            *domain;
>>   };
>>     struct arm_smmu_strtab_ent {
>> @@ -623,9 +867,20 @@ struct arm_smmu_device {
>>       struct arm_smmu_strtab_cfg    strtab_cfg;
>>         /* IOMMU core code handle */
>> -    struct iommu_device        iommu;
>> +    //struct iommu_device        iommu;
> 
> #if 0 but no // please.
Done.

> 
>> +
>> +    /* Xen: Need to keep a list of SMMU devices */
>> +    struct list_head                devices;
>>   };
>>   +/* Xen: Keep a list of devices associated with this driver */
>> +static DEFINE_SPINLOCK(arm_smmu_devices_lock);
>> +static LIST_HEAD(arm_smmu_devices);
>> +/* Xen: Helper for finding a device using fwnode */
>> +static
>> +struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode);
>> +
>> +
>>   /* SMMU private data for each master */
>>   struct arm_smmu_master_data {
>>       struct arm_smmu_device        *smmu;
>> @@ -642,7 +897,7 @@ enum arm_smmu_domain_stage {
>>     struct arm_smmu_domain {
>>       struct arm_smmu_device        *smmu;
>> -    struct mutex            init_mutex; /* Protects smmu pointer */
>> +    mutex            init_mutex; /* Protects smmu pointer */
>>         struct io_pgtable_ops        *pgtbl_ops;
>>       spinlock_t            pgtbl_lock;
>> @@ -737,15 +992,16 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
>>    */
>>   static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
>>   {
>> -    ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
>> +    s_time_t deadline = NOW() + MICROSECS(ARM_SMMU_POLL_TIMEOUT_US);
> 
> Please introduce proper wrappers to avoid the modification of the code.
Done.

> 
>>         while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
>> -        if (ktime_compare(ktime_get(), timeout) > 0)
>> +
>> +        if (NOW() > deadline)
> 
> Ditto.
Done.

> 
>>               return -ETIMEDOUT;
>>   -        if (wfe) {
>> +        if (wfe)
> 
> Please avoid to drop {
> 
>>               wfe();
>> -        } else {
> 
> Ditto.
Done.

> 
>> +        else {
>>               cpu_relax();
> 
> Hmmm I now see why you added cpu_relax() at the top. Well, on Xen cpu_relax is just a barrier. On Linux it is used to yield.
> 
> And that bit is worrying me. The Linux code will allow context switching to another tasks if the code is taking too much time.
> 
> Xen is not preemptible, so is it fine?
This is used when consuming the command queue and could be a potential performance issue if the queue is large. (This is never the case).
I am wondering if we should define a yeild in long run? 

> 
>>               udelay(1);
>>           }
>> @@ -931,7 +1187,7 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
>>           dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
>>       spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
>>   }
>> -
>> +#if 0
> 
> Please avoid dropping newline and explain why the #if 0.
Done.

> 
>>   /* Context descriptor manipulation functions */
>>   static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
>>   {
>> @@ -974,7 +1230,7 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
>>         cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
>>   }
>> -
>> +#endif
> 
> Ditto for the newline.
Done.

> 
>>   /* Stream table manipulation functions */
>>   static void
>>   arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
>> @@ -1044,7 +1300,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
>>               ste_live = true;
>>               break;
>>           case STRTAB_STE_0_CFG_ABORT:
>> -            if (disable_bypass)
>> +            //No bypass override for Xen
> 
> Why no leaving the variable on top with a comment. This would avoid such change.
Done.

> 
>>                   break;
>>           default:
>>               BUG(); /* STE corruption */
>> @@ -1056,7 +1312,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
>>         /* Bypass/fault */
>>       if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
>> -        if (!ste->assigned && disable_bypass)
>> +        if (!ste->assigned)
> 
> Ditto.
Done.

> 
>>               val |= STRTAB_STE_0_CFG_ABORT;
>>           else
>>               val |= STRTAB_STE_0_CFG_BYPASS;
>> @@ -1135,16 +1391,20 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
>>       void *strtab;
>>       struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
>>       struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
>> +    u32 alignment = 0;
>>         if (desc->l2ptr)
>>           return 0;
>>   -    size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
>> +    size = 1 << (STRTAB_SPLIT + LOG_2(STRTAB_STE_DWORDS) + 3);
> 
> I would prefer if you introduce ilog2.
Done.

> 
>>       strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
>>         desc->span = STRTAB_SPLIT + 1;
>> -    desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
>> -                      GFP_KERNEL | __GFP_ZERO);
>> +
>> +    alignment = 1 << ((5 + (desc->span - 1)));
> 
> Do you mind explaining the 5? Also, does the shift will always be < 32?
Its from the "Level 1 Stream Table Descriptor" from SMMU spec. I can add the ref to the spec. Yes the span is hard coded in the driver and with any spec valid span values this cannot exceed 32.
I have added a comment tot he code
> 
>> +    desc->l2ptr = _xzalloc(size, alignment);
>> +    desc->l2ptr_dma = virt_to_maddr(desc->l2ptr);
> 
> _xzalloc can fail and virt_to_maddr will result to a panic.
> 
I will move the check.
>> +
>>       if (!desc->l2ptr) {
>>           dev_err(smmu->dev,
>>               "failed to allocate l2 stream table for SID %u\n",
>> @@ -1158,7 +1418,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
>>   }
>>     /* IRQ and event handlers */
>> -static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>> +static void arm_smmu_evtq_thread(int irq, void *dev, struct cpu_user_regs *regs)
> 
> Could you please introduce a wrapper instead as it was done in smmu.c?
> 
Done.

>>   {
>>       int i;
>>       struct arm_smmu_device *smmu = dev;
>> @@ -1186,7 +1446,6 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>>         /* Sync our overflow flag, as we believe we're up to speed */
>>       q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
>> -    return IRQ_HANDLED;
>>   }
>>     static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
>> @@ -1203,7 +1462,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
>>         dev_info(smmu->dev, "unexpected PRI request received:\n");
>>       dev_info(smmu->dev,
>> -         "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
>> +         "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016lx\n",
> 
> Hmmm why?
The variable is defined as ungined long long in Linux. ( uses generic int-ll64.h) In Xen the definition changes to unsigned long for ARM_64 which actually makes sense. 
> 
>>            sid, ssid, grpid, last ? "L" : "",
>>            evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
>>            evt[0] & PRIQ_0_PERM_READ ? "R" : "",
>> @@ -1227,7 +1486,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
>>       }
>>   }
>>   -static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
>> +static void arm_smmu_priq_thread(int irq, void *dev, struct cpu_user_regs *regs)
> 
> Ditto about the prototype.
Done.
> 
>>   {
>>       struct arm_smmu_device *smmu = dev;
>>       struct arm_smmu_queue *q = &smmu->priq.q;
>> @@ -1243,18 +1502,16 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
>>         /* Sync our overflow flag, as we believe we're up to speed */
>>       q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
>> -    return IRQ_HANDLED;
>>   }
>>   -static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
>> +static void arm_smmu_cmdq_sync_handler(int irq, void *dev, struct cpu_user_regs *regs)
> 
> Ditto.
Done.

> 
>>   {
>>       /* We don't actually use CMD_SYNC interrupts for anything */
>> -    return IRQ_HANDLED;
>>   }
>>     static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
>>   -static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
>> +static void arm_smmu_gerror_handler(int irq, void *dev, struct cpu_user_regs *regs)
> 
> Ditto
Done.

> 
>>   {
>>       u32 gerror, gerrorn, active;
>>       struct arm_smmu_device *smmu = dev;
>> @@ -1264,7 +1521,7 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
>>         active = gerror ^ gerrorn;
>>       if (!(active & GERROR_ERR_MASK))
>> -        return IRQ_NONE; /* No errors pending */
>> +        return; /* No errors pending */
>>         dev_warn(smmu->dev,
>>            "unexpected global error reported (0x%08x), this could be serious\n",
>> @@ -1286,7 +1543,7 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
>>         if (active & GERROR_MSI_CMDQ_ABT_ERR) {
>>           dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
>> -        arm_smmu_cmdq_sync_handler(irq, smmu->dev);
>> +        arm_smmu_cmdq_sync_handler(irq, smmu->dev, NULL);
>>       }
>>         if (active & GERROR_PRIQ_ABT_ERR)
>> @@ -1299,7 +1556,6 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
>>           arm_smmu_cmdq_skip_err(smmu);
>>         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
>> -    return IRQ_HANDLED;
>>   }
>>     /* IO_PGTABLE API */
>> @@ -1311,11 +1567,13 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
>>       arm_smmu_cmdq_issue_cmd(smmu, &cmd);
>>   }
>>   +#if 0 /*Xen: Unused function */
>>   static void arm_smmu_tlb_sync(void *cookie)
>>   {
>>       struct arm_smmu_domain *smmu_domain = cookie;
>>       __arm_smmu_tlb_sync(smmu_domain->smmu);
>>   }
>> +#endif
>>     static void arm_smmu_tlb_inv_context(void *cookie)
>>   {
>> @@ -1336,6 +1594,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
>>       __arm_smmu_tlb_sync(smmu);
>>   }
>>   +#if 0 /*Xen: Unused functionality */
>>   static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
>>                         size_t granule, bool leaf, void *cookie)
>>   {
>> @@ -1362,7 +1621,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
>>       } while (size -= granule);
>>   }
>>   -static const struct iommu_gather_ops arm_smmu_gather_ops = {
>> +static struct iommu_gather_ops arm_smmu_gather_ops = {
>>       .tlb_flush_all    = arm_smmu_tlb_inv_context,
>>       .tlb_add_flush    = arm_smmu_tlb_inv_range_nosync,
>>       .tlb_sync    = arm_smmu_tlb_sync,
>> @@ -1380,6 +1639,11 @@ static bool arm_smmu_capable(enum iommu_cap cap)
>>           return false;
>>       }
>>   }
>> +#endif
>> +/* Xen: Stub out DMA domain related functions */
>> +#define iommu_get_dma_cookie(dom) 0
>> +#define iommu_put_dma_cookie(dom) 0
> 
> Please stub them at the top of the file.
Done.

> 
>> +
>>     static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>>   {
>> @@ -1410,6 +1674,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>>       return &smmu_domain->domain;
>>   }
>>   +#if 0
> 
> Please explain the #if 0
> 
Done.

>>   static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
>>   {
>>       int idx, size = 1 << span;
>> @@ -1427,36 +1692,20 @@ static void arm_smmu_bitmap_free(unsigned long *map, int idx)
>>   {
>>       clear_bit(idx, map);
>>   }
>> +#endif
>>     static void arm_smmu_domain_free(struct iommu_domain *domain)
>>   {
>>       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>> -    struct arm_smmu_device *smmu = smmu_domain->smmu;
>> -
>> -    iommu_put_dma_cookie(domain);
>> -    free_io_pgtable_ops(smmu_domain->pgtbl_ops);
>> -
>> -    /* Free the CD and ASID, if we allocated them */
>> -    if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
>> -        struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
>> -
>> -        if (cfg->cdptr) {
>> -            dmam_free_coherent(smmu_domain->smmu->dev,
>> -                       CTXDESC_CD_DWORDS << 3,
>> -                       cfg->cdptr,
>> -                       cfg->cdptr_dma);
>> -
>> -            arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
>> -        }
>> -    } else {
>> -        struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
>> -        if (cfg->vmid)
>> -            arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
>> -    }
>> +    /*
>> +     * Xen: Remove the free functions that are not used and code related
>> +     * to S1 translation. We just need to free the domain here.
>> +     */
> 
> Please use #if 0 rather than removing the code + comment on top. But I am not sure why you drop the S2 free code. Shouldn't we allocate VMID from the SMMU?
I am picking up the vmid from the domain I am sharing the page tables with. domain->arch.p2m.vmid. This seemed valid. Please let me know if we want to generate a different vmid?


> 
>>         kfree(smmu_domain);
>>   }
>>   +#if 0
>>   static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
>>                          struct io_pgtable_cfg *pgtbl_cfg)
>>   {
>> @@ -1488,33 +1737,30 @@ out_free_asid:
>>       arm_smmu_bitmap_free(smmu->asid_map, asid);
>>       return ret;
>>   }
>> +#endif
>>   -static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
>> -                       struct io_pgtable_cfg *pgtbl_cfg)
>> +static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain)
>>   {
>> -    int vmid;
>> -    struct arm_smmu_device *smmu = smmu_domain->smmu;
>>       struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
>>   -    vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
>> -    if (vmid < 0)
>> -        return vmid;
>> +    /* Xen: Set the values as needed */
>>   -    cfg->vmid    = (u16)vmid;
>> -    cfg->vttbr    = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
>> -    cfg->vtcr    = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
>> +    cfg->vmid    = cfg->domain->arch.p2m.vmid;
> 
> See my comment above.
I am wondering why we are considering this value invalid? Since the page tables are shared we can use the vmid from the domain. Is the concern regarding the size?

Thanks,
Sameer
> 
>> +    cfg->vttbr    = page_to_maddr(cfg->domain->arch.p2m.root);
>> +    cfg->vtcr    = READ_SYSREG32(VTCR_EL2);
> 
> This looks a bit suspicious. Looking at the specs, the bits does not seem to correspond here.
> 
> I will look at the rest either tomorrow or Monday.
> 
> Cheers,
>
Julien Grall Nov. 20, 2017, 2:25 p.m. UTC | #4
Hi Sameer,

On 19/11/17 07:45, Goel, Sameer wrote:
> On 10/12/2017 10:36 AM, Julien Grall wrote:
>>> +
>>> +typedef paddr_t phys_addr_t;
>>> +typedef paddr_t dma_addr_t;
>>> +
>>> +/* Alias to Xen device tree helpers */
>>> +#define device_node dt_device_node
>>> +#define of_phandle_args dt_phandle_args
>>> +#define of_device_id dt_device_match
>>> +#define of_match_node dt_match_node
>>> +#define of_property_read_u32(np, pname, out) (!dt_property_read_u32(np, pname, out))
>>> +#define of_property_read_bool dt_property_read_bool
>>> +#define of_parse_phandle_with_args dt_parse_phandle_with_args
>>> +#define mutex spinlock_t
>>> +#define mutex_init spin_lock_init
>>> +#define mutex_lock spin_lock
>>> +#define mutex_unlock spin_unlock
>>
>> mutex and spinlock are not the same. The former is sleeping whilst the later is not.
>>
>> Can you please explain why this is fine and possibly add that in a comment?
>>
> Mutex is used to protect the access to smmu device internal data structure when setting up the s2 config and installing stes for a given device in Linux. The ste programming  operation can be competitively long but in the current testing, I did not see this blocking for too long. I will put in a comment.

Well, I don't think that this is a justification. You tested on one 
platform and does not explain how you perform them.

If I understand correctly, that mutex is only used when assigning 
device. So it might be ok to switch to spinlock. But that's not because 
the operation is not too long, it just because it would be only perform 
by the toolstack (domctl) and will not be issued by guest.

> 
>>> +
>>> +/* Xen: Helpers to get device MMIO and IRQs */
>>> +struct resource {
>>> +    u64 addr;
>>> +    u64 size;
>>> +    unsigned int type;
>>> +};
>>
>> Likely we want a compat header for defining Linux helpers. This would avoid replicating it everywhere.
> Agreed.
> 
That should be
>>
>>> +
>>> +#define resource_size(res) ((res)->size)
>>> +
>>> +#define platform_device device
>>> +
>>> +#define IORESOURCE_MEM 0
>>> +#define IORESOURCE_IRQ 1
>>> +
>>> +static struct resource *platform_get_resource(struct platform_device *pdev,
>>> +                          unsigned int type,
>>> +                          unsigned int num)
>>> +{
>>> +    /*
>>> +     * The resource is only used between 2 calls of platform_get_resource.
>>> +     * It's quite ugly but it's avoid to add too much code in the part
>>> +     * imported from Linux
>>> +     */
>>> +    static struct resource res;
>>> +    struct acpi_iort_node *iort_node;
>>> +    struct acpi_iort_smmu_v3 *node_smmu_data;
>>> +    int ret = 0;
>>> +
>>> +    res.type = type;
>>> +
>>> +    switch (type) {
>>> +    case IORESOURCE_MEM:
>>> +        if (pdev->type == DEV_ACPI) {
>>> +            ret = 1;
>>> +            iort_node = pdev->acpi_node;
>>> +            node_smmu_data =
>>> +                (struct acpi_iort_smmu_v3 *)iort_node->node_data;
>>> +
>>> +            if (node_smmu_data != NULL) {
>>> +                res.addr = node_smmu_data->base_address;
>>> +                res.size = SZ_128K;
>>> +                ret = 0;
>>> +            }
>>> +        } else {
>>> +            ret = dt_device_get_address(dev_to_dt(pdev), num,
>>> +                            &res.addr, &res.size);
>>> +        }
>>> +
>>> +        return ((ret) ? NULL : &res);
>>> +
>>> +    case IORESOURCE_IRQ:
>>> +        ret = platform_get_irq(dev_to_dt(pdev), num);
>>
>> No IRQ for ACPI?
> For IRQs the code calls platform_get_irq_byname. So, the IORESOURCE_IRQ implementation is not needed at all. (DT or ACPI)

Please document it then.

[...]

>>
>>> +        udelay(sleep_us); \
>>> +    } \
>>> +    (cond) ? 0 : -ETIMEDOUT; \
>>> +})
>>> +
>>> +#define readl_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \
>>> +    readx_poll_timeout(readl_relaxed, addr, val, cond, delay_us, timeout_us)
>>> +
>>> +/* Xen: Helpers for IRQ functions */
>>> +#define request_irq(irq, func, flags, name, dev) request_irq(irq, flags, func, name, dev)
>>> +#define free_irq release_irq
>>> +
>>> +enum irqreturn {
>>> +    IRQ_NONE    = (0 << 0),
>>> +    IRQ_HANDLED    = (1 << 0),
>>> +};
>>> +
>>> +typedef enum irqreturn irqreturn_t;
>>> +
>>> +/* Device logger functions */
>>> +#define dev_print(dev, lvl, fmt, ...)                        \
>>> +     printk(lvl "smmu: " fmt, ## __VA_ARGS__)
>>> +
>>> +#define dev_dbg(dev, fmt, ...) dev_print(dev, XENLOG_DEBUG, fmt, ## __VA_ARGS__)
>>> +#define dev_notice(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
>>> +#define dev_warn(dev, fmt, ...) dev_print(dev, XENLOG_WARNING, fmt, ## __VA_ARGS__)
>>> +#define dev_err(dev, fmt, ...) dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
>>> +#define dev_info(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
>>> +
>>> +#define dev_err_ratelimited(dev, fmt, ...)                    \
>>> +     dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
>>> +
>>> +#define dev_name(dev) dt_node_full_name(dev_to_dt(dev))
>>> +
>>> +/* Alias to Xen allocation helpers */
>>> +#define kfree xfree
>>> +#define kmalloc(size, flags)        _xmalloc(size, sizeof(void *))
>>> +#define kzalloc(size, flags)        _xzalloc(size, sizeof(void *))
>>> +#define devm_kzalloc(dev, size, flags)    _xzalloc(size, sizeof(void *))
>>> +#define kmalloc_array(size, n, flags)    _xmalloc_array(size, sizeof(void *), n)
>>> +
>>> +/* Compatibility defines */
>>> +#undef WARN_ON
>>> +#define WARN_ON(cond) (!!cond)
>>
>> Why do you redefine WARN_ON?Need it to be a scalar value. The Xen implementation is a do..while. Did not want a large impact hence the local define. I can try proposing a new common define.

Well, I don't think it is acceptable to redefine WARN_ON. At least 
without trying to modify the common version.

>>
>>> +#define WARN_ON_ONCE(cond) WARN_ON(cond)
>>> Hmmm, can't we implement a common WARN_ON_ONCE?
> Will not really be executed. Defining it to maintain compatibility. I think this file is the right place for this define. We can send it to a generic file if so needed.

I can't see why we wouldn't want to have a WARN_ON_ONCE in the common 
code. If you disagree, please explain.

[...]

>>
>>> +        else {
>>>                cpu_relax();
>>
>> Hmmm I now see why you added cpu_relax() at the top. Well, on Xen cpu_relax is just a barrier. On Linux it is used to yield.
>>
>> And that bit is worrying me. The Linux code will allow context switching to another tasks if the code is taking too much time.
>>
>> Xen is not preemptible, so is it fine?
> This is used when consuming the command queue and could be a potential performance issue if the queue is large. (This is never the case).
> I am wondering if we should define a yeild in long run?

As I said before, Xen is not preemptible. In this particular case, there 
are spinlock taken by the callers (e.g any function assigning device). 
So yield would just make it worst.

[...]

>>
>>>        strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
>>>          desc->span = STRTAB_SPLIT + 1;
>>> -    desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
>>> -                      GFP_KERNEL | __GFP_ZERO);
>>> +
>>> +    alignment = 1 << ((5 + (desc->span - 1)));
>>
>> Do you mind explaining the 5? Also, does the shift will always be < 32?
> Its from the "Level 1 Stream Table Descriptor" from SMMU spec. I can add the ref to the spec. Yes the span is hard coded in the driver and with any spec valid span values this cannot exceed 32.
> I have added a comment tot he code
>>
>>> +    desc->l2ptr = _xzalloc(size, alignment);
>>> +    desc->l2ptr_dma = virt_to_maddr(desc->l2ptr);
>>
>> _xzalloc can fail and virt_to_maddr will result to a panic.
>>
> I will move the check.
>>> +
>>>        if (!desc->l2ptr) {
>>>            dev_err(smmu->dev,
>>>                "failed to allocate l2 stream table for SID %u\n",
>>> @@ -1158,7 +1418,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
>>>    }
>>>      /* IRQ and event handlers */
>>> -static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>>> +static void arm_smmu_evtq_thread(int irq, void *dev, struct cpu_user_regs *regs)
>>
>> Could you please introduce a wrapper instead as it was done in smmu.c?
>>
> Done.
> 
>>>    {
>>>        int i;
>>>        struct arm_smmu_device *smmu = dev;
>>> @@ -1186,7 +1446,6 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>>>          /* Sync our overflow flag, as we believe we're up to speed */
>>>        q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
>>> -    return IRQ_HANDLED;
>>>    }
>>>      static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
>>> @@ -1203,7 +1462,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
>>>          dev_info(smmu->dev, "unexpected PRI request received:\n");
>>>        dev_info(smmu->dev,
>>> -         "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
>>> +         "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016lx\n",
>>
>> Hmmm why?
> The variable is defined as ungined long long in Linux. ( uses generic int-ll64.h) In Xen the definition changes to unsigned long for ARM_64 which actually makes sense.

u64 is a 64-bit variable. And therefore the format should be PRIx64 to 
accomodate 64-bit and 32-bit.

[...]

>>
>>> +
>>>      static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>>>    {
>>> @@ -1410,6 +1674,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>>>        return &smmu_domain->domain;
>>>    }
>>>    +#if 0
>>
>> Please explain the #if 0
>>
> Done.
> 
>>>    static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
>>>    {
>>>        int idx, size = 1 << span;
>>> @@ -1427,36 +1692,20 @@ static void arm_smmu_bitmap_free(unsigned long *map, int idx)
>>>    {
>>>        clear_bit(idx, map);
>>>    }
>>> +#endif
>>>      static void arm_smmu_domain_free(struct iommu_domain *domain)
>>>    {
>>>        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>> -    struct arm_smmu_device *smmu = smmu_domain->smmu;
>>> -
>>> -    iommu_put_dma_cookie(domain);
>>> -    free_io_pgtable_ops(smmu_domain->pgtbl_ops);
>>> -
>>> -    /* Free the CD and ASID, if we allocated them */
>>> -    if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
>>> -        struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
>>> -
>>> -        if (cfg->cdptr) {
>>> -            dmam_free_coherent(smmu_domain->smmu->dev,
>>> -                       CTXDESC_CD_DWORDS << 3,
>>> -                       cfg->cdptr,
>>> -                       cfg->cdptr_dma);
>>> -
>>> -            arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
>>> -        }
>>> -    } else {
>>> -        struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
>>> -        if (cfg->vmid)
>>> -            arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
>>> -    }
>>> +    /*
>>> +     * Xen: Remove the free functions that are not used and code related
>>> +     * to S1 translation. We just need to free the domain here.
>>> +     */
>>
>> Please use #if 0 rather than removing the code + comment on top. But I am not sure why you drop the S2 free code. Shouldn't we allocate VMID from the SMMU?
> I am picking up the vmid from the domain I am sharing the page tables with. domain->arch.p2m.vmid. This seemed valid. Please let me know if we want to generate a different vmid?

The processor and the SMMU may support either 8 or 16bits VMID but I 
don't think any thing in the spec says that both processor and SMMU must 
support the same value.

So it would be possible to have a processor supporting 16-bits VMID and 
the SMMU only 8-bits VMID.

Adding a check at the SMMU initialization might be a solution. But as 
the code for allocating the VMID is already present, then I would prefer 
to we stick with different the VMID.

> 
> 
>>
>>>          kfree(smmu_domain);
>>>    }
>>>    +#if 0
>>>    static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
>>>                           struct io_pgtable_cfg *pgtbl_cfg)
>>>    {
>>> @@ -1488,33 +1737,30 @@ out_free_asid:
>>>        arm_smmu_bitmap_free(smmu->asid_map, asid);
>>>        return ret;
>>>    }
>>> +#endif
>>>    -static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
>>> -                       struct io_pgtable_cfg *pgtbl_cfg)
>>> +static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain)
>>>    {
>>> -    int vmid;
>>> -    struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>        struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
>>>    -    vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
>>> -    if (vmid < 0)
>>> -        return vmid;
>>> +    /* Xen: Set the values as needed */
>>>    -    cfg->vmid    = (u16)vmid;
>>> -    cfg->vttbr    = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
>>> -    cfg->vtcr    = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
>>> +    cfg->vmid    = cfg->domain->arch.p2m.vmid;
>>
>> See my comment above.
> I am wondering why we are considering this value invalid? Since the page tables are shared we can use the vmid from the domain. Is the concern regarding the size?

See above.

Cheers,
Robin Murphy Nov. 20, 2017, 3:19 p.m. UTC | #5
On 20/11/17 14:25, Julien Grall wrote:
[...]
>>>
>>>> +        else {
>>>>                cpu_relax();
>>>
>>> Hmmm I now see why you added cpu_relax() at the top. Well, on Xen 
>>> cpu_relax is just a barrier. On Linux it is used to yield.
>>>
>>> And that bit is worrying me. The Linux code will allow context 
>>> switching to another tasks if the code is taking too much time.
>>>
>>> Xen is not preemptible, so is it fine?
>> This is used when consuming the command queue and could be a potential 
>> performance issue if the queue is large. (This is never the case).
>> I am wondering if we should define a yeild in long run?
> 
> As I said before, Xen is not preemptible. In this particular case, there 
> are spinlock taken by the callers (e.g any function assigning device). 
> So yield would just make it worst.

The arguments here don't make much sense - the "yield" instruction has 
nothing to do with software-level concepts of preemption. It is a hint 
to SMT *hardware* that this logical processor is doing nothing useful in 
the short term, so it might be a good idea to let other logical 
processor(s) have priority over shared execution resources if applicable.

Until SMT CPUs become commonly available, though, it's somewhat of a 
moot point and mostly just a future-proofing consideration.

Robin.
Julien Grall Nov. 20, 2017, 3:24 p.m. UTC | #6
Hi,

On 20/11/17 15:19, Robin Murphy wrote:
> On 20/11/17 14:25, Julien Grall wrote:
> [...]
>>>>
>>>>> +        else {
>>>>>                cpu_relax();
>>>>
>>>> Hmmm I now see why you added cpu_relax() at the top. Well, on Xen 
>>>> cpu_relax is just a barrier. On Linux it is used to yield.
>>>>
>>>> And that bit is worrying me. The Linux code will allow context 
>>>> switching to another tasks if the code is taking too much time.
>>>>
>>>> Xen is not preemptible, so is it fine?
>>> This is used when consuming the command queue and could be a 
>>> potential performance issue if the queue is large. (This is never the 
>>> case).
>>> I am wondering if we should define a yeild in long run?
>>
>> As I said before, Xen is not preemptible. In this particular case, 
>> there are spinlock taken by the callers (e.g any function assigning 
>> device). So yield would just make it worst.
> 
> The arguments here don't make much sense - the "yield" instruction has 
> nothing to do with software-level concepts of preemption. It is a hint 
> to SMT *hardware* that this logical processor is doing nothing useful in 
> the short term, so it might be a good idea to let other logical 
> processor(s) have priority over shared execution resources if applicable.

Oh, sorry I thought this could also be used by the software to switch 
between thread. Please disregard my comment then.

> 
> Until SMT CPUs become commonly available, though, it's somewhat of a 
> moot point and mostly just a future-proofing consideration.
> 
> Robin.

Cheers,
Goel, Sameer Nov. 22, 2017, 2:17 a.m. UTC | #7
On 11/20/2017 7:25 AM, Julien Grall wrote:
> Hi Sameer,
> 
> On 19/11/17 07:45, Goel, Sameer wrote:
>> On 10/12/2017 10:36 AM, Julien Grall wrote:
>>>> +
>>>> +typedef paddr_t phys_addr_t;
>>>> +typedef paddr_t dma_addr_t;
>>>> +
>>>> +/* Alias to Xen device tree helpers */
>>>> +#define device_node dt_device_node
>>>> +#define of_phandle_args dt_phandle_args
>>>> +#define of_device_id dt_device_match
>>>> +#define of_match_node dt_match_node
>>>> +#define of_property_read_u32(np, pname, out) (!dt_property_read_u32(np, pname, out))
>>>> +#define of_property_read_bool dt_property_read_bool
>>>> +#define of_parse_phandle_with_args dt_parse_phandle_with_args
>>>> +#define mutex spinlock_t
>>>> +#define mutex_init spin_lock_init
>>>> +#define mutex_lock spin_lock
>>>> +#define mutex_unlock spin_unlock
>>>
>>> mutex and spinlock are not the same. The former is sleeping whilst the later is not.
>>>
>>> Can you please explain why this is fine and possibly add that in a comment?
>>>
>> Mutex is used to protect the access to smmu device internal data structure when setting up the s2 config and installing stes for a given device in Linux. The ste programming  operation can be competitively long but in the current testing, I did not see this blocking for too long. I will put in a comment.
> 
> Well, I don't think that this is a justification. You tested on one platform and does not explain how you perform them.
> 
> If I understand correctly, that mutex is only used when assigning device. So it might be ok to switch to spinlock. But that's not because the operation is not too long, it just because it would be only perform by the toolstack (domctl) and will not be issued by guest.
Ok. I agree ans will update the comment. 
> 
>>
>>>> +
>>>> +/* Xen: Helpers to get device MMIO and IRQs */
>>>> +struct resource {
>>>> +    u64 addr;
>>>> +    u64 size;
>>>> +    unsigned int type;
>>>> +};
>>>
>>> Likely we want a compat header for defining Linux helpers. This would avoid replicating it everywhere.
>> Agreed.
>>
> That should be
>>>
>>>> +
>>>> +#define resource_size(res) ((res)->size)
>>>> +
>>>> +#define platform_device device
>>>> +
>>>> +#define IORESOURCE_MEM 0
>>>> +#define IORESOURCE_IRQ 1
>>>> +
>>>> +static struct resource *platform_get_resource(struct platform_device *pdev,
>>>> +                          unsigned int type,
>>>> +                          unsigned int num)
>>>> +{
>>>> +    /*
>>>> +     * The resource is only used between 2 calls of platform_get_resource.
>>>> +     * It's quite ugly but it's avoid to add too much code in the part
>>>> +     * imported from Linux
>>>> +     */
>>>> +    static struct resource res;
>>>> +    struct acpi_iort_node *iort_node;
>>>> +    struct acpi_iort_smmu_v3 *node_smmu_data;
>>>> +    int ret = 0;
>>>> +
>>>> +    res.type = type;
>>>> +
>>>> +    switch (type) {
>>>> +    case IORESOURCE_MEM:
>>>> +        if (pdev->type == DEV_ACPI) {
>>>> +            ret = 1;
>>>> +            iort_node = pdev->acpi_node;
>>>> +            node_smmu_data =
>>>> +                (struct acpi_iort_smmu_v3 *)iort_node->node_data;
>>>> +
>>>> +            if (node_smmu_data != NULL) {
>>>> +                res.addr = node_smmu_data->base_address;
>>>> +                res.size = SZ_128K;
>>>> +                ret = 0;
>>>> +            }
>>>> +        } else {
>>>> +            ret = dt_device_get_address(dev_to_dt(pdev), num,
>>>> +                            &res.addr, &res.size);
>>>> +        }
>>>> +
>>>> +        return ((ret) ? NULL : &res);
>>>> +
>>>> +    case IORESOURCE_IRQ:
>>>> +        ret = platform_get_irq(dev_to_dt(pdev), num);
>>>
>>> No IRQ for ACPI?
>> For IRQs the code calls platform_get_irq_byname. So, the IORESOURCE_IRQ implementation is not needed at all. (DT or ACPI)
> 
> Please document it then.
Ok.
> 
> [...]
> 
>>>
>>>> +        udelay(sleep_us); \
>>>> +    } \
>>>> +    (cond) ? 0 : -ETIMEDOUT; \
>>>> +})
>>>> +
>>>> +#define readl_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \
>>>> +    readx_poll_timeout(readl_relaxed, addr, val, cond, delay_us, timeout_us)
>>>> +
>>>> +/* Xen: Helpers for IRQ functions */
>>>> +#define request_irq(irq, func, flags, name, dev) request_irq(irq, flags, func, name, dev)
>>>> +#define free_irq release_irq
>>>> +
>>>> +enum irqreturn {
>>>> +    IRQ_NONE    = (0 << 0),
>>>> +    IRQ_HANDLED    = (1 << 0),
>>>> +};
>>>> +
>>>> +typedef enum irqreturn irqreturn_t;
>>>> +
>>>> +/* Device logger functions */
>>>> +#define dev_print(dev, lvl, fmt, ...)                        \
>>>> +     printk(lvl "smmu: " fmt, ## __VA_ARGS__)
>>>> +
>>>> +#define dev_dbg(dev, fmt, ...) dev_print(dev, XENLOG_DEBUG, fmt, ## __VA_ARGS__)
>>>> +#define dev_notice(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
>>>> +#define dev_warn(dev, fmt, ...) dev_print(dev, XENLOG_WARNING, fmt, ## __VA_ARGS__)
>>>> +#define dev_err(dev, fmt, ...) dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
>>>> +#define dev_info(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
>>>> +
>>>> +#define dev_err_ratelimited(dev, fmt, ...)                    \
>>>> +     dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
>>>> +
>>>> +#define dev_name(dev) dt_node_full_name(dev_to_dt(dev))
>>>> +
>>>> +/* Alias to Xen allocation helpers */
>>>> +#define kfree xfree
>>>> +#define kmalloc(size, flags)        _xmalloc(size, sizeof(void *))
>>>> +#define kzalloc(size, flags)        _xzalloc(size, sizeof(void *))
>>>> +#define devm_kzalloc(dev, size, flags)    _xzalloc(size, sizeof(void *))
>>>> +#define kmalloc_array(size, n, flags)    _xmalloc_array(size, sizeof(void *), n)
>>>> +
>>>> +/* Compatibility defines */
>>>> +#undef WARN_ON
>>>> +#define WARN_ON(cond) (!!cond)
>>>
>>> Why do you redefine WARN_ON?Need it to be a scalar value. The Xen implementation is a do..while. Did not want a large impact hence the local define. I can try proposing a new common define.
> 
> Well, I don't think it is acceptable to redefine WARN_ON. At least without trying to modify the common version.

I will put this in the common folder.
> 
>>>
>>>> +#define WARN_ON_ONCE(cond) WARN_ON(cond)
>>>> Hmmm, can't we implement a common WARN_ON_ONCE?
>> Will not really be executed. Defining it to maintain compatibility. I think this file is the right place for this define. We can send it to a generic file if so needed.
> 
> I can't see why we wouldn't want to have a WARN_ON_ONCE in the common code. If you disagree, please explain.
> 
> [...]
> 
>>>
>>>> +        else {
>>>>                cpu_relax();
>>>
>>> Hmmm I now see why you added cpu_relax() at the top. Well, on Xen cpu_relax is just a barrier. On Linux it is used to yield.
>>>
>>> And that bit is worrying me. The Linux code will allow context switching to another tasks if the code is taking too much time.
>>>
>>> Xen is not preemptible, so is it fine?
>> This is used when consuming the command queue and could be a potential performance issue if the queue is large. (This is never the case).
>> I am wondering if we should define a yeild in long run?
> 
> As I said before, Xen is not preemptible. In this particular case, there are spinlock taken by the callers (e.g any function assigning device). So yield would just make it worst.
> 
> [...]
I can keep the memory barrier as is. This should not impact much. This should not be a concern.
> 
>>>
>>>>        strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
>>>>          desc->span = STRTAB_SPLIT + 1;
>>>> -    desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
>>>> -                      GFP_KERNEL | __GFP_ZERO);
>>>> +
>>>> +    alignment = 1 << ((5 + (desc->span - 1)));
>>>
>>> Do you mind explaining the 5? Also, does the shift will always be < 32?
>> Its from the "Level 1 Stream Table Descriptor" from SMMU spec. I can add the ref to the spec. Yes the span is hard coded in the driver and with any spec valid span values this cannot exceed 32.
>> I have added a comment tot he code
>>>
>>>> +    desc->l2ptr = _xzalloc(size, alignment);
>>>> +    desc->l2ptr_dma = virt_to_maddr(desc->l2ptr);
>>>
>>> _xzalloc can fail and virt_to_maddr will result to a panic.
>>>
>> I will move the check.
>>>> +
>>>>        if (!desc->l2ptr) {
>>>>            dev_err(smmu->dev,
>>>>                "failed to allocate l2 stream table for SID %u\n",
>>>> @@ -1158,7 +1418,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
>>>>    }
>>>>      /* IRQ and event handlers */
>>>> -static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>>>> +static void arm_smmu_evtq_thread(int irq, void *dev, struct cpu_user_regs *regs)
>>>
>>> Could you please introduce a wrapper instead as it was done in smmu.c?
>>>
>> Done.
>>
>>>>    {
>>>>        int i;
>>>>        struct arm_smmu_device *smmu = dev;
>>>> @@ -1186,7 +1446,6 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>>>>          /* Sync our overflow flag, as we believe we're up to speed */
>>>>        q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
>>>> -    return IRQ_HANDLED;
>>>>    }
>>>>      static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
>>>> @@ -1203,7 +1462,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
>>>>          dev_info(smmu->dev, "unexpected PRI request received:\n");
>>>>        dev_info(smmu->dev,
>>>> -         "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
>>>> +         "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016lx\n",
>>>
>>> Hmmm why?
>> The variable is defined as ungined long long in Linux. ( uses generic int-ll64.h) In Xen the definition changes to unsigned long for ARM_64 which actually makes sense.
> 
> u64 is a 64-bit variable. And therefore the format should be PRIx64 to accomodate 64-bit and 32-bit.
Sure. I can change the format specifier. 
> 
> [...]
> 
>>>
>>>> +
>>>>      static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>>>>    {
>>>> @@ -1410,6 +1674,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>>>>        return &smmu_domain->domain;
>>>>    }
>>>>    +#if 0
>>>
>>> Please explain the #if 0
>>>
>> Done.
>>
>>>>    static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
>>>>    {
>>>>        int idx, size = 1 << span;
>>>> @@ -1427,36 +1692,20 @@ static void arm_smmu_bitmap_free(unsigned long *map, int idx)
>>>>    {
>>>>        clear_bit(idx, map);
>>>>    }
>>>> +#endif
>>>>      static void arm_smmu_domain_free(struct iommu_domain *domain)
>>>>    {
>>>>        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>>> -    struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>> -
>>>> -    iommu_put_dma_cookie(domain);
>>>> -    free_io_pgtable_ops(smmu_domain->pgtbl_ops);
>>>> -
>>>> -    /* Free the CD and ASID, if we allocated them */
>>>> -    if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
>>>> -        struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
>>>> -
>>>> -        if (cfg->cdptr) {
>>>> -            dmam_free_coherent(smmu_domain->smmu->dev,
>>>> -                       CTXDESC_CD_DWORDS << 3,
>>>> -                       cfg->cdptr,
>>>> -                       cfg->cdptr_dma);
>>>> -
>>>> -            arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
>>>> -        }
>>>> -    } else {
>>>> -        struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
>>>> -        if (cfg->vmid)
>>>> -            arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
>>>> -    }
>>>> +    /*
>>>> +     * Xen: Remove the free functions that are not used and code related
>>>> +     * to S1 translation. We just need to free the domain here.
>>>> +     */
>>>
>>> Please use #if 0 rather than removing the code + comment on top. But I am not sure why you drop the S2 free code. Shouldn't we allocate VMID from the SMMU?
>> I am picking up the vmid from the domain I am sharing the page tables with. domain->arch.p2m.vmid. This seemed valid. Please let me know if we want to generate a different vmid?
> 
> The processor and the SMMU may support either 8 or 16bits VMID but I don't think any thing in the spec says that both processor and SMMU must support the same value.
> 
> So it would be possible to have a processor supporting 16-bits VMID and the SMMU only 8-bits VMID.
> 
> Adding a check at the SMMU initialization might be a solution. But as the code for allocating the VMID is already present, then I would prefer to we stick with different the VMID.
Sure. For this iteration I will generate an SMMU specific VMID. Do you think it would be an optimization to reuse the CPU VMID if SMMU supports 16bit VMID?

Thanks,
Sameer
> 
>>
>>
>>>
>>>>          kfree(smmu_domain);
>>>>    }
>>>>    +#if 0
>>>>    static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
>>>>                           struct io_pgtable_cfg *pgtbl_cfg)
>>>>    {
>>>> @@ -1488,33 +1737,30 @@ out_free_asid:
>>>>        arm_smmu_bitmap_free(smmu->asid_map, asid);
>>>>        return ret;
>>>>    }
>>>> +#endif
>>>>    -static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
>>>> -                       struct io_pgtable_cfg *pgtbl_cfg)
>>>> +static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain)
>>>>    {
>>>> -    int vmid;
>>>> -    struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>        struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
>>>>    -    vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
>>>> -    if (vmid < 0)
>>>> -        return vmid;
>>>> +    /* Xen: Set the values as needed */
>>>>    -    cfg->vmid    = (u16)vmid;
>>>> -    cfg->vttbr    = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
>>>> -    cfg->vtcr    = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
>>>> +    cfg->vmid    = cfg->domain->arch.p2m.vmid;
>>>
>>> See my comment above.
>> I am wondering why we are considering this value invalid? Since the page tables are shared we can use the vmid from the domain. Is the concern regarding the size?
> 
> See above.
> 
> Cheers,
>
diff mbox

Patch

diff --git a/xen/drivers/passthrough/arm/Makefile b/xen/drivers/passthrough/arm/Makefile
index f4cd26e..57a6da6 100644
--- a/xen/drivers/passthrough/arm/Makefile
+++ b/xen/drivers/passthrough/arm/Makefile
@@ -1,2 +1,3 @@ 
 obj-y += iommu.o
 obj-y += smmu.o
+obj-y += smmu-v3.o
diff --git a/xen/drivers/passthrough/arm/smmu-v3.c b/xen/drivers/passthrough/arm/smmu-v3.c
index 380969a..8f3b43d 100644
--- a/xen/drivers/passthrough/arm/smmu-v3.c
+++ b/xen/drivers/passthrough/arm/smmu-v3.c
@@ -18,28 +18,266 @@ 
  * Author: Will Deacon <will.deacon@arm.com>
  *
  * This driver is powered by bad coffee and bombay mix.
+ *
+ *
+ * Based on Linux drivers/iommu/arm-smmu-v3.c
+ * => commit bdf95923086fb359ccb44c815724c3ace1611c90
+ *
+ * Xen modifications:
+ * Sameer Goel <sgoel@codeaurora.org>
+ * Copyright (C) 2017, The Linux Foundation, All rights reserved.
+ *
  */
 
-#include <linux/acpi.h>
-#include <linux/acpi_iort.h>
-#include <linux/delay.h>
-#include <linux/dma-iommu.h>
-#include <linux/err.h>
-#include <linux/interrupt.h>
-#include <linux/iommu.h>
-#include <linux/iopoll.h>
-#include <linux/module.h>
-#include <linux/msi.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_iommu.h>
-#include <linux/of_platform.h>
-#include <linux/pci.h>
-#include <linux/platform_device.h>
-
-#include <linux/amba/bus.h>
-
-#include "io-pgtable.h"
+#include <xen/config.h>
+#include <xen/delay.h>
+#include <xen/errno.h>
+#include <xen/err.h>
+#include <xen/irq.h>
+#include <xen/lib.h>
+#include <xen/list.h>
+#include <xen/mm.h>
+#include <xen/vmap.h>
+#include <xen/rbtree.h>
+#include <xen/sched.h>
+#include <xen/sizes.h>
+#include <asm/atomic.h>
+#include <asm/device.h>
+#include <asm/io.h>
+#include <asm/platform.h>
+#include <xen/acpi.h>
+
+typedef paddr_t phys_addr_t;
+typedef paddr_t dma_addr_t;
+
+/* Alias to Xen device tree helpers */
+#define device_node dt_device_node
+#define of_phandle_args dt_phandle_args
+#define of_device_id dt_device_match
+#define of_match_node dt_match_node
+#define of_property_read_u32(np, pname, out) (!dt_property_read_u32(np, pname, out))
+#define of_property_read_bool dt_property_read_bool
+#define of_parse_phandle_with_args dt_parse_phandle_with_args
+#define mutex spinlock_t
+#define mutex_init spin_lock_init
+#define mutex_lock spin_lock
+#define mutex_unlock spin_unlock
+
+/* Xen: Helpers to get device MMIO and IRQs */
+struct resource {
+	u64 addr;
+	u64 size;
+	unsigned int type;
+};
+
+#define resource_size(res) ((res)->size)
+
+#define platform_device device
+
+#define IORESOURCE_MEM 0
+#define IORESOURCE_IRQ 1
+
+static struct resource *platform_get_resource(struct platform_device *pdev,
+					      unsigned int type,
+					      unsigned int num)
+{
+	/*
+	 * The resource is only used between 2 calls of platform_get_resource.
+	 * It's quite ugly but it's avoid to add too much code in the part
+	 * imported from Linux
+	 */
+	static struct resource res;
+	struct acpi_iort_node *iort_node;
+	struct acpi_iort_smmu_v3 *node_smmu_data;
+	int ret = 0;
+
+	res.type = type;
+
+	switch (type) {
+	case IORESOURCE_MEM:
+		if (pdev->type == DEV_ACPI) {
+			ret = 1;
+			iort_node = pdev->acpi_node;
+			node_smmu_data =
+				(struct acpi_iort_smmu_v3 *)iort_node->node_data;
+
+			if (node_smmu_data != NULL) {
+				res.addr = node_smmu_data->base_address;
+				res.size = SZ_128K;
+				ret = 0;
+			}
+		} else {
+			ret = dt_device_get_address(dev_to_dt(pdev), num,
+						    &res.addr, &res.size);
+		}
+
+		return ((ret) ? NULL : &res);
+
+	case IORESOURCE_IRQ:
+		ret = platform_get_irq(dev_to_dt(pdev), num);
+
+		if (ret < 0)
+			return NULL;
+
+		res.addr = ret;
+		res.size = 1;
+
+		return &res;
+
+	default:
+		return NULL;
+	}
+}
+
+static int platform_get_irq_byname(struct platform_device *pdev, const char *name)
+{
+	const struct dt_property *dtprop;
+	struct acpi_iort_node *iort_node;
+	struct acpi_iort_smmu_v3 *node_smmu_data;
+	int ret = 0;
+
+	if (pdev->type == DEV_ACPI) {
+		iort_node = pdev->acpi_node;
+		node_smmu_data = (struct acpi_iort_smmu_v3 *)iort_node->node_data;
+
+		if (node_smmu_data != NULL) {
+			if (!strcmp(name, "eventq"))
+				ret = node_smmu_data->event_gsiv;
+			else if (!strcmp(name, "priq"))
+				ret = node_smmu_data->pri_gsiv;
+			else if (!strcmp(name, "cmdq-sync"))
+				ret = node_smmu_data->sync_gsiv;
+			else if (!strcmp(name, "gerror"))
+				ret = node_smmu_data->gerr_gsiv;
+			else
+				ret = -EINVAL;
+		}
+	} else {
+		dtprop = dt_find_property(dev_to_dt(pdev), "interrupt-names", NULL);
+		if (!dtprop)
+			return -EINVAL;
+
+		if (!dtprop->value)
+			return -ENODATA;
+	}
+
+	return ret;
+}
+
+#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \
+({ \
+	s_time_t deadline = NOW() + MICROSECS(timeout_us); \
+	for (;;) { \
+		(val) = op(addr); \
+		if (cond) \
+			break; \
+		if (NOW() > deadline) { \
+			(val) = op(addr); \
+			break; \
+		} \
+		cpu_relax(); \
+		udelay(sleep_us); \
+	} \
+	(cond) ? 0 : -ETIMEDOUT; \
+})
+
+#define readl_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+	readx_poll_timeout(readl_relaxed, addr, val, cond, delay_us, timeout_us)
+
+/* Xen: Helpers for IRQ functions */
+#define request_irq(irq, func, flags, name, dev) request_irq(irq, flags, func, name, dev)
+#define free_irq release_irq
+
+enum irqreturn {
+	IRQ_NONE	= (0 << 0),
+	IRQ_HANDLED	= (1 << 0),
+};
+
+typedef enum irqreturn irqreturn_t;
+
+/* Device logger functions */
+#define dev_print(dev, lvl, fmt, ...)						\
+	 printk(lvl "smmu: " fmt, ## __VA_ARGS__)
+
+#define dev_dbg(dev, fmt, ...) dev_print(dev, XENLOG_DEBUG, fmt, ## __VA_ARGS__)
+#define dev_notice(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
+#define dev_warn(dev, fmt, ...) dev_print(dev, XENLOG_WARNING, fmt, ## __VA_ARGS__)
+#define dev_err(dev, fmt, ...) dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
+#define dev_info(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
+
+#define dev_err_ratelimited(dev, fmt, ...)					\
+	 dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
+
+#define dev_name(dev) dt_node_full_name(dev_to_dt(dev))
+
+/* Alias to Xen allocation helpers */
+#define kfree xfree
+#define kmalloc(size, flags)		_xmalloc(size, sizeof(void *))
+#define kzalloc(size, flags)		_xzalloc(size, sizeof(void *))
+#define devm_kzalloc(dev, size, flags)	_xzalloc(size, sizeof(void *))
+#define kmalloc_array(size, n, flags)	_xmalloc_array(size, sizeof(void *), n)
+
+/* Compatibility defines */
+#undef WARN_ON
+#define WARN_ON(cond) (!!cond)
+#define WARN_ON_ONCE(cond) WARN_ON(cond)
+
+static void __iomem *devm_ioremap_resource(struct device *dev,
+					   struct resource *res)
+{
+	void __iomem *ptr;
+
+	if (!res || res->type != IORESOURCE_MEM) {
+		dev_err(dev, "Invalid resource\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	ptr = ioremap_nocache(res->addr, res->size);
+	if (!ptr) {
+		dev_err(dev,
+			"ioremap failed (addr 0x%"PRIx64" size 0x%"PRIx64")\n",
+			res->addr, res->size);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return ptr;
+}
+
+/* Xen: Dummy iommu_domain */
+struct iommu_domain {
+	/* Runtime SMMU configuration for this iommu_domain */
+	struct arm_smmu_domain		*priv;
+	unsigned int			type;
+
+	atomic_t ref;
+	/* Used to link iommu_domain contexts for a same domain.
+	 * There is at least one per-SMMU to used by the domain.
+	 */
+	struct list_head		list;
+};
+/* Xen: Domain type definitions. Not really needed for Xen, defining to port
+ * Linux code as-is
+ */
+#define IOMMU_DOMAIN_UNMANAGED 0
+#define IOMMU_DOMAIN_DMA 1
+#define IOMMU_DOMAIN_IDENTITY 2
+
+/* Xen: Describes information required for a Xen domain */
+struct arm_smmu_xen_domain {
+	spinlock_t			lock;
+	/* List of iommu domains associated to this domain */
+	struct list_head		iommu_domains;
+};
+
+/*
+ * Xen: Information about each device stored in dev->archdata.iommu
+ *
+ * The dev->archdata.iommu stores the iommu_domain (runtime configuration of
+ * the SMMU).
+ */
+struct arm_smmu_xen_device {
+	struct iommu_domain *domain;
+};
 
 /* MMIO registers */
 #define ARM_SMMU_IDR0			0x0
@@ -412,10 +650,12 @@ 
 #define MSI_IOVA_BASE			0x8000000
 #define MSI_IOVA_LENGTH			0x100000
 
+#if 0 /* Not applicable for Xen */
 static bool disable_bypass;
 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_bypass,
 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
+#endif
 
 enum pri_resp {
 	PRI_RESP_DENY,
@@ -423,6 +663,7 @@  enum pri_resp {
 	PRI_RESP_SUCC,
 };
 
+#if 0 /* Xen: No MSI support in this iteration */
 enum arm_smmu_msi_index {
 	EVTQ_MSI_INDEX,
 	GERROR_MSI_INDEX,
@@ -447,6 +688,7 @@  static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
 		ARM_SMMU_PRIQ_IRQ_CFG2,
 	},
 };
+#endif
 
 struct arm_smmu_cmdq_ent {
 	/* Common fields */
@@ -551,6 +793,8 @@  struct arm_smmu_s2_cfg {
 	u16				vmid;
 	u64				vttbr;
 	u64				vtcr;
+	/* Xen: Domain associated to this configuration */
+	struct domain			*domain;
 };
 
 struct arm_smmu_strtab_ent {
@@ -623,9 +867,20 @@  struct arm_smmu_device {
 	struct arm_smmu_strtab_cfg	strtab_cfg;
 
 	/* IOMMU core code handle */
-	struct iommu_device		iommu;
+	//struct iommu_device		iommu;
+
+	/* Xen: Need to keep a list of SMMU devices */
+	struct list_head                devices;
 };
 
+/* Xen: Keep a list of devices associated with this driver */
+static DEFINE_SPINLOCK(arm_smmu_devices_lock);
+static LIST_HEAD(arm_smmu_devices);
+/* Xen: Helper for finding a device using fwnode */
+static
+struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode);
+
+
 /* SMMU private data for each master */
 struct arm_smmu_master_data {
 	struct arm_smmu_device		*smmu;
@@ -642,7 +897,7 @@  enum arm_smmu_domain_stage {
 
 struct arm_smmu_domain {
 	struct arm_smmu_device		*smmu;
-	struct mutex			init_mutex; /* Protects smmu pointer */
+	mutex			init_mutex; /* Protects smmu pointer */
 
 	struct io_pgtable_ops		*pgtbl_ops;
 	spinlock_t			pgtbl_lock;
@@ -737,15 +992,16 @@  static void queue_inc_prod(struct arm_smmu_queue *q)
  */
 static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
 {
-	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
+	s_time_t deadline = NOW() + MICROSECS(ARM_SMMU_POLL_TIMEOUT_US);
 
 	while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
-		if (ktime_compare(ktime_get(), timeout) > 0)
+
+		if (NOW() > deadline)
 			return -ETIMEDOUT;
 
-		if (wfe) {
+		if (wfe)
 			wfe();
-		} else {
+		else {
 			cpu_relax();
 			udelay(1);
 		}
@@ -931,7 +1187,7 @@  static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 }
-
+#if 0
 /* Context descriptor manipulation functions */
 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
 {
@@ -974,7 +1230,7 @@  static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
 
 	cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
 }
-
+#endif
 /* Stream table manipulation functions */
 static void
 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
@@ -1044,7 +1300,7 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 			ste_live = true;
 			break;
 		case STRTAB_STE_0_CFG_ABORT:
-			if (disable_bypass)
+			//No bypass override for Xen
 				break;
 		default:
 			BUG(); /* STE corruption */
@@ -1056,7 +1312,7 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 
 	/* Bypass/fault */
 	if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
-		if (!ste->assigned && disable_bypass)
+		if (!ste->assigned)
 			val |= STRTAB_STE_0_CFG_ABORT;
 		else
 			val |= STRTAB_STE_0_CFG_BYPASS;
@@ -1135,16 +1391,20 @@  static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
 	void *strtab;
 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
+	u32 alignment = 0;
 
 	if (desc->l2ptr)
 		return 0;
 
-	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
+	size = 1 << (STRTAB_SPLIT + LOG_2(STRTAB_STE_DWORDS) + 3);
 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
 
 	desc->span = STRTAB_SPLIT + 1;
-	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
-					  GFP_KERNEL | __GFP_ZERO);
+
+	alignment = 1 << ((5 + (desc->span - 1)));
+	desc->l2ptr = _xzalloc(size, alignment);
+	desc->l2ptr_dma = virt_to_maddr(desc->l2ptr);
+
 	if (!desc->l2ptr) {
 		dev_err(smmu->dev,
 			"failed to allocate l2 stream table for SID %u\n",
@@ -1158,7 +1418,7 @@  static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
 }
 
 /* IRQ and event handlers */
-static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
+static void arm_smmu_evtq_thread(int irq, void *dev, struct cpu_user_regs *regs)
 {
 	int i;
 	struct arm_smmu_device *smmu = dev;
@@ -1186,7 +1446,6 @@  static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
 
 	/* Sync our overflow flag, as we believe we're up to speed */
 	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
-	return IRQ_HANDLED;
 }
 
 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
@@ -1203,7 +1462,7 @@  static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
 
 	dev_info(smmu->dev, "unexpected PRI request received:\n");
 	dev_info(smmu->dev,
-		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
+		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016lx\n",
 		 sid, ssid, grpid, last ? "L" : "",
 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
@@ -1227,7 +1486,7 @@  static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
 	}
 }
 
-static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
+static void arm_smmu_priq_thread(int irq, void *dev, struct cpu_user_regs *regs)
 {
 	struct arm_smmu_device *smmu = dev;
 	struct arm_smmu_queue *q = &smmu->priq.q;
@@ -1243,18 +1502,16 @@  static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
 
 	/* Sync our overflow flag, as we believe we're up to speed */
 	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
-	return IRQ_HANDLED;
 }
 
-static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
+static void arm_smmu_cmdq_sync_handler(int irq, void *dev, struct cpu_user_regs *regs)
 {
 	/* We don't actually use CMD_SYNC interrupts for anything */
-	return IRQ_HANDLED;
 }
 
 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
 
-static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
+static void arm_smmu_gerror_handler(int irq, void *dev, struct cpu_user_regs *regs)
 {
 	u32 gerror, gerrorn, active;
 	struct arm_smmu_device *smmu = dev;
@@ -1264,7 +1521,7 @@  static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
 
 	active = gerror ^ gerrorn;
 	if (!(active & GERROR_ERR_MASK))
-		return IRQ_NONE; /* No errors pending */
+		return; /* No errors pending */
 
 	dev_warn(smmu->dev,
 		 "unexpected global error reported (0x%08x), this could be serious\n",
@@ -1286,7 +1543,7 @@  static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
 
 	if (active & GERROR_MSI_CMDQ_ABT_ERR) {
 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
-		arm_smmu_cmdq_sync_handler(irq, smmu->dev);
+		arm_smmu_cmdq_sync_handler(irq, smmu->dev, NULL);
 	}
 
 	if (active & GERROR_PRIQ_ABT_ERR)
@@ -1299,7 +1556,6 @@  static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
 		arm_smmu_cmdq_skip_err(smmu);
 
 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
-	return IRQ_HANDLED;
 }
 
 /* IO_PGTABLE API */
@@ -1311,11 +1567,13 @@  static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 }
 
+#if 0 /*Xen: Unused function */
 static void arm_smmu_tlb_sync(void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
 	__arm_smmu_tlb_sync(smmu_domain->smmu);
 }
+#endif
 
 static void arm_smmu_tlb_inv_context(void *cookie)
 {
@@ -1336,6 +1594,7 @@  static void arm_smmu_tlb_inv_context(void *cookie)
 	__arm_smmu_tlb_sync(smmu);
 }
 
+#if 0 /*Xen: Unused functionality */
 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 					  size_t granule, bool leaf, void *cookie)
 {
@@ -1362,7 +1621,7 @@  static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 	} while (size -= granule);
 }
 
-static const struct iommu_gather_ops arm_smmu_gather_ops = {
+static struct iommu_gather_ops arm_smmu_gather_ops = {
 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
 	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
 	.tlb_sync	= arm_smmu_tlb_sync,
@@ -1380,6 +1639,11 @@  static bool arm_smmu_capable(enum iommu_cap cap)
 		return false;
 	}
 }
+#endif
+/* Xen: Stub out DMA domain related functions */
+#define iommu_get_dma_cookie(dom) 0
+#define iommu_put_dma_cookie(dom) 0
+
 
 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 {
@@ -1410,6 +1674,7 @@  static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 	return &smmu_domain->domain;
 }
 
+#if 0
 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
 {
 	int idx, size = 1 << span;
@@ -1427,36 +1692,20 @@  static void arm_smmu_bitmap_free(unsigned long *map, int idx)
 {
 	clear_bit(idx, map);
 }
+#endif
 
 static void arm_smmu_domain_free(struct iommu_domain *domain)
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-
-	iommu_put_dma_cookie(domain);
-	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
-
-	/* Free the CD and ASID, if we allocated them */
-	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
-
-		if (cfg->cdptr) {
-			dmam_free_coherent(smmu_domain->smmu->dev,
-					   CTXDESC_CD_DWORDS << 3,
-					   cfg->cdptr,
-					   cfg->cdptr_dma);
-
-			arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
-		}
-	} else {
-		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
-		if (cfg->vmid)
-			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
-	}
+	/*
+	 * Xen: Remove the free functions that are not used and code related
+	 * to S1 translation. We just need to free the domain here.
+	 */
 
 	kfree(smmu_domain);
 }
 
+#if 0
 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 				       struct io_pgtable_cfg *pgtbl_cfg)
 {
@@ -1488,33 +1737,30 @@  out_free_asid:
 	arm_smmu_bitmap_free(smmu->asid_map, asid);
 	return ret;
 }
+#endif
 
-static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
-				       struct io_pgtable_cfg *pgtbl_cfg)
+static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain)
 {
-	int vmid;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
 
-	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
-	if (vmid < 0)
-		return vmid;
+	/* Xen: Set the values as needed */
 
-	cfg->vmid	= (u16)vmid;
-	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
-	cfg->vtcr	= pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
+	cfg->vmid	= cfg->domain->arch.p2m.vmid;
+	cfg->vttbr	= page_to_maddr(cfg->domain->arch.p2m.root);
+	cfg->vtcr	= READ_SYSREG32(VTCR_EL2);
 	return 0;
 }
 
 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 {
 	int ret;
+#if 0	/* Xen: pgtbl_cfg not needed. So modify the function as needed */
 	unsigned long ias, oas;
 	enum io_pgtable_fmt fmt;
 	struct io_pgtable_cfg pgtbl_cfg;
 	struct io_pgtable_ops *pgtbl_ops;
-	int (*finalise_stage_fn)(struct arm_smmu_domain *,
-				 struct io_pgtable_cfg *);
+	int (*finalise_stage_fn)(struct arm_smmu_domain *);
+#endif
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 
@@ -1529,6 +1775,7 @@  static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
 
+#if 0
 	switch (smmu_domain->stage) {
 	case ARM_SMMU_DOMAIN_S1:
 		ias = VA_BITS;
@@ -1563,10 +1810,11 @@  static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 	domain->geometry.aperture_end = (1UL << ias) - 1;
 	domain->geometry.force_aperture = true;
 	smmu_domain->pgtbl_ops = pgtbl_ops;
-
 	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
 	if (ret < 0)
 		free_io_pgtable_ops(pgtbl_ops);
+#endif
+	ret = arm_smmu_domain_finalise_s2(smmu_domain);
 
 	return ret;
 }
@@ -1660,7 +1908,8 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
 		ste->s1_cfg = &smmu_domain->s1_cfg;
 		ste->s2_cfg = NULL;
-		arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
+		/*Xen: S1 configuratio not needed */
+		//arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
 	} else {
 		ste->s1_cfg = NULL;
 		ste->s2_cfg = &smmu_domain->s2_cfg;
@@ -1672,6 +1921,7 @@  out_unlock:
 	return ret;
 }
 
+#if 0
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 			phys_addr_t paddr, size_t size, int prot)
 {
@@ -1737,11 +1987,13 @@  static int arm_smmu_match_node(struct device *dev, void *data)
 static
 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
 {
+	/* Xen; Function modified as needed for Xen.*/
 	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
 						fwnode, arm_smmu_match_node);
 	put_device(dev);
 	return dev ? dev_get_drvdata(dev) : NULL;
 }
+#endif
 
 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
 {
@@ -1752,8 +2004,9 @@  static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
 
 	return sid < limit;
 }
-
+#if 0
 static struct iommu_ops arm_smmu_ops;
+#endif
 
 static int arm_smmu_add_device(struct device *dev)
 {
@@ -1761,9 +2014,9 @@  static int arm_smmu_add_device(struct device *dev)
 	struct arm_smmu_device *smmu;
 	struct arm_smmu_master_data *master;
 	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
-	struct iommu_group *group;
 
-	if (!fwspec || fwspec->ops != &arm_smmu_ops)
+	/* Xen: fwspec->ops are not needed */
+	if (!fwspec)
 		return -ENODEV;
 	/*
 	 * We _can_ actually withstand dodgy bus code re-calling add_device()
@@ -1800,6 +2053,12 @@  static int arm_smmu_add_device(struct device *dev)
 		}
 	}
 
+#if 0
+/*
+ * Xen: Do not need an iommu group as the stream data is carried by the SMMU
+ * master device object
+ */
+
 	group = iommu_group_get_for_dev(dev);
 	if (!IS_ERR(group)) {
 		iommu_group_put(group);
@@ -1807,8 +2066,16 @@  static int arm_smmu_add_device(struct device *dev)
 	}
 
 	return PTR_ERR_OR_ZERO(group);
+#endif
+	return 0;
 }
 
+/*
+ * Xen: We can potentially support this function and destroy a device. This
+ * will be relevant for PCI hotplug. So, will be implemented as needed after
+ * passthrough support is available.
+ */
+#if 0
 static void arm_smmu_remove_device(struct device *dev)
 {
 	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
@@ -1944,7 +2211,7 @@  static struct iommu_ops arm_smmu_ops = {
 	.put_resv_regions	= arm_smmu_put_resv_regions,
 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
 };
-
+#endif
 /* Probing and initialisation functions */
 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 				   struct arm_smmu_queue *q,
@@ -1954,7 +2221,12 @@  static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 {
 	size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
 
-	q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
+	/* The SMMU cache coherency property is always set. Since we are sharing the CPU translation tables
+	 * just make a regular allocation.
+	 */
+	q->base = _xzalloc(qsz, sizeof(void *));
+	q->base_dma = virt_to_maddr(q->base);
+
 	if (!q->base) {
 		dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
 			qsz);
@@ -2025,10 +2297,11 @@  static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
 	void *strtab;
 	u64 reg;
 	u32 size, l1size;
+	u32 alignment;
 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
 
 	/* Calculate the L1 size, capped to the SIDSIZE. */
-	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
+	size = STRTAB_L1_SZ_SHIFT - (LOG_2(STRTAB_L1_DESC_DWORDS) + 3);
 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
 	cfg->num_l1_ents = 1 << size;
 
@@ -2039,8 +2312,13 @@  static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
 			 size, smmu->sid_bits);
 
 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
-	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
-				     GFP_KERNEL | __GFP_ZERO);
+
+	alignment = max_t(u32, cfg->num_l1_ents, 64);
+	//strtab = _xzalloc(l1size, sizeof(void *));
+	strtab = _xzalloc(l1size, l1size);
+	cfg->strtab_dma = virt_to_maddr(strtab);
+
+	//dsb(sy);
 	if (!strtab) {
 		dev_err(smmu->dev,
 			"failed to allocate l1 stream table (%u bytes)\n",
@@ -2068,8 +2346,9 @@  static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
 
 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
-	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
-				     GFP_KERNEL | __GFP_ZERO);
+	strtab = _xzalloc(size, size);
+	cfg->strtab_dma = virt_to_maddr(strtab);
+
 	if (!strtab) {
 		dev_err(smmu->dev,
 			"failed to allocate linear stream table (%u bytes)\n",
@@ -2152,6 +2431,8 @@  static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
 					  1, ARM_SMMU_POLL_TIMEOUT_US);
 }
 
+/* Xen: There is no MSI support as yet */
+#if 0
 static void arm_smmu_free_msis(void *data)
 {
 	struct device *dev = data;
@@ -2217,7 +2498,7 @@  static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
 	/* Add callback to free MSIs on teardown */
 	devm_add_action(dev, arm_smmu_free_msis, dev);
 }
-
+#endif
 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
 {
 	int ret, irq;
@@ -2231,31 +2512,31 @@  static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
 		return ret;
 	}
 
-	arm_smmu_setup_msis(smmu);
+	//arm_smmu_setup_msis(smmu);
 
 	/* Request interrupt lines */
 	irq = smmu->evtq.q.irq;
 	if (irq) {
-		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
-						arm_smmu_evtq_thread,
-						IRQF_ONESHOT,
-						"arm-smmu-v3-evtq", smmu);
+		irq_set_type(irq, IRQ_TYPE_EDGE_BOTH);
+		ret = request_irq(irq, arm_smmu_evtq_thread,
+						0, "arm-smmu-v3-evtq", smmu);
 		if (ret < 0)
 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
 	}
 
 	irq = smmu->cmdq.q.irq;
 	if (irq) {
-		ret = devm_request_irq(smmu->dev, irq,
-				       arm_smmu_cmdq_sync_handler, 0,
-				       "arm-smmu-v3-cmdq-sync", smmu);
+		irq_set_type(irq, IRQ_TYPE_EDGE_BOTH);
+		ret = request_irq(irq, arm_smmu_cmdq_sync_handler,
+				0, "arm-smmu-v3-cmdq-sync", smmu);
 		if (ret < 0)
 			dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
 	}
 
 	irq = smmu->gerr_irq;
 	if (irq) {
-		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
+		irq_set_type(irq, IRQ_TYPE_EDGE_BOTH);
+		ret = request_irq(irq, arm_smmu_gerror_handler,
 				       0, "arm-smmu-v3-gerror", smmu);
 		if (ret < 0)
 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
@@ -2263,12 +2544,13 @@  static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
 
 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
 		irq = smmu->priq.q.irq;
+		irq_set_type(irq, IRQ_TYPE_EDGE_BOTH);
 		if (irq) {
-			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
-							arm_smmu_priq_thread,
-							IRQF_ONESHOT,
-							"arm-smmu-v3-priq",
-							smmu);
+			ret = request_irq(irq,
+					  arm_smmu_priq_thread,
+					  0,
+					  "arm-smmu-v3-priq",
+					  smmu);
 			if (ret < 0)
 				dev_warn(smmu->dev,
 					 "failed to enable priq irq\n");
@@ -2400,7 +2682,7 @@  static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 
 
 	/* Enable the SMMU interface, or ensure bypass */
-	if (!bypass || disable_bypass) {
+	if (!bypass) {
 		enables |= CR0_SMMUEN;
 	} else {
 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
@@ -2488,8 +2770,11 @@  static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 		smmu->features |= ARM_SMMU_FEAT_STALLS;
 	}
 
+#if 0/* Xen: Do not enable Stage 1 translations */
+
 	if (reg & IDR0_S1P)
 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
+#endif
 
 	if (reg & IDR0_S2P)
 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
@@ -2562,10 +2847,12 @@  static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	if (reg & IDR5_GRAN4K)
 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
 
+#if 0 /* Xen: SMMU ops do not have a pgsize_bitmap member for Xen */
 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
 	else
 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
+#endif
 
 	/* Output address size */
 	switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
@@ -2592,10 +2879,12 @@  static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 		smmu->oas = 48;
 	}
 
+#if 0 /* Xen: There is no support for DMA mask */
 	/* Set the DMA mask for our table walker */
 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
 		dev_warn(smmu->dev,
 			 "failed to set DMA mask for table walker\n");
+#endif
 
 	smmu->ias = max(smmu->ias, smmu->oas);
 
@@ -2612,7 +2901,8 @@  static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
 	struct device *dev = smmu->dev;
 	struct acpi_iort_node *node;
 
-	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
+	/* Xen: Modification to get iort_node */
+	node = (struct acpi_iort_node *)dev->acpi_node;
 
 	/* Retrieve SMMUv3 specific data */
 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
@@ -2633,7 +2923,7 @@  static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
 				    struct arm_smmu_device *smmu)
 {
-	struct device *dev = &pdev->dev;
+	struct device *dev = pdev;
 	u32 cells;
 	int ret = -EINVAL;
 
@@ -2646,8 +2936,8 @@  static int arm_smmu_device_dt_probe(struct platform_device *pdev,
 
 	parse_driver_options(smmu);
 
-	if (of_dma_is_coherent(dev->of_node))
-		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
+	/* Xen: Set the COHERNECY feature */
+	smmu->features |= ARM_SMMU_FEAT_COHERENCY;
 
 	return ret;
 }
@@ -2656,9 +2946,10 @@  static int arm_smmu_device_probe(struct platform_device *pdev)
 {
 	int irq, ret;
 	struct resource *res;
-	resource_size_t ioaddr;
+	/*Xen: Do not need to setup sysfs */
+	/* resource_size_t ioaddr;*/
 	struct arm_smmu_device *smmu;
-	struct device *dev = &pdev->dev;
+	struct device *dev = pdev;/* Xen: dev is ignored */
 	bool bypass;
 
 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
@@ -2674,7 +2965,7 @@  static int arm_smmu_device_probe(struct platform_device *pdev)
 		dev_err(dev, "MMIO region too small (%pr)\n", res);
 		return -EINVAL;
 	}
-	ioaddr = res->start;
+	/*ioaddr = res->start;*/
 
 	smmu->base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(smmu->base))
@@ -2719,13 +3010,15 @@  static int arm_smmu_device_probe(struct platform_device *pdev)
 		return ret;
 
 	/* Record our private device structure */
-	platform_set_drvdata(pdev, smmu);
+	/* platform_set_drvdata(pdev, smmu); */
 
 	/* Reset the device */
 	ret = arm_smmu_device_reset(smmu, bypass);
 	if (ret)
 		return ret;
 
+/* Xen: Not creating an IOMMU device list for Xen */
+#if 0
 	/* And we're up. Go go go! */
 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
 				     "smmu3.%pa", &ioaddr);
@@ -2757,9 +3050,18 @@  static int arm_smmu_device_probe(struct platform_device *pdev)
 		if (ret)
 			return ret;
 	}
+#endif
+	/*
+	 * Xen: Keep a list of all probed devices. This will be used to query
+	 * the smmu devices based on the fwnode.
+	 */
+	INIT_LIST_HEAD(&smmu->devices);
+	spin_lock(&arm_smmu_devices_lock);
+	list_add(&smmu->devices, &arm_smmu_devices);
+	spin_unlock(&arm_smmu_devices_lock);
 	return 0;
 }
-
+#if 0
 static int arm_smmu_device_remove(struct platform_device *pdev)
 {
 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
@@ -2767,6 +3069,9 @@  static int arm_smmu_device_remove(struct platform_device *pdev)
 	arm_smmu_device_disable(smmu);
 	return 0;
 }
+#endif
+#define MODULE_DEVICE_TABLE(type, name)
+#define of_device_id dt_device_match
 
 static struct of_device_id arm_smmu_of_match[] = {
 	{ .compatible = "arm,smmu-v3", },
@@ -2774,6 +3079,7 @@  static struct of_device_id arm_smmu_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
 
+#if 0
 static struct platform_driver arm_smmu_driver = {
 	.driver	= {
 		.name		= "arm-smmu-v3",
@@ -2789,3 +3095,318 @@  IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", NULL);
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
 MODULE_LICENSE("GPL v2");
+
+#endif
+/***** Start of Xen specific code *****/
+
+static int __must_check arm_smmu_iotlb_flush_all(struct domain *d)
+{
+	struct arm_smmu_xen_domain *smmu_domain = dom_iommu(d)->arch.priv;
+	struct iommu_domain *cfg;
+
+	spin_lock(&smmu_domain->lock);
+	list_for_each_entry(cfg, &smmu_domain->iommu_domains, list) {
+		/*
+		 * Only invalidate the context when SMMU is present.
+		 * This is because the context initialization is delayed
+		 * until a master has been added.
+		 */
+		if (unlikely(!ACCESS_ONCE(cfg->priv->smmu)))
+			continue;
+		arm_smmu_tlb_inv_context(cfg->priv);
+	}
+	spin_unlock(&smmu_domain->lock);
+	return 0;
+}
+
+static int __must_check arm_smmu_iotlb_flush(struct domain *d,
+					     unsigned long gfn,
+					     unsigned int page_count)
+{
+	return arm_smmu_iotlb_flush_all(d);
+}
+
+static struct iommu_domain *arm_smmu_get_domain(struct domain *d,
+						struct device *dev)
+{
+	struct iommu_domain *domain;
+	struct arm_smmu_xen_domain *xen_domain;
+	struct arm_smmu_device *smmu;
+	struct arm_smmu_domain *smmu_domain;
+
+	xen_domain = dom_iommu(d)->arch.priv;
+
+	smmu = arm_smmu_get_by_fwnode(dev->iommu_fwspec->iommu_fwnode);
+	if (!smmu)
+		return NULL;
+
+	/*
+	 * Loop through the &xen_domain->contexts to locate a context
+	 * assigned to this SMMU
+	 */
+	list_for_each_entry(domain, &xen_domain->iommu_domains, list) {
+		smmu_domain = to_smmu_domain(domain);
+		if (smmu_domain->smmu == smmu)
+			return domain;
+	}
+
+	return NULL;
+}
+
+static void arm_smmu_destroy_iommu_domain(struct iommu_domain *domain)
+{
+	list_del(&domain->list);
+	xfree(domain);
+}
+
+static int arm_smmu_assign_dev(struct domain *d, u8 devfn,
+			       struct device *dev, u32 flag)
+{
+	int ret = 0;
+	struct iommu_domain *domain;
+	struct arm_smmu_xen_domain *xen_domain;
+	struct arm_smmu_domain *arm_smmu;
+
+	xen_domain = dom_iommu(d)->arch.priv;
+
+	if (!dev->archdata.iommu) {
+		dev->archdata.iommu = xzalloc(struct arm_smmu_xen_device);
+		if (!dev->archdata.iommu)
+			return -ENOMEM;
+	}
+
+	ret = arm_smmu_add_device(dev);
+	if (ret)
+		return ret;
+
+	spin_lock(&xen_domain->lock);
+
+	/*
+	 * Check to see if an iommu_domain already exists for this xen domain
+	 * under the same SMMU
+	 */
+	domain = arm_smmu_get_domain(d, dev);
+	if (!domain) {
+
+		domain = arm_smmu_domain_alloc(IOMMU_DOMAIN_DMA);
+		if (!domain) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		arm_smmu = to_smmu_domain(domain);
+		arm_smmu->s2_cfg.domain = d;
+
+		/* Chain the new context to the domain */
+		list_add(&domain->list, &xen_domain->iommu_domains);
+
+	}
+
+	ret = arm_smmu_attach_dev(domain, dev);
+	if (ret) {
+		if (domain->ref.counter == 0)
+			arm_smmu_destroy_iommu_domain(domain);
+	} else {
+		atomic_inc(&domain->ref);
+	}
+
+out:
+	spin_unlock(&xen_domain->lock);
+	return ret;
+}
+
+static int arm_smmu_deassign_dev(struct domain *d, struct device *dev)
+{
+	struct iommu_domain *domain = arm_smmu_get_domain(d, dev);
+	struct arm_smmu_xen_domain *xen_domain;
+	struct arm_smmu_domain *arm_smmu = to_smmu_domain(domain);
+
+	xen_domain = dom_iommu(d)->arch.priv;
+
+	if (!arm_smmu || arm_smmu->s2_cfg.domain != d) {
+		dev_err(dev, " not attached to domain %d\n", d->domain_id);
+		return -ESRCH;
+	}
+
+	spin_lock(&xen_domain->lock);
+
+	arm_smmu_detach_dev(dev);
+	atomic_dec(&domain->ref);
+
+	if (domain->ref.counter == 0)
+		arm_smmu_destroy_iommu_domain(domain);
+
+	spin_unlock(&xen_domain->lock);
+
+
+
+	return 0;
+}
+
+static int arm_smmu_reassign_dev(struct domain *s, struct domain *t,
+				 u8 devfn,  struct device *dev)
+{
+	int ret = 0;
+
+	/* Don't allow remapping on other domain than hwdom */
+	if (t && t != hardware_domain)
+		return -EPERM;
+
+	if (t == s)
+		return 0;
+
+	ret = arm_smmu_deassign_dev(s, dev);
+	if (ret)
+		return ret;
+
+	if (t) {
+		/* No flags are defined for ARM. */
+		ret = arm_smmu_assign_dev(t, devfn, dev, 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int arm_smmu_iommu_domain_init(struct domain *d)
+{
+	struct arm_smmu_xen_domain *xen_domain;
+
+	xen_domain = xzalloc(struct arm_smmu_xen_domain);
+	if (!xen_domain)
+		return -ENOMEM;
+
+	spin_lock_init(&xen_domain->lock);
+	INIT_LIST_HEAD(&xen_domain->iommu_domains);
+
+	dom_iommu(d)->arch.priv = xen_domain;
+
+	return 0;
+}
+
+static void __hwdom_init arm_smmu_iommu_hwdom_init(struct domain *d)
+{
+}
+
+static void arm_smmu_iommu_domain_teardown(struct domain *d)
+{
+	struct arm_smmu_xen_domain *xen_domain = dom_iommu(d)->arch.priv;
+
+	ASSERT(list_empty(&xen_domain->iommu_domains));
+	xfree(xen_domain);
+}
+
+static int __must_check arm_smmu_map_page(struct domain *d, unsigned long gfn,
+			unsigned long mfn, unsigned int flags)
+{
+	p2m_type_t t;
+
+	/*
+	 * Grant mappings can be used for DMA requests. The dev_bus_addr
+	 * returned by the hypercall is the MFN (not the IPA). For device
+	 * protected by an IOMMU, Xen needs to add a 1:1 mapping in the domain
+	 * p2m to allow DMA request to work.
+	 * This is only valid when the domain is directed mapped. Hence this
+	 * function should only be used by gnttab code with gfn == mfn.
+	 */
+	BUG_ON(!is_domain_direct_mapped(d));
+	BUG_ON(mfn != gfn);
+
+	/* We only support readable and writable flags */
+	if (!(flags & (IOMMUF_readable | IOMMUF_writable)))
+		return -EINVAL;
+
+	t = (flags & IOMMUF_writable) ? p2m_iommu_map_rw : p2m_iommu_map_ro;
+
+	/*
+	 * The function guest_physmap_add_entry replaces the current mapping
+	 * if there is already one...
+	 */
+	return guest_physmap_add_entry(d, _gfn(gfn), _mfn(mfn), 0, t);
+}
+
+static int __must_check arm_smmu_unmap_page(struct domain *d, unsigned long gfn)
+{
+	/*
+	 * This function should only be used by gnttab code when the domain
+	 * is direct mapped
+	 */
+	if (!is_domain_direct_mapped(d))
+		return -EINVAL;
+
+	return guest_physmap_remove_page(d, _gfn(gfn), _mfn(gfn), 0);
+}
+
+static const struct iommu_ops arm_smmu_iommu_ops = {
+	.init = arm_smmu_iommu_domain_init,
+	.hwdom_init = arm_smmu_iommu_hwdom_init,
+	.teardown = arm_smmu_iommu_domain_teardown,
+	.iotlb_flush = arm_smmu_iotlb_flush,
+	.iotlb_flush_all = arm_smmu_iotlb_flush_all,
+	.assign_device = arm_smmu_assign_dev,
+	.reassign_device = arm_smmu_reassign_dev,
+	.map_page = arm_smmu_map_page,
+	.unmap_page = arm_smmu_unmap_page,
+};
+
+static
+struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
+{
+	struct arm_smmu_device *smmu = NULL;
+
+	spin_lock(&arm_smmu_devices_lock);
+	list_for_each_entry(smmu, &arm_smmu_devices, devices) {
+		if (smmu->dev->fwnode == fwnode)
+			break;
+	}
+	spin_unlock(&arm_smmu_devices_lock);
+
+	return smmu;
+}
+
+static __init int arm_smmu_dt_init(struct dt_device_node *dev,
+				   const void *data)
+{
+	int rc;
+
+	/*
+	 * Even if the device can't be initialized, we don't want to
+	 * give the SMMU device to dom0.
+	 */
+	dt_device_set_used_by(dev, DOMID_XEN);
+
+	rc = arm_smmu_device_probe(dt_to_dev(dev));
+	if (rc)
+		return rc;
+
+	iommu_set_ops(&arm_smmu_iommu_ops);
+
+	return 0;
+}
+
+DT_DEVICE_START(smmuv3, "ARM SMMU V3", DEVICE_IOMMU)
+	.dt_match = arm_smmu_of_match,
+	.init = arm_smmu_dt_init,
+DT_DEVICE_END
+
+#ifdef CONFIG_ACPI
+/* Set up the IOMMU */
+static int __init arm_smmu_acpi_init(const void *data)
+{
+	int rc;
+	rc = arm_smmu_device_probe((struct device *)data);
+
+	if (rc)
+		return rc;
+
+	iommu_set_ops(&arm_smmu_iommu_ops);
+	return 0;
+}
+
+ACPI_DEVICE_START(asmmuv3, "ARM SMMU V3", DEVICE_IOMMU)
+	.class_type = ACPI_IORT_NODE_SMMU_V3,
+	.init = arm_smmu_acpi_init,
+ACPI_DEVICE_END
+
+#endif