diff mbox series

[v12,10/10] iommu/arm-smmu-v3: Add stall support for platform devices

Message ID 20210127154322.3959196-11-jean-philippe@linaro.org (mailing list archive)
State Not Applicable, archived
Headers show
Series iommu: I/O page faults for SMMUv3 | expand

Commit Message

Jean-Philippe Brucker Jan. 27, 2021, 3:43 p.m. UTC
The SMMU provides a Stall model for handling page faults in platform
devices. It is similar to PCIe PRI, but doesn't require devices to have
their own translation cache. Instead, faulting transactions are parked
and the OS is given a chance to fix the page tables and retry the
transaction.

Enable stall for devices that support it (opt-in by firmware). When an
event corresponds to a translation error, call the IOMMU fault handler.
If the fault is recoverable, it will call us back to terminate or
continue the stall.

To use stall device drivers need to enable IOMMU_DEV_FEAT_IOPF, which
initializes the fault queue for the device.

Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  43 ++++
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   |  59 +++++-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 189 +++++++++++++++++-
 3 files changed, 276 insertions(+), 15 deletions(-)

Comments

Eric Auger Jan. 31, 2021, 6:29 p.m. UTC | #1
Hi Jean,

Some rather minor comments§questions below that may not justify a respin.

On 1/27/21 4:43 PM, Jean-Philippe Brucker wrote:
> The SMMU provides a Stall model for handling page faults in platform
> devices. It is similar to PCIe PRI, but doesn't require devices to have
> their own translation cache. Instead, faulting transactions are parked
> and the OS is given a chance to fix the page tables and retry the
> transaction.
> 
> Enable stall for devices that support it (opt-in by firmware). When an
> event corresponds to a translation error, call the IOMMU fault handler.
> If the fault is recoverable, it will call us back to terminate or
> continue the stall.
> 
> To use stall device drivers need to enable IOMMU_DEV_FEAT_IOPF, which
> initializes the fault queue for the device.
> 
> Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  43 ++++
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   |  59 +++++-
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 189 +++++++++++++++++-
>  3 files changed, 276 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 7b15b7580c6e..59af0bbd2f7b 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -354,6 +354,13 @@
>  #define CMDQ_PRI_1_GRPID		GENMASK_ULL(8, 0)
>  #define CMDQ_PRI_1_RESP			GENMASK_ULL(13, 12)
>  
> +#define CMDQ_RESUME_0_RESP_TERM		0UL
> +#define CMDQ_RESUME_0_RESP_RETRY	1UL
> +#define CMDQ_RESUME_0_RESP_ABORT	2UL
> +#define CMDQ_RESUME_0_RESP		GENMASK_ULL(13, 12)
> +#define CMDQ_RESUME_0_SID		GENMASK_ULL(63, 32)
> +#define CMDQ_RESUME_1_STAG		GENMASK_ULL(15, 0)
> +
>  #define CMDQ_SYNC_0_CS			GENMASK_ULL(13, 12)
>  #define CMDQ_SYNC_0_CS_NONE		0
>  #define CMDQ_SYNC_0_CS_IRQ		1
> @@ -370,6 +377,25 @@
>  
>  #define EVTQ_0_ID			GENMASK_ULL(7, 0)
>  
> +#define EVT_ID_TRANSLATION_FAULT	0x10
> +#define EVT_ID_ADDR_SIZE_FAULT		0x11
> +#define EVT_ID_ACCESS_FAULT		0x12
> +#define EVT_ID_PERMISSION_FAULT		0x13
> +
> +#define EVTQ_0_SSV			(1UL << 11)
> +#define EVTQ_0_SSID			GENMASK_ULL(31, 12)
> +#define EVTQ_0_SID			GENMASK_ULL(63, 32)
> +#define EVTQ_1_STAG			GENMASK_ULL(15, 0)
> +#define EVTQ_1_STALL			(1UL << 31)
> +#define EVTQ_1_PnU			(1UL << 33)
> +#define EVTQ_1_InD			(1UL << 34)
> +#define EVTQ_1_RnW			(1UL << 35)
> +#define EVTQ_1_S2			(1UL << 39)
> +#define EVTQ_1_CLASS			GENMASK_ULL(41, 40)
> +#define EVTQ_1_TT_READ			(1UL << 44)
> +#define EVTQ_2_ADDR			GENMASK_ULL(63, 0)
> +#define EVTQ_3_IPA			GENMASK_ULL(51, 12)
> +
>  /* PRI queue */
>  #define PRIQ_ENT_SZ_SHIFT		4
>  #define PRIQ_ENT_DWORDS			((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
> @@ -464,6 +490,13 @@ struct arm_smmu_cmdq_ent {
>  			enum pri_resp		resp;
>  		} pri;
>  
> +		#define CMDQ_OP_RESUME		0x44
> +		struct {
> +			u32			sid;
> +			u16			stag;
> +			u8			resp;
> +		} resume;
> +
>  		#define CMDQ_OP_CMD_SYNC	0x46
>  		struct {
>  			u64			msiaddr;
> @@ -522,6 +555,7 @@ struct arm_smmu_cmdq_batch {
>  
>  struct arm_smmu_evtq {
>  	struct arm_smmu_queue		q;
> +	struct iopf_queue		*iopf;
>  	u32				max_stalls;
>  };
>  
> @@ -659,7 +693,9 @@ struct arm_smmu_master {
>  	struct arm_smmu_stream		*streams;
>  	unsigned int			num_streams;
>  	bool				ats_enabled;
> +	bool				stall_enabled;
>  	bool				sva_enabled;
> +	bool				iopf_enabled;
>  	struct list_head		bonds;
>  	unsigned int			ssid_bits;
>  };
> @@ -678,6 +714,7 @@ struct arm_smmu_domain {
>  
>  	struct io_pgtable_ops		*pgtbl_ops;
>  	bool				non_strict;
> +	bool				stall_enabled;
>  	atomic_t			nr_ats_masters;
>  
>  	enum arm_smmu_domain_stage	stage;
> @@ -719,6 +756,7 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
>  bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
>  int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
>  int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
> +bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master);
>  struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
>  				    void *drvdata);
>  void arm_smmu_sva_unbind(struct iommu_sva *handle);
> @@ -750,6 +788,11 @@ static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
>  	return -ENODEV;
>  }
>  
> +static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
> +{
> +	return false;
> +}
> +
>  static inline struct iommu_sva *
>  arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
>  {
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> index bb251cab61f3..ee66d1f4cb81 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> @@ -435,9 +435,13 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
>  	return true;
>  }
>  
> -static bool arm_smmu_iopf_supported(struct arm_smmu_master *master)
> +bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
>  {
> -	return false;
> +	/* We're not keeping track of SIDs in fault events */
shall we? [*] below
> +	if (master->num_streams != 1)
> +		return false;
> +
> +	return master->stall_enabled;
>  }
>  
>  bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
> @@ -445,8 +449,8 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
>  	if (!(master->smmu->features & ARM_SMMU_FEAT_SVA))
>  		return false;
>  
> -	/* SSID and IOPF support are mandatory for the moment */
> -	return master->ssid_bits && arm_smmu_iopf_supported(master);
> +	/* SSID support is mandatory for the moment */
> +	return master->ssid_bits;
>  }
>  
>  bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
> @@ -459,13 +463,55 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
>  	return enabled;
>  }
>  
> +static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master)
> +{
> +	int ret;
> +	struct device *dev = master->dev;
> +
> +	/*
> +	 * Drivers for devices supporting PRI or stall should enable IOPF first.
> +	 * Others have device-specific fault handlers and don't need IOPF.
> +	 */
> +	if (!arm_smmu_master_iopf_supported(master))
> +		return 0;
> +
> +	if (!master->iopf_enabled)
> +		return -EINVAL;
> +
> +	ret = iopf_queue_add_device(master->smmu->evtq.iopf, dev);
> +	if (ret)
> +		return ret;
> +
> +	ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
> +	if (ret) {
> +		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
> +		return ret;
> +	}
> +	return 0;
> +}
> +
> +static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master)
> +{
> +	struct device *dev = master->dev;
> +
> +	if (!master->iopf_enabled)
> +		return;
> +
> +	iommu_unregister_device_fault_handler(dev);
> +	iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
> +}
> +
>  int arm_smmu_master_enable_sva(struct arm_smmu_master *master)
>  {
> +	int ret;
> +
>  	mutex_lock(&sva_lock);
> -	master->sva_enabled = true;
> +	ret = arm_smmu_master_sva_enable_iopf(master);
> +	if (!ret)
> +		master->sva_enabled = true;
>  	mutex_unlock(&sva_lock);
>  
> -	return 0;
> +	return ret;
>  }
>  
>  int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
> @@ -476,6 +522,7 @@ int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
>  		mutex_unlock(&sva_lock);
>  		return -EBUSY;
>  	}
> +	arm_smmu_master_sva_disable_iopf(master);
>  	master->sva_enabled = false;
>  	mutex_unlock(&sva_lock);
>  
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 3afec6ed8075..76b2306ddff6 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -32,6 +32,7 @@
>  #include <linux/amba/bus.h>
>  
>  #include "arm-smmu-v3.h"
> +#include "../../iommu-sva-lib.h"
>  
>  static bool disable_bypass = true;
>  module_param(disable_bypass, bool, 0444);
> @@ -315,6 +316,11 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
>  		}
>  		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
>  		break;
> +	case CMDQ_OP_RESUME:
> +		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
> +		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
> +		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
> +		break;
>  	case CMDQ_OP_CMD_SYNC:
>  		if (ent->sync.msiaddr) {
>  			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
> @@ -878,6 +884,44 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
>  	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
>  }
>  
> +static int arm_smmu_page_response(struct device *dev,
> +				  struct iommu_fault_event *unused,
> +				  struct iommu_page_response *resp)
> +{
> +	struct arm_smmu_cmdq_ent cmd = {0};
> +	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> +	int sid = master->streams[0].id;
[*]
> +
> +	if (master->stall_enabled) {
> +		cmd.opcode		= CMDQ_OP_RESUME;
> +		cmd.resume.sid		= sid;
> +		cmd.resume.stag		= resp->grpid;
> +		switch (resp->code) {
> +		case IOMMU_PAGE_RESP_INVALID:
add fallthrough?
> +		case IOMMU_PAGE_RESP_FAILURE:
> +			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
> +			break;
> +		case IOMMU_PAGE_RESP_SUCCESS:
> +			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
> +			break;
> +		default:
> +			return -EINVAL;
> +		}
> +	} else {
> +		return -ENODEV;
> +	}
> +
> +	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
> +	/*
> +	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
> +	 * RESUME consumption guarantees that the stalled transaction will be
> +	 * terminated... at some point in the future. PRI_RESP is fire and
> +	 * forget.
> +	 */
> +
> +	return 0;
> +}
> +
>  /* Context descriptor manipulation functions */
>  void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
>  {
> @@ -988,7 +1032,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
>  	u64 val;
>  	bool cd_live;
>  	__le64 *cdptr;
> -	struct arm_smmu_device *smmu = smmu_domain->smmu;
>  
>  	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
>  		return -E2BIG;
> @@ -1033,8 +1076,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
>  			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
>  			CTXDESC_CD_0_V;
>  
> -		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
> -		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
> +		if (smmu_domain->stall_enabled)
>  			val |= CTXDESC_CD_0_S;
>  	}
>  
> @@ -1278,7 +1320,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
>  			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
>  
>  		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
> -		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
> +		    !master->stall_enabled)
>  			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
>  
>  		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> @@ -1355,7 +1397,6 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
>  	return 0;
>  }
>  
> -__maybe_unused
>  static struct arm_smmu_master *
>  arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
>  {
> @@ -1382,9 +1423,96 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
>  }
>  
>  /* IRQ and event handlers */
> +static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
> +{
> +	int ret;
> +	u32 reason;
> +	u32 perm = 0;
> +	struct arm_smmu_master *master;
> +	bool ssid_valid = evt[0] & EVTQ_0_SSV;
> +	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
> +	struct iommu_fault_event fault_evt = { };
> +	struct iommu_fault *flt = &fault_evt.fault;
> +
> +	/* Stage-2 is always pinned at the moment */
> +	if (evt[1] & EVTQ_1_S2)
> +		return -EFAULT;
> +
> +	master = arm_smmu_find_master(smmu, sid);
> +	if (!master)
> +		return -EINVAL;
> +
> +	if (evt[1] & EVTQ_1_RnW)
> +		perm |= IOMMU_FAULT_PERM_READ;
> +	else
> +		perm |= IOMMU_FAULT_PERM_WRITE;
> +
> +	if (evt[1] & EVTQ_1_InD)
> +		perm |= IOMMU_FAULT_PERM_EXEC;
> +
> +	if (evt[1] & EVTQ_1_PnU)
> +		perm |= IOMMU_FAULT_PERM_PRIV;
> +
> +	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
> +	case EVT_ID_TRANSLATION_FAULT:
> +	case EVT_ID_ADDR_SIZE_FAULT:
> +	case EVT_ID_ACCESS_FAULT:
> +		reason = IOMMU_FAULT_REASON_PTE_FETCH;
Doesn't it rather map to IOMMU_FAULT_REASON_ACCESS?
/* access flag check failed */"
> +		break;
> +	case EVT_ID_PERMISSION_FAULT:
> +		reason = IOMMU_FAULT_REASON_PERMISSION;
> +		break;
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (evt[1] & EVTQ_1_STALL) {
> +		flt->type = IOMMU_FAULT_PAGE_REQ;
> +		flt->prm = (struct iommu_fault_page_request) {
> +			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
> +			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
> +			.perm = perm,
> +			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
> +		};
> +
> +		if (ssid_valid) {
> +			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> +			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
> +		}
> +	} else {
> +		flt->type = IOMMU_FAULT_DMA_UNRECOV;
> +		flt->event = (struct iommu_fault_unrecoverable) {
> +			.reason = reason,
> +			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID |
> +				 IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID,
nit: shall IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID be set here? Supported
unrecoverable faults feature the IPA field which is UNKNOWN for S1
translations. fetch_addr rather was
corresponding to WALK_EABT.Fetch_addr to me.

> +			.perm = perm,
> +			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
> +			.fetch_addr = FIELD_GET(EVTQ_3_IPA, evt[3]),
> +		};
> +
> +		if (ssid_valid) {
> +			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
> +			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
> +		}
> +	}
> +
> +	ret = iommu_report_device_fault(master->dev, &fault_evt);
> +	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
> +		/* Nobody cared, abort the access */
> +		struct iommu_page_response resp = {
> +			.pasid		= flt->prm.pasid,
> +			.grpid		= flt->prm.grpid,
> +			.code		= IOMMU_PAGE_RESP_FAILURE,
> +		};
> +		arm_smmu_page_response(master->dev, &fault_evt, &resp);
> +	}
> +
> +	return ret;
> +}
> +
>  static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>  {
> -	int i;
> +	int i, ret;
>  	struct arm_smmu_device *smmu = dev;
>  	struct arm_smmu_queue *q = &smmu->evtq.q;
>  	struct arm_smmu_ll_queue *llq = &q->llq;
> @@ -1394,6 +1522,10 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>  		while (!queue_remove_raw(q, evt)) {
>  			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
>  
> +			ret = arm_smmu_handle_evt(smmu, evt);
> +			if (!ret)
> +				continue;
> +
>  			dev_info(smmu->dev, "event 0x%02x received:\n", id);
>  			for (i = 0; i < ARRAY_SIZE(evt); ++i)
>  				dev_info(smmu->dev, "\t0x%016llx\n",
> @@ -1928,6 +2060,8 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
>  
>  	cfg->s1cdmax = master->ssid_bits;
>  
> +	smmu_domain->stall_enabled = master->stall_enabled;
> +
>  	ret = arm_smmu_alloc_cd_tables(smmu_domain);
>  	if (ret)
>  		goto out_free_asid;
> @@ -2275,6 +2409,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>  			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
>  		ret = -EINVAL;
>  		goto out_unlock;
> +	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
> +		   smmu_domain->stall_enabled != master->stall_enabled) {
> +		dev_err(dev, "cannot attach to stall-%s domain\n",
> +			smmu_domain->stall_enabled ? "enabled" : "disabled");
> +		ret = -EINVAL;
> +		goto out_unlock;
>  	}
>  
>  	master->domain = smmu_domain;
> @@ -2510,6 +2650,11 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
>  		master->ssid_bits = min_t(u8, master->ssid_bits,
>  					  CTXDESC_LINEAR_CDMAX);
>  
> +	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
> +	     device_property_read_bool(dev, "dma-can-stall")) ||
> +	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
> +		master->stall_enabled = true;
> +
>  	return &smmu->iommu;
>  
>  err_free_master:
> @@ -2527,7 +2672,8 @@ static void arm_smmu_release_device(struct device *dev)
>  		return;
>  
>  	master = dev_iommu_priv_get(dev);
> -	WARN_ON(arm_smmu_master_sva_enabled(master));
> +	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
> +		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
>  	arm_smmu_detach_dev(master);
>  	arm_smmu_disable_pasid(master);
>  	arm_smmu_remove_master(master);
> @@ -2655,6 +2801,8 @@ static bool arm_smmu_dev_has_feature(struct device *dev,
>  		return false;
>  
>  	switch (feat) {
> +	case IOMMU_DEV_FEAT_IOPF:
> +		return arm_smmu_master_iopf_supported(master);
>  	case IOMMU_DEV_FEAT_SVA:
>  		return arm_smmu_master_sva_supported(master);
>  	default:
> @@ -2671,6 +2819,8 @@ static bool arm_smmu_dev_feature_enabled(struct device *dev,
>  		return false;
>  
>  	switch (feat) {
> +	case IOMMU_DEV_FEAT_IOPF:
> +		return master->iopf_enabled;
>  	case IOMMU_DEV_FEAT_SVA:
>  		return arm_smmu_master_sva_enabled(master);
>  	default:
> @@ -2681,6 +2831,8 @@ static bool arm_smmu_dev_feature_enabled(struct device *dev,
>  static int arm_smmu_dev_enable_feature(struct device *dev,
>  				       enum iommu_dev_features feat)
>  {
> +	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> +
>  	if (!arm_smmu_dev_has_feature(dev, feat))
>  		return -ENODEV;
>  
> @@ -2688,8 +2840,11 @@ static int arm_smmu_dev_enable_feature(struct device *dev,
>  		return -EBUSY;
>  
>  	switch (feat) {
> +	case IOMMU_DEV_FEAT_IOPF:
> +		master->iopf_enabled = true;
> +		return 0;
>  	case IOMMU_DEV_FEAT_SVA:
> -		return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
> +		return arm_smmu_master_enable_sva(master);
>  	default:
>  		return -EINVAL;
>  	}
> @@ -2698,12 +2853,19 @@ static int arm_smmu_dev_enable_feature(struct device *dev,
>  static int arm_smmu_dev_disable_feature(struct device *dev,
>  					enum iommu_dev_features feat)
>  {
> +	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> +
>  	if (!arm_smmu_dev_feature_enabled(dev, feat))
>  		return -EINVAL;
>  
>  	switch (feat) {
> +	case IOMMU_DEV_FEAT_IOPF:
> +		if (master->sva_enabled)
> +			return -EBUSY;
> +		master->iopf_enabled = false;
> +		return 0;
>  	case IOMMU_DEV_FEAT_SVA:
> -		return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
> +		return arm_smmu_master_disable_sva(master);
>  	default:
>  		return -EINVAL;
>  	}
> @@ -2734,6 +2896,7 @@ static struct iommu_ops arm_smmu_ops = {
>  	.sva_bind		= arm_smmu_sva_bind,
>  	.sva_unbind		= arm_smmu_sva_unbind,
>  	.sva_get_pasid		= arm_smmu_sva_get_pasid,
> +	.page_response		= arm_smmu_page_response,
>  	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
>  };
>  
> @@ -2831,6 +2994,13 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
>  	if (ret)
>  		return ret;
>  
> +	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
> +	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
> +		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
> +		if (!smmu->evtq.iopf)
> +			return -ENOMEM;
> +	}
> +
>  	/* priq */
>  	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
>  		return 0;
> @@ -3746,6 +3916,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
>  	iommu_device_unregister(&smmu->iommu);
>  	iommu_device_sysfs_remove(&smmu->iommu);
>  	arm_smmu_device_disable(smmu);
> +	iopf_queue_free(smmu->evtq.iopf);
>  
>  	return 0;
>  }
> 
Thanks

Eric
Zhou Wang Feb. 1, 2021, 1:18 a.m. UTC | #2
On 2021/1/27 23:43, Jean-Philippe Brucker wrote:
> The SMMU provides a Stall model for handling page faults in platform
> devices. It is similar to PCIe PRI, but doesn't require devices to have
> their own translation cache. Instead, faulting transactions are parked
> and the OS is given a chance to fix the page tables and retry the
> transaction.
> 
> Enable stall for devices that support it (opt-in by firmware). When an
> event corresponds to a translation error, call the IOMMU fault handler.
> If the fault is recoverable, it will call us back to terminate or
> continue the stall.
> 
> To use stall device drivers need to enable IOMMU_DEV_FEAT_IOPF, which
> initializes the fault queue for the device.
> 
> Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  43 ++++
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   |  59 +++++-
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 189 +++++++++++++++++-
>  3 files changed, 276 insertions(+), 15 deletions(-)
> 

[...]

> @@ -1033,8 +1076,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
>  			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
>  			CTXDESC_CD_0_V;
>  
> -		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
> -		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
> +		if (smmu_domain->stall_enabled)

Could we add ssid checking here? like: if (smmu_domain->stall_enabled && ssid).
The reason is if not CD.S will also be set when ssid is 0, which is not needed.

Best,
Zhou

>  			val |= CTXDESC_CD_0_S;
>  	}
>  
> @@ -1278,7 +1320,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
>  			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
>  
>  		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
> -		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
> +		    !master->stall_enabled)
>  			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
>  
>  		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |

[...]
Jean-Philippe Brucker Feb. 1, 2021, 11:12 a.m. UTC | #3
On Sun, Jan 31, 2021 at 07:29:09PM +0100, Auger Eric wrote:
> Hi Jean,
> 
> Some rather minor comments§questions below that may not justify a respin.
> 
> On 1/27/21 4:43 PM, Jean-Philippe Brucker wrote:
> > -static bool arm_smmu_iopf_supported(struct arm_smmu_master *master)
> > +bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
> >  {
> > -	return false;
> > +	/* We're not keeping track of SIDs in fault events */
> shall we? [*] below

That would require storing the incoming SID into the iommu_fault_event
struct, and retrieve it in arm_smmu_page_response(). Easy enough, but I
don't think it's needed for existing devices.

> > +	if (master->num_streams != 1)
> > +		return false;
[...]
> > +static int arm_smmu_page_response(struct device *dev,
> > +				  struct iommu_fault_event *unused,
> > +				  struct iommu_page_response *resp)
> > +{
> > +	struct arm_smmu_cmdq_ent cmd = {0};
> > +	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> > +	int sid = master->streams[0].id;
> [*]
> > +
> > +	if (master->stall_enabled) {
> > +		cmd.opcode		= CMDQ_OP_RESUME;
> > +		cmd.resume.sid		= sid;
> > +		cmd.resume.stag		= resp->grpid;
> > +		switch (resp->code) {
> > +		case IOMMU_PAGE_RESP_INVALID:
> add fallthrough?

I think fallthrough is mainly useful to tell reader and compiler that a
break was omitted on purpose. When two cases are stuck together the intent
to merge the flow is clear enough in my opinion. GCC's
-Wimplicit-fallthrough doesn't warn in this case.

> > +		case IOMMU_PAGE_RESP_FAILURE:
> > +			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
> > +			break;
[...]
> > +static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
> > +{
> > +	int ret;
> > +	u32 reason;
> > +	u32 perm = 0;
> > +	struct arm_smmu_master *master;
> > +	bool ssid_valid = evt[0] & EVTQ_0_SSV;
> > +	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
> > +	struct iommu_fault_event fault_evt = { };
> > +	struct iommu_fault *flt = &fault_evt.fault;
> > +
> > +	/* Stage-2 is always pinned at the moment */
> > +	if (evt[1] & EVTQ_1_S2)
> > +		return -EFAULT;
> > +
> > +	master = arm_smmu_find_master(smmu, sid);
> > +	if (!master)
> > +		return -EINVAL;
> > +
> > +	if (evt[1] & EVTQ_1_RnW)
> > +		perm |= IOMMU_FAULT_PERM_READ;
> > +	else
> > +		perm |= IOMMU_FAULT_PERM_WRITE;
> > +
> > +	if (evt[1] & EVTQ_1_InD)
> > +		perm |= IOMMU_FAULT_PERM_EXEC;
> > +
> > +	if (evt[1] & EVTQ_1_PnU)
> > +		perm |= IOMMU_FAULT_PERM_PRIV;
> > +
> > +	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
> > +	case EVT_ID_TRANSLATION_FAULT:
> > +	case EVT_ID_ADDR_SIZE_FAULT:
> > +	case EVT_ID_ACCESS_FAULT:
> > +		reason = IOMMU_FAULT_REASON_PTE_FETCH;
> Doesn't it rather map to IOMMU_FAULT_REASON_ACCESS?
> /* access flag check failed */"

Good point, I guess it didn't exist when I wrote this. And ADDR_SIZE_FAULT
corresponds to IOMMU_FAULT_REASON_OOR_ADDRESS now, right?

By the way the wording on those two fault reasons, "access flag" and
"stage", seems arch-specific - x86 names are "accessed flag" and "level".

> > +		break;
> > +	case EVT_ID_PERMISSION_FAULT:
> > +		reason = IOMMU_FAULT_REASON_PERMISSION;
> > +		break;
> > +	default:
> > +		return -EOPNOTSUPP;
> > +	}
> > +
> > +	if (evt[1] & EVTQ_1_STALL) {
> > +		flt->type = IOMMU_FAULT_PAGE_REQ;
> > +		flt->prm = (struct iommu_fault_page_request) {
> > +			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
> > +			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
> > +			.perm = perm,
> > +			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
> > +		};
> > +
> > +		if (ssid_valid) {
> > +			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
> > +			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
> > +		}
> > +	} else {
> > +		flt->type = IOMMU_FAULT_DMA_UNRECOV;
> > +		flt->event = (struct iommu_fault_unrecoverable) {
> > +			.reason = reason,
> > +			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID |
> > +				 IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID,
> nit: shall IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID be set here? Supported
> unrecoverable faults feature the IPA field which is UNKNOWN for S1
> translations. fetch_addr rather was
> corresponding to WALK_EABT.Fetch_addr to me.

Right I should drop the IPA part entirely, since we don't report S2 faults
in this patch.

Thanks,
Jean

> > +			.perm = perm,
> > +			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
> > +			.fetch_addr = FIELD_GET(EVTQ_3_IPA, evt[3]),
> > +		};
Jean-Philippe Brucker Feb. 1, 2021, 11:14 a.m. UTC | #4
Hi Zhou,

On Mon, Feb 01, 2021 at 09:18:42AM +0800, Zhou Wang wrote:
> > @@ -1033,8 +1076,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
> >  			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
> >  			CTXDESC_CD_0_V;
> >  
> > -		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
> > -		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
> > +		if (smmu_domain->stall_enabled)
> 
> Could we add ssid checking here? like: if (smmu_domain->stall_enabled && ssid).
> The reason is if not CD.S will also be set when ssid is 0, which is not needed.

Some drivers may want to get stall events on SSID 0:
https://lore.kernel.org/kvm/20210125090402.1429-1-lushenming@huawei.com/#t

Are you seeing an issue with stall events on ssid 0?  Normally there
shouldn't be any fault on this context, but if they happen and no handler
is registered, the SMMU driver will just abort them and report them like a
non-stall event.

Thanks,
Jean
Zhou Wang Feb. 1, 2021, 12:53 p.m. UTC | #5
On 2021/2/1 19:14, Jean-Philippe Brucker wrote:
> Hi Zhou,
> 
> On Mon, Feb 01, 2021 at 09:18:42AM +0800, Zhou Wang wrote:
>>> @@ -1033,8 +1076,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
>>>  			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
>>>  			CTXDESC_CD_0_V;
>>>  
>>> -		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
>>> -		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
>>> +		if (smmu_domain->stall_enabled)
>>
>> Could we add ssid checking here? like: if (smmu_domain->stall_enabled && ssid).
>> The reason is if not CD.S will also be set when ssid is 0, which is not needed.
> 
> Some drivers may want to get stall events on SSID 0:
> https://lore.kernel.org/kvm/20210125090402.1429-1-lushenming@huawei.com/#t

Hi Jean,

I did not notice this before. Yes, if we need to support IOPF in vfio, we
need enable stall events on SSID 0.

> 
> Are you seeing an issue with stall events on ssid 0?  Normally there
> shouldn't be any fault on this context, but if they happen and no handler

It happened that there is bug in the test code of HiSilicon HPRE crypto driver.
We used a wrong iova which triggered a SMMU event with stall bit :)

> is registered, the SMMU driver will just abort them and report them like a
> non-stall event.

It will report event 0x10 with stall bit.

Best,
Zhou

> 
> Thanks,
> Jean
> 
> .
>
Eric Auger Feb. 1, 2021, 1:16 p.m. UTC | #6
Hi Jean,

On 2/1/21 12:12 PM, Jean-Philippe Brucker wrote:
> On Sun, Jan 31, 2021 at 07:29:09PM +0100, Auger Eric wrote:
>> Hi Jean,
>>
>> Some rather minor comments§questions below that may not justify a respin.
>>
>> On 1/27/21 4:43 PM, Jean-Philippe Brucker wrote:
>>> -static bool arm_smmu_iopf_supported(struct arm_smmu_master *master)
>>> +bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
>>>  {
>>> -	return false;
>>> +	/* We're not keeping track of SIDs in fault events */
>> shall we? [*] below
> 
> That would require storing the incoming SID into the iommu_fault_event
> struct, and retrieve it in arm_smmu_page_response(). Easy enough, but I
> don't think it's needed for existing devices.
OK
> 
>>> +	if (master->num_streams != 1)
>>> +		return false;
> [...]
>>> +static int arm_smmu_page_response(struct device *dev,
>>> +				  struct iommu_fault_event *unused,
>>> +				  struct iommu_page_response *resp)
>>> +{
>>> +	struct arm_smmu_cmdq_ent cmd = {0};
>>> +	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
>>> +	int sid = master->streams[0].id;
>> [*]
>>> +
>>> +	if (master->stall_enabled) {
>>> +		cmd.opcode		= CMDQ_OP_RESUME;
>>> +		cmd.resume.sid		= sid;
>>> +		cmd.resume.stag		= resp->grpid;
>>> +		switch (resp->code) {
>>> +		case IOMMU_PAGE_RESP_INVALID:
>> add fallthrough?
> 
> I think fallthrough is mainly useful to tell reader and compiler that a
> break was omitted on purpose. When two cases are stuck together the intent
> to merge the flow is clear enough in my opinion. GCC's
> -Wimplicit-fallthrough doesn't warn in this case.
OK
> 
>>> +		case IOMMU_PAGE_RESP_FAILURE:
>>> +			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
>>> +			break;
> [...]
>>> +static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
>>> +{
>>> +	int ret;
>>> +	u32 reason;
>>> +	u32 perm = 0;
>>> +	struct arm_smmu_master *master;
>>> +	bool ssid_valid = evt[0] & EVTQ_0_SSV;
>>> +	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
>>> +	struct iommu_fault_event fault_evt = { };
>>> +	struct iommu_fault *flt = &fault_evt.fault;
>>> +
>>> +	/* Stage-2 is always pinned at the moment */
>>> +	if (evt[1] & EVTQ_1_S2)
>>> +		return -EFAULT;
>>> +
>>> +	master = arm_smmu_find_master(smmu, sid);
>>> +	if (!master)
>>> +		return -EINVAL;
>>> +
>>> +	if (evt[1] & EVTQ_1_RnW)
>>> +		perm |= IOMMU_FAULT_PERM_READ;
>>> +	else
>>> +		perm |= IOMMU_FAULT_PERM_WRITE;
>>> +
>>> +	if (evt[1] & EVTQ_1_InD)
>>> +		perm |= IOMMU_FAULT_PERM_EXEC;
>>> +
>>> +	if (evt[1] & EVTQ_1_PnU)
>>> +		perm |= IOMMU_FAULT_PERM_PRIV;
>>> +
>>> +	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
>>> +	case EVT_ID_TRANSLATION_FAULT:
>>> +	case EVT_ID_ADDR_SIZE_FAULT:
>>> +	case EVT_ID_ACCESS_FAULT:
>>> +		reason = IOMMU_FAULT_REASON_PTE_FETCH;
>> Doesn't it rather map to IOMMU_FAULT_REASON_ACCESS?
>> /* access flag check failed */"
> 
> Good point, I guess it didn't exist when I wrote this. And ADDR_SIZE_FAULT
> corresponds to IOMMU_FAULT_REASON_OOR_ADDRESS now, right?
yes it dies
> 
> By the way the wording on those two fault reasons, "access flag" and
> "stage", seems arch-specific - x86 names are "accessed flag" and "level".
> 
>>> +		break;
>>> +	case EVT_ID_PERMISSION_FAULT:
>>> +		reason = IOMMU_FAULT_REASON_PERMISSION;
>>> +		break;
>>> +	default:
>>> +		return -EOPNOTSUPP;
>>> +	}
>>> +
>>> +	if (evt[1] & EVTQ_1_STALL) {
>>> +		flt->type = IOMMU_FAULT_PAGE_REQ;
>>> +		flt->prm = (struct iommu_fault_page_request) {
>>> +			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
>>> +			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
>>> +			.perm = perm,
>>> +			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
>>> +		};
>>> +
>>> +		if (ssid_valid) {
>>> +			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
>>> +			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
>>> +		}
>>> +	} else {
>>> +		flt->type = IOMMU_FAULT_DMA_UNRECOV;
>>> +		flt->event = (struct iommu_fault_unrecoverable) {
>>> +			.reason = reason,
>>> +			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID |
>>> +				 IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID,
>> nit: shall IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID be set here? Supported
>> unrecoverable faults feature the IPA field which is UNKNOWN for S1
>> translations. fetch_addr rather was
>> corresponding to WALK_EABT.Fetch_addr to me.
> 
> Right I should drop the IPA part entirely, since we don't report S2 faults
> in this patch.
OK

But as I mentioned this can be fixed separately if you don't have other
comments on this version.

Thanks

Eric
> 
> Thanks,
> Jean
> 
>>> +			.perm = perm,
>>> +			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
>>> +			.fetch_addr = FIELD_GET(EVTQ_3_IPA, evt[3]),
>>> +		};
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
Jean-Philippe Brucker Feb. 1, 2021, 3:19 p.m. UTC | #7
On Mon, Feb 01, 2021 at 02:16:16PM +0100, Auger Eric wrote:
> >>> +		flt->type = IOMMU_FAULT_DMA_UNRECOV;
> >>> +		flt->event = (struct iommu_fault_unrecoverable) {
> >>> +			.reason = reason,
> >>> +			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID |
> >>> +				 IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID,
> >> nit: shall IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID be set here? Supported
> >> unrecoverable faults feature the IPA field which is UNKNOWN for S1
> >> translations. fetch_addr rather was
> >> corresponding to WALK_EABT.Fetch_addr to me.
> > 
> > Right I should drop the IPA part entirely, since we don't report S2 faults
> > in this patch.
> OK
> 
> But as I mentioned this can be fixed separately if you don't have other
> comments on this version.

Thanks, I need to resend anyway to fix patch 7.

Thanks,
Jean
Zhou Wang Feb. 26, 2021, 9:43 a.m. UTC | #8
On 2021/2/1 19:14, Jean-Philippe Brucker wrote:
> Hi Zhou,
> 
> On Mon, Feb 01, 2021 at 09:18:42AM +0800, Zhou Wang wrote:
>>> @@ -1033,8 +1076,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
>>>  			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
>>>  			CTXDESC_CD_0_V;
>>>  
>>> -		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
>>> -		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
>>> +		if (smmu_domain->stall_enabled)
>>
>> Could we add ssid checking here? like: if (smmu_domain->stall_enabled && ssid).
>> The reason is if not CD.S will also be set when ssid is 0, which is not needed.
> 
> Some drivers may want to get stall events on SSID 0:
> https://lore.kernel.org/kvm/20210125090402.1429-1-lushenming@huawei.com/#t
> 
> Are you seeing an issue with stall events on ssid 0?  Normally there
> shouldn't be any fault on this context, but if they happen and no handler
> is registered, the SMMU driver will just abort them and report them like a
> non-stall event.

Hi Jean,

I notice that there is problem. In my case, I expect that CD0 is for kernel
and other CDs are for user space. Normally there shouldn't be any fault in
kernel, however, we have RAS case which is for some reason there may has
invalid address access from hardware device.

So at least there are two different address access failures: 1. hardware RAS problem;
2. software fault fail(e.g. kill process when doing DMA). Handlings for these
two are different: for 1, we should reset hardware device; for 2, stop related
DMA is enough.

Currently if SMMU returns the same signal(by SMMU resume abort), master device
driver can not tell these two kinds of cases.

From the basic concept, if a CD is used for kernel, its S bit should not be set.
How about we add iommu domain check here too, if DMA domain we do not set S bit for
CD0, if unmanaged domain we set S bit for all CDs?

Thanks,
Zhou

> 
> Thanks,
> Jean
> 
> .
>
Jean-Philippe Brucker Feb. 26, 2021, 4:29 p.m. UTC | #9
Hi Zhou,

On Fri, Feb 26, 2021 at 05:43:27PM +0800, Zhou Wang wrote:
> On 2021/2/1 19:14, Jean-Philippe Brucker wrote:
> > Hi Zhou,
> > 
> > On Mon, Feb 01, 2021 at 09:18:42AM +0800, Zhou Wang wrote:
> >>> @@ -1033,8 +1076,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
> >>>  			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
> >>>  			CTXDESC_CD_0_V;
> >>>  
> >>> -		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
> >>> -		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
> >>> +		if (smmu_domain->stall_enabled)
> >>
> >> Could we add ssid checking here? like: if (smmu_domain->stall_enabled && ssid).
> >> The reason is if not CD.S will also be set when ssid is 0, which is not needed.
> > 
> > Some drivers may want to get stall events on SSID 0:
> > https://lore.kernel.org/kvm/20210125090402.1429-1-lushenming@huawei.com/#t
> > 
> > Are you seeing an issue with stall events on ssid 0?  Normally there
> > shouldn't be any fault on this context, but if they happen and no handler
> > is registered, the SMMU driver will just abort them and report them like a
> > non-stall event.
> 
> Hi Jean,
> 
> I notice that there is problem. In my case, I expect that CD0 is for kernel
> and other CDs are for user space. Normally there shouldn't be any fault in
> kernel, however, we have RAS case which is for some reason there may has
> invalid address access from hardware device.
> 
> So at least there are two different address access failures: 1. hardware RAS problem;
> 2. software fault fail(e.g. kill process when doing DMA). Handlings for these
> two are different: for 1, we should reset hardware device; for 2, stop related
> DMA is enough.

Right, and in case 2 there should be no report printed since it can be
triggered by user, while you probably want to be loud in case 1.

> Currently if SMMU returns the same signal(by SMMU resume abort), master device
> driver can not tell these two kinds of cases.

This part I don't understand. So the SMMU sends a RESUME(abort) command,
and then the master reports the DMA error to the device driver, which
cannot differentiate 1 from 2?  (I guess there is no SSID in this report?)
But how does disabling stall change this?  The invalid DMA access will
still be aborted by the SMMU.

Hypothetically, would it work if all stall events that could not be
handled went to the device driver?  Those reports would contain the SSID
(or lack thereof), so you could reset the device in case 1 and ignore case
2. Though resetting the device in the middle of a stalled transaction
probably comes with its own set of problems.

> From the basic concept, if a CD is used for kernel, its S bit should not be set.
> How about we add iommu domain check here too, if DMA domain we do not set S bit for
> CD0, if unmanaged domain we set S bit for all CDs?

I think disabling stall for CD0 of a DMA domain makes sense in general,
even though I don't really understand how that fixes your issue. But
someone might come up with a good use-case for receiving stall events on
DMA mappings, so I'm wondering whether the alternative solution where we
report unhandled stall events to the device driver would also work for
you.

Thanks,
Jean
Zhou Wang Feb. 27, 2021, 3:40 a.m. UTC | #10
On 2021/2/27 0:29, Jean-Philippe Brucker wrote:
> Hi Zhou,
> 
> On Fri, Feb 26, 2021 at 05:43:27PM +0800, Zhou Wang wrote:
>> On 2021/2/1 19:14, Jean-Philippe Brucker wrote:
>>> Hi Zhou,
>>>
>>> On Mon, Feb 01, 2021 at 09:18:42AM +0800, Zhou Wang wrote:
>>>>> @@ -1033,8 +1076,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
>>>>>  			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
>>>>>  			CTXDESC_CD_0_V;
>>>>>  
>>>>> -		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
>>>>> -		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
>>>>> +		if (smmu_domain->stall_enabled)
>>>>
>>>> Could we add ssid checking here? like: if (smmu_domain->stall_enabled && ssid).
>>>> The reason is if not CD.S will also be set when ssid is 0, which is not needed.
>>>
>>> Some drivers may want to get stall events on SSID 0:
>>> https://lore.kernel.org/kvm/20210125090402.1429-1-lushenming@huawei.com/#t
>>>
>>> Are you seeing an issue with stall events on ssid 0?  Normally there
>>> shouldn't be any fault on this context, but if they happen and no handler
>>> is registered, the SMMU driver will just abort them and report them like a
>>> non-stall event.
>>
>> Hi Jean,
>>
>> I notice that there is problem. In my case, I expect that CD0 is for kernel
>> and other CDs are for user space. Normally there shouldn't be any fault in
>> kernel, however, we have RAS case which is for some reason there may has
>> invalid address access from hardware device.
>>
>> So at least there are two different address access failures: 1. hardware RAS problem;
>> 2. software fault fail(e.g. kill process when doing DMA). Handlings for these
>> two are different: for 1, we should reset hardware device; for 2, stop related
>> DMA is enough.
> 
> Right, and in case 2 there should be no report printed since it can be
> triggered by user, while you probably want to be loud in case 1.
> 
>> Currently if SMMU returns the same signal(by SMMU resume abort), master device
>> driver can not tell these two kinds of cases.
> 
> This part I don't understand. So the SMMU sends a RESUME(abort) command,
> and then the master reports the DMA error to the device driver, which
> cannot differentiate 1 from 2?  (I guess there is no SSID in this report?)
> But how does disabling stall change this?  The invalid DMA access will
> still be aborted by the SMMU.

This is about the hardware design. In D06 board, an invalid DMA access from
accelerator devices will be aborted, and an hardware error signal will be
returned to accelerator devices, which reports it as a RAS error irq.
while for the stall case, error signal triggered by SMMU resume abort is
also reported as same RAS error irq. This is problem in D60 board.

In next generation of hardware, a new irq will be added to report SMMU resume
abort information, it works with related registers in accelerator devices to
get related hardware queue, which need to be stopped.

So if CD0.S is 1, invalid DMA access in kernel will be reported into above
new added irq, which has not enough information to tell RAS errors(there are 10+
hardware RAS errors) from SMMU resume abort.

> 
> Hypothetically, would it work if all stall events that could not be
> handled went to the device driver?  Those reports would contain the SSID
> (or lack thereof), so you could reset the device in case 1 and ignore case
> 2. Though resetting the device in the middle of a stalled transaction

As above, it is hard to tell RAS errors and SMMU resume abort in SMMU resume abort
now :(

> probably comes with its own set of problems.
> 
>> From the basic concept, if a CD is used for kernel, its S bit should not be set.
>> How about we add iommu domain check here too, if DMA domain we do not set S bit for
>> CD0, if unmanaged domain we set S bit for all CDs?
> 
> I think disabling stall for CD0 of a DMA domain makes sense in general,
> even though I don't really understand how that fixes your issue. But

As above, if disabling stall for CD0, an invalid DMA access will be handled
by RAS error irq.

> someone might come up with a good use-case for receiving stall events on

If A DMA access in kernel fails, I think there should be a RAS issue :)
So better to disable CD0 stall for DMA domain.

Best,
Zhou

> DMA mappings, so I'm wondering whether the alternative solution where we
> report unhandled stall events to the device driver would also work for
> you.
> 
> Thanks,
> Jean
> 
> .
>
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 7b15b7580c6e..59af0bbd2f7b 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -354,6 +354,13 @@ 
 #define CMDQ_PRI_1_GRPID		GENMASK_ULL(8, 0)
 #define CMDQ_PRI_1_RESP			GENMASK_ULL(13, 12)
 
+#define CMDQ_RESUME_0_RESP_TERM		0UL
+#define CMDQ_RESUME_0_RESP_RETRY	1UL
+#define CMDQ_RESUME_0_RESP_ABORT	2UL
+#define CMDQ_RESUME_0_RESP		GENMASK_ULL(13, 12)
+#define CMDQ_RESUME_0_SID		GENMASK_ULL(63, 32)
+#define CMDQ_RESUME_1_STAG		GENMASK_ULL(15, 0)
+
 #define CMDQ_SYNC_0_CS			GENMASK_ULL(13, 12)
 #define CMDQ_SYNC_0_CS_NONE		0
 #define CMDQ_SYNC_0_CS_IRQ		1
@@ -370,6 +377,25 @@ 
 
 #define EVTQ_0_ID			GENMASK_ULL(7, 0)
 
+#define EVT_ID_TRANSLATION_FAULT	0x10
+#define EVT_ID_ADDR_SIZE_FAULT		0x11
+#define EVT_ID_ACCESS_FAULT		0x12
+#define EVT_ID_PERMISSION_FAULT		0x13
+
+#define EVTQ_0_SSV			(1UL << 11)
+#define EVTQ_0_SSID			GENMASK_ULL(31, 12)
+#define EVTQ_0_SID			GENMASK_ULL(63, 32)
+#define EVTQ_1_STAG			GENMASK_ULL(15, 0)
+#define EVTQ_1_STALL			(1UL << 31)
+#define EVTQ_1_PnU			(1UL << 33)
+#define EVTQ_1_InD			(1UL << 34)
+#define EVTQ_1_RnW			(1UL << 35)
+#define EVTQ_1_S2			(1UL << 39)
+#define EVTQ_1_CLASS			GENMASK_ULL(41, 40)
+#define EVTQ_1_TT_READ			(1UL << 44)
+#define EVTQ_2_ADDR			GENMASK_ULL(63, 0)
+#define EVTQ_3_IPA			GENMASK_ULL(51, 12)
+
 /* PRI queue */
 #define PRIQ_ENT_SZ_SHIFT		4
 #define PRIQ_ENT_DWORDS			((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
@@ -464,6 +490,13 @@  struct arm_smmu_cmdq_ent {
 			enum pri_resp		resp;
 		} pri;
 
+		#define CMDQ_OP_RESUME		0x44
+		struct {
+			u32			sid;
+			u16			stag;
+			u8			resp;
+		} resume;
+
 		#define CMDQ_OP_CMD_SYNC	0x46
 		struct {
 			u64			msiaddr;
@@ -522,6 +555,7 @@  struct arm_smmu_cmdq_batch {
 
 struct arm_smmu_evtq {
 	struct arm_smmu_queue		q;
+	struct iopf_queue		*iopf;
 	u32				max_stalls;
 };
 
@@ -659,7 +693,9 @@  struct arm_smmu_master {
 	struct arm_smmu_stream		*streams;
 	unsigned int			num_streams;
 	bool				ats_enabled;
+	bool				stall_enabled;
 	bool				sva_enabled;
+	bool				iopf_enabled;
 	struct list_head		bonds;
 	unsigned int			ssid_bits;
 };
@@ -678,6 +714,7 @@  struct arm_smmu_domain {
 
 	struct io_pgtable_ops		*pgtbl_ops;
 	bool				non_strict;
+	bool				stall_enabled;
 	atomic_t			nr_ats_masters;
 
 	enum arm_smmu_domain_stage	stage;
@@ -719,6 +756,7 @@  bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
 bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
 int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
 int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
+bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master);
 struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
 				    void *drvdata);
 void arm_smmu_sva_unbind(struct iommu_sva *handle);
@@ -750,6 +788,11 @@  static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
 	return -ENODEV;
 }
 
+static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
+{
+	return false;
+}
+
 static inline struct iommu_sva *
 arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
 {
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index bb251cab61f3..ee66d1f4cb81 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -435,9 +435,13 @@  bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
 	return true;
 }
 
-static bool arm_smmu_iopf_supported(struct arm_smmu_master *master)
+bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
 {
-	return false;
+	/* We're not keeping track of SIDs in fault events */
+	if (master->num_streams != 1)
+		return false;
+
+	return master->stall_enabled;
 }
 
 bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
@@ -445,8 +449,8 @@  bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
 	if (!(master->smmu->features & ARM_SMMU_FEAT_SVA))
 		return false;
 
-	/* SSID and IOPF support are mandatory for the moment */
-	return master->ssid_bits && arm_smmu_iopf_supported(master);
+	/* SSID support is mandatory for the moment */
+	return master->ssid_bits;
 }
 
 bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
@@ -459,13 +463,55 @@  bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
 	return enabled;
 }
 
+static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master)
+{
+	int ret;
+	struct device *dev = master->dev;
+
+	/*
+	 * Drivers for devices supporting PRI or stall should enable IOPF first.
+	 * Others have device-specific fault handlers and don't need IOPF.
+	 */
+	if (!arm_smmu_master_iopf_supported(master))
+		return 0;
+
+	if (!master->iopf_enabled)
+		return -EINVAL;
+
+	ret = iopf_queue_add_device(master->smmu->evtq.iopf, dev);
+	if (ret)
+		return ret;
+
+	ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+	if (ret) {
+		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
+		return ret;
+	}
+	return 0;
+}
+
+static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master)
+{
+	struct device *dev = master->dev;
+
+	if (!master->iopf_enabled)
+		return;
+
+	iommu_unregister_device_fault_handler(dev);
+	iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
+}
+
 int arm_smmu_master_enable_sva(struct arm_smmu_master *master)
 {
+	int ret;
+
 	mutex_lock(&sva_lock);
-	master->sva_enabled = true;
+	ret = arm_smmu_master_sva_enable_iopf(master);
+	if (!ret)
+		master->sva_enabled = true;
 	mutex_unlock(&sva_lock);
 
-	return 0;
+	return ret;
 }
 
 int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
@@ -476,6 +522,7 @@  int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
 		mutex_unlock(&sva_lock);
 		return -EBUSY;
 	}
+	arm_smmu_master_sva_disable_iopf(master);
 	master->sva_enabled = false;
 	mutex_unlock(&sva_lock);
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 3afec6ed8075..76b2306ddff6 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -32,6 +32,7 @@ 
 #include <linux/amba/bus.h>
 
 #include "arm-smmu-v3.h"
+#include "../../iommu-sva-lib.h"
 
 static bool disable_bypass = true;
 module_param(disable_bypass, bool, 0444);
@@ -315,6 +316,11 @@  static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 		}
 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
 		break;
+	case CMDQ_OP_RESUME:
+		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
+		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
+		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
+		break;
 	case CMDQ_OP_CMD_SYNC:
 		if (ent->sync.msiaddr) {
 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
@@ -878,6 +884,44 @@  static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
 }
 
+static int arm_smmu_page_response(struct device *dev,
+				  struct iommu_fault_event *unused,
+				  struct iommu_page_response *resp)
+{
+	struct arm_smmu_cmdq_ent cmd = {0};
+	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+	int sid = master->streams[0].id;
+
+	if (master->stall_enabled) {
+		cmd.opcode		= CMDQ_OP_RESUME;
+		cmd.resume.sid		= sid;
+		cmd.resume.stag		= resp->grpid;
+		switch (resp->code) {
+		case IOMMU_PAGE_RESP_INVALID:
+		case IOMMU_PAGE_RESP_FAILURE:
+			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
+			break;
+		case IOMMU_PAGE_RESP_SUCCESS:
+			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else {
+		return -ENODEV;
+	}
+
+	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
+	/*
+	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
+	 * RESUME consumption guarantees that the stalled transaction will be
+	 * terminated... at some point in the future. PRI_RESP is fire and
+	 * forget.
+	 */
+
+	return 0;
+}
+
 /* Context descriptor manipulation functions */
 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
 {
@@ -988,7 +1032,6 @@  int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
 	u64 val;
 	bool cd_live;
 	__le64 *cdptr;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
 
 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
 		return -E2BIG;
@@ -1033,8 +1076,7 @@  int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
 			CTXDESC_CD_0_V;
 
-		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
-		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+		if (smmu_domain->stall_enabled)
 			val |= CTXDESC_CD_0_S;
 	}
 
@@ -1278,7 +1320,7 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
 
 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
-		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
+		    !master->stall_enabled)
 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
@@ -1355,7 +1397,6 @@  static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
 	return 0;
 }
 
-__maybe_unused
 static struct arm_smmu_master *
 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
 {
@@ -1382,9 +1423,96 @@  arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
 }
 
 /* IRQ and event handlers */
+static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
+{
+	int ret;
+	u32 reason;
+	u32 perm = 0;
+	struct arm_smmu_master *master;
+	bool ssid_valid = evt[0] & EVTQ_0_SSV;
+	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
+	struct iommu_fault_event fault_evt = { };
+	struct iommu_fault *flt = &fault_evt.fault;
+
+	/* Stage-2 is always pinned at the moment */
+	if (evt[1] & EVTQ_1_S2)
+		return -EFAULT;
+
+	master = arm_smmu_find_master(smmu, sid);
+	if (!master)
+		return -EINVAL;
+
+	if (evt[1] & EVTQ_1_RnW)
+		perm |= IOMMU_FAULT_PERM_READ;
+	else
+		perm |= IOMMU_FAULT_PERM_WRITE;
+
+	if (evt[1] & EVTQ_1_InD)
+		perm |= IOMMU_FAULT_PERM_EXEC;
+
+	if (evt[1] & EVTQ_1_PnU)
+		perm |= IOMMU_FAULT_PERM_PRIV;
+
+	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
+	case EVT_ID_TRANSLATION_FAULT:
+	case EVT_ID_ADDR_SIZE_FAULT:
+	case EVT_ID_ACCESS_FAULT:
+		reason = IOMMU_FAULT_REASON_PTE_FETCH;
+		break;
+	case EVT_ID_PERMISSION_FAULT:
+		reason = IOMMU_FAULT_REASON_PERMISSION;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (evt[1] & EVTQ_1_STALL) {
+		flt->type = IOMMU_FAULT_PAGE_REQ;
+		flt->prm = (struct iommu_fault_page_request) {
+			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
+			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
+			.perm = perm,
+			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
+		};
+
+		if (ssid_valid) {
+			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
+		}
+	} else {
+		flt->type = IOMMU_FAULT_DMA_UNRECOV;
+		flt->event = (struct iommu_fault_unrecoverable) {
+			.reason = reason,
+			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID |
+				 IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID,
+			.perm = perm,
+			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
+			.fetch_addr = FIELD_GET(EVTQ_3_IPA, evt[3]),
+		};
+
+		if (ssid_valid) {
+			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
+			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
+		}
+	}
+
+	ret = iommu_report_device_fault(master->dev, &fault_evt);
+	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
+		/* Nobody cared, abort the access */
+		struct iommu_page_response resp = {
+			.pasid		= flt->prm.pasid,
+			.grpid		= flt->prm.grpid,
+			.code		= IOMMU_PAGE_RESP_FAILURE,
+		};
+		arm_smmu_page_response(master->dev, &fault_evt, &resp);
+	}
+
+	return ret;
+}
+
 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
 {
-	int i;
+	int i, ret;
 	struct arm_smmu_device *smmu = dev;
 	struct arm_smmu_queue *q = &smmu->evtq.q;
 	struct arm_smmu_ll_queue *llq = &q->llq;
@@ -1394,6 +1522,10 @@  static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
 		while (!queue_remove_raw(q, evt)) {
 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
 
+			ret = arm_smmu_handle_evt(smmu, evt);
+			if (!ret)
+				continue;
+
 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
 				dev_info(smmu->dev, "\t0x%016llx\n",
@@ -1928,6 +2060,8 @@  static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 
 	cfg->s1cdmax = master->ssid_bits;
 
+	smmu_domain->stall_enabled = master->stall_enabled;
+
 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
 	if (ret)
 		goto out_free_asid;
@@ -2275,6 +2409,12 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
 		ret = -EINVAL;
 		goto out_unlock;
+	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
+		   smmu_domain->stall_enabled != master->stall_enabled) {
+		dev_err(dev, "cannot attach to stall-%s domain\n",
+			smmu_domain->stall_enabled ? "enabled" : "disabled");
+		ret = -EINVAL;
+		goto out_unlock;
 	}
 
 	master->domain = smmu_domain;
@@ -2510,6 +2650,11 @@  static struct iommu_device *arm_smmu_probe_device(struct device *dev)
 		master->ssid_bits = min_t(u8, master->ssid_bits,
 					  CTXDESC_LINEAR_CDMAX);
 
+	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
+	     device_property_read_bool(dev, "dma-can-stall")) ||
+	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+		master->stall_enabled = true;
+
 	return &smmu->iommu;
 
 err_free_master:
@@ -2527,7 +2672,8 @@  static void arm_smmu_release_device(struct device *dev)
 		return;
 
 	master = dev_iommu_priv_get(dev);
-	WARN_ON(arm_smmu_master_sva_enabled(master));
+	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
+		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
 	arm_smmu_detach_dev(master);
 	arm_smmu_disable_pasid(master);
 	arm_smmu_remove_master(master);
@@ -2655,6 +2801,8 @@  static bool arm_smmu_dev_has_feature(struct device *dev,
 		return false;
 
 	switch (feat) {
+	case IOMMU_DEV_FEAT_IOPF:
+		return arm_smmu_master_iopf_supported(master);
 	case IOMMU_DEV_FEAT_SVA:
 		return arm_smmu_master_sva_supported(master);
 	default:
@@ -2671,6 +2819,8 @@  static bool arm_smmu_dev_feature_enabled(struct device *dev,
 		return false;
 
 	switch (feat) {
+	case IOMMU_DEV_FEAT_IOPF:
+		return master->iopf_enabled;
 	case IOMMU_DEV_FEAT_SVA:
 		return arm_smmu_master_sva_enabled(master);
 	default:
@@ -2681,6 +2831,8 @@  static bool arm_smmu_dev_feature_enabled(struct device *dev,
 static int arm_smmu_dev_enable_feature(struct device *dev,
 				       enum iommu_dev_features feat)
 {
+	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
 	if (!arm_smmu_dev_has_feature(dev, feat))
 		return -ENODEV;
 
@@ -2688,8 +2840,11 @@  static int arm_smmu_dev_enable_feature(struct device *dev,
 		return -EBUSY;
 
 	switch (feat) {
+	case IOMMU_DEV_FEAT_IOPF:
+		master->iopf_enabled = true;
+		return 0;
 	case IOMMU_DEV_FEAT_SVA:
-		return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
+		return arm_smmu_master_enable_sva(master);
 	default:
 		return -EINVAL;
 	}
@@ -2698,12 +2853,19 @@  static int arm_smmu_dev_enable_feature(struct device *dev,
 static int arm_smmu_dev_disable_feature(struct device *dev,
 					enum iommu_dev_features feat)
 {
+	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
 	if (!arm_smmu_dev_feature_enabled(dev, feat))
 		return -EINVAL;
 
 	switch (feat) {
+	case IOMMU_DEV_FEAT_IOPF:
+		if (master->sva_enabled)
+			return -EBUSY;
+		master->iopf_enabled = false;
+		return 0;
 	case IOMMU_DEV_FEAT_SVA:
-		return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
+		return arm_smmu_master_disable_sva(master);
 	default:
 		return -EINVAL;
 	}
@@ -2734,6 +2896,7 @@  static struct iommu_ops arm_smmu_ops = {
 	.sva_bind		= arm_smmu_sva_bind,
 	.sva_unbind		= arm_smmu_sva_unbind,
 	.sva_get_pasid		= arm_smmu_sva_get_pasid,
+	.page_response		= arm_smmu_page_response,
 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
 };
 
@@ -2831,6 +2994,13 @@  static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
 	if (ret)
 		return ret;
 
+	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
+	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
+		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
+		if (!smmu->evtq.iopf)
+			return -ENOMEM;
+	}
+
 	/* priq */
 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
 		return 0;
@@ -3746,6 +3916,7 @@  static int arm_smmu_device_remove(struct platform_device *pdev)
 	iommu_device_unregister(&smmu->iommu);
 	iommu_device_sysfs_remove(&smmu->iommu);
 	arm_smmu_device_disable(smmu);
+	iopf_queue_free(smmu->evtq.iopf);
 
 	return 0;
 }