[v2,3/4] iommu/arm-smmu-v3: Add support for dirty tracking in domain alloc

Message ID 20240222094923.33104-4-shameerali.kolothum.thodi@huawei.com (mailing list archive)
State New, archived
Series iommu/smmuv3: Add IOMMUFD dirty tracking support for SMMUv3

Commit Message

Shameer Kolothum Feb. 22, 2024, 9:49 a.m. UTC
From: Joao Martins <joao.m.martins@oracle.com>

This provides all the infrastructure to enable dirty tracking if the
hardware has the capability and the domain alloc request asks for it.

Please note that we still report no support for IOMMU_CAP_DIRTY_TRACKING,
as it will only be enabled in a subsequent patch.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 95 ++++++++++++++++-----
 include/linux/io-pgtable.h                  |  4 +
 2 files changed, 77 insertions(+), 22 deletions(-)

Comments

Joao Martins Feb. 22, 2024, 11:04 a.m. UTC | #1
On 22/02/2024 09:49, Shameer Kolothum wrote:
> From: Joao Martins <joao.m.martins@oracle.com>
> 
> This provides all the infrastructure to enable dirty tracking if the
> hardware has the capability and the domain alloc request asks for it.
> 
> Please note that we still report no support for IOMMU_CAP_DIRTY_TRACKING,
> as it will only be enabled in a subsequent patch.
> 
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 95 ++++++++++++++++-----
>  include/linux/io-pgtable.h                  |  4 +
>  2 files changed, 77 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index bd30739e3588..058bbb0dbe2e 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -43,6 +43,7 @@ MODULE_PARM_DESC(disable_msipolling,
>  	"Disable MSI-based polling for CMD_SYNC completion.");
>  
>  static struct iommu_ops arm_smmu_ops;
> +static struct iommu_dirty_ops arm_smmu_dirty_ops;
>  
>  enum arm_smmu_msi_index {
>  	EVTQ_MSI_INDEX,
> @@ -86,7 +87,8 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
>  
>  static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu);
>  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> -				    struct arm_smmu_device *smmu);
> +				    struct arm_smmu_device *smmu,
> +				    bool enable_dirty);
>  static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
>  static void arm_smmu_tlb_inv_all_s2(struct arm_smmu_domain *smmu_domain);
>  
> @@ -2378,7 +2380,7 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
>  		struct arm_smmu_master *master = dev_iommu_priv_get(dev);
>  		int ret;
>  
> -		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
> +		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, false);
>  		if (ret) {
>  			kfree(smmu_domain);
>  			return ERR_PTR(ret);
> @@ -2445,10 +2447,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
>  }
>  
>  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> -				    struct arm_smmu_device *smmu)
> +				    struct arm_smmu_device *smmu,
> +				    bool enable_dirty)
>  {
>  	int ret;
> -	unsigned long ias, oas;
> +	unsigned long ias;
>  	enum io_pgtable_fmt fmt;
>  	struct io_pgtable_cfg pgtbl_cfg;
>  	struct io_pgtable_ops *pgtbl_ops;
> @@ -2459,31 +2462,31 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
>  	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
>  		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
>  
> +	pgtbl_cfg = (struct io_pgtable_cfg) {
> +		.pgsize_bitmap	= smmu->pgsize_bitmap,
> +		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
> +		.tlb		= &arm_smmu_flush_ops,
> +		.iommu_dev	= smmu->dev,
> +	};
> +
>  	switch (smmu_domain->stage) {
>  	case ARM_SMMU_DOMAIN_S1:
>  		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
> -		ias = min_t(unsigned long, ias, VA_BITS);
> -		oas = smmu->ias;
> +		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
> +		pgtbl_cfg.oas = smmu->ias;
> +		if (enable_dirty)
> +			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
>  		fmt = ARM_64_LPAE_S1;
>  		break;
>  	case ARM_SMMU_DOMAIN_S2:
> -		ias = smmu->ias;
> -		oas = smmu->oas;
> +		pgtbl_cfg.ias = smmu->ias;
> +		pgtbl_cfg.oas = smmu->oas;
>  		fmt = ARM_64_LPAE_S2;
>  		break;
>  	default:
>  		return -EINVAL;
>  	}
>  
> -	pgtbl_cfg = (struct io_pgtable_cfg) {
> -		.pgsize_bitmap	= smmu->pgsize_bitmap,
> -		.ias		= ias,
> -		.oas		= oas,
> -		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
> -		.tlb		= &arm_smmu_flush_ops,
> -		.iommu_dev	= smmu->dev,
> -	};
> -
>  	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
>  	if (!pgtbl_ops)
>  		return -ENOMEM;
> @@ -2491,7 +2494,8 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
>  	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
>  	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
>  	smmu_domain->domain.geometry.force_aperture = true;
> -
> +	if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
> +		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
>  	ret = arm_smmu_domain_alloc_id(smmu, smmu_domain);
>  	if (ret < 0) {
>  		free_io_pgtable_ops(pgtbl_ops);
> @@ -2811,7 +2815,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>  	mutex_lock(&smmu_domain->init_mutex);
>  
>  	if (!smmu_domain->smmu) {
> -		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
> +		ret = arm_smmu_domain_finalise(smmu_domain, smmu, false);
>  	} else if (smmu_domain->smmu != smmu)
>  		ret = -EINVAL;
>  


I think we are missing the domain attach_dev check for dirty tracking.

Something like:

if (domain->dirty_ops && !arm_smmu_dbm_capable(smmu))
	return -EINVAL;

But that helper is only introduced in the last patch, so maybe:

if (domain->dirty_ops &&
    !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
	return -EINVAL;
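For illustration, roughly where such a check could sit in
arm_smmu_attach_dev() (a sketch only; exact placement is of course up to
you):

static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	...
	/*
	 * A domain with dirty_ops set must only ever contain masters
	 * whose SMMU can actually do dirty tracking.
	 */
	if (domain->dirty_ops &&
	    !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
		return -EINVAL;

	mutex_lock(&smmu_domain->init_mutex);
	...
}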

> @@ -2876,7 +2880,7 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
>  
>  	mutex_lock(&smmu_domain->init_mutex);
>  	if (!smmu_domain->smmu)
> -		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
> +		ret = arm_smmu_domain_finalise(smmu_domain, smmu, false);
>  	else if (smmu_domain->smmu != smmu)
>  		ret = -EINVAL;
>  	mutex_unlock(&smmu_domain->init_mutex);
> @@ -3193,7 +3197,9 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
>  			   const struct iommu_user_data *user_data)
>  {
>  	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> -	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT;
> +	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
> +				 IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> +	bool enforce_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
>  	struct arm_smmu_domain *smmu_domain;
>  	int ret;
>  
> @@ -3206,6 +3212,10 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
>  	if (user_data)
>  		return ERR_PTR(-EINVAL);
>  
> +	if (enforce_dirty &&
> +	    !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
> +		return ERR_PTR(-EOPNOTSUPP);
> +
>  	smmu_domain = arm_smmu_domain_alloc();
>  	if (!smmu_domain)
>  		return ERR_PTR(-ENOMEM);
> @@ -3221,7 +3231,7 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
>  
>  	smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
>  	smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
> -	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
> +	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, enforce_dirty);
>  	if (ret)
>  		goto err_free;
>  	return &smmu_domain->domain;
> @@ -3470,6 +3480,42 @@ static void arm_smmu_release_device(struct device *dev)
>  	kfree(master);
>  }
>  
> +static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
> +					 unsigned long iova, size_t size,
> +					 unsigned long flags,
> +					 struct iommu_dirty_bitmap *dirty)
> +{
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> +
> +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> +		return -EINVAL;
> +
> +	if (WARN_ON_ONCE(!ops || !ops->read_and_clear_dirty))
> +		return -ENODEV;
> +
> +	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
> +}
> +
> +static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
> +				       bool enabled)
> +{
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> +
> +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> +		return -EINVAL;
> +
> +	if (WARN_ON_ONCE(!ops))
> +		return -ENODEV;
> +
> +	/*
> +	 * Always enabled and the dirty bitmap is cleared prior to
> +	 * set_dirty_tracking().
> +	 */
> +	return 0;
> +}
> +
>  static struct iommu_group *arm_smmu_device_group(struct device *dev)
>  {
>  	struct iommu_group *group;
> @@ -3612,6 +3658,11 @@ static struct iommu_ops arm_smmu_ops = {
>  	}
>  };
>  
> +static struct iommu_dirty_ops arm_smmu_dirty_ops = {
> +	.read_and_clear_dirty	= arm_smmu_read_and_clear_dirty,
> +	.set_dirty_tracking     = arm_smmu_set_dirty_tracking,
> +};
> +
>  /* Probing and initialisation functions */
>  static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
>  				   struct arm_smmu_queue *q,
> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
> index 86cf1f7ae389..8e75f944f07a 100644
> --- a/include/linux/io-pgtable.h
> +++ b/include/linux/io-pgtable.h
> @@ -85,6 +85,8 @@ struct io_pgtable_cfg {
>  	 *
>  	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
>  	 *	attributes set in the TCR for a non-coherent page-table walker.
> +	 *
> +	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable.
>  	 */
>  	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
>  	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
> @@ -92,6 +94,8 @@ struct io_pgtable_cfg {
>  	#define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT	BIT(4)
>  	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
>  	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
> +	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
> +
>  	unsigned long			quirks;
>  	unsigned long			pgsize_bitmap;
>  	unsigned int			ias;
Shameer Kolothum Feb. 22, 2024, 11:31 a.m. UTC | #2
> On 22/02/2024 09:49, Shameer Kolothum wrote:
> > [...]
> 
> I think we are missing the domain attach_dev check for dirty tracking.
> 
> Something like:
> 
> if (domain->dirty_ops && !arm_smmu_dbm_capable(smmu))
> 	return -EINVAL;
> 
> But that helper is only introduced in the last patch, so maybe:
> 
> if (domain->dirty_ops &&
>     !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
> 	return -EINVAL;

Ok. But do we really need to check this in attach()? The dirty_ops are only
added if they are requested in alloc_user(), and there we return an error when
the hardware doesn't have the capability. So I'm not sure how this matters in
the attach() path. Maybe I am missing something.

Thanks,
Shameer
Joao Martins Feb. 22, 2024, 11:37 a.m. UTC | #3
On 22/02/2024 11:31, Shameerali Kolothum Thodi wrote:
> [...]
>> I think we are missing the domain attach_dev check for dirty tracking.
>>
>> Something like:
>>
>> if (domain->dirty_ops && !arm_smmu_dbm_capable(smmu))
>> 	return -EINVAL;
>>
>> But that helper is only introduced in the last patch, so maybe:
>>
>> if (domain->dirty_ops &&
>>     !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
>> 	return -EINVAL;
> 
> Ok. But do we really need to check this in attach()? The dirty_ops are only
> added if they are requested in alloc_user(), and there we return an error when
> the hardware doesn't have the capability. So I'm not sure how this matters in
> the attach() path. Maybe I am missing something.

That covers the case where you create the domain with dev A, and dev A is then
attached. But later on you can attach another device B to the same domain. The
check is there so that a domain with dirty tracking ops set will only ever
contain devices that support dirty tracking.
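To illustrate the scenario (pseudo-flow only, with names abbreviated from
this series):

	/* dev A's SMMU capability is checked at allocation time... */
	domain = arm_smmu_domain_alloc_user(devA,
				IOMMU_HWPT_ALLOC_DIRTY_TRACKING, ...);
	attach(domain, devA);		/* fine, already validated */

	/* ...but dev B never went through alloc_user(), so only an
	 * attach-time check can reject it if its SMMU lacks DBM. */
	attach(domain, devB);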

	Joao
Shameer Kolothum Feb. 22, 2024, 12:24 p.m. UTC | #4
> On 22/02/2024 11:31, Shameerali Kolothum Thodi wrote:
> >> [...]
> >> I think we are missing the domain attach_dev check for dirty tracking.
> >>
> >> Something like:
> >>
> >> if (domain->dirty_ops && !arm_smmu_dbm_capable(smmu))
> >> 	return -EINVAL;
> >>
> >> But that helper is only introduced in the last patch, so maybe:
> >>
> >> if (domain->dirty_ops &&
> >>     !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
> >> 	return -EINVAL;
> >
> > Ok. But do we really need to check this in attach()? The dirty_ops are only
> > added if they are requested in alloc_user(), and there we return an error when
> > the hardware doesn't have the capability. So I'm not sure how this matters in
> > the attach() path. Maybe I am missing something.
> 
> That covers the case where you create the domain with dev A, and dev A is
> then attached. But later on you can attach another device B to the same
> domain. The check is there so that a domain with dirty tracking ops set will
> only ever contain devices that support dirty tracking.

But that only matters if dev B is on a different SMMU without DBM capability,
right? In that case we already fail the attach with,

>  	} else if (smmu_domain->smmu != smmu)
>  		ret = -EINVAL;

Is that right?

Thanks,
Shameer
Jason Gunthorpe Feb. 22, 2024, 1:11 p.m. UTC | #5
On Thu, Feb 22, 2024 at 12:24:06PM +0000, Shameerali Kolothum Thodi wrote:

> > > Ok. But do we really need to check this in attach()? The dirty_ops are only
> > > added if they are requested in alloc_user(), and there we return an error when
> > > the hardware doesn't have the capability. So I'm not sure how this matters in
> > > the attach() path. Maybe I am missing something.
> > 
> > That covers the case where you create the domain with dev A, and dev A is
> > then attached. But later on you can attach another device B to the same
> > domain. The check is there so that a domain with dirty tracking ops set will
> > only ever contain devices that support dirty tracking.
> 
> But that only matters if dev B is on a different SMMU without DBM capability,
> right? In that case we already fail the attach with,
> 
> >  	} else if (smmu_domain->smmu != smmu)
> >  		ret = -EINVAL;
> 
> Is that right?

Right. Relaxing this is on my list :\

The other two drivers don't have this limitation, so they needed the check
that Joao described.

Jason
Joao Martins Feb. 22, 2024, 1:23 p.m. UTC | #6
On 22/02/2024 12:24, Shameerali Kolothum Thodi wrote:
> [...]
>>>> I think we are missing the domain attach_dev check for dirty tracking.
>>>>
>>>> Something like:
>>>>
>>>> if (domain->dirty_ops && !arm_smmu_dbm_capable(smmu))
>>>> 	return -EINVAL;
>>>>
>>>> But that helper is only introduced in the last patch, so maybe:
>>>>
>>>> if (domain->dirty_ops &&
>>>>     !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
>>>> 	return -EINVAL;
>>>
>>> Ok. But do we really need to check this in attach()? The dirty_ops are only
>>> added if they are requested in alloc_user(), and there we return an error when
>>> the hardware doesn't have the capability. So I'm not sure how this matters in
>>> the attach() path. Maybe I am missing something.
>>
>> That covers the case where you create the domain with dev A, and dev A is
>> then attached. But later on you can attach another device B to the same
>> domain. The check is there so that a domain with dirty tracking ops set will
>> only ever contain devices that support dirty tracking.
> 
> But that only matters if dev B is on a different SMMU without DBM capability,
> right? In that case we already fail the attach with,
> 
> >  	} else if (smmu_domain->smmu != smmu)
> >  		ret = -EINVAL;
> 
> Is that right?
> 

Oh yes, I missed that. That does indeed look equivalent.

Perhaps at least add a comment there to remind the reader, in the event that
such a check is ever lifted.
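Something like this, perhaps (wording is only a suggestion):

	} else if (smmu_domain->smmu != smmu) {
		/*
		 * No cross-SMMU attach: this also guarantees that a domain
		 * with dirty_ops set only contains masters behind the SMMU
		 * whose DBM capability was checked at allocation time. If
		 * this restriction is ever relaxed, an explicit
		 * IOMMU_CAP_DIRTY_TRACKING check is needed here.
		 */
		ret = -EINVAL;
	}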
Jason Gunthorpe March 8, 2024, 2:31 p.m. UTC | #7
On Thu, Feb 22, 2024 at 09:49:22AM +0000, Shameer Kolothum wrote:
> From: Joao Martins <joao.m.martins@oracle.com>
> 
> This provides all the infrastructure to enable dirty tracking if the
> hardware has the capability and the domain alloc request asks for it.
> 
> Please note that we still report no support for IOMMU_CAP_DIRTY_TRACKING,
> as it will only be enabled in a subsequent patch.
> 
> [...]
> 
>  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> -				    struct arm_smmu_device *smmu)
> +				    struct arm_smmu_device *smmu,
> +				    bool enable_dirty)

It is possibly a bit more readable if this is a flags argument and the test
is done on IOMMU_HWPT_ALLOC_DIRTY_TRACKING directly.
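Something like this (sketch only, not tested):

static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
				    struct arm_smmu_device *smmu,
				    u32 flags);

and then inside the S1 case:

	if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;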

>  {
>  	int ret;
> -	unsigned long ias, oas;
> +	unsigned long ias;

Isn't ias unused too?

> @@ -3193,7 +3197,9 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
>  			   const struct iommu_user_data *user_data)
>  {
>  	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> -	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT;
> +	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
> +				 IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> +	bool enforce_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;

Ah, you based this on the nesting series... Once part 2 is done we should
try to figure out what the order should be.

> +static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
> +					 unsigned long iova, size_t size,
> +					 unsigned long flags,
> +					 struct iommu_dirty_bitmap *dirty)
> +{
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> +
> +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> +		return -EINVAL;

This is not possible; these ops are only installed on S1 domains.

> +	if (WARN_ON_ONCE(!ops || !ops->read_and_clear_dirty))
> +		return -ENODEV;

This is not really needed; if this is corrupted then this will have a
tidy crash here anyway:

> +	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);

If you are worried then move the WARN_ON into the finalise function to
ensure that the io_pgtable_ops is properly formed after creating it.
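e.g. a sketch of what that could look like in arm_smmu_domain_finalise()
(hypothetical):

	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops)
		return -ENOMEM;

	/* Validate once at creation instead of in every dirty-ops call. */
	if (enable_dirty && WARN_ON(!pgtbl_ops->read_and_clear_dirty)) {
		free_io_pgtable_ops(pgtbl_ops);
		return -ENODEV;
	}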

> +static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
> +				       bool enabled)
> +{
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> +
> +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> +		return -EINVAL;
> +	if (WARN_ON_ONCE(!ops))
> +		return -ENODEV;

Ditto for both of these

Otherwise it looks OK to me

Jason
Ryan Roberts April 23, 2024, 4:27 p.m. UTC | #8
On 22/02/2024 09:49, Shameer Kolothum wrote:
> From: Joao Martins <joao.m.martins@oracle.com>
> 
> This provides all the infrastructure to enable dirty tracking if the
> hardware has the capability and the domain alloc request asks for it.
> 
> Please note that we still report no support for IOMMU_CAP_DIRTY_TRACKING,
> as it will only be enabled in a subsequent patch.
> 
> [...]
> 
>  	switch (smmu_domain->stage) {
>  	case ARM_SMMU_DOMAIN_S1:
>  		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
> -		ias = min_t(unsigned long, ias, VA_BITS);
> -		oas = smmu->ias;
> +		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);

I know this isn't changed by this patch, but do we really mean VA_BITS here?
Don't we want vabits_actual? I'm guessing we are intending to limit ias to the
size the kernel is using.
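i.e. something like this, if that were the intent (illustrative only):

	pgtbl_cfg.ias = min_t(unsigned long, ias, vabits_actual);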

> +		pgtbl_cfg.oas = smmu->ias;
> +		if (enable_dirty)
> +			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
>  		fmt = ARM_64_LPAE_S1;
>  		break;
>  	case ARM_SMMU_DOMAIN_S2:
> -		ias = smmu->ias;
> -		oas = smmu->oas;
> +		pgtbl_cfg.ias = smmu->ias;
> +		pgtbl_cfg.oas = smmu->oas;
>  		fmt = ARM_64_LPAE_S2;

Is it worth adding a WARN_ON(enable_dirty) here?

>  		break;
>  	default:
>  		return -EINVAL;
>  	}
> 
> [...]
> 
> @@ -3193,7 +3197,9 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
>  			   const struct iommu_user_data *user_data)
>  {
>  	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> -	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT;
> +	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
> +				 IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> +	bool enforce_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;

nit: It's called enable_dirty in other places; I think that is more appropriate
here?

>  	struct arm_smmu_domain *smmu_domain;
>  	int ret;
>  
> @@ -3206,6 +3212,10 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
>  	if (user_data)
>  		return ERR_PTR(-EINVAL);
>  
> +	if (enforce_dirty &&
> +	    !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
> +		return ERR_PTR(-EOPNOTSUPP);

I'm guessing the intention is that only a stage 1 will ever be marked with
IOMMU_CAP_DIRTY_TRACKING (there are a few places that assume/check we are
dealing with S1)? But is there a reason why stage 2 can't be supported as well?

> 
> [...]
> 
> +static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
> +					 unsigned long iova, size_t size,
> +					 unsigned long flags,
> +					 struct iommu_dirty_bitmap *dirty)
> +{
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> +
> +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> +		return -EINVAL;

You've only attached the dirty_ops if it was S1 in the first place, so this
check seems overkill to me.

> +
> +	if (WARN_ON_ONCE(!ops || !ops->read_and_clear_dirty))
> +		return -ENODEV;

And here; could this be moved to where you attach the dirty_ops?
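e.g. a sketch of what I mean, folded into arm_smmu_domain_finalise()
(hypothetical placement):

	if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		/* Catch a malformed io-pgtable once, at creation time. */
		if (WARN_ON_ONCE(!pgtbl_ops->read_and_clear_dirty)) {
			free_io_pgtable_ops(pgtbl_ops);
			return -ENODEV;
		}
		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
	}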

> +
> +	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
> +}
> +
> +static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
> +				       bool enabled)
> +{
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> +
> +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> +		return -EINVAL;
> +
> +	if (WARN_ON_ONCE(!ops))
> +		return -ENODEV;

Same comments for the 2 checks.

> [...]
On Tue, Apr 23, 2024 at 05:27:09PM +0100, Ryan Roberts wrote:

> >  	switch (smmu_domain->stage) {
> >  	case ARM_SMMU_DOMAIN_S1:
> >  		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
> > -		ias = min_t(unsigned long, ias, VA_BITS);
> > -		oas = smmu->ias;
> > +		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
> 
> I know this isn't changed by this patch, but do we really mean VA_BITS here?
> Don't we want vabits_actual? I'm guessing we are intending to limit ias to the
> size the kernel is using.

I think the intention here is to allow CONFIG_ARM64_VA_BITS to choose
the number of page table levels in the IOMMU. Perhaps an SMMU-specific
config option would be clearer.

There is no relationship between the configuration of the S1 paging domain
and what the MM is doing; there should be no crossing between those
two worlds.

vabits_actual is used in the SVA code to match the SMMU SVA S1 to the
mm_struct.

> > @@ -3206,6 +3212,10 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
> >  	if (user_data)
> >  		return ERR_PTR(-EINVAL);
> >  
> > +	if (enforce_dirty &&
> > +	    !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
> > +		return ERR_PTR(-EOPNOTSUPP);
> 
> I'm guessing the intention is that only a stage 1 will ever be marked with
> IOMMU_CAP_DIRTY_TRACKING (there are a few places that assume/check we are
> dealing with S1)? But is there a reason why stage 2 can't be supported as well?

Stage 2 really should ultimately be supported...

I'd say this is a first step, as iommufd will naturally prefer the S1
configuration when not using nesting. After the nesting support is
merged, the vSMMU will require the S2 to do the dirty tracking.

Jason
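
For concreteness, extending the same scheme to S2 later would
presumably mean passing the quirk through in the S2 arm of the switch
and attaching the dirty ops without the S1-only condition. A
hypothetical sketch of that future step (not part of this series, and
whether S2 reuses IO_PGTABLE_QUIRK_ARM_HD is an open design question):

	case ARM_SMMU_DOMAIN_S2:
		pgtbl_cfg.ias = smmu->ias;
		pgtbl_cfg.oas = smmu->oas;
		if (enable_dirty)
			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
		fmt = ARM_64_LPAE_S2;
		break;

	...

	/* and later, without the S1-only condition: */
	if (enable_dirty)
		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
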
Ryan Roberts April 23, 2024, 4:50 p.m. UTC | #10
On 23/04/2024 17:39, Jason Gunthorpe wrote:
> On Tue, Apr 23, 2024 at 05:27:09PM +0100, Ryan Roberts wrote:
> 
>>>  	switch (smmu_domain->stage) {
>>>  	case ARM_SMMU_DOMAIN_S1:
>>>  		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
>>> -		ias = min_t(unsigned long, ias, VA_BITS);
>>> -		oas = smmu->ias;
>>> +		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
>>
>> I know this isn't changed by this patch, but do we really mean VA_BITS here?
>> Don't we want vabits_actual? I'm guessing we are intending to limit ias to the
>> size the kernel is using.
> 
> I think the intention here is to allow CONFIG_ARM64_VA_BITS to choose
> the number of page table levels in the iommu. 

Hmm ok, got it. But it seems odd...

> Perhaps an SMMU-specific
> config option would be clearer.

Yes, it would certainly be clearer. Anyway, I don't think there is any required
action here. Thanks for explaining.

> 
> There is no relationship between the configuration of the S1 paging
> domain and what the MM is doing; there should be no crossing between
> those two worlds.
> 
> vabits_actual is used in the SVA code to match the SMMU SVA S1 to the
> mm_struct.
> 
>>> @@ -3206,6 +3212,10 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
>>>  	if (user_data)
>>>  		return ERR_PTR(-EINVAL);
>>>  
>>> +	if (enforce_dirty &&
>>> +	    !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
>>> +		return ERR_PTR(-EOPNOTSUPP);
>>
>> I'm guessing the intention is that only a stage 1 will ever be marked with
>> IOMMU_CAP_DIRTY_TRACKING (there are a few places that assume/check we are
>> dealing with S1)? But is there a reason why stage 2 can't be supported as well?
> 
> Stage 2 really should ultimately be supported...
> 
> I'd say this is a first step, as iommufd will naturally prefer the S1
> configuration when not using nesting. After the nesting support is
> merged, the vSMMU will require the S2 to do the dirty tracking.

Yep, ok, so we should plan to support S2 as a later step. Sounds good.

> 
> Jason
Shameer Kolothum April 24, 2024, 8:27 a.m. UTC | #11
> -----Original Message-----
> From: Ryan Roberts <ryan.roberts@arm.com>
> Sent: Tuesday, April 23, 2024 5:27 PM
> To: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>;
> iommu@lists.linux.dev; linux-arm-kernel@lists.infradead.org
> Cc: joro@8bytes.org; jgg@nvidia.com; kevin.tian@intel.com;
> nicolinc@nvidia.com; mshavit@google.com; robin.murphy@arm.com;
> will@kernel.org; joao.m.martins@oracle.com; jiangkunkun@huawei.com;
> zhukeqian1@huawei.com; linuxarm@huawei.com
> Subject: Re: [PATCH v2 3/4] iommu/arm-smmu-v3: Add support for dirty tracking
> in domain alloc
> 
> On 22/02/2024 09:49, Shameer Kolothum wrote:
> > From: Joao Martins <joao.m.martins@oracle.com>
> >
> > This provides all the infrastructure to enable dirty tracking if the
> > hardware has the capability and domain alloc request for it.
> >
> > Please note, we still report no support for IOMMU_CAP_DIRTY_TRACKING
> > as it will finally be enabled in a subsequent patch.
> >
> > Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> > Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> > ---
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 95 ++++++++++++++++-----
> >  include/linux/io-pgtable.h                  |  4 +
> >  2 files changed, 77 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index bd30739e3588..058bbb0dbe2e 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -43,6 +43,7 @@ MODULE_PARM_DESC(disable_msipolling,
> >  	"Disable MSI-based polling for CMD_SYNC completion.");
> >
> >  static struct iommu_ops arm_smmu_ops;
> > +static struct iommu_dirty_ops arm_smmu_dirty_ops;
> >
> >  enum arm_smmu_msi_index {
> >  	EVTQ_MSI_INDEX,
> > @@ -86,7 +87,8 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
> >
> >  static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu);
> >  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> > -				    struct arm_smmu_device *smmu);
> > +				    struct arm_smmu_device *smmu,
> > +				    bool enable_dirty);
> >  static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
> >  static void arm_smmu_tlb_inv_all_s2(struct arm_smmu_domain *smmu_domain);
> >
> > @@ -2378,7 +2380,7 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
> >  		struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> >  		int ret;
> >
> > -		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
> > +		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, false);
> >  		if (ret) {
> >  			kfree(smmu_domain);
> >  			return ERR_PTR(ret);
> > @@ -2445,10 +2447,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
> >  }
> >
> >  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> > -				    struct arm_smmu_device *smmu)
> > +				    struct arm_smmu_device *smmu,
> > +				    bool enable_dirty)
> >  {
> >  	int ret;
> > -	unsigned long ias, oas;
> > +	unsigned long ias;
> >  	enum io_pgtable_fmt fmt;
> >  	struct io_pgtable_cfg pgtbl_cfg;
> >  	struct io_pgtable_ops *pgtbl_ops;
> > @@ -2459,31 +2462,31 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> >  	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
> >  		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
> >
> > +	pgtbl_cfg = (struct io_pgtable_cfg) {
> > +		.pgsize_bitmap	= smmu->pgsize_bitmap,
> > +		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
> > +		.tlb		= &arm_smmu_flush_ops,
> > +		.iommu_dev	= smmu->dev,
> > +	};
> > +
> >  	switch (smmu_domain->stage) {
> >  	case ARM_SMMU_DOMAIN_S1:
> >  		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
> > -		ias = min_t(unsigned long, ias, VA_BITS);
> > -		oas = smmu->ias;
> > +		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
> 
> I know this isn't changed by this patch, but do we really mean VA_BITS here?
> Don't we want vabits_actual? I'm guessing we are intending to limit ias to the
> size the kernel is using.

I see Jason has replied to this.

> 
> > +		pgtbl_cfg.oas = smmu->ias;
> > +		if (enable_dirty)
> > +			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
> >  		fmt = ARM_64_LPAE_S1;
> >  		break;
> >  	case ARM_SMMU_DOMAIN_S2:
> > -		ias = smmu->ias;
> > -		oas = smmu->oas;
> > +		pgtbl_cfg.ias = smmu->ias;
> > +		pgtbl_cfg.oas = smmu->oas;
> >  		fmt = ARM_64_LPAE_S2;
> 
> Is it worth adding a WARN_ON(enable_dirty) here?

Not sure it makes a difference, as we don’t set the quirk flag here.

> 
> >  		break;
> >  	default:
> >  		return -EINVAL;
> >  	}
> >
> > -	pgtbl_cfg = (struct io_pgtable_cfg) {
> > -		.pgsize_bitmap	= smmu->pgsize_bitmap,
> > -		.ias		= ias,
> > -		.oas		= oas,
> > -		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
> > -		.tlb		= &arm_smmu_flush_ops,
> > -		.iommu_dev	= smmu->dev,
> > -	};
> > -
> >  	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
> >  	if (!pgtbl_ops)
> >  		return -ENOMEM;
> > @@ -2491,7 +2494,8 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> >  	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
> >  	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
> >  	smmu_domain->domain.geometry.force_aperture = true;
> > -
> > +	if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
> > +		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
> >  	ret = arm_smmu_domain_alloc_id(smmu, smmu_domain);
> >  	if (ret < 0) {
> >  		free_io_pgtable_ops(pgtbl_ops);
> > @@ -2811,7 +2815,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> >  	mutex_lock(&smmu_domain->init_mutex);
> >
> >  	if (!smmu_domain->smmu) {
> > -		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
> > +		ret = arm_smmu_domain_finalise(smmu_domain, smmu, false);
> >  	} else if (smmu_domain->smmu != smmu)
> >  		ret = -EINVAL;
> >
> > @@ -2876,7 +2880,7 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
> >
> >  	mutex_lock(&smmu_domain->init_mutex);
> >  	if (!smmu_domain->smmu)
> > -		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
> > +		ret = arm_smmu_domain_finalise(smmu_domain, smmu, false);
> >  	else if (smmu_domain->smmu != smmu)
> >  		ret = -EINVAL;
> >  	mutex_unlock(&smmu_domain->init_mutex);
> > @@ -3193,7 +3197,9 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
> >  			   const struct iommu_user_data *user_data)
> >  {
> >  	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> > -	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT;
> > +	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
> > +				 IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> > +	bool enforce_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> 
> nit: It's called enable_dirty in other places; I think that is more appropriate
> here?

Ok.

> >  	struct arm_smmu_domain *smmu_domain;
> >  	int ret;
> >
> > @@ -3206,6 +3212,10 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
> >  	if (user_data)
> >  		return ERR_PTR(-EINVAL);
> >
> > +	if (enforce_dirty &&
> > +	    !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
> > +		return ERR_PTR(-EOPNOTSUPP);
> 
> I'm guessing the intention is that only a stage 1 will ever be marked with
> IOMMU_CAP_DIRTY_TRACKING (there are a few places that assume/check we are
> dealing with S1)? But is there a reason why stage 2 can't be supported as well?

We don’t have nested support yet; S2 support will be added in the future.
 
> > +
> >  	smmu_domain = arm_smmu_domain_alloc();
> >  	if (!smmu_domain)
> >  		return ERR_PTR(-ENOMEM);
> > @@ -3221,7 +3231,7 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
> >
> >  	smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
> >  	smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
> > -	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
> > +	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, enforce_dirty);
> >  	if (ret)
> >  		goto err_free;
> >  	return &smmu_domain->domain;
> > @@ -3470,6 +3480,42 @@ static void arm_smmu_release_device(struct device *dev)
> >  	kfree(master);
> >  }
> >
> > +static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
> > +					 unsigned long iova, size_t size,
> > +					 unsigned long flags,
> > +					 struct iommu_dirty_bitmap *dirty)
> > +{
> > +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> > +
> > +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> > +		return -EINVAL;
> 
> You've only attached the dirty_ops if it was S1 in the first place, so this
> check seems overkill to me.
> 
> > +
> > +	if (WARN_ON_ONCE(!ops || !ops->read_and_clear_dirty))
> > +		return -ENODEV;
> 
> And here; could this be moved to where you attach the dirty_ops?

Yes. Jason has also made the same comment on this, and it will be
removed in the next revision.

> > +
> > +	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
> > +}
> > +
> > +static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
> > +				       bool enabled)
> > +{
> > +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> > +
> > +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> > +		return -EINVAL;
> > +
> > +	if (WARN_ON_ONCE(!ops))
> > +		return -ENODEV;
> 
> Same comments for the 2 checks.

Sure.
 
Thanks,
Shameer
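
As context for the "always enabled" comment: the core exercises these
two callbacks through the generic dirty-bitmap helpers, which is why
the driver can treat tracking as always-on. A rough sketch of the
caller side, using helper names from the existing IOMMUFD
dirty-tracking core rather than from this patch:

	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	int ret;

	/* bitmap is the user-supplied struct iova_bitmap */
	iommu_dirty_bitmap_init(&dirty, bitmap, &gather);

	/* walks the S1 tables, records and rearms dirty (DBM) entries */
	ret = domain->dirty_ops->read_and_clear_dirty(domain, iova, size,
						      flags, &dirty);

	/* flush TLB entries made stale by clearing the dirty state */
	iommu_iotlb_sync(domain, &gather);
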
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index bd30739e3588..058bbb0dbe2e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -43,6 +43,7 @@  MODULE_PARM_DESC(disable_msipolling,
 	"Disable MSI-based polling for CMD_SYNC completion.");
 
 static struct iommu_ops arm_smmu_ops;
+static struct iommu_dirty_ops arm_smmu_dirty_ops;
 
 enum arm_smmu_msi_index {
 	EVTQ_MSI_INDEX,
@@ -86,7 +87,8 @@  static struct arm_smmu_option_prop arm_smmu_options[] = {
 
 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu);
 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
-				    struct arm_smmu_device *smmu);
+				    struct arm_smmu_device *smmu,
+				    bool enable_dirty);
 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
 static void arm_smmu_tlb_inv_all_s2(struct arm_smmu_domain *smmu_domain);
 
@@ -2378,7 +2380,7 @@  static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
 		struct arm_smmu_master *master = dev_iommu_priv_get(dev);
 		int ret;
 
-		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
+		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, false);
 		if (ret) {
 			kfree(smmu_domain);
 			return ERR_PTR(ret);
@@ -2445,10 +2447,11 @@  static void arm_smmu_domain_free(struct iommu_domain *domain)
 }
 
 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
-				    struct arm_smmu_device *smmu)
+				    struct arm_smmu_device *smmu,
+				    bool enable_dirty)
 {
 	int ret;
-	unsigned long ias, oas;
+	unsigned long ias;
 	enum io_pgtable_fmt fmt;
 	struct io_pgtable_cfg pgtbl_cfg;
 	struct io_pgtable_ops *pgtbl_ops;
@@ -2459,31 +2462,31 @@  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
 
+	pgtbl_cfg = (struct io_pgtable_cfg) {
+		.pgsize_bitmap	= smmu->pgsize_bitmap,
+		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
+		.tlb		= &arm_smmu_flush_ops,
+		.iommu_dev	= smmu->dev,
+	};
+
 	switch (smmu_domain->stage) {
 	case ARM_SMMU_DOMAIN_S1:
 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
-		ias = min_t(unsigned long, ias, VA_BITS);
-		oas = smmu->ias;
+		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
+		pgtbl_cfg.oas = smmu->ias;
+		if (enable_dirty)
+			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
 		fmt = ARM_64_LPAE_S1;
 		break;
 	case ARM_SMMU_DOMAIN_S2:
-		ias = smmu->ias;
-		oas = smmu->oas;
+		pgtbl_cfg.ias = smmu->ias;
+		pgtbl_cfg.oas = smmu->oas;
 		fmt = ARM_64_LPAE_S2;
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	pgtbl_cfg = (struct io_pgtable_cfg) {
-		.pgsize_bitmap	= smmu->pgsize_bitmap,
-		.ias		= ias,
-		.oas		= oas,
-		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
-		.tlb		= &arm_smmu_flush_ops,
-		.iommu_dev	= smmu->dev,
-	};
-
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops)
 		return -ENOMEM;
@@ -2491,7 +2494,8 @@  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
 	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
 	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
 	smmu_domain->domain.geometry.force_aperture = true;
-
+	if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
 	ret = arm_smmu_domain_alloc_id(smmu, smmu_domain);
 	if (ret < 0) {
 		free_io_pgtable_ops(pgtbl_ops);
@@ -2811,7 +2815,7 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	mutex_lock(&smmu_domain->init_mutex);
 
 	if (!smmu_domain->smmu) {
-		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
+		ret = arm_smmu_domain_finalise(smmu_domain, smmu, false);
 	} else if (smmu_domain->smmu != smmu)
 		ret = -EINVAL;
 
@@ -2876,7 +2880,7 @@  static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
 
 	mutex_lock(&smmu_domain->init_mutex);
 	if (!smmu_domain->smmu)
-		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
+		ret = arm_smmu_domain_finalise(smmu_domain, smmu, false);
 	else if (smmu_domain->smmu != smmu)
 		ret = -EINVAL;
 	mutex_unlock(&smmu_domain->init_mutex);
@@ -3193,7 +3197,9 @@  arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
 			   const struct iommu_user_data *user_data)
 {
 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
-	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT;
+	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
+				 IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+	bool enforce_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
 	struct arm_smmu_domain *smmu_domain;
 	int ret;
 
@@ -3206,6 +3212,10 @@  arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
 	if (user_data)
 		return ERR_PTR(-EINVAL);
 
+	if (enforce_dirty &&
+	    !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
+		return ERR_PTR(-EOPNOTSUPP);
+
 	smmu_domain = arm_smmu_domain_alloc();
 	if (!smmu_domain)
 		return ERR_PTR(-ENOMEM);
@@ -3221,7 +3231,7 @@  arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
 
 	smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
 	smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
-	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
+	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, enforce_dirty);
 	if (ret)
 		goto err_free;
 	return &smmu_domain->domain;
@@ -3470,6 +3480,42 @@  static void arm_smmu_release_device(struct device *dev)
 	kfree(master);
 }
 
+static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
+					 unsigned long iova, size_t size,
+					 unsigned long flags,
+					 struct iommu_dirty_bitmap *dirty)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+
+	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(!ops || !ops->read_and_clear_dirty))
+		return -ENODEV;
+
+	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
+}
+
+static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
+				       bool enabled)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+
+	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(!ops))
+		return -ENODEV;
+
+	/*
+	 * Always enabled and the dirty bitmap is cleared prior to
+	 * set_dirty_tracking().
+	 */
+	return 0;
+}
+
 static struct iommu_group *arm_smmu_device_group(struct device *dev)
 {
 	struct iommu_group *group;
@@ -3612,6 +3658,11 @@  static struct iommu_ops arm_smmu_ops = {
 	}
 };
 
+static struct iommu_dirty_ops arm_smmu_dirty_ops = {
+	.read_and_clear_dirty	= arm_smmu_read_and_clear_dirty,
+	.set_dirty_tracking     = arm_smmu_set_dirty_tracking,
+};
+
 /* Probing and initialisation functions */
 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 				   struct arm_smmu_queue *q,
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 86cf1f7ae389..8e75f944f07a 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -85,6 +85,8 @@  struct io_pgtable_cfg {
 	 *
 	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
 	 *	attributes set in the TCR for a non-coherent page-table walker.
+	 *
+	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable.
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
@@ -92,6 +94,8 @@  struct io_pgtable_cfg {
 	#define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT	BIT(4)
 	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
 	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
+	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
+
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;
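
For completeness, the userspace-visible flow this series plugs into is
the pre-existing IOMMUFD dirty-tracking uAPI. A rough sketch (struct,
flag and ioctl names from include/uapi/linux/iommufd.h; treat the
exact field layout as an assumption to verify against current
headers):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/iommufd.h>

	/* allocate an HWPT with dirty tracking enforced at domain alloc */
	struct iommu_hwpt_alloc alloc = {
		.size = sizeof(alloc),
		.flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING,
		.dev_id = dev_id,
		.pt_id = ioas_id,
	};
	ioctl(iommufd, IOMMU_HWPT_ALLOC, &alloc);

	/* toggle tracking (a no-op "enable" for SMMUv3, per this patch) */
	struct iommu_hwpt_set_dirty_tracking set = {
		.size = sizeof(set),
		.flags = IOMMU_HWPT_DIRTY_TRACKING_ENABLE,
		.hwpt_id = alloc.out_hwpt_id,
	};
	ioctl(iommufd, IOMMU_HWPT_SET_DIRTY_TRACKING, &set);

	/* read back (and clear) the dirty bitmap for an IOVA range */
	struct iommu_hwpt_get_dirty_bitmap get = {
		.size = sizeof(get),
		.hwpt_id = alloc.out_hwpt_id,
		.iova = iova,
		.length = length,
		.page_size = 4096,
		.data = (uintptr_t)bitmap,
	};
	ioctl(iommufd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get);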