diff mbox series

[RFCv2,21/24] iommu/arm-smmu-v3: Enable HTTU for stage1 with io-pgtable mapping

Message ID 20230518204650.14541-22-joao.m.martins@oracle.com (mailing list archive)
State New, archived
Headers show
Series IOMMUFD Dirty Tracking | expand

Commit Message

Joao Martins May 18, 2023, 8:46 p.m. UTC
From: Kunkun Jiang <jiangkunkun@huawei.com>

As nested mode is not upstreamed now, we just aim to support dirty
log tracking for stage1 with io-pgtable mapping (means not support
SVA mapping). If HTTU is supported, we enable HA/HD bits in the SMMU
CD and transfer ARM_HD quirk to io-pgtable.

We additionally filter out HD|HA if not supported. The CD.HD bit
is not particularly useful unless we toggle the DBM bit in the PTE
entries.

Link: https://lore.kernel.org/lkml/20210413085457.25400-6-zhukeqian1@huawei.com/
Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
[joaomart:Convey HD|HA bits over to the context descriptor
 and update commit message; original in Link, where this is based on]
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  3 +++
 drivers/iommu/io-pgtable-arm.c              | 11 +++++++++--
 include/linux/io-pgtable.h                  |  4 ++++
 4 files changed, 26 insertions(+), 2 deletions(-)

Comments

Robin Murphy May 19, 2023, 1:49 p.m. UTC | #1
On 2023-05-18 21:46, Joao Martins wrote:
> From: Kunkun Jiang <jiangkunkun@huawei.com>
> 
> As nested mode is not upstreamed now, we just aim to support dirty
> log tracking for stage1 with io-pgtable mapping (means not support
> SVA mapping). If HTTU is supported, we enable HA/HD bits in the SMMU
> CD and transfer ARM_HD quirk to io-pgtable.
> 
> We additionally filter out HD|HA if not supportted. The CD.HD bit
> is not particularly useful unless we toggle the DBM bit in the PTE
> entries.

...seems odd to describe the control which fundamentally enables DBM or
not as "not particularly useful" to the DBM use-case :/

> Link: https://lore.kernel.org/lkml/20210413085457.25400-6-zhukeqian1@huawei.com/
> Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
> [joaomart:Convey HD|HA bits over to the context descriptor
>   and update commit message; original in Link, where this is based on]
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++++
>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  3 +++
>   drivers/iommu/io-pgtable-arm.c              | 11 +++++++++--
>   include/linux/io-pgtable.h                  |  4 ++++

For the sake of cleanliness, please split the io-pgtable and SMMU 
additions into separate patches (you could perhaps then squash 
set_dirty_tracking() into the SMMU patch as well).

Thanks,
Robin.

>   4 files changed, 26 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index e110ff4710bf..e2b98a6a6b74 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -1998,6 +1998,11 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = {
>   	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
>   };
>   
> +static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
> +{
> +	return smmu->features & (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY);
> +}
> +
>   /* IOMMU API */
>   static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
>   {
> @@ -2124,6 +2129,8 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
>   			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
>   			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
>   			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
> +	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
> +		cfg->cd.tcr |= CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD;
>   	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
>   
>   	/*
> @@ -2226,6 +2233,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
>   		.iommu_dev	= smmu->dev,
>   	};
>   
> +	if (smmu->features & arm_smmu_dbm_capable(smmu))
> +		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
> +
>   	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
>   	if (!pgtbl_ops)
>   		return -ENOMEM;
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index d82dd125446c..83d6f3a2554f 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -288,6 +288,9 @@
>   #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
>   #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
>   
> +#define CTXDESC_CD_0_TCR_HA            (1UL << 43)
> +#define CTXDESC_CD_0_TCR_HD            (1UL << 42)
> +
>   #define CTXDESC_CD_0_AA64		(1UL << 41)
>   #define CTXDESC_CD_0_S			(1UL << 44)
>   #define CTXDESC_CD_0_R			(1UL << 45)
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 72dcdd468cf3..b2f470529459 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -75,6 +75,7 @@
>   
>   #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
>   #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
> +#define ARM_LPAE_PTE_DBM		(((arm_lpae_iopte)1) << 51)
>   #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
>   #define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
>   #define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
> @@ -84,7 +85,7 @@
>   
>   #define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) << 2)
>   /* Ignore the contiguous bit for block splitting */
> -#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)6) << 52)
> +#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)13) << 51)
>   #define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK |	\
>   					 ARM_LPAE_PTE_ATTR_HI_MASK)
>   /* Software bit for solving coherency races */
> @@ -93,6 +94,9 @@
>   /* Stage-1 PTE */
>   #define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
>   #define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)2) << 6)
> +#define ARM_LPAE_PTE_AP_RDONLY_BIT	7
> +#define ARM_LPAE_PTE_AP_WRITABLE	(ARM_LPAE_PTE_AP_RDONLY | \
> +					 ARM_LPAE_PTE_DBM)
>   #define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
>   #define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)
>   
> @@ -407,6 +411,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
>   		pte = ARM_LPAE_PTE_nG;
>   		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
>   			pte |= ARM_LPAE_PTE_AP_RDONLY;
> +		else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
> +			pte |= ARM_LPAE_PTE_AP_WRITABLE;
>   		if (!(prot & IOMMU_PRIV))
>   			pte |= ARM_LPAE_PTE_AP_UNPRIV;
>   	} else {
> @@ -804,7 +810,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
>   
>   	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
>   			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
> -			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
> +			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
> +			    IO_PGTABLE_QUIRK_ARM_HD))
>   		return NULL;
>   
>   	data = arm_lpae_alloc_pgtable(cfg);
> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
> index 25142a0e2fc2..9a996ba7856d 100644
> --- a/include/linux/io-pgtable.h
> +++ b/include/linux/io-pgtable.h
> @@ -85,6 +85,8 @@ struct io_pgtable_cfg {
>   	 *
>   	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
>   	 *	attributes set in the TCR for a non-coherent page-table walker.
> +	 *
> +	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking.
>   	 */
>   	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
>   	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
> @@ -92,6 +94,8 @@ struct io_pgtable_cfg {
>   	#define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT	BIT(4)
>   	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
>   	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
> +	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
> +
>   	unsigned long			quirks;
>   	unsigned long			pgsize_bitmap;
>   	unsigned int			ias;
Joao Martins May 19, 2023, 2:05 p.m. UTC | #2
On 19/05/2023 14:49, Robin Murphy wrote:
> On 2023-05-18 21:46, Joao Martins wrote:
>> From: Kunkun Jiang <jiangkunkun@huawei.com>
>>
>> As nested mode is not upstreamed now, we just aim to support dirty
>> log tracking for stage1 with io-pgtable mapping (means not support
>> SVA mapping). If HTTU is supported, we enable HA/HD bits in the SMMU
>> CD and transfer ARM_HD quirk to io-pgtable.
>>
>> We additionally filter out HD|HA if not supportted. The CD.HD bit
>> is not particularly useful unless we toggle the DBM bit in the PTE
>> entries.
> 
> ...seeds odd to describe the control which fundamentally enables DBM or not as
> "not particularly useful" to the DBM use-case :/
> 

This is a remnant from v1, where we would just enable the context descriptor HD
bit but not actually enable DBM until set_dirty_tracking(). That is no longer
the case, so this sentence should be removed.

>> Link: https://lore.kernel.org/lkml/20210413085457.25400-6-zhukeqian1@huawei.com/
>> Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
>> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
>> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
>> [joaomart:Convey HD|HA bits over to the context descriptor
>>   and update commit message; original in Link, where this is based on]
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++++
>>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  3 +++
>>   drivers/iommu/io-pgtable-arm.c              | 11 +++++++++--
>>   include/linux/io-pgtable.h                  |  4 ++++
> 
> For the sake of cleanliness, please split the io-pgtable and SMMU additions into
> separate patches (you could perhaps then squash set_dirty_tracking() into the
> SMMU patch as well).
> 
ack
Shameerali Kolothum Thodi May 22, 2023, 10:34 a.m. UTC | #3
> -----Original Message-----
> From: Joao Martins [mailto:joao.m.martins@oracle.com]
> Sent: 18 May 2023 21:47
> To: iommu@lists.linux.dev
> Cc: Jason Gunthorpe <jgg@nvidia.com>; Kevin Tian <kevin.tian@intel.com>;
> Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>; Lu
> Baolu <baolu.lu@linux.intel.com>; Yi Liu <yi.l.liu@intel.com>; Yi Y Sun
> <yi.y.sun@intel.com>; Eric Auger <eric.auger@redhat.com>; Nicolin Chen
> <nicolinc@nvidia.com>; Joerg Roedel <joro@8bytes.org>; Jean-Philippe
> Brucker <jean-philippe@linaro.org>; Suravee Suthikulpanit
> <suravee.suthikulpanit@amd.com>; Will Deacon <will@kernel.org>; Robin
> Murphy <robin.murphy@arm.com>; Alex Williamson
> <alex.williamson@redhat.com>; kvm@vger.kernel.org; Joao Martins
> <joao.m.martins@oracle.com>
> Subject: [PATCH RFCv2 21/24] iommu/arm-smmu-v3: Enable HTTU for
> stage1 with io-pgtable mapping
> 
> From: Kunkun Jiang <jiangkunkun@huawei.com>
> 
> As nested mode is not upstreamed now, we just aim to support dirty
> log tracking for stage1 with io-pgtable mapping (means not support
> SVA mapping). If HTTU is supported, we enable HA/HD bits in the SMMU
> CD and transfer ARM_HD quirk to io-pgtable.
> 
> We additionally filter out HD|HA if not supportted. The CD.HD bit
> is not particularly useful unless we toggle the DBM bit in the PTE
> entries.
> 
> Link:
> https://lore.kernel.org/lkml/20210413085457.25400-6-zhukeqian1@huawei
> .com/
> Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
> [joaomart:Convey HD|HA bits over to the context descriptor
>  and update commit message; original in Link, where this is based on]
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++++
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  3 +++
>  drivers/iommu/io-pgtable-arm.c              | 11 +++++++++--
>  include/linux/io-pgtable.h                  |  4 ++++
>  4 files changed, 26 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index e110ff4710bf..e2b98a6a6b74 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -1998,6 +1998,11 @@ static const struct iommu_flush_ops
> arm_smmu_flush_ops = {
>  	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
>  };
> 
> +static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
> +{
> +	return smmu->features & (ARM_SMMU_FEAT_HD |
> ARM_SMMU_FEAT_COHERENCY);
> +}
> +

This will claim DBM capability for systems with just ARM_SMMU_FEAT_COHERENCY.

Thanks,
Shameer

>  /* IOMMU API */
>  static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
>  {
> @@ -2124,6 +2129,8 @@ static int arm_smmu_domain_finalise_s1(struct
> arm_smmu_domain *smmu_domain,
>  			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
>  			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
>  			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
> +	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
> +		cfg->cd.tcr |= CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD;
>  	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
> 
>  	/*
> @@ -2226,6 +2233,9 @@ static int arm_smmu_domain_finalise(struct
> iommu_domain *domain,
>  		.iommu_dev	= smmu->dev,
>  	};
> 
> +	if (smmu->features & arm_smmu_dbm_capable(smmu))
> +		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
> +
>  	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
>  	if (!pgtbl_ops)
>  		return -ENOMEM;
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index d82dd125446c..83d6f3a2554f 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -288,6 +288,9 @@
>  #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
>  #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
> 
> +#define CTXDESC_CD_0_TCR_HA            (1UL << 43)
> +#define CTXDESC_CD_0_TCR_HD            (1UL << 42)
> +
>  #define CTXDESC_CD_0_AA64		(1UL << 41)
>  #define CTXDESC_CD_0_S			(1UL << 44)
>  #define CTXDESC_CD_0_R			(1UL << 45)
> diff --git a/drivers/iommu/io-pgtable-arm.c
> b/drivers/iommu/io-pgtable-arm.c
> index 72dcdd468cf3..b2f470529459 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -75,6 +75,7 @@
> 
>  #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
>  #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
> +#define ARM_LPAE_PTE_DBM		(((arm_lpae_iopte)1) << 51)
>  #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
>  #define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
>  #define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
> @@ -84,7 +85,7 @@
> 
>  #define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) << 2)
>  /* Ignore the contiguous bit for block splitting */
> -#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)6) << 52)
> +#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)13) << 51)
>  #define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK
> |	\
>  					 ARM_LPAE_PTE_ATTR_HI_MASK)
>  /* Software bit for solving coherency races */
> @@ -93,6 +94,9 @@
>  /* Stage-1 PTE */
>  #define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
>  #define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)2) << 6)
> +#define ARM_LPAE_PTE_AP_RDONLY_BIT	7
> +#define ARM_LPAE_PTE_AP_WRITABLE	(ARM_LPAE_PTE_AP_RDONLY | \
> +					 ARM_LPAE_PTE_DBM)
>  #define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
>  #define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)
> 
> @@ -407,6 +411,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct
> arm_lpae_io_pgtable *data,
>  		pte = ARM_LPAE_PTE_nG;
>  		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
>  			pte |= ARM_LPAE_PTE_AP_RDONLY;
> +		else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
> +			pte |= ARM_LPAE_PTE_AP_WRITABLE;
>  		if (!(prot & IOMMU_PRIV))
>  			pte |= ARM_LPAE_PTE_AP_UNPRIV;
>  	} else {
> @@ -804,7 +810,8 @@ arm_64_lpae_alloc_pgtable_s1(struct
> io_pgtable_cfg *cfg, void *cookie)
> 
>  	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
>  			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
> -			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
> +			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
> +			    IO_PGTABLE_QUIRK_ARM_HD))
>  		return NULL;
> 
>  	data = arm_lpae_alloc_pgtable(cfg);
> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
> index 25142a0e2fc2..9a996ba7856d 100644
> --- a/include/linux/io-pgtable.h
> +++ b/include/linux/io-pgtable.h
> @@ -85,6 +85,8 @@ struct io_pgtable_cfg {
>  	 *
>  	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the
> outer-cacheability
>  	 *	attributes set in the TCR for a non-coherent page-table walker.
> +	 *
> +	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking.
>  	 */
>  	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
>  	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
> @@ -92,6 +94,8 @@ struct io_pgtable_cfg {
>  	#define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT	BIT(4)
>  	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
>  	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
> +	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
> +
>  	unsigned long			quirks;
>  	unsigned long			pgsize_bitmap;
>  	unsigned int			ias;
> --
> 2.17.2
Joao Martins May 22, 2023, 10:43 a.m. UTC | #4
On 22/05/2023 11:34, Shameerali Kolothum Thodi wrote:
>> -----Original Message-----
>> From: Joao Martins [mailto:joao.m.martins@oracle.com]
>> Sent: 18 May 2023 21:47
>> To: iommu@lists.linux.dev
>> Cc: Jason Gunthorpe <jgg@nvidia.com>; Kevin Tian <kevin.tian@intel.com>;
>> Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>; Lu
>> Baolu <baolu.lu@linux.intel.com>; Yi Liu <yi.l.liu@intel.com>; Yi Y Sun
>> <yi.y.sun@intel.com>; Eric Auger <eric.auger@redhat.com>; Nicolin Chen
>> <nicolinc@nvidia.com>; Joerg Roedel <joro@8bytes.org>; Jean-Philippe
>> Brucker <jean-philippe@linaro.org>; Suravee Suthikulpanit
>> <suravee.suthikulpanit@amd.com>; Will Deacon <will@kernel.org>; Robin
>> Murphy <robin.murphy@arm.com>; Alex Williamson
>> <alex.williamson@redhat.com>; kvm@vger.kernel.org; Joao Martins
>> <joao.m.martins@oracle.com>
>> Subject: [PATCH RFCv2 21/24] iommu/arm-smmu-v3: Enable HTTU for
>> stage1 with io-pgtable mapping
>>
>> From: Kunkun Jiang <jiangkunkun@huawei.com>
>>
>> As nested mode is not upstreamed now, we just aim to support dirty
>> log tracking for stage1 with io-pgtable mapping (means not support
>> SVA mapping). If HTTU is supported, we enable HA/HD bits in the SMMU
>> CD and transfer ARM_HD quirk to io-pgtable.
>>
>> We additionally filter out HD|HA if not supportted. The CD.HD bit
>> is not particularly useful unless we toggle the DBM bit in the PTE
>> entries.
>>
>> Link:
>> https://lore.kernel.org/lkml/20210413085457.25400-6-zhukeqian1@huawei
>> .com/
>> Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com>
>> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
>> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
>> [joaomart:Convey HD|HA bits over to the context descriptor
>>  and update commit message; original in Link, where this is based on]
>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>> ---
>>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++++
>>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  3 +++
>>  drivers/iommu/io-pgtable-arm.c              | 11 +++++++++--
>>  include/linux/io-pgtable.h                  |  4 ++++
>>  4 files changed, 26 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> index e110ff4710bf..e2b98a6a6b74 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> @@ -1998,6 +1998,11 @@ static const struct iommu_flush_ops
>> arm_smmu_flush_ops = {
>>  	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
>>  };
>>
>> +static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
>> +{
>> +	return smmu->features & (ARM_SMMU_FEAT_HD |
>> ARM_SMMU_FEAT_COHERENCY);
>> +}
>> +
> 
> This will claim DBM capability for systems with just ARM_SMMU_FEAT_COHERENCY.

Gah, yes. It should be:

	(smmu->features & (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY)) ==
		(ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY)

or we could make these two flags a macro of their own.

> 
>>  /* IOMMU API */
>>  static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
>>  {
>> @@ -2124,6 +2129,8 @@ static int arm_smmu_domain_finalise_s1(struct
>> arm_smmu_domain *smmu_domain,
>>  			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
>>  			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
>>  			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
>> +	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
>> +		cfg->cd.tcr |= CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD;
>>  	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
>>
>>  	/*
>> @@ -2226,6 +2233,9 @@ static int arm_smmu_domain_finalise(struct
>> iommu_domain *domain,
>>  		.iommu_dev	= smmu->dev,
>>  	};
>>
>> +	if (smmu->features & arm_smmu_dbm_capable(smmu))
>> +		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
>> +
>>  	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
>>  	if (!pgtbl_ops)
>>  		return -ENOMEM;
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> index d82dd125446c..83d6f3a2554f 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>> @@ -288,6 +288,9 @@
>>  #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
>>  #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
>>
>> +#define CTXDESC_CD_0_TCR_HA            (1UL << 43)
>> +#define CTXDESC_CD_0_TCR_HD            (1UL << 42)
>> +
>>  #define CTXDESC_CD_0_AA64		(1UL << 41)
>>  #define CTXDESC_CD_0_S			(1UL << 44)
>>  #define CTXDESC_CD_0_R			(1UL << 45)
>> diff --git a/drivers/iommu/io-pgtable-arm.c
>> b/drivers/iommu/io-pgtable-arm.c
>> index 72dcdd468cf3..b2f470529459 100644
>> --- a/drivers/iommu/io-pgtable-arm.c
>> +++ b/drivers/iommu/io-pgtable-arm.c
>> @@ -75,6 +75,7 @@
>>
>>  #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
>>  #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
>> +#define ARM_LPAE_PTE_DBM		(((arm_lpae_iopte)1) << 51)
>>  #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
>>  #define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
>>  #define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
>> @@ -84,7 +85,7 @@
>>
>>  #define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) << 2)
>>  /* Ignore the contiguous bit for block splitting */
>> -#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)6) << 52)
>> +#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)13) << 51)
>>  #define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK
>> |	\
>>  					 ARM_LPAE_PTE_ATTR_HI_MASK)
>>  /* Software bit for solving coherency races */
>> @@ -93,6 +94,9 @@
>>  /* Stage-1 PTE */
>>  #define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
>>  #define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)2) << 6)
>> +#define ARM_LPAE_PTE_AP_RDONLY_BIT	7
>> +#define ARM_LPAE_PTE_AP_WRITABLE	(ARM_LPAE_PTE_AP_RDONLY | \
>> +					 ARM_LPAE_PTE_DBM)
>>  #define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
>>  #define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)
>>
>> @@ -407,6 +411,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct
>> arm_lpae_io_pgtable *data,
>>  		pte = ARM_LPAE_PTE_nG;
>>  		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
>>  			pte |= ARM_LPAE_PTE_AP_RDONLY;
>> +		else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
>> +			pte |= ARM_LPAE_PTE_AP_WRITABLE;
>>  		if (!(prot & IOMMU_PRIV))
>>  			pte |= ARM_LPAE_PTE_AP_UNPRIV;
>>  	} else {
>> @@ -804,7 +810,8 @@ arm_64_lpae_alloc_pgtable_s1(struct
>> io_pgtable_cfg *cfg, void *cookie)
>>
>>  	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
>>  			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
>> -			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
>> +			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
>> +			    IO_PGTABLE_QUIRK_ARM_HD))
>>  		return NULL;
>>
>>  	data = arm_lpae_alloc_pgtable(cfg);
>> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
>> index 25142a0e2fc2..9a996ba7856d 100644
>> --- a/include/linux/io-pgtable.h
>> +++ b/include/linux/io-pgtable.h
>> @@ -85,6 +85,8 @@ struct io_pgtable_cfg {
>>  	 *
>>  	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the
>> outer-cacheability
>>  	 *	attributes set in the TCR for a non-coherent page-table walker.
>> +	 *
>> +	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking.
>>  	 */
>>  	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
>>  	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
>> @@ -92,6 +94,8 @@ struct io_pgtable_cfg {
>>  	#define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT	BIT(4)
>>  	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
>>  	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
>> +	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
>> +
>>  	unsigned long			quirks;
>>  	unsigned long			pgsize_bitmap;
>>  	unsigned int			ias;
>> --
>> 2.17.2
>
Shameerali Kolothum Thodi June 16, 2023, 5 p.m. UTC | #5
> -----Original Message-----
> From: Joao Martins [mailto:joao.m.martins@oracle.com]
> Sent: 22 May 2023 11:43
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
> iommu@lists.linux.dev
> Cc: Jason Gunthorpe <jgg@nvidia.com>; Kevin Tian <kevin.tian@intel.com>;
> Lu Baolu <baolu.lu@linux.intel.com>; Yi Liu <yi.l.liu@intel.com>; Yi Y Sun
> <yi.y.sun@intel.com>; Eric Auger <eric.auger@redhat.com>; Nicolin Chen
> <nicolinc@nvidia.com>; Joerg Roedel <joro@8bytes.org>; Jean-Philippe
> Brucker <jean-philippe@linaro.org>; Suravee Suthikulpanit
> <suravee.suthikulpanit@amd.com>; Will Deacon <will@kernel.org>; Robin
> Murphy <robin.murphy@arm.com>; Alex Williamson
> <alex.williamson@redhat.com>; kvm@vger.kernel.org
> Subject: Re: [PATCH RFCv2 21/24] iommu/arm-smmu-v3: Enable HTTU for
> stage1 with io-pgtable mapping

[...]

> >> @@ -2226,6 +2233,9 @@ static int arm_smmu_domain_finalise(struct
> >> iommu_domain *domain,
> >>  		.iommu_dev	= smmu->dev,
> >>  	};
> >>
> >> +	if (smmu->features & arm_smmu_dbm_capable(smmu))
> >> +		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;

Also, I think we should limit setting this to s1 only pgtbl_cfg.

Thanks,
Shameer

> >> +
> >>  	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
> >>  	if (!pgtbl_ops)
> >>  		return -ENOMEM;
> >> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> >> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> >> index d82dd125446c..83d6f3a2554f 100644
> >> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> >> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> >> @@ -288,6 +288,9 @@
> >>  #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
> >>  #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
> >>
> >> +#define CTXDESC_CD_0_TCR_HA            (1UL << 43)
> >> +#define CTXDESC_CD_0_TCR_HD            (1UL << 42)
> >> +
> >>  #define CTXDESC_CD_0_AA64		(1UL << 41)
> >>  #define CTXDESC_CD_0_S			(1UL << 44)
> >>  #define CTXDESC_CD_0_R			(1UL << 45)
> >> diff --git a/drivers/iommu/io-pgtable-arm.c
> >> b/drivers/iommu/io-pgtable-arm.c index 72dcdd468cf3..b2f470529459
> >> 100644
> >> --- a/drivers/iommu/io-pgtable-arm.c
> >> +++ b/drivers/iommu/io-pgtable-arm.c
> >> @@ -75,6 +75,7 @@
> >>
> >>  #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
> >>  #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
> >> +#define ARM_LPAE_PTE_DBM		(((arm_lpae_iopte)1) << 51)
> >>  #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
> >>  #define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
> >>  #define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
> >> @@ -84,7 +85,7 @@
> >>
> >>  #define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) <<
> 2)
> >>  /* Ignore the contiguous bit for block splitting */
> >> -#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)6) << 52)
> >> +#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)13) <<
> 51)
> >>  #define ARM_LPAE_PTE_ATTR_MASK
> 	(ARM_LPAE_PTE_ATTR_LO_MASK
> >> |	\
> >>  					 ARM_LPAE_PTE_ATTR_HI_MASK)
> >>  /* Software bit for solving coherency races */ @@ -93,6 +94,9 @@
> >>  /* Stage-1 PTE */
> >>  #define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
> >>  #define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)2) << 6)
> >> +#define ARM_LPAE_PTE_AP_RDONLY_BIT	7
> >> +#define ARM_LPAE_PTE_AP_WRITABLE
> 	(ARM_LPAE_PTE_AP_RDONLY | \
> >> +					 ARM_LPAE_PTE_DBM)
> >>  #define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
> >>  #define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)
> >>
> >> @@ -407,6 +411,8 @@ static arm_lpae_iopte
> arm_lpae_prot_to_pte(struct
> >> arm_lpae_io_pgtable *data,
> >>  		pte = ARM_LPAE_PTE_nG;
> >>  		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
> >>  			pte |= ARM_LPAE_PTE_AP_RDONLY;
> >> +		else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
> >> +			pte |= ARM_LPAE_PTE_AP_WRITABLE;
> >>  		if (!(prot & IOMMU_PRIV))
> >>  			pte |= ARM_LPAE_PTE_AP_UNPRIV;
> >>  	} else {
> >> @@ -804,7 +810,8 @@ arm_64_lpae_alloc_pgtable_s1(struct
> >> io_pgtable_cfg *cfg, void *cookie)
> >>
> >>  	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
> >>  			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
> >> -			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
> >> +			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
> >> +			    IO_PGTABLE_QUIRK_ARM_HD))
> >>  		return NULL;
> >>
> >>  	data = arm_lpae_alloc_pgtable(cfg); diff --git
> >> a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index
> >> 25142a0e2fc2..9a996ba7856d 100644
> >> --- a/include/linux/io-pgtable.h
> >> +++ b/include/linux/io-pgtable.h
> >> @@ -85,6 +85,8 @@ struct io_pgtable_cfg {
> >>  	 *
> >>  	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the
> outer-cacheability
> >>  	 *	attributes set in the TCR for a non-coherent page-table walker.
> >> +	 *
> >> +	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking.
> >>  	 */
> >>  	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
> >>  	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
> >> @@ -92,6 +94,8 @@ struct io_pgtable_cfg {
> >>  	#define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT	BIT(4)
> >>  	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
> >>  	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
> >> +	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
> >> +
> >>  	unsigned long			quirks;
> >>  	unsigned long			pgsize_bitmap;
> >>  	unsigned int			ias;
> >> --
> >> 2.17.2
> >
Joao Martins June 16, 2023, 6:11 p.m. UTC | #6
On 16/06/2023 18:00, Shameerali Kolothum Thodi wrote:
> 
> 
>> -----Original Message-----
>> From: Joao Martins [mailto:joao.m.martins@oracle.com]
>> Sent: 22 May 2023 11:43
>> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>;
>> iommu@lists.linux.dev
>> Cc: Jason Gunthorpe <jgg@nvidia.com>; Kevin Tian <kevin.tian@intel.com>;
>> Lu Baolu <baolu.lu@linux.intel.com>; Yi Liu <yi.l.liu@intel.com>; Yi Y Sun
>> <yi.y.sun@intel.com>; Eric Auger <eric.auger@redhat.com>; Nicolin Chen
>> <nicolinc@nvidia.com>; Joerg Roedel <joro@8bytes.org>; Jean-Philippe
>> Brucker <jean-philippe@linaro.org>; Suravee Suthikulpanit
>> <suravee.suthikulpanit@amd.com>; Will Deacon <will@kernel.org>; Robin
>> Murphy <robin.murphy@arm.com>; Alex Williamson
>> <alex.williamson@redhat.com>; kvm@vger.kernel.org
>> Subject: Re: [PATCH RFCv2 21/24] iommu/arm-smmu-v3: Enable HTTU for
>> stage1 with io-pgtable mapping
> 
> [...]
> 
>>>> @@ -2226,6 +2233,9 @@ static int arm_smmu_domain_finalise(struct
>>>> iommu_domain *domain,
>>>>  		.iommu_dev	= smmu->dev,
>>>>  	};
>>>>
>>>> +	if (smmu->features & arm_smmu_dbm_capable(smmu))
>>>> +		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
> 
> Also, I think we should limit setting this to s1 only pgtbl_cfg.
> 
+1, makes sense.

> Thanks,
> Shameer
> 
>>>> +
>>>>  	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
>>>>  	if (!pgtbl_ops)
>>>>  		return -ENOMEM;
>>>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>>>> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>>>> index d82dd125446c..83d6f3a2554f 100644
>>>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>>>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
>>>> @@ -288,6 +288,9 @@
>>>>  #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
>>>>  #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
>>>>
>>>> +#define CTXDESC_CD_0_TCR_HA            (1UL << 43)
>>>> +#define CTXDESC_CD_0_TCR_HD            (1UL << 42)
>>>> +
>>>>  #define CTXDESC_CD_0_AA64		(1UL << 41)
>>>>  #define CTXDESC_CD_0_S			(1UL << 44)
>>>>  #define CTXDESC_CD_0_R			(1UL << 45)
>>>> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
>>>> index 72dcdd468cf3..b2f470529459 100644
>>>> --- a/drivers/iommu/io-pgtable-arm.c
>>>> +++ b/drivers/iommu/io-pgtable-arm.c
>>>> @@ -75,6 +75,7 @@
>>>>
>>>>  #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
>>>>  #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
>>>> +#define ARM_LPAE_PTE_DBM		(((arm_lpae_iopte)1) << 51)
>>>>  #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
>>>>  #define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
>>>>  #define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
>>>> @@ -84,7 +85,7 @@
>>>>
>>>>  #define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) << 2)
>>>>  /* Ignore the contiguous bit for block splitting */
>>>> -#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)6) << 52)
>>>> +#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)13) << 51)
>>>>  #define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK |	\
>>>>  					 ARM_LPAE_PTE_ATTR_HI_MASK)
>>>>  /* Software bit for solving coherency races */
>>>> @@ -93,6 +94,9 @@
>>>>  /* Stage-1 PTE */
>>>>  #define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
>>>>  #define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)2) << 6)
>>>> +#define ARM_LPAE_PTE_AP_RDONLY_BIT	7
>>>> +#define ARM_LPAE_PTE_AP_WRITABLE	(ARM_LPAE_PTE_AP_RDONLY | \
>>>> +					 ARM_LPAE_PTE_DBM)
>>>>  #define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
>>>>  #define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)
>>>>
>>>> @@ -407,6 +411,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
>>>>  		pte = ARM_LPAE_PTE_nG;
>>>>  		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
>>>>  			pte |= ARM_LPAE_PTE_AP_RDONLY;
>>>> +		else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
>>>> +			pte |= ARM_LPAE_PTE_AP_WRITABLE;
>>>>  		if (!(prot & IOMMU_PRIV))
>>>>  			pte |= ARM_LPAE_PTE_AP_UNPRIV;
>>>>  	} else {
>>>> @@ -804,7 +810,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
>>>>
>>>>  	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
>>>>  			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
>>>> -			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
>>>> +			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
>>>> +			    IO_PGTABLE_QUIRK_ARM_HD))
>>>>  		return NULL;
>>>>
>>>>  	data = arm_lpae_alloc_pgtable(cfg);
>>>> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
>>>> index 25142a0e2fc2..9a996ba7856d 100644
>>>> --- a/include/linux/io-pgtable.h
>>>> +++ b/include/linux/io-pgtable.h
>>>> @@ -85,6 +85,8 @@ struct io_pgtable_cfg {
>>>>  	 *
>>>>  	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
>>>>  	 *	attributes set in the TCR for a non-coherent page-table walker.
>>>> +	 *
>>>> +	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking.
>>>>  	 */
>>>>  	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
>>>>  	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
>>>> @@ -92,6 +94,8 @@ struct io_pgtable_cfg {
>>>>  	#define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT	BIT(4)
>>>>  	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
>>>>  	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
>>>> +	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
>>>> +
>>>>  	unsigned long			quirks;
>>>>  	unsigned long			pgsize_bitmap;
>>>>  	unsigned int			ias;
>>>> --
>>>> 2.17.2
>>>
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e110ff4710bf..e2b98a6a6b74 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1998,6 +1998,11 @@  static const struct iommu_flush_ops arm_smmu_flush_ops = {
 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
 };
 
+static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
+{
+	return smmu->features & (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY);
+}
+
 /* IOMMU API */
 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
 {
@@ -2124,6 +2129,8 @@  static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
+	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
+		cfg->cd.tcr |= CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD;
 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
 
 	/*
@@ -2226,6 +2233,9 @@  static int arm_smmu_domain_finalise(struct iommu_domain *domain,
 		.iommu_dev	= smmu->dev,
 	};
 
+	if (smmu->features & arm_smmu_dbm_capable(smmu))
+		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
+
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops)
 		return -ENOMEM;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index d82dd125446c..83d6f3a2554f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -288,6 +288,9 @@ 
 #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
 #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
 
+#define CTXDESC_CD_0_TCR_HA            (1UL << 43)
+#define CTXDESC_CD_0_TCR_HD            (1UL << 42)
+
 #define CTXDESC_CD_0_AA64		(1UL << 41)
 #define CTXDESC_CD_0_S			(1UL << 44)
 #define CTXDESC_CD_0_R			(1UL << 45)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 72dcdd468cf3..b2f470529459 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -75,6 +75,7 @@ 
 
 #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
 #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
+#define ARM_LPAE_PTE_DBM		(((arm_lpae_iopte)1) << 51)
 #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
 #define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
 #define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
@@ -84,7 +85,7 @@ 
 
 #define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) << 2)
 /* Ignore the contiguous bit for block splitting */
-#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)6) << 52)
+#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)13) << 51)
 #define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK |	\
 					 ARM_LPAE_PTE_ATTR_HI_MASK)
 /* Software bit for solving coherency races */
@@ -93,6 +94,9 @@ 
 /* Stage-1 PTE */
 #define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
 #define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)2) << 6)
+#define ARM_LPAE_PTE_AP_RDONLY_BIT	7
+#define ARM_LPAE_PTE_AP_WRITABLE	(ARM_LPAE_PTE_AP_RDONLY | \
+					 ARM_LPAE_PTE_DBM)
 #define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
 #define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)
 
@@ -407,6 +411,8 @@  static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
 		pte = ARM_LPAE_PTE_nG;
 		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
 			pte |= ARM_LPAE_PTE_AP_RDONLY;
+		else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
+			pte |= ARM_LPAE_PTE_AP_WRITABLE;
 		if (!(prot & IOMMU_PRIV))
 			pte |= ARM_LPAE_PTE_AP_UNPRIV;
 	} else {
@@ -804,7 +810,8 @@  arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
 			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
-			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
+			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
+			    IO_PGTABLE_QUIRK_ARM_HD))
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 25142a0e2fc2..9a996ba7856d 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -85,6 +85,8 @@  struct io_pgtable_cfg {
 	 *
 	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
 	 *	attributes set in the TCR for a non-coherent page-table walker.
+	 *
+	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking.
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
@@ -92,6 +94,8 @@  struct io_pgtable_cfg {
 	#define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT	BIT(4)
 	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
 	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
+	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
+
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;