Message ID | 20230518204650.14541-22-joao.m.martins@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | IOMMUFD Dirty Tracking | expand |
On 2023-05-18 21:46, Joao Martins wrote: > From: Kunkun Jiang <jiangkunkun@huawei.com> > > As nested mode is not upstreamed now, we just aim to support dirty > log tracking for stage1 with io-pgtable mapping (means not support > SVA mapping). If HTTU is supported, we enable HA/HD bits in the SMMU > CD and transfer ARM_HD quirk to io-pgtable. > > We additionally filter out HD|HA if not supported. The CD.HD bit > is not particularly useful unless we toggle the DBM bit in the PTE > entries. ...seems odd to describe the control which fundamentally enables DBM or not as "not particularly useful" to the DBM use-case :/ > Link: https://lore.kernel.org/lkml/20210413085457.25400-6-zhukeqian1@huawei.com/ > Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com> > Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> > Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com> > [joaomart:Convey HD|HA bits over to the context descriptor > and update commit message; original in Link, where this is based on] > Signed-off-by: Joao Martins <joao.m.martins@oracle.com> > --- > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++++ > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ > drivers/iommu/io-pgtable-arm.c | 11 +++++++++-- > include/linux/io-pgtable.h | 4 ++++ For the sake of cleanliness, please split the io-pgtable and SMMU additions into separate patches (you could perhaps then squash set_dirty_tracking() into the SMMU patch as well). Thanks, Robin. 
> 4 files changed, 26 insertions(+), 2 deletions(-) > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > index e110ff4710bf..e2b98a6a6b74 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > @@ -1998,6 +1998,11 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = { > .tlb_add_page = arm_smmu_tlb_inv_page_nosync, > }; > > +static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu) > +{ > + return smmu->features & (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY); > +} > + > /* IOMMU API */ > static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) > { > @@ -2124,6 +2129,8 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, > FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | > FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | > CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; > + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD) > + cfg->cd.tcr |= CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD; > cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; > > /* > @@ -2226,6 +2233,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, > .iommu_dev = smmu->dev, > }; > > + if (smmu->features & arm_smmu_dbm_capable(smmu)) > + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD; > + > pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); > if (!pgtbl_ops) > return -ENOMEM; > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > index d82dd125446c..83d6f3a2554f 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > @@ -288,6 +288,9 @@ > #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32) > #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38) > > +#define CTXDESC_CD_0_TCR_HA (1UL << 43) > +#define CTXDESC_CD_0_TCR_HD (1UL << 42) > + > #define CTXDESC_CD_0_AA64 (1UL << 41) > #define CTXDESC_CD_0_S (1UL << 44) > #define CTXDESC_CD_0_R (1UL 
<< 45) > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c > index 72dcdd468cf3..b2f470529459 100644 > --- a/drivers/iommu/io-pgtable-arm.c > +++ b/drivers/iommu/io-pgtable-arm.c > @@ -75,6 +75,7 @@ > > #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) > #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) > +#define ARM_LPAE_PTE_DBM (((arm_lpae_iopte)1) << 51) > #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) > #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) > #define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) > @@ -84,7 +85,7 @@ > > #define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) > /* Ignore the contiguous bit for block splitting */ > -#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) > +#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)13) << 51) > #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ > ARM_LPAE_PTE_ATTR_HI_MASK) > /* Software bit for solving coherency races */ > @@ -93,6 +94,9 @@ > /* Stage-1 PTE */ > #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) > #define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) > +#define ARM_LPAE_PTE_AP_RDONLY_BIT 7 > +#define ARM_LPAE_PTE_AP_WRITABLE (ARM_LPAE_PTE_AP_RDONLY | \ > + ARM_LPAE_PTE_DBM) > #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 > #define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) > > @@ -407,6 +411,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, > pte = ARM_LPAE_PTE_nG; > if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) > pte |= ARM_LPAE_PTE_AP_RDONLY; > + else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD) > + pte |= ARM_LPAE_PTE_AP_WRITABLE; > if (!(prot & IOMMU_PRIV)) > pte |= ARM_LPAE_PTE_AP_UNPRIV; > } else { > @@ -804,7 +810,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) > > if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | > IO_PGTABLE_QUIRK_ARM_TTBR1 | > - IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)) > + IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | > + 
IO_PGTABLE_QUIRK_ARM_HD)) > return NULL; > > data = arm_lpae_alloc_pgtable(cfg); > diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h > index 25142a0e2fc2..9a996ba7856d 100644 > --- a/include/linux/io-pgtable.h > +++ b/include/linux/io-pgtable.h > @@ -85,6 +85,8 @@ struct io_pgtable_cfg { > * > * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability > * attributes set in the TCR for a non-coherent page-table walker. > + * > + * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking. > */ > #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) > #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) > @@ -92,6 +94,8 @@ struct io_pgtable_cfg { > #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) > #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) > #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) > + #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) > + > unsigned long quirks; > unsigned long pgsize_bitmap; > unsigned int ias;
On 19/05/2023 14:49, Robin Murphy wrote: > On 2023-05-18 21:46, Joao Martins wrote: >> From: Kunkun Jiang <jiangkunkun@huawei.com> >> >> As nested mode is not upstreamed now, we just aim to support dirty >> log tracking for stage1 with io-pgtable mapping (means not support >> SVA mapping). If HTTU is supported, we enable HA/HD bits in the SMMU >> CD and transfer ARM_HD quirk to io-pgtable. >> >> We additionally filter out HD|HA if not supported. The CD.HD bit >> is not particularly useful unless we toggle the DBM bit in the PTE >> entries. > > ...seems odd to describe the control which fundamentally enables DBM or not as > "not particularly useful" to the DBM use-case :/ > This is a remnant from v1, where we would just enable the context descriptor HD bit but not actually enable DBM until set_dirty_tracking(), which is no longer the case. I should remove this sentence. >> Link: https://lore.kernel.org/lkml/20210413085457.25400-6-zhukeqian1@huawei.com/ >> Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com> >> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> >> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com> >> [joaomart:Convey HD|HA bits over to the context descriptor >> and update commit message; original in Link, where this is based on] >> Signed-off-by: Joao Martins <joao.m.martins@oracle.com> >> --- >> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++++ >> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ >> drivers/iommu/io-pgtable-arm.c | 11 +++++++++-- >> include/linux/io-pgtable.h | 4 ++++ > > For the sake of cleanliness, please split the io-pgtable and SMMU additions into > separate patches (you could perhaps then squash set_dirty_tracking() into the > SMMU patch as well). > ack
> -----Original Message----- > From: Joao Martins [mailto:joao.m.martins@oracle.com] > Sent: 18 May 2023 21:47 > To: iommu@lists.linux.dev > Cc: Jason Gunthorpe <jgg@nvidia.com>; Kevin Tian <kevin.tian@intel.com>; > Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>; Lu > Baolu <baolu.lu@linux.intel.com>; Yi Liu <yi.l.liu@intel.com>; Yi Y Sun > <yi.y.sun@intel.com>; Eric Auger <eric.auger@redhat.com>; Nicolin Chen > <nicolinc@nvidia.com>; Joerg Roedel <joro@8bytes.org>; Jean-Philippe > Brucker <jean-philippe@linaro.org>; Suravee Suthikulpanit > <suravee.suthikulpanit@amd.com>; Will Deacon <will@kernel.org>; Robin > Murphy <robin.murphy@arm.com>; Alex Williamson > <alex.williamson@redhat.com>; kvm@vger.kernel.org; Joao Martins > <joao.m.martins@oracle.com> > Subject: [PATCH RFCv2 21/24] iommu/arm-smmu-v3: Enable HTTU for > stage1 with io-pgtable mapping > > From: Kunkun Jiang <jiangkunkun@huawei.com> > > As nested mode is not upstreamed now, we just aim to support dirty > log tracking for stage1 with io-pgtable mapping (means not support > SVA mapping). If HTTU is supported, we enable HA/HD bits in the SMMU > CD and transfer ARM_HD quirk to io-pgtable. > > We additionally filter out HD|HA if not supportted. The CD.HD bit > is not particularly useful unless we toggle the DBM bit in the PTE > entries. 
> > Link: > https://lore.kernel.org/lkml/20210413085457.25400-6-zhukeqian1@huawei > .com/ > Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com> > Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> > Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com> > [joaomart:Convey HD|HA bits over to the context descriptor > and update commit message; original in Link, where this is based on] > Signed-off-by: Joao Martins <joao.m.martins@oracle.com> > --- > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++++ > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ > drivers/iommu/io-pgtable-arm.c | 11 +++++++++-- > include/linux/io-pgtable.h | 4 ++++ > 4 files changed, 26 insertions(+), 2 deletions(-) > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > index e110ff4710bf..e2b98a6a6b74 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > @@ -1998,6 +1998,11 @@ static const struct iommu_flush_ops > arm_smmu_flush_ops = { > .tlb_add_page = arm_smmu_tlb_inv_page_nosync, > }; > > +static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu) > +{ > + return smmu->features & (ARM_SMMU_FEAT_HD | > ARM_SMMU_FEAT_COHERENCY); > +} > + This will claim DBM capability for systems with just ARM_SMMU_FEAT_COHERENCY. 
Thanks, Shameer > /* IOMMU API */ > static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) > { > @@ -2124,6 +2129,8 @@ static int arm_smmu_domain_finalise_s1(struct > arm_smmu_domain *smmu_domain, > FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | > FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | > CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; > + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD) > + cfg->cd.tcr |= CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD; > cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; > > /* > @@ -2226,6 +2233,9 @@ static int arm_smmu_domain_finalise(struct > iommu_domain *domain, > .iommu_dev = smmu->dev, > }; > > + if (smmu->features & arm_smmu_dbm_capable(smmu)) > + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD; > + > pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); > if (!pgtbl_ops) > return -ENOMEM; > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > index d82dd125446c..83d6f3a2554f 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > @@ -288,6 +288,9 @@ > #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32) > #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38) > > +#define CTXDESC_CD_0_TCR_HA (1UL << 43) > +#define CTXDESC_CD_0_TCR_HD (1UL << 42) > + > #define CTXDESC_CD_0_AA64 (1UL << 41) > #define CTXDESC_CD_0_S (1UL << 44) > #define CTXDESC_CD_0_R (1UL << 45) > diff --git a/drivers/iommu/io-pgtable-arm.c > b/drivers/iommu/io-pgtable-arm.c > index 72dcdd468cf3..b2f470529459 100644 > --- a/drivers/iommu/io-pgtable-arm.c > +++ b/drivers/iommu/io-pgtable-arm.c > @@ -75,6 +75,7 @@ > > #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) > #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) > +#define ARM_LPAE_PTE_DBM (((arm_lpae_iopte)1) << 51) > #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) > #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) > #define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) > @@ -84,7 
+85,7 @@ > > #define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) > /* Ignore the contiguous bit for block splitting */ > -#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) > +#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)13) << 51) > #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK > | \ > ARM_LPAE_PTE_ATTR_HI_MASK) > /* Software bit for solving coherency races */ > @@ -93,6 +94,9 @@ > /* Stage-1 PTE */ > #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) > #define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) > +#define ARM_LPAE_PTE_AP_RDONLY_BIT 7 > +#define ARM_LPAE_PTE_AP_WRITABLE (ARM_LPAE_PTE_AP_RDONLY | \ > + ARM_LPAE_PTE_DBM) > #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 > #define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) > > @@ -407,6 +411,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct > arm_lpae_io_pgtable *data, > pte = ARM_LPAE_PTE_nG; > if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) > pte |= ARM_LPAE_PTE_AP_RDONLY; > + else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD) > + pte |= ARM_LPAE_PTE_AP_WRITABLE; > if (!(prot & IOMMU_PRIV)) > pte |= ARM_LPAE_PTE_AP_UNPRIV; > } else { > @@ -804,7 +810,8 @@ arm_64_lpae_alloc_pgtable_s1(struct > io_pgtable_cfg *cfg, void *cookie) > > if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | > IO_PGTABLE_QUIRK_ARM_TTBR1 | > - IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)) > + IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | > + IO_PGTABLE_QUIRK_ARM_HD)) > return NULL; > > data = arm_lpae_alloc_pgtable(cfg); > diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h > index 25142a0e2fc2..9a996ba7856d 100644 > --- a/include/linux/io-pgtable.h > +++ b/include/linux/io-pgtable.h > @@ -85,6 +85,8 @@ struct io_pgtable_cfg { > * > * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the > outer-cacheability > * attributes set in the TCR for a non-coherent page-table walker. > + * > + * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking. 
> */ > #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) > #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) > @@ -92,6 +94,8 @@ struct io_pgtable_cfg { > #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) > #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) > #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) > + #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) > + > unsigned long quirks; > unsigned long pgsize_bitmap; > unsigned int ias; > -- > 2.17.2
On 22/05/2023 11:34, Shameerali Kolothum Thodi wrote: >> -----Original Message----- >> From: Joao Martins [mailto:joao.m.martins@oracle.com] >> Sent: 18 May 2023 21:47 >> To: iommu@lists.linux.dev >> Cc: Jason Gunthorpe <jgg@nvidia.com>; Kevin Tian <kevin.tian@intel.com>; >> Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>; Lu >> Baolu <baolu.lu@linux.intel.com>; Yi Liu <yi.l.liu@intel.com>; Yi Y Sun >> <yi.y.sun@intel.com>; Eric Auger <eric.auger@redhat.com>; Nicolin Chen >> <nicolinc@nvidia.com>; Joerg Roedel <joro@8bytes.org>; Jean-Philippe >> Brucker <jean-philippe@linaro.org>; Suravee Suthikulpanit >> <suravee.suthikulpanit@amd.com>; Will Deacon <will@kernel.org>; Robin >> Murphy <robin.murphy@arm.com>; Alex Williamson >> <alex.williamson@redhat.com>; kvm@vger.kernel.org; Joao Martins >> <joao.m.martins@oracle.com> >> Subject: [PATCH RFCv2 21/24] iommu/arm-smmu-v3: Enable HTTU for >> stage1 with io-pgtable mapping >> >> From: Kunkun Jiang <jiangkunkun@huawei.com> >> >> As nested mode is not upstreamed now, we just aim to support dirty >> log tracking for stage1 with io-pgtable mapping (means not support >> SVA mapping). If HTTU is supported, we enable HA/HD bits in the SMMU >> CD and transfer ARM_HD quirk to io-pgtable. >> >> We additionally filter out HD|HA if not supportted. The CD.HD bit >> is not particularly useful unless we toggle the DBM bit in the PTE >> entries. 
>> >> Link: >> https://lore.kernel.org/lkml/20210413085457.25400-6-zhukeqian1@huawei >> .com/ >> Co-developed-by: Keqian Zhu <zhukeqian1@huawei.com> >> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> >> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com> >> [joaomart:Convey HD|HA bits over to the context descriptor >> and update commit message; original in Link, where this is based on] >> Signed-off-by: Joao Martins <joao.m.martins@oracle.com> >> --- >> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++++++ >> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ >> drivers/iommu/io-pgtable-arm.c | 11 +++++++++-- >> include/linux/io-pgtable.h | 4 ++++ >> 4 files changed, 26 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c >> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c >> index e110ff4710bf..e2b98a6a6b74 100644 >> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c >> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c >> @@ -1998,6 +1998,11 @@ static const struct iommu_flush_ops >> arm_smmu_flush_ops = { >> .tlb_add_page = arm_smmu_tlb_inv_page_nosync, >> }; >> >> +static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu) >> +{ >> + return smmu->features & (ARM_SMMU_FEAT_HD | >> ARM_SMMU_FEAT_COHERENCY); >> +} >> + > > This will claim DBM capability for systems with just ARM_SMMU_FEAT_COHERENCY. Gah, yes. It should be: (smmu->features & (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY)) == (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY) or making these two a macro on its own. 
> >> /* IOMMU API */ >> static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) >> { >> @@ -2124,6 +2129,8 @@ static int arm_smmu_domain_finalise_s1(struct >> arm_smmu_domain *smmu_domain, >> FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | >> FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | >> CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; >> + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD) >> + cfg->cd.tcr |= CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD; >> cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; >> >> /* >> @@ -2226,6 +2233,9 @@ static int arm_smmu_domain_finalise(struct >> iommu_domain *domain, >> .iommu_dev = smmu->dev, >> }; >> >> + if (smmu->features & arm_smmu_dbm_capable(smmu)) >> + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD; >> + >> pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); >> if (!pgtbl_ops) >> return -ENOMEM; >> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h >> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h >> index d82dd125446c..83d6f3a2554f 100644 >> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h >> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h >> @@ -288,6 +288,9 @@ >> #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32) >> #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38) >> >> +#define CTXDESC_CD_0_TCR_HA (1UL << 43) >> +#define CTXDESC_CD_0_TCR_HD (1UL << 42) >> + >> #define CTXDESC_CD_0_AA64 (1UL << 41) >> #define CTXDESC_CD_0_S (1UL << 44) >> #define CTXDESC_CD_0_R (1UL << 45) >> diff --git a/drivers/iommu/io-pgtable-arm.c >> b/drivers/iommu/io-pgtable-arm.c >> index 72dcdd468cf3..b2f470529459 100644 >> --- a/drivers/iommu/io-pgtable-arm.c >> +++ b/drivers/iommu/io-pgtable-arm.c >> @@ -75,6 +75,7 @@ >> >> #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) >> #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) >> +#define ARM_LPAE_PTE_DBM (((arm_lpae_iopte)1) << 51) >> #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) >> #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) >> #define ARM_LPAE_PTE_SH_OS 
(((arm_lpae_iopte)2) << 8) >> @@ -84,7 +85,7 @@ >> >> #define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) >> /* Ignore the contiguous bit for block splitting */ >> -#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) >> +#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)13) << 51) >> #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK >> | \ >> ARM_LPAE_PTE_ATTR_HI_MASK) >> /* Software bit for solving coherency races */ >> @@ -93,6 +94,9 @@ >> /* Stage-1 PTE */ >> #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) >> #define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) >> +#define ARM_LPAE_PTE_AP_RDONLY_BIT 7 >> +#define ARM_LPAE_PTE_AP_WRITABLE (ARM_LPAE_PTE_AP_RDONLY | \ >> + ARM_LPAE_PTE_DBM) >> #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 >> #define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) >> >> @@ -407,6 +411,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct >> arm_lpae_io_pgtable *data, >> pte = ARM_LPAE_PTE_nG; >> if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) >> pte |= ARM_LPAE_PTE_AP_RDONLY; >> + else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD) >> + pte |= ARM_LPAE_PTE_AP_WRITABLE; >> if (!(prot & IOMMU_PRIV)) >> pte |= ARM_LPAE_PTE_AP_UNPRIV; >> } else { >> @@ -804,7 +810,8 @@ arm_64_lpae_alloc_pgtable_s1(struct >> io_pgtable_cfg *cfg, void *cookie) >> >> if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | >> IO_PGTABLE_QUIRK_ARM_TTBR1 | >> - IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)) >> + IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | >> + IO_PGTABLE_QUIRK_ARM_HD)) >> return NULL; >> >> data = arm_lpae_alloc_pgtable(cfg); >> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h >> index 25142a0e2fc2..9a996ba7856d 100644 >> --- a/include/linux/io-pgtable.h >> +++ b/include/linux/io-pgtable.h >> @@ -85,6 +85,8 @@ struct io_pgtable_cfg { >> * >> * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the >> outer-cacheability >> * attributes set in the TCR for a non-coherent page-table walker. 
>> + * >> + * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking. >> */ >> #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) >> #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) >> @@ -92,6 +94,8 @@ struct io_pgtable_cfg { >> #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) >> #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) >> #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) >> + #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) >> + >> unsigned long quirks; >> unsigned long pgsize_bitmap; >> unsigned int ias; >> -- >> 2.17.2 >
> -----Original Message----- > From: Joao Martins [mailto:joao.m.martins@oracle.com] > Sent: 22 May 2023 11:43 > To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>; > iommu@lists.linux.dev > Cc: Jason Gunthorpe <jgg@nvidia.com>; Kevin Tian <kevin.tian@intel.com>; > Lu Baolu <baolu.lu@linux.intel.com>; Yi Liu <yi.l.liu@intel.com>; Yi Y Sun > <yi.y.sun@intel.com>; Eric Auger <eric.auger@redhat.com>; Nicolin Chen > <nicolinc@nvidia.com>; Joerg Roedel <joro@8bytes.org>; Jean-Philippe > Brucker <jean-philippe@linaro.org>; Suravee Suthikulpanit > <suravee.suthikulpanit@amd.com>; Will Deacon <will@kernel.org>; Robin > Murphy <robin.murphy@arm.com>; Alex Williamson > <alex.williamson@redhat.com>; kvm@vger.kernel.org > Subject: Re: [PATCH RFCv2 21/24] iommu/arm-smmu-v3: Enable HTTU for > stage1 with io-pgtable mapping [...] > >> @@ -2226,6 +2233,9 @@ static int arm_smmu_domain_finalise(struct > >> iommu_domain *domain, > >> .iommu_dev = smmu->dev, > >> }; > >> > >> + if (smmu->features & arm_smmu_dbm_capable(smmu)) > >> + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD; Also, I think we should limit setting this to s1 only pgtbl_cfg. 
Thanks, Shameer > >> + > >> pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); > >> if (!pgtbl_ops) > >> return -ENOMEM; > >> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > >> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > >> index d82dd125446c..83d6f3a2554f 100644 > >> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > >> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > >> @@ -288,6 +288,9 @@ > >> #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32) > >> #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38) > >> > >> +#define CTXDESC_CD_0_TCR_HA (1UL << 43) > >> +#define CTXDESC_CD_0_TCR_HD (1UL << 42) > >> + > >> #define CTXDESC_CD_0_AA64 (1UL << 41) > >> #define CTXDESC_CD_0_S (1UL << 44) > >> #define CTXDESC_CD_0_R (1UL << 45) > >> diff --git a/drivers/iommu/io-pgtable-arm.c > >> b/drivers/iommu/io-pgtable-arm.c index 72dcdd468cf3..b2f470529459 > >> 100644 > >> --- a/drivers/iommu/io-pgtable-arm.c > >> +++ b/drivers/iommu/io-pgtable-arm.c > >> @@ -75,6 +75,7 @@ > >> > >> #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) > >> #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) > >> +#define ARM_LPAE_PTE_DBM (((arm_lpae_iopte)1) << 51) > >> #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) > >> #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) > >> #define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) > >> @@ -84,7 +85,7 @@ > >> > >> #define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << > 2) > >> /* Ignore the contiguous bit for block splitting */ > >> -#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) > >> +#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)13) << > 51) > >> #define ARM_LPAE_PTE_ATTR_MASK > (ARM_LPAE_PTE_ATTR_LO_MASK > >> | \ > >> ARM_LPAE_PTE_ATTR_HI_MASK) > >> /* Software bit for solving coherency races */ @@ -93,6 +94,9 @@ > >> /* Stage-1 PTE */ > >> #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) > >> #define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) > >> +#define 
ARM_LPAE_PTE_AP_RDONLY_BIT 7 > >> +#define ARM_LPAE_PTE_AP_WRITABLE > (ARM_LPAE_PTE_AP_RDONLY | \ > >> + ARM_LPAE_PTE_DBM) > >> #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 > >> #define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) > >> > >> @@ -407,6 +411,8 @@ static arm_lpae_iopte > arm_lpae_prot_to_pte(struct > >> arm_lpae_io_pgtable *data, > >> pte = ARM_LPAE_PTE_nG; > >> if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) > >> pte |= ARM_LPAE_PTE_AP_RDONLY; > >> + else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD) > >> + pte |= ARM_LPAE_PTE_AP_WRITABLE; > >> if (!(prot & IOMMU_PRIV)) > >> pte |= ARM_LPAE_PTE_AP_UNPRIV; > >> } else { > >> @@ -804,7 +810,8 @@ arm_64_lpae_alloc_pgtable_s1(struct > >> io_pgtable_cfg *cfg, void *cookie) > >> > >> if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | > >> IO_PGTABLE_QUIRK_ARM_TTBR1 | > >> - IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)) > >> + IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | > >> + IO_PGTABLE_QUIRK_ARM_HD)) > >> return NULL; > >> > >> data = arm_lpae_alloc_pgtable(cfg); diff --git > >> a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index > >> 25142a0e2fc2..9a996ba7856d 100644 > >> --- a/include/linux/io-pgtable.h > >> +++ b/include/linux/io-pgtable.h > >> @@ -85,6 +85,8 @@ struct io_pgtable_cfg { > >> * > >> * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the > outer-cacheability > >> * attributes set in the TCR for a non-coherent page-table walker. > >> + * > >> + * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking. > >> */ > >> #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) > >> #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) > >> @@ -92,6 +94,8 @@ struct io_pgtable_cfg { > >> #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) > >> #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) > >> #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) > >> + #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) > >> + > >> unsigned long quirks; > >> unsigned long pgsize_bitmap; > >> unsigned int ias; > >> -- > >> 2.17.2 > >
On 16/06/2023 18:00, Shameerali Kolothum Thodi wrote: > > >> -----Original Message----- >> From: Joao Martins [mailto:joao.m.martins@oracle.com] >> Sent: 22 May 2023 11:43 >> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>; >> iommu@lists.linux.dev >> Cc: Jason Gunthorpe <jgg@nvidia.com>; Kevin Tian <kevin.tian@intel.com>; >> Lu Baolu <baolu.lu@linux.intel.com>; Yi Liu <yi.l.liu@intel.com>; Yi Y Sun >> <yi.y.sun@intel.com>; Eric Auger <eric.auger@redhat.com>; Nicolin Chen >> <nicolinc@nvidia.com>; Joerg Roedel <joro@8bytes.org>; Jean-Philippe >> Brucker <jean-philippe@linaro.org>; Suravee Suthikulpanit >> <suravee.suthikulpanit@amd.com>; Will Deacon <will@kernel.org>; Robin >> Murphy <robin.murphy@arm.com>; Alex Williamson >> <alex.williamson@redhat.com>; kvm@vger.kernel.org >> Subject: Re: [PATCH RFCv2 21/24] iommu/arm-smmu-v3: Enable HTTU for >> stage1 with io-pgtable mapping > > [...] > >>>> @@ -2226,6 +2233,9 @@ static int arm_smmu_domain_finalise(struct >>>> iommu_domain *domain, >>>> .iommu_dev = smmu->dev, >>>> }; >>>> >>>> + if (smmu->features & arm_smmu_dbm_capable(smmu)) >>>> + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD; > > Also, I think we should limit setting this to s1 only pgtbl_cfg. > +1, makes sense. 
> Thanks, > Shameer > >>>> + >>>> pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); >>>> if (!pgtbl_ops) >>>> return -ENOMEM; >>>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h >>>> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h >>>> index d82dd125446c..83d6f3a2554f 100644 >>>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h >>>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h >>>> @@ -288,6 +288,9 @@ >>>> #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32) >>>> #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38) >>>> >>>> +#define CTXDESC_CD_0_TCR_HA (1UL << 43) >>>> +#define CTXDESC_CD_0_TCR_HD (1UL << 42) >>>> + >>>> #define CTXDESC_CD_0_AA64 (1UL << 41) >>>> #define CTXDESC_CD_0_S (1UL << 44) >>>> #define CTXDESC_CD_0_R (1UL << 45) >>>> diff --git a/drivers/iommu/io-pgtable-arm.c >>>> b/drivers/iommu/io-pgtable-arm.c index 72dcdd468cf3..b2f470529459 >>>> 100644 >>>> --- a/drivers/iommu/io-pgtable-arm.c >>>> +++ b/drivers/iommu/io-pgtable-arm.c >>>> @@ -75,6 +75,7 @@ >>>> >>>> #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) >>>> #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) >>>> +#define ARM_LPAE_PTE_DBM (((arm_lpae_iopte)1) << 51) >>>> #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) >>>> #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) >>>> #define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) >>>> @@ -84,7 +85,7 @@ >>>> >>>> #define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << >> 2) >>>> /* Ignore the contiguous bit for block splitting */ >>>> -#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) >>>> +#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)13) << >> 51) >>>> #define ARM_LPAE_PTE_ATTR_MASK >> (ARM_LPAE_PTE_ATTR_LO_MASK >>>> | \ >>>> ARM_LPAE_PTE_ATTR_HI_MASK) >>>> /* Software bit for solving coherency races */ @@ -93,6 +94,9 @@ >>>> /* Stage-1 PTE */ >>>> #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) >>>> #define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) >>>> +#define 
ARM_LPAE_PTE_AP_RDONLY_BIT 7 >>>> +#define ARM_LPAE_PTE_AP_WRITABLE >> (ARM_LPAE_PTE_AP_RDONLY | \ >>>> + ARM_LPAE_PTE_DBM) >>>> #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 >>>> #define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) >>>> >>>> @@ -407,6 +411,8 @@ static arm_lpae_iopte >> arm_lpae_prot_to_pte(struct >>>> arm_lpae_io_pgtable *data, >>>> pte = ARM_LPAE_PTE_nG; >>>> if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) >>>> pte |= ARM_LPAE_PTE_AP_RDONLY; >>>> + else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD) >>>> + pte |= ARM_LPAE_PTE_AP_WRITABLE; >>>> if (!(prot & IOMMU_PRIV)) >>>> pte |= ARM_LPAE_PTE_AP_UNPRIV; >>>> } else { >>>> @@ -804,7 +810,8 @@ arm_64_lpae_alloc_pgtable_s1(struct >>>> io_pgtable_cfg *cfg, void *cookie) >>>> >>>> if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | >>>> IO_PGTABLE_QUIRK_ARM_TTBR1 | >>>> - IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)) >>>> + IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | >>>> + IO_PGTABLE_QUIRK_ARM_HD)) >>>> return NULL; >>>> >>>> data = arm_lpae_alloc_pgtable(cfg); diff --git >>>> a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index >>>> 25142a0e2fc2..9a996ba7856d 100644 >>>> --- a/include/linux/io-pgtable.h >>>> +++ b/include/linux/io-pgtable.h >>>> @@ -85,6 +85,8 @@ struct io_pgtable_cfg { >>>> * >>>> * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the >> outer-cacheability >>>> * attributes set in the TCR for a non-coherent page-table walker. >>>> + * >>>> + * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking. >>>> */ >>>> #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) >>>> #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) >>>> @@ -92,6 +94,8 @@ struct io_pgtable_cfg { >>>> #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) >>>> #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) >>>> #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) >>>> + #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) >>>> + >>>> unsigned long quirks; >>>> unsigned long pgsize_bitmap; >>>> unsigned int ias; >>>> -- >>>> 2.17.2 >>>
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e110ff4710bf..e2b98a6a6b74 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1998,6 +1998,11 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = {
 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
 };
 
+static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
+{
+	return smmu->features & (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY);
+}
+
 /* IOMMU API */
 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
 {
@@ -2124,6 +2129,8 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
+	if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
+		cfg->cd.tcr |= CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD;
 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
 
 	/*
@@ -2226,6 +2233,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
 		.iommu_dev	= smmu->dev,
 	};
 
+	if (smmu->features & arm_smmu_dbm_capable(smmu))
+		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
+
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops)
 		return -ENOMEM;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index d82dd125446c..83d6f3a2554f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -288,6 +288,9 @@
 #define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
 #define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
 
+#define CTXDESC_CD_0_TCR_HA		(1UL << 43)
+#define CTXDESC_CD_0_TCR_HD		(1UL << 42)
+
 #define CTXDESC_CD_0_AA64		(1UL << 41)
 #define CTXDESC_CD_0_S			(1UL << 44)
 #define CTXDESC_CD_0_R			(1UL << 45)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 72dcdd468cf3..b2f470529459 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -75,6 +75,7 @@
 
 #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
 #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
+#define ARM_LPAE_PTE_DBM		(((arm_lpae_iopte)1) << 51)
 #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
 #define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
 #define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
@@ -84,7 +85,7 @@
 
 #define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) << 2)
 /* Ignore the contiguous bit for block splitting */
-#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)6) << 52)
+#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)13) << 51)
 #define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK |	\
 					 ARM_LPAE_PTE_ATTR_HI_MASK)
 /* Software bit for solving coherency races */
@@ -93,6 +94,9 @@
 /* Stage-1 PTE */
 #define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
 #define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)2) << 6)
+#define ARM_LPAE_PTE_AP_RDONLY_BIT	7
+#define ARM_LPAE_PTE_AP_WRITABLE	(ARM_LPAE_PTE_AP_RDONLY | \
+					 ARM_LPAE_PTE_DBM)
 #define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
 #define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)
 
@@ -407,6 +411,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
 		pte = ARM_LPAE_PTE_nG;
 		if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
 			pte |= ARM_LPAE_PTE_AP_RDONLY;
+		else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD)
+			pte |= ARM_LPAE_PTE_AP_WRITABLE;
 		if (!(prot & IOMMU_PRIV))
 			pte |= ARM_LPAE_PTE_AP_UNPRIV;
 	} else {
@@ -804,7 +810,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
 			    IO_PGTABLE_QUIRK_ARM_TTBR1 |
-			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
+			    IO_PGTABLE_QUIRK_ARM_OUTER_WBWA |
+			    IO_PGTABLE_QUIRK_ARM_HD))
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 25142a0e2fc2..9a996ba7856d 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -85,6 +85,8 @@ struct io_pgtable_cfg {
 	 *
 	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
 	 *	attributes set in the TCR for a non-coherent page-table walker.
+	 *
+	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking.
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
@@ -92,6 +94,8 @@ struct io_pgtable_cfg {
 	#define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT	BIT(4)
 	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
 	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
+	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
+
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;