Message ID | 1576514271-15687-3-git-send-email-jcrouse@codeaurora.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | iommu/arm-smmu: Split pagetable support for arm-smmu-v2 | expand |
On Mon, Dec 16, 2019 at 09:37:48AM -0700, Jordan Crouse wrote: > Add support to enable split pagetables (TTBR1) if the supporting driver > requests it via the DOMAIN_ATTR_SPLIT_TABLES flag. When enabled, the driver > will set up the TTBR0 and TTBR1 regions and program the default domain > pagetable on TTBR1. > > After attaching the device, the value of he domain attribute can > be queried to see if the split pagetables were successfully programmed. > Furthermore the domain geometry will be updated so that the caller can > determine the active region for the pagetable that was programmed. > > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> > --- > > drivers/iommu/arm-smmu.c | 40 +++++++++++++++++++++++++++++++++++----- > drivers/iommu/arm-smmu.h | 45 +++++++++++++++++++++++++++++++++++++++------ > 2 files changed, 74 insertions(+), 11 deletions(-) > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > index c106406..7b59116 100644 > --- a/drivers/iommu/arm-smmu.c > +++ b/drivers/iommu/arm-smmu.c > @@ -538,9 +538,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, > cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr; > cb->ttbr[1] = 0; > } else { > - cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > - cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid); > - cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid); > + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { > + cb->ttbr[0] = FIELD_PREP(TTBRn_ASID, cfg->asid); > + cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > + cb->ttbr[1] |= > + FIELD_PREP(TTBRn_ASID, cfg->asid); > + } else { > + cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > + cb->ttbr[0] |= > + FIELD_PREP(TTBRn_ASID, cfg->asid); > + cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid); > + } I still don't understand why you have to set the ASID in both of the TTBRs. Assuming TCR.A1 is clear, then we should only need to set the field in TTBR0. > } > } else { > cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; > @@ -651,6 +659,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > enum io_pgtable_fmt fmt; > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); > struct arm_smmu_cfg *cfg = &smmu_domain->cfg; > + u32 quirks = 0; > > mutex_lock(&smmu_domain->init_mutex); > if (smmu_domain->smmu) > @@ -719,6 +728,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > oas = smmu->ipa_size; > if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) { > fmt = ARM_64_LPAE_S1; > + if (smmu_domain->split_pagetables) > + quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1; > } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) { > fmt = ARM_32_LPAE_S1; > ias = min(ias, 32UL); > @@ -788,6 +799,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, > .tlb = smmu_domain->flush_ops, > .iommu_dev = smmu->dev, > + .quirks = quirks, > }; > > if (smmu_domain->non_strict) > @@ -801,8 +813,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > > /* Update the domain's page sizes to reflect the page table format */ > domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; > - domain->geometry.aperture_end = (1UL << ias) - 1; > - domain->geometry.force_aperture = true; > + > + if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { > + domain->geometry.aperture_start = ~((1ULL << ias) - 1); > + domain->geometry.aperture_end = ~0UL; > + } else { > + domain->geometry.aperture_end = (1UL << ias) - 1; > + domain->geometry.force_aperture = true; > + smmu_domain->split_pagetables = false; > + } > > /* Initialise the context bank with our page table cfg */ > arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg); > @@ -1484,6 +1503,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, > case DOMAIN_ATTR_NESTING: > *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); > return 0; > + case DOMAIN_ATTR_SPLIT_TABLES: > + *(int *)data = smmu_domain->split_pagetables; > + return 0; > default: > return -ENODEV; > } > @@ -1524,6 +1546,14 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, > else > smmu_domain->stage = ARM_SMMU_DOMAIN_S1; > break; > + case DOMAIN_ATTR_SPLIT_TABLES: > + if (smmu_domain->smmu) { > + ret = -EPERM; > + goto out_unlock; > + } > + if (*(int *)data) > + smmu_domain->split_pagetables = true; > + break; > default: > ret = -ENODEV; > } > diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h > index afab9de..68526cc 100644 > --- a/drivers/iommu/arm-smmu.h > +++ b/drivers/iommu/arm-smmu.h > @@ -177,6 +177,16 @@ enum arm_smmu_cbar_type { > #define TCR_IRGN0 GENMASK(9, 8) > #define TCR_T0SZ GENMASK(5, 0) > > +#define TCR_TG1 GENMASK(31, 30) > + > +#define TG0_4K 0 > +#define TG0_64K 1 > +#define TG0_16K 2 > + > +#define TG1_16K 1 > +#define TG1_4K 2 > +#define TG1_64K 3 > + > #define ARM_SMMU_CB_CONTEXTIDR 0x34 > #define ARM_SMMU_CB_S1_MAIR0 0x38 > #define ARM_SMMU_CB_S1_MAIR1 0x3c > @@ -329,16 +339,39 @@ struct arm_smmu_domain { > struct mutex init_mutex; /* Protects smmu pointer */ > spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */ > struct iommu_domain domain; > + bool split_pagetables; > }; > > +static inline u32 arm_smmu_lpae_tcr_tg(struct io_pgtable_cfg *cfg) > +{ > + u32 val; > + > + if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) > + return FIELD_PREP(TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg); > + > + val = FIELD_PREP(TCR_TG1, cfg->arm_lpae_s1_cfg.tcr.tg); > + > + if (cfg->arm_lpae_s1_cfg.tcr.tg == TG1_4K) > + val |= FIELD_PREP(TCR_TG0, TG0_4K); > + else if (cfg->arm_lpae_s1_cfg.tcr.tg == TG1_16K) > + val |= FIELD_PREP(TCR_TG0, TG0_16K); > + else > + val |= FIELD_PREP(TCR_TG0, TG0_64K); This looks like it's making assumptions about the order in which page-tables are installed, which I'd really like to avoid. See below. > static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) > { > - return TCR_EPD1 | > - FIELD_PREP(TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | > - FIELD_PREP(TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | > - FIELD_PREP(TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | > - FIELD_PREP(TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | > - FIELD_PREP(TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); > + u32 tcr = FIELD_PREP(TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | > + FIELD_PREP(TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | > + FIELD_PREP(TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | > + FIELD_PREP(TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); > + > + if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) > + return tcr | TCR_EPD1 | arm_smmu_lpae_tcr_tg(cfg); This is interesting. If the intention is to have both TTBR0 and TTBR1 used concurrently by different domains, then we probably need to be a bit smarter about setting TCR_EPDx. Can we do something like start off with them both set, and then just clear the one we want when installing a page-table? Will
On Thu, Jan 09, 2020 at 02:33:34PM +0000, Will Deacon wrote: > On Mon, Dec 16, 2019 at 09:37:48AM -0700, Jordan Crouse wrote: > > Add support to enable split pagetables (TTBR1) if the supporting driver > > requests it via the DOMAIN_ATTR_SPLIT_TABLES flag. When enabled, the driver > > will set up the TTBR0 and TTBR1 regions and program the default domain > > pagetable on TTBR1. > > > > After attaching the device, the value of he domain attribute can > > be queried to see if the split pagetables were successfully programmed. > > Furthermore the domain geometry will be updated so that the caller can > > determine the active region for the pagetable that was programmed. > > > > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> > > --- > > > > drivers/iommu/arm-smmu.c | 40 +++++++++++++++++++++++++++++++++++----- > > drivers/iommu/arm-smmu.h | 45 +++++++++++++++++++++++++++++++++++++++------ > > 2 files changed, 74 insertions(+), 11 deletions(-) > > > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > > index c106406..7b59116 100644 > > --- a/drivers/iommu/arm-smmu.c > > +++ b/drivers/iommu/arm-smmu.c > > @@ -538,9 +538,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, > > cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr; > > cb->ttbr[1] = 0; > > } else { > > - cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > > - cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid); > > - cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid); > > + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { > > + cb->ttbr[0] = FIELD_PREP(TTBRn_ASID, cfg->asid); > > + cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > > + cb->ttbr[1] |= > > + FIELD_PREP(TTBRn_ASID, cfg->asid); > > + } else { > > + cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > > + cb->ttbr[0] |= > > + FIELD_PREP(TTBRn_ASID, cfg->asid); > > + cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid); > > + } > > I still don't understand why you have to set the ASID in both of the TTBRs. > Assuming TCR.A1 is clear, then we should only need to set the field in > TTBR0. This is mostly out of a sense of symmetry with the non-split configuration. I'll clean it up. > > > } > > } else { > > cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; > > @@ -651,6 +659,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > > enum io_pgtable_fmt fmt; > > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); > > struct arm_smmu_cfg *cfg = &smmu_domain->cfg; > > + u32 quirks = 0; > > > > mutex_lock(&smmu_domain->init_mutex); > > if (smmu_domain->smmu) > > @@ -719,6 +728,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > > oas = smmu->ipa_size; > > if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) { > > fmt = ARM_64_LPAE_S1; > > + if (smmu_domain->split_pagetables) > > + quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1; > > } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) { > > fmt = ARM_32_LPAE_S1; > > ias = min(ias, 32UL); > > @@ -788,6 +799,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > > .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, > > .tlb = smmu_domain->flush_ops, > > .iommu_dev = smmu->dev, > > + .quirks = quirks, > > }; > > > > if (smmu_domain->non_strict) > > @@ -801,8 +813,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > > > > /* Update the domain's page sizes to reflect the page table format */ > > domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; > > - domain->geometry.aperture_end = (1UL << ias) - 1; > > - domain->geometry.force_aperture = true; > > + > > + if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { > > + domain->geometry.aperture_start = ~((1ULL << ias) - 1); > > + domain->geometry.aperture_end = ~0UL; > > + } else { > > + domain->geometry.aperture_end = (1UL << ias) - 1; > > + domain->geometry.force_aperture = true; > > + smmu_domain->split_pagetables = false; > > + } > > > > /* Initialise the context bank with our page table cfg */ > > arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg); > > @@ -1484,6 +1503,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, > > case DOMAIN_ATTR_NESTING: > > *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); > > return 0; > > + case DOMAIN_ATTR_SPLIT_TABLES: > > + *(int *)data = smmu_domain->split_pagetables; > > + return 0; > > default: > > return -ENODEV; > > } > > @@ -1524,6 +1546,14 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, > > else > > smmu_domain->stage = ARM_SMMU_DOMAIN_S1; > > break; > > + case DOMAIN_ATTR_SPLIT_TABLES: > > + if (smmu_domain->smmu) { > > + ret = -EPERM; > > + goto out_unlock; > > + } > > + if (*(int *)data) > > + smmu_domain->split_pagetables = true; > > + break; > > default: > > ret = -ENODEV; > > } > > diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h > > index afab9de..68526cc 100644 > > --- a/drivers/iommu/arm-smmu.h > > +++ b/drivers/iommu/arm-smmu.h > > @@ -177,6 +177,16 @@ enum arm_smmu_cbar_type { > > #define TCR_IRGN0 GENMASK(9, 8) > > #define TCR_T0SZ GENMASK(5, 0) > > > > +#define TCR_TG1 GENMASK(31, 30) > > + > > +#define TG0_4K 0 > > +#define TG0_64K 1 > > +#define TG0_16K 2 > > + > > +#define TG1_16K 1 > > +#define TG1_4K 2 > > +#define TG1_64K 3 > > + > > #define ARM_SMMU_CB_CONTEXTIDR 0x34 > > #define ARM_SMMU_CB_S1_MAIR0 0x38 > > #define ARM_SMMU_CB_S1_MAIR1 0x3c > > @@ -329,16 +339,39 @@ struct arm_smmu_domain { > > struct mutex init_mutex; /* Protects smmu pointer */ > > spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */ > > struct iommu_domain domain; > > + bool split_pagetables; > > }; > > > > +static inline u32 arm_smmu_lpae_tcr_tg(struct io_pgtable_cfg *cfg) > > +{ > > + u32 val; > > + > > + if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) > > + return FIELD_PREP(TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg); > > + > > + val = FIELD_PREP(TCR_TG1, cfg->arm_lpae_s1_cfg.tcr.tg); > > + > > + if (cfg->arm_lpae_s1_cfg.tcr.tg == TG1_4K) > > + val |= FIELD_PREP(TCR_TG0, TG0_4K); > > + else if (cfg->arm_lpae_s1_cfg.tcr.tg == TG1_16K) > > + val |= FIELD_PREP(TCR_TG0, TG0_16K); > > + else > > + val |= FIELD_PREP(TCR_TG0, TG0_64K); > > This looks like it's making assumptions about the order in which page-tables > are installed, which I'd really like to avoid. See below. > > static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) > > { > > - return TCR_EPD1 | > > - FIELD_PREP(TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | > > - FIELD_PREP(TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | > > - FIELD_PREP(TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | > > - FIELD_PREP(TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | > > - FIELD_PREP(TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); > > + u32 tcr = FIELD_PREP(TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | > > + FIELD_PREP(TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | > > + FIELD_PREP(TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | > > + FIELD_PREP(TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); > > + > > + if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) > > + return tcr | TCR_EPD1 | arm_smmu_lpae_tcr_tg(cfg); > > This is interesting. If the intention is to have both TTBR0 and TTBR1 > used concurrently by different domains, then we probably need to be a bit > smarter about setting TCR_EPDx. Can we do something like start off with them > both set, and then just clear the one we want when installing a page-table? My intention is that there should only be one domain that programs the hardware and installs the TTBR1 page-table. Under the proposed design [1] we used an aux domain that was basically a wrapper for a page-table and a domain attribute to return the physical address of the page-table to the GPU hardware which can program the TTBR0 register at runtime [2]. The object of this patch is that in split pagetable mode the "master" domain programs both the TTBR0 and TTBR1 sides of the TCR register but leaves the actual TTBR0 register empty for the GPU to manage. The GPU isn't currently set up to program register configuration outside of swapping the TTBR0 register and hitting the TIBALL bit which is part of an built in opcode and its not practical to add TCR configuration to the mix. I also feel pretty strongly that we should leave register configuration to the arm-smmu driver as much as possible so that was my motivation for doing this patch in this manner. Jordan [1] https://patchwork.freedesktop.org/patch/306117/ [2] https://patchwork.freedesktop.org/patch/307616/?series=57441&rev=3 > Will
On 16/12/2019 4:37 pm, Jordan Crouse wrote: > Add support to enable split pagetables (TTBR1) if the supporting driver > requests it via the DOMAIN_ATTR_SPLIT_TABLES flag. When enabled, the driver > will set up the TTBR0 and TTBR1 regions and program the default domain > pagetable on TTBR1. > > After attaching the device, the value of he domain attribute can > be queried to see if the split pagetables were successfully programmed. > Furthermore the domain geometry will be updated so that the caller can > determine the active region for the pagetable that was programmed. > > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> > --- > > drivers/iommu/arm-smmu.c | 40 +++++++++++++++++++++++++++++++++++----- > drivers/iommu/arm-smmu.h | 45 +++++++++++++++++++++++++++++++++++++++------ > 2 files changed, 74 insertions(+), 11 deletions(-) > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > index c106406..7b59116 100644 > --- a/drivers/iommu/arm-smmu.c > +++ b/drivers/iommu/arm-smmu.c > @@ -538,9 +538,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, > cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr; > cb->ttbr[1] = 0; > } else { > - cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > - cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid); > - cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid); > + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { > + cb->ttbr[0] = FIELD_PREP(TTBRn_ASID, cfg->asid); > + cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > + cb->ttbr[1] |= > + FIELD_PREP(TTBRn_ASID, cfg->asid); > + } else { > + cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > + cb->ttbr[0] |= > + FIELD_PREP(TTBRn_ASID, cfg->asid); > + cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid); > + } > } > } else { > cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; > @@ -651,6 +659,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > enum io_pgtable_fmt fmt; > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); > struct arm_smmu_cfg *cfg = &smmu_domain->cfg; > + u32 quirks = 0; > > mutex_lock(&smmu_domain->init_mutex); > if (smmu_domain->smmu) > @@ -719,6 +728,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > oas = smmu->ipa_size; > if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) { > fmt = ARM_64_LPAE_S1; > + if (smmu_domain->split_pagetables) > + quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1; > } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) { > fmt = ARM_32_LPAE_S1; > ias = min(ias, 32UL); > @@ -788,6 +799,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, > .tlb = smmu_domain->flush_ops, > .iommu_dev = smmu->dev, > + .quirks = quirks, > }; > > if (smmu_domain->non_strict) > @@ -801,8 +813,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > > /* Update the domain's page sizes to reflect the page table format */ > domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; > - domain->geometry.aperture_end = (1UL << ias) - 1; > - domain->geometry.force_aperture = true; > + > + if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { > + domain->geometry.aperture_start = ~((1ULL << ias) - 1); AKA "~0UL << ias", if I'm not mistaken ;) > + domain->geometry.aperture_end = ~0UL; > + } else { > + domain->geometry.aperture_end = (1UL << ias) - 1; > + domain->geometry.force_aperture = true; > + smmu_domain->split_pagetables = false; > + } > > /* Initialise the context bank with our page table cfg */ > arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg); > @@ -1484,6 +1503,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, > case DOMAIN_ATTR_NESTING: > *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); > return 0; > + case DOMAIN_ATTR_SPLIT_TABLES: > + *(int *)data = smmu_domain->split_pagetables; > + return 0; > default: > return -ENODEV; > } > @@ -1524,6 +1546,14 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, > else > smmu_domain->stage = ARM_SMMU_DOMAIN_S1; > break; > + case DOMAIN_ATTR_SPLIT_TABLES: > + if (smmu_domain->smmu) { > + ret = -EPERM; > + goto out_unlock; > + } > + if (*(int *)data) > + smmu_domain->split_pagetables = true; I still like the idea of passing the actual split point here, but as it is I think this sets the scene perfectly for coming back and doing that later. > + break; > default: > ret = -ENODEV; > } > diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h > index afab9de..68526cc 100644 > --- a/drivers/iommu/arm-smmu.h > +++ b/drivers/iommu/arm-smmu.h > @@ -177,6 +177,16 @@ enum arm_smmu_cbar_type { > #define TCR_IRGN0 GENMASK(9, 8) > #define TCR_T0SZ GENMASK(5, 0) > > +#define TCR_TG1 GENMASK(31, 30) > + > +#define TG0_4K 0 > +#define TG0_64K 1 > +#define TG0_16K 2 > + > +#define TG1_16K 1 > +#define TG1_4K 2 > +#define TG1_64K 3 > + > #define ARM_SMMU_CB_CONTEXTIDR 0x34 > #define ARM_SMMU_CB_S1_MAIR0 0x38 > #define ARM_SMMU_CB_S1_MAIR1 0x3c > @@ -329,16 +339,39 @@ struct arm_smmu_domain { > struct mutex init_mutex; /* Protects smmu pointer */ > spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */ > struct iommu_domain domain; > + bool split_pagetables; > }; > > +static inline u32 arm_smmu_lpae_tcr_tg(struct io_pgtable_cfg *cfg) > +{ > + u32 val; > + > + if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) > + return FIELD_PREP(TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg); > + > + val = FIELD_PREP(TCR_TG1, cfg->arm_lpae_s1_cfg.tcr.tg); > + > + if (cfg->arm_lpae_s1_cfg.tcr.tg == TG1_4K) > + val |= FIELD_PREP(TCR_TG0, TG0_4K); > + else if (cfg->arm_lpae_s1_cfg.tcr.tg == TG1_16K) > + val |= FIELD_PREP(TCR_TG0, TG0_16K); > + else > + val |= FIELD_PREP(TCR_TG0, TG0_64K); > + > + return val; > +} This is all a bit ugly - I'd really like to rely on the real values from both io_pgtable instances if at all possible... > + > static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) > { > - return TCR_EPD1 | > - FIELD_PREP(TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | > - FIELD_PREP(TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | > - FIELD_PREP(TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | > - FIELD_PREP(TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | > - FIELD_PREP(TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); > + u32 tcr = FIELD_PREP(TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | > + FIELD_PREP(TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | > + FIELD_PREP(TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | > + FIELD_PREP(TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); > + > + if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) > + return tcr | TCR_EPD1 | arm_smmu_lpae_tcr_tg(cfg); > + > + return tcr | (tcr << 16) | arm_smmu_lpae_tcr_tg(cfg); ...especially here - leaving TTBR0 enabled but pointing to who-knows-what until someone fills it in at some arbitrary point in the future seems rather scary. I'm looking at iommu_aux_attach_device() and friends, and it appears pretty achievable to hook that up in a workable manner, even if it's just routed straight through to the impl to only work within qcom-specific parameters to begin with. I figure the first aux_attach_dev sanity-checks that the main domain is using TTBR1 with a compatible split, sets TTBR0 and updates the merged TCR value at that point. For subsequent calls it shouldn't need to do much more than sanity-check that a new aux domain has the same parameters as the existing one(s) (and again, such checks could potentially even start out as just "this is OK by construction" comments). I guess we'd probably want a count of the number of 'live' aux domains so we can simply disable TTBR0 on the final aux_detach_dev without having to keep detailed track of whatever the GPU has actually context switched in the hardware. Can you see any holes in that idea? I haven't thought it through in detail, but it also feels like between aux_attach_dev and/or the TTBR1 quirk in attach_dev there ought to be enough information to influence the context bank allocation or shuffle any existing domains such that you can ensure that the right thing ends up in magic context 0 when it needs to be. That could be a pretty neat and robust way to finally put that to bed. Robin. > } > > static inline u32 arm_smmu_lpae_tcr2(struct io_pgtable_cfg *cfg) >
Oh, one more thing... On 16/12/2019 4:37 pm, Jordan Crouse wrote: [...] > @@ -651,6 +659,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > enum io_pgtable_fmt fmt; > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); > struct arm_smmu_cfg *cfg = &smmu_domain->cfg; > + u32 quirks = 0; > > mutex_lock(&smmu_domain->init_mutex); > if (smmu_domain->smmu) > @@ -719,6 +728,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > oas = smmu->ipa_size; > if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) { > fmt = ARM_64_LPAE_S1; > + if (smmu_domain->split_pagetables) > + quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1; To avoid me forgetting and questioning it again in future, I'd recommend sticking a comment somewhere near here that we don't reduce cfg->ias in this case because we're currently assuming SEP_UPSTREAM. Robin. > } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) { > fmt = ARM_32_LPAE_S1; > ias = min(ias, 32UL);
On Tue, Jan 21, 2020 at 02:36:19PM +0000, Robin Murphy wrote: > On 16/12/2019 4:37 pm, Jordan Crouse wrote: > >Add support to enable split pagetables (TTBR1) if the supporting driver > >requests it via the DOMAIN_ATTR_SPLIT_TABLES flag. When enabled, the driver > >will set up the TTBR0 and TTBR1 regions and program the default domain > >pagetable on TTBR1. > > > >After attaching the device, the value of he domain attribute can > >be queried to see if the split pagetables were successfully programmed. > >Furthermore the domain geometry will be updated so that the caller can > >determine the active region for the pagetable that was programmed. > > > >Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> > >--- > > > > drivers/iommu/arm-smmu.c | 40 +++++++++++++++++++++++++++++++++++----- > > drivers/iommu/arm-smmu.h | 45 +++++++++++++++++++++++++++++++++++++++------ > > 2 files changed, 74 insertions(+), 11 deletions(-) > > > >diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c > >index c106406..7b59116 100644 > >--- a/drivers/iommu/arm-smmu.c > >+++ b/drivers/iommu/arm-smmu.c > >@@ -538,9 +538,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, > > cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr; > > cb->ttbr[1] = 0; > > } else { > >- cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > >- cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid); > >- cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid); > >+ if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { > >+ cb->ttbr[0] = FIELD_PREP(TTBRn_ASID, cfg->asid); > >+ cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > >+ cb->ttbr[1] |= > >+ FIELD_PREP(TTBRn_ASID, cfg->asid); > >+ } else { > >+ cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; > >+ cb->ttbr[0] |= > >+ FIELD_PREP(TTBRn_ASID, cfg->asid); > >+ cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid); > >+ } > > } > > } else { > > cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; > >@@ -651,6 +659,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > > enum io_pgtable_fmt fmt; > > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); > > struct arm_smmu_cfg *cfg = &smmu_domain->cfg; > >+ u32 quirks = 0; > > mutex_lock(&smmu_domain->init_mutex); > > if (smmu_domain->smmu) > >@@ -719,6 +728,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > > oas = smmu->ipa_size; > > if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) { > > fmt = ARM_64_LPAE_S1; > >+ if (smmu_domain->split_pagetables) > >+ quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1; > > } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) { > > fmt = ARM_32_LPAE_S1; > > ias = min(ias, 32UL); > >@@ -788,6 +799,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > > .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, > > .tlb = smmu_domain->flush_ops, > > .iommu_dev = smmu->dev, > >+ .quirks = quirks, > > }; > > if (smmu_domain->non_strict) > >@@ -801,8 +813,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, > > /* Update the domain's page sizes to reflect the page table format */ > > domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; > >- domain->geometry.aperture_end = (1UL << ias) - 1; > >- domain->geometry.force_aperture = true; > >+ > >+ if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { > >+ domain->geometry.aperture_start = ~((1ULL << ias) - 1); > > AKA "~0UL << ias", if I'm not mistaken ;) > > >+ domain->geometry.aperture_end = ~0UL; > >+ } else { > >+ domain->geometry.aperture_end = (1UL << ias) - 1; > >+ domain->geometry.force_aperture = true; > >+ smmu_domain->split_pagetables = false; > >+ } > > /* Initialise the context bank with our page table cfg */ > > arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg); > >@@ -1484,6 +1503,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, > > case DOMAIN_ATTR_NESTING: > > *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); > > return 0; > >+ case DOMAIN_ATTR_SPLIT_TABLES: > >+ *(int *)data = smmu_domain->split_pagetables; > >+ return 0; > > default: > > return -ENODEV; > > } > >@@ -1524,6 +1546,14 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, > > else > > smmu_domain->stage = ARM_SMMU_DOMAIN_S1; > > break; > >+ case DOMAIN_ATTR_SPLIT_TABLES: > >+ if (smmu_domain->smmu) { > >+ ret = -EPERM; > >+ goto out_unlock; > >+ } > >+ if (*(int *)data) > >+ smmu_domain->split_pagetables = true; > > I still like the idea of passing the actual split point here, but as it is I > think this sets the scene perfectly for coming back and doing that later. > > >+ break; > > default: > > ret = -ENODEV; > > } > >diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h > >index afab9de..68526cc 100644 > >--- a/drivers/iommu/arm-smmu.h > >+++ b/drivers/iommu/arm-smmu.h > >@@ -177,6 +177,16 @@ enum arm_smmu_cbar_type { > > #define TCR_IRGN0 GENMASK(9, 8) > > #define TCR_T0SZ GENMASK(5, 0) > >+#define TCR_TG1 GENMASK(31, 30) > >+ > >+#define TG0_4K 0 > >+#define TG0_64K 1 > >+#define TG0_16K 2 > >+ > >+#define TG1_16K 1 > >+#define TG1_4K 2 > >+#define TG1_64K 3 > >+ > > #define ARM_SMMU_CB_CONTEXTIDR 0x34 > > #define ARM_SMMU_CB_S1_MAIR0 0x38 > > #define ARM_SMMU_CB_S1_MAIR1 0x3c > >@@ -329,16 +339,39 @@ struct arm_smmu_domain { > > struct mutex init_mutex; /* Protects smmu pointer */ > > spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */ > > struct iommu_domain domain; > >+ bool split_pagetables; > > }; > >+static inline u32 arm_smmu_lpae_tcr_tg(struct io_pgtable_cfg *cfg) > >+{ > >+ u32 val; > >+ > >+ if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) > >+ return FIELD_PREP(TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg); > >+ > >+ val = FIELD_PREP(TCR_TG1, cfg->arm_lpae_s1_cfg.tcr.tg); > >+ > >+ if (cfg->arm_lpae_s1_cfg.tcr.tg == TG1_4K) > >+ val |= FIELD_PREP(TCR_TG0, TG0_4K); > >+ else if (cfg->arm_lpae_s1_cfg.tcr.tg == TG1_16K) > >+ val |= FIELD_PREP(TCR_TG0, TG0_16K); > >+ else > >+ val |= FIELD_PREP(TCR_TG0, TG0_64K); > >+ > >+ return val; > >+} > > This is all a bit ugly - I'd really like to rely on the real values from > both io_pgtable instances if at all possible... > > >+ > > static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) > > { > >- return TCR_EPD1 | > >- FIELD_PREP(TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | > >- FIELD_PREP(TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | > >- FIELD_PREP(TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | > >- FIELD_PREP(TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | > >- FIELD_PREP(TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); > >+ u32 tcr = FIELD_PREP(TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | > >+ FIELD_PREP(TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | > >+ FIELD_PREP(TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | > >+ FIELD_PREP(TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); > >+ > >+ if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) > >+ return tcr | TCR_EPD1 | arm_smmu_lpae_tcr_tg(cfg); > >+ > >+ return tcr | (tcr << 16) | arm_smmu_lpae_tcr_tg(cfg); > > ...especially here - leaving TTBR0 enabled but pointing to who-knows-what > until someone fills it in at some arbitrary point in the future seems rather > scary. > > I'm looking at iommu_aux_attach_device() and friends, and it appears pretty > achievable to hook that up in a workable manner, even if it's just routed > straight through to the impl to only work within qcom-specific parameters to > begin with. I figure the first aux_attach_dev sanity-checks that the main > domain is using TTBR1 with a compatible split, sets TTBR0 and updates the > merged TCR value at that point. For subsequent calls it shouldn't need to do > much more than sanity-check that a new aux domain has the same parameters as > the existing one(s) (and again, such checks could potentially even start out > as just "this is OK by construction" comments). I guess we'd probably want a > count of the number of 'live' aux domains so we can simply disable TTBR0 on > the final aux_detach_dev without having to keep detailed track of whatever > the GPU has actually context switched in the hardware. Can you see any holes > in that idea? Let me repeat this back just to be sure we're on the same page. When the quirk is enabled on the primary domain, we'll set up TTBR1 and leave TTBR0 disabled. Then, when the first aux domain is attached we will set up that io_ptgable to enable TTBR0 and then let the GPU do what the GPU does until the last aux is detached and we can switch off TTBR0 again. I like this. I'll have to do a bit more exploration because the original aux design assumed that we didn't need to touch the hardware and I'm not sure if there are any resource contention issues between the primary domain and the aux domain. Luckily, these should be solvable if they exist (and the original design didn't take into account the TLB flush problem so this was likely something we had to do anyway). > I haven't thought it through in detail, but it also feels like between > aux_attach_dev and/or the TTBR1 quirk in attach_dev there ought to be enough > information to influence the context bank allocation or shuffle any existing > domains such that you can ensure that the right thing ends up in magic > context 0 when it needs to be. That could be a pretty neat and robust way to > finally put that to bed. I'll try to wrap my brain around this as well. Seems like we could do a magic swizzle of the SID mappings but I'm not sure how we could safely pull that off on an existing domain. Maybe I'm overthinking it. I'll spin up a new copy of the TTBR1 quirk patch and revive the aux domain stuff and then we can go from there. Thanks, Jordan > Robin. > > > } > > static inline u32 arm_smmu_lpae_tcr2(struct io_pgtable_cfg *cfg) > >
On 21/01/2020 5:11 pm, Jordan Crouse wrote: [...] >> I'm looking at iommu_aux_attach_device() and friends, and it appears pretty >> achievable to hook that up in a workable manner, even if it's just routed >> straight through to the impl to only work within qcom-specific parameters to >> begin with. I figure the first aux_attach_dev sanity-checks that the main >> domain is using TTBR1 with a compatible split, sets TTBR0 and updates the >> merged TCR value at that point. For subsequent calls it shouldn't need to do >> much more than sanity-check that a new aux domain has the same parameters as >> the existing one(s) (and again, such checks could potentially even start out >> as just "this is OK by construction" comments). I guess we'd probably want a >> count of the number of 'live' aux domains so we can simply disable TTBR0 on >> the final aux_detach_dev without having to keep detailed track of whatever >> the GPU has actually context switched in the hardware. Can you see any holes >> in that idea? > > Let me repeat this back just to be sure we're on the same page. When the quirk > is enabled on the primary domain, we'll set up TTBR1 and leave TTBR0 disabled. > Then, when the first aux domain is attached we will set up that io_ptgable > to enable TTBR0 and then let the GPU do what the GPU does until the last aux is > detached and we can switch off TTBR0 again. > > I like this. I'll have to do a bit more exploration because the original aux > design assumed that we didn't need to touch the hardware and I'm not sure if > there are any resource contention issues between the primary domain and the aux > domain. Luckily, these should be solvable if they exist (and the original design > didn't take into account the TLB flush problem so this was likely something we > had to do anyway). Yeah, sounds like you've got it (somehow I'd completely forgotten that you'd already prototyped the aux domain part, and I only re-read the cover letter after sending that review...). TBH it's not massively different, just being a bit more honest about the intermediate hardware state. As long as we can rely on all aux domains being equivalent and the GPU never writing nonsense to TTBR0, then all arm-smmu really wants to care about is whether there's *something* live or not at any given time, so attach (with quirk) does: TTBR1 = primary_domain->ttbr TCR = primary_domain->tcr | EPD0 then attach_aux comes along and adds: TTBR0 = aux_domain->ttbr TCR = primary_doman->tcr | aux_domain->tcr such that arm-smmu can be happy that TTBR0 is always pointing at *some* valid pagetable from that point on regardless of what subsequently happens underneath, and nobody need touch TCR until the party's completely over. >> I haven't thought it through in detail, but it also feels like between >> aux_attach_dev and/or the TTBR1 quirk in attach_dev there ought to be enough >> information to influence the context bank allocation or shuffle any existing >> domains such that you can ensure that the right thing ends up in magic >> context 0 when it needs to be. That could be a pretty neat and robust way to >> finally put that to bed. > > I'll try to wrap my brain around this as well. Seems like we could do a magic > swizzle of the SID mappings but I'm not sure how we could safely pull that off > on an existing domain. Maybe I'm overthinking it. What I'm imagining isn't all that far from how we do normal domain attach, except instead of setting up the newly-allocated context for a new domain you simply clone the existing context into it, and instead of having a given device's set of Stream IDs to retarget you'd just scan though the S2CRs checking cbndx and rewriting as appropriate. Then finally rewrite domain->cfg.cbndx and the old context is all yours. > I'll spin up a new copy of the TTBR1 quirk patch and revive the aux domain stuff > and then we can go from there. Sounds good, thanks! Robin.
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index c106406..7b59116 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -538,9 +538,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr; cb->ttbr[1] = 0; } else { - cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; - cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid); - cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid); + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { + cb->ttbr[0] = FIELD_PREP(TTBRn_ASID, cfg->asid); + cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; + cb->ttbr[1] |= + FIELD_PREP(TTBRn_ASID, cfg->asid); + } else { + cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; + cb->ttbr[0] |= + FIELD_PREP(TTBRn_ASID, cfg->asid); + cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid); + } } } else { cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; @@ -651,6 +659,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, enum io_pgtable_fmt fmt; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + u32 quirks = 0; mutex_lock(&smmu_domain->init_mutex); if (smmu_domain->smmu) @@ -719,6 +728,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, oas = smmu->ipa_size; if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) { fmt = ARM_64_LPAE_S1; + if (smmu_domain->split_pagetables) + quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1; } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) { fmt = ARM_32_LPAE_S1; ias = min(ias, 32UL); @@ -788,6 +799,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, .tlb = smmu_domain->flush_ops, .iommu_dev = smmu->dev, + .quirks = quirks, }; if (smmu_domain->non_strict) @@ -801,8 +813,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, /* Update the domain's page sizes to reflect the page table format */ domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; - domain->geometry.aperture_end = (1UL << ias) - 1; - domain->geometry.force_aperture = true; + + if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) { + domain->geometry.aperture_start = ~((1ULL << ias) - 1); + domain->geometry.aperture_end = ~0UL; + } else { + domain->geometry.aperture_end = (1UL << ias) - 1; + domain->geometry.force_aperture = true; + smmu_domain->split_pagetables = false; + } /* Initialise the context bank with our page table cfg */ arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg); @@ -1484,6 +1503,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, case DOMAIN_ATTR_NESTING: *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); return 0; + case DOMAIN_ATTR_SPLIT_TABLES: + *(int *)data = smmu_domain->split_pagetables; + return 0; default: return -ENODEV; } @@ -1524,6 +1546,14 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, else smmu_domain->stage = ARM_SMMU_DOMAIN_S1; break; + case DOMAIN_ATTR_SPLIT_TABLES: + if (smmu_domain->smmu) { + ret = -EPERM; + goto out_unlock; + } + if (*(int *)data) + smmu_domain->split_pagetables = true; + break; default: ret = -ENODEV; } diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index afab9de..68526cc 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -177,6 +177,16 @@ enum arm_smmu_cbar_type { #define TCR_IRGN0 GENMASK(9, 8) #define TCR_T0SZ GENMASK(5, 0) +#define TCR_TG1 GENMASK(31, 30) + +#define TG0_4K 0 +#define TG0_64K 1 +#define TG0_16K 2 + +#define TG1_16K 1 +#define TG1_4K 2 +#define TG1_64K 3 + #define ARM_SMMU_CB_CONTEXTIDR 0x34 #define ARM_SMMU_CB_S1_MAIR0 0x38 #define ARM_SMMU_CB_S1_MAIR1 0x3c @@ -329,16 +339,39 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */ struct iommu_domain domain; + bool split_pagetables; }; +static inline u32 arm_smmu_lpae_tcr_tg(struct io_pgtable_cfg *cfg) +{ + u32 val; + + if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) + return FIELD_PREP(TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg); + + val = FIELD_PREP(TCR_TG1, cfg->arm_lpae_s1_cfg.tcr.tg); + + if (cfg->arm_lpae_s1_cfg.tcr.tg == TG1_4K) + val |= FIELD_PREP(TCR_TG0, TG0_4K); + else if (cfg->arm_lpae_s1_cfg.tcr.tg == TG1_16K) + val |= FIELD_PREP(TCR_TG0, TG0_16K); + else + val |= FIELD_PREP(TCR_TG0, TG0_64K); + + return val; +} + static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) { - return TCR_EPD1 | - FIELD_PREP(TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | - FIELD_PREP(TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | - FIELD_PREP(TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | - FIELD_PREP(TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | - FIELD_PREP(TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); + u32 tcr = FIELD_PREP(TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | + FIELD_PREP(TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | + FIELD_PREP(TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | + FIELD_PREP(TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); + + if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) + return tcr | TCR_EPD1 | arm_smmu_lpae_tcr_tg(cfg); + + return tcr | (tcr << 16) | arm_smmu_lpae_tcr_tg(cfg); } static inline u32 arm_smmu_lpae_tcr2(struct io_pgtable_cfg *cfg)
Add support to enable split pagetables (TTBR1) if the supporting driver requests it via the DOMAIN_ATTR_SPLIT_TABLES flag. When enabled, the driver will set up the TTBR0 and TTBR1 regions and program the default domain pagetable on TTBR1. After attaching the device, the value of he domain attribute can be queried to see if the split pagetables were successfully programmed. Furthermore the domain geometry will be updated so that the caller can determine the active region for the pagetable that was programmed. Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> --- drivers/iommu/arm-smmu.c | 40 +++++++++++++++++++++++++++++++++++----- drivers/iommu/arm-smmu.h | 45 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 74 insertions(+), 11 deletions(-)