Message ID | 33a49ca158509c95d50b0d3f9cba03bba2facdf3.1565369764.git.robin.murphy@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Arm SMMU refactoring | expand |
On Fri, Aug 09, 2019 at 06:07:42PM +0100, Robin Murphy wrote: > Since we now use separate iommu_gather_ops for stage 1 and stage 2 > contexts, we may as well divide up the monolithic callback into its > respective stage 1 and stage 2 parts. > > Signed-off-by: Robin Murphy <robin.murphy@arm.com> > --- > drivers/iommu/arm-smmu.c | 66 ++++++++++++++++++++++------------------ > 1 file changed, 37 insertions(+), 29 deletions(-) This will conflict with my iommu API batching stuff, but I can sort that out if/when it gets queued by Joerg. > - if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { > - iova &= ~12UL; > - iova |= cfg->asid; > - do { > - writel_relaxed(iova, reg); > - iova += granule; > - } while (size -= granule); > - } else { > - iova >>= 12; > - iova |= (u64)cfg->asid << 48; > - do { > - writeq_relaxed(iova, reg); > - iova += granule >> 12; > - } while (size -= granule); > - } > - } else { > - reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : > - ARM_SMMU_CB_S2_TLBIIPAS2; > - iova >>= 12; > + if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { > + iova &= ~12UL; Oh baby. You should move code around more often, so I'm forced to take a second look! Can you cook a fix for this that we can route separately, please? I see it also made its way into qcom_iommu.c... Will
On 15/08/2019 11:56, Will Deacon wrote: > On Fri, Aug 09, 2019 at 06:07:42PM +0100, Robin Murphy wrote: >> Since we now use separate iommu_gather_ops for stage 1 and stage 2 >> contexts, we may as well divide up the monolithic callback into its >> respective stage 1 and stage 2 parts. >> >> Signed-off-by: Robin Murphy <robin.murphy@arm.com> >> --- >> drivers/iommu/arm-smmu.c | 66 ++++++++++++++++++++++------------------ >> 1 file changed, 37 insertions(+), 29 deletions(-) > > This will conflict with my iommu API batching stuff, but I can sort that > out if/when it gets queued by Joerg. > >> - if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { >> - iova &= ~12UL; >> - iova |= cfg->asid; >> - do { >> - writel_relaxed(iova, reg); >> - iova += granule; >> - } while (size -= granule); >> - } else { >> - iova >>= 12; >> - iova |= (u64)cfg->asid << 48; >> - do { >> - writeq_relaxed(iova, reg); >> - iova += granule >> 12; >> - } while (size -= granule); >> - } >> - } else { >> - reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : >> - ARM_SMMU_CB_S2_TLBIIPAS2; >> - iova >>= 12; >> + if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { >> + iova &= ~12UL; > > Oh baby. You should move code around more often, so I'm forced to take a > second look! Oh dear lord... The worst part is that I do now remember seeing this and having a similar moment of disbelief, but apparently I was easily distracted with rebasing and forgot about it too quickly :( > Can you cook a fix for this that we can route separately, please? I see > it also made its way into qcom_iommu.c... Sure, I'll split it out to the front of the series for the moment. Robin.
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 463bc8d98adb..a681e000e704 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -490,46 +490,54 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie) arm_smmu_tlb_sync_global(smmu); } -static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) +static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; - void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); + void __iomem *reg = ARM_SMMU_CB(smmu, cfg->cbndx); - if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) wmb(); - if (stage1) { - reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; + reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; - if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { - iova &= ~12UL; - iova |= cfg->asid; - do { - writel_relaxed(iova, reg); - iova += granule; - } while (size -= granule); - } else { - iova >>= 12; - iova |= (u64)cfg->asid << 48; - do { - writeq_relaxed(iova, reg); - iova += granule >> 12; - } while (size -= granule); - } - } else { - reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : - ARM_SMMU_CB_S2_TLBIIPAS2; - iova >>= 12; + if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { + iova &= ~12UL; + iova |= cfg->asid; do { - smmu_write_atomic_lq(iova, reg); + writel_relaxed(iova, reg); + iova += granule; + } while (size -= granule); + } else { + iova >>= 12; + iova |= (u64)cfg->asid << 48; + do { + writeq_relaxed(iova, reg); iova += granule >> 12; } while (size -= granule); } } +static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + void __iomem *reg = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx); + + if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); + + reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2; + iova >>= 12; + do { + smmu_write_atomic_lq(iova, reg); + iova += granule >> 12; + } while (size -= granule); +} + /* * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears * almost negligible, but the benefit of getting the first one in as far ahead @@ -550,13 +558,13 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = { .tlb_flush_all = arm_smmu_tlb_inv_context_s1, - .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, + .tlb_add_flush = arm_smmu_tlb_inv_range_s1, .tlb_sync = arm_smmu_tlb_sync_context, }; static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = { .tlb_flush_all = arm_smmu_tlb_inv_context_s2, - .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, + .tlb_add_flush = arm_smmu_tlb_inv_range_s2, .tlb_sync = arm_smmu_tlb_sync_context, };
Since we now use separate iommu_gather_ops for stage 1 and stage 2 contexts, we may as well divide up the monolithic callback into its respective stage 1 and stage 2 parts. Signed-off-by: Robin Murphy <robin.murphy@arm.com> --- drivers/iommu/arm-smmu.c | 66 ++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 29 deletions(-)