| Message ID | 24-v5-9a37e0c884ce+31e3-smmuv3_newapi_p2_jgg@nvidia.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | Update SMMUv3 to the modern iommu API (part 2/3) |
On Tue, Mar 5, 2024 at 7:44 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> BTM support is a feature where the CPU TLB invalidation can be forwarded
> to the IOMMU and also invalidate the IOTLB. For this to work the CPU and
> IOMMU ASID must be the same.
>
> Retain the prior SVA design here of keeping the ASID allocator for the
> IOMMU private to SMMU and force SVA domains to set an ASID that matches
> the CPU ASID.
>
> This requires changing the ASID assigned to a S1 domain if it happens to
> be overlapping with the required CPU ASID. We hold on to the CPU ASID so
> long as the SVA iommu_domain exists, so SVA domain conflict is not
> possible.
>
> With the asid per-smmu we no longer have a problem that two per-smmu
> iommu_domain's would need to share a CPU ASID entry in the IOMMU's xarray.
>
> Use the same ASID move algorithm for the S1 domains as before with some
> streamlining around how the xarray is being used. Do not synchronize the
> ASID's if BTM mode is not supported. Just leave BTM features off
> everywhere.
>
> Audit all the places that touch cd->asid and think carefully about how the
> locking works with the change to the cd->asid by the move algorithm. Use
> xarray internal locking during xa_alloc() instead of double locking. Add a
> note that concurrent S1 invalidation doesn't fully work.
>
> Note that this is all still dead code, ARM_SMMU_FEAT_BTM is never set.

Clearly a lot of work/thought has gone into this patch, but I have to
wonder if this complexity is worth carrying forward for a feature that
was never enabled... Is moving this to part 3 or dropping it all
together an option?

>
> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 133 ++++++++++++++++--
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  15 +-
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |   2 +-
>  3 files changed, 129 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> index 3acd699433b7d8..0b5aeaa3a85575 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> @@ -15,12 +15,33 @@
>
>  static DEFINE_MUTEX(sva_lock);
>
> -static void __maybe_unused
> -arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain)
> +static int arm_smmu_realloc_s1_domain_asid(struct arm_smmu_device *smmu,
> +                                           struct arm_smmu_domain *smmu_domain)
>  {
>         struct arm_smmu_master_domain *master_domain;
> +       u32 old_asid = smmu_domain->cd.asid;
>         struct arm_smmu_cd target_cd;
>         unsigned long flags;
> +       int ret;
> +
> +       lockdep_assert_held(&smmu->asid_lock);
> +
> +       /*
> +        * FIXME: The unmap and invalidation path doesn't take any locks but
> +        * this is not fully safe. Since updating the CD tables is not atomic
> +        * there is always a hole where invalidating only one ASID of two active
> +        * ASIDs during unmap will cause the IOTLB to become stale.
> +        *
> +        * This approach is to hopefully shift the racing CPUs to the new ASID
> +        * before we start programming the HW. This increases the chance that
> +        * racing IOPTE changes will pick up an invalidation for the new ASID
> +        * and we achieve eventual consistency. For the brief period where the
> +        * old ASID is still in the CD entries it will become incoherent.
> +        */
> +       ret = xa_alloc(&smmu->asid_map, &smmu_domain->cd.asid, smmu_domain,
> +                      XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
> +       if (ret)
> +               return ret;
>
>         spin_lock_irqsave(&smmu_domain->devices_lock, flags);
>         list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) {
> @@ -36,6 +57,10 @@ arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain)
>                                         &target_cd);
>         }
>         spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> +
> +       /* Clean the ASID we are about to assign to a new translation */
> +       arm_smmu_tlb_inv_asid(smmu, old_asid);
> +       return 0;
>  }
>
>  static u64 page_size_to_cd(void)
> @@ -148,12 +173,12 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
>         }
>
>         if (!smmu_domain->btm_invalidation) {
> +               ioasid_t asid = READ_ONCE(smmu_domain->cd.asid);
> +
>                 if (!size)
> -                       arm_smmu_tlb_inv_asid(smmu_domain->smmu,
> -                                             smmu_domain->cd.asid);
> +                       arm_smmu_tlb_inv_asid(smmu_domain->smmu, asid);
>                 else
> -                       arm_smmu_tlb_inv_range_asid(start, size,
> -                                                   smmu_domain->cd.asid,
> +                       arm_smmu_tlb_inv_range_asid(start, size, asid,
>                                                     PAGE_SIZE, false,
>                                                     smmu_domain);
>         }
> @@ -182,6 +207,8 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
>                 cdptr = arm_smmu_get_cd_ptr(master, master_domain->ssid);
>                 if (WARN_ON(!cdptr))
>                         continue;
> +
> +               /* An SVA ASID never changes, no asid_lock required */
>                 arm_smmu_make_sva_cd(&target, master, NULL,
>                                      smmu_domain->cd.asid,
>                                      smmu_domain->btm_invalidation);
> @@ -388,6 +415,8 @@ static void arm_smmu_sva_domain_free(struct iommu_domain *domain)
>          * unnecessary invalidation.
>          */
>         arm_smmu_domain_free_id(smmu_domain);
> +       if (smmu_domain->btm_invalidation)
> +               arm64_mm_context_put(domain->mm);
>
>         /*
>          * Actual free is defered to the SRCU callback
> @@ -401,13 +430,97 @@ static const struct iommu_domain_ops arm_smmu_sva_domain_ops = {
>         .free = arm_smmu_sva_domain_free
>  };
>
> +static int arm_smmu_share_asid(struct arm_smmu_device *smmu,
> +                              struct arm_smmu_domain *smmu_domain,
> +                              struct mm_struct *mm)
> +{
> +       struct arm_smmu_domain *old_s1_domain;
> +       int ret;
> +
> +       /*
> +        * Notice that BTM is never currently enabled, this is all dead code.
> +        * The specification cautions:
> +        *
> +        * Note: Arm expects that SMMU stage 2 address spaces are generally
> +        * shared with their respective PE virtual machine stage 2
> +        * configuration. If broadcast invalidation is required to be avoided
> +        * for a particular SMMU stage 2 address space, Arm recommends that a
> +        * hypervisor configures the STE with a VMID that is not allocated for
> +        * virtual machine use on the PEs
> +        *
> +        * However, in Linux, both KVM and SMMU think they own the VMID pool.
> +        * Unfortunately the ARM design is problematic for Linux as we do not
> +        * currently share the S2 table with KVM. This creates a situation where
> +        * the S2 needs to have the same VMID as KVM in order to allow the guest
> +        * to use BTM, however we must still invalidate the S2 directly since it
> +        * is a different radix tree. What Linux would like is something like
> +        * ASET for the STE to disable BTM only for the S2.
> +        *
> +        * Arguably in a system with BTM the driver should prefer to use a S1
> +        * table in all cases execpt when explicitly asked to create a nesting
> +        * parent. Then it should use the VMID of KVM to enable BTM in the
> +        * guest. We cannot optimize away the resulting double invalidation of
> +        * the S2 :( Or we simply ignore BTM entirely as we are doing now.
> +        */
> +       if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM))
> +               return xa_alloc(&smmu->asid_map, &smmu_domain->cd.asid,
> +                               smmu_domain,
> +                               XA_LIMIT(1, (1 << smmu->asid_bits) - 1),
> +                               GFP_KERNEL);
> +
> +       /* At this point the caller ensures we have a mmget() */
> +       smmu_domain->cd.asid = arm64_mm_context_get(mm);
> +
> +       mutex_lock(&smmu->asid_lock);
> +       old_s1_domain = xa_store(&smmu->asid_map, smmu_domain->cd.asid,
> +                                smmu_domain, GFP_KERNEL);
> +       if (xa_err(old_s1_domain)) {
> +               ret = xa_err(old_s1_domain);
> +               goto out_put_asid;
> +       }
> +
> +       /*
> +        * In BTM mode the CPU ASID and the IOMMU ASID have to be the same.
> +        * Unfortunately we run separate allocators for this and the IOMMU
> +        * ASID can already have been assigned to a S1 domain. SVA domains
> +        * always align to their CPU ASIDs. In this case we change
> +        * the S1 domain's ASID, update the CD entry and flush the caches.
> +        *
> +        * This is a bit tricky, all the places writing to a S1 CD, reading the
> +        * S1 ASID, or doing xa_erase must hold the asid_lock or xa_lock to
> +        * avoid IOTLB incoherence.
> +        */
> +       if (old_s1_domain) {
> +               if (WARN_ON(old_s1_domain->domain.type == IOMMU_DOMAIN_SVA)) {
> +                       ret = -EINVAL;
> +                       goto out_restore_s1;
> +               }
> +               ret = arm_smmu_realloc_s1_domain_asid(smmu, old_s1_domain);
> +               if (ret)
> +                       goto out_restore_s1;
> +       }
> +
> +       smmu_domain->btm_invalidation = true;
> +
> +       ret = 0;
> +       goto out_unlock;
> +
> +out_restore_s1:
> +       xa_store(&smmu->asid_map, smmu_domain->cd.asid, old_s1_domain,
> +                GFP_KERNEL);
> +out_put_asid:
> +       arm64_mm_context_put(mm);
> +out_unlock:
> +       mutex_unlock(&smmu->asid_lock);
> +       return ret;
> +}
> +
>  struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
>                                                struct mm_struct *mm)
>  {
>         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
>         struct arm_smmu_device *smmu = master->smmu;
>         struct arm_smmu_domain *smmu_domain;
> -       u32 asid;
>         int ret;
>
>         smmu_domain = arm_smmu_domain_alloc();
> @@ -418,12 +531,10 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
>         smmu_domain->domain.ops = &arm_smmu_sva_domain_ops;
>         smmu_domain->smmu = smmu;
>
> -       ret = xa_alloc(&smmu->asid_map, &asid, smmu_domain,
> -                      XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
> +       ret = arm_smmu_share_asid(smmu, smmu_domain, mm);
>         if (ret)
>                 goto err_free;
>
> -       smmu_domain->cd.asid = asid;
>         smmu_domain->mmu_notifier.ops = &arm_smmu_mmu_notifier_ops;
>         ret = mmu_notifier_register(&smmu_domain->mmu_notifier, mm);
>         if (ret)
> @@ -433,6 +544,8 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
>
>  err_asid:
>         arm_smmu_domain_free_id(smmu_domain);
> +       if (smmu_domain->btm_invalidation)
> +               arm64_mm_context_put(mm);
>  err_free:
>         kfree(smmu_domain);
>         return ERR_PTR(ret);
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 1a72dd63e0ca14..01737a0fc82828 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -1314,6 +1314,8 @@ void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
>         typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
>                 &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
>
> +       lockdep_assert_held(&master->smmu->asid_lock);
> +
>         memset(target, 0, sizeof(*target));
>
>         target->data[0] = cpu_to_le64(
> @@ -2089,7 +2091,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
>          * careful, 007.
>          */
>         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
> -               arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
> +               arm_smmu_tlb_inv_asid(smmu, READ_ONCE(smmu_domain->cd.asid));
>         } else {
>                 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
>                 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
> @@ -2334,17 +2336,10 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
>  static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
>                                        struct arm_smmu_domain *smmu_domain)
>  {
> -       int ret;
> -       u32 asid;
>         struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
>
> -       /* Prevent SVA from modifying the ASID until it is written to the CD */
> -       mutex_lock(&smmu->asid_lock);
> -       ret = xa_alloc(&smmu->asid_map, &asid, smmu_domain,
> -                      XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
> -       cd->asid = (u16)asid;
> -       mutex_unlock(&smmu->asid_lock);
> -       return ret;
> +       return xa_alloc(&smmu->asid_map, &cd->asid, smmu_domain,
> +                       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
>  }
>
>  static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 6becdbae905598..a23593f1830106 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -586,7 +586,7 @@ struct arm_smmu_strtab_l1_desc {
>  };
>
>  struct arm_smmu_ctx_desc {
> -       u16 asid;
> +       u32 asid;
>  };
>
>  struct arm_smmu_l1_ctx_desc {
> --
> 2.43.2
>
On Wed, Mar 20, 2024 at 01:07:01AM +0800, Michael Shavit wrote:
> On Tue, Mar 5, 2024 at 7:44 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > BTM support is a feature where the CPU TLB invalidation can be forwarded
> > to the IOMMU and also invalidate the IOTLB. For this to work the CPU and
> > IOMMU ASID must be the same.
> >
> > Retain the prior SVA design here of keeping the ASID allocator for the
> > IOMMU private to SMMU and force SVA domains to set an ASID that matches
> > the CPU ASID.
> >
> > This requires changing the ASID assigned to a S1 domain if it happens to
> > be overlapping with the required CPU ASID. We hold on to the CPU ASID so
> > long as the SVA iommu_domain exists, so SVA domain conflict is not
> > possible.
> >
> > With the asid per-smmu we no longer have a problem that two per-smmu
> > iommu_domain's would need to share a CPU ASID entry in the IOMMU's xarray.
> >
> > Use the same ASID move algorithm for the S1 domains as before with some
> > streamlining around how the xarray is being used. Do not synchronize the
> > ASID's if BTM mode is not supported. Just leave BTM features off
> > everywhere.
> >
> > Audit all the places that touch cd->asid and think carefully about how the
> > locking works with the change to the cd->asid by the move algorithm. Use
> > xarray internal locking during xa_alloc() instead of double locking. Add a
> > note that concurrent S1 invalidation doesn't fully work.
> >
> > Note that this is all still dead code, ARM_SMMU_FEAT_BTM is never set.
>
> Clearly a lot of work/thought has gone into this patch, but I have to
> wonder if this complexity is worth carrying forward for a feature that
> was never enabled... Is moving this to part 3 or dropping it all
> together an option?

Yes, we can do any of those.

If we drop it here then it will be moved to this series:

https://lore.kernel.org/linux-iommu/20240208151837.35068-1-shameerali.kolothum.thodi@huawei.com/

Which is the final bit of work to actually enable the feature. We are
getting quite close, it needs some of Nicolin's iommufd enablement work
to cross the finish line, I think.

I agree dead code like this should not be in the kernel.. Will? Please
advise

Jason
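The piece of the patch the thread is debating is the ASID-sharing decision in arm_smmu_share_asid(). Below is a minimal sketch condensed from the quoted patch for readers following along without the diff open: without ARM_SMMU_FEAT_BTM the SVA domain takes any free SMMU-private ASID, while with BTM it must claim the CPU's ASID and move an already-installed stage-1 domain out of the way. The helper name is made up for illustration, and the error unwinding and the SVA-collision WARN_ON from the real function are omitted.

```c
/*
 * Editorial sketch only: condenses the flow of arm_smmu_share_asid() from
 * the quoted patch. Failure cleanup (restoring the previous xarray entry,
 * dropping the CPU ASID reference) is left out for brevity.
 */
static int sva_claim_asid_sketch(struct arm_smmu_device *smmu,
                                 struct arm_smmu_domain *sva_domain,
                                 struct mm_struct *mm)
{
        struct arm_smmu_domain *owner;
        int ret = 0;

        /* No BTM: IOTLB tags are private to the SMMU, any free ASID works */
        if (!(smmu->features & ARM_SMMU_FEAT_BTM))
                return xa_alloc(&smmu->asid_map, &sva_domain->cd.asid,
                                sva_domain,
                                XA_LIMIT(1, (1 << smmu->asid_bits) - 1),
                                GFP_KERNEL);

        /* BTM: broadcast TLBIs carry the CPU ASID, so the CD must use it too */
        sva_domain->cd.asid = arm64_mm_context_get(mm);

        mutex_lock(&smmu->asid_lock);
        owner = xa_store(&smmu->asid_map, sva_domain->cd.asid, sva_domain,
                         GFP_KERNEL);
        if (xa_err(owner)) {
                ret = xa_err(owner);
        } else if (owner) {
                /*
                 * A stage-1 paging domain already holds this ASID: move it to
                 * a freshly allocated ASID, rewrite its CD entries and flush
                 * the old ASID so the SVA domain can own the CPU ASID.
                 */
                ret = arm_smmu_realloc_s1_domain_asid(smmu, owner);
        }
        if (!ret)
                sva_domain->btm_invalidation = true;
        mutex_unlock(&smmu->asid_lock);
        return ret;
}
```

In the patch itself the failure paths additionally restore the previous xarray owner and call arm64_mm_context_put() before releasing asid_lock.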