Message ID: 20230809011204.v5.5.I219054a6cf538df5bb22f4ada2d9933155d6058c@changeid
State: New, archived
Series: Refactor the SMMU's CD table ownership
On Wed, Aug 09, 2023 at 01:12:01AM +0800, Michael Shavit wrote:
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> index 968559d625c40..e3992a0c16377 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> @@ -37,6 +37,24 @@ struct arm_smmu_bond {
>
>  static DEFINE_MUTEX(sva_lock);
>
> +static int arm_smmu_write_ctx_desc_devices(struct arm_smmu_domain *smmu_domain,
> +					   int ssid,
> +					   struct arm_smmu_ctx_desc *cd)
> +{
> +	struct arm_smmu_master *master;
> +	unsigned long flags;
> +	int ret;
> +
> +	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> +	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
> +		ret = arm_smmu_write_ctx_desc(master, ssid, cd);
> +		if (ret)
> +			break;
> +	}
> +	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> +	return ret;
> +}
> +
>  /*
>   * Check if the CPU ASID is available on the SMMU side. If a private context
>   * descriptor is using it, try to replace it.
> @@ -80,7 +98,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
>  	 * be some overlap between use of both ASIDs, until we invalidate the
>  	 * TLB.
>  	 */
> -	arm_smmu_write_ctx_desc(smmu_domain, 0, cd);
> +	arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
>
>  	/* Invalidate TLB entries previously associated with that context */
>  	arm_smmu_tlb_inv_asid(smmu, asid);
> @@ -222,7 +240,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
>  	 * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events,
>  	 * but disable translation.
>  	 */
> -	arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, &quiet_cd);
> +	arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
>
>  	arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid);
>  	arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0);
> @@ -279,9 +297,11 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
>  		goto err_free_cd;
>  	}
>
> -	ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
> -	if (ret)
> +	ret = arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, cd);
> +	if (ret) {
> +		arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);

Why is it safe to drop the lock between these two calls?

> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index c01023404c26c..34bd7815aeb8e 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -971,14 +971,12 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
>  	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
>  }
>
> -static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
> +static void arm_smmu_sync_cd(struct arm_smmu_master *master,
>  			     int ssid, bool leaf)
>  {
>  	size_t i;
> -	unsigned long flags;
> -	struct arm_smmu_master *master;
>  	struct arm_smmu_cmdq_batch cmds;
> -	struct arm_smmu_device *smmu = smmu_domain->smmu;
> +	struct arm_smmu_device *smmu = master->smmu;
>  	struct arm_smmu_cmdq_ent cmd = {
>  		.opcode	= CMDQ_OP_CFGI_CD,
>  		.cfgi	= {
> @@ -988,15 +986,10 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
>  	};
>
>  	cmds.num = 0;
> -
> -	spin_lock_irqsave(&smmu_domain->devices_lock, flags);

Since you're dropping this and relying on the lock being taken higher up
callstack, can we add a lockdep assertion that we do actually hold the
devices_lock, please?

Will
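[The assertion Will asks for would presumably be a lockdep annotation at the
top of the reworked arm_smmu_sync_cd(). A minimal sketch, assuming the lock
is reachable via master->domain; as later messages in the thread show, the
two call paths actually rely on different locks, so the placement is
illustrative only:]

static void arm_smmu_sync_cd(struct arm_smmu_master *master,
			     int ssid, bool leaf)
{
	size_t i;
	struct arm_smmu_cmdq_batch cmds;
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_CD,
		.cfgi	= {
			.ssid	= ssid,
			.leaf	= leaf,
		},
	};

	/* Catch callers that reach this point without the lock held */
	lockdep_assert_held(&master->domain->devices_lock);

	cmds.num = 0;
	for (i = 0; i < master->num_streams; i++) {
		cmd.cfgi.sid = master->streams[i].id;
		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
	}

	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}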
On Wed, Aug 9, 2023 at 9:50 PM Will Deacon <will@kernel.org> wrote:
> > -	ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
> > -	if (ret)
> > +	ret = arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, cd);
> > +	if (ret) {
> > +		arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
>
> Why is it safe to drop the lock between these two calls?

Hmmm this is a tricky question.

Tracing through the SVA flow, it seems like there's a scenario where
multiple masters (with the same upstream SMMU device) can be attached
to the same primary/non-sva domain, in which case calling
iommu_attach_device_pasid on one device will write the CD entry for
both masters. This is still the case even with this patch series, and
changing this behavior will be the subject of a separate follow-up.

This is weird, especially since the second master need not even have
the sva_enabled bit set. This also means that the list of attached
masters can indeed change between these two calls if that second
master (not the one used on the iommu_attach_device_pasid call leading
to this code) is detached/attached at the same time. It's hard for me
to reason about whether this is safe or not, since this is already
weird behavior...

> Since you're dropping this and relying on the lock being taken higher up
> callstack, can we add a lockdep assertion that we do actually hold the
> devices_lock, please?

Will do!
On Thu, Aug 10, 2023 at 05:15:50PM +0800, Michael Shavit wrote:
> On Wed, Aug 9, 2023 at 9:50 PM Will Deacon <will@kernel.org> wrote:
> > > -	ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
> > > -	if (ret)
> > > +	ret = arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, cd);
> > > +	if (ret) {
> > > +		arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
> >
> > Why is it safe to drop the lock between these two calls?
>
> Hmmm this is a tricky question.
> Tracing through the SVA flow, it seems like there's a scenario where
> multiple masters (with the same upstream SMMU device) can be attached
> to the same primary/non-sva domain, in which case calling
> iommu_attach_device_pasid on one device will write the CD entry for
> both masters. This is still the case even with this patch series, and
> changing this behavior will be the subject of a separate follow-up.
> This is weird, especially since the second master need not even have
> the sva_enabled bit set. This also means that the list of attached
> masters can indeed change between these two calls if that second
> master (not the one used on the iommu_attach_device_pasid call leading
> to this code) is detached/attached at the same time. It's hard for me
> to reason about whether this is safe or not, since this is already
> weird behavior...

I really think the writing of the context descriptors should look atomic;
dropping the lock half way through a failed update and then coming back
to NULL them out definitely isn't correct. So I think you've probably pushed
the locking too far down the stack.

Will
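[For reference, one way to make the update look atomic in the sense Will
describes is to unwind inside the same critical section. This is our
sketch against the helper from the patch, not code from the series;
list_for_each_entry_continue_reverse() walks back over only the masters
already written:]

static int arm_smmu_write_ctx_desc_devices(struct arm_smmu_domain *smmu_domain,
					   int ssid,
					   struct arm_smmu_ctx_desc *cd)
{
	struct arm_smmu_master *master;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		ret = arm_smmu_write_ctx_desc(master, ssid, cd);
		if (ret) {
			/*
			 * Unwind the entries already written before anyone
			 * can observe a half-applied update.
			 */
			list_for_each_entry_continue_reverse(master,
					&smmu_domain->devices, domain_head)
				arm_smmu_write_ctx_desc(master, ssid, NULL);
			break;
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
	return ret;
}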
On Thu, Aug 10, 2023 at 03:40:52PM +0100, Will Deacon wrote:
> On Thu, Aug 10, 2023 at 05:15:50PM +0800, Michael Shavit wrote:
> > On Wed, Aug 9, 2023 at 9:50 PM Will Deacon <will@kernel.org> wrote:
> > > > -	ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
> > > > -	if (ret)
> > > > +	ret = arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, cd);
> > > > +	if (ret) {
> > > > +		arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
> > >
> > > Why is it safe to drop the lock between these two calls?
> >
> > Hmmm this is a tricky question.
> > Tracing through the SVA flow, it seems like there's a scenario where
> > multiple masters (with the same upstream SMMU device) can be attached
> > to the same primary/non-sva domain, in which case calling
> > iommu_attach_device_pasid on one device will write the CD entry for
> > both masters. This is still the case even with this patch series, and
> > changing this behavior will be the subject of a separate follow-up.
> > This is weird, especially since the second master need not even have
> > the sva_enabled bit set. This also means that the list of attached
> > masters can indeed change between these two calls if that second
> > master (not the one used on the iommu_attach_device_pasid call leading
> > to this code) is detached/attached at the same time. It's hard for me
> > to reason about whether this is safe or not, since this is already
> > weird behavior...
>
> I really think the writing of the context descriptors should look atomic;
> dropping the lock half way through a failed update and then coming back
> to NULL them out definitely isn't correct. So I think you've probably pushed
> the locking too far down the stack.

Urk, the issue is that progressive refactorings have left this kind-of
wrong.

Basically we always have a singular master we are supposed to be
installing the SVA domain into a PASID for, we just need to load the
CD table entry into that master's existing CD table.

Actually, I don't think this even works as nothing on the PASID path
adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??

Then the remaining two calls:

 arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
   arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);

This is OK only under the sketchy assumption that the CD
we extracted for a conflicting ASID is not assigned to a PASID.

 static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
   arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);

This doesn't work because we didn't add the master to the list
during __arm_smmu_sva_bind and this path is expressly working
on the PASID binds, not the RID binds.

This is all far too complicated. We really need to get this series:

https://lore.kernel.org/linux-iommu/20230808074944.7825-1-tina.zhang@intel.com/

And rip out all this crazy code in the drivers trying to de-duplicate
the SVA domains. The core code will do it, the driver can assume it
has exactly one SVA domain per mm and do sane and simple things. :(

Maybe for now we just accept that quiet_cd doesn't work, it is a minor
issue and your next series fixes it, right?
Anyhow, something like this will fix what Will pointed to, probably as
an additional prep patch:

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index e3992a0c163779..8e751ba91e810a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -297,12 +297,6 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
 		goto err_free_cd;
 	}
 
-	ret = arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, cd);
-	if (ret) {
-		arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
-		goto err_put_notifier;
-	}
-
 	list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers);
 	return smmu_mn;
 
@@ -325,8 +319,6 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
 
 	list_del(&smmu_mn->list);
 
-	arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
-
 	/*
 	 * If we went through clear(), we've already invalidated, and no
 	 * new TLB entry can have been formed.
@@ -342,7 +334,7 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
 }
 
 static struct iommu_sva *
-__arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
+__arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, ioasid_t ssid)
 {
 	int ret;
 	struct arm_smmu_bond *bond;
@@ -375,9 +367,15 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
 		goto err_free_bond;
 	}
 
+	ret = arm_smmu_write_ctx_desc(master, ssid, bond->smmu_mn->cd);
+	if (ret)
+		goto err_put_notifier;
+
 	list_add(&bond->list, &master->bonds);
 	return &bond->sva;
 
+err_put_notifier:
+	arm_smmu_mmu_notifier_put(bond->smmu_mn);
 err_free_bond:
 	kfree(bond);
 	return ERR_PTR(ret);
@@ -548,6 +546,7 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
 
 	if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) {
 		list_del(&bond->list);
+		arm_smmu_write_ctx_desc(master, id, NULL);
 		arm_smmu_mmu_notifier_put(bond->smmu_mn);
 		kfree(bond);
 	}
@@ -562,7 +561,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
 	struct mm_struct *mm = domain->mm;
 
 	mutex_lock(&sva_lock);
-	handle = __arm_smmu_sva_bind(dev, mm);
+	handle = __arm_smmu_sva_bind(dev, mm, id);
 	if (IS_ERR(handle))
 		ret = PTR_ERR(handle);
 	mutex_unlock(&sva_lock);
On Thu, Aug 10, 2023 at 5:15 PM Michael Shavit <mshavit@google.com> wrote:
> On Wed, Aug 9, 2023 at 9:50 PM Will Deacon <will@kernel.org> wrote:
> >
> > Since you're dropping this and relying on the lock being taken higher up
> > callstack, can we add a lockdep assertion that we do actually hold the
> > devices_lock, please?
>
> Will do!

I spoke too soon; the point of this change was to remove the
dependency on the arm_smmu_domain, and piping the devices_lock through
would defeat this. In fact, this section really depends on the iommu
group lock, not the devices_lock.
On Thu, Aug 10, 2023 at 11:39 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> Actually, I don't think this even works as nothing on the PASID path
> adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??
>
> Then the remaining two calls:
>
>  arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
>    arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
>
> This is OK only under the sketchy assumption that the CD
> we extracted for a conflicting ASID is not assigned to a PASID.
>
>  static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
>    arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
>
> This doesn't work because we didn't add the master to the list
> during __arm_smmu_sva_bind and this path is expressly working
> on the PASID binds, not the RID binds.

Actually it is working on the RID attached domain (as returned by
iommu_get_domain_for_dev() at sva_bind time), not the SVA domain
here... The arm SVA implementation completely dismisses the SVA
handle (I also have a patch to fix this ;) . Need to find the time
to polish and send out).
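[For context, the bind-time lookup Michael refers to would look roughly
like the following sketch; rid_domain_at_bind() is a hypothetical helper
name, while to_smmu_domain() is the driver's existing container_of()
wrapper:]

static struct arm_smmu_domain *rid_domain_at_bind(struct device *dev)
{
	/*
	 * At __arm_smmu_sva_bind() time, capture whatever RID domain is
	 * currently attached to the device.
	 */
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	return to_smmu_domain(domain);
}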
On Tue, Aug 15, 2023 at 01:04:43PM +0800, Michael Shavit wrote:
> On Thu, Aug 10, 2023 at 5:15 PM Michael Shavit <mshavit@google.com> wrote:
> > On Wed, Aug 9, 2023 at 9:50 PM Will Deacon <will@kernel.org> wrote:
> > >
> > > Since you're dropping this and relying on the lock being taken higher up
> > > callstack, can we add a lockdep assertion that we do actually hold the
> > > devices_lock, please?
> >
> > Will do!
>
> I spoke too soon; the point of this change was to remove the
> dependency on the arm_smmu_domain, and piping the devices_lock through
> would defeat this. In fact, this section really depends on the iommu
> group lock, not the devices_lock.

Sorry, but I'm not following you here. What is the devices_lock protecting
if we're depending on the group lock?

Will
On Tue, Aug 15, 2023 at 01:20:04PM +0800, Michael Shavit wrote:
> On Thu, Aug 10, 2023 at 11:39 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > Actually, I don't think this even works as nothing on the PASID path
> > adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??
> >
> > Then the remaining two calls:
> >
> >  arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
> >    arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
> >
> > This is OK only under the sketchy assumption that the CD
> > we extracted for a conflicting ASID is not assigned to a PASID.
> >
> >  static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> >    arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
> >
> > This doesn't work because we didn't add the master to the list
> > during __arm_smmu_sva_bind and this path is expressly working
> > on the PASID binds, not the RID binds.
>
> Actually it is working on the RID attached domain (as returned by
> iommu_get_domain_for_dev() at sva_bind time), not the SVA domain
> here...

That can't be right, the purpose of that call and arm_smmu_mm_release is to
disable the PASID that is about to UAF the mm's page table.

Jason
On Tue, Aug 15, 2023 at 6:19 PM Will Deacon <will@kernel.org> wrote:
>
> On Tue, Aug 15, 2023 at 01:04:43PM +0800, Michael Shavit wrote:
> > On Thu, Aug 10, 2023 at 5:15 PM Michael Shavit <mshavit@google.com> wrote:
> > > On Wed, Aug 9, 2023 at 9:50 PM Will Deacon <will@kernel.org> wrote:
> > > >
> > > > Since you're dropping this and relying on the lock being taken higher up
> > > > callstack, can we add a lockdep assertion that we do actually hold the
> > > > devices_lock, please?
> > >
> > > Will do!
> >
> > I spoke too soon; the point of this change was to remove the
> > dependency on the arm_smmu_domain, and piping the devices_lock through
> > would defeat this. In fact, this section really depends on the iommu
> > group lock, not the devices_lock.
>
> Sorry, but I'm not following you here. What is the devices_lock protecting
> if we're depending on the group lock?
>
> Will

Ugh, yes sorry, we do need the devices_lock held *in the SVA* call
flow. The group lock is ineffective there because SVA performs
cross-master operations on iommu_domain callbacks. There, the
devices_lock is needed to ensure that the list of masters that a
domain is attached to doesn't mutate while we operate on that domain.

Nonetheless, it feels awkward to require the devices_lock to be held
outside SVA, since there we operate on a single master/CD table
instead and don't care what other masters that domain is attached to.
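[Concretely, the two call patterns being contrasted look something like
the following sketch; the function names are ours, for illustration only:]

/* SVA flow: cross-master, so the devices_lock must be held to keep the
 * domain's master list stable while we walk it. */
static void sva_path_example(struct arm_smmu_domain *smmu_domain,
			     int ssid, struct arm_smmu_ctx_desc *cd)
{
	struct arm_smmu_master *master;
	unsigned long flags;

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head)
		arm_smmu_write_ctx_desc(master, ssid, cd);
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
}

/* Non-SVA (RID attach) flow: a single master's CD table is written,
 * serialized by locking taken in the iommu core, so the devices_lock
 * buys nothing here. */
static void rid_attach_example(struct arm_smmu_master *master,
			       struct arm_smmu_ctx_desc *cd)
{
	arm_smmu_write_ctx_desc(master, 0, cd);
}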
On Tue, Aug 15, 2023 at 7:38 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Tue, Aug 15, 2023 at 01:20:04PM +0800, Michael Shavit wrote:
> > On Thu, Aug 10, 2023 at 11:39 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > >
> > > Actually, I don't think this even works as nothing on the PASID path
> > > adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??
> > >
> > > Then the remaining two calls:
> > >
> > >  arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
> > >    arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
> > >
> > > This is OK only under the sketchy assumption that the CD
> > > we extracted for a conflicting ASID is not assigned to a PASID.
> > >
> > >  static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> > >    arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
> > >
> > > This doesn't work because we didn't add the master to the list
> > > during __arm_smmu_sva_bind and this path is expressly working
> > > on the PASID binds, not the RID binds.
> >
> > Actually it is working on the RID attached domain (as returned by
> > iommu_get_domain_for_dev() at sva_bind time), not the SVA domain
> > here...
>
> That can't be right, the purpose of that call and arm_smmu_mm_release is to
> disable the PASID that is about to UAF the mm's page table.
>
> Jason

For the sake of this message, let's call "primary domain" whatever RID
domain was attached to a master at the time set_dev_pasid() was called
on that master. That RID domain is locked in while SVA is enabled and
cannot be detached.

The arm-smmu-v3-sva.c implementation creates a mapping between an SVA
domain and this primary domain (through the SVA domain's mm). In
arm_smmu_mm_release, the primary domain is looked up and
arm_smmu_write_ctx_desc() is called on all masters that this domain is
attached to.
On Tue, Aug 15, 2023 at 08:03:40PM +0800, Michael Shavit wrote:
> On Tue, Aug 15, 2023 at 7:38 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > On Tue, Aug 15, 2023 at 01:20:04PM +0800, Michael Shavit wrote:
> > > On Thu, Aug 10, 2023 at 11:39 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > > >
> > > > Actually, I don't think this even works as nothing on the PASID path
> > > > adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??
> > > >
> > > > Then the remaining two calls:
> > > >
> > > >  arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
> > > >    arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
> > > >
> > > > This is OK only under the sketchy assumption that the CD
> > > > we extracted for a conflicting ASID is not assigned to a PASID.
> > > >
> > > >  static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> > > >    arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
> > > >
> > > > This doesn't work because we didn't add the master to the list
> > > > during __arm_smmu_sva_bind and this path is expressly working
> > > > on the PASID binds, not the RID binds.
> > >
> > > Actually it is working on the RID attached domain (as returned by
> > > iommu_get_domain_for_dev() at sva_bind time), not the SVA domain
> > > here...
> >
> > That can't be right, the purpose of that call and arm_smmu_mm_release is to
> > disable the PASID that is about to UAF the mm's page table.
> >
> > Jason
>
> For the sake of this message, let's call "primary domain" whatever RID
> domain was attached to a master at the time set_dev_pasid() was called
> on that master. That RID domain is locked in while SVA is enabled and
> cannot be detached.
>
> The arm-smmu-v3-sva.c implementation creates a mapping between an SVA
> domain and this primary domain (through the SVA domain's mm). In
> arm_smmu_mm_release, the primary domain is looked up and
> arm_smmu_write_ctx_desc() is called on all masters that this domain is
> attached to.

My question is still the same - how does arm_smmu_mm_release update the
Context descriptor table entry for the *PASID*?

The RID on PASID 0 hasn't changed and doesn't need updating.

Jason
On Tue, Aug 15, 2023 at 8:30 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Tue, Aug 15, 2023 at 08:03:40PM +0800, Michael Shavit wrote:
> > On Tue, Aug 15, 2023 at 7:38 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > >
> > > On Tue, Aug 15, 2023 at 01:20:04PM +0800, Michael Shavit wrote:
> > > > On Thu, Aug 10, 2023 at 11:39 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > > > >
> > > > > Actually, I don't think this even works as nothing on the PASID path
> > > > > adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??
> > > > >
> > > > > Then the remaining two calls:
> > > > >
> > > > >  arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
> > > > >    arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
> > > > >
> > > > > This is OK only under the sketchy assumption that the CD
> > > > > we extracted for a conflicting ASID is not assigned to a PASID.
> > > > >
> > > > >  static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> > > > >    arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
> > > > >
> > > > > This doesn't work because we didn't add the master to the list
> > > > > during __arm_smmu_sva_bind and this path is expressly working
> > > > > on the PASID binds, not the RID binds.
> > > >
> > > > Actually it is working on the RID attached domain (as returned by
> > > > iommu_get_domain_for_dev() at sva_bind time), not the SVA domain
> > > > here...
> > >
> > > That can't be right, the purpose of that call and arm_smmu_mm_release is to
> > > disable the PASID that is about to UAF the mm's page table.
> > >
> > > Jason
> >
> > For the sake of this message, let's call "primary domain" whatever RID
> > domain was attached to a master at the time set_dev_pasid() was called
> > on that master. That RID domain is locked in while SVA is enabled and
> > cannot be detached.
> >
> > The arm-smmu-v3-sva.c implementation creates a mapping between an SVA
> > domain and this primary domain (through the SVA domain's mm). In
> > arm_smmu_mm_release, the primary domain is looked up and
> > arm_smmu_write_ctx_desc() is called on all masters that this domain is
> > attached to.
>
> My question is still the same - how does arm_smmu_mm_release update the
> Context descriptor table entry for the *PASID*?
>
> The RID on PASID 0 hasn't changed and doesn't need updating.
>
> Jason

arm_smmu_mm_release looks up the CD table(s) to write using the
primary domain's device list, and finds the index into those CD
table(s) to write to using mm->pasid.
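[That indexing is visible in the patch itself: for the linear
(non-two-level) case, arm_smmu_get_cd_ptr() resolves the entry roughly
as in this sketch; cd_entry() is a hypothetical helper name:]

static __le64 *cd_entry(struct arm_smmu_ctx_desc_cfg *cdcfg, u32 ssid)
{
	/*
	 * Each context descriptor occupies CTXDESC_CD_DWORDS 64-bit
	 * words, so the SSID (mm->pasid here) is a linear index into
	 * the table.
	 */
	return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
}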
On Tue, Aug 15, 2023 at 08:36:55PM +0800, Michael Shavit wrote:
> On Tue, Aug 15, 2023 at 8:30 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > On Tue, Aug 15, 2023 at 08:03:40PM +0800, Michael Shavit wrote:
> > > On Tue, Aug 15, 2023 at 7:38 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > > >
> > > > On Tue, Aug 15, 2023 at 01:20:04PM +0800, Michael Shavit wrote:
> > > > > On Thu, Aug 10, 2023 at 11:39 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > > > > >
> > > > > > Actually, I don't think this even works as nothing on the PASID path
> > > > > > adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??
> > > > > >
> > > > > > Then the remaining two calls:
> > > > > >
> > > > > >  arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
> > > > > >    arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
> > > > > >
> > > > > > This is OK only under the sketchy assumption that the CD
> > > > > > we extracted for a conflicting ASID is not assigned to a PASID.
> > > > > >
> > > > > >  static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> > > > > >    arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
> > > > > >
> > > > > > This doesn't work because we didn't add the master to the list
> > > > > > during __arm_smmu_sva_bind and this path is expressly working
> > > > > > on the PASID binds, not the RID binds.
> > > > >
> > > > > Actually it is working on the RID attached domain (as returned by
> > > > > iommu_get_domain_for_dev() at sva_bind time), not the SVA domain
> > > > > here...
> > > >
> > > > That can't be right, the purpose of that call and arm_smmu_mm_release is to
> > > > disable the PASID that is about to UAF the mm's page table.
> > > >
> > > > Jason
> > >
> > > For the sake of this message, let's call "primary domain" whatever RID
> > > domain was attached to a master at the time set_dev_pasid() was called
> > > on that master. That RID domain is locked in while SVA is enabled and
> > > cannot be detached.
> > >
> > > The arm-smmu-v3-sva.c implementation creates a mapping between an SVA
> > > domain and this primary domain (through the SVA domain's mm). In
> > > arm_smmu_mm_release, the primary domain is looked up and
> > > arm_smmu_write_ctx_desc() is called on all masters that this domain is
> > > attached to.
> >
> > My question is still the same - how does arm_smmu_mm_release update the
> > Context descriptor table entry for the *PASID*?
> >
> > The RID on PASID 0 hasn't changed and doesn't need updating.
> >
> > Jason
>
> arm_smmu_mm_release looks up the CD table(s) to write using the
> primary domain's device list, and finds the index into those CD
> table(s) to write to using mm->pasid.

Oh.. I don't think I caught that detail - that at this point the
arm_smmu_write_ctx_desc_devices() argument must always be the RID
domain. Maybe add a comment to describe that? And let's try to undo
that later :(

Thanks,
Jason
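[A comment along the lines Jason requests might read as follows; the
wording is our sketch, not text from the thread:]

/*
 * NOTE: @smmu_domain must be the RID (non-SVA) domain. The SVA paths
 * resolve mm -> RID domain and use that domain's device list to find
 * each master's CD table; the SVA domain itself is never passed here.
 * @ssid picks the entry within each table (mm->pasid for SVA).
 */
static int arm_smmu_write_ctx_desc_devices(struct arm_smmu_domain *smmu_domain,
					   int ssid,
					   struct arm_smmu_ctx_desc *cd);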
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 968559d625c40..e3992a0c16377 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -37,6 +37,24 @@ struct arm_smmu_bond {
 
 static DEFINE_MUTEX(sva_lock);
 
+static int arm_smmu_write_ctx_desc_devices(struct arm_smmu_domain *smmu_domain,
+					   int ssid,
+					   struct arm_smmu_ctx_desc *cd)
+{
+	struct arm_smmu_master *master;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+		ret = arm_smmu_write_ctx_desc(master, ssid, cd);
+		if (ret)
+			break;
+	}
+	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+	return ret;
+}
+
 /*
  * Check if the CPU ASID is available on the SMMU side. If a private context
  * descriptor is using it, try to replace it.
@@ -80,7 +98,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
 	 * be some overlap between use of both ASIDs, until we invalidate the
 	 * TLB.
 	 */
-	arm_smmu_write_ctx_desc(smmu_domain, 0, cd);
+	arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
 
 	/* Invalidate TLB entries previously associated with that context */
 	arm_smmu_tlb_inv_asid(smmu, asid);
@@ -222,7 +240,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 	 * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events,
 	 * but disable translation.
 	 */
-	arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, &quiet_cd);
+	arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
 
 	arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid);
 	arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0);
@@ -279,9 +297,11 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
 		goto err_free_cd;
 	}
 
-	ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
-	if (ret)
+	ret = arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, cd);
+	if (ret) {
+		arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
 		goto err_put_notifier;
+	}
 
 	list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers);
 	return smmu_mn;
@@ -304,7 +324,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
 		return;
 
 	list_del(&smmu_mn->list);
-	arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, NULL);
+
+	arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
 
 	/*
 	 * If we went through clear(), we've already invalidated, and no
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index c01023404c26c..34bd7815aeb8e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -971,14 +971,12 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 }
 
-static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
+static void arm_smmu_sync_cd(struct arm_smmu_master *master,
 			     int ssid, bool leaf)
 {
 	size_t i;
-	unsigned long flags;
-	struct arm_smmu_master *master;
 	struct arm_smmu_cmdq_batch cmds;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_device *smmu = master->smmu;
 	struct arm_smmu_cmdq_ent cmd = {
 		.opcode	= CMDQ_OP_CFGI_CD,
 		.cfgi	= {
@@ -988,15 +986,10 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
 	};
 
 	cmds.num = 0;
-
-	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
-	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
-		for (i = 0; i < master->num_streams; i++) {
-			cmd.cfgi.sid = master->streams[i].id;
-			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
-		}
+	for (i = 0; i < master->num_streams; i++) {
+		cmd.cfgi.sid = master->streams[i].id;
+		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 	}
-	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 
 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 }
@@ -1026,14 +1019,13 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst,
 	WRITE_ONCE(*dst, cpu_to_le64(val));
 }
 
-static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
-				   u32 ssid)
+static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
 {
 	__le64 *l1ptr;
 	unsigned int idx;
 	struct arm_smmu_l1_ctx_desc *l1_desc;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
+	struct arm_smmu_device *smmu = master->smmu;
+	struct arm_smmu_ctx_desc_cfg *cdcfg = &master->domain->cd_table;
 
 	if (!cdcfg->l1_desc)
 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
@@ -1047,13 +1039,13 @@ static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
 		/* An invalid L1CD can be cached */
-		arm_smmu_sync_cd(smmu_domain, ssid, false);
+		arm_smmu_sync_cd(master, ssid, false);
 	}
 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
 }
 
-int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
 			    struct arm_smmu_ctx_desc *cd)
 {
 	/*
@@ -1070,11 +1062,12 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
 	u64 val;
 	bool cd_live;
 	__le64 *cdptr;
+	struct arm_smmu_ctx_desc_cfg *cd_table = &master->domain->cd_table;
 
-	if (WARN_ON(ssid >= (1 << smmu_domain->cd_table.max_cds_bits)))
+	if (WARN_ON(ssid >= (1 << cd_table->max_cds_bits)))
 		return -E2BIG;
 
-	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+	cdptr = arm_smmu_get_cd_ptr(master, ssid);
 	if (!cdptr)
 		return -ENOMEM;
 
@@ -1102,7 +1095,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
 		 * order. Ensure that it observes valid values before reading
 		 * V=1.
 		 */
-		arm_smmu_sync_cd(smmu_domain, ssid, true);
+		arm_smmu_sync_cd(master, ssid, true);
 
 		val = cd->tcr |
 #ifdef __BIG_ENDIAN
@@ -1114,7 +1107,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
 			CTXDESC_CD_0_V;
 
-		if (smmu_domain->cd_table.stall_enabled)
+		if (cd_table->stall_enabled)
 			val |= CTXDESC_CD_0_S;
 	}
 
@@ -1128,7 +1121,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
 	 * without first making the structure invalid.
 	 */
 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
-	arm_smmu_sync_cd(smmu_domain, ssid, true);
+	arm_smmu_sync_cd(master, ssid, true);
 	return 0;
 }
 
@@ -1138,7 +1131,7 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain,
 	int ret;
 	size_t l1size;
 	size_t max_contexts;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_device *smmu = master->smmu;
 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
 
 	cdcfg->stall_enabled = master->stall_enabled;
@@ -2137,12 +2130,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
 	cd->mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
 
-	/*
-	 * Note that this will end up calling arm_smmu_sync_cd() before
-	 * the master has been added to the devices list for this domain.
-	 * This isn't an issue because the STE hasn't been installed yet.
-	 */
-	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, cd);
+	ret = arm_smmu_write_ctx_desc(master, 0, cd);
 	if (ret)
 		goto out_free_cd_tables;
 
@@ -2460,8 +2448,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 			ret = -EINVAL;
 			goto out_unlock;
 		} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
-			   smmu_domain->cd_table.stall_enabled !=
-				   master->stall_enabled) {
+			   smmu_domain->cd_table.stall_enabled != master->stall_enabled) {
 			ret = -EINVAL;
 			goto out_unlock;
 		}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 05b1f0ee60808..6066a09c01996 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -744,7 +744,7 @@ extern struct xarray arm_smmu_asid_xa;
 extern struct mutex arm_smmu_asid_lock;
 extern struct arm_smmu_ctx_desc quiet_cd;
 
-int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid,
 			    struct arm_smmu_ctx_desc *cd);
 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,