diff mbox series

[v5,5/9] iommu/arm-smmu-v3: Refactor write_ctx_desc

Message ID 20230809011204.v5.5.I219054a6cf538df5bb22f4ada2d9933155d6058c@changeid (mailing list archive)
State New, archived
Headers show
Series Refactor the SMMU's CD table ownership | expand

Commit Message

Michael Shavit Aug. 8, 2023, 5:12 p.m. UTC
Update arm_smmu_write_ctx_desc and downstream functions to operate on
a master instead of an smmu domain. We expect arm_smmu_write_ctx_desc()
to only be called to write a CD entry into a CD table owned by the
master. Under the hood, arm_smmu_write_ctx_desc still fetches the CD
table from the domain that is attached to the master, but a subsequent
commit will move that table's ownership to the master.

Note that this change isn't a nop refactor since SVA will call
arm_smmu_write_ctx_desc in a loop for every master the domain is
attached to despite the fact that they all share the same CD table. This
loop may look weird but becomes necessary when the CD table becomes
per-master in a subsequent commit.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Michael Shavit <mshavit@google.com>
---

(no changes since v3)

Changes in v3:
- Add a helper to write a CD to all masters that a domain is attached
  to.
- Fixed an issue where an arm_smmu_write_ctx_desc error return wasn't
  correctly handled by its caller.

Changes in v2:
- minor style fixes

Changes in v1:
- arm_smmu_write_ctx_desc now gets the CD table to write to from the
  master parameter instead of a distinct parameter. This works well
  because the CD table being written to should always be owned by the
  master by the end of this series. This version no longer allows master
  to be NULL.

 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 31 +++++++++--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 51 +++++++------------
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  2 +-
 3 files changed, 46 insertions(+), 38 deletions(-)

Comments

Will Deacon Aug. 9, 2023, 1:50 p.m. UTC | #1
On Wed, Aug 09, 2023 at 01:12:01AM +0800, Michael Shavit wrote:
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> index 968559d625c40..e3992a0c16377 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> @@ -37,6 +37,24 @@ struct arm_smmu_bond {
>  
>  static DEFINE_MUTEX(sva_lock);
>  
> +static int arm_smmu_write_ctx_desc_devices(struct arm_smmu_domain *smmu_domain,
> +					    int ssid,
> +					    struct arm_smmu_ctx_desc *cd)
> +{
> +	struct arm_smmu_master *master;
> +	unsigned long flags;
> +	int ret;
> +
> +	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> +	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
> +		ret = arm_smmu_write_ctx_desc(master, ssid, cd);
> +		if (ret)
> +			break;
> +	}
> +	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> +	return ret;
> +}
> +
>  /*
>   * Check if the CPU ASID is available on the SMMU side. If a private context
>   * descriptor is using it, try to replace it.
> @@ -80,7 +98,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
>  	 * be some overlap between use of both ASIDs, until we invalidate the
>  	 * TLB.
>  	 */
> -	arm_smmu_write_ctx_desc(smmu_domain, 0, cd);
> +	arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
>  
>  	/* Invalidate TLB entries previously associated with that context */
>  	arm_smmu_tlb_inv_asid(smmu, asid);
> @@ -222,7 +240,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
>  	 * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events,
>  	 * but disable translation.
>  	 */
> -	arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, &quiet_cd);
> +	arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
>  
>  	arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid);
>  	arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0);
> @@ -279,9 +297,11 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
>  		goto err_free_cd;
>  	}
>  
> -	ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
> -	if (ret)
> +	ret = arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, cd);
> +	if (ret) {
> +		arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);

Why is it safe to drop the lock between these two calls?

> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index c01023404c26c..34bd7815aeb8e 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -971,14 +971,12 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
>  	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
>  }
>  
> -static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
> +static void arm_smmu_sync_cd(struct arm_smmu_master *master,
>  			     int ssid, bool leaf)
>  {
>  	size_t i;
> -	unsigned long flags;
> -	struct arm_smmu_master *master;
>  	struct arm_smmu_cmdq_batch cmds;
> -	struct arm_smmu_device *smmu = smmu_domain->smmu;
> +	struct arm_smmu_device *smmu = master->smmu;
>  	struct arm_smmu_cmdq_ent cmd = {
>  		.opcode	= CMDQ_OP_CFGI_CD,
>  		.cfgi	= {
> @@ -988,15 +986,10 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
>  	};
>  
>  	cmds.num = 0;
> -
> -	spin_lock_irqsave(&smmu_domain->devices_lock, flags);

Since you're dropping this and relying on the lock being taken higher up
callstack, can we add a lockdep assertion that we do actually hold the
devices_lock, please?

Will
Michael Shavit Aug. 10, 2023, 9:15 a.m. UTC | #2
On Wed, Aug 9, 2023 at 9:50 PM Will Deacon <will@kernel.org> wrote:
>
> > -     ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
> > -     if (ret)
> > +     ret = arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, cd);
> > +     if (ret) {
> > +             arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
>
> Why is it safe to drop the lock between these two calls?

Hmmm this is a tricky question.
Tracing through the SVA flow, it seems like there's a scenario where
multiple masters (with the same upstream SMMU device) can be attached
to the same primary/non-sva domain, in which case calling
iommu_attach_device_pasid on one device will write the CD entry for
both masters. This is still the case even with this patch series, and
changing this behavior will be the subject of a separate follow-up.
This is weird, especially since the second master need not even have
the sva_enabled bit set. This also means that the list of attached
masters can indeed change between these two calls if that second
master (not the one used on the iommu_attach_device_pasid call leading
to this code) is detached/attached at the same time. It's hard for me
to reason about whether this is safe or not, since this is already
weird behavior...


>
> Since you're dropping this and relying on the lock being taken higher up
> callstack, can we add a lockdep assertion that we do actually hold the
> devices_lock, please?

Will do!
Will Deacon Aug. 10, 2023, 2:40 p.m. UTC | #3
On Thu, Aug 10, 2023 at 05:15:50PM +0800, Michael Shavit wrote:
> On Wed, Aug 9, 2023 at 9:50 PM Will Deacon <will@kernel.org> wrote:
> >
> > > -     ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
> > > -     if (ret)
> > > +     ret = arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, cd);
> > > +     if (ret) {
> > > +             arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
> >
> > Why is it safe to drop the lock between these two calls?
> 
> Hmmm this is a tricky question.
> Tracing through the SVA flow, it seems like there's a scenario where
> multiple masters (with the same upstream SMMU device) can be attached
> to the same primary/non-sva domain, in which case calling
> iommu_attach_device_pasid on one device will write the CD entry for
> both masters. This is still the case even with this patch series, and
> changing this behavior will be the subject of a separate follow-up.
> This is weird, especially since the second master need not even have
> the sva_enabled bit set. This also means that the list of attached
> masters can indeed change between these two calls if that second
> master (not the one used on the iommu_attach_device_pasid call leading
> to this code) is detached/attached at the same time. It's hard for me
> to reason about whether this is safe or not, since this is already
> weird behavior...

I really think the writing of the context descriptors should look atomic;
dropping the lock half way through a failed update and then coming back
to NULL them out definitely isn't correct. So I think you've probably pushed
the locking too far down the stack.

Will
Jason Gunthorpe Aug. 10, 2023, 3:39 p.m. UTC | #4
On Thu, Aug 10, 2023 at 03:40:52PM +0100, Will Deacon wrote:
> On Thu, Aug 10, 2023 at 05:15:50PM +0800, Michael Shavit wrote:
> > On Wed, Aug 9, 2023 at 9:50 PM Will Deacon <will@kernel.org> wrote:
> > >
> > > > -     ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
> > > > -     if (ret)
> > > > +     ret = arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, cd);
> > > > +     if (ret) {
> > > > +             arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
> > >
> > > Why is it safe to drop the lock between these two calls?
> > 
> > Hmmm this is a tricky question.
> > Tracing through the SVA flow, it seems like there's a scenario where
> > multiple masters (with the same upstream SMMU device) can be attached
> > to the same primary/non-sva domain, in which case calling
> > iommu_attach_device_pasid on one device will write the CD entry for
> > both masters. This is still the case even with this patch series, and
> > changing this behavior will be the subject of a separate follow-up.
> > This is weird, especially since the second master need not even have
> > the sva_enabled bit set. This also means that the list of attached
> > masters can indeed change between these two calls if that second
> > master (not the one used on the iommu_attach_device_pasid call leading
> > to this code) is detached/attached at the same time. It's hard for me
> > to reason about whether this is safe or not, since this is already
> > weird behavior...
> 
> I really think the writing of the context descriptors should look atomic;
> dropping the lock half way through a failed update and then coming back
> to NULL them out definitely isn't correct. So I think you've probably pushed
> the locking too far down the stack.

Urk, the issue is that progressive refactorings have left this kind of
wrong. 

Basically we always have a singular master we are supposed to be
installing the SVA domain into a PASID for, we just need to load the
CD table entry into that master's existing CD table.

Actually, I don't think this even works as nothing on the PASID path
adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??

Then the remaining two calls:

arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
        arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
	
	This is OK only if the sketchy assumption that the CD
	we extracted for a conflicting ASID is not assigned to a PASID.

static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
        arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);

	This doesn't work because we didn't add the master to the list
	during __arm_smmu_sva_bind and this path is expressly working
	on the PASID binds, not the RID binds.

This is all far too complicated. We really need to get this series:

https://lore.kernel.org/linux-iommu/20230808074944.7825-1-tina.zhang@intel.com/

And rip out all this crazy code in the drivers trying to de-duplicate
the SVA domains. The core code will do it, the driver can assume it
has exactly one SVA domain per mm and do sane and simple things. :(

Maybe for now we just accept that quiet_cd doesn't work, it is a minor
issue and your next series fixes it, right?

Anyhow, something like this will fix what Will pointed to, probably as
an additional prep patch:

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index e3992a0c163779..8e751ba91e810a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -297,12 +297,6 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
 		goto err_free_cd;
 	}
 
-	ret = arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, cd);
-	if (ret) {
-		arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
-		goto err_put_notifier;
-	}
-
 	list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers);
 	return smmu_mn;
 
@@ -325,8 +319,6 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
 
 	list_del(&smmu_mn->list);
 
-	arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
-
 	/*
 	 * If we went through clear(), we've already invalidated, and no
 	 * new TLB entry can have been formed.
@@ -342,7 +334,7 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
 }
 
 static struct iommu_sva *
-__arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
+__arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, ioasid_t ssid)
 {
 	int ret;
 	struct arm_smmu_bond *bond;
@@ -375,9 +367,15 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
 		goto err_free_bond;
 	}
 
+	ret = arm_smmu_write_ctx_desc(master, ssid, bond->smmu_mn->cd);
+	if (ret)
+		goto err_put_notifier;
+
 	list_add(&bond->list, &master->bonds);
 	return &bond->sva;
 
+err_put_notifier:
+	arm_smmu_mmu_notifier_put(bond->smmu_mn);
 err_free_bond:
 	kfree(bond);
 	return ERR_PTR(ret);
@@ -548,6 +546,7 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
 
 	if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) {
 		list_del(&bond->list);
+		arm_smmu_write_ctx_desc(master, id, NULL);
 		arm_smmu_mmu_notifier_put(bond->smmu_mn);
 		kfree(bond);
 	}
@@ -562,7 +561,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
 	struct mm_struct *mm = domain->mm;
 
 	mutex_lock(&sva_lock);
-	handle = __arm_smmu_sva_bind(dev, mm);
+	handle = __arm_smmu_sva_bind(dev, mm, id);
 	if (IS_ERR(handle))
 		ret = PTR_ERR(handle);
 	mutex_unlock(&sva_lock);
Michael Shavit Aug. 15, 2023, 5:04 a.m. UTC | #5
On Thu, Aug 10, 2023 at 5:15 PM Michael Shavit <mshavit@google.com> wrote:
> On Wed, Aug 9, 2023 at 9:50 PM Will Deacon <will@kernel.org> wrote:
> >
> > Since you're dropping this and relying on the lock being taken higher up
> > callstack, can we add a lockdep assertion that we do actually hold the
> > devices_lock, please?
>
> Will do!

I spoke too soon; the point of this change was to remove the
dependency on the arm_smmu_domain, piping the devices_lock would
defeat this. In fact, this section is really depending on the iommu
group lock not the devices_lock.
Michael Shavit Aug. 15, 2023, 5:20 a.m. UTC | #6
On Thu, Aug 10, 2023 at 11:39 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> Actually, I don't think this even works as nothing on the PASID path
> adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??
>
> Then the remaining two calls:
>
> arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
>         arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
>
>         This is OK only if the sketchy assumption that the CD
>         we extracted for a conflicting ASID is not asigned to a PASID.
>
> static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
>         arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
>
>         This doesn't work because we didn't add the master to the list
>         during __arm_smmu_sva_bind and this path is expressly working
>         on the PASID binds, not the RID binds.

Actually it is working on the RID attached domain (as returned by
iommu_get_domain_for_dev() at sva_bind time) not the SVA domain
here... The arm SVA implementation completely dismisses the SVA handle
(I also have a patch to fix this ;) .  Need to find the time to polish
and send out).
Will Deacon Aug. 15, 2023, 10:19 a.m. UTC | #7
On Tue, Aug 15, 2023 at 01:04:43PM +0800, Michael Shavit wrote:
> On Thu, Aug 10, 2023 at 5:15 PM Michael Shavit <mshavit@google.com> wrote:
> > On Wed, Aug 9, 2023 at 9:50 PM Will Deacon <will@kernel.org> wrote:
> > >
> > > Since you're dropping this and relying on the lock being taken higher up
> > > callstack, can we add a lockdep assertion that we do actually hold the
> > > devices_lock, please?
> >
> > Will do!
> 
> I spoke too soon; the point of this change was to remove the
> dependency on the arm_smmu_domain, piping the devices_lock would
> defeat this. In fact, this section is really depending on the iommu
> group lock not the devices_lock.

Sorry, but I'm not following you here. What is the devices_lock protecting
if we're depending on the group lock?

Will
Jason Gunthorpe Aug. 15, 2023, 11:22 a.m. UTC | #8
On Tue, Aug 15, 2023 at 01:20:04PM +0800, Michael Shavit wrote:
> On Thu, Aug 10, 2023 at 11:39 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > Actually, I don't think this even works as nothing on the PASID path
> > adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??
> >
> > Then the remaining two calls:
> >
> > arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
> >         arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
> >
> >         This is OK only if the sketchy assumption that the CD
> >         we extracted for a conflicting ASID is not asigned to a PASID.
> >
> > static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> >         arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
> >
> >         This doesn't work because we didn't add the master to the list
> >         during __arm_smmu_sva_bind and this path is expressly working
> >         on the PASID binds, not the RID binds.
> 
> Actually it is working on the RID attached domain (as returned by
> iommu_get_domain_for_dev() at sva_bind time) not the SVA domain
> here...

That can't be right, the purpose of that call and arm_smmu_mm_release is to
disable the PASID that is about to UAF the mm's page table.

Jason
Michael Shavit Aug. 15, 2023, 11:40 a.m. UTC | #9
On Tue, Aug 15, 2023 at 6:19 PM Will Deacon <will@kernel.org> wrote:
>
> On Tue, Aug 15, 2023 at 01:04:43PM +0800, Michael Shavit wrote:
> > On Thu, Aug 10, 2023 at 5:15 PM Michael Shavit <mshavit@google.com> wrote:
> > > On Wed, Aug 9, 2023 at 9:50 PM Will Deacon <will@kernel.org> wrote:
> > > >
> > > > Since you're dropping this and relying on the lock being taken higher up
> > > > callstack, can we add a lockdep assertion that we do actually hold the
> > > > devices_lock, please?
> > >
> > > Will do!
> >
> > I spoke too soon; the point of this change was to remove the
> > dependency on the arm_smmu_domain, piping the devices_lock would
> > defeat this. In fact, this section is really depending on the iommu
> > group lock not the devices_lock.
>
> Sorry, but I'm not following you here. What is the devices_lock protecting
> if we're depending on the group lock?
>
> Will

Ugh, yes sorry, we do need the devices_lock held *in the SVA* call
flow. The group lock is ineffective there because SVA performs
cross-master operations on iommu_domain callbacks. There, the
devices_lock is needed to ensure that the list of masters that a
domain is attached to doesn't mutate while we operate on that domain.

Nonetheless, it feels awkward to require the devices_lock to be held
outside SVA, since there we operate on a single master/CD table
rather. We don't care what other masters that domain is attached to.
Michael Shavit Aug. 15, 2023, 12:03 p.m. UTC | #10
On Tue, Aug 15, 2023 at 7:38 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Tue, Aug 15, 2023 at 01:20:04PM +0800, Michael Shavit wrote:
> > On Thu, Aug 10, 2023 at 11:39 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > >
> > > Actually, I don't think this even works as nothing on the PASID path
> > > adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??
> > >
> > > Then the remaining two calls:
> > >
> > > arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
> > >         arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
> > >
> > >         This is OK only if the sketchy assumption that the CD
> > >         we extracted for a conflicting ASID is not asigned to a PASID.
> > >
> > > static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> > >         arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
> > >
> > >         This doesn't work because we didn't add the master to the list
> > >         during __arm_smmu_sva_bind and this path is expressly working
> > >         on the PASID binds, not the RID binds.
> >
> > Actually it is working on the RID attached domain (as returned by
> > iommu_get_domain_for_dev() at sva_bind time) not the SVA domain
> > here...
>
> That can't be right, the purpose of that call and arm_smmu_mm_release is to
> disable the PASID that is about the UAF the mm's page table.
>
> Jason

For the sake of this message, let's call "primary domain" whatever RID
domain was attached to a master at the time set_dev_pasid() was called
on that master. That RID domain is locked in while SVA is enabled and
cannot be detached.

The arm-smmu-v3-sva.c implementation creates a mapping between an SVA
domain and this primary domain (through the sva domain's mm). In
arm_smmu_mm_release, the primary domain is looked up and
arm_smmu_write_ctx_desc() is called on all masters that this domain is
attached to.
Jason Gunthorpe Aug. 15, 2023, 12:30 p.m. UTC | #11
On Tue, Aug 15, 2023 at 08:03:40PM +0800, Michael Shavit wrote:
> On Tue, Aug 15, 2023 at 7:38 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > On Tue, Aug 15, 2023 at 01:20:04PM +0800, Michael Shavit wrote:
> > > On Thu, Aug 10, 2023 at 11:39 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > > >
> > > > Actually, I don't think this even works as nothing on the PASID path
> > > > adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??
> > > >
> > > > Then the remaining two calls:
> > > >
> > > > arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
> > > >         arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
> > > >
> > > >         This is OK only if the sketchy assumption that the CD
> > > >         we extracted for a conflicting ASID is not asigned to a PASID.
> > > >
> > > > static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> > > >         arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
> > > >
> > > >         This doesn't work because we didn't add the master to the list
> > > >         during __arm_smmu_sva_bind and this path is expressly working
> > > >         on the PASID binds, not the RID binds.
> > >
> > > Actually it is working on the RID attached domain (as returned by
> > > iommu_get_domain_for_dev() at sva_bind time) not the SVA domain
> > > here...
> >
> > That can't be right, the purpose of that call and arm_smmu_mm_release is to
> > disable the PASID that is about the UAF the mm's page table.
> >
> > Jason
> 
> For the sake of this message, let's call "primary domain" whatever RID
> domain was attached to a master at the time set_dev_pasid() was called
> on that master. That RID domain is locked in while SVA is enabled and
> cannot be detached.
> 
> The arm-smmu-v3-sva.c implementation creates a mapping between an SVA
> domain and this primary domain (through the sva domain's mm). In
> arm_smmu_mm_release, the primary domain is looked up and
> arm_smmu_write_ctx_desc() is called on all masters that this domain is
> attached to.

My question is still the same - how does arm_smmu_mm_release update the
Context descriptor table entry for the *PASID*

The RID on PASID 0 hasn't change and doesn't need updating.

Jason
Michael Shavit Aug. 15, 2023, 12:36 p.m. UTC | #12
On Tue, Aug 15, 2023 at 8:30 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Tue, Aug 15, 2023 at 08:03:40PM +0800, Michael Shavit wrote:
> > On Tue, Aug 15, 2023 at 7:38 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > >
> > > On Tue, Aug 15, 2023 at 01:20:04PM +0800, Michael Shavit wrote:
> > > > On Thu, Aug 10, 2023 at 11:39 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > > > >
> > > > > Actually, I don't think this even works as nothing on the PASID path
> > > > > adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??
> > > > >
> > > > > Then the remaining two calls:
> > > > >
> > > > > arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
> > > > >         arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
> > > > >
> > > > >         This is OK only if the sketchy assumption that the CD
> > > > >         we extracted for a conflicting ASID is not asigned to a PASID.
> > > > >
> > > > > static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> > > > >         arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
> > > > >
> > > > >         This doesn't work because we didn't add the master to the list
> > > > >         during __arm_smmu_sva_bind and this path is expressly working
> > > > >         on the PASID binds, not the RID binds.
> > > >
> > > > Actually it is working on the RID attached domain (as returned by
> > > > iommu_get_domain_for_dev() at sva_bind time) not the SVA domain
> > > > here...
> > >
> > > That can't be right, the purpose of that call and arm_smmu_mm_release is to
> > > disable the PASID that is about the UAF the mm's page table.
> > >
> > > Jason
> >
> > For the sake of this message, let's call "primary domain" whatever RID
> > domain was attached to a master at the time set_dev_pasid() was called
> > on that master. That RID domain is locked in while SVA is enabled and
> > cannot be detached.
> >
> > The arm-smmu-v3-sva.c implementation creates a mapping between an SVA
> > domain and this primary domain (through the sva domain's mm). In
> > arm_smmu_mm_release, the primary domain is looked up and
> > arm_smmu_write_ctx_desc() is called on all masters that this domain is
> > attached to.
>
> My question is still the same - how does arm_smmu_mm_release update the
> Contex descriptor table entry for the *PASID*
>
> The RID on PASID 0 hasn't change and doesn't need updating.
>
> Jason

arm_smmu_mm_release looks-up the CD table(s) to write using the
primary domain's device list, and finds the index into those CD
table(s) to write to using mm->pasid.
Jason Gunthorpe Aug. 15, 2023, 12:56 p.m. UTC | #13
On Tue, Aug 15, 2023 at 08:36:55PM +0800, Michael Shavit wrote:
> On Tue, Aug 15, 2023 at 8:30 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > On Tue, Aug 15, 2023 at 08:03:40PM +0800, Michael Shavit wrote:
> > > On Tue, Aug 15, 2023 at 7:38 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > > >
> > > > On Tue, Aug 15, 2023 at 01:20:04PM +0800, Michael Shavit wrote:
> > > > > On Thu, Aug 10, 2023 at 11:39 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > > > > >
> > > > > > Actually, I don't think this even works as nothing on the PASID path
> > > > > > adds to the list that arm_smmu_write_ctx_desc_devices() iterates over ??
> > > > > >
> > > > > > Then the remaining two calls:
> > > > > >
> > > > > > arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
> > > > > >         arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
> > > > > >
> > > > > >         This is OK only if the sketchy assumption that the CD
> > > > > >         we extracted for a conflicting ASID is not asigned to a PASID.
> > > > > >
> > > > > > static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> > > > > >         arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
> > > > > >
> > > > > >         This doesn't work because we didn't add the master to the list
> > > > > >         during __arm_smmu_sva_bind and this path is expressly working
> > > > > >         on the PASID binds, not the RID binds.
> > > > >
> > > > > Actually it is working on the RID attached domain (as returned by
> > > > > iommu_get_domain_for_dev() at sva_bind time) not the SVA domain
> > > > > here...
> > > >
> > > > That can't be right, the purpose of that call and arm_smmu_mm_release is to
> > > > disable the PASID that is about the UAF the mm's page table.
> > > >
> > > > Jason
> > >
> > > For the sake of this message, let's call "primary domain" whatever RID
> > > domain was attached to a master at the time set_dev_pasid() was called
> > > on that master. That RID domain is locked in while SVA is enabled and
> > > cannot be detached.
> > >
> > > The arm-smmu-v3-sva.c implementation creates a mapping between an SVA
> > > domain and this primary domain (through the sva domain's mm). In
> > > arm_smmu_mm_release, the primary domain is looked up and
> > > arm_smmu_write_ctx_desc() is called on all masters that this domain is
> > > attached to.
> >
> > My question is still the same - how does arm_smmu_mm_release update the
> > Contex descriptor table entry for the *PASID*
> >
> > The RID on PASID 0 hasn't change and doesn't need updating.
> >
> > Jason
> 
> arm_smmu_mm_release looks-up the CD table(s) to write using the
> primary domain's device list, and finds the index into those CD
> table(s) to write to using mm->pasid.

Oh.. I don't think I caught that detail that at this point the
arm_smmu_write_ctx_desc_devices() argument must always be the rid
domain. Maybe add a comment to describe that? And lets try to undo
that later :(

Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 968559d625c40..e3992a0c16377 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -37,6 +37,24 @@  struct arm_smmu_bond {
 
 static DEFINE_MUTEX(sva_lock);
 
+static int arm_smmu_write_ctx_desc_devices(struct arm_smmu_domain *smmu_domain,
+					    int ssid,
+					    struct arm_smmu_ctx_desc *cd)
+{
+	struct arm_smmu_master *master;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+		ret = arm_smmu_write_ctx_desc(master, ssid, cd);
+		if (ret)
+			break;
+	}
+	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+	return ret;
+}
+
 /*
  * Check if the CPU ASID is available on the SMMU side. If a private context
  * descriptor is using it, try to replace it.
@@ -80,7 +98,7 @@  arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
 	 * be some overlap between use of both ASIDs, until we invalidate the
 	 * TLB.
 	 */
-	arm_smmu_write_ctx_desc(smmu_domain, 0, cd);
+	arm_smmu_write_ctx_desc_devices(smmu_domain, 0, cd);
 
 	/* Invalidate TLB entries previously associated with that context */
 	arm_smmu_tlb_inv_asid(smmu, asid);
@@ -222,7 +240,7 @@  static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 	 * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events,
 	 * but disable translation.
 	 */
-	arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, &quiet_cd);
+	arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
 
 	arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid);
 	arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0);
@@ -279,9 +297,11 @@  arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
 		goto err_free_cd;
 	}
 
-	ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
-	if (ret)
+	ret = arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, cd);
+	if (ret) {
+		arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
 		goto err_put_notifier;
+	}
 
 	list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers);
 	return smmu_mn;
@@ -304,7 +324,8 @@  static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
 		return;
 
 	list_del(&smmu_mn->list);
-	arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, NULL);
+
+	arm_smmu_write_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
 
 	/*
 	 * If we went through clear(), we've already invalidated, and no
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index c01023404c26c..34bd7815aeb8e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -971,14 +971,12 @@  void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 }
 
-static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
+static void arm_smmu_sync_cd(struct arm_smmu_master *master,
 			     int ssid, bool leaf)
 {
 	size_t i;
-	unsigned long flags;
-	struct arm_smmu_master *master;
 	struct arm_smmu_cmdq_batch cmds;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_device *smmu = master->smmu;
 	struct arm_smmu_cmdq_ent cmd = {
 		.opcode	= CMDQ_OP_CFGI_CD,
 		.cfgi	= {
@@ -988,15 +986,10 @@  static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
 	};
 
 	cmds.num = 0;
-
-	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
-	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
-		for (i = 0; i < master->num_streams; i++) {
-			cmd.cfgi.sid = master->streams[i].id;
-			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
-		}
+	for (i = 0; i < master->num_streams; i++) {
+		cmd.cfgi.sid = master->streams[i].id;
+		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 	}
-	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 
 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 }
@@ -1026,14 +1019,13 @@  static void arm_smmu_write_cd_l1_desc(__le64 *dst,
 	WRITE_ONCE(*dst, cpu_to_le64(val));
 }
 
-static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
-				   u32 ssid)
+static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
 {
 	__le64 *l1ptr;
 	unsigned int idx;
 	struct arm_smmu_l1_ctx_desc *l1_desc;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
+	struct arm_smmu_device *smmu = master->smmu;
+	struct arm_smmu_ctx_desc_cfg *cdcfg = &master->domain->cd_table;
 
 	if (!cdcfg->l1_desc)
 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
@@ -1047,13 +1039,13 @@  static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
 		/* An invalid L1CD can be cached */
-		arm_smmu_sync_cd(smmu_domain, ssid, false);
+		arm_smmu_sync_cd(master, ssid, false);
 	}
 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
 }
 
-int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
 			    struct arm_smmu_ctx_desc *cd)
 {
 	/*
@@ -1070,11 +1062,12 @@  int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
 	u64 val;
 	bool cd_live;
 	__le64 *cdptr;
+	struct arm_smmu_ctx_desc_cfg *cd_table = &master->domain->cd_table;
 
-	if (WARN_ON(ssid >= (1 << smmu_domain->cd_table.max_cds_bits)))
+	if (WARN_ON(ssid >= (1 << cd_table->max_cds_bits)))
 		return -E2BIG;
 
-	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+	cdptr = arm_smmu_get_cd_ptr(master, ssid);
 	if (!cdptr)
 		return -ENOMEM;
 
@@ -1102,7 +1095,7 @@  int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
 		 * order. Ensure that it observes valid values before reading
 		 * V=1.
 		 */
-		arm_smmu_sync_cd(smmu_domain, ssid, true);
+		arm_smmu_sync_cd(master, ssid, true);
 
 		val = cd->tcr |
 #ifdef __BIG_ENDIAN
@@ -1114,7 +1107,7 @@  int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
 			CTXDESC_CD_0_V;
 
-		if (smmu_domain->cd_table.stall_enabled)
+		if (cd_table->stall_enabled)
 			val |= CTXDESC_CD_0_S;
 	}
 
@@ -1128,7 +1121,7 @@  int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
 	 *   without first making the structure invalid.
 	 */
 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
-	arm_smmu_sync_cd(smmu_domain, ssid, true);
+	arm_smmu_sync_cd(master, ssid, true);
 	return 0;
 }
 
@@ -1138,7 +1131,7 @@  static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain,
 	int ret;
 	size_t l1size;
 	size_t max_contexts;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_device *smmu = master->smmu;
 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
 
 	cdcfg->stall_enabled = master->stall_enabled;
@@ -2137,12 +2130,7 @@  static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
 	cd->mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
 
-	/*
-	 * Note that this will end up calling arm_smmu_sync_cd() before
-	 * the master has been added to the devices list for this domain.
-	 * This isn't an issue because the STE hasn't been installed yet.
-	 */
-	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, cd);
+	ret = arm_smmu_write_ctx_desc(master, 0, cd);
 	if (ret)
 		goto out_free_cd_tables;
 
@@ -2460,8 +2448,7 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		ret = -EINVAL;
 		goto out_unlock;
 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
-		   smmu_domain->cd_table.stall_enabled !=
-			   master->stall_enabled) {
+		   smmu_domain->cd_table.stall_enabled != master->stall_enabled) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 05b1f0ee60808..6066a09c01996 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -744,7 +744,7 @@  extern struct xarray arm_smmu_asid_xa;
 extern struct mutex arm_smmu_asid_lock;
 extern struct arm_smmu_ctx_desc quiet_cd;
 
-int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid,
 			    struct arm_smmu_ctx_desc *cd);
 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,