diff mbox series

[v7,3/9] iommu/arm-smmu-v3: Move the CD generation for S1 domains into a function

Message ID 3-v7-cb149db3a320+3b5-smmuv3_newapi_p2_jgg@nvidia.com (mailing list archive)
State New
Headers show
Series Make the SMMUv3 CD logic match the new STE design (part 2a/3) | expand

Commit Message

Jason Gunthorpe April 16, 2024, 7:28 p.m. UTC
Introduce arm_smmu_make_s1_cd() to build the CD from the paging S1 domain,
and reorganize all the places programming S1 domain CD table entries to
call it.

Split arm_smmu_update_s1_domain_cd_entry() from
arm_smmu_update_ctx_desc_devices() so that the S1 path has its own call
chain separate from the unrelated SVA path.

arm_smmu_update_s1_domain_cd_entry() only works on S1 domains
attached to RIDs and refreshes all their CDs.

Remove the forced clear of the CD during S1 domain attach,
arm_smmu_write_cd_entry() will do this automatically if necessary.

Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: Michael Shavit <mshavit@google.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 25 +++++++-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 60 +++++++++++++------
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  9 +++
 3 files changed, 76 insertions(+), 18 deletions(-)

Comments

Nicolin Chen April 16, 2024, 9:22 p.m. UTC | #1
On Tue, Apr 16, 2024 at 04:28:14PM -0300, Jason Gunthorpe wrote:
> Introduce arm_smmu_make_s1_cd() to build the CD from the paging S1 domain,
> and reorganize all the places programming S1 domain CD table entries to
> call it.
> 
> Split arm_smmu_update_s1_domain_cd_entry() from
> arm_smmu_update_ctx_desc_devices() so that the S1 path has its own call
> chain separate from the unrelated SVA path.
> 
> arm_smmu_update_s1_domain_cd_entry() only works on S1 domains
> attached to RIDs and refreshes all their CDs.
> 
> Remove the forced clear of the CD during S1 domain attach,
> arm_smmu_write_cd_entry() will do this automatically if necessary.
> 
> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> Reviewed-by: Michael Shavit <mshavit@google.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Mostafa Saleh April 19, 2024, 9:10 p.m. UTC | #2
Hi Jason,

On Tue, Apr 16, 2024 at 04:28:14PM -0300, Jason Gunthorpe wrote:
> Introduce arm_smmu_make_s1_cd() to build the CD from the paging S1 domain,
> and reorganize all the places programming S1 domain CD table entries to
> call it.
> 
> Split arm_smmu_update_s1_domain_cd_entry() from
> arm_smmu_update_ctx_desc_devices() so that the S1 path has its own call
> chain separate from the unrelated SVA path.
> 
> arm_smmu_update_s1_domain_cd_entry() only works on S1 domains
> attached to RIDs and refreshes all their CDs.
> 
> Remove the forced clear of the CD during S1 domain attach,
> arm_smmu_write_cd_entry() will do this automatically if necessary.
> 
> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> Reviewed-by: Michael Shavit <mshavit@google.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 25 +++++++-
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 60 +++++++++++++------
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  9 +++
>  3 files changed, 76 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> index 41b44baef15e80..d159f60480935e 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> @@ -53,6 +53,29 @@ static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain
>  	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
>  }
>  
> +static void
> +arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain)

nit: shouldn’t that be arm_smmu_update_sva_domain_cd_entry?
> +{
> +	struct arm_smmu_master *master;
> +	struct arm_smmu_cd target_cd;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> +	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
> +		struct arm_smmu_cd *cdptr;
> +
> +		/* S1 domains only support RID attachment right now */
> +		cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID);
> +		if (WARN_ON(!cdptr))
> +			continue;
> +
> +		arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
> +		arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
> +					&target_cd);

Case ARM_SMMU_DOMAIN_S1 has the same code:
  arm_smmu_get_cd_ptr => arm_smmu_make_s1_cd => arm_smmu_write_cd_entry
I’d prefer if that was abstracted within the SMMUv3 driver so that it provides a higher
level API rather than exposing these low-level functions in the header file.
But no strong opinion.

> +	}
> +	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> +}
> +
>  /*
>   * Check if the CPU ASID is available on the SMMU side. If a private context
>   * descriptor is using it, try to replace it.
> @@ -96,7 +119,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
>  	 * be some overlap between use of both ASIDs, until we invalidate the
>  	 * TLB.
>  	 */
> -	arm_smmu_update_ctx_desc_devices(smmu_domain, IOMMU_NO_PASID, cd);
> +	arm_smmu_update_s1_domain_cd_entry(smmu_domain);
>  
>  	/* Invalidate TLB entries previously associated with that context */
>  	arm_smmu_tlb_inv_asid(smmu, asid);
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 3983de90c2fa01..d24fa13a52b4e0 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -1204,8 +1204,8 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst,
>  	WRITE_ONCE(*dst, cpu_to_le64(val));
>  }
>  
> -static struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
> -					       u32 ssid)
> +struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
> +					u32 ssid)
>  {
>  	__le64 *l1ptr;
>  	unsigned int idx;
> @@ -1268,9 +1268,9 @@ static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
>  	.v_bit = cpu_to_le64(CTXDESC_CD_0_V),
>  };
>  
> -static void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
> -				    struct arm_smmu_cd *cdptr,
> -				    const struct arm_smmu_cd *target)
> +void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
> +			     struct arm_smmu_cd *cdptr,
> +			     const struct arm_smmu_cd *target)
>  {
>  	struct arm_smmu_cd_writer cd_writer = {
>  		.writer = {
> @@ -1283,6 +1283,32 @@ static void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
>  	arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
>  }
>  
> +void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
> +			 struct arm_smmu_master *master,
> +			 struct arm_smmu_domain *smmu_domain)
> +{
> +	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
> +
> +	memset(target, 0, sizeof(*target));
> +
> +	target->data[0] = cpu_to_le64(
> +		cd->tcr |
> +#ifdef __BIG_ENDIAN
> +		CTXDESC_CD_0_ENDI |
> +#endif
> +		CTXDESC_CD_0_V |
> +		CTXDESC_CD_0_AA64 |
> +		(master->stall_enabled ? CTXDESC_CD_0_S : 0) |
> +		CTXDESC_CD_0_R |
> +		CTXDESC_CD_0_A |
> +		CTXDESC_CD_0_ASET |
> +		FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
> +		);
> +
> +	target->data[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
> +	target->data[3] = cpu_to_le64(cd->mair);
> +}
> +

IMO, the patches that handle CD = NULL and the quiet CD should be introduced first so
this is easier to follow. As it stands, there is duplicate code in arm_smmu_write_ctx_desc()
which is dead, and that makes this a little harder to review. If the series were reordered,
arm_smmu_write_ctx_desc() could be removed in this patch so we can see how the code moved.

Otherwise:
Reviewed-by: Mostafa Saleh <smostafa@google.com>

Thanks,
Mostafa
>  static void arm_smmu_clean_cd_entry(struct arm_smmu_cd *target)
>  {
>  	struct arm_smmu_cd used = {};
> @@ -2644,29 +2670,29 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>  	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
>  
>  	switch (smmu_domain->stage) {
> -	case ARM_SMMU_DOMAIN_S1:
> +	case ARM_SMMU_DOMAIN_S1: {
> +		struct arm_smmu_cd target_cd;
> +		struct arm_smmu_cd *cdptr;
> +
>  		if (!master->cd_table.cdtab) {
>  			ret = arm_smmu_alloc_cd_tables(master);
>  			if (ret)
>  				goto out_list_del;
> -		} else {
> -			/*
> -			 * arm_smmu_write_ctx_desc() relies on the entry being
> -			 * invalid to work, clear any existing entry.
> -			 */
> -			ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID,
> -						      NULL);
> -			if (ret)
> -				goto out_list_del;
>  		}
>  
> -		ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
> -		if (ret)
> +		cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID);
> +		if (!cdptr) {
> +			ret = -ENOMEM;
>  			goto out_list_del;
> +		}
>  
> +		arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
> +		arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
> +					&target_cd);
>  		arm_smmu_make_cdtable_ste(&target, master);
>  		arm_smmu_install_ste_for_dev(master, &target);
>  		break;
> +	}
>  	case ARM_SMMU_DOMAIN_S2:
>  		arm_smmu_make_s2_domain_ste(&target, master, smmu_domain);
>  		arm_smmu_install_ste_for_dev(master, &target);
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 4b767e0eeeb682..bb08f087ba39e4 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -751,6 +751,15 @@ extern struct xarray arm_smmu_asid_xa;
>  extern struct mutex arm_smmu_asid_lock;
>  extern struct arm_smmu_ctx_desc quiet_cd;
>  
> +struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
> +					u32 ssid);
> +void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
> +			 struct arm_smmu_master *master,
> +			 struct arm_smmu_domain *smmu_domain);
> +void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
> +			     struct arm_smmu_cd *cdptr,
> +			     const struct arm_smmu_cd *target);
> +
>  int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid,
>  			    struct arm_smmu_ctx_desc *cd);
>  void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
> -- 
> 2.43.2
>
Jason Gunthorpe April 22, 2024, 1:52 p.m. UTC | #3
On Fri, Apr 19, 2024 at 09:10:59PM +0000, Mostafa Saleh wrote:
> Hi Jason,
> 
> On Tue, Apr 16, 2024 at 04:28:14PM -0300, Jason Gunthorpe wrote:
> > Introduce arm_smmu_make_s1_cd() to build the CD from the paging S1 domain,
> > and reorganize all the places programming S1 domain CD table entries to
> > call it.
> > 
> > Split arm_smmu_update_s1_domain_cd_entry() from
> > arm_smmu_update_ctx_desc_devices() so that the S1 path has its own call
> > chain separate from the unrelated SVA path.
> > 
> > arm_smmu_update_s1_domain_cd_entry() only works on S1 domains
> > attached to RIDs and refreshes all their CDs.
> > 
> > Remove the forced clear of the CD during S1 domain attach,
> > arm_smmu_write_cd_entry() will do this automatically if necessary.
> > 
> > Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> > Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> > Reviewed-by: Michael Shavit <mshavit@google.com>
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > ---
> >  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 25 +++++++-
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 60 +++++++++++++------
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  9 +++
> >  3 files changed, 76 insertions(+), 18 deletions(-)
> > 
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> > index 41b44baef15e80..d159f60480935e 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> > @@ -53,6 +53,29 @@ static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain
> >  	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> >  }
> >  
> > +static void
> > +arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain)
> 
> nit: shouldn’t that be arm_smmu_update_sva_domain_cd_entry?

No, that actually was my same confusion too when I was first looking
at this. The logic updates a *S1* domain's CD, it doesn't touch a SVA
CD or a SVA domain.

It actually has nothing to do with SVA, this is part of BTM support to
change the ASID in already programmed S1 domains.

> > +{
> > +	struct arm_smmu_master *master;
> > +	struct arm_smmu_cd target_cd;
> > +	unsigned long flags;
> > +
> > +	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> > +	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
> > +		struct arm_smmu_cd *cdptr;
> > +
> > +		/* S1 domains only support RID attachment right now */
> > +		cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID);
> > +		if (WARN_ON(!cdptr))
> > +			continue;
> > +
> > +		arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
> > +		arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
> > +					&target_cd);
> 
> Case ARM_SMMU_DOMAIN_S1 has the same code:
>   arm_smmu_get_cd_ptr => arm_smmu_make_s1_cd => arm_smmu_write_cd_entry
> I’d prefer if that was abstracted with the SMMUv3 driver and it provides a higher
> level API rather than exposing these low-level functions in the header file.
> But no strong opinion.

It is only slightly the same now, and it will keep getting more
different as the patches progress. For instance "Make
arm_smmu_alloc_cd_ptr()" makes them call different alloc functions.

Later on this code will handle a SSID too.

I don't think of those functions as a lower level API, ptr/make/write
is the API design. We have different versions of each of those
functions. The call site needs to string together the right sequence
of three operations for its specific context.

At the end this is an atomic context working on S1 domains with SSID -
there isn't another case exactly like this.

> > +void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
> > +			 struct arm_smmu_master *master,
> > +			 struct arm_smmu_domain *smmu_domain)
> > +{
> > +	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
> > +
> > +	memset(target, 0, sizeof(*target));
> > +
> > +	target->data[0] = cpu_to_le64(
> > +		cd->tcr |
> > +#ifdef __BIG_ENDIAN
> > +		CTXDESC_CD_0_ENDI |
> > +#endif
> > +		CTXDESC_CD_0_V |
> > +		CTXDESC_CD_0_AA64 |
> > +		(master->stall_enabled ? CTXDESC_CD_0_S : 0) |
> > +		CTXDESC_CD_0_R |
> > +		CTXDESC_CD_0_A |
> > +		CTXDESC_CD_0_ASET |
> > +		FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
> > +		);
> > +
> > +	target->data[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
> > +	target->data[3] = cpu_to_le64(cd->mair);
> > +}
> > +
> 
> IMO, patches to handle CD = NULL and quiet CD should be introduced first so it is
> easier to follow as now there is duplicate code in arm_smmu_write_ctx_desc() which
> is dead and makes it a little harder to review, but if reordered,
> arm_smmu_write_ctx_desc() can be removed in this patch so we can see how code moved.

arm_smmu_write_ctx_desc() can't be removed until all of S1, clear, SVA
and quiet_cd are converted. No matter what order you pick there will
be some weirdness.

The duplicate code "(1) and (2)" is also still being used for the SVA
domains, it is not unused until patch "Move the CD generation for SVA
into a function".

The only dead code here is the ASID change. So I'll bring this hunk forward:

--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1328,14 +1328,11 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
         *
         * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
         * (2) Install a secondary CD, for SID+SSID traffic.
-        * (3) Update ASID of a CD. Atomically write the first 64 bits of the
-        *     CD, then invalidate the old entry and mappings.
         * (4) Quiesce the context without clearing the valid bit. Disable
         *     translation, and ignore any translation fault.
         * (5) Remove a secondary CD.
         */
        u64 val;
-       bool cd_live;
        struct arm_smmu_cd target;
        struct arm_smmu_cd *cdptr = &target;
        struct arm_smmu_cd *cd_table_entry;
@@ -1351,7 +1348,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
 
        target = *cd_table_entry;
        val = le64_to_cpu(cdptr->data[0]);
-       cd_live = !!(val & CTXDESC_CD_0_V);
 
        if (!cd) { /* (5) */
                val = 0;
@@ -1359,13 +1355,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
                if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
                        val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R);
                val |= CTXDESC_CD_0_TCR_EPD0;
-       } else if (cd_live) { /* (3) */
-               val &= ~CTXDESC_CD_0_ASID;
-               val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
-               /*
-                * Until CD+TLB invalidation, both ASIDs may be used for tagging
-                * this substream's traffic
-                */
        } else { /* (1) and (2) */
                cdptr->data[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
                cdptr->data[2] = 0;

> Otherwise:
> Reviewed-by: Mostafa Saleh <smostafa@google.com>

Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 41b44baef15e80..d159f60480935e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -53,6 +53,29 @@  static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain
 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 }
 
+static void
+arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain)
+{
+	struct arm_smmu_master *master;
+	struct arm_smmu_cd target_cd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+		struct arm_smmu_cd *cdptr;
+
+		/* S1 domains only support RID attachment right now */
+		cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID);
+		if (WARN_ON(!cdptr))
+			continue;
+
+		arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
+		arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
+					&target_cd);
+	}
+	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+}
+
 /*
  * Check if the CPU ASID is available on the SMMU side. If a private context
  * descriptor is using it, try to replace it.
@@ -96,7 +119,7 @@  arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
 	 * be some overlap between use of both ASIDs, until we invalidate the
 	 * TLB.
 	 */
-	arm_smmu_update_ctx_desc_devices(smmu_domain, IOMMU_NO_PASID, cd);
+	arm_smmu_update_s1_domain_cd_entry(smmu_domain);
 
 	/* Invalidate TLB entries previously associated with that context */
 	arm_smmu_tlb_inv_asid(smmu, asid);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 3983de90c2fa01..d24fa13a52b4e0 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1204,8 +1204,8 @@  static void arm_smmu_write_cd_l1_desc(__le64 *dst,
 	WRITE_ONCE(*dst, cpu_to_le64(val));
 }
 
-static struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
-					       u32 ssid)
+struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
+					u32 ssid)
 {
 	__le64 *l1ptr;
 	unsigned int idx;
@@ -1268,9 +1268,9 @@  static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
 	.v_bit = cpu_to_le64(CTXDESC_CD_0_V),
 };
 
-static void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
-				    struct arm_smmu_cd *cdptr,
-				    const struct arm_smmu_cd *target)
+void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
+			     struct arm_smmu_cd *cdptr,
+			     const struct arm_smmu_cd *target)
 {
 	struct arm_smmu_cd_writer cd_writer = {
 		.writer = {
@@ -1283,6 +1283,32 @@  static void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
 	arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
 }
 
+void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
+			 struct arm_smmu_master *master,
+			 struct arm_smmu_domain *smmu_domain)
+{
+	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
+
+	memset(target, 0, sizeof(*target));
+
+	target->data[0] = cpu_to_le64(
+		cd->tcr |
+#ifdef __BIG_ENDIAN
+		CTXDESC_CD_0_ENDI |
+#endif
+		CTXDESC_CD_0_V |
+		CTXDESC_CD_0_AA64 |
+		(master->stall_enabled ? CTXDESC_CD_0_S : 0) |
+		CTXDESC_CD_0_R |
+		CTXDESC_CD_0_A |
+		CTXDESC_CD_0_ASET |
+		FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
+		);
+
+	target->data[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
+	target->data[3] = cpu_to_le64(cd->mair);
+}
+
 static void arm_smmu_clean_cd_entry(struct arm_smmu_cd *target)
 {
 	struct arm_smmu_cd used = {};
@@ -2644,29 +2670,29 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 
 	switch (smmu_domain->stage) {
-	case ARM_SMMU_DOMAIN_S1:
+	case ARM_SMMU_DOMAIN_S1: {
+		struct arm_smmu_cd target_cd;
+		struct arm_smmu_cd *cdptr;
+
 		if (!master->cd_table.cdtab) {
 			ret = arm_smmu_alloc_cd_tables(master);
 			if (ret)
 				goto out_list_del;
-		} else {
-			/*
-			 * arm_smmu_write_ctx_desc() relies on the entry being
-			 * invalid to work, clear any existing entry.
-			 */
-			ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID,
-						      NULL);
-			if (ret)
-				goto out_list_del;
 		}
 
-		ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
-		if (ret)
+		cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID);
+		if (!cdptr) {
+			ret = -ENOMEM;
 			goto out_list_del;
+		}
 
+		arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
+		arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
+					&target_cd);
 		arm_smmu_make_cdtable_ste(&target, master);
 		arm_smmu_install_ste_for_dev(master, &target);
 		break;
+	}
 	case ARM_SMMU_DOMAIN_S2:
 		arm_smmu_make_s2_domain_ste(&target, master, smmu_domain);
 		arm_smmu_install_ste_for_dev(master, &target);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 4b767e0eeeb682..bb08f087ba39e4 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -751,6 +751,15 @@  extern struct xarray arm_smmu_asid_xa;
 extern struct mutex arm_smmu_asid_lock;
 extern struct arm_smmu_ctx_desc quiet_cd;
 
+struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
+					u32 ssid);
+void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
+			 struct arm_smmu_master *master,
+			 struct arm_smmu_domain *smmu_domain);
+void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
+			     struct arm_smmu_cd *cdptr,
+			     const struct arm_smmu_cd *target);
+
 int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid,
 			    struct arm_smmu_ctx_desc *cd);
 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);