[v2,01/19] iommu/arm-smmu-v3: Add a type for the STE

Message ID 1-v2-de8b10590bf5+400-smmuv3_newapi_p1_jgg@nvidia.com
State New, archived
Series Update SMMUv3 to the modern iommu API (part 1/3)

Commit Message

Jason Gunthorpe Nov. 13, 2023, 5:53 p.m. UTC
Instead of passing a naked __le64 * around to represent an STE, wrap it in a
"struct arm_smmu_ste" with an array of the correct size. This makes it
much clearer which functions will comprise the "STE API".

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 54 ++++++++++-----------
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  7 ++-
 2 files changed, 32 insertions(+), 29 deletions(-)
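
The point of the wrapper is type safety: a raw __le64 * is interchangeable
with any other array of dwords (a context descriptor, an L1 descriptor, a
queue entry), while a struct arm_smmu_ste * is only accepted by functions
that genuinely operate on STEs. A minimal sketch of the effect, using a
hypothetical helper that is not part of the patch:

	struct arm_smmu_ste {
		__le64 data[STRTAB_STE_DWORDS];	/* 8 dwords = 64 bytes */
	};

	/* Compiles only for a real STE; passing a bare __le64 * (e.g. a
	 * context-descriptor entry) by mistake is now a type error. */
	static void arm_smmu_ste_set_valid(struct arm_smmu_ste *ste)
	{
		ste->data[0] |= cpu_to_le64(STRTAB_STE_0_V);
	}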

Comments

Moritz Fischer Nov. 14, 2023, 3:06 p.m. UTC | #1
On Mon, Nov 13, 2023 at 01:53:08PM -0400, Jason Gunthorpe wrote:
> Instead of passing a naked __le64 * around to represent an STE, wrap it in a
> "struct arm_smmu_ste" with an array of the correct size. This makes it
> much clearer which functions will comprise the "STE API".
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Moritz Fischer <mdf@kernel.org>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 54 ++++++++++-----------
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  7 ++-
>  2 files changed, 32 insertions(+), 29 deletions(-)
> 
> [...]
Michael Shavit Nov. 15, 2023, 11:52 a.m. UTC | #2
On Tue, Nov 14, 2023 at 11:07 PM Moritz Fischer <mdf@kernel.org> wrote:
>
> On Mon, Nov 13, 2023 at 01:53:08PM -0400, Jason Gunthorpe wrote:
> > Instead of passing a naked __le64 * around to represent an STE, wrap it in a
> > "struct arm_smmu_ste" with an array of the correct size. This makes it
> > much clearer which functions will comprise the "STE API".
> >
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> Reviewed-by: Moritz Fischer <mdf@kernel.org>

Reviewed-by: Michael Shavit <mshavit@google.com>

> > [...]
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > index 961205ba86d25d..03f9e526cbd92f 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > @@ -206,6 +206,11 @@
> >  #define STRTAB_L1_DESC_L2PTR_MASK    GENMASK_ULL(51, 6)
> >
> >  #define STRTAB_STE_DWORDS            8
> > +
> > +struct arm_smmu_ste {
> > +     __le64 data[STRTAB_STE_DWORDS];
> > +};
> > +

nit: This looks out of place at this location compared to the rest of
the file. All of the other structs are defined after this large block
of #defines.

Jason Gunthorpe Nov. 15, 2023, 1:35 p.m. UTC | #3
On Wed, Nov 15, 2023 at 07:52:17PM +0800, Michael Shavit wrote:

> > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > > index 961205ba86d25d..03f9e526cbd92f 100644
> > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > > @@ -206,6 +206,11 @@
> > >  #define STRTAB_L1_DESC_L2PTR_MASK    GENMASK_ULL(51, 6)
> > >
> > >  #define STRTAB_STE_DWORDS            8
> > > +
> > > +struct arm_smmu_ste {
> > > +     __le64 data[STRTAB_STE_DWORDS];
> > > +};
> > > +
> 
> nit: This looks out of place at this location compared to the rest of
> the file. All of the other structs are defined after this large block
> of #defines.

The struct follows the defines that specify how to parse its content,
which is pretty normal.

Thanks,
Jason
Eric Auger Nov. 27, 2023, 4:03 p.m. UTC | #4
Hi Jason,

On 11/13/23 18:53, Jason Gunthorpe wrote:
> Instead of passing a naked __le64 * around to represent an STE, wrap it in a
> "struct arm_smmu_ste" with an array of the correct size. This makes it
> much clearer which functions will comprise the "STE API".
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
> [...]
> @@ -2209,26 +2210,22 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
>  	return 0;
>  }
>  
> -static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
> +static struct arm_smmu_ste *
> +arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
>  {
> -	__le64 *step;
>  	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
>  
>  	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
> -		struct arm_smmu_strtab_l1_desc *l1_desc;
>  		int idx;
>  
>  		/* Two-level walk */
>  		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
> -		l1_desc = &cfg->l1_desc[idx];
> -		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
> -		step = &l1_desc->l2ptr[idx];
> +		return &cfg->l1_desc[idx].l2ptr[sid & ((1 << STRTAB_SPLIT) - 1)];
This looks less readable to me than it was before.
>  	} else {
>  		/* Simple linear lookup */
> -		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
> +		return (struct arm_smmu_ste *)&cfg
> +			       ->strtab[sid * STRTAB_STE_DWORDS];

Besides
Reviewed-by: Eric Auger <eric.auger@redhat.com>

Eric


Jason Gunthorpe Nov. 27, 2023, 5:42 p.m. UTC | #5
On Mon, Nov 27, 2023 at 05:03:36PM +0100, Eric Auger wrote:

> > -static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
> > +static struct arm_smmu_ste *
> > +arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
> >  {
> > -	__le64 *step;
> >  	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
> >  
> >  	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
> > -		struct arm_smmu_strtab_l1_desc *l1_desc;
> >  		int idx;
> >  
> >  		/* Two-level walk */
> >  		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
> > -		l1_desc = &cfg->l1_desc[idx];
> > -		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
> > -		step = &l1_desc->l2ptr[idx];
> > +		return &cfg->l1_desc[idx].l2ptr[sid & ((1 << STRTAB_SPLIT) - 1)];
> This looks less readable to me than it was before.

You would like the idx calculation outside?

  		idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
		idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
		return &cfg->l1_desc[idx1].l2ptr[idx2];

?

Thanks,
Jason
Eric Auger Nov. 27, 2023, 5:51 p.m. UTC | #6
Hi Jason,

On 11/27/23 18:42, Jason Gunthorpe wrote:
> On Mon, Nov 27, 2023 at 05:03:36PM +0100, Eric Auger wrote:
> 
>>> [...]
>> This looks less readable to me than it was before.
> 
> You would like the idx calculation outside?
> 
>   		idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
> 		idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
> 		return &cfg->l1_desc[idx1].l2ptr[idx2];
Yes, this looks more readable to me.

Eric
> 
> ?
> 
> Thanks,
> Jason
>
Jason Gunthorpe Nov. 27, 2023, 6:21 p.m. UTC | #7
On Mon, Nov 27, 2023 at 06:51:21PM +0100, Eric Auger wrote:
> Hi Jason,
> 
> On 11/27/23 18:42, Jason Gunthorpe wrote:
> > On Mon, Nov 27, 2023 at 05:03:36PM +0100, Eric Auger wrote:
> > 
> >>> [...]
> >> This looks less readable to me than it was before.
> > 
> > You would like the idx calculation outside?
> > 
> >   		idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
> > 		idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
> > 		return &cfg->l1_desc[idx1].l2ptr[idx2];
> Yes, this looks more readable to me.

done

Jason
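
For reference, a sketch of the helper with both indices named as agreed
above (a reconstruction of the shape discussed in this thread, not
necessarily the exact committed code):

	static struct arm_smmu_ste *
	arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
	{
		struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;

		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
			/* Two-level walk: L1 descriptor, then STE within its L2 table */
			unsigned int idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
			unsigned int idx2 = sid & ((1 << STRTAB_SPLIT) - 1);

			return &cfg->l1_desc[idx1].l2ptr[idx2];
		}
		/* Simple linear lookup */
		return (struct arm_smmu_ste *)&cfg->strtab[sid * STRTAB_STE_DWORDS];
	}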
Nicolin Chen Dec. 5, 2023, 12:44 a.m. UTC | #8
On Mon, Nov 13, 2023 at 01:53:08PM -0400, Jason Gunthorpe wrote:
> Instead of passing a naked __le64 * around to represent an STE, wrap it in a
> "struct arm_smmu_ste" with an array of the correct size. This makes it
> much clearer which functions will comprise the "STE API".
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
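
An aside on the arm_smmu_init_bypass_stes() hunk in the patch below:
strtab++ on the new struct pointer advances by sizeof(struct arm_smmu_ste),
which is STRTAB_STE_DWORDS * sizeof(__le64) = 64 bytes, i.e. exactly one
STE, so it matches the old strtab += STRTAB_STE_DWORDS on a __le64 *. A
compile-time check of that invariant (hypothetical, not in the patch)
could read:

	/* Guards the strtab++ stride against future layout changes */
	static_assert(sizeof(struct arm_smmu_ste) ==
		      STRTAB_STE_DWORDS * sizeof(__le64));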

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 7445454c2af244..519749d15fbda0 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1249,7 +1249,7 @@  static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
 }
 
 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
-				      __le64 *dst)
+				      struct arm_smmu_ste *dst)
 {
 	/*
 	 * This is hideously complicated, but we only really care about
@@ -1267,7 +1267,7 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
 	 * 3. Update Config, sync
 	 */
-	u64 val = le64_to_cpu(dst[0]);
+	u64 val = le64_to_cpu(dst->data[0]);
 	bool ste_live = false;
 	struct arm_smmu_device *smmu = NULL;
 	struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
@@ -1325,10 +1325,10 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 		else
 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
 
-		dst[0] = cpu_to_le64(val);
-		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+		dst->data[0] = cpu_to_le64(val);
+		dst->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
 						STRTAB_STE_1_SHCFG_INCOMING));
-		dst[2] = 0; /* Nuke the VMID */
+		dst->data[2] = 0; /* Nuke the VMID */
 		/*
 		 * The SMMU can perform negative caching, so we must sync
 		 * the STE regardless of whether the old value was live.
@@ -1343,7 +1343,7 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
 
 		BUG_ON(ste_live);
-		dst[1] = cpu_to_le64(
+		dst->data[1] = cpu_to_le64(
 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
@@ -1352,7 +1352,7 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 
 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
 		    !master->stall_enabled)
-			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
+			dst->data[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
 		val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
@@ -1362,7 +1362,7 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 
 	if (s2_cfg) {
 		BUG_ON(ste_live);
-		dst[2] = cpu_to_le64(
+		dst->data[2] = cpu_to_le64(
 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
 #ifdef __BIG_ENDIAN
@@ -1371,18 +1371,18 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
 			 STRTAB_STE_2_S2R);
 
-		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
+		dst->data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
 
 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
 	}
 
 	if (master->ats_enabled)
-		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
+		dst->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
 						 STRTAB_STE_1_EATS_TRANS));
 
 	arm_smmu_sync_ste_for_sid(smmu, sid);
 	/* See comment in arm_smmu_write_ctx_desc() */
-	WRITE_ONCE(dst[0], cpu_to_le64(val));
+	WRITE_ONCE(dst->data[0], cpu_to_le64(val));
 	arm_smmu_sync_ste_for_sid(smmu, sid);
 
 	/* It's likely that we'll want to use the new STE soon */
@@ -1390,7 +1390,8 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
 }
 
-static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
+static void arm_smmu_init_bypass_stes(struct arm_smmu_ste *strtab,
+				      unsigned int nent, bool force)
 {
 	unsigned int i;
 	u64 val = STRTAB_STE_0_V;
@@ -1401,11 +1402,11 @@  static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool fo
 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
 
 	for (i = 0; i < nent; ++i) {
-		strtab[0] = cpu_to_le64(val);
-		strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
-						   STRTAB_STE_1_SHCFG_INCOMING));
-		strtab[2] = 0;
-		strtab += STRTAB_STE_DWORDS;
+		strtab->data[0] = cpu_to_le64(val);
+		strtab->data[1] = cpu_to_le64(FIELD_PREP(
+			STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
+		strtab->data[2] = 0;
+		strtab++;
 	}
 }
 
@@ -2209,26 +2210,22 @@  static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 	return 0;
 }
 
-static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
+static struct arm_smmu_ste *
+arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
 {
-	__le64 *step;
 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
 
 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
-		struct arm_smmu_strtab_l1_desc *l1_desc;
 		int idx;
 
 		/* Two-level walk */
 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
-		l1_desc = &cfg->l1_desc[idx];
-		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
-		step = &l1_desc->l2ptr[idx];
+		return &cfg->l1_desc[idx].l2ptr[sid & ((1 << STRTAB_SPLIT) - 1)];
 	} else {
 		/* Simple linear lookup */
-		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
+		return (struct arm_smmu_ste *)&cfg
+			       ->strtab[sid * STRTAB_STE_DWORDS];
 	}
-
-	return step;
 }
 
 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
@@ -2238,7 +2235,8 @@  static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
 
 	for (i = 0; i < master->num_streams; ++i) {
 		u32 sid = master->streams[i].id;
-		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
+		struct arm_smmu_ste *step =
+			arm_smmu_get_step_for_sid(smmu, sid);
 
 		/* Bridged PCI devices may end up with duplicated IDs */
 		for (j = 0; j < i; j++)
@@ -3769,7 +3767,7 @@  static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
 
 	list_for_each_entry(e, &rmr_list, list) {
-		__le64 *step;
+		struct arm_smmu_ste *step;
 		struct iommu_iort_rmr_data *rmr;
 		int ret, i;
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 961205ba86d25d..03f9e526cbd92f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -206,6 +206,11 @@ 
 #define STRTAB_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 6)
 
 #define STRTAB_STE_DWORDS		8
+
+struct arm_smmu_ste {
+	__le64 data[STRTAB_STE_DWORDS];
+};
+
 #define STRTAB_STE_0_V			(1UL << 0)
 #define STRTAB_STE_0_CFG		GENMASK_ULL(3, 1)
 #define STRTAB_STE_0_CFG_ABORT		0
@@ -571,7 +576,7 @@  struct arm_smmu_priq {
 struct arm_smmu_strtab_l1_desc {
 	u8				span;
 
-	__le64				*l2ptr;
+	struct arm_smmu_ste		*l2ptr;
 	dma_addr_t			l2ptr_dma;
 };