diff mbox series

[v7,5/9] iommu/arm-smmu-v3: Make arm_smmu_alloc_cd_ptr()

Message ID 5-v7-cb149db3a320+3b5-smmuv3_newapi_p2_jgg@nvidia.com (mailing list archive)
State New
Headers show
Series Make the SMMUv3 CD logic match the new STE design (part 2a/3) | expand

Commit Message

Jason Gunthorpe April 16, 2024, 7:28 p.m. UTC
Only the attach callers can perform an allocation for the CD table entry,
the other callers must not do so, they do not have the correct locking and
they cannot sleep. Split up the functions so this is clear.

arm_smmu_get_cd_ptr() will return a pointer to a CD table entry without
doing any kind of allocation.

arm_smmu_alloc_cd_ptr() will allocate the table and any required
leaf.

A following patch will add lockdep assertions to arm_smmu_alloc_cd_ptr()
once the restructuring is completed and arm_smmu_alloc_cd_ptr() is never
called in the wrong context.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 61 +++++++++++++--------
 1 file changed, 39 insertions(+), 22 deletions(-)

Comments

Nicolin Chen April 16, 2024, 10:19 p.m. UTC | #1
On Tue, Apr 16, 2024 at 04:28:16PM -0300, Jason Gunthorpe wrote:
> Only the attach callers can perform an allocation for the CD table entry,
> the other callers must not do so, they do not have the correct locking and
> they cannot sleep. Split up the functions so this is clear.
> 
> arm_smmu_get_cd_ptr() will return pointer to a CD table entry without
> doing any kind of allocation.
> 
> arm_smmu_alloc_cd_ptr() will allocate the table and any required
> leaf.
> 
> A following patch will add lockdep assertions to arm_smmu_alloc_cd_ptr()
> once the restructuring is completed and arm_smmu_alloc_cd_ptr() is never
> called in the wrong context.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Mostafa Saleh April 19, 2024, 9:14 p.m. UTC | #2
Hi Jason,

On Tue, Apr 16, 2024 at 04:28:16PM -0300, Jason Gunthorpe wrote:
> Only the attach callers can perform an allocation for the CD table entry,
> the other callers must not do so, they do not have the correct locking and
> they cannot sleep. Split up the functions so this is clear.
> 
> arm_smmu_get_cd_ptr() will return pointer to a CD table entry without
> doing any kind of allocation.
> 
> arm_smmu_alloc_cd_ptr() will allocate the table and any required
> leaf.
> 
> A following patch will add lockdep assertions to arm_smmu_alloc_cd_ptr()
> once the restructuring is completed and arm_smmu_alloc_cd_ptr() is never
> called in the wrong context.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 61 +++++++++++++--------
>  1 file changed, 39 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index f3df1ec8d258dc..a0d1237272936f 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -98,6 +98,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
>  
>  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
>  				    struct arm_smmu_device *smmu);
> +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
>  
>  static void parse_driver_options(struct arm_smmu_device *smmu)
>  {
> @@ -1207,29 +1208,51 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst,
>  struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
>  					u32 ssid)
>  {
> -	__le64 *l1ptr;
> -	unsigned int idx;
>  	struct arm_smmu_l1_ctx_desc *l1_desc;
> -	struct arm_smmu_device *smmu = master->smmu;
>  	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
>  
> +	if (!cd_table->cdtab)
> +		return NULL;
> +
>  	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
>  		return (struct arm_smmu_cd *)(cd_table->cdtab +
>  					      ssid * CTXDESC_CD_DWORDS);
>  
> -	idx = ssid >> CTXDESC_SPLIT;
> -	l1_desc = &cd_table->l1_desc[idx];
> -	if (!l1_desc->l2ptr) {
> -		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
> -			return NULL;
> +	l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES];

These operations used to be a shift and bit masking, which made sense as that
is what the hardware does. Is there any reason you changed them to division
and modulo? I checked the disassembly and gcc does the right thing, as the
constants are powers of 2, but I am just curious.

> +	if (!l1_desc->l2ptr)
> +		return NULL;
> +	return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES];
> +}
>  
> -		l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
> -		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
> -		/* An invalid L1CD can be cached */
> -		arm_smmu_sync_cd(master, ssid, false);
> +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
> +						 u32 ssid)
> +{
> +	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> +	struct arm_smmu_device *smmu = master->smmu;
> +
> +	if (!cd_table->cdtab) {
> +		if (arm_smmu_alloc_cd_tables(master))
> +			return NULL;
>  	}
> -	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
> -	return &l1_desc->l2ptr[idx];
> +
> +	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
> +		unsigned int idx = ssid >> CTXDESC_SPLIT;

OK, now it’s a shift. I think we should be consistent in how we
calculate the index.

> +		struct arm_smmu_l1_ctx_desc *l1_desc;
> +
> +		l1_desc = &cd_table->l1_desc[idx];
> +		if (!l1_desc->l2ptr) {
> +			__le64 *l1ptr;
> +
> +			if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
> +				return NULL;
> +
> +			l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
> +			arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
> +			/* An invalid L1CD can be cached */
> +			arm_smmu_sync_cd(master, ssid, false);
> +		}
> +	}
> +	return arm_smmu_get_cd_ptr(master, ssid);
>  }
>  
>  struct arm_smmu_cd_writer {
> @@ -1357,7 +1380,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
>  	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
>  		return -E2BIG;
>  
> -	cd_table_entry = arm_smmu_get_cd_ptr(master, ssid);
> +	cd_table_entry = arm_smmu_alloc_cd_ptr(master, ssid);

The only path that allocates the main table is “arm_smmu_attach_dev”. I guess
it would be more robust to leave that as is and have 2 versions of get_cd,
one that allocates the leaf and one that does not allocate. What do you think?

Thanks,
Mostafa



>  	if (!cd_table_entry)
>  		return -ENOMEM;
>  
> @@ -2687,13 +2710,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>  		struct arm_smmu_cd target_cd;
>  		struct arm_smmu_cd *cdptr;
>  
> -		if (!master->cd_table.cdtab) {
> -			ret = arm_smmu_alloc_cd_tables(master);
> -			if (ret)
> -				goto out_list_del;
> -		}
> -
> -		cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID);
> +		cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
>  		if (!cdptr) {
>  			ret = -ENOMEM;
>  			goto out_list_del;
> -- 
> 2.43.2
>
Jason Gunthorpe April 22, 2024, 2:20 p.m. UTC | #3
On Fri, Apr 19, 2024 at 09:14:21PM +0000, Mostafa Saleh wrote:
> Hi Jason,
> 
> On Tue, Apr 16, 2024 at 04:28:16PM -0300, Jason Gunthorpe wrote:
> > Only the attach callers can perform an allocation for the CD table entry,
> > the other callers must not do so, they do not have the correct locking and
> > they cannot sleep. Split up the functions so this is clear.
> > 
> > arm_smmu_get_cd_ptr() will return pointer to a CD table entry without
> > doing any kind of allocation.
> > 
> > arm_smmu_alloc_cd_ptr() will allocate the table and any required
> > leaf.
> > 
> > A following patch will add lockdep assertions to arm_smmu_alloc_cd_ptr()
> > once the restructuring is completed and arm_smmu_alloc_cd_ptr() is never
> > called in the wrong context.
> > 
> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > ---
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 61 +++++++++++++--------
> >  1 file changed, 39 insertions(+), 22 deletions(-)
> > 
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index f3df1ec8d258dc..a0d1237272936f 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -98,6 +98,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
> >  
> >  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> >  				    struct arm_smmu_device *smmu);
> > +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
> >  
> >  static void parse_driver_options(struct arm_smmu_device *smmu)
> >  {
> > @@ -1207,29 +1208,51 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst,
> >  struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
> >  					u32 ssid)
> >  {
> > -	__le64 *l1ptr;
> > -	unsigned int idx;
> >  	struct arm_smmu_l1_ctx_desc *l1_desc;
> > -	struct arm_smmu_device *smmu = master->smmu;
> >  	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> >  
> > +	if (!cd_table->cdtab)
> > +		return NULL;
> > +
> >  	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
> >  		return (struct arm_smmu_cd *)(cd_table->cdtab +
> >  					      ssid * CTXDESC_CD_DWORDS);
> >  
> > -	idx = ssid >> CTXDESC_SPLIT;
> > -	l1_desc = &cd_table->l1_desc[idx];
> > -	if (!l1_desc->l2ptr) {
> > -		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
> > -			return NULL;
> > +	l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES];
> 
> These operations used to be shift and bit masking which made sense as it does
> what hardware does, is there any reason you changed it to division and modulo?
> I checked the disassembly and gcc does the right thing as constants are power
> of 2, but I am just curious.

I generally prefer the clarity and succinctness of / and % instead of
hacking up bit operations that the compiler will generate
automatically anyhow.

If bit extraction is to be used, it is better to wrap it in
FIELD_GET() than to open code it.

> > +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
> > +						 u32 ssid)
> > +{
> > +	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> > +	struct arm_smmu_device *smmu = master->smmu;
> > +
> > +	if (!cd_table->cdtab) {
> > +		if (arm_smmu_alloc_cd_tables(master))
> > +			return NULL;
> >  	}
> > -	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
> > -	return &l1_desc->l2ptr[idx];
> > +
> > +	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
> > +		unsigned int idx = ssid >> CTXDESC_SPLIT;
> 
> Ok, now it’s a shift, I think we should be consistent with how we
> calculate the index.

Sure. Changing that to / will make CTXDESC_SPLIT unused except in
computing CTXDESC_L2_ENTRIES, so that can be simplified too:

-#define CTXDESC_SPLIT                  10
-#define CTXDESC_L2_ENTRIES             (1 << CTXDESC_SPLIT)
+#define CTXDESC_L2_ENTRIES             1024


> > @@ -1357,7 +1380,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
> >  	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
> >  		return -E2BIG;
> >  
> > -	cd_table_entry = arm_smmu_get_cd_ptr(master, ssid);
> > +	cd_table_entry = arm_smmu_alloc_cd_ptr(master, ssid);
> 
> The only path allocates the main table is “arm_smmu_attach_dev”,

There are two places that allocate the leaf, arm_smmu_attach_dev()
(for the RID) and arm_smmu_sva_set_dev_pasid() (for a PASID)

At this moment all the paths are relying on the above to allocate the
leaf. The next patch makes arm_smmu_attach_dev() allocate the leaf
itself. A few patches after that, the PASID path also allocates the leaf
itself, at which point the above is removed.

> I guess it would be more robust to leave that as is and have 2
> versions of get_cd, one that allocates leaf and one that is not
> allocating, what do you think?

I'm not sure what you are asking. We have two versions. One is called
alloc and one is called get. They have different locking requirements
on the caller, so they have different names. I would not call them both
get.

Thanks,
Jason
Mostafa Saleh April 27, 2024, 10:19 p.m. UTC | #4
On Mon, Apr 22, 2024 at 11:20:53AM -0300, Jason Gunthorpe wrote:
> On Fri, Apr 19, 2024 at 09:14:21PM +0000, Mostafa Saleh wrote:
> > Hi Jason,
> > 
> > On Tue, Apr 16, 2024 at 04:28:16PM -0300, Jason Gunthorpe wrote:
> > > Only the attach callers can perform an allocation for the CD table entry,
> > > the other callers must not do so, they do not have the correct locking and
> > > they cannot sleep. Split up the functions so this is clear.
> > > 
> > > arm_smmu_get_cd_ptr() will return pointer to a CD table entry without
> > > doing any kind of allocation.
> > > 
> > > arm_smmu_alloc_cd_ptr() will allocate the table and any required
> > > leaf.
> > > 
> > > A following patch will add lockdep assertions to arm_smmu_alloc_cd_ptr()
> > > once the restructuring is completed and arm_smmu_alloc_cd_ptr() is never
> > > called in the wrong context.
> > > 
> > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> > > ---
> > >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 61 +++++++++++++--------
> > >  1 file changed, 39 insertions(+), 22 deletions(-)
> > > 
> > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > index f3df1ec8d258dc..a0d1237272936f 100644
> > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > > @@ -98,6 +98,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
> > >  
> > >  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> > >  				    struct arm_smmu_device *smmu);
> > > +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
> > >  
> > >  static void parse_driver_options(struct arm_smmu_device *smmu)
> > >  {
> > > @@ -1207,29 +1208,51 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst,
> > >  struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
> > >  					u32 ssid)
> > >  {
> > > -	__le64 *l1ptr;
> > > -	unsigned int idx;
> > >  	struct arm_smmu_l1_ctx_desc *l1_desc;
> > > -	struct arm_smmu_device *smmu = master->smmu;
> > >  	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> > >  
> > > +	if (!cd_table->cdtab)
> > > +		return NULL;
> > > +
> > >  	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
> > >  		return (struct arm_smmu_cd *)(cd_table->cdtab +
> > >  					      ssid * CTXDESC_CD_DWORDS);
> > >  
> > > -	idx = ssid >> CTXDESC_SPLIT;
> > > -	l1_desc = &cd_table->l1_desc[idx];
> > > -	if (!l1_desc->l2ptr) {
> > > -		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
> > > -			return NULL;
> > > +	l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES];
> > 
> > These operations used to be shift and bit masking which made sense as it does
> > what hardware does, is there any reason you changed it to division and modulo?
> > I checked the disassembly and gcc does the right thing as constants are power
> > of 2, but I am just curious.
> 
> I generally prefer the clarity and succinctness of / and % instead of
> hacking up bit operations that the compiler will generate
> automatically anyhow.
> 
> If bit extractions should be used it is better to wrap it in
> FIELD_GET() than open code it..
> 
> > > +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
> > > +						 u32 ssid)
> > > +{
> > > +	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> > > +	struct arm_smmu_device *smmu = master->smmu;
> > > +
> > > +	if (!cd_table->cdtab) {
> > > +		if (arm_smmu_alloc_cd_tables(master))
> > > +			return NULL;
> > >  	}
> > > -	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
> > > -	return &l1_desc->l2ptr[idx];
> > > +
> > > +	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
> > > +		unsigned int idx = ssid >> CTXDESC_SPLIT;
> > 
> > Ok, now it’s a shift, I think we should be consistent with how we
> > calculate the index.
> 
> Sure. Change that to / will make CTXDESC_SPLIT unused except in
> computing CTXDESC_L2_ENTRIES so that can be simplified too:
> 
> -#define CTXDESC_SPLIT                  10
> -#define CTXDESC_L2_ENTRIES             (1 << CTXDESC_SPLIT)
> +#define CTXDESC_L2_ENTRIES             1024
> 

Sounds good. I don’t think it matters much as long as it’s consistent, but
anyway the split is defined by the spec to be either 6, 8 or 10,
so the split size has to be a power of 2.

> 
> > > @@ -1357,7 +1380,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
> > >  	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
> > >  		return -E2BIG;
> > >  
> > > -	cd_table_entry = arm_smmu_get_cd_ptr(master, ssid);
> > > +	cd_table_entry = arm_smmu_alloc_cd_ptr(master, ssid);
> > 
> > The only path allocates the main table is “arm_smmu_attach_dev”,
> 
> There are two places that allocate the leaf, arm_smmu_attach_dev()
> (for the RID) and arm_smmu_sva_set_dev_pasid() (for a PASID)
> 
> At this moment all the paths are relying on the above to allocate the
> leaf. The next patch makes arm_smmu_attach_dev() allocate the leaf
> itself. A few more patches also makes the PASID path allocate the leaf
> itself, when the above is removed.
> 
> > I guess it would be more robust to leave that as is and have 2
> > versions of get_cd, one that allocates leaf and one that is not
> > allocating, what do you think?
> 
> I'm not sure what you are asking? We have two versions. One is called
> alloc and one is called get. That have different locking requirements
> on the caller so they have different names. I would not call them both
> get?
> 

My point is that arm_smmu_alloc_cd_ptr() doesn’t only allocate the leaf,
but also the L1 table, through arm_smmu_alloc_cd_tables().

IMO, arm_smmu_alloc_cd_ptr() should only allocate leaves, and
arm_smmu_attach_dev() should call arm_smmu_alloc_cd_tables() itself.
This makes it clear which path is expected to allocate the L1 table.

And arm_smmu_get_cd_ptr() will remain as is.

Thanks,
Mostafa

> Thanks,
> Jason
Jason Gunthorpe April 29, 2024, 2:01 p.m. UTC | #5
On Sat, Apr 27, 2024 at 10:19:37PM +0000, Mostafa Saleh wrote:

> > I'm not sure what you are asking? We have two versions. One is called
> > alloc and one is called get. That have different locking requirements
> > on the caller so they have different names. I would not call them both
> > get?
> > 
> 
> My point is that arm_smmu_alloc_cd_ptr() doesn’t only allocate the leaf,
> but also the L1 through arm_smmu_alloc_cd_tables()

Sure, it is called alloc, it allocs everything to make the CD table
entry usable.

> IMO, arm_smmu_alloc_cd_ptr() should only allocate leafs. And inside
> arm_smmu_attach_dev() it calls arm_smmu_alloc_cd_tables().
> This makes it clear which path is expected to allocate the L1 table.

The PASID path sometimes has to allocate the L1 table too, why
duplicate the allocation code?

What is different about the L1 vs L2 that it should be open coded?

Jason
Mostafa Saleh April 29, 2024, 2:47 p.m. UTC | #6
On Mon, Apr 29, 2024 at 11:01:37AM -0300, Jason Gunthorpe wrote:
> On Sat, Apr 27, 2024 at 10:19:37PM +0000, Mostafa Saleh wrote:
> 
> > > I'm not sure what you are asking? We have two versions. One is called
> > > alloc and one is called get. That have different locking requirements
> > > on the caller so they have different names. I would not call them both
> > > get?
> > > 
> > 
> > My point is that arm_smmu_alloc_cd_ptr() doesn’t only allocate the leaf,
> > but also the L1 through arm_smmu_alloc_cd_tables()
> 
> Sure, it is called alloc, it allocs everything to make the CD table
> entry usable.

Maybe if it’s called alloc_leaf, it only allocates leafs :)

> 
> > IMO, arm_smmu_alloc_cd_ptr() should only allocate leafs. And inside
> > arm_smmu_attach_dev() it calls arm_smmu_alloc_cd_tables().
> > This makes it clear which path is expected to allocate the L1 table.
> 
> The PASID path sometimes has to allocate the L1 table too, why
> duplicate the allocation code?
> 
> What is different about the L1 vs L2 that it should be open coded?
> 

I don’t think it is a big problem, but my main concern is robustness.
For example, a small erroneous code change might trigger allocation of
the L1 table from a path that shouldn’t allocate it, and that might go
unnoticed because this function allows it, leading to memory leaks or
other issues that might be harder to triage later. If instead we limit
which path allocates which level, the function would return NULL in
that case and fail immediately.

Thanks,
Mostafa
> Jason
Jason Gunthorpe April 29, 2024, 2:55 p.m. UTC | #7
On Mon, Apr 29, 2024 at 02:47:26PM +0000, Mostafa Saleh wrote:

> > > IMO, arm_smmu_alloc_cd_ptr() should only allocate leafs. And inside
> > > arm_smmu_attach_dev() it calls arm_smmu_alloc_cd_tables().
> > > This makes it clear which path is expected to allocate the L1 table.
> > 
> > The PASID path sometimes has to allocate the L1 table too, why
> > duplicate the allocation code?
> > 
> > What is different about the L1 vs L2 that it should be open coded?
> 
> I don’t think it is a big problem, but my main concern is robustness,
> for example a small erroneous code change might trigger allocation for
> L1 table from a path that shouldn’t,

A few patches later we add a lockdep assertion, so a wrongly placed
allocation is *very* likely to trip it. If the lockdep assertion is
satisfied then it is not going to cause a functional problem.

> and that might go unnoticed as
> this function will allow it, leading to memory leaks, 

Any cd table memory allocated by arm_smmu_alloc_cd_ptr() is reliably
freed in the arm_smmu_release_device().

> or other issues that might be harder to triage later, instead with
> limiting which path allocates which level, would return a NULL in
> that case and fail immediately.

All cases that need to allocate a leaf need to allocate the L1 too, so it
is artificial to make a distinction between them.

Jason
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index f3df1ec8d258dc..a0d1237272936f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -98,6 +98,7 @@  static struct arm_smmu_option_prop arm_smmu_options[] = {
 
 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
 				    struct arm_smmu_device *smmu);
+static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
 
 static void parse_driver_options(struct arm_smmu_device *smmu)
 {
@@ -1207,29 +1208,51 @@  static void arm_smmu_write_cd_l1_desc(__le64 *dst,
 struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
 					u32 ssid)
 {
-	__le64 *l1ptr;
-	unsigned int idx;
 	struct arm_smmu_l1_ctx_desc *l1_desc;
-	struct arm_smmu_device *smmu = master->smmu;
 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
 
+	if (!cd_table->cdtab)
+		return NULL;
+
 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
 		return (struct arm_smmu_cd *)(cd_table->cdtab +
 					      ssid * CTXDESC_CD_DWORDS);
 
-	idx = ssid >> CTXDESC_SPLIT;
-	l1_desc = &cd_table->l1_desc[idx];
-	if (!l1_desc->l2ptr) {
-		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
-			return NULL;
+	l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES];
+	if (!l1_desc->l2ptr)
+		return NULL;
+	return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES];
+}
 
-		l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
-		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
-		/* An invalid L1CD can be cached */
-		arm_smmu_sync_cd(master, ssid, false);
+static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
+						 u32 ssid)
+{
+	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
+	struct arm_smmu_device *smmu = master->smmu;
+
+	if (!cd_table->cdtab) {
+		if (arm_smmu_alloc_cd_tables(master))
+			return NULL;
 	}
-	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
-	return &l1_desc->l2ptr[idx];
+
+	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
+		unsigned int idx = ssid >> CTXDESC_SPLIT;
+		struct arm_smmu_l1_ctx_desc *l1_desc;
+
+		l1_desc = &cd_table->l1_desc[idx];
+		if (!l1_desc->l2ptr) {
+			__le64 *l1ptr;
+
+			if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
+				return NULL;
+
+			l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
+			arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
+			/* An invalid L1CD can be cached */
+			arm_smmu_sync_cd(master, ssid, false);
+		}
+	}
+	return arm_smmu_get_cd_ptr(master, ssid);
 }
 
 struct arm_smmu_cd_writer {
@@ -1357,7 +1380,7 @@  int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
 	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
 		return -E2BIG;
 
-	cd_table_entry = arm_smmu_get_cd_ptr(master, ssid);
+	cd_table_entry = arm_smmu_alloc_cd_ptr(master, ssid);
 	if (!cd_table_entry)
 		return -ENOMEM;
 
@@ -2687,13 +2710,7 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		struct arm_smmu_cd target_cd;
 		struct arm_smmu_cd *cdptr;
 
-		if (!master->cd_table.cdtab) {
-			ret = arm_smmu_alloc_cd_tables(master);
-			if (ret)
-				goto out_list_del;
-		}
-
-		cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID);
+		cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
 		if (!cdptr) {
 			ret = -ENOMEM;
 			goto out_list_del;