diff mbox series

[v4,11/13] iommu/arm-smmu-v3: Improve add_device() error handling

Message ID 20191219163033.2608177-12-jean-philippe@linaro.org (mailing list archive)
State Superseded, archived
Headers show
Series [v4,01/13] iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation | expand

Commit Message

Jean-Philippe Brucker Dec. 19, 2019, 4:30 p.m. UTC
Let add_device() clean up after itself. The iommu_bus_init() function
does call remove_device() on error, but other sites (e.g. of_iommu) do
not.

Don't free level-2 stream tables because we'd have to track if we
allocated each of them or if they are used by other endpoints. It's not
worth the hassle since they are managed resources.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---
 drivers/iommu/arm-smmu-v3.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

Comments

Will Deacon Jan. 14, 2020, 3:25 p.m. UTC | #1
On Thu, Dec 19, 2019 at 05:30:31PM +0100, Jean-Philippe Brucker wrote:
> Let add_device() clean up after itself. The iommu_bus_init() function
> does call remove_device() on error, but other sites (e.g. of_iommu) do
> not.
> 
> Don't free level-2 stream tables because we'd have to track if we
> allocated each of them or if they are used by other endpoints. It's not
> worth the hassle since they are managed resources.
> 
> Reviewed-by: Eric Auger <eric.auger@redhat.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  drivers/iommu/arm-smmu-v3.c | 28 +++++++++++++++++++++-------
>  1 file changed, 21 insertions(+), 7 deletions(-)

I think this is alright, with one caveat relating to:


	/*
	 * We _can_ actually withstand dodgy bus code re-calling add_device()
	 * without an intervening remove_device()/of_xlate() sequence, but
	 * we're not going to do so quietly...
	 */
	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
		master = fwspec->iommu_priv;
		smmu = master->smmu;
	} ...


which may be on shakey ground if the subsequent add_device() call can fail
and free stuff that the first one allocated. At least, I don't know what
we're trying to support with this, so it's hard to tell whether or not it
still works as intended after your change.

How is this supposed to work? I don't recall ever seeing that WARN fire,
so can we just remove this and bail instead? Robin?

Something like below before your changes...

Will

--->8

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index effe72eb89e7..6ae3df2f3495 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2534,28 +2534,23 @@ static int arm_smmu_add_device(struct device *dev)
 
 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
 		return -ENODEV;
-	/*
-	 * We _can_ actually withstand dodgy bus code re-calling add_device()
-	 * without an intervening remove_device()/of_xlate() sequence, but
-	 * we're not going to do so quietly...
-	 */
-	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
-		master = fwspec->iommu_priv;
-		smmu = master->smmu;
-	} else {
-		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
-		if (!smmu)
-			return -ENODEV;
-		master = kzalloc(sizeof(*master), GFP_KERNEL);
-		if (!master)
-			return -ENOMEM;
 
-		master->dev = dev;
-		master->smmu = smmu;
-		master->sids = fwspec->ids;
-		master->num_sids = fwspec->num_ids;
-		fwspec->iommu_priv = master;
-	}
+	if (WARN_ON_ONCE(fwspec->iommu_priv))
+		return -EBUSY;
+
+	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
+	if (!smmu)
+		return -ENODEV;
+
+	master = kzalloc(sizeof(*master), GFP_KERNEL);
+	if (!master)
+		return -ENOMEM;
+
+	master->dev = dev;
+	master->smmu = smmu;
+	master->sids = fwspec->ids;
+	master->num_sids = fwspec->num_ids;
+	fwspec->iommu_priv = master;
 
 	/* Check the SIDs are in range of the SMMU and our stream table */
 	for (i = 0; i < master->num_sids; i++) {
Will Deacon Jan. 15, 2020, 4:17 p.m. UTC | #2
On Tue, Jan 14, 2020 at 03:25:39PM +0000, Will Deacon wrote:
> On Thu, Dec 19, 2019 at 05:30:31PM +0100, Jean-Philippe Brucker wrote:
> > Let add_device() clean up after itself. The iommu_bus_init() function
> > does call remove_device() on error, but other sites (e.g. of_iommu) do
> > not.
> > 
> > Don't free level-2 stream tables because we'd have to track if we
> > allocated each of them or if they are used by other endpoints. It's not
> > worth the hassle since they are managed resources.
> > 
> > Reviewed-by: Eric Auger <eric.auger@redhat.com>
> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> > ---
> >  drivers/iommu/arm-smmu-v3.c | 28 +++++++++++++++++++++-------
> >  1 file changed, 21 insertions(+), 7 deletions(-)
> 
> I think this is alright, with one caveat relating to:
> 
> 
> 	/*
> 	 * We _can_ actually withstand dodgy bus code re-calling add_device()
> 	 * without an intervening remove_device()/of_xlate() sequence, but
> 	 * we're not going to do so quietly...
> 	 */
> 	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
> 		master = fwspec->iommu_priv;
> 		smmu = master->smmu;
> 	} ...
> 
> 
> which may be on shakey ground if the subsequent add_device() call can fail
> and free stuff that the first one allocated. At least, I don't know what
> we're trying to support with this, so it's hard to tell whether or not it
> still works as intended after your change.
> 
> How is this supposed to work? I don't recall ever seeing that WARN fire,
> so can we just remove this and bail instead? Robin?
> 
> Something like below before your changes...

FWIW, I've written this as a patch locally, since I'd like to apply it
on top of v5 of your series.

Will

--->8

From 6029102f406d4db5e7a465da5fd2e08a5b12c532 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Wed, 15 Jan 2020 15:35:16 +0000
Subject: [PATCH] iommu/arm-smmu-v3: Return -EBUSY when trying to re-add a
 device

Although we WARN in arm_smmu_add_device() if the device being added has
been added already without a subsequent call to arm_smmu_remove_device(),
we still continue half-heartedly, initialising the stream-table for any
new StreamIDs that may have magically appeared and re-establishing device
links that should still be there from last time.

Given that calling ->add_device() twice without removing the device in the
meantime is indicative of an error in the caller, just return -EBUSY after
warning.

Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Jean Philippe-Brucker <jean-philippe@linaro.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm-smmu-v3.c | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index efa326601308..cc26e1323da3 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2841,28 +2841,23 @@ static int arm_smmu_add_device(struct device *dev)
 
 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
 		return -ENODEV;
-	/*
-	 * We _can_ actually withstand dodgy bus code re-calling add_device()
-	 * without an intervening remove_device()/of_xlate() sequence, but
-	 * we're not going to do so quietly...
-	 */
-	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
-		master = fwspec->iommu_priv;
-		smmu = master->smmu;
-	} else {
-		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
-		if (!smmu)
-			return -ENODEV;
-		master = kzalloc(sizeof(*master), GFP_KERNEL);
-		if (!master)
-			return -ENOMEM;
 
-		master->dev = dev;
-		master->smmu = smmu;
-		master->sids = fwspec->ids;
-		master->num_sids = fwspec->num_ids;
-		fwspec->iommu_priv = master;
-	}
+	if (WARN_ON_ONCE(fwspec->iommu_priv))
+		return -EBUSY;
+
+	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
+	if (!smmu)
+		return -ENODEV;
+
+	master = kzalloc(sizeof(*master), GFP_KERNEL);
+	if (!master)
+		return -ENOMEM;
+
+	master->dev = dev;
+	master->smmu = smmu;
+	master->sids = fwspec->ids;
+	master->num_sids = fwspec->num_ids;
+	fwspec->iommu_priv = master;
 
 	/* Check the SIDs are in range of the SMMU and our stream table */
 	for (i = 0; i < master->num_sids; i++) {
Robin Murphy Jan. 15, 2020, 5:44 p.m. UTC | #3
On 14/01/2020 3:25 pm, Will Deacon wrote:
> On Thu, Dec 19, 2019 at 05:30:31PM +0100, Jean-Philippe Brucker wrote:
>> Let add_device() clean up after itself. The iommu_bus_init() function
>> does call remove_device() on error, but other sites (e.g. of_iommu) do
>> not.
>>
>> Don't free level-2 stream tables because we'd have to track if we
>> allocated each of them or if they are used by other endpoints. It's not
>> worth the hassle since they are managed resources.
>>
>> Reviewed-by: Eric Auger <eric.auger@redhat.com>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
>> ---
>>   drivers/iommu/arm-smmu-v3.c | 28 +++++++++++++++++++++-------
>>   1 file changed, 21 insertions(+), 7 deletions(-)
> 
> I think this is alright, with one caveat relating to:
> 
> 
> 	/*
> 	 * We _can_ actually withstand dodgy bus code re-calling add_device()
> 	 * without an intervening remove_device()/of_xlate() sequence, but
> 	 * we're not going to do so quietly...
> 	 */
> 	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
> 		master = fwspec->iommu_priv;
> 		smmu = master->smmu;
> 	} ...
> 
> 
> which may be on shakey ground if the subsequent add_device() call can fail
> and free stuff that the first one allocated. At least, I don't know what
> we're trying to support with this, so it's hard to tell whether or not it
> still works as intended after your change.

Hmm, if add_device() ever did fail it should really be expected to 
return the device back to an un-added state, so I don't believe that 
particular concern should be significant regardless...
> How is this supposed to work? I don't recall ever seeing that WARN fire,
> so can we just remove this and bail instead? Robin?

However, I am inclined to agree that it's probably better to make it all 
moot. Although it indeed should never happen, ISTR at the time there 
appeared to be some possible path somewhere by which the notifier may 
have been triggered a second time - possibly if some other device failed 
or deferred after the first call, triggering the bus code to start all 
over again. Since then, though, we've made a lot of changes to how 
->add_device usually gets called, plus stuff like the 
iommu_device_link() call has snuck in that might not stand up to a 
replay anyway, so I don't see any problem with making this condition a 
hard failure. It's certainly much easier to reason about.

In fact, there will already be a WARN from iommu_probe_device() now 
(because the first call will have set the group), so I don't think we 
need any additional diagnostic in the driver any more.

Robin.

> Something like below before your changes...
> 
> Will
> 
> --->8
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index effe72eb89e7..6ae3df2f3495 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -2534,28 +2534,23 @@ static int arm_smmu_add_device(struct device *dev)
>   
>   	if (!fwspec || fwspec->ops != &arm_smmu_ops)
>   		return -ENODEV;
> -	/*
> -	 * We _can_ actually withstand dodgy bus code re-calling add_device()
> -	 * without an intervening remove_device()/of_xlate() sequence, but
> -	 * we're not going to do so quietly...
> -	 */
> -	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
> -		master = fwspec->iommu_priv;
> -		smmu = master->smmu;
> -	} else {
> -		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
> -		if (!smmu)
> -			return -ENODEV;
> -		master = kzalloc(sizeof(*master), GFP_KERNEL);
> -		if (!master)
> -			return -ENOMEM;
>   
> -		master->dev = dev;
> -		master->smmu = smmu;
> -		master->sids = fwspec->ids;
> -		master->num_sids = fwspec->num_ids;
> -		fwspec->iommu_priv = master;
> -	}
> +	if (WARN_ON_ONCE(fwspec->iommu_priv))
> +		return -EBUSY;
> +
> +	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
> +	if (!smmu)
> +		return -ENODEV;
> +
> +	master = kzalloc(sizeof(*master), GFP_KERNEL);
> +	if (!master)
> +		return -ENOMEM;
> +
> +	master->dev = dev;
> +	master->smmu = smmu;
> +	master->sids = fwspec->ids;
> +	master->num_sids = fwspec->num_ids;
> +	fwspec->iommu_priv = master;
>   
>   	/* Check the SIDs are in range of the SMMU and our stream table */
>   	for (i = 0; i < master->num_sids; i++) {
>
diff mbox series

Patch

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index bf106a7b53eb..e62ca80f2f76 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2837,14 +2837,16 @@  static int arm_smmu_add_device(struct device *dev)
 	for (i = 0; i < master->num_sids; i++) {
 		u32 sid = master->sids[i];
 
-		if (!arm_smmu_sid_in_range(smmu, sid))
-			return -ERANGE;
+		if (!arm_smmu_sid_in_range(smmu, sid)) {
+			ret = -ERANGE;
+			goto err_free_master;
+		}
 
 		/* Ensure l2 strtab is initialised */
 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
 			ret = arm_smmu_init_l2_strtab(smmu, sid);
 			if (ret)
-				return ret;
+				goto err_free_master;
 		}
 	}
 
@@ -2854,13 +2856,25 @@  static int arm_smmu_add_device(struct device *dev)
 		master->ssid_bits = min_t(u8, master->ssid_bits,
 					  CTXDESC_LINEAR_CDMAX);
 
+	ret = iommu_device_link(&smmu->iommu, dev);
+	if (ret)
+		goto err_free_master;
+
 	group = iommu_group_get_for_dev(dev);
-	if (!IS_ERR(group)) {
-		iommu_group_put(group);
-		iommu_device_link(&smmu->iommu, dev);
+	if (IS_ERR(group)) {
+		ret = PTR_ERR(group);
+		goto err_unlink;
 	}
 
-	return PTR_ERR_OR_ZERO(group);
+	iommu_group_put(group);
+	return 0;
+
+err_unlink:
+	iommu_device_unlink(&smmu->iommu, dev);
+err_free_master:
+	kfree(master);
+	fwspec->iommu_priv = NULL;
+	return ret;
 }
 
 static void arm_smmu_remove_device(struct device *dev)