diff mbox series

iommu/dma: Fix race condition during iova_domain initialization

Message ID 20220530120748.31733-1-yf.wang@mediatek.com (mailing list archive)
State New, archived
Headers show
Series iommu/dma: Fix race condition during iova_domain initialization | expand

Commit Message

yf.wang@mediatek.com May 30, 2022, 12:07 p.m. UTC
From: Yunfei Wang <yf.wang@mediatek.com>

When many devices share the same iova domain, iommu_dma_init_domain()
may be called for several of them at the same time. Each caller's check
of iovad->start_pfn in iommu_dma_init_domain() can then see it as zero
(not yet initialised), so more than one caller enters init_iova_domain()
and initialises the same iovad concurrently.

Fix this by protecting init_iova_domain() with iommu_dma_cookie->mutex.

Exception backtrace:
rb_insert_color(param1=0xFFFFFF80CD2BDB40, param3=1) + 64
init_iova_domain() + 180
iommu_setup_dma_ops() + 260
arch_setup_dma_ops() + 132
of_dma_configure_id() + 468
platform_dma_configure() + 32
really_probe() + 1168
driver_probe_device() + 268
__device_attach_driver() + 524
__device_attach() + 524
bus_probe_device() + 64
deferred_probe_work_func() + 260
process_one_work() + 580
worker_thread() + 1076
kthread() + 332
ret_from_fork() + 16

Signed-off-by: Ning Li <ning.li@mediatek.com>
Signed-off-by: Yunfei Wang <yf.wang@mediatek.com>
---
 drivers/iommu/dma-iommu.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

Comments

Miles Chen June 9, 2022, 5:04 p.m. UTC | #1
Hi YF,

>When many devices share the same iova domain, iommu_dma_init_domain()
>may be called at the same time. The checking of iovad->start_pfn will
>all get false in iommu_dma_init_domain() and both enter init_iova_domain()
>to do iovad initialization.

After reading this patch, my understanding is that we use iovad->start_pfn
as the key to tell whether an iovad is already initialized,
but we do not protect iovad->start_pfn from concurrent access.

So what's happening is like:

   cpu0                                    cpu1
of_dma_configure_id()              of_dma_configure_id()
  arch_setup_dma_ops()               arch_setup_dma_ops()
    iommu_setup_dma_ops()              iommu_setup_dma_ops()
      iommu_dma_init_domain()            iommu_dma_init_domain()
         if (iovad->start_pfn) {           if (iovad->start_pfn) {
         }                                 }
         init_iova_domain()                init_iova_domain()


init_iova_domain() is called at the same time.

>Fix this by protecting init_iova_domain() with iommu_dma_cookie->mutex.

Reviewed-by: Miles Chen <miles.chen@mediatek.com> 

>Exception backtrace:
>rb_insert_color(param1=0xFFFFFF80CD2BDB40, param3=1) + 64
>init_iova_domain() + 180
>iommu_setup_dma_ops() + 260
>arch_setup_dma_ops() + 132
>of_dma_configure_id() + 468
>platform_dma_configure() + 32
>really_probe() + 1168
>driver_probe_device() + 268
>__device_attach_driver() + 524
>__device_attach() + 524
>bus_probe_device() + 64
>deferred_probe_work_func() + 260
>process_one_work() + 580
>worker_thread() + 1076
>kthread() + 332
>ret_from_fork() + 16
>
>Signed-off-by: Ning Li <ning.li@mediatek.com>
>Signed-off-by: Yunfei Wang <yf.wang@mediatek.com>
>---
> drivers/iommu/dma-iommu.c | 17 +++++++++++++----
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
>diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
>index 09f6e1c0f9c0..b38c5041eeab 100644
>--- a/drivers/iommu/dma-iommu.c
>+++ b/drivers/iommu/dma-iommu.c
>@@ -63,6 +63,7 @@ struct iommu_dma_cookie {
> 
> 	/* Domain for flush queue callback; NULL if flush queue not in use */
> 	struct iommu_domain		*fq_domain;
>+	struct mutex			mutex;
> };
> 
> static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
>@@ -309,6 +310,7 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
> 	if (!domain->iova_cookie)
> 		return -ENOMEM;
> 
>+	mutex_init(&domain->iova_cookie->mutex);
> 	return 0;
> }
> 
>@@ -549,26 +551,33 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
> 	}
> 
> 	/* start_pfn is always nonzero for an already-initialised domain */
>+	mutex_lock(&cookie->mutex);
> 	if (iovad->start_pfn) {
> 		if (1UL << order != iovad->granule ||
> 		    base_pfn != iovad->start_pfn) {
> 			pr_warn("Incompatible range for DMA domain\n");
>-			return -EFAULT;
>+			ret = -EFAULT;
>+			goto done_unlock;
> 		}
> 
>-		return 0;
>+		ret = 0;
>+		goto done_unlock;
> 	}
> 
> 	init_iova_domain(iovad, 1UL << order, base_pfn);
> 	ret = iova_domain_init_rcaches(iovad);
> 	if (ret)
>-		return ret;
>+		goto done_unlock;
> 
> 	/* If the FQ fails we can simply fall back to strict mode */
> 	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
> 		domain->type = IOMMU_DOMAIN_DMA;
> 
>-	return iova_reserve_iommu_regions(dev, domain);
>+	ret = iova_reserve_iommu_regions(dev, domain);
>+
>+done_unlock:
>+	mutex_unlock(&cookie->mutex);
>+	return ret;
> }
> 
> /**
>-- 
>2.18.0
>
>
>_______________________________________________
>Linux-mediatek mailing list
>Linux-mediatek@lists.infradead.org
>http://lists.infradead.org/mailman/listinfo/linux-mediatek
>
Joerg Roedel June 22, 2022, 12:46 p.m. UTC | #2
Please re-send with

	Robin Murphy <robin.murphy@arm.com>

in Cc.

On Mon, May 30, 2022 at 08:07:45PM +0800, yf.wang@mediatek.com wrote:
> From: Yunfei Wang <yf.wang@mediatek.com>
> 
> When many devices share the same iova domain, iommu_dma_init_domain()
> may be called at the same time. The checking of iovad->start_pfn will
> all get false in iommu_dma_init_domain() and both enter init_iova_domain()
> to do iovad initialization.
> 
> Fix this by protecting init_iova_domain() with iommu_dma_cookie->mutex.
> 
> Exception backtrace:
> rb_insert_color(param1=0xFFFFFF80CD2BDB40, param3=1) + 64
> init_iova_domain() + 180
> iommu_setup_dma_ops() + 260
> arch_setup_dma_ops() + 132
> of_dma_configure_id() + 468
> platform_dma_configure() + 32
> really_probe() + 1168
> driver_probe_device() + 268
> __device_attach_driver() + 524
> __device_attach() + 524
> bus_probe_device() + 64
> deferred_probe_work_func() + 260
> process_one_work() + 580
> worker_thread() + 1076
> kthread() + 332
> ret_from_fork() + 16
> 
> Signed-off-by: Ning Li <ning.li@mediatek.com>
> Signed-off-by: Yunfei Wang <yf.wang@mediatek.com>
> ---
>  drivers/iommu/dma-iommu.c | 17 +++++++++++++----
>  1 file changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 09f6e1c0f9c0..b38c5041eeab 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -63,6 +63,7 @@ struct iommu_dma_cookie {
>  
>  	/* Domain for flush queue callback; NULL if flush queue not in use */
>  	struct iommu_domain		*fq_domain;
> +	struct mutex			mutex;
>  };
>  
>  static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
> @@ -309,6 +310,7 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
>  	if (!domain->iova_cookie)
>  		return -ENOMEM;
>  
> +	mutex_init(&domain->iova_cookie->mutex);
>  	return 0;
>  }
>  
> @@ -549,26 +551,33 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
>  	}
>  
>  	/* start_pfn is always nonzero for an already-initialised domain */
> +	mutex_lock(&cookie->mutex);
>  	if (iovad->start_pfn) {
>  		if (1UL << order != iovad->granule ||
>  		    base_pfn != iovad->start_pfn) {
>  			pr_warn("Incompatible range for DMA domain\n");
> -			return -EFAULT;
> +			ret = -EFAULT;
> +			goto done_unlock;
>  		}
>  
> -		return 0;
> +		ret = 0;
> +		goto done_unlock;
>  	}
>  
>  	init_iova_domain(iovad, 1UL << order, base_pfn);
>  	ret = iova_domain_init_rcaches(iovad);
>  	if (ret)
> -		return ret;
> +		goto done_unlock;
>  
>  	/* If the FQ fails we can simply fall back to strict mode */
>  	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
>  		domain->type = IOMMU_DOMAIN_DMA;
>  
> -	return iova_reserve_iommu_regions(dev, domain);
> +	ret = iova_reserve_iommu_regions(dev, domain);
> +
> +done_unlock:
> +	mutex_unlock(&cookie->mutex);
> +	return ret;
>  }
>  
>  /**
> -- 
> 2.18.0
Robin Murphy June 22, 2022, 1:27 p.m. UTC | #3
On 2022-06-22 13:46, Joerg Roedel wrote:
> Please re-send with
> 
> 	Robin Murphy <robin.murphy@arm.com>
> 
> in Cc.

Apologies, I did spot this before, I've just been tied up with other 
things and dropping everything non-critical on the floor, so didn't get 
round to replying before it slipped my mind again.

In summary, I hate it, but mostly because the whole situation of calling 
iommu_probe_device off the back of driver probe is fundamentally broken. 
I'm still a few steps away from fixing that properly, at which point I 
can just as well rip all these little bodges out again. If it really 
does need mitigating in the meantime (i.e. this is real-world async 
probe, not just some contrived testcase), then I can't easily think of 
any cleaner hack, so,

Acked-by: Robin Murphy <robin.murphy@arm.com>

(somewhat reluctantly)

Cheers,
Robin.

> On Mon, May 30, 2022 at 08:07:45PM +0800, yf.wang@mediatek.com wrote:
>> From: Yunfei Wang <yf.wang@mediatek.com>
>>
>> When many devices share the same iova domain, iommu_dma_init_domain()
>> may be called at the same time. The checking of iovad->start_pfn will
>> all get false in iommu_dma_init_domain() and both enter init_iova_domain()
>> to do iovad initialization.
>>
>> Fix this by protecting init_iova_domain() with iommu_dma_cookie->mutex.
>>
>> Exception backtrace:
>> rb_insert_color(param1=0xFFFFFF80CD2BDB40, param3=1) + 64
>> init_iova_domain() + 180
>> iommu_setup_dma_ops() + 260
>> arch_setup_dma_ops() + 132
>> of_dma_configure_id() + 468
>> platform_dma_configure() + 32
>> really_probe() + 1168
>> driver_probe_device() + 268
>> __device_attach_driver() + 524
>> __device_attach() + 524
>> bus_probe_device() + 64
>> deferred_probe_work_func() + 260
>> process_one_work() + 580
>> worker_thread() + 1076
>> kthread() + 332
>> ret_from_fork() + 16
>>
>> Signed-off-by: Ning Li <ning.li@mediatek.com>
>> Signed-off-by: Yunfei Wang <yf.wang@mediatek.com>
>> ---
>>   drivers/iommu/dma-iommu.c | 17 +++++++++++++----
>>   1 file changed, 13 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
>> index 09f6e1c0f9c0..b38c5041eeab 100644
>> --- a/drivers/iommu/dma-iommu.c
>> +++ b/drivers/iommu/dma-iommu.c
>> @@ -63,6 +63,7 @@ struct iommu_dma_cookie {
>>   
>>   	/* Domain for flush queue callback; NULL if flush queue not in use */
>>   	struct iommu_domain		*fq_domain;
>> +	struct mutex			mutex;
>>   };
>>   
>>   static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
>> @@ -309,6 +310,7 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
>>   	if (!domain->iova_cookie)
>>   		return -ENOMEM;
>>   
>> +	mutex_init(&domain->iova_cookie->mutex);
>>   	return 0;
>>   }
>>   
>> @@ -549,26 +551,33 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
>>   	}
>>   
>>   	/* start_pfn is always nonzero for an already-initialised domain */
>> +	mutex_lock(&cookie->mutex);
>>   	if (iovad->start_pfn) {
>>   		if (1UL << order != iovad->granule ||
>>   		    base_pfn != iovad->start_pfn) {
>>   			pr_warn("Incompatible range for DMA domain\n");
>> -			return -EFAULT;
>> +			ret = -EFAULT;
>> +			goto done_unlock;
>>   		}
>>   
>> -		return 0;
>> +		ret = 0;
>> +		goto done_unlock;
>>   	}
>>   
>>   	init_iova_domain(iovad, 1UL << order, base_pfn);
>>   	ret = iova_domain_init_rcaches(iovad);
>>   	if (ret)
>> -		return ret;
>> +		goto done_unlock;
>>   
>>   	/* If the FQ fails we can simply fall back to strict mode */
>>   	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
>>   		domain->type = IOMMU_DOMAIN_DMA;
>>   
>> -	return iova_reserve_iommu_regions(dev, domain);
>> +	ret = iova_reserve_iommu_regions(dev, domain);
>> +
>> +done_unlock:
>> +	mutex_unlock(&cookie->mutex);
>> +	return ret;
>>   }
>>   
>>   /**
>> -- 
>> 2.18.0
> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
Joerg Roedel June 22, 2022, 1:42 p.m. UTC | #4
On Wed, Jun 22, 2022 at 02:27:57PM +0100, Robin Murphy wrote:
> Apologies, I did spot this before, I've just been tied up with other things
> and dropping everything non-critical on the floor, so didn't get round to
> replying before it slipped my mind again.
> 
> In summary, I hate it, but mostly because the whole situation of calling
> iommu_probe_device off the back of driver probe is fundamentally broken. I'm
> still a few steps away from fixing that properly, at which point I can just
> as well rip all these little bodges out again. If it really does need
> mitigating in the meantime (i.e. this is real-world async probe, not just
> some contrived testcase), then I can't easily think of any cleaner hack, so,
> 
> Acked-by: Robin Murphy <robin.murphy@arm.com>

Alright, applied this now.
diff mbox series

Patch

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 09f6e1c0f9c0..b38c5041eeab 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -63,6 +63,7 @@  struct iommu_dma_cookie {
 
 	/* Domain for flush queue callback; NULL if flush queue not in use */
 	struct iommu_domain		*fq_domain;
+	struct mutex			mutex;
 };
 
 static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
@@ -309,6 +310,7 @@  int iommu_get_dma_cookie(struct iommu_domain *domain)
 	if (!domain->iova_cookie)
 		return -ENOMEM;
 
+	mutex_init(&domain->iova_cookie->mutex);
 	return 0;
 }
 
@@ -549,26 +551,33 @@  static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	}
 
 	/* start_pfn is always nonzero for an already-initialised domain */
+	mutex_lock(&cookie->mutex);
 	if (iovad->start_pfn) {
 		if (1UL << order != iovad->granule ||
 		    base_pfn != iovad->start_pfn) {
 			pr_warn("Incompatible range for DMA domain\n");
-			return -EFAULT;
+			ret = -EFAULT;
+			goto done_unlock;
 		}
 
-		return 0;
+		ret = 0;
+		goto done_unlock;
 	}
 
 	init_iova_domain(iovad, 1UL << order, base_pfn);
 	ret = iova_domain_init_rcaches(iovad);
 	if (ret)
-		return ret;
+		goto done_unlock;
 
 	/* If the FQ fails we can simply fall back to strict mode */
 	if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
 		domain->type = IOMMU_DOMAIN_DMA;
 
-	return iova_reserve_iommu_regions(dev, domain);
+	ret = iova_reserve_iommu_regions(dev, domain);
+
+done_unlock:
+	mutex_unlock(&cookie->mutex);
+	return ret;
 }
 
 /**