
[v7,9/9] swiotlb: search the software IO TLB only if the device makes use of it

Message ID: adea71bd1fa8660d4c3157a562431ad8127016d4.1690871004.git.petr.tesarik.ext@huawei.com (mailing list archive)
State: Accepted
Commit: 1395706a14904f2593debecf20f827e72d7392a7
Series: Allow dynamic allocation of software IO TLB bounce buffers

Commit Message

Petr Tesarik Aug. 1, 2023, 6:24 a.m. UTC
From: Petr Tesarik <petr.tesarik.ext@huawei.com>

Skip searching the software IO TLB if a device has never used it, making
sure these devices are not affected by the introduction of multiple IO TLB
memory pools.

An additional memory barrier is required to ensure that the new value of the
flag is visible to other CPUs after mapping a new bounce buffer. For
efficiency, the flag check should be inlined, and then the memory barrier
must be moved to is_swiotlb_buffer(). However, it can replace the existing
barrier in swiotlb_find_pool(), because all callers use is_swiotlb_buffer()
first to verify that the buffer address belongs to the software IO TLB.

Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
---
 include/linux/device.h  |  2 ++
 include/linux/swiotlb.h |  7 ++++++-
 kernel/dma/swiotlb.c    | 14 ++++++--------
 3 files changed, 14 insertions(+), 9 deletions(-)
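
For reference, the inlined flag check described in the commit message would presumably end up looking something like the sketch of is_swiotlb_buffer() below; the READ_ONCE() on dev->dma_uses_io_tlb and the exact shape of the check are assumptions here, not part of the patch as posted (see the discussion in the comments):

static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
{
	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;

	if (!mem)
		return false;

	if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) {
		/* Pairs with smp_wmb() in swiotlb_find_slots() and
		 * swiotlb_dyn_alloc(), which modify the RCU lists.
		 */
		smp_rmb();
		/* Assumed intent: a device that has never bounced skips
		 * the RCU list walk entirely.
		 */
		return READ_ONCE(dev->dma_uses_io_tlb) &&
		       swiotlb_find_pool(dev, paddr);
	}
	return paddr >= mem->defpool.start && paddr < mem->defpool.end;
}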

Comments

Jonathan Corbet Aug. 9, 2023, 9:20 p.m. UTC | #1
Petr Tesarik <petrtesarik@huaweicloud.com> writes:

> From: Petr Tesarik <petr.tesarik.ext@huawei.com>
>
> Skip searching the software IO TLB if a device has never used it, making
> sure these devices are not affected by the introduction of multiple IO TLB
> memory pools.
>
> Additional memory barrier is required to ensure that the new value of the
> flag is visible to other CPUs after mapping a new bounce buffer. For
> efficiency, the flag check should be inlined, and then the memory barrier
> must be moved to is_swiotlb_buffer(). However, it can replace the existing
> barrier in swiotlb_find_pool(), because all callers use is_swiotlb_buffer()
> first to verify that the buffer address belongs to the software IO TLB.
>
> Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
> ---

Excuse me if this is a silly question, but I'm not able to figure it out
on my own...

>  include/linux/device.h  |  2 ++
>  include/linux/swiotlb.h |  7 ++++++-
>  kernel/dma/swiotlb.c    | 14 ++++++--------
>  3 files changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/device.h b/include/linux/device.h
> index 5fd89c9d005c..6fc808d22bfd 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -628,6 +628,7 @@ struct device_physical_location {
>   * @dma_io_tlb_mem: Software IO TLB allocator.  Not for driver use.
>   * @dma_io_tlb_pools:	List of transient swiotlb memory pools.
>   * @dma_io_tlb_lock:	Protects changes to the list of active pools.
> + * @dma_uses_io_tlb: %true if device has used the software IO TLB.
>   * @archdata:	For arch-specific additions.
>   * @of_node:	Associated device tree node.
>   * @fwnode:	Associated device node supplied by platform firmware.
> @@ -737,6 +738,7 @@ struct device {
>  #ifdef CONFIG_SWIOTLB_DYNAMIC
>  	struct list_head dma_io_tlb_pools;
>  	spinlock_t dma_io_tlb_lock;
> +	bool dma_uses_io_tlb;

You add this new member here, fine...

>  #endif
>  	/* arch specific additions */
>  	struct dev_archdata	archdata;
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index 8371c92a0271..b4536626f8ff 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -172,8 +172,13 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
>  	if (!mem)
>  		return false;
>  
> -	if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC))
> +	if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) {
> +		/* Pairs with smp_wmb() in swiotlb_find_slots() and
> +		 * swiotlb_dyn_alloc(), which modify the RCU lists.
> +		 */
> +		smp_rmb();
>  		return swiotlb_find_pool(dev, paddr);
> +	}
>  	return paddr >= mem->defpool.start && paddr < mem->defpool.end;
>  }
>  
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index adf80dec42d7..d7eac84f975b 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -730,7 +730,7 @@ static void swiotlb_dyn_alloc(struct work_struct *work)
>  
>  	add_mem_pool(mem, pool);
>  
> -	/* Pairs with smp_rmb() in swiotlb_find_pool(). */
> +	/* Pairs with smp_rmb() in is_swiotlb_buffer(). */
>  	smp_wmb();
>  }
>  
> @@ -764,11 +764,6 @@ struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr)
>  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>  	struct io_tlb_pool *pool;
>  
> -	/* Pairs with smp_wmb() in swiotlb_find_slots() and
> -	 * swiotlb_dyn_alloc(), which modify the RCU lists.
> -	 */
> -	smp_rmb();
> -
>  	rcu_read_lock();
>  	list_for_each_entry_rcu(pool, &mem->pools, node) {
>  		if (paddr >= pool->start && paddr < pool->end)
> @@ -813,6 +808,7 @@ void swiotlb_dev_init(struct device *dev)
>  #ifdef CONFIG_SWIOTLB_DYNAMIC
>  	INIT_LIST_HEAD(&dev->dma_io_tlb_pools);
>  	spin_lock_init(&dev->dma_io_tlb_lock);
> +	dev->dma_uses_io_tlb = false;

...here you initialize it, fine...

>  #endif
>  }
>  
> @@ -1157,9 +1153,11 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
>  	list_add_rcu(&pool->node, &dev->dma_io_tlb_pools);
>  	spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
>  
> -	/* Pairs with smp_rmb() in swiotlb_find_pool(). */
> -	smp_wmb();
>  found:
> +	dev->dma_uses_io_tlb = true;
> +	/* Pairs with smp_rmb() in is_swiotlb_buffer() */
> +	smp_wmb();
> +

...and here you set it if swiotlb is used.

But, as far as I can tell, you don't actually *use* this field anywhere.
What am I missing?

Thanks,

jon
Jonathan Corbet Aug. 30, 2023, 2:55 p.m. UTC | #2
So it seems this code got merged without this question ever being
answered.  Sorry if it's a dumb one, but I don't think this
functionality works as advertised...

Thanks,

jon

Jonathan Corbet <corbet@lwn.net> writes:

> Petr Tesarik <petrtesarik@huaweicloud.com> writes:
>
>> From: Petr Tesarik <petr.tesarik.ext@huawei.com>
>>
>> Skip searching the software IO TLB if a device has never used it, making
>> sure these devices are not affected by the introduction of multiple IO TLB
>> memory pools.
>>
>> Additional memory barrier is required to ensure that the new value of the
>> flag is visible to other CPUs after mapping a new bounce buffer. For
>> efficiency, the flag check should be inlined, and then the memory barrier
>> must be moved to is_swiotlb_buffer(). However, it can replace the existing
>> barrier in swiotlb_find_pool(), because all callers use is_swiotlb_buffer()
>> first to verify that the buffer address belongs to the software IO TLB.
>>
>> Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
>> ---
>
> Excuse me if this is a silly question, but I'm not able to figure it out
> on my own...
>
>>  include/linux/device.h  |  2 ++
>>  include/linux/swiotlb.h |  7 ++++++-
>>  kernel/dma/swiotlb.c    | 14 ++++++--------
>>  3 files changed, 14 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/linux/device.h b/include/linux/device.h
>> index 5fd89c9d005c..6fc808d22bfd 100644
>> --- a/include/linux/device.h
>> +++ b/include/linux/device.h
>> @@ -628,6 +628,7 @@ struct device_physical_location {
>>   * @dma_io_tlb_mem: Software IO TLB allocator.  Not for driver use.
>>   * @dma_io_tlb_pools:	List of transient swiotlb memory pools.
>>   * @dma_io_tlb_lock:	Protects changes to the list of active pools.
>> + * @dma_uses_io_tlb: %true if device has used the software IO TLB.
>>   * @archdata:	For arch-specific additions.
>>   * @of_node:	Associated device tree node.
>>   * @fwnode:	Associated device node supplied by platform firmware.
>> @@ -737,6 +738,7 @@ struct device {
>>  #ifdef CONFIG_SWIOTLB_DYNAMIC
>>  	struct list_head dma_io_tlb_pools;
>>  	spinlock_t dma_io_tlb_lock;
>> +	bool dma_uses_io_tlb;
>
> You add this new member here, fine...
>
>>  #endif
>>  	/* arch specific additions */
>>  	struct dev_archdata	archdata;
>> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
>> index 8371c92a0271..b4536626f8ff 100644
>> --- a/include/linux/swiotlb.h
>> +++ b/include/linux/swiotlb.h
>> @@ -172,8 +172,13 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
>>  	if (!mem)
>>  		return false;
>>  
>> -	if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC))
>> +	if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) {
>> +		/* Pairs with smp_wmb() in swiotlb_find_slots() and
>> +		 * swiotlb_dyn_alloc(), which modify the RCU lists.
>> +		 */
>> +		smp_rmb();
>>  		return swiotlb_find_pool(dev, paddr);
>> +	}
>>  	return paddr >= mem->defpool.start && paddr < mem->defpool.end;
>>  }
>>  
>> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
>> index adf80dec42d7..d7eac84f975b 100644
>> --- a/kernel/dma/swiotlb.c
>> +++ b/kernel/dma/swiotlb.c
>> @@ -730,7 +730,7 @@ static void swiotlb_dyn_alloc(struct work_struct *work)
>>  
>>  	add_mem_pool(mem, pool);
>>  
>> -	/* Pairs with smp_rmb() in swiotlb_find_pool(). */
>> +	/* Pairs with smp_rmb() in is_swiotlb_buffer(). */
>>  	smp_wmb();
>>  }
>>  
>> @@ -764,11 +764,6 @@ struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr)
>>  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
>>  	struct io_tlb_pool *pool;
>>  
>> -	/* Pairs with smp_wmb() in swiotlb_find_slots() and
>> -	 * swiotlb_dyn_alloc(), which modify the RCU lists.
>> -	 */
>> -	smp_rmb();
>> -
>>  	rcu_read_lock();
>>  	list_for_each_entry_rcu(pool, &mem->pools, node) {
>>  		if (paddr >= pool->start && paddr < pool->end)
>> @@ -813,6 +808,7 @@ void swiotlb_dev_init(struct device *dev)
>>  #ifdef CONFIG_SWIOTLB_DYNAMIC
>>  	INIT_LIST_HEAD(&dev->dma_io_tlb_pools);
>>  	spin_lock_init(&dev->dma_io_tlb_lock);
>> +	dev->dma_uses_io_tlb = false;
>
> ...here you initialize it, fine...
>
>>  #endif
>>  }
>>  
>> @@ -1157,9 +1153,11 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
>>  	list_add_rcu(&pool->node, &dev->dma_io_tlb_pools);
>>  	spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
>>  
>> -	/* Pairs with smp_rmb() in swiotlb_find_pool(). */
>> -	smp_wmb();
>>  found:
>> +	dev->dma_uses_io_tlb = true;
>> +	/* Pairs with smp_rmb() in is_swiotlb_buffer() */
>> +	smp_wmb();
>> +
>
> ...and here you set it if swiotlb is used.
>
> But, as far as I can tell, you don't actually *use* this field anywhere.
> What am I missing?
>
> Thanks,
>
> jon
Christoph Hellwig Aug. 31, 2023, 12:51 p.m. UTC | #3
On Wed, Aug 09, 2023 at 03:20:43PM -0600, Jonathan Corbet wrote:
> >  	spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
> >  
> > -	/* Pairs with smp_rmb() in swiotlb_find_pool(). */
> > -	smp_wmb();
> >  found:
> > +	dev->dma_uses_io_tlb = true;
> > +	/* Pairs with smp_rmb() in is_swiotlb_buffer() */
> > +	smp_wmb();
> > +
> 
> ...and here you set it if swiotlb is used.
> 
> But, as far as I can tell, you don't actually *use* this field anywhere.
> What am I missing?

It's very much unused.  Petr, I guess you wanted to use this in
is_swiotlb_buffer() to avoid the lookup unless required.  Can you send
a follow-up?
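
Such a follow-up would presumably amount to gating the pool search on the flag in the CONFIG_SWIOTLB_DYNAMIC branch of is_swiotlb_buffer(); a minimal sketch, assuming READ_ONCE() is used for the flag (not necessarily the fix that was eventually merged):

	/* After the existing smp_rmb() in is_swiotlb_buffer(): */
	return READ_ONCE(dev->dma_uses_io_tlb) &&
	       swiotlb_find_pool(dev, paddr);

Whether the current smp_wmb()/smp_rmb() pairing is still sufficient once the flag is actually read on this fast path is something such a follow-up would also need to establish.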
Petr Tesařík Sept. 7, 2023, 11:12 a.m. UTC | #4
Hi all,

sorry for my late reply; I've been away from my work setup for a
month...

On Wed, 30 Aug 2023 08:55:51 -0600
Jonathan Corbet <corbet@lwn.net> wrote:

> So it seems this code got merged without this question ever being
> answered.  Sorry if it's a dumb one, but I don't think this
> functionality works as advertised...

Yes, I believe the check was originally in is_swiotlb_buffer(), but it
got lost during one of the numerous rebases of this patch set. Let me
send a follow-up patch after making sure it actually works.

Petr T

> Thanks,
> 
> jon
> 
> Jonathan Corbet <corbet@lwn.net> writes:
> 
> > Petr Tesarik <petrtesarik@huaweicloud.com> writes:
> >  
> >> From: Petr Tesarik <petr.tesarik.ext@huawei.com>
> >>
> >> Skip searching the software IO TLB if a device has never used it,
> >> making sure these devices are not affected by the introduction of
> >> multiple IO TLB memory pools.
> >>
> >> Additional memory barrier is required to ensure that the new value
> >> of the flag is visible to other CPUs after mapping a new bounce
> >> buffer. For efficiency, the flag check should be inlined, and then
> >> the memory barrier must be moved to is_swiotlb_buffer(). However,
> >> it can replace the existing barrier in swiotlb_find_pool(),
> >> because all callers use is_swiotlb_buffer() first to verify that
> >> the buffer address belongs to the software IO TLB.
> >>
> >> Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
> >> ---  
> >
> > Excuse me if this is a silly question, but I'm not able to figure
> > it out on my own...
> >  
> >>  include/linux/device.h  |  2 ++
> >>  include/linux/swiotlb.h |  7 ++++++-
> >>  kernel/dma/swiotlb.c    | 14 ++++++--------
> >>  3 files changed, 14 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/include/linux/device.h b/include/linux/device.h
> >> index 5fd89c9d005c..6fc808d22bfd 100644
> >> --- a/include/linux/device.h
> >> +++ b/include/linux/device.h
> >> @@ -628,6 +628,7 @@ struct device_physical_location {
> >>   * @dma_io_tlb_mem: Software IO TLB allocator.  Not for driver use.
> >>   * @dma_io_tlb_pools:	List of transient swiotlb memory pools.
> >>   * @dma_io_tlb_lock:	Protects changes to the list of active pools.
> >> + * @dma_uses_io_tlb: %true if device has used the software IO TLB.
> >>   * @archdata:	For arch-specific additions.
> >>   * @of_node:	Associated device tree node.
> >>   * @fwnode:	Associated device node supplied by platform firmware.
> >> @@ -737,6 +738,7 @@ struct device {
> >>  #ifdef CONFIG_SWIOTLB_DYNAMIC
> >>  	struct list_head dma_io_tlb_pools;
> >>  	spinlock_t dma_io_tlb_lock;
> >> +	bool dma_uses_io_tlb;  
> >
> > You add this new member here, fine...
> >  
> >>  #endif
> >>  	/* arch specific additions */
> >>  	struct dev_archdata	archdata;
> >> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> >> index 8371c92a0271..b4536626f8ff 100644
> >> --- a/include/linux/swiotlb.h
> >> +++ b/include/linux/swiotlb.h
> >> @@ -172,8 +172,13 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
> >>  	if (!mem)
> >>  		return false;
> >>  
> >> -	if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC))
> >> +	if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) {
> >> +		/* Pairs with smp_wmb() in swiotlb_find_slots() and
> >> +		 * swiotlb_dyn_alloc(), which modify the RCU lists.
> >> +		 */
> >> +		smp_rmb();
> >>  		return swiotlb_find_pool(dev, paddr);
> >> +	}
> >>  	return paddr >= mem->defpool.start && paddr < mem->defpool.end;
> >>  }
> >>  
> >> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> >> index adf80dec42d7..d7eac84f975b 100644
> >> --- a/kernel/dma/swiotlb.c
> >> +++ b/kernel/dma/swiotlb.c
> >> @@ -730,7 +730,7 @@ static void swiotlb_dyn_alloc(struct work_struct *work)
> >>  
> >>  	add_mem_pool(mem, pool);
> >>  
> >> -	/* Pairs with smp_rmb() in swiotlb_find_pool(). */
> >> +	/* Pairs with smp_rmb() in is_swiotlb_buffer(). */
> >>  	smp_wmb();
> >>  }
> >>  
> >> @@ -764,11 +764,6 @@ struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr)
> >>  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> >>  	struct io_tlb_pool *pool;
> >>  
> >> -	/* Pairs with smp_wmb() in swiotlb_find_slots() and
> >> -	 * swiotlb_dyn_alloc(), which modify the RCU lists.
> >> -	 */
> >> -	smp_rmb();
> >> -
> >>  	rcu_read_lock();
> >>  	list_for_each_entry_rcu(pool, &mem->pools, node) {
> >>  		if (paddr >= pool->start && paddr < pool->end)
> >> @@ -813,6 +808,7 @@ void swiotlb_dev_init(struct device *dev)
> >>  #ifdef CONFIG_SWIOTLB_DYNAMIC
> >>  	INIT_LIST_HEAD(&dev->dma_io_tlb_pools);
> >>  	spin_lock_init(&dev->dma_io_tlb_lock);
> >> +	dev->dma_uses_io_tlb = false;  
> >
> > ...here you initialize it, fine...
> >  
> >>  #endif
> >>  }
> >>  
> >> @@ -1157,9 +1153,11 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
> >>  	list_add_rcu(&pool->node, &dev->dma_io_tlb_pools);
> >>  	spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
> >>  
> >> -	/* Pairs with smp_rmb() in swiotlb_find_pool(). */
> >> -	smp_wmb();
> >>  found:
> >> +	dev->dma_uses_io_tlb = true;
> >> +	/* Pairs with smp_rmb() in is_swiotlb_buffer() */
> >> +	smp_wmb();
> >> +  
> >
> > ...and here you set it if swiotlb is used.
> >
> > But, as far as I can tell, you don't actually *use* this field
> > anywhere. What am I missing?
> >
> > Thanks,
> >
> > jon
Christoph Hellwig Sept. 8, 2023, 8 a.m. UTC | #5
On Thu, Sep 07, 2023 at 01:12:23PM +0200, Petr Tesařík wrote:
> Hi all,
> 
> sorry for my late reply; I've been away from my work setup for a
> month...

Please take a look at:

https://lore.kernel.org/linux-iommu/20230905064441.127588-1-hch@lst.de/T/#u

Patch

diff --git a/include/linux/device.h b/include/linux/device.h
index 5fd89c9d005c..6fc808d22bfd 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -628,6 +628,7 @@  struct device_physical_location {
  * @dma_io_tlb_mem: Software IO TLB allocator.  Not for driver use.
  * @dma_io_tlb_pools:	List of transient swiotlb memory pools.
  * @dma_io_tlb_lock:	Protects changes to the list of active pools.
+ * @dma_uses_io_tlb: %true if device has used the software IO TLB.
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
  * @fwnode:	Associated device node supplied by platform firmware.
@@ -737,6 +738,7 @@  struct device {
 #ifdef CONFIG_SWIOTLB_DYNAMIC
 	struct list_head dma_io_tlb_pools;
 	spinlock_t dma_io_tlb_lock;
+	bool dma_uses_io_tlb;
 #endif
 	/* arch specific additions */
 	struct dev_archdata	archdata;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 8371c92a0271..b4536626f8ff 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -172,8 +172,13 @@  static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 	if (!mem)
 		return false;
 
-	if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC))
+	if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) {
+		/* Pairs with smp_wmb() in swiotlb_find_slots() and
+		 * swiotlb_dyn_alloc(), which modify the RCU lists.
+		 */
+		smp_rmb();
 		return swiotlb_find_pool(dev, paddr);
+	}
 	return paddr >= mem->defpool.start && paddr < mem->defpool.end;
 }
 
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index adf80dec42d7..d7eac84f975b 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -730,7 +730,7 @@  static void swiotlb_dyn_alloc(struct work_struct *work)
 
 	add_mem_pool(mem, pool);
 
-	/* Pairs with smp_rmb() in swiotlb_find_pool(). */
+	/* Pairs with smp_rmb() in is_swiotlb_buffer(). */
 	smp_wmb();
 }
 
@@ -764,11 +764,6 @@  struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr)
 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	struct io_tlb_pool *pool;
 
-	/* Pairs with smp_wmb() in swiotlb_find_slots() and
-	 * swiotlb_dyn_alloc(), which modify the RCU lists.
-	 */
-	smp_rmb();
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(pool, &mem->pools, node) {
 		if (paddr >= pool->start && paddr < pool->end)
@@ -813,6 +808,7 @@  void swiotlb_dev_init(struct device *dev)
 #ifdef CONFIG_SWIOTLB_DYNAMIC
 	INIT_LIST_HEAD(&dev->dma_io_tlb_pools);
 	spin_lock_init(&dev->dma_io_tlb_lock);
+	dev->dma_uses_io_tlb = false;
 #endif
 }
 
@@ -1157,9 +1153,11 @@  static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	list_add_rcu(&pool->node, &dev->dma_io_tlb_pools);
 	spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
 
-	/* Pairs with smp_rmb() in swiotlb_find_pool(). */
-	smp_wmb();
 found:
+	dev->dma_uses_io_tlb = true;
+	/* Pairs with smp_rmb() in is_swiotlb_buffer() */
+	smp_wmb();
+
 	*retpool = pool;
 	return index;
 }