diff mbox series

[v4,8/8] swiotlb: search the software IO TLB only if a device makes use of it

Message ID a8d31d3fffa0867dce2b44b98dc2714289edfdc9.1689261692.git.petr.tesarik.ext@huawei.com (mailing list archive)
State Superseded
Headers show
Series Allow dynamic allocation of software IO TLB bounce buffers | expand

Commit Message

Petr Tesarik July 13, 2023, 3:23 p.m. UTC
From: Petr Tesarik <petr.tesarik.ext@huawei.com>

Skip searching the software IO TLB if a device has never used it, making
sure these devices are not affected by the introduction of multiple IO TLB
memory pools.

An additional memory barrier is required to ensure that the new value of the
flag is visible to other CPUs after mapping a new bounce buffer. For
efficiency, the flag check should be inlined, and then the memory barrier
must be moved to is_swiotlb_buffer(). However, it can replace the existing
barrier in swiotlb_find_pool(), because all callers use is_swiotlb_buffer()
first to verify that the buffer address belongs to the software IO TLB.

Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
---
 include/linux/device.h  |  2 ++
 include/linux/swiotlb.h |  6 +++++-
 kernel/dma/swiotlb.c    | 14 ++++++--------
 3 files changed, 13 insertions(+), 9 deletions(-)

Comments

Christoph Hellwig July 20, 2023, 6:47 a.m. UTC | #1
Any reason this can't just do a list_empty_careful on the list
instead of adding yet another field that grows struct device?
Petr Tesařík July 20, 2023, 8:02 a.m. UTC | #2
On Thu, 20 Jul 2023 08:47:44 +0200
Christoph Hellwig <hch@lst.de> wrote:

> Any reason this can't just do a list_empty_careful on the list
> instead of adding yet another field that grows struct device?

On which list?

The dma_io_tlb_pools list only contains transient pools, but a device
may use bounce buffers from a regular pool.

The dma_io_tlb_mem.pools list will always be non-empty, unless the
system runs without SWIOTLB.

On a system which does have a SWIOTLB, the flag makes it possible to
differentiate between devices that actually use bounce buffers and
devices that do not (e.g. because they do not have any addressing
limitations).

Petr T
Christoph Hellwig July 20, 2023, 8:22 a.m. UTC | #3
On Thu, Jul 20, 2023 at 10:02:38AM +0200, Petr Tesařík wrote:
> On Thu, 20 Jul 2023 08:47:44 +0200
> Christoph Hellwig <hch@lst.de> wrote:
> 
> > Any reason this can't just do a list_empty_careful on the list
> > instead of adding yet another field that grows struct device?
> 
> On which list?

dev->dma_io_tlb_mem->pools?

> 
> The dma_io_tlb_pools list only contains transient pools, but a device
> may use bounce buffers from a regular pool.

Oh, true.

> The dma_io_tlb_mem.pools list will always be non-empty, unless the
> system runs without SWIOTLB.
> 
> On a system which does have a SWIOTLB, the flag makes it possible to
> differentiate between devices that actually use bounce buffers and
> devices that do not (e.g. because they do not have any addressing
> limitations).

Ok.
diff mbox series

Patch

diff --git a/include/linux/device.h b/include/linux/device.h
index 549b0a62455c..86871d628648 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -628,6 +628,7 @@  struct device_physical_location {
  * @dma_io_tlb_mem: Software IO TLB allocator.  Not for driver use.
  * @dma_io_tlb_pools:	List of transient swiotlb memory pools.
  * @dma_io_tlb_lock:	Protects changes to the list of active pools.
+ * @dma_uses_io_tlb: %true if device has used the software IO TLB.
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
  * @fwnode:	Associated device node supplied by platform firmware.
@@ -735,6 +736,7 @@  struct device {
 	struct io_tlb_mem *dma_io_tlb_mem;
 	struct list_head dma_io_tlb_pools;
 	spinlock_t dma_io_tlb_lock;
+	bool dma_uses_io_tlb;
 #endif
 	/* arch specific additions */
 	struct dev_archdata	archdata;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 06fd94de1cd8..8069cb62c893 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -150,7 +150,11 @@  struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr);
  */
 static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
-	return dev->dma_io_tlb_mem &&
+	/* Pairs with smp_wmb() in swiotlb_find_slots() and
+	 * swiotlb_dyn_alloc(), which modify the RCU lists.
+	 */
+	smp_rmb();
+	return dev->dma_uses_io_tlb &&
 		!!swiotlb_find_pool(dev, paddr);
 }
 
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 9c66ec2c47dd..854d139ddcb7 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -706,7 +706,7 @@  static void swiotlb_dyn_alloc(struct work_struct *work)
 
 	add_mem_pool(mem, pool);
 
-	/* Pairs with smp_rmb() in swiotlb_find_pool(). */
+	/* Pairs with smp_rmb() in is_swiotlb_buffer(). */
 	smp_wmb();
 }
 
@@ -734,6 +734,7 @@  void swiotlb_dev_init(struct device *dev)
 	dev->dma_io_tlb_mem = &io_tlb_default_mem;
 	INIT_LIST_HEAD(&dev->dma_io_tlb_pools);
 	spin_lock_init(&dev->dma_io_tlb_lock);
+	dev->dma_uses_io_tlb = false;
 }
 
 /**
@@ -751,11 +752,6 @@  struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr)
 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	struct io_tlb_pool *pool;
 
-	/* Pairs with smp_wmb() in swiotlb_find_slots() and
-	 * swiotlb_dyn_alloc(), which modify the RCU lists.
-	 */
-	smp_rmb();
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(pool, &mem->pools, node) {
 		if (paddr >= pool->start && paddr < pool->end)
@@ -1128,9 +1124,11 @@  static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	list_add_rcu(&pool->node, &dev->dma_io_tlb_pools);
 	spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
 
-	/* Pairs with smp_rmb() in swiotlb_find_pool(). */
-	smp_wmb();
 found:
+	dev->dma_uses_io_tlb = true;
+	/* Pairs with smp_rmb() in is_swiotlb_buffer() */
+	smp_wmb();
+
 	*retpool = pool;
 	return index;
 }