Message ID | adea71bd1fa8660d4c3157a562431ad8127016d4.1690871004.git.petr.tesarik.ext@huawei.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 1395706a14904f2593debecf20f827e72d7392a7 |
Headers | show |
Series | Allow dynamic allocation of software IO TLB bounce buffers | expand |
Petr Tesarik <petrtesarik@huaweicloud.com> writes: > From: Petr Tesarik <petr.tesarik.ext@huawei.com> > > Skip searching the software IO TLB if a device has never used it, making > sure these devices are not affected by the introduction of multiple IO TLB > memory pools. > > Additional memory barrier is required to ensure that the new value of the > flag is visible to other CPUs after mapping a new bounce buffer. For > efficiency, the flag check should be inlined, and then the memory barrier > must be moved to is_swiotlb_buffer(). However, it can replace the existing > barrier in swiotlb_find_pool(), because all callers use is_swiotlb_buffer() > first to verify that the buffer address belongs to the software IO TLB. > > Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com> > --- Excuse me if this is a silly question, but I'm not able to figure it out on my own... > include/linux/device.h | 2 ++ > include/linux/swiotlb.h | 7 ++++++- > kernel/dma/swiotlb.c | 14 ++++++-------- > 3 files changed, 14 insertions(+), 9 deletions(-) > > diff --git a/include/linux/device.h b/include/linux/device.h > index 5fd89c9d005c..6fc808d22bfd 100644 > --- a/include/linux/device.h > +++ b/include/linux/device.h > @@ -628,6 +628,7 @@ struct device_physical_location { > * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. > * @dma_io_tlb_pools: List of transient swiotlb memory pools. > * @dma_io_tlb_lock: Protects changes to the list of active pools. > + * @dma_uses_io_tlb: %true if device has used the software IO TLB. > * @archdata: For arch-specific additions. > * @of_node: Associated device tree node. > * @fwnode: Associated device node supplied by platform firmware. > @@ -737,6 +738,7 @@ struct device { > #ifdef CONFIG_SWIOTLB_DYNAMIC > struct list_head dma_io_tlb_pools; > spinlock_t dma_io_tlb_lock; > + bool dma_uses_io_tlb; You add this new member here, fine... 
> #endif > /* arch specific additions */ > struct dev_archdata archdata; > diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h > index 8371c92a0271..b4536626f8ff 100644 > --- a/include/linux/swiotlb.h > +++ b/include/linux/swiotlb.h > @@ -172,8 +172,13 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) > if (!mem) > return false; > > - if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) > + if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) { > + /* Pairs with smp_wmb() in swiotlb_find_slots() and > + * swiotlb_dyn_alloc(), which modify the RCU lists. > + */ > + smp_rmb(); > return swiotlb_find_pool(dev, paddr); > + } > return paddr >= mem->defpool.start && paddr < mem->defpool.end; > } > > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c > index adf80dec42d7..d7eac84f975b 100644 > --- a/kernel/dma/swiotlb.c > +++ b/kernel/dma/swiotlb.c > @@ -730,7 +730,7 @@ static void swiotlb_dyn_alloc(struct work_struct *work) > > add_mem_pool(mem, pool); > > - /* Pairs with smp_rmb() in swiotlb_find_pool(). */ > + /* Pairs with smp_rmb() in is_swiotlb_buffer(). */ > smp_wmb(); > } > > @@ -764,11 +764,6 @@ struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) > struct io_tlb_mem *mem = dev->dma_io_tlb_mem; > struct io_tlb_pool *pool; > > - /* Pairs with smp_wmb() in swiotlb_find_slots() and > - * swiotlb_dyn_alloc(), which modify the RCU lists. > - */ > - smp_rmb(); > - > rcu_read_lock(); > list_for_each_entry_rcu(pool, &mem->pools, node) { > if (paddr >= pool->start && paddr < pool->end) > @@ -813,6 +808,7 @@ void swiotlb_dev_init(struct device *dev) > #ifdef CONFIG_SWIOTLB_DYNAMIC > INIT_LIST_HEAD(&dev->dma_io_tlb_pools); > spin_lock_init(&dev->dma_io_tlb_lock); > + dev->dma_uses_io_tlb = false; ...here you initialize it, fine... 
> #endif > } > > @@ -1157,9 +1153,11 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, > list_add_rcu(&pool->node, &dev->dma_io_tlb_pools); > spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); > > - /* Pairs with smp_rmb() in swiotlb_find_pool(). */ > - smp_wmb(); > found: > + dev->dma_uses_io_tlb = true; > + /* Pairs with smp_rmb() in is_swiotlb_buffer() */ > + smp_wmb(); > + ...and here you set it if swiotlb is used. But, as far as I can tell, you don't actually *use* this field anywhere. What am I missing? Thanks, jon
So it seems this code got merged without this question ever being answered. Sorry if it's a dumb one, but I don't think this functionality works as advertised... Thanks, jon Jonathan Corbet <corbet@lwn.net> writes: > Petr Tesarik <petrtesarik@huaweicloud.com> writes: > >> From: Petr Tesarik <petr.tesarik.ext@huawei.com> >> >> Skip searching the software IO TLB if a device has never used it, making >> sure these devices are not affected by the introduction of multiple IO TLB >> memory pools. >> >> Additional memory barrier is required to ensure that the new value of the >> flag is visible to other CPUs after mapping a new bounce buffer. For >> efficiency, the flag check should be inlined, and then the memory barrier >> must be moved to is_swiotlb_buffer(). However, it can replace the existing >> barrier in swiotlb_find_pool(), because all callers use is_swiotlb_buffer() >> first to verify that the buffer address belongs to the software IO TLB. >> >> Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com> >> --- > > Excuse me if this is a silly question, but I'm not able to figure it out > on my own... > >> include/linux/device.h | 2 ++ >> include/linux/swiotlb.h | 7 ++++++- >> kernel/dma/swiotlb.c | 14 ++++++-------- >> 3 files changed, 14 insertions(+), 9 deletions(-) >> >> diff --git a/include/linux/device.h b/include/linux/device.h >> index 5fd89c9d005c..6fc808d22bfd 100644 >> --- a/include/linux/device.h >> +++ b/include/linux/device.h >> @@ -628,6 +628,7 @@ struct device_physical_location { >> * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. >> * @dma_io_tlb_pools: List of transient swiotlb memory pools. >> * @dma_io_tlb_lock: Protects changes to the list of active pools. >> + * @dma_uses_io_tlb: %true if device has used the software IO TLB. >> * @archdata: For arch-specific additions. >> * @of_node: Associated device tree node. >> * @fwnode: Associated device node supplied by platform firmware. 
>> @@ -737,6 +738,7 @@ struct device { >> #ifdef CONFIG_SWIOTLB_DYNAMIC >> struct list_head dma_io_tlb_pools; >> spinlock_t dma_io_tlb_lock; >> + bool dma_uses_io_tlb; > > You add this new member here, fine... > >> #endif >> /* arch specific additions */ >> struct dev_archdata archdata; >> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h >> index 8371c92a0271..b4536626f8ff 100644 >> --- a/include/linux/swiotlb.h >> +++ b/include/linux/swiotlb.h >> @@ -172,8 +172,13 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) >> if (!mem) >> return false; >> >> - if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) >> + if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) { >> + /* Pairs with smp_wmb() in swiotlb_find_slots() and >> + * swiotlb_dyn_alloc(), which modify the RCU lists. >> + */ >> + smp_rmb(); >> return swiotlb_find_pool(dev, paddr); >> + } >> return paddr >= mem->defpool.start && paddr < mem->defpool.end; >> } >> >> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c >> index adf80dec42d7..d7eac84f975b 100644 >> --- a/kernel/dma/swiotlb.c >> +++ b/kernel/dma/swiotlb.c >> @@ -730,7 +730,7 @@ static void swiotlb_dyn_alloc(struct work_struct *work) >> >> add_mem_pool(mem, pool); >> >> - /* Pairs with smp_rmb() in swiotlb_find_pool(). */ >> + /* Pairs with smp_rmb() in is_swiotlb_buffer(). */ >> smp_wmb(); >> } >> >> @@ -764,11 +764,6 @@ struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) >> struct io_tlb_mem *mem = dev->dma_io_tlb_mem; >> struct io_tlb_pool *pool; >> >> - /* Pairs with smp_wmb() in swiotlb_find_slots() and >> - * swiotlb_dyn_alloc(), which modify the RCU lists. 
>> - */ >> - smp_rmb(); >> - >> rcu_read_lock(); >> list_for_each_entry_rcu(pool, &mem->pools, node) { >> if (paddr >= pool->start && paddr < pool->end) >> @@ -813,6 +808,7 @@ void swiotlb_dev_init(struct device *dev) >> #ifdef CONFIG_SWIOTLB_DYNAMIC >> INIT_LIST_HEAD(&dev->dma_io_tlb_pools); >> spin_lock_init(&dev->dma_io_tlb_lock); >> + dev->dma_uses_io_tlb = false; > > ...here you initialize it, fine... > >> #endif >> } >> >> @@ -1157,9 +1153,11 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, >> list_add_rcu(&pool->node, &dev->dma_io_tlb_pools); >> spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); >> >> - /* Pairs with smp_rmb() in swiotlb_find_pool(). */ >> - smp_wmb(); >> found: >> + dev->dma_uses_io_tlb = true; >> + /* Pairs with smp_rmb() in is_swiotlb_buffer() */ >> + smp_wmb(); >> + > > ...and here you set it if swiotlb is used. > > But, as far as I can tell, you don't actually *use* this field anywhere. > What am I missing? > > Thanks, > > jon
On Wed, Aug 09, 2023 at 03:20:43PM -0600, Jonathan Corbet wrote:
> > spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
> >
> > - /* Pairs with smp_rmb() in swiotlb_find_pool(). */
> > - smp_wmb();
> > found:
> > + dev->dma_uses_io_tlb = true;
> > + /* Pairs with smp_rmb() in is_swiotlb_buffer() */
> > + smp_wmb();
> > +
>
> ...and here you set it if swiotlb is used.
>
> But, as far as I can tell, you don't actually *use* this field anywhere.
> What am I missing?

It's very much unused. Petr, I guess you wanted to use this in
is_swiotlb_buffer to avoid the lookup unless required. Can you send
a follow up?
Hi all,

sorry for my late reply; I've been away from my work setup for a
month...

On Wed, 30 Aug 2023 08:55:51 -0600
Jonathan Corbet <corbet@lwn.net> wrote:

> So it seems this code got merged without this question ever being
> answered. Sorry if it's a dumb one, but I don't think this
> functionality works as advertised...

Yes, I believe the check was originally in is_swiotlb_buffer(), but it
got lost during one of the numerous rebases of this patch set.

Let me send a follow-up patch after making sure it actually works.

Petr T

> Thanks,
>
> jon
>
> Jonathan Corbet <corbet@lwn.net> writes:
>
> > Petr Tesarik <petrtesarik@huaweicloud.com> writes:
> >
> >> From: Petr Tesarik <petr.tesarik.ext@huawei.com>
> >>
> >> Skip searching the software IO TLB if a device has never used it,
> >> making sure these devices are not affected by the introduction of
> >> multiple IO TLB memory pools.
> >>
> >> Additional memory barrier is required to ensure that the new value
> >> of the flag is visible to other CPUs after mapping a new bounce
> >> buffer. For efficiency, the flag check should be inlined, and then
> >> the memory barrier must be moved to is_swiotlb_buffer(). However,
> >> it can replace the existing barrier in swiotlb_find_pool(),
> >> because all callers use is_swiotlb_buffer() first to verify that
> >> the buffer address belongs to the software IO TLB.
> >>
> >> Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
> >> ---
> >
> > Excuse me if this is a silly question, but I'm not able to figure
> > it out on my own...
> > > >> include/linux/device.h | 2 ++ > >> include/linux/swiotlb.h | 7 ++++++- > >> kernel/dma/swiotlb.c | 14 ++++++-------- > >> 3 files changed, 14 insertions(+), 9 deletions(-) > >> > >> diff --git a/include/linux/device.h b/include/linux/device.h > >> index 5fd89c9d005c..6fc808d22bfd 100644 > >> --- a/include/linux/device.h > >> +++ b/include/linux/device.h > >> @@ -628,6 +628,7 @@ struct device_physical_location { > >> * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver > >> use. > >> * @dma_io_tlb_pools: List of transient swiotlb memory > >> pools. > >> * @dma_io_tlb_lock: Protects changes to the list of > >> active pools. > >> + * @dma_uses_io_tlb: %true if device has used the software IO TLB. > >> * @archdata: For arch-specific additions. > >> * @of_node: Associated device tree node. > >> * @fwnode: Associated device node supplied by platform > >> firmware. @@ -737,6 +738,7 @@ struct device { > >> #ifdef CONFIG_SWIOTLB_DYNAMIC > >> struct list_head dma_io_tlb_pools; > >> spinlock_t dma_io_tlb_lock; > >> + bool dma_uses_io_tlb; > > > > You add this new member here, fine... > > > >> #endif > >> /* arch specific additions */ > >> struct dev_archdata archdata; > >> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h > >> index 8371c92a0271..b4536626f8ff 100644 > >> --- a/include/linux/swiotlb.h > >> +++ b/include/linux/swiotlb.h > >> @@ -172,8 +172,13 @@ static inline bool is_swiotlb_buffer(struct > >> device *dev, phys_addr_t paddr) if (!mem) > >> return false; > >> > >> - if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) > >> + if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) { > >> + /* Pairs with smp_wmb() in swiotlb_find_slots() > >> and > >> + * swiotlb_dyn_alloc(), which modify the RCU > >> lists. 
> >> + */ > >> + smp_rmb(); > >> return swiotlb_find_pool(dev, paddr); > >> + } > >> return paddr >= mem->defpool.start && paddr < > >> mem->defpool.end; } > >> > >> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c > >> index adf80dec42d7..d7eac84f975b 100644 > >> --- a/kernel/dma/swiotlb.c > >> +++ b/kernel/dma/swiotlb.c > >> @@ -730,7 +730,7 @@ static void swiotlb_dyn_alloc(struct > >> work_struct *work) > >> add_mem_pool(mem, pool); > >> > >> - /* Pairs with smp_rmb() in swiotlb_find_pool(). */ > >> + /* Pairs with smp_rmb() in is_swiotlb_buffer(). */ > >> smp_wmb(); > >> } > >> > >> @@ -764,11 +764,6 @@ struct io_tlb_pool *swiotlb_find_pool(struct > >> device *dev, phys_addr_t paddr) struct io_tlb_mem *mem = > >> dev->dma_io_tlb_mem; struct io_tlb_pool *pool; > >> > >> - /* Pairs with smp_wmb() in swiotlb_find_slots() and > >> - * swiotlb_dyn_alloc(), which modify the RCU lists. > >> - */ > >> - smp_rmb(); > >> - > >> rcu_read_lock(); > >> list_for_each_entry_rcu(pool, &mem->pools, node) { > >> if (paddr >= pool->start && paddr < pool->end) > >> @@ -813,6 +808,7 @@ void swiotlb_dev_init(struct device *dev) > >> #ifdef CONFIG_SWIOTLB_DYNAMIC > >> INIT_LIST_HEAD(&dev->dma_io_tlb_pools); > >> spin_lock_init(&dev->dma_io_tlb_lock); > >> + dev->dma_uses_io_tlb = false; > > > > ...here you initialize it, fine... > > > >> #endif > >> } > >> > >> @@ -1157,9 +1153,11 @@ static int swiotlb_find_slots(struct device > >> *dev, phys_addr_t orig_addr, list_add_rcu(&pool->node, > >> &dev->dma_io_tlb_pools); > >> spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); > >> - /* Pairs with smp_rmb() in swiotlb_find_pool(). */ > >> - smp_wmb(); > >> found: > >> + dev->dma_uses_io_tlb = true; > >> + /* Pairs with smp_rmb() in is_swiotlb_buffer() */ > >> + smp_wmb(); > >> + > > > > ...and here you set it if swiotlb is used. > > > > But, as far as I can tell, you don't actually *use* this field > > anywhere. What am I missing? > > > > Thanks, > > > > jon
On Thu, Sep 07, 2023 at 01:12:23PM +0200, Petr Tesařík wrote:
> Hi all,
>
> sorry for my late reply; I've been away from my work setup for a
> month...

Please take a look at:

https://lore.kernel.org/linux-iommu/20230905064441.127588-1-hch@lst.de/T/#u
diff --git a/include/linux/device.h b/include/linux/device.h index 5fd89c9d005c..6fc808d22bfd 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -628,6 +628,7 @@ struct device_physical_location { * @dma_io_tlb_mem: Software IO TLB allocator. Not for driver use. * @dma_io_tlb_pools: List of transient swiotlb memory pools. * @dma_io_tlb_lock: Protects changes to the list of active pools. + * @dma_uses_io_tlb: %true if device has used the software IO TLB. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. @@ -737,6 +738,7 @@ struct device { #ifdef CONFIG_SWIOTLB_DYNAMIC struct list_head dma_io_tlb_pools; spinlock_t dma_io_tlb_lock; + bool dma_uses_io_tlb; #endif /* arch specific additions */ struct dev_archdata archdata; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 8371c92a0271..b4536626f8ff 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -172,8 +172,13 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) if (!mem) return false; - if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) + if (IS_ENABLED(CONFIG_SWIOTLB_DYNAMIC)) { + /* Pairs with smp_wmb() in swiotlb_find_slots() and + * swiotlb_dyn_alloc(), which modify the RCU lists. + */ + smp_rmb(); return swiotlb_find_pool(dev, paddr); + } return paddr >= mem->defpool.start && paddr < mem->defpool.end; } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index adf80dec42d7..d7eac84f975b 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -730,7 +730,7 @@ static void swiotlb_dyn_alloc(struct work_struct *work) add_mem_pool(mem, pool); - /* Pairs with smp_rmb() in swiotlb_find_pool(). */ + /* Pairs with smp_rmb() in is_swiotlb_buffer(). 
*/ smp_wmb(); } @@ -764,11 +764,6 @@ struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; - /* Pairs with smp_wmb() in swiotlb_find_slots() and - * swiotlb_dyn_alloc(), which modify the RCU lists. - */ - smp_rmb(); - rcu_read_lock(); list_for_each_entry_rcu(pool, &mem->pools, node) { if (paddr >= pool->start && paddr < pool->end) @@ -813,6 +808,7 @@ void swiotlb_dev_init(struct device *dev) #ifdef CONFIG_SWIOTLB_DYNAMIC INIT_LIST_HEAD(&dev->dma_io_tlb_pools); spin_lock_init(&dev->dma_io_tlb_lock); + dev->dma_uses_io_tlb = false; #endif } @@ -1157,9 +1153,11 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, list_add_rcu(&pool->node, &dev->dma_io_tlb_pools); spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); - /* Pairs with smp_rmb() in swiotlb_find_pool(). */ - smp_wmb(); found: + dev->dma_uses_io_tlb = true; + /* Pairs with smp_rmb() in is_swiotlb_buffer() */ + smp_wmb(); + *retpool = pool; return index; }