Message ID | 20220404050559.132378-10-hch@lst.de (mailing list archive) |
---|---|
State | Handled Elsewhere |
Headers | show |
Series | [01/15] dma-direct: use is_swiotlb_active in dma_direct_map_page | expand |
From: Christoph Hellwig <hch@lst.de> Sent: Sunday, April 3, 2022 10:06 PM > > Pass a bool to pass if swiotlb needs to be enabled based on the Wording problems. I'm not sure what you meant to say. > addressing needs and replace the verbose argument with a set of > flags, including one to force enable bounce buffering. > > Note that this patch removes the possibility to force xen-swiotlb > use using swiotlb=force on the command line on x86 (arm and arm64 > never supported that), but this interface will be restored shortly. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > arch/arm/mm/init.c | 6 +---- > arch/arm64/mm/init.c | 6 +---- > arch/ia64/mm/init.c | 4 +-- > arch/mips/cavium-octeon/dma-octeon.c | 2 +- > arch/mips/loongson64/dma.c | 2 +- > arch/mips/sibyte/common/dma.c | 2 +- > arch/powerpc/mm/mem.c | 3 ++- > arch/powerpc/platforms/pseries/setup.c | 3 --- > arch/riscv/mm/init.c | 8 +----- > arch/s390/mm/init.c | 3 +-- > arch/x86/kernel/pci-dma.c | 15 ++++++----- > drivers/xen/swiotlb-xen.c | 4 +-- > include/linux/swiotlb.h | 15 ++++++----- > include/trace/events/swiotlb.h | 29 ++++++++------------- > kernel/dma/swiotlb.c | 35 ++++++++++++++------------ > 15 files changed, 55 insertions(+), 82 deletions(-) > > diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c > index fe249ea919083..ce64bdb55a16b 100644 > --- a/arch/arm/mm/init.c > +++ b/arch/arm/mm/init.c > @@ -271,11 +271,7 @@ static void __init free_highpages(void) > void __init mem_init(void) > { > #ifdef CONFIG_ARM_LPAE > - if (swiotlb_force == SWIOTLB_FORCE || > - max_pfn > arm_dma_pfn_limit) > - swiotlb_init(1); > - else > - swiotlb_force = SWIOTLB_NO_FORCE; > + swiotlb_init(max_pfn > arm_dma_pfn_limit, SWIOTLB_VERBOSE); > #endif > > set_max_mapnr(pfn_to_page(max_pfn) - mem_map); > diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c > index 8ac25f19084e8..7b6ea4d6733d6 100644 > --- a/arch/arm64/mm/init.c > +++ b/arch/arm64/mm/init.c > @@ -398,11 +398,7 @@ void __init bootmem_init(void) > */ > void __init mem_init(void) > { > - if (swiotlb_force == SWIOTLB_FORCE || > - max_pfn > PFN_DOWN(arm64_dma_phys_limit)) > - swiotlb_init(1); > - else if (!xen_swiotlb_detect()) > - swiotlb_force = SWIOTLB_NO_FORCE; > + swiotlb_init(max_pfn > PFN_DOWN(arm64_dma_phys_limit), > SWIOTLB_VERBOSE); > > /* this will put all unused low memory onto the freelists */ > memblock_free_all(); > diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c > index 5d165607bf354..3c3e15b22608f 100644 > --- a/arch/ia64/mm/init.c > +++ b/arch/ia64/mm/init.c > @@ -437,9 +437,7 @@ mem_init (void) > if (iommu_detected) > break; > #endif > -#ifdef CONFIG_SWIOTLB > - swiotlb_init(1); > -#endif > + swiotlb_init(true, SWIOTLB_VERBOSE); > } while (0); > > #ifdef CONFIG_FLATMEM > diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma- > octeon.c > index fb7547e217263..9fbba6a8fa4c5 100644 > --- a/arch/mips/cavium-octeon/dma-octeon.c > +++ b/arch/mips/cavium-octeon/dma-octeon.c > @@ -235,5 +235,5 @@ void __init plat_swiotlb_setup(void) > #endif > > swiotlb_adjust_size(swiotlbsize); > - swiotlb_init(1); > + swiotlb_init(true, SWIOTLB_VERBOSE); > } > diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c > index 364f2f27c8723..8220a1bc0db64 100644 > --- a/arch/mips/loongson64/dma.c > +++ b/arch/mips/loongson64/dma.c > @@ -24,5 +24,5 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t > daddr) > > void __init plat_swiotlb_setup(void) > { > - swiotlb_init(1); > + swiotlb_init(true, SWIOTLB_VERBOSE); > } > diff --git a/arch/mips/sibyte/common/dma.c b/arch/mips/sibyte/common/dma.c > index eb47a94f3583e..c5c2c782aff68 100644 > --- a/arch/mips/sibyte/common/dma.c > +++ b/arch/mips/sibyte/common/dma.c > @@ -10,5 +10,5 @@ > > void __init plat_swiotlb_setup(void) > { > - swiotlb_init(1); > + swiotlb_init(true, SWIOTLB_VERBOSE); > } > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c > index 8e301cd8925b2..e1519e2edc656 100644 > --- a/arch/powerpc/mm/mem.c > +++ b/arch/powerpc/mm/mem.c > @@ -17,6 +17,7 @@ > #include <linux/suspend.h> > #include <linux/dma-direct.h> > > +#include <asm/swiotlb.h> > #include <asm/machdep.h> > #include <asm/rtas.h> > #include <asm/kasan.h> > @@ -251,7 +252,7 @@ void __init mem_init(void) > if (is_secure_guest()) > svm_swiotlb_init(); > else > - swiotlb_init(0); > + swiotlb_init(ppc_swiotlb_enable, 0); > #endif > > high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); > diff --git a/arch/powerpc/platforms/pseries/setup.c > b/arch/powerpc/platforms/pseries/setup.c > index 069d7b3bb142e..c6e06d91b6602 100644 > --- a/arch/powerpc/platforms/pseries/setup.c > +++ b/arch/powerpc/platforms/pseries/setup.c > @@ -838,9 +838,6 @@ static void __init pSeries_setup_arch(void) > } > > ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; > - > - if (swiotlb_force == SWIOTLB_FORCE) > - ppc_swiotlb_enable = 1; > } > > static void pseries_panic(char *str) > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > index 9535bea8688c0..181ffd322eafa 100644 > --- a/arch/riscv/mm/init.c > +++ b/arch/riscv/mm/init.c > @@ -120,13 +120,7 @@ void __init mem_init(void) > BUG_ON(!mem_map); > #endif /* CONFIG_FLATMEM */ > > -#ifdef CONFIG_SWIOTLB > - if (swiotlb_force == SWIOTLB_FORCE || > - max_pfn > PFN_DOWN(dma32_phys_limit)) > - swiotlb_init(1); > - else > - swiotlb_force = SWIOTLB_NO_FORCE; > -#endif > + swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE); > memblock_free_all(); > > print_vm_layout(); > diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c > index 86ffd0d51fd59..6fb6bf64326f9 100644 > --- a/arch/s390/mm/init.c > +++ b/arch/s390/mm/init.c > @@ -185,8 +185,7 @@ static void pv_init(void) > return; > > /* make sure bounce buffers are shared */ > - swiotlb_force = SWIOTLB_FORCE; > - swiotlb_init(1); > + swiotlb_init(true, SWIOTLB_FORCE | SWIOTLB_VERBOSE); > swiotlb_update_mem_attributes(); > } > > diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c > index 04140e20ef1a3..a705a199bf8a3 100644 > --- a/arch/x86/kernel/pci-dma.c > +++ b/arch/x86/kernel/pci-dma.c > @@ -39,6 +39,7 @@ int iommu_detected __read_mostly = 0; > > #ifdef CONFIG_SWIOTLB > bool x86_swiotlb_enable; > +static unsigned int x86_swiotlb_flags; > > static void __init pci_swiotlb_detect(void) > { > @@ -58,16 +59,16 @@ static void __init pci_swiotlb_detect(void) > * bounce buffers as the hypervisor can't access arbitrary VM memory > * that is not explicitly shared with it. > */ > - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) > - swiotlb_force = SWIOTLB_FORCE; > - > - if (swiotlb_force == SWIOTLB_FORCE) > + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { > x86_swiotlb_enable = true; > + x86_swiotlb_flags |= SWIOTLB_FORCE; > + } > } > #else > static inline void __init pci_swiotlb_detect(void) > { > } > +#define x86_swiotlb_flags 0 > #endif /* CONFIG_SWIOTLB */ > > #ifdef CONFIG_SWIOTLB_XEN > @@ -75,8 +76,7 @@ static bool xen_swiotlb; > > static void __init pci_xen_swiotlb_init(void) > { > - if (!xen_initial_domain() && !x86_swiotlb_enable && > - swiotlb_force != SWIOTLB_FORCE) > + if (!xen_initial_domain() && !x86_swiotlb_enable) > return; > x86_swiotlb_enable = true; > xen_swiotlb = true; > @@ -120,8 +120,7 @@ void __init pci_iommu_alloc(void) > gart_iommu_hole_init(); > amd_iommu_detect(); > detect_intel_iommu(); > - if (x86_swiotlb_enable) > - swiotlb_init(0); > + swiotlb_init(x86_swiotlb_enable, x86_swiotlb_flags); > } > > /* > diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c > index 485cd06ed39e7..c2da3eb4826e8 100644 > --- a/drivers/xen/swiotlb-xen.c > +++ b/drivers/xen/swiotlb-xen.c > @@ -251,7 +251,7 @@ void __init xen_swiotlb_init_early(void) > panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc); > } > > - if (swiotlb_init_with_tbl(start, nslabs, true)) > + if (swiotlb_init_with_tbl(start, nslabs, SWIOTLB_VERBOSE)) > panic("Cannot allocate SWIOTLB buffer"); > } > #endif /* CONFIG_X86 */ > @@ -376,7 +376,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, > struct page *page, > /* > * Oh well, have to allocate and map a bounce buffer. > */ > - trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); > + trace_swiotlb_bounced(dev, dev_addr, size); > > map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs); > if (map == (phys_addr_t)DMA_MAPPING_ERROR) > diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h > index b48b26bfa0edb..ae0407173e845 100644 > --- a/include/linux/swiotlb.h > +++ b/include/linux/swiotlb.h > @@ -13,11 +13,8 @@ struct device; > struct page; > struct scatterlist; > > -enum swiotlb_force { > - SWIOTLB_NORMAL, /* Default - depending on HW DMA mask etc. > */ > - SWIOTLB_FORCE, /* swiotlb=force */ > - SWIOTLB_NO_FORCE, /* swiotlb=noforce */ > -}; > +#define SWIOTLB_VERBOSE (1 << 0) /* verbose initialization */ > +#define SWIOTLB_FORCE (1 << 1) /* force bounce buffering */ > > /* > * Maximum allowable number of contiguous slabs to map, > @@ -36,8 +33,7 @@ enum swiotlb_force { > /* default to 64MB */ > #define IO_TLB_DEFAULT_SIZE (64UL<<20) > > -extern void swiotlb_init(int verbose); > -int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); > +int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags); > unsigned long swiotlb_size_or_default(void); > extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); > int swiotlb_init_late(size_t size); > @@ -126,13 +122,16 @@ static inline bool is_swiotlb_force_bounce(struct device > *dev) > return mem && mem->force_bounce; > } > > +void swiotlb_init(bool addressing_limited, unsigned int flags); > void __init swiotlb_exit(void); > unsigned int swiotlb_max_segment(void); > size_t swiotlb_max_mapping_size(struct device *dev); > bool is_swiotlb_active(struct device *dev); > void __init swiotlb_adjust_size(unsigned long size); > #else > -#define swiotlb_force SWIOTLB_NO_FORCE > +static inline void swiotlb_init(bool addressing_limited, unsigned int flags) > +{ > +} > static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) > { > return false; > diff --git a/include/trace/events/swiotlb.h b/include/trace/events/swiotlb.h > index 705be43b71ab0..da05c9ebd224a 100644 > --- a/include/trace/events/swiotlb.h > +++ b/include/trace/events/swiotlb.h > @@ -8,20 +8,15 @@ > #include <linux/tracepoint.h> > > TRACE_EVENT(swiotlb_bounced, > - > - TP_PROTO(struct device *dev, > - dma_addr_t dev_addr, > - size_t size, > - enum swiotlb_force swiotlb_force), > - > - TP_ARGS(dev, dev_addr, size, swiotlb_force), > + TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), > + TP_ARGS(dev, dev_addr, size), > > TP_STRUCT__entry( > - __string( dev_name, dev_name(dev) ) > - __field( u64, dma_mask ) > - __field( dma_addr_t, dev_addr ) > - __field( size_t, size ) > - __field( enum swiotlb_force, swiotlb_force ) > + __string(dev_name, dev_name(dev)) > + __field(u64, dma_mask) > + __field(dma_addr_t, dev_addr) > + __field(size_t, size) > + __field(bool, force) > ), > > TP_fast_assign( > @@ -29,19 +24,15 @@ TRACE_EVENT(swiotlb_bounced, > __entry->dma_mask = (dev->dma_mask ? *dev->dma_mask : 0); > __entry->dev_addr = dev_addr; > __entry->size = size; > - __entry->swiotlb_force = swiotlb_force; > + __entry->force = is_swiotlb_force_bounce(dev); > ), > > - TP_printk("dev_name: %s dma_mask=%llx dev_addr=%llx " > - "size=%zu %s", > + TP_printk("dev_name: %s dma_mask=%llx dev_addr=%llx size=%zu %s", > __get_str(dev_name), > __entry->dma_mask, > (unsigned long long)__entry->dev_addr, > __entry->size, > - __print_symbolic(__entry->swiotlb_force, > - { SWIOTLB_NORMAL, "NORMAL" }, > - { SWIOTLB_FORCE, "FORCE" }, > - { SWIOTLB_NO_FORCE, "NO_FORCE" })) > + __entry->force ? "FORCE" : "NORMAL") > ); > > #endif /* _TRACE_SWIOTLB_H */ > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c > index 9a4fe6e48a074..86e877a96b828 100644 > --- a/kernel/dma/swiotlb.c > +++ b/kernel/dma/swiotlb.c > @@ -62,7 +62,8 @@ > > #define INVALID_PHYS_ADDR (~(phys_addr_t)0) > > -enum swiotlb_force swiotlb_force; > +static bool swiotlb_force_bounce; > +static bool swiotlb_force_disable; > > struct io_tlb_mem io_tlb_default_mem; > > @@ -81,9 +82,9 @@ setup_io_tlb_npages(char *str) > if (*str == ',') > ++str; > if (!strcmp(str, "force")) > - swiotlb_force = SWIOTLB_FORCE; > + swiotlb_force_bounce = true; > else if (!strcmp(str, "noforce")) > - swiotlb_force = SWIOTLB_NO_FORCE; > + swiotlb_force_disable = true; > > return 0; > } > @@ -202,7 +203,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, > phys_addr_t start, > mem->index = 0; > mem->late_alloc = late_alloc; > > - if (swiotlb_force == SWIOTLB_FORCE) > + if (swiotlb_force_bounce) > mem->force_bounce = true; > > spin_lock_init(&mem->lock); > @@ -224,12 +225,13 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem > *mem, phys_addr_t start, > return; > } > > -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) > +int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, > + unsigned int flags) > { > struct io_tlb_mem *mem = &io_tlb_default_mem; > size_t alloc_size; > > - if (swiotlb_force == SWIOTLB_NO_FORCE) > + if (swiotlb_force_disable) > return 0; > > /* protect against double initialization */ > @@ -243,8 +245,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, > int verbose) > __func__, alloc_size, PAGE_SIZE); > > swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false); > + mem->force_bounce = flags & SWIOTLB_FORCE; > > - if (verbose) > + if (flags & SWIOTLB_VERBOSE) > swiotlb_print_info(); > return 0; > } > @@ -253,20 +256,21 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long > nslabs, int verbose) > * Statically reserve bounce buffer space and initialize bounce buffer data > * structures for the software IO TLB used to implement the DMA API. > */ > -void __init > -swiotlb_init(int verbose) > +void __init swiotlb_init(bool addressing_limit, unsigned int flags) > { > size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT); > void *tlb; > > - if (swiotlb_force == SWIOTLB_NO_FORCE) > + if (!addressing_limit && !swiotlb_force_bounce) > + return; > + if (swiotlb_force_disable) > return; > > /* Get IO TLB memory from the low pages */ > tlb = memblock_alloc_low(bytes, PAGE_SIZE); > if (!tlb) > goto fail; > - if (swiotlb_init_with_tbl(tlb, default_nslabs, verbose)) > + if (swiotlb_init_with_tbl(tlb, default_nslabs, flags)) > goto fail_free_mem; > return; > > @@ -289,7 +293,7 @@ int swiotlb_init_late(size_t size) > unsigned int order; > int rc = 0; > > - if (swiotlb_force == SWIOTLB_NO_FORCE) > + if (swiotlb_force_disable) > return 0; > > /* > @@ -328,7 +332,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) > struct io_tlb_mem *mem = &io_tlb_default_mem; > unsigned long bytes = nslabs << IO_TLB_SHIFT; > > - if (swiotlb_force == SWIOTLB_NO_FORCE) > + if (swiotlb_force_disable) > return 0; > > /* protect against double initialization */ > @@ -353,7 +357,7 @@ void __init swiotlb_exit(void) > unsigned long tbl_vaddr; > size_t tbl_size, slots_size; > > - if (swiotlb_force == SWIOTLB_FORCE) > + if (swiotlb_force_bounce) > return; > > if (!mem->nslabs) > @@ -704,8 +708,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t > paddr, size_t size, > phys_addr_t swiotlb_addr; > dma_addr_t dma_addr; > > - trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size, > - swiotlb_force); > + trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size); > > swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir, > attrs); > -- > 2.30.2
Hi Christoph, On Mon, Apr 04, 2022 at 07:05:53AM +0200, Christoph Hellwig wrote: > Pass a bool to pass if swiotlb needs to be enabled based on the > addressing needs and replace the verbose argument with a set of > flags, including one to force enable bounce buffering. > > Note that this patch removes the possibility to force xen-swiotlb > use using swiotlb=force on the command line on x86 (arm and arm64 > never supported that), but this interface will be restored shortly. > > Signed-off-by: Christoph Hellwig <hch@lst.de> I bisected a performance regression in WSL2 to this change as commit c6af2aa9ffc9 ("swiotlb: make the swiotlb_init interface more useful") in mainline (bisect log below). I initially noticed it because accessing the Windows filesystem through the /mnt/c mount is about 40x slower if I am doing my math right based on the benchmarks below. Before: $ uname -r; and hyperfine "ls -l /mnt/c/Users/natec/Downloads" 5.18.0-rc3-microsoft-standard-WSL2-00008-ga3e230926708 Benchmark 1: ls -l /mnt/c/Users/natec/Downloads Time (mean ± σ): 564.5 ms ± 24.1 ms [User: 2.5 ms, System: 130.3 ms] Range (min … max): 510.2 ms … 588.0 ms 10 runs After $ uname -r; and hyperfine "ls -l /mnt/c/Users/natec/Downloads" 5.18.0-rc3-microsoft-standard-WSL2-00009-gc6af2aa9ffc9 Benchmark 1: ls -l /mnt/c/Users/natec/Downloads Time (mean ± σ): 23.282 s ± 1.220 s [User: 0.013 s, System: 0.101 s] Range (min … max): 21.793 s … 25.317 s 10 runs I do see 'swiotlb=force' on the cmdline: $ cat /proc/cmdline initrd=\initrd.img panic=-1 nr_cpus=8 swiotlb=force earlycon=uart8250,io,0x3f8,115200 console=hvc0 debug pty.legacy_count=0 /mnt/c appears to be a 9p mount, not sure if that is relevant here: $ mount &| grep /mnt/c drvfs on /mnt/c type 9p (rw,noatime,dirsync,aname=drvfs;path=C:\;uid=1000;gid=1000;symlinkroot=/mnt/,mmap,access=client,msize=262144,trans=virtio) If there is any other information I can provide, please let me know. Cheers, Nathan # bad: [700170bf6b4d773e328fa54ebb70ba444007c702] Merge tag 'nfs-for-5.19-1' of git://git.linux-nfs.org/projects/anna/linux-nfs # good: [4b0986a3613c92f4ec1bdc7f60ec66fea135991f] Linux 5.18 git bisect start '700170bf6b4d773e328fa54ebb70ba444007c702' 'v5.18' # good: [86c87bea6b42100c67418af690919c44de6ede6e] Merge tag 'devicetree-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux git bisect good 86c87bea6b42100c67418af690919c44de6ede6e # bad: [ae862183285cbb2ef9032770d98ffa9becffe9d5] Merge tag 'arm-dt-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc git bisect bad ae862183285cbb2ef9032770d98ffa9becffe9d5 # good: [2518f226c60d8e04d18ba4295500a5b0b8ac7659] Merge tag 'drm-next-2022-05-25' of git://anongit.freedesktop.org/drm/drm git bisect good 2518f226c60d8e04d18ba4295500a5b0b8ac7659 # bad: [babf0bb978e3c9fce6c4eba6b744c8754fd43d8e] Merge tag 'xfs-5.19-for-linus' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux git bisect bad babf0bb978e3c9fce6c4eba6b744c8754fd43d8e # good: [beed983621fbdfd291e6e3a0cdc4d10517e60af8] ASoC: Intel: avs: Machine board registration git bisect good beed983621fbdfd291e6e3a0cdc4d10517e60af8 # good: [fbe86daca0ba878b04fa241b85e26e54d17d4229] Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi git bisect good fbe86daca0ba878b04fa241b85e26e54d17d4229 # good: [166afc45ed5523298541fd0297f9ad585cc2708c] Merge tag 'reflink-speedups-5.19_2022-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-5.19-for-next git bisect good 166afc45ed5523298541fd0297f9ad585cc2708c # bad: [e375780b631a5fc2a61a3b4fa12429255361a31e] Merge tag 'fsnotify_for_v5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs git bisect bad e375780b631a5fc2a61a3b4fa12429255361a31e # bad: [4a37f3dd9a83186cb88d44808ab35b78375082c9] dma-direct: don't over-decrypt memory git bisect bad 4a37f3dd9a83186cb88d44808ab35b78375082c9 # bad: [742519538e6b07250c8085bbff4bd358bc03bf16] swiotlb: pass a gfp_mask argument to swiotlb_init_late git bisect bad 742519538e6b07250c8085bbff4bd358bc03bf16 # good: [9bbe7a7fc126e3d14fefa4b035854aba080926d9] arm/xen: don't check for xen_initial_domain() in xen_create_contiguous_region git bisect good 9bbe7a7fc126e3d14fefa4b035854aba080926d9 # good: [a3e230926708125205ffd06d3dc2175a8263ae7e] x86: centralize setting SWIOTLB_FORCE when guest memory encryption is enabled git bisect good a3e230926708125205ffd06d3dc2175a8263ae7e # bad: [8ba2ed1be90fc210126f68186564707478552c95] swiotlb: add a SWIOTLB_ANY flag to lift the low memory restriction git bisect bad 8ba2ed1be90fc210126f68186564707478552c95 # bad: [c6af2aa9ffc9763826607bc2664ef3ea4475ed18] swiotlb: make the swiotlb_init interface more useful git bisect bad c6af2aa9ffc9763826607bc2664ef3ea4475ed18 # first bad commit: [c6af2aa9ffc9763826607bc2664ef3ea4475ed18] swiotlb: make the swiotlb_init interface more useful
Can you send me the full dmesg and the content of /sys/kernel/debug/swiotlb/io_tlb_nslabs for a good and a bad boot? Thanks!
On Wed, Jun 01, 2022 at 10:46:54AM -0700, Nathan Chancellor wrote: > On Wed, Jun 01, 2022 at 07:34:41PM +0200, Christoph Hellwig wrote: > > Can you send me the full dmesg and the content of > > /sys/kernel/debug/swiotlb/io_tlb_nslabs for a good and a bad boot? > > Sure thing, they are attached! If there is anything else I can provide > or test, I am more than happy to do so. Nothing interesting. But the performance numbers almost look like swiotlb=force got ignored before (even if I can't explain why). Do you get a similar performance with the new kernel without swiotlb=force as the old one with that argument by any chance?
On Wed, Jun 01, 2022 at 07:57:43PM +0200, Christoph Hellwig wrote: > On Wed, Jun 01, 2022 at 10:46:54AM -0700, Nathan Chancellor wrote: > > On Wed, Jun 01, 2022 at 07:34:41PM +0200, Christoph Hellwig wrote: > > > Can you send me the full dmesg and the content of > > > /sys/kernel/debug/swiotlb/io_tlb_nslabs for a good and a bad boot? > > > > Sure thing, they are attached! If there is anything else I can provide > > or test, I am more than happy to do so. > > Nothing interesting. But the performance numbers almost look like > swiotlb=force got ignored before (even if I can't explain why). I was able to get my performance back with this diff but I don't know if this is a hack or a proper fix in the context of the series. diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index dfa1de89dc94..0bfb2fe3d8c5 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -276,7 +276,7 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, __func__, alloc_size, PAGE_SIZE); swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false); - mem->force_bounce = flags & SWIOTLB_FORCE; + mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE); if (flags & SWIOTLB_VERBOSE) swiotlb_print_info(); > Do you get a similar performance with the new kernel without > swiotlb=force as the old one with that argument by any chance? I'll see if I can test that, as I am not sure I have control over those cmdline arguments. Cheers, Nathan
On Wed, Jun 01, 2022 at 11:11:57AM -0700, Nathan Chancellor wrote: > On Wed, Jun 01, 2022 at 07:57:43PM +0200, Christoph Hellwig wrote: > > On Wed, Jun 01, 2022 at 10:46:54AM -0700, Nathan Chancellor wrote: > > > On Wed, Jun 01, 2022 at 07:34:41PM +0200, Christoph Hellwig wrote: > > > > Can you send me the full dmesg and the content of > > > > /sys/kernel/debug/swiotlb/io_tlb_nslabs for a good and a bad boot? > > > > > > Sure thing, they are attached! If there is anything else I can provide > > > or test, I am more than happy to do so. > > > > Nothing interesting. But the performance numbers almost look like > > swiotlb=force got ignored before (even if I can't explain why). > > I was able to get my performance back with this diff but I don't know if > this is a hack or a proper fix in the context of the series. This looks good, but needs a little tweak. I'd go for this variant of it: diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index dfa1de89dc944..cb50f8d383606 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -192,7 +192,7 @@ void __init swiotlb_update_mem_attributes(void) } static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, - unsigned long nslabs, bool late_alloc) + unsigned long nslabs, unsigned int flags, bool late_alloc) { void *vaddr = phys_to_virt(start); unsigned long bytes = nslabs << IO_TLB_SHIFT, i; @@ -203,8 +203,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, mem->index = 0; mem->late_alloc = late_alloc; - if (swiotlb_force_bounce) - mem->force_bounce = true; + mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE); spin_lock_init(&mem->lock); for (i = 0; i < mem->nslabs; i++) { @@ -275,8 +274,7 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, panic("%s: Failed to allocate %zu bytes align=0x%lx\n", __func__, alloc_size, PAGE_SIZE); - swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false); - mem->force_bounce = flags & SWIOTLB_FORCE; + swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false); if (flags & SWIOTLB_VERBOSE) swiotlb_print_info(); @@ -348,7 +346,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, set_memory_decrypted((unsigned long)vstart, (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); - swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, true); + swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true); swiotlb_print_info(); return 0; @@ -835,8 +833,8 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem, set_memory_decrypted((unsigned long)phys_to_virt(rmem->base), rmem->size >> PAGE_SHIFT); - swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false); - mem->force_bounce = true; + swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE, + false); mem->for_alloc = true; rmem->priv = mem;
On Wed, Jun 01, 2022 at 08:21:41PM +0200, Christoph Hellwig wrote: > On Wed, Jun 01, 2022 at 11:11:57AM -0700, Nathan Chancellor wrote: > > On Wed, Jun 01, 2022 at 07:57:43PM +0200, Christoph Hellwig wrote: > > > On Wed, Jun 01, 2022 at 10:46:54AM -0700, Nathan Chancellor wrote: > > > > On Wed, Jun 01, 2022 at 07:34:41PM +0200, Christoph Hellwig wrote: > > > > > Can you send me the full dmesg and the content of > > > > > /sys/kernel/debug/swiotlb/io_tlb_nslabs for a good and a bad boot? > > > > > > > > Sure thing, they are attached! If there is anything else I can provide > > > > or test, I am more than happy to do so. > > > > > > Nothing interesting. But the performance numbers almost look like > > > swiotlb=force got ignored before (even if I can't explain why). > > > > I was able to get my performance back with this diff but I don't know if > > this is a hack or a proper fix in the context of the series. > > This looks good, but needs a little tweak. I'd go for this variant of > it: Tested-by: Nathan Chancellor <nathan@kernel.org> Thanks a lot for the quick fix! > diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c > index dfa1de89dc944..cb50f8d383606 100644 > --- a/kernel/dma/swiotlb.c > +++ b/kernel/dma/swiotlb.c > @@ -192,7 +192,7 @@ void __init swiotlb_update_mem_attributes(void) > } > > static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, > - unsigned long nslabs, bool late_alloc) > + unsigned long nslabs, unsigned int flags, bool late_alloc) > { > void *vaddr = phys_to_virt(start); > unsigned long bytes = nslabs << IO_TLB_SHIFT, i; > @@ -203,8 +203,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, > mem->index = 0; > mem->late_alloc = late_alloc; > > - if (swiotlb_force_bounce) > - mem->force_bounce = true; > + mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE); > > spin_lock_init(&mem->lock); > for (i = 0; i < mem->nslabs; i++) { > @@ -275,8 +274,7 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, > panic("%s: Failed to allocate %zu bytes align=0x%lx\n", > __func__, alloc_size, PAGE_SIZE); > > - swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false); > - mem->force_bounce = flags & SWIOTLB_FORCE; > + swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false); > > if (flags & SWIOTLB_VERBOSE) > swiotlb_print_info(); > @@ -348,7 +346,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, > > set_memory_decrypted((unsigned long)vstart, > (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); > - swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, true); > + swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true); > > swiotlb_print_info(); > return 0; > @@ -835,8 +833,8 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem, > > set_memory_decrypted((unsigned long)phys_to_virt(rmem->base), > rmem->size >> PAGE_SHIFT); > - swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false); > - mem->force_bounce = true; > + swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE, > + false); > mem->for_alloc = true; > > rmem->priv = mem; > Cheers, Nathan
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index fe249ea919083..ce64bdb55a16b 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -271,11 +271,7 @@ static void __init free_highpages(void) void __init mem_init(void) { #ifdef CONFIG_ARM_LPAE - if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > arm_dma_pfn_limit) - swiotlb_init(1); - else - swiotlb_force = SWIOTLB_NO_FORCE; + swiotlb_init(max_pfn > arm_dma_pfn_limit, SWIOTLB_VERBOSE); #endif set_max_mapnr(pfn_to_page(max_pfn) - mem_map); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8ac25f19084e8..7b6ea4d6733d6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -398,11 +398,7 @@ void __init bootmem_init(void) */ void __init mem_init(void) { - if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > PFN_DOWN(arm64_dma_phys_limit)) - swiotlb_init(1); - else if (!xen_swiotlb_detect()) - swiotlb_force = SWIOTLB_NO_FORCE; + swiotlb_init(max_pfn > PFN_DOWN(arm64_dma_phys_limit), SWIOTLB_VERBOSE); /* this will put all unused low memory onto the freelists */ memblock_free_all(); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 5d165607bf354..3c3e15b22608f 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -437,9 +437,7 @@ mem_init (void) if (iommu_detected) break; #endif -#ifdef CONFIG_SWIOTLB - swiotlb_init(1); -#endif + swiotlb_init(true, SWIOTLB_VERBOSE); } while (0); #ifdef CONFIG_FLATMEM diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index fb7547e217263..9fbba6a8fa4c5 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -235,5 +235,5 @@ void __init plat_swiotlb_setup(void) #endif swiotlb_adjust_size(swiotlbsize); - swiotlb_init(1); + swiotlb_init(true, SWIOTLB_VERBOSE); } diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c index 364f2f27c8723..8220a1bc0db64 100644 --- a/arch/mips/loongson64/dma.c +++ b/arch/mips/loongson64/dma.c @@ -24,5 +24,5 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) void __init plat_swiotlb_setup(void) { - swiotlb_init(1); + swiotlb_init(true, SWIOTLB_VERBOSE); } diff --git a/arch/mips/sibyte/common/dma.c b/arch/mips/sibyte/common/dma.c index eb47a94f3583e..c5c2c782aff68 100644 --- a/arch/mips/sibyte/common/dma.c +++ b/arch/mips/sibyte/common/dma.c @@ -10,5 +10,5 @@ void __init plat_swiotlb_setup(void) { - swiotlb_init(1); + swiotlb_init(true, SWIOTLB_VERBOSE); } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8e301cd8925b2..e1519e2edc656 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -17,6 +17,7 @@ #include <linux/suspend.h> #include <linux/dma-direct.h> +#include <asm/swiotlb.h> #include <asm/machdep.h> #include <asm/rtas.h> #include <asm/kasan.h> @@ -251,7 +252,7 @@ void __init mem_init(void) if (is_secure_guest()) svm_swiotlb_init(); else - swiotlb_init(0); + swiotlb_init(ppc_swiotlb_enable, 0); #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 069d7b3bb142e..c6e06d91b6602 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -838,9 +838,6 @@ static void __init pSeries_setup_arch(void) } ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; - - if (swiotlb_force == SWIOTLB_FORCE) - ppc_swiotlb_enable = 1; } static void pseries_panic(char *str) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9535bea8688c0..181ffd322eafa 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -120,13 +120,7 @@ void __init mem_init(void) BUG_ON(!mem_map); #endif /* CONFIG_FLATMEM */ -#ifdef CONFIG_SWIOTLB - if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > PFN_DOWN(dma32_phys_limit)) - swiotlb_init(1); - else - swiotlb_force = SWIOTLB_NO_FORCE; -#endif + swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE); memblock_free_all(); print_vm_layout(); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 86ffd0d51fd59..6fb6bf64326f9 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -185,8 +185,7 @@ static void pv_init(void) return; /* make sure bounce buffers are shared */ - swiotlb_force = SWIOTLB_FORCE; - swiotlb_init(1); + swiotlb_init(true, SWIOTLB_FORCE | SWIOTLB_VERBOSE); swiotlb_update_mem_attributes(); } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 04140e20ef1a3..a705a199bf8a3 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -39,6 +39,7 @@ int iommu_detected __read_mostly = 0; #ifdef CONFIG_SWIOTLB bool x86_swiotlb_enable; +static unsigned int x86_swiotlb_flags; static void __init pci_swiotlb_detect(void) { @@ -58,16 +59,16 @@ static void __init pci_swiotlb_detect(void) * bounce buffers as the hypervisor can't access arbitrary VM memory * that is not explicitly shared with it. */ - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - swiotlb_force = SWIOTLB_FORCE; - - if (swiotlb_force == SWIOTLB_FORCE) + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { x86_swiotlb_enable = true; + x86_swiotlb_flags |= SWIOTLB_FORCE; + } } #else static inline void __init pci_swiotlb_detect(void) { } +#define x86_swiotlb_flags 0 #endif /* CONFIG_SWIOTLB */ #ifdef CONFIG_SWIOTLB_XEN @@ -75,8 +76,7 @@ static bool xen_swiotlb; static void __init pci_xen_swiotlb_init(void) { - if (!xen_initial_domain() && !x86_swiotlb_enable && - swiotlb_force != SWIOTLB_FORCE) + if (!xen_initial_domain() && !x86_swiotlb_enable) return; x86_swiotlb_enable = true; xen_swiotlb = true; @@ -120,8 +120,7 @@ void __init pci_iommu_alloc(void) gart_iommu_hole_init(); amd_iommu_detect(); detect_intel_iommu(); - if (x86_swiotlb_enable) - swiotlb_init(0); + swiotlb_init(x86_swiotlb_enable, x86_swiotlb_flags); } /* diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 485cd06ed39e7..c2da3eb4826e8 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -251,7 +251,7 @@ void __init xen_swiotlb_init_early(void) panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc); } - if (swiotlb_init_with_tbl(start, nslabs, true)) + if (swiotlb_init_with_tbl(start, nslabs, SWIOTLB_VERBOSE)) panic("Cannot allocate SWIOTLB buffer"); } #endif /* CONFIG_X86 */ @@ -376,7 +376,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, /* * Oh well, have to allocate and map a bounce buffer. */ - trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); + trace_swiotlb_bounced(dev, dev_addr, size); map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs); if (map == (phys_addr_t)DMA_MAPPING_ERROR) diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index b48b26bfa0edb..ae0407173e845 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -13,11 +13,8 @@ struct device; struct page; struct scatterlist; -enum swiotlb_force { - SWIOTLB_NORMAL, /* Default - depending on HW DMA mask etc. */ - SWIOTLB_FORCE, /* swiotlb=force */ - SWIOTLB_NO_FORCE, /* swiotlb=noforce */ -}; +#define SWIOTLB_VERBOSE (1 << 0) /* verbose initialization */ +#define SWIOTLB_FORCE (1 << 1) /* force bounce buffering */ /* * Maximum allowable number of contiguous slabs to map, @@ -36,8 +33,7 @@ enum swiotlb_force { /* default to 64MB */ #define IO_TLB_DEFAULT_SIZE (64UL<<20) -extern void swiotlb_init(int verbose); -int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); +int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags); unsigned long swiotlb_size_or_default(void); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); int swiotlb_init_late(size_t size); @@ -126,13 +122,16 @@ static inline bool is_swiotlb_force_bounce(struct device *dev) return mem && mem->force_bounce; } +void swiotlb_init(bool addressing_limited, unsigned int flags); void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); size_t swiotlb_max_mapping_size(struct device *dev); bool is_swiotlb_active(struct device *dev); void __init swiotlb_adjust_size(unsigned long size); #else -#define swiotlb_force SWIOTLB_NO_FORCE +static inline void swiotlb_init(bool addressing_limited, unsigned int flags) +{ +} static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) { return false; diff --git a/include/trace/events/swiotlb.h b/include/trace/events/swiotlb.h index 705be43b71ab0..da05c9ebd224a 100644 --- a/include/trace/events/swiotlb.h +++ b/include/trace/events/swiotlb.h @@ -8,20 +8,15 @@ #include <linux/tracepoint.h> TRACE_EVENT(swiotlb_bounced, - - TP_PROTO(struct device *dev, - dma_addr_t dev_addr, - size_t size, - enum swiotlb_force swiotlb_force), - - TP_ARGS(dev, dev_addr, size, swiotlb_force), + TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size), + TP_ARGS(dev, dev_addr, size), TP_STRUCT__entry( - __string( dev_name, dev_name(dev) ) - __field( u64, dma_mask ) - __field( dma_addr_t, dev_addr ) - __field( size_t, size ) - __field( enum swiotlb_force, swiotlb_force ) + __string(dev_name, dev_name(dev)) + __field(u64, dma_mask) + __field(dma_addr_t, dev_addr) + __field(size_t, size) + __field(bool, force) ), TP_fast_assign( @@ -29,19 +24,15 @@ TRACE_EVENT(swiotlb_bounced, __entry->dma_mask = (dev->dma_mask ? *dev->dma_mask : 0); __entry->dev_addr = dev_addr; __entry->size = size; - __entry->swiotlb_force = swiotlb_force; + __entry->force = is_swiotlb_force_bounce(dev); ), - TP_printk("dev_name: %s dma_mask=%llx dev_addr=%llx " - "size=%zu %s", + TP_printk("dev_name: %s dma_mask=%llx dev_addr=%llx size=%zu %s", __get_str(dev_name), __entry->dma_mask, (unsigned long long)__entry->dev_addr, __entry->size, - __print_symbolic(__entry->swiotlb_force, - { SWIOTLB_NORMAL, "NORMAL" }, - { SWIOTLB_FORCE, "FORCE" }, - { SWIOTLB_NO_FORCE, "NO_FORCE" })) + __entry->force ? "FORCE" : "NORMAL") ); #endif /* _TRACE_SWIOTLB_H */ diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 9a4fe6e48a074..86e877a96b828 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -62,7 +62,8 @@ #define INVALID_PHYS_ADDR (~(phys_addr_t)0) -enum swiotlb_force swiotlb_force; +static bool swiotlb_force_bounce; +static bool swiotlb_force_disable; struct io_tlb_mem io_tlb_default_mem; @@ -81,9 +82,9 @@ setup_io_tlb_npages(char *str) if (*str == ',') ++str; if (!strcmp(str, "force")) - swiotlb_force = SWIOTLB_FORCE; + swiotlb_force_bounce = true; else if (!strcmp(str, "noforce")) - swiotlb_force = SWIOTLB_NO_FORCE; + swiotlb_force_disable = true; return 0; } @@ -202,7 +203,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, mem->index = 0; mem->late_alloc = late_alloc; - if (swiotlb_force == SWIOTLB_FORCE) + if (swiotlb_force_bounce) mem->force_bounce = true; spin_lock_init(&mem->lock); @@ -224,12 +225,13 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, return; } -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) +int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, + unsigned int flags) { struct io_tlb_mem *mem = &io_tlb_default_mem; size_t alloc_size; - if (swiotlb_force == SWIOTLB_NO_FORCE) + if (swiotlb_force_disable) return 0; /* protect against double initialization */ @@ -243,8 +245,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) __func__, alloc_size, PAGE_SIZE); swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false); + mem->force_bounce = flags & SWIOTLB_FORCE; - if (verbose) + if (flags & SWIOTLB_VERBOSE) swiotlb_print_info(); return 0; } @@ -253,20 +256,21 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. */ -void __init -swiotlb_init(int verbose) +void __init swiotlb_init(bool addressing_limit, unsigned int flags) { size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT); void *tlb; - if (swiotlb_force == SWIOTLB_NO_FORCE) + if (!addressing_limit && !swiotlb_force_bounce) + return; + if (swiotlb_force_disable) return; /* Get IO TLB memory from the low pages */ tlb = memblock_alloc_low(bytes, PAGE_SIZE); if (!tlb) goto fail; - if (swiotlb_init_with_tbl(tlb, default_nslabs, verbose)) + if (swiotlb_init_with_tbl(tlb, default_nslabs, flags)) goto fail_free_mem; return; @@ -289,7 +293,7 @@ int swiotlb_init_late(size_t size) unsigned int order; int rc = 0; - if (swiotlb_force == SWIOTLB_NO_FORCE) + if (swiotlb_force_disable) return 0; /* @@ -328,7 +332,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) struct io_tlb_mem *mem = &io_tlb_default_mem; unsigned long bytes = nslabs << IO_TLB_SHIFT; - if (swiotlb_force == SWIOTLB_NO_FORCE) + if (swiotlb_force_disable) return 0; /* protect against double initialization */ @@ -353,7 +357,7 @@ void __init swiotlb_exit(void) unsigned long tbl_vaddr; size_t tbl_size, slots_size; - if (swiotlb_force == SWIOTLB_FORCE) + if (swiotlb_force_bounce) return; if (!mem->nslabs) @@ -704,8 +708,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, phys_addr_t swiotlb_addr; dma_addr_t dma_addr; - trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size, - swiotlb_force); + trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size); swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir, attrs);
Pass a bool to pass if swiotlb needs to be enabled based on the addressing needs and replace the verbose argument with a set of flags, including one to force enable bounce buffering. Note that this patch removes the possibility to force xen-swiotlb use using swiotlb=force on the command line on x86 (arm and arm64 never supported that), but this interface will be restored shortly. Signed-off-by: Christoph Hellwig <hch@lst.de> --- arch/arm/mm/init.c | 6 +---- arch/arm64/mm/init.c | 6 +---- arch/ia64/mm/init.c | 4 +-- arch/mips/cavium-octeon/dma-octeon.c | 2 +- arch/mips/loongson64/dma.c | 2 +- arch/mips/sibyte/common/dma.c | 2 +- arch/powerpc/mm/mem.c | 3 ++- arch/powerpc/platforms/pseries/setup.c | 3 --- arch/riscv/mm/init.c | 8 +----- arch/s390/mm/init.c | 3 +-- arch/x86/kernel/pci-dma.c | 15 ++++++----- drivers/xen/swiotlb-xen.c | 4 +-- include/linux/swiotlb.h | 15 ++++++----- include/trace/events/swiotlb.h | 29 ++++++++------------- kernel/dma/swiotlb.c | 35 ++++++++++++++------------ 15 files changed, 55 insertions(+), 82 deletions(-)