[11/12] swiotlb: merge swiotlb-xen initialization into swiotlb

Message ID 20220301105311.885699-12-hch@lst.de (mailing list archive)
State Superseded
Series [01/12] dma-direct: use is_swiotlb_active in dma_direct_map_page

Commit Message

Christoph Hellwig March 1, 2022, 10:53 a.m. UTC
Allow passing a remap argument to the swiotlb initialization functions
to handle the Xen/x86 remap case.  ARM/ARM64 never did any remapping
from xen_swiotlb_fixup, so we don't even need that quirk.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm/xen/mm.c               |  23 +++---
 arch/x86/include/asm/xen/page.h |   5 --
 arch/x86/kernel/pci-dma.c       |  19 +++--
 arch/x86/pci/sta2x11-fixup.c    |   2 +-
 drivers/xen/swiotlb-xen.c       | 128 +-------------------------------
 include/linux/swiotlb.h         |   7 +-
 include/xen/arm/page.h          |   1 -
 include/xen/swiotlb-xen.h       |   8 +-
 kernel/dma/swiotlb.c            | 120 +++++++++++++++---------------
 9 files changed, 96 insertions(+), 217 deletions(-)
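
For orientation, a minimal sketch of the calling convention this patch introduces. The two function signatures are taken from the diff below; the callback body, its name, and the flag choice are purely illustrative:

static int my_remap(void *tlb, unsigned long nslabs)
{
	/*
	 * A remap hook exchanges the freshly allocated buffer for memory
	 * that satisfies its constraints (for Xen: machine-contiguous
	 * pages below 4GB).  Returning a negative errno makes the caller
	 * free the buffer, halve nslabs and retry, down to a 2MB minimum.
	 */
	return 0;
}

void __init example_early_setup(void)
{
	/* boot-time path: panics on allocation failure */
	swiotlb_init_remap(true, SWIOTLB_VERBOSE, my_remap);
}

int example_late_setup(void)
{
	/* post-boot path: returns an error instead of panicking */
	return swiotlb_init_late(swiotlb_size_or_default(), GFP_KERNEL,
				 my_remap);
}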

Comments

Stefano Stabellini March 2, 2022, 2:55 a.m. UTC | #1
On Tue, 1 Mar 2022, Christoph Hellwig wrote:
> Allow passing a remap argument to the swiotlb initialization functions
> to handle the Xen/x86 remap case.  ARM/ARM64 never did any remapping
> from xen_swiotlb_fixup, so we don't even need that quirk.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  arch/arm/xen/mm.c               |  23 +++---
>  arch/x86/include/asm/xen/page.h |   5 --
>  arch/x86/kernel/pci-dma.c       |  19 +++--
>  arch/x86/pci/sta2x11-fixup.c    |   2 +-
>  drivers/xen/swiotlb-xen.c       | 128 +-------------------------------
>  include/linux/swiotlb.h         |   7 +-
>  include/xen/arm/page.h          |   1 -
>  include/xen/swiotlb-xen.h       |   8 +-
>  kernel/dma/swiotlb.c            | 120 +++++++++++++++---------------
>  9 files changed, 96 insertions(+), 217 deletions(-)
> 
> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
> index a7e54a087b802..58b40f87617d3 100644
> --- a/arch/arm/xen/mm.c
> +++ b/arch/arm/xen/mm.c
> @@ -23,22 +23,20 @@
>  #include <asm/xen/hypercall.h>
>  #include <asm/xen/interface.h>
>  
> -unsigned long xen_get_swiotlb_free_pages(unsigned int order)
> +static gfp_t xen_swiotlb_gfp(void)
>  {
>  	phys_addr_t base;
> -	gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM;
>  	u64 i;
>  
>  	for_each_mem_range(i, &base, NULL) {
>  		if (base < (phys_addr_t)0xffffffff) {
>  			if (IS_ENABLED(CONFIG_ZONE_DMA32))
> -				flags |= __GFP_DMA32;
> -			else
> -				flags |= __GFP_DMA;
> -			break;
> +				return __GFP_DMA32;
> +			return __GFP_DMA;
>  		}
>  	}
> -	return __get_free_pages(flags, order);
> +
> +	return GFP_KERNEL;
>  }

Unrelated to this specific patch series: now that I think about it, if
io_tlb_default_mem.nslabs is already allocated by the time xen_mm_init
is called, wouldn't we potentially have an issue with the GFP flags used
for the earlier allocation (e.g. GFP_DMA32 not used)? Maybe something
for another day.


>  static bool hypercall_cflush = false;
> @@ -143,10 +141,15 @@ static int __init xen_mm_init(void)
>  	if (!xen_swiotlb_detect())
>  		return 0;
>  
> -	rc = xen_swiotlb_init();
>  	/* we can work with the default swiotlb */
> -	if (rc < 0 && rc != -EEXIST)
> -		return rc;
> +	if (!io_tlb_default_mem.nslabs) {
> +		if (!xen_initial_domain())
> +			return -EINVAL;

I don't think we need this xen_initial_domain() check. It is all
already sorted out by the xen_swiotlb_detect() check above.

Aside from that the rest looks OK. Also, you can add my:

Tested-by: Stefano Stabellini <sstabellini@kernel.org>


> +		rc = swiotlb_init_late(swiotlb_size_or_default(),
> +				       xen_swiotlb_gfp(), NULL);
> +		if (rc < 0)
> +			return rc;
> +	}
>  
>  	cflush.op = 0;
>  	cflush.a.dev_bus_addr = 0;
> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> index e989bc2269f54..1fc67df500145 100644
> --- a/arch/x86/include/asm/xen/page.h
> +++ b/arch/x86/include/asm/xen/page.h
> @@ -357,9 +357,4 @@ static inline bool xen_arch_need_swiotlb(struct device *dev,
>  	return false;
>  }
>  
> -static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order)
> -{
> -	return __get_free_pages(__GFP_NOWARN, order);
> -}
> -
>  #endif /* _ASM_X86_XEN_PAGE_H */
> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
> index e0def4b1c3181..2f2c468acb955 100644
> --- a/arch/x86/kernel/pci-dma.c
> +++ b/arch/x86/kernel/pci-dma.c
> @@ -71,15 +71,12 @@ static inline void __init pci_swiotlb_detect(void)
>  #endif /* CONFIG_SWIOTLB */
>  
>  #ifdef CONFIG_SWIOTLB_XEN
> -static bool xen_swiotlb;
> -
>  static void __init pci_xen_swiotlb_init(void)
>  {
>  	if (!xen_initial_domain() && !x86_swiotlb_enable)
>  		return;
>  	x86_swiotlb_enable = false;
> -	xen_swiotlb = true;
> -	xen_swiotlb_init_early();
> +	swiotlb_init_remap(true, x86_swiotlb_flags, xen_swiotlb_fixup);
>  	dma_ops = &xen_swiotlb_dma_ops;
>  	if (IS_ENABLED(CONFIG_PCI))
>  		pci_request_acs();
> @@ -87,14 +84,16 @@ static void __init pci_xen_swiotlb_init(void)
>  
>  int pci_xen_swiotlb_init_late(void)
>  {
> -	int rc;
> -
> -	if (xen_swiotlb)
> +	if (dma_ops == &xen_swiotlb_dma_ops)
>  		return 0;
>  
> -	rc = xen_swiotlb_init();
> -	if (rc)
> -		return rc;
> +	/* we can work with the default swiotlb */
> +	if (!io_tlb_default_mem.nslabs) {
> +		int rc = swiotlb_init_late(swiotlb_size_or_default(),
> +					   GFP_KERNEL, xen_swiotlb_fixup);
> +		if (rc < 0)
> +			return rc;
> +	}
>  
>  	/* XXX: this switches the dma ops under live devices! */
>  	dma_ops = &xen_swiotlb_dma_ops;
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index c2da3eb4826e8..df8085b50df10 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -104,7 +104,7 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
>  	return 0;
>  }
>  
> -static int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
> +int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
>  {
>  	int rc;
>  	unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT);
> @@ -130,132 +130,6 @@ static int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
>  	return 0;
>  }
>  
> -enum xen_swiotlb_err {
> -	XEN_SWIOTLB_UNKNOWN = 0,
> -	XEN_SWIOTLB_ENOMEM,
> -	XEN_SWIOTLB_EFIXUP
> -};
> -
> -static const char *xen_swiotlb_error(enum xen_swiotlb_err err)
> -{
> -	switch (err) {
> -	case XEN_SWIOTLB_ENOMEM:
> -		return "Cannot allocate Xen-SWIOTLB buffer\n";
> -	case XEN_SWIOTLB_EFIXUP:
> -		return "Failed to get contiguous memory for DMA from Xen!\n"\
> -		    "You either: don't have the permissions, do not have"\
> -		    " enough free memory under 4GB, or the hypervisor memory"\
> -		    " is too fragmented!";
> -	default:
> -		break;
> -	}
> -	return "";
> -}
> -
> -int xen_swiotlb_init(void)
> -{
> -	enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
> -	unsigned long bytes = swiotlb_size_or_default();
> -	unsigned long nslabs = bytes >> IO_TLB_SHIFT;
> -	unsigned int order, repeat = 3;
> -	int rc = -ENOMEM;
> -	char *start;
> -
> -	if (io_tlb_default_mem.nslabs) {
> -		pr_warn("swiotlb buffer already initialized\n");
> -		return -EEXIST;
> -	}
> -
> -retry:
> -	m_ret = XEN_SWIOTLB_ENOMEM;
> -	order = get_order(bytes);
> -
> -	/*
> -	 * Get IO TLB memory from any location.
> -	 */
> -#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
> -#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
> -	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
> -		start = (void *)xen_get_swiotlb_free_pages(order);
> -		if (start)
> -			break;
> -		order--;
> -	}
> -	if (!start)
> -		goto exit;
> -	if (order != get_order(bytes)) {
> -		pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
> -			(PAGE_SIZE << order) >> 20);
> -		nslabs = SLABS_PER_PAGE << order;
> -		bytes = nslabs << IO_TLB_SHIFT;
> -	}
> -
> -	/*
> -	 * And replace that memory with pages under 4GB.
> -	 */
> -	rc = xen_swiotlb_fixup(start, nslabs);
> -	if (rc) {
> -		free_pages((unsigned long)start, order);
> -		m_ret = XEN_SWIOTLB_EFIXUP;
> -		goto error;
> -	}
> -	rc = swiotlb_late_init_with_tbl(start, nslabs);
> -	if (rc)
> -		return rc;
> -	return 0;
> -error:
> -	if (nslabs > 1024 && repeat--) {
> -		/* Min is 2MB */
> -		nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
> -		bytes = nslabs << IO_TLB_SHIFT;
> -		pr_info("Lowering to %luMB\n", bytes >> 20);
> -		goto retry;
> -	}
> -exit:
> -	pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc);
> -	return rc;
> -}
> -
> -#ifdef CONFIG_X86
> -void __init xen_swiotlb_init_early(void)
> -{
> -	unsigned long bytes = swiotlb_size_or_default();
> -	unsigned long nslabs = bytes >> IO_TLB_SHIFT;
> -	unsigned int repeat = 3;
> -	char *start;
> -	int rc;
> -
> -retry:
> -	/*
> -	 * Get IO TLB memory from any location.
> -	 */
> -	start = memblock_alloc(PAGE_ALIGN(bytes),
> -			       IO_TLB_SEGSIZE << IO_TLB_SHIFT);
> -	if (!start)
> -		panic("%s: Failed to allocate %lu bytes\n",
> -		      __func__, PAGE_ALIGN(bytes));
> -
> -	/*
> -	 * And replace that memory with pages under 4GB.
> -	 */
> -	rc = xen_swiotlb_fixup(start, nslabs);
> -	if (rc) {
> -		memblock_free(start, PAGE_ALIGN(bytes));
> -		if (nslabs > 1024 && repeat--) {
> -			/* Min is 2MB */
> -			nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
> -			bytes = nslabs << IO_TLB_SHIFT;
> -			pr_info("Lowering to %luMB\n", bytes >> 20);
> -			goto retry;
> -		}
> -		panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc);
> -	}
> -
> -	if (swiotlb_init_with_tbl(start, nslabs, SWIOTLB_VERBOSE))
> -		panic("Cannot allocate SWIOTLB buffer");
> -}
> -#endif /* CONFIG_X86 */
> -
>  static void *
>  xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
>  			   dma_addr_t *dma_handle, gfp_t flags,
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index ee655f2e4d28b..919cf82ed978e 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -34,10 +34,11 @@ struct scatterlist;
>  /* default to 64MB */
>  #define IO_TLB_DEFAULT_SIZE (64UL<<20)
>  
> -int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags);
>  unsigned long swiotlb_size_or_default(void);
> -extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
> -int swiotlb_init_late(size_t size, gfp_t gfp_mask);
> +int swiotlb_init_late(size_t size, gfp_t gfp_mask,
> +		int (*remap)(void *tlb, unsigned long nslabs));
> +void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
> +		int (*remap)(void *tlb, unsigned long nslabs));
>  extern void __init swiotlb_update_mem_attributes(void);
>  
>  phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
> diff --git a/include/xen/arm/page.h b/include/xen/arm/page.h
> index ac1b654705631..7e199c6656b90 100644
> --- a/include/xen/arm/page.h
> +++ b/include/xen/arm/page.h
> @@ -115,6 +115,5 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
>  bool xen_arch_need_swiotlb(struct device *dev,
>  			   phys_addr_t phys,
>  			   dma_addr_t dev_addr);
> -unsigned long xen_get_swiotlb_free_pages(unsigned int order);
>  #endif /* _ASM_ARM_XEN_PAGE_H */
> diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h
> index b3e647f86e3e2..590ceb923f0c8 100644
> --- a/include/xen/swiotlb-xen.h
> +++ b/include/xen/swiotlb-xen.h
> @@ -10,8 +10,12 @@ void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
>  void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
>  			     size_t size, enum dma_data_direction dir);
>  
> -int xen_swiotlb_init(void);
> -void __init xen_swiotlb_init_early(void);
> +#ifdef CONFIG_SWIOTLB_XEN
> +int xen_swiotlb_fixup(void *buf, unsigned long nslabs);
> +#else
> +#define xen_swiotlb_fixup NULL
> +#endif
> +
>  extern const struct dma_map_ops xen_swiotlb_dma_ops;
>  
>  #endif /* __LINUX_SWIOTLB_XEN_H */
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 77cf73dc20a78..128363dc9b5bb 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -234,40 +234,17 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
>  	return;
>  }
>  
> -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs,
> -		unsigned int flags)
> -{
> -	struct io_tlb_mem *mem = &io_tlb_default_mem;
> -	size_t alloc_size;
> -
> -	if (swiotlb_force_disable)
> -		return 0;
> -
> -	/* protect against double initialization */
> -	if (WARN_ON_ONCE(mem->nslabs))
> -		return -ENOMEM;
> -
> -	alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
> -	mem->slots = memblock_alloc(alloc_size, PAGE_SIZE);
> -	if (!mem->slots)
> -		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
> -		      __func__, alloc_size, PAGE_SIZE);
> -
> -	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false);
> -	mem->force_bounce = flags & SWIOTLB_FORCE;
> -
> -	if (flags & SWIOTLB_VERBOSE)
> -		swiotlb_print_info();
> -	return 0;
> -}
> -
>  /*
>   * Statically reserve bounce buffer space and initialize bounce buffer data
>   * structures for the software IO TLB used to implement the DMA API.
>   */
> -void __init swiotlb_init(bool addressing_limit, unsigned int flags)
> +void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
> +		int (*remap)(void *tlb, unsigned long nslabs))
>  {
> -	size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
> +	struct io_tlb_mem *mem = &io_tlb_default_mem;
> +	unsigned long nslabs = default_nslabs;
> +	size_t alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
> +	size_t bytes;
>  	void *tlb;
>  
>  	if (!addressing_limit && !swiotlb_force_bounce)
> @@ -275,23 +252,48 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags)
>  	if (swiotlb_force_disable)
>  		return;
>  
> +	/* protect against double initialization */
> +	if (WARN_ON_ONCE(mem->nslabs))
> +		return;
> +
>  	/*
>  	 * By default allocate the bounce buffer memory from low memory.
>  	 */
> +retry:
> +	bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
>  	if (flags & SWIOTLB_ANY)
>  		tlb = memblock_alloc(bytes, PAGE_SIZE);
>  	else
>  		tlb = memblock_alloc_low(bytes, PAGE_SIZE);
>  	if (!tlb)
> -		goto fail;
> -	if (swiotlb_init_with_tbl(tlb, default_nslabs, flags))
> -		goto fail_free_mem;
> -	return;
> +		panic("%s: failed to allocate tlb structure\n", __func__);
> +
> +	if (remap && remap(tlb, nslabs) < 0) {
> +		memblock_free(tlb, PAGE_ALIGN(bytes));
> +
> +		/* Min is 2MB */
> +		if (nslabs <= 1024)
> +			panic("%s: Failed to remap %zu bytes\n",
> +			      __func__, bytes);
> +		nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
> +		goto retry;
> +	}
> +
> +	mem->slots = memblock_alloc(alloc_size, PAGE_SIZE);
> +	if (!mem->slots)
> +		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
> +		      __func__, alloc_size, PAGE_SIZE);
>  
> -fail_free_mem:
> -	memblock_free(tlb, bytes);
> -fail:
> -	pr_warn("Cannot allocate buffer");
> +	swiotlb_init_io_tlb_mem(mem, __pa(tlb), default_nslabs, false);
> +	mem->force_bounce = flags & SWIOTLB_FORCE;
> +
> +	if (flags & SWIOTLB_VERBOSE)
> +		swiotlb_print_info();
> +}
> +
> +void __init swiotlb_init(bool addressing_limit, unsigned int flags)
> +{
> +	return swiotlb_init_remap(addressing_limit, flags, NULL);
>  }
>  
>  /*
> @@ -299,8 +301,10 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags)
>   * initialize the swiotlb later using the slab allocator if needed.
>   * This should be just like above, but with some error catching.
>   */
> -int swiotlb_init_late(size_t size, gfp_t gfp_mask)
> +int swiotlb_init_late(size_t size, gfp_t gfp_mask,
> +		int (*remap)(void *tlb, unsigned long nslabs))
>  {
> +	struct io_tlb_mem *mem = &io_tlb_default_mem;
>  	unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
>  	unsigned long bytes;
>  	unsigned char *vstart = NULL;
> @@ -310,9 +314,14 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask)
>  	if (swiotlb_force_disable)
>  		return 0;
>  
> +	/* protect against double initialization */
> +	if (WARN_ON_ONCE(mem->nslabs))
> +		return -ENOMEM;
> +
>  	/*
>  	 * Get IO TLB memory from the low pages
>  	 */
> +retry:
>  	order = get_order(nslabs << IO_TLB_SHIFT);
>  	nslabs = SLABS_PER_PAGE << order;
>  	bytes = nslabs << IO_TLB_SHIFT;
> @@ -333,33 +342,28 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask)
>  			(PAGE_SIZE << order) >> 20);
>  		nslabs = SLABS_PER_PAGE << order;
>  	}
> -	rc = swiotlb_late_init_with_tbl(vstart, nslabs);
> -	if (rc)
> -		free_pages((unsigned long)vstart, order);
> -
> -	return rc;
> -}
> -
> -int
> -swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
> -{
> -	struct io_tlb_mem *mem = &io_tlb_default_mem;
> -	unsigned long bytes = nslabs << IO_TLB_SHIFT;
>  
> -	if (swiotlb_force_disable)
> -		return 0;
> +	if (remap)
> +		rc = remap(vstart, nslabs);
> +	if (rc) {
> +		free_pages((unsigned long)vstart, order);
>  
> -	/* protect against double initialization */
> -	if (WARN_ON_ONCE(mem->nslabs))
> -		return -ENOMEM;
> +		/* Min is 2MB */
> +		if (nslabs <= 1024)
> +			return rc;
> +		nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
> +		goto retry;
> +	}
>  
>  	mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
>  		get_order(array_size(sizeof(*mem->slots), nslabs)));
> -	if (!mem->slots)
> +	if (!mem->slots) {
> +		free_pages((unsigned long)vstart, order);
>  		return -ENOMEM;
> +	}
>  
> -	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
> -	swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
> +	set_memory_decrypted((unsigned long)vstart, bytes >> PAGE_SHIFT);
> +	swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, true);
>  
>  	swiotlb_print_info();
>  	return 0;
> -- 
> 2.30.2
Christoph Hellwig March 2, 2022, 8:15 a.m. UTC | #2
On Tue, Mar 01, 2022 at 06:55:47PM -0800, Stefano Stabellini wrote:
> Unrelated to this specific patch series: now that I think about it, if
> io_tlb_default_mem.nslabs is already allocated by the time xen_mm_init
> is called, wouldn't we potentially have an issue with the GFP flags used
> for the earlier allocation (e.g. GFP_DMA32 not used)? Maybe something
> for another day.

swiotlb_init allocates low memory from memblock, which is roughly
equivalent to GFP_DMA allocations, so we'll be fine.
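
(For reference, a sketch of the two allocation paths being compared; the allocator helpers are the real ones used by the swiotlb code, the wrapper function is made up:)

static void *alloc_tlb_sketch(size_t bytes, gfp_t gfp_mask, bool early)
{
	if (early)
		/*
		 * swiotlb_init() path: memblock_alloc_low() allocates
		 * below ARCH_LOW_ADDRESS_LIMIT, i.e. roughly the same
		 * low region that GFP_DMA allocations come from.
		 */
		return memblock_alloc_low(bytes, PAGE_SIZE);
	/* swiotlb_init_late() path: bounded by the caller's gfp_mask */
	return (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
					get_order(bytes));
}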

> > @@ -143,10 +141,15 @@ static int __init xen_mm_init(void)
> >  	if (!xen_swiotlb_detect())
> >  		return 0;
> >  
> > -	rc = xen_swiotlb_init();
> >  	/* we can work with the default swiotlb */
> > -	if (rc < 0 && rc != -EEXIST)
> > -		return rc;
> > +	if (!io_tlb_default_mem.nslabs) {
> > +		if (!xen_initial_domain())
> > +			return -EINVAL;
> 
> I don't think we need this xen_initial_domain() check. It is all
> already sorted out by the xen_swiotlb_detect() check above.

Is it?

static inline int xen_swiotlb_detect(void)
{
	if (!xen_domain())
		return 0;
	if (xen_feature(XENFEAT_direct_mapped))
		return 1;
	/* legacy case */
	if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain())
		return 1;
	return 0;
}

I think I'd keep it as-is for now, as my planned next step would be to
fold xen-swiotlb into swiotlb entirely.
Boris Ostrovsky March 2, 2022, 1:15 p.m. UTC | #3
On 3/1/22 9:55 PM, Stefano Stabellini wrote:
> On Tue, 1 Mar 2022, Christoph Hellwig wrote:
>> Allow passing a remap argument to the swiotlb initialization functions
>> to handle the Xen/x86 remap case.  ARM/ARM64 never did any remapping
>> from xen_swiotlb_fixup, so we don't even need that quirk.
>>

> Aside from that the rest looks OK. Also, you can add my:
> 
> Tested-by: Stefano Stabellini <sstabellini@kernel.org>


Not for me, I fail to boot with

[   52.202000] bnxt_en 0000:31:00.0: swiotlb buffer is full (sz: 256 bytes), total 0 (slots), used 0 (slots)

(this is iscsi root so I need the NIC).


I bisected it to "x86: remove the IOMMU table infrastructure" but haven't actually looked at the code yet.


-boris
Boris Ostrovsky March 2, 2022, 1:17 p.m. UTC | #4
On 3/2/22 8:15 AM, Boris Ostrovsky wrote:
>
>
> On 3/1/22 9:55 PM, Stefano Stabellini wrote:
>> On Tue, 1 Mar 2022, Christoph Hellwig wrote:
>>> Allow passing a remap argument to the swiotlb initialization functions
>>> to handle the Xen/x86 remap case.  ARM/ARM64 never did any remapping
>>> from xen_swiotlb_fixup, so we don't even need that quirk.
>>>
>
>> Aside from that the rest looks OK. Also, you can add my:
>>
>> Tested-by: Stefano Stabellini <sstabellini@kernel.org>
>
>
> Not for me, I fail to boot with
>
> [   52.202000] bnxt_en 0000:31:00.0: swiotlb buffer is full (sz: 256 bytes), total 0 (slots), used 0 (slots)
>
> (this is iscsi root so I need the NIC).
>
>
> I bisected it to "x86: remove the IOMMU table infrastructure" but haven't actually looked at the code yet.
>

Again, this is as dom0. Baremetal is fine.


-boris
Stefano Stabellini March 3, 2022, 1:25 a.m. UTC | #5
On Wed, 2 Mar 2022, Christoph Hellwig wrote:
> On Tue, Mar 01, 2022 at 06:55:47PM -0800, Stefano Stabellini wrote:
> > Unrelated to this specific patch series: now that I think about it, if
> > io_tlb_default_mem.nslabs is already allocated by the time xen_mm_init
> > is called, wouldn't we potentially have an issue with the GFP flags used
> > for the earlier allocation (e.g. GFP_DMA32 not used)? Maybe something
> > for another day.
> 
> swiotlb_init allocates low memory from memblock, which is roughly
> equivalent to GFP_DMA allocations, so we'll be fine.
> 
> > > @@ -143,10 +141,15 @@ static int __init xen_mm_init(void)
> > >  	if (!xen_swiotlb_detect())
> > >  		return 0;
> > >  
> > > -	rc = xen_swiotlb_init();
> > >  	/* we can work with the default swiotlb */
> > > -	if (rc < 0 && rc != -EEXIST)
> > > -		return rc;
> > > +	if (!io_tlb_default_mem.nslabs) {
> > > +		if (!xen_initial_domain())
> > > +			return -EINVAL;
> > 
> > I don't think we need this xen_initial_domain() check. It is all
> > already sorted out by the xen_swiotlb_detect() check above.
> 
> Is it?
> 
> static inline int xen_swiotlb_detect(void)
> {
> 	if (!xen_domain())
> 		return 0;
> 	if (xen_feature(XENFEAT_direct_mapped))
> 		return 1;
> 	/* legacy case */
> 	if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain())
> 		return 1;
> 	return 0;
> }

It used to be that we had a

  if (!xen_initial_domain())
      return -EINVAL;

check in the initialization of swiotlb-xen on ARM. Then we replaced it
with the more sophisticated xen_swiotlb_detect().

The reason is that swiotlb-xen on ARM relies on Dom0 being 1:1 mapped
(guest physical addresses == physical addresses). Recent changes in Xen
allowed also DomUs to be 1:1 mapped. Changes still under discussion will
allow Dom0 not to be 1:1 mapped.

So, before all the Xen-side changes, knowing what was going to happen, I
introduced a clearer interface: XENFEAT_direct_mapped and
XENFEAT_not_direct_mapped tell us whether the guest (Linux) is 1:1
mapped or not. If it is 1:1 mapped then Linux can take advantage of
swiotlb-xen. Now xen_swiotlb_detect() returns true if Linux is 1:1
mapped.

Then of course there is the legacy case. That's taken care of by:

 	if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain())
 		return 1;

The intention is to say that if
XENFEAT_direct_mapped/XENFEAT_not_direct_mapped are not present, then
use xen_initial_domain() like we did before.

So if xen_swiotlb_detect() returns true we know that Linux is either
dom0 (xen_initial_domain() == true) or we have very good reasons to
think we should initialize swiotlb-xen anyway
(xen_feature(XENFEAT_direct_mapped) == true).


> I think I'd keep it as-is for now, as my planned next step would be to
> fold xen-swiotlb into swiotlb entirely.

Thinking more about it we actually need to drop the xen_initial_domain()
check otherwise some cases won't be functional (Dom0 not 1:1 mapped, or
DomU 1:1 mapped).
Christoph Hellwig March 3, 2022, 10:57 a.m. UTC | #6
On Wed, Mar 02, 2022 at 08:15:03AM -0500, Boris Ostrovsky wrote:
> Not for me, I fail to boot with
>
> [   52.202000] bnxt_en 0000:31:00.0: swiotlb buffer is full (sz: 256 bytes), total 0 (slots), used 0 (slots)
>
> (this is iscsi root so I need the NIC).
>
>
> I bisected it to "x86: remove the IOMMU table infrastructure" but haven't actually looked at the code yet.

Thanks. Looks like the sizing is going wrong.  Just to confirm, this is
dom0 on x86 and no special command line options?
Christoph Hellwig March 3, 2022, 10:59 a.m. UTC | #7
On Wed, Mar 02, 2022 at 05:25:10PM -0800, Stefano Stabellini wrote:
> Thinking more about it we actually need to drop the xen_initial_domain()
> check otherwise some cases won't be functional (Dom0 not 1:1 mapped, or
> DomU 1:1 mapped).

Hmm, but that would be the case even before this series, right?
Boris Ostrovsky March 3, 2022, 7:06 p.m. UTC | #8
On 3/3/22 5:57 AM, Christoph Hellwig wrote:
> On Wed, Mar 02, 2022 at 08:15:03AM -0500, Boris Ostrovsky wrote:
>> Not for me, I fail to boot with
>>
>> [   52.202000] bnxt_en 0000:31:00.0: swiotlb buffer is full (sz: 256 bytes), total 0 (slots), used 0 (slots)
>>
>> (this is iscsi root so I need the NIC).
>>
>>
>> I bisected it to "x86: remove the IOMMU table infrastructure" but haven't actually looked at the code yet.
> Thanks. Looks like the sizing is going wrong.  Just to confirm, this is
> dom0 on x86 and no special command line options?


Right.


module2 /boot/vmlinuz-5.17.0-rc6swiotlb placeholder root=UUID=dbef1262-8c8a-43db-8055-7d9bec7bece0 ro crashkernel=auto LANG=en_US.UTF-8 rd.luks=0 rd.lvm=0 rd.md=0 rd.dm=0 netroot=iscsi:169.254.0.2:::1:iqn.2015-02.oracle.boot:uefi iscsi_param=node.session.timeo.replacement_timeout=6000 net.ifnames=1 nvme_core.shutdown_timeout=10 ipmi_si.tryacpi=0 ipmi_si.trydmi=0 ipmi_si.trydefaults=0 libiscsi.debug_libiscsi_eh=1  panic=20 nokaslr earlyprintk=xen console=hvc0 loglevel=8 4
Stefano Stabellini March 3, 2022, 10:49 p.m. UTC | #9
On Thu, 3 Mar 2022, Christoph Hellwig wrote:
> On Wed, Mar 02, 2022 at 05:25:10PM -0800, Stefano Stabellini wrote:
> > Thinking more about it we actually need to drop the xen_initial_domain()
> > check otherwise some cases won't be functional (Dom0 not 1:1 mapped, or
> > DomU 1:1 mapped).
> 
> Hmm, but that would be the case even before this series, right?

Before this series we only have the xen_swiotlb_detect() check in
xen_mm_init, we don't have a second xen_initial_domain() check.

The issue is that this series is adding one more xen_initial_domain()
check in xen_mm_init.
Christoph Hellwig March 4, 2022, 4:34 p.m. UTC | #10
On Thu, Mar 03, 2022 at 02:49:29PM -0800, Stefano Stabellini wrote:
> On Thu, 3 Mar 2022, Christoph Hellwig wrote:
> > On Wed, Mar 02, 2022 at 05:25:10PM -0800, Stefano Stabellini wrote:
> > > Thinking more about it we actually need to drop the xen_initial_domain()
> > > check otherwise some cases won't be functional (Dom0 not 1:1 mapped, or
> > > DomU 1:1 mapped).
> > 
> > Hmm, but that would be the case even before this series, right?
> 
> Before this series we only have the xen_swiotlb_detect() check in
> xen_mm_init, we don't have a second xen_initial_domain() check.
> 
> The issue is that this series is adding one more xen_initial_domain()
> check in xen_mm_init.

In current mainline, xen_mm_init calls xen_swiotlb_init unconditionally.
But xen_swiotlb_init then calls xen_swiotlb_fixup after allocating
the memory, which in turn calls xen_create_contiguous_region.
xen_create_contiguous_region fails with -EINVAL for the
!xen_initial_domain() case and thus causes xen_swiotlb_fixup and
xen_swiotlb_init to unwind and return -EINVAL.

So as far as I can tell there is no change in behavior, but maybe I'm
missing something subtle?
Christoph Hellwig March 4, 2022, 5:28 p.m. UTC | #11
On Wed, Mar 02, 2022 at 08:15:03AM -0500, Boris Ostrovsky wrote:
> Not for me, I fail to boot with
>
> [   52.202000] bnxt_en 0000:31:00.0: swiotlb buffer is full (sz: 256 bytes), total 0 (slots), used 0 (slots)
>
> (this is iscsi root so I need the NIC).
>
>
> I bisected it to "x86: remove the IOMMU table infrastructure" but haven't actually looked at the code yet.

That looks like the swiotlb buffer did not get initialized at all, but I
can't really explain why.

Can you stick in a printk and see if xen_swiotlb_init_early gets called
at all?
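
(Something like this at the top of the function would do; purely illustrative:)

	pr_info("%s: entered\n", __func__);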
Boris Ostrovsky March 4, 2022, 5:36 p.m. UTC | #12
On 3/4/22 12:28 PM, Christoph Hellwig wrote:
> On Wed, Mar 02, 2022 at 08:15:03AM -0500, Boris Ostrovsky wrote:
>> Not for me, I fail to boot with
>>
>> [   52.202000] bnxt_en 0000:31:00.0: swiotlb buffer is full (sz: 256 bytes), total 0 (slots), used 0 (slots)
>>
>> (this is iscsi root so I need the NIC).
>>
>>
>> I bisected it to "x86: remove the IOMMU table infrastructure" but haven't actually looked at the code yet.
> That looks like the swiotlb buffer did not get initialized at all, but I
> can't really explain why.
>
> Can you stick in a printk and see if xen_swiotlb_init_early gets called
> at all?



Actually, that's the only thing I did do so far and yes, it does get called.


-boris
Christoph Hellwig March 4, 2022, 5:43 p.m. UTC | #13
On Fri, Mar 04, 2022 at 12:36:17PM -0500, Boris Ostrovsky wrote:
>>> I bisected it to "x86: remove the IOMMU table infrastructure" but haven't actually looked at the code yet.
>> That looks like the swiotlb buffer did not get initialized at all, but I
>> can't really explain why.
>>
>> Can you stick in a printk and see if xen_swiotlb_init_early gets called
>> at all?
>
>
>
> Actually, that's the only thing I did do so far and yes, it does get called.

So, specifically for "x86: remove the IOMMU table infrastructure" I
think we need the one-liner below so that swiotlb_exit doesn't get called
for the Xen case.  But that should have been fixed up by the next
patch already.

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 2ac0ef9c2fb76..1173aa282ab27 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -70,7 +70,7 @@ static void __init pci_xen_swiotlb_init(void)
 	if (!xen_initial_domain() && !x86_swiotlb_enable &&
 	    swiotlb_force != SWIOTLB_FORCE)
 		return;
-	x86_swiotlb_enable = false;
+	x86_swiotlb_enable = true;
 	xen_swiotlb = true;
 	xen_swiotlb_init_early();
 	dma_ops = &xen_swiotlb_dma_ops;
Boris Ostrovsky March 4, 2022, 8:18 p.m. UTC | #14
On 3/4/22 12:43 PM, Christoph Hellwig wrote:
> On Fri, Mar 04, 2022 at 12:36:17PM -0500, Boris Ostrovsky wrote:
>>>> I bisected it to "x86: remove the IOMMU table infrastructure" but haven't actually looked at the code yet.
>>> That looks like the swiotlb buffer did not get initialized at all, but I
>>> can't really explain why.
>>>
>>> Can you stick in a printk and see if xen_swiotlb_init_early gets called
>>> at all?
>>
>>
>> Actually, that's the only thing I did do so far and yes, it does get called.
> So, specifically for "x86: remove the IOMMU table infrastructure" I
> think we need the one-liner below so that swiotlb_exit doesn't get called
> for the Xen case.  But that should have been fixed up by the next
> patch already.


This indeed allows dom0 to boot. Not sure I see where in the next patch this would have been fixed?


(BTW, just noticed in iommu_setup() you set this variable to 1. Should be 'true')


-boris
Christoph Hellwig March 4, 2022, 9:03 p.m. UTC | #15
On Fri, Mar 04, 2022 at 03:18:23PM -0500, Boris Ostrovsky wrote:
> This indeed allows dom0 to boot. Not sure I see where in the next patch this would have been fixed?

I thought it did, but it doesn't.  In the meantime I've pushed out an
updated branch with this folded in to:

git://git.infradead.org/users/hch/misc.git swiotlb-init-cleanup

> (BTW, just noticed in iommu_setup() you set this variable to 1. Should be 'true')

Thanks, I'll fix this up.
Stefano Stabellini March 4, 2022, 11:22 p.m. UTC | #16
On Fri, 4 Mar 2022, Christoph Hellwig wrote:
> On Thu, Mar 03, 2022 at 02:49:29PM -0800, Stefano Stabellini wrote:
> > On Thu, 3 Mar 2022, Christoph Hellwig wrote:
> > > On Wed, Mar 02, 2022 at 05:25:10PM -0800, Stefano Stabellini wrote:
> > > > Thinking more about it we actually need to drop the xen_initial_domain()
> > > > check otherwise some cases won't be functional (Dom0 not 1:1 mapped, or
> > > > DomU 1:1 mapped).
> > > 
> > > Hmm, but that would be the case even before this series, right?
> > 
> > Before this series we only have the xen_swiotlb_detect() check in
> > xen_mm_init, we don't have a second xen_initial_domain() check.
> > 
> > The issue is that this series is adding one more xen_initial_domain()
> > check in xen_mm_init.
> 
> In current mainline, xen_mm_init calls xen_swiotlb_init unconditionally.
> But xen_swiotlb_init then calls xen_swiotlb_fixup after allocating
> the memory, which in turn calls xen_create_contiguous_region.
> xen_create_contiguous_region fails with -EINVAL for the
> !xen_initial_domain() case and thus causes xen_swiotlb_fixup and
> xen_swiotlb_init to unwind and return -EINVAL.
> 
> So as far as I can tell there is no change in behavior, but maybe I'm
> missing something subtle?

You are right.

The xen_initial_domain() check in xen_create_contiguous_region() is
wrong and we should get rid of it. It is a leftover from before the
xen_swiotlb_detect rework.

We could either remove it or change it into another xen_swiotlb_detect()
check.

Feel free to add the patch to your series or fold it with another patch
or rework it as you prefer. Thanks for spotting this!

---

arm/xen: don't check for xen_initial_domain() in xen_create_contiguous_region

It used to be that Linux enabled swiotlb-xen when running as dom0 on ARM.
Since f5079a9a2a31 "xen/arm: introduce XENFEAT_direct_mapped and
XENFEAT_not_direct_mapped", Linux detects whether to enable or disable
swiotlb-xen based on the new feature flags: XENFEAT_direct_mapped and
XENFEAT_not_direct_mapped.

However, there is still a leftover xen_initial_domain() check in
xen_create_contiguous_region. Remove the check as
xen_create_contiguous_region is only called by swiotlb-xen during
initialization. If xen_create_contiguous_region is called, we know Linux
is running 1:1 mapped so there is no need for additional checks.

Also update the in-code comment.

Signed-off-by: Stefano Stabellini <stefano.stabellini@xilinx.com>


diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index a7e54a087b80..28c207060253 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -122,10 +122,7 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 				 unsigned int address_bits,
 				 dma_addr_t *dma_handle)
 {
-	if (!xen_initial_domain())
-		return -EINVAL;
-
-	/* we assume that dom0 is mapped 1:1 for now */
+	/* the domain is 1:1 mapped to use swiotlb-xen */
 	*dma_handle = pstart;
 	return 0;
 }
Boris Ostrovsky March 8, 2022, 9:38 p.m. UTC | #17
On 3/1/22 5:53 AM, Christoph Hellwig wrote:
> Allow passing a remap argument to the swiotlb initialization functions
> to handle the Xen/x86 remap case.  ARM/ARM64 never did any remapping
> from xen_swiotlb_fixup, so we don't even need that quirk.


Any chance this patch could be split? Lots of things are happening here and it's somewhat hard to review. (Patch 7 too BTW but I think I managed to get through it)


> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
> index e0def4b1c3181..2f2c468acb955 100644
> --- a/arch/x86/kernel/pci-dma.c
> +++ b/arch/x86/kernel/pci-dma.c
> @@ -71,15 +71,12 @@ static inline void __init pci_swiotlb_detect(void)
>   #endif /* CONFIG_SWIOTLB */
>   
>   #ifdef CONFIG_SWIOTLB_XEN
> -static bool xen_swiotlb;
> -
>   static void __init pci_xen_swiotlb_init(void)
>   {
>   	if (!xen_initial_domain() && !x86_swiotlb_enable)
>   		return;


Now that there is a single call site for this routine I think this check can be dropped. We are only called here for xen_initial_domain()==true.


-boris
Christoph Hellwig March 9, 2022, 6:18 a.m. UTC | #18
On Tue, Mar 08, 2022 at 04:38:21PM -0500, Boris Ostrovsky wrote:
>
> On 3/1/22 5:53 AM, Christoph Hellwig wrote:
>> Allow passing a remap argument to the swiotlb initialization functions
>> to handle the Xen/x86 remap case.  ARM/ARM64 never did any remapping
>> from xen_swiotlb_fixup, so we don't even need that quirk.
>
>
> Any chance this patch could be split? Lots of things are happening here and it's somewhat hard to review. (Patch 7 too BTW but I think I managed to get through it)

What would be your preferred split?

>> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
>> index e0def4b1c3181..2f2c468acb955 100644
>> --- a/arch/x86/kernel/pci-dma.c
>> +++ b/arch/x86/kernel/pci-dma.c
>> @@ -71,15 +71,12 @@ static inline void __init pci_swiotlb_detect(void)
>>   #endif /* CONFIG_SWIOTLB */
>>     #ifdef CONFIG_SWIOTLB_XEN
>> -static bool xen_swiotlb;
>> -
>>   static void __init pci_xen_swiotlb_init(void)
>>   {
>>   	if (!xen_initial_domain() && !x86_swiotlb_enable)
>>   		return;
>
>
> Now that there is a single call site for this routine I think this check can be dropped. We are only called here for xen_initial_domain()==true.

The callsite just checks xen_pv_domain() and itself is called
unconditionally during initialization.
Boris Ostrovsky March 9, 2022, 3:18 p.m. UTC | #19
On 3/9/22 1:18 AM, Christoph Hellwig wrote:
> On Tue, Mar 08, 2022 at 04:38:21PM -0500, Boris Ostrovsky wrote:
>> On 3/1/22 5:53 AM, Christoph Hellwig wrote:
>>> Allow passing a remap argument to the swiotlb initialization functions
>>> to handle the Xen/x86 remap case.  ARM/ARM64 never did any remapping
>>> from xen_swiotlb_fixup, so we don't even need that quirk.
>>
>> Any chance this patch could be split? Lots of things are happening here and it's somewhat hard to review. (Patch 7 too BTW but I think I managed to get through it)
> What would be your preferred split?


swiotlb_init() rework to be done separately?


>
>>> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
>>> index e0def4b1c3181..2f2c468acb955 100644
>>> --- a/arch/x86/kernel/pci-dma.c
>>> +++ b/arch/x86/kernel/pci-dma.c
>>> @@ -71,15 +71,12 @@ static inline void __init pci_swiotlb_detect(void)
>>>    #endif /* CONFIG_SWIOTLB */
>>>      #ifdef CONFIG_SWIOTLB_XEN
>>> -static bool xen_swiotlb;
>>> -
>>>    static void __init pci_xen_swiotlb_init(void)
>>>    {
>>>    	if (!xen_initial_domain() && !x86_swiotlb_enable)
>>>    		return;
>>
>> Now that there is a single call site for this routine I think this check can be dropped. We are only called here for xen_initial_domain()==true.
> The callsite just checks xen_pv_domain() and itself is called
> unconditionally during initialization.


Oh, right, nevermind. *pv* domain.


-boris

Patch

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index a7e54a087b802..58b40f87617d3 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -23,22 +23,20 @@ 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/interface.h>
 
-unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+static gfp_t xen_swiotlb_gfp(void)
 {
 	phys_addr_t base;
-	gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM;
 	u64 i;
 
 	for_each_mem_range(i, &base, NULL) {
 		if (base < (phys_addr_t)0xffffffff) {
 			if (IS_ENABLED(CONFIG_ZONE_DMA32))
-				flags |= __GFP_DMA32;
-			else
-				flags |= __GFP_DMA;
-			break;
+				return __GFP_DMA32;
+			return __GFP_DMA;
 		}
 	}
-	return __get_free_pages(flags, order);
+
+	return GFP_KERNEL;
 }
 
 static bool hypercall_cflush = false;
@@ -143,10 +141,15 @@  static int __init xen_mm_init(void)
 	if (!xen_swiotlb_detect())
 		return 0;
 
-	rc = xen_swiotlb_init();
 	/* we can work with the default swiotlb */
-	if (rc < 0 && rc != -EEXIST)
-		return rc;
+	if (!io_tlb_default_mem.nslabs) {
+		if (!xen_initial_domain())
+			return -EINVAL;
+		rc = swiotlb_init_late(swiotlb_size_or_default(),
+				       xen_swiotlb_gfp(), NULL);
+		if (rc < 0)
+			return rc;
+	}
 
 	cflush.op = 0;
 	cflush.a.dev_bus_addr = 0;
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index e989bc2269f54..1fc67df500145 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -357,9 +357,4 @@  static inline bool xen_arch_need_swiotlb(struct device *dev,
 	return false;
 }
 
-static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order)
-{
-	return __get_free_pages(__GFP_NOWARN, order);
-}
-
 #endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index e0def4b1c3181..2f2c468acb955 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -71,15 +71,12 @@  static inline void __init pci_swiotlb_detect(void)
 #endif /* CONFIG_SWIOTLB */
 
 #ifdef CONFIG_SWIOTLB_XEN
-static bool xen_swiotlb;
-
 static void __init pci_xen_swiotlb_init(void)
 {
 	if (!xen_initial_domain() && !x86_swiotlb_enable)
 		return;
 	x86_swiotlb_enable = false;
-	xen_swiotlb = true;
-	xen_swiotlb_init_early();
+	swiotlb_init_remap(true, x86_swiotlb_flags, xen_swiotlb_fixup);
 	dma_ops = &xen_swiotlb_dma_ops;
 	if (IS_ENABLED(CONFIG_PCI))
 		pci_request_acs();
@@ -87,14 +84,16 @@  static void __init pci_xen_swiotlb_init(void)
 
 int pci_xen_swiotlb_init_late(void)
 {
-	int rc;
-
-	if (xen_swiotlb)
+	if (dma_ops == &xen_swiotlb_dma_ops)
 		return 0;
 
-	rc = xen_swiotlb_init();
-	if (rc)
-		return rc;
+	/* we can work with the default swiotlb */
+	if (!io_tlb_default_mem.nslabs) {
+		int rc = swiotlb_init_late(swiotlb_size_or_default(),
+					   GFP_KERNEL, xen_swiotlb_fixup);
+		if (rc < 0)
+			return rc;
+	}
 
 	/* XXX: this switches the dma ops under live devices! */
 	dma_ops = &xen_swiotlb_dma_ops;
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index c7e6faf59a861..7368afc039987 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -57,7 +57,7 @@  static void sta2x11_new_instance(struct pci_dev *pdev)
 		int size = STA2X11_SWIOTLB_SIZE;
 		/* First instance: register your own swiotlb area */
 		dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size);
-		if (swiotlb_init_late(size, GFP_DMA))
+		if (swiotlb_init_late(size, GFP_DMA, NULL))
 			dev_emerg(&pdev->dev, "init swiotlb failed\n");
 	}
 	list_add(&instance->list, &sta2x11_instance_list);
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index c2da3eb4826e8..df8085b50df10 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -104,7 +104,7 @@  static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
 	return 0;
 }
 
-static int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
+int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
 {
 	int rc;
 	unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT);
@@ -130,132 +130,6 @@  static int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
 	return 0;
 }
 
-enum xen_swiotlb_err {
-	XEN_SWIOTLB_UNKNOWN = 0,
-	XEN_SWIOTLB_ENOMEM,
-	XEN_SWIOTLB_EFIXUP
-};
-
-static const char *xen_swiotlb_error(enum xen_swiotlb_err err)
-{
-	switch (err) {
-	case XEN_SWIOTLB_ENOMEM:
-		return "Cannot allocate Xen-SWIOTLB buffer\n";
-	case XEN_SWIOTLB_EFIXUP:
-		return "Failed to get contiguous memory for DMA from Xen!\n"\
-		    "You either: don't have the permissions, do not have"\
-		    " enough free memory under 4GB, or the hypervisor memory"\
-		    " is too fragmented!";
-	default:
-		break;
-	}
-	return "";
-}
-
-int xen_swiotlb_init(void)
-{
-	enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
-	unsigned long bytes = swiotlb_size_or_default();
-	unsigned long nslabs = bytes >> IO_TLB_SHIFT;
-	unsigned int order, repeat = 3;
-	int rc = -ENOMEM;
-	char *start;
-
-	if (io_tlb_default_mem.nslabs) {
-		pr_warn("swiotlb buffer already initialized\n");
-		return -EEXIST;
-	}
-
-retry:
-	m_ret = XEN_SWIOTLB_ENOMEM;
-	order = get_order(bytes);
-
-	/*
-	 * Get IO TLB memory from any location.
-	 */
-#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
-#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-		start = (void *)xen_get_swiotlb_free_pages(order);
-		if (start)
-			break;
-		order--;
-	}
-	if (!start)
-		goto exit;
-	if (order != get_order(bytes)) {
-		pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
-			(PAGE_SIZE << order) >> 20);
-		nslabs = SLABS_PER_PAGE << order;
-		bytes = nslabs << IO_TLB_SHIFT;
-	}
-
-	/*
-	 * And replace that memory with pages under 4GB.
-	 */
-	rc = xen_swiotlb_fixup(start, nslabs);
-	if (rc) {
-		free_pages((unsigned long)start, order);
-		m_ret = XEN_SWIOTLB_EFIXUP;
-		goto error;
-	}
-	rc = swiotlb_late_init_with_tbl(start, nslabs);
-	if (rc)
-		return rc;
-	return 0;
-error:
-	if (nslabs > 1024 && repeat--) {
-		/* Min is 2MB */
-		nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
-		bytes = nslabs << IO_TLB_SHIFT;
-		pr_info("Lowering to %luMB\n", bytes >> 20);
-		goto retry;
-	}
-exit:
-	pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc);
-	return rc;
-}
-
-#ifdef CONFIG_X86
-void __init xen_swiotlb_init_early(void)
-{
-	unsigned long bytes = swiotlb_size_or_default();
-	unsigned long nslabs = bytes >> IO_TLB_SHIFT;
-	unsigned int repeat = 3;
-	char *start;
-	int rc;
-
-retry:
-	/*
-	 * Get IO TLB memory from any location.
-	 */
-	start = memblock_alloc(PAGE_ALIGN(bytes),
-			       IO_TLB_SEGSIZE << IO_TLB_SHIFT);
-	if (!start)
-		panic("%s: Failed to allocate %lu bytes\n",
-		      __func__, PAGE_ALIGN(bytes));
-
-	/*
-	 * And replace that memory with pages under 4GB.
-	 */
-	rc = xen_swiotlb_fixup(start, nslabs);
-	if (rc) {
-		memblock_free(start, PAGE_ALIGN(bytes));
-		if (nslabs > 1024 && repeat--) {
-			/* Min is 2MB */
-			nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
-			bytes = nslabs << IO_TLB_SHIFT;
-			pr_info("Lowering to %luMB\n", bytes >> 20);
-			goto retry;
-		}
-		panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc);
-	}
-
-	if (swiotlb_init_with_tbl(start, nslabs, SWIOTLB_VERBOSE))
-		panic("Cannot allocate SWIOTLB buffer");
-}
-#endif /* CONFIG_X86 */
-
 static void *
 xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			   dma_addr_t *dma_handle, gfp_t flags,
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ee655f2e4d28b..919cf82ed978e 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -34,10 +34,11 @@  struct scatterlist;
 /* default to 64MB */
 #define IO_TLB_DEFAULT_SIZE (64UL<<20)
 
-int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, unsigned int flags);
 unsigned long swiotlb_size_or_default(void);
-extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
-int swiotlb_init_late(size_t size, gfp_t gfp_mask);
+int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+		int (*remap)(void *tlb, unsigned long nslabs));
+void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+		int (*remap)(void *tlb, unsigned long nslabs));
 extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
diff --git a/include/xen/arm/page.h b/include/xen/arm/page.h
index ac1b654705631..7e199c6656b90 100644
--- a/include/xen/arm/page.h
+++ b/include/xen/arm/page.h
@@ -115,6 +115,5 @@  static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 bool xen_arch_need_swiotlb(struct device *dev,
 			   phys_addr_t phys,
 			   dma_addr_t dev_addr);
-unsigned long xen_get_swiotlb_free_pages(unsigned int order);
 
 #endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h
index b3e647f86e3e2..590ceb923f0c8 100644
--- a/include/xen/swiotlb-xen.h
+++ b/include/xen/swiotlb-xen.h
@@ -10,8 +10,12 @@  void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
 void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
 			     size_t size, enum dma_data_direction dir);
 
-int xen_swiotlb_init(void);
-void __init xen_swiotlb_init_early(void);
+#ifdef CONFIG_SWIOTLB_XEN
+int xen_swiotlb_fixup(void *buf, unsigned long nslabs);
+#else
+#define xen_swiotlb_fixup NULL
+#endif
+
 extern const struct dma_map_ops xen_swiotlb_dma_ops;
 
 #endif /* __LINUX_SWIOTLB_XEN_H */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 77cf73dc20a78..128363dc9b5bb 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -234,40 +234,17 @@  static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
 	return;
 }
 
-int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs,
-		unsigned int flags)
-{
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
-	size_t alloc_size;
-
-	if (swiotlb_force_disable)
-		return 0;
-
-	/* protect against double initialization */
-	if (WARN_ON_ONCE(mem->nslabs))
-		return -ENOMEM;
-
-	alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
-	mem->slots = memblock_alloc(alloc_size, PAGE_SIZE);
-	if (!mem->slots)
-		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
-		      __func__, alloc_size, PAGE_SIZE);
-
-	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false);
-	mem->force_bounce = flags & SWIOTLB_FORCE;
-
-	if (flags & SWIOTLB_VERBOSE)
-		swiotlb_print_info();
-	return 0;
-}
-
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
  */
-void __init swiotlb_init(bool addressing_limit, unsigned int flags)
+void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+		int (*remap)(void *tlb, unsigned long nslabs))
 {
-	size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+	unsigned long nslabs = default_nslabs;
+	size_t alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
+	size_t bytes;
 	void *tlb;
 
 	if (!addressing_limit && !swiotlb_force_bounce)
@@ -275,23 +252,48 @@  void __init swiotlb_init(bool addressing_limit, unsigned int flags)
 	if (swiotlb_force_disable)
 		return;
 
+	/* protect against double initialization */
+	if (WARN_ON_ONCE(mem->nslabs))
+		return;
+
 	/*
 	 * By default allocate the bounce buffer memory from low memory.
 	 */
+retry:
+	bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
 	if (flags & SWIOTLB_ANY)
 		tlb = memblock_alloc(bytes, PAGE_SIZE);
 	else
 		tlb = memblock_alloc_low(bytes, PAGE_SIZE);
 	if (!tlb)
-		goto fail;
-	if (swiotlb_init_with_tbl(tlb, default_nslabs, flags))
-		goto fail_free_mem;
-	return;
+		panic("%s: failed to allocate tlb structure\n", __func__);
+
+	if (remap && remap(tlb, nslabs) < 0) {
+		memblock_free(tlb, PAGE_ALIGN(bytes));
+
+		/* Min is 2MB */
+		if (nslabs <= 1024)
+			panic("%s: Failed to remap %zu bytes\n",
+			      __func__, bytes);
+		nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
+		goto retry;
+	}
+
+	mem->slots = memblock_alloc(alloc_size, PAGE_SIZE);
+	if (!mem->slots)
+		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
+		      __func__, alloc_size, PAGE_SIZE);
 
-fail_free_mem:
-	memblock_free(tlb, bytes);
-fail:
-	pr_warn("Cannot allocate buffer");
+	swiotlb_init_io_tlb_mem(mem, __pa(tlb), default_nslabs, false);
+	mem->force_bounce = flags & SWIOTLB_FORCE;
+
+	if (flags & SWIOTLB_VERBOSE)
+		swiotlb_print_info();
+}
+
+void __init swiotlb_init(bool addressing_limit, unsigned int flags)
+{
+	return swiotlb_init_remap(addressing_limit, flags, NULL);
 }
 
 /*
@@ -299,8 +301,10 @@  void __init swiotlb_init(bool addressing_limit, unsigned int flags)
  * initialize the swiotlb later using the slab allocator if needed.
  * This should be just like above, but with some error catching.
  */
-int swiotlb_init_late(size_t size, gfp_t gfp_mask)
+int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+		int (*remap)(void *tlb, unsigned long nslabs))
 {
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
 	unsigned long bytes;
 	unsigned char *vstart = NULL;
@@ -310,9 +314,14 @@  int swiotlb_init_late(size_t size, gfp_t gfp_mask)
 	if (swiotlb_force_disable)
 		return 0;
 
+	/* protect against double initialization */
+	if (WARN_ON_ONCE(mem->nslabs))
+		return -ENOMEM;
+
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
+retry:
 	order = get_order(nslabs << IO_TLB_SHIFT);
 	nslabs = SLABS_PER_PAGE << order;
 	bytes = nslabs << IO_TLB_SHIFT;
@@ -333,33 +342,28 @@  int swiotlb_init_late(size_t size, gfp_t gfp_mask)
 			(PAGE_SIZE << order) >> 20);
 		nslabs = SLABS_PER_PAGE << order;
 	}
-	rc = swiotlb_late_init_with_tbl(vstart, nslabs);
-	if (rc)
-		free_pages((unsigned long)vstart, order);
-
-	return rc;
-}
-
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
-{
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
-	unsigned long bytes = nslabs << IO_TLB_SHIFT;
 
-	if (swiotlb_force_disable)
-		return 0;
+	if (remap)
+		rc = remap(vstart, nslabs);
+	if (rc) {
+		free_pages((unsigned long)vstart, order);
 
-	/* protect against double initialization */
-	if (WARN_ON_ONCE(mem->nslabs))
-		return -ENOMEM;
+		/* Min is 2MB */
+		if (nslabs <= 1024)
+			return rc;
+		nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
+		goto retry;
+	}
 
 	mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 		get_order(array_size(sizeof(*mem->slots), nslabs)));
-	if (!mem->slots)
+	if (!mem->slots) {
+		free_pages((unsigned long)vstart, order);
 		return -ENOMEM;
+	}
 
-	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
-	swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
+	set_memory_decrypted((unsigned long)vstart, bytes >> PAGE_SHIFT);
+	swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, true);
 
 	swiotlb_print_info();
 	return 0;