diff mbox

[v5,08/13] swiotlb-xen: support autotranslate guests

Message ID 1377801154-29215-8-git-send-email-stefano.stabellini@eu.citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Stefano Stabellini Aug. 29, 2013, 6:32 p.m. UTC
Support autotranslate guests in swiotlb-xen by keeping track of the
phys-to-bus and bus-to-phys mappings of the swiotlb buffer
(xen_io_tlb_start-xen_io_tlb_end).

Use a simple direct access on a pre-allocated array for phys-to-bus
queries. Use a red-black tree for bus-to-phys queries.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>


Changes in v5:
- fix xen_dma_add_entry error path;
- remove the spin_lock: the red-black tree is not modified at run time.

Changes in v4:
- add err_out label in xen_dma_add_entry;
- remove INVALID_ADDRESS, use DMA_ERROR_CODE instead;
- code style fixes;
- add in-code comments regarding the usage of xen_dma_seg[0].dma_addr.

Changes in v3:
- many code style and name changes;
- improve error checks in xen_dma_add_entry.
---
 drivers/xen/swiotlb-xen.c |  155 ++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 139 insertions(+), 16 deletions(-)

Comments

Konrad Rzeszutek Wilk Aug. 30, 2013, 1:46 p.m. UTC | #1
On Thu, Aug 29, 2013 at 07:32:29PM +0100, Stefano Stabellini wrote:
> Support autotranslate guests in swiotlb-xen by keeping track of the
> phys-to-bus and bus-to-phys mappings of the swiotlb buffer
> (xen_io_tlb_start-xen_io_tlb_end).
> 
> Use a simple direct access on a pre-allocated array for phys-to-bus
> queries. Use a red-black tree for bus-to-phys queries.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> Reviewed-by: David Vrabel <david.vrabel@citrix.com>

Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> 
> 
> Changes in v5:
> - fix xen_dma_add_entry error path;
> - remove the spin_lock: the red-black tree is not modified at run time.
> 
> Changes in v4:
> - add err_out label in xen_dma_add_entry;
> - remove INVALID_ADDRESS, use DMA_ERROR_CODE instead;
> - code style fixes;
> - add in-code comments regarding the usage of xen_dma_seg[0].dma_addr.
> 
> Changes in v3:
> - many code style and name changes;
> - improve error checks in xen_dma_add_entry.
> ---
>  drivers/xen/swiotlb-xen.c |  155 ++++++++++++++++++++++++++++++++++++++++-----
>  1 files changed, 139 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index b72f31c..7bb99ae 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -38,32 +38,131 @@
>  #include <linux/bootmem.h>
>  #include <linux/dma-mapping.h>
>  #include <linux/export.h>
> +#include <linux/slab.h>
> +#include <linux/rbtree.h>
>  #include <xen/swiotlb-xen.h>
>  #include <xen/page.h>
>  #include <xen/xen-ops.h>
>  #include <xen/hvc-console.h>
> +#include <xen/features.h>
>  /*
>   * Used to do a quick range check in swiotlb_tbl_unmap_single and
>   * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
>   * API.
>   */
>  
> +#define NR_DMA_SEGS  ((xen_io_tlb_nslabs + IO_TLB_SEGSIZE - 1) / IO_TLB_SEGSIZE)
>  static char *xen_io_tlb_start, *xen_io_tlb_end;
>  static unsigned long xen_io_tlb_nslabs;
>  /*
>   * Quick lookup value of the bus address of the IOTLB.
>   */
>  
> -static u64 start_dma_addr;
> +struct xen_dma_info {
> +	dma_addr_t dma_addr;
> +	phys_addr_t phys_addr;
> +	size_t size;
> +	struct rb_node rbnode;
> +};
> +
> +/*
> + * This array of struct xen_dma_info is indexed by physical addresses,
> + * starting from virt_to_phys(xen_io_tlb_start). Each entry maps
> + * (IO_TLB_SEGSIZE << IO_TLB_SHIFT) bytes, except the last one that is
> + * smaller. Getting the dma address corresponding to a given physical
> + * address can be done by direct access with the right index on the
> + * array.
> + */
> +static struct xen_dma_info *xen_dma_seg;
> +/* 
> + * This tree keeps track of bus address to physical address
> + * mappings.
> + */
> +static struct rb_root bus_to_phys = RB_ROOT;
> +
> +static int xen_dma_add_entry(struct xen_dma_info *new)
> +{
> +	struct rb_node **link = &bus_to_phys.rb_node;
> +	struct rb_node *parent = NULL;
> +	struct xen_dma_info *entry;
> +	int rc = 0;
> +
> +	while (*link) {
> +		parent = *link;
> +		entry = rb_entry(parent, struct xen_dma_info, rbnode);
> +
> +		if (new->dma_addr == entry->dma_addr)
> +			goto err_out;
> +		if (new->phys_addr == entry->phys_addr)
> +			goto err_out;
> +
> +		if (new->dma_addr < entry->dma_addr)
> +			link = &(*link)->rb_left;
> +		else
> +			link = &(*link)->rb_right;
> +	}
> +	rb_link_node(&new->rbnode, parent, link);
> +	rb_insert_color(&new->rbnode, &bus_to_phys);
> +	goto out;
> +
> +err_out:
> +	rc = -EINVAL;
> +	pr_warn("%s: cannot add phys=0x%pa -> dma=0x%pa: phys=0x%pa -> dma=0x%pa already exists\n",
> +			__func__, &new->phys_addr, &new->dma_addr, &entry->phys_addr, &entry->dma_addr);
> +out:
> +	return rc;
> +}
> +
> +static struct xen_dma_info *xen_get_dma_info(dma_addr_t dma_addr)
> +{
> +	struct rb_node *n = bus_to_phys.rb_node;
> +	struct xen_dma_info *entry;
> +
> +	while (n) {
> +		entry = rb_entry(n, struct xen_dma_info, rbnode);
> +		if (entry->dma_addr <= dma_addr &&
> +				entry->dma_addr + entry->size > dma_addr) {
> +			return entry;
> +		}
> +		if (dma_addr < entry->dma_addr)
> +			n = n->rb_left;
> +		else
> +			n = n->rb_right;
> +	}
> +
> +	return NULL;
> +}
>  
>  static dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
>  {
> -	return phys_to_machine(XPADDR(paddr)).maddr;
> +	int nr_seg;
> +	unsigned long offset;
> +	char *vaddr;
> +
> +	if (!xen_feature(XENFEAT_auto_translated_physmap))
> +		return phys_to_machine(XPADDR(paddr)).maddr;
> +
> +	vaddr = (char *)phys_to_virt(paddr);
> +	if (vaddr >= xen_io_tlb_end || vaddr < xen_io_tlb_start)
> +		return DMA_ERROR_CODE;
> +
> +	offset = vaddr - xen_io_tlb_start;
> +	nr_seg = offset / (IO_TLB_SEGSIZE << IO_TLB_SHIFT);
> +
> +	return xen_dma_seg[nr_seg].dma_addr +
> +		(paddr - xen_dma_seg[nr_seg].phys_addr);
>  }
>  
>  static phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
>  {
> -	return machine_to_phys(XMADDR(baddr)).paddr;
> +	if (xen_feature(XENFEAT_auto_translated_physmap)) {
> +		struct xen_dma_info *dma = xen_get_dma_info(baddr);
> +		if (dma == NULL)
> +			return DMA_ERROR_CODE;
> +		else
> +			return dma->phys_addr + (baddr - dma->dma_addr);
> +	} else
> +		return machine_to_phys(XMADDR(baddr)).paddr;
>  }
>  
>  static dma_addr_t xen_virt_to_bus(void *address)
> @@ -107,6 +206,9 @@ static int is_xen_swiotlb_buffer(dma_addr_t dma_addr)
>  	unsigned long pfn = mfn_to_local_pfn(mfn);
>  	phys_addr_t paddr;
>  
> +	if (xen_feature(XENFEAT_auto_translated_physmap))
> +		return 1;
> +
>  	/* If the address is outside our domain, it CAN
>  	 * have the same virtual address as another address
>  	 * in our domain. Therefore _only_ check address within our domain.
> @@ -124,13 +226,12 @@ static int max_dma_bits = 32;
>  static int
>  xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
>  {
> -	int i, rc;
> +	int i, j, rc;
>  	int dma_bits;
> -	dma_addr_t dma_handle;
>  
>  	dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
>  
> -	i = 0;
> +	i = j = 0;
>  	do {
>  		int slabs = min(nslabs - i, (unsigned long)IO_TLB_SEGSIZE);
>  
> @@ -138,12 +239,18 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
>  			rc = xen_create_contiguous_region(
>  				(unsigned long)buf + (i << IO_TLB_SHIFT),
>  				get_order(slabs << IO_TLB_SHIFT),
> -				dma_bits, &dma_handle);
> +				dma_bits, &xen_dma_seg[j].dma_addr);
>  		} while (rc && dma_bits++ < max_dma_bits);
>  		if (rc)
>  			return rc;
>  
> +		xen_dma_seg[j].phys_addr = virt_to_phys(buf + (i << IO_TLB_SHIFT));
> +		xen_dma_seg[j].size = slabs << IO_TLB_SHIFT;
> +		rc = xen_dma_add_entry(&xen_dma_seg[j]);
> +		if (rc != 0)
> +			return rc;
>  		i += slabs;
> +		j++;
>  	} while (i < nslabs);
>  	return 0;
>  }
> @@ -193,9 +300,10 @@ retry:
>  	/*
>  	 * Get IO TLB memory from any location.
>  	 */
> -	if (early)
> +	if (early) {
>  		xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
> -	else {
> +		xen_dma_seg = alloc_bootmem(sizeof(struct xen_dma_info) * NR_DMA_SEGS);
> +	} else {
>  #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
>  #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
>  		while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
> @@ -210,6 +318,8 @@ retry:
>  			xen_io_tlb_nslabs = SLABS_PER_PAGE << order;
>  			bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
>  		}
> +		xen_dma_seg = kzalloc(sizeof(struct xen_dma_info) * NR_DMA_SEGS,
> +				GFP_KERNEL);
>  	}
>  	if (!xen_io_tlb_start) {
>  		m_ret = XEN_SWIOTLB_ENOMEM;
> @@ -232,7 +342,6 @@ retry:
>  		m_ret = XEN_SWIOTLB_EFIXUP;
>  		goto error;
>  	}
> -	start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
>  	if (early) {
>  		if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
>  			 verbose))
> @@ -290,7 +399,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
>  
>  	phys = virt_to_phys(ret);
>  	dev_addr = xen_phys_to_bus(phys);
> -	if (((dev_addr + size - 1 <= dma_mask)) &&
> +	if (!xen_feature(XENFEAT_auto_translated_physmap) &&
> +	    ((dev_addr + size - 1 <= dma_mask)) &&
>  	    !range_straddles_page_boundary(phys, size))
>  		*dma_handle = dev_addr;
>  	else {
> @@ -321,8 +431,9 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
>  
>  	phys = virt_to_phys(vaddr);
>  
> -	if (((dev_addr + size - 1 > dma_mask)) ||
> -	    range_straddles_page_boundary(phys, size))
> +	if (xen_feature(XENFEAT_auto_translated_physmap) ||
> +		(((dev_addr + size - 1 > dma_mask)) ||
> +		 range_straddles_page_boundary(phys, size)))
>  		xen_destroy_contiguous_region((unsigned long)vaddr, order);
>  
>  	free_pages((unsigned long)vaddr, order);
> @@ -351,14 +462,19 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
>  	 * we can safely return the device addr and not worry about bounce
>  	 * buffering it.
>  	 */
> -	if (dma_capable(dev, dev_addr, size) &&
> +	if (!xen_feature(XENFEAT_auto_translated_physmap) &&
> +	    dma_capable(dev, dev_addr, size) &&
>  	    !range_straddles_page_boundary(phys, size) && !swiotlb_force)
>  		return dev_addr;
>  
>  	/*
>  	 * Oh well, have to allocate and map a bounce buffer.
> +	 * Pass the dma_addr of the first slab in the iotlb buffer as
> +	 * argument so that swiotlb_tbl_map_single is free to allocate
> +	 * the bounce buffer anywhere appropriate in io_tlb_start -
> +	 * io_tlb_end.
>  	 */
> -	map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir);
> +	map = swiotlb_tbl_map_single(dev, xen_dma_seg[0].dma_addr, phys, size, dir);
>  	if (map == SWIOTLB_MAP_ERROR)
>  		return DMA_ERROR_CODE;
>  
> @@ -494,10 +610,17 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
>  		dma_addr_t dev_addr = xen_phys_to_bus(paddr);
>  
>  		if (swiotlb_force ||
> +		    xen_feature(XENFEAT_auto_translated_physmap) ||
>  		    !dma_capable(hwdev, dev_addr, sg->length) ||
>  		    range_straddles_page_boundary(paddr, sg->length)) {
> +			/*
> +			 * Pass the dma_addr of the first slab in the iotlb buffer as
> +			 * argument so that swiotlb_tbl_map_single is free to allocate
> +			 * the bounce buffer anywhere appropriate in io_tlb_start -
> +			 * io_tlb_end.
> +			 */
>  			phys_addr_t map = swiotlb_tbl_map_single(hwdev,
> -								 start_dma_addr,
> +								 xen_dma_seg[0].dma_addr,
>  								 sg_phys(sg),
>  								 sg->length,
>  								 dir);
> -- 
> 1.7.2.5
>
diff mbox

Patch

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index b72f31c..7bb99ae 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -38,32 +38,131 @@ 
 #include <linux/bootmem.h>
 #include <linux/dma-mapping.h>
 #include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/rbtree.h>
 #include <xen/swiotlb-xen.h>
 #include <xen/page.h>
 #include <xen/xen-ops.h>
 #include <xen/hvc-console.h>
+#include <xen/features.h>
 /*
  * Used to do a quick range check in swiotlb_tbl_unmap_single and
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
  * API.
  */
 
+#define NR_DMA_SEGS  ((xen_io_tlb_nslabs + IO_TLB_SEGSIZE - 1) / IO_TLB_SEGSIZE)
 static char *xen_io_tlb_start, *xen_io_tlb_end;
 static unsigned long xen_io_tlb_nslabs;
 /*
  * Quick lookup value of the bus address of the IOTLB.
  */
 
-static u64 start_dma_addr;
+struct xen_dma_info {
+	dma_addr_t dma_addr;
+	phys_addr_t phys_addr;
+	size_t size;
+	struct rb_node rbnode;
+};
+
+/*
+ * This array of struct xen_dma_info is indexed by physical addresses,
+ * starting from virt_to_phys(xen_io_tlb_start). Each entry maps
+ * (IO_TLB_SEGSIZE << IO_TLB_SHIFT) bytes, except the last one that is
+ * smaller. Getting the dma address corresponding to a given physical
+ * address can be done by direct access with the right index on the
+ * array.
+ */
+static struct xen_dma_info *xen_dma_seg;
+/* 
+ * This tree keeps track of bus address to physical address
+ * mappings.
+ */
+static struct rb_root bus_to_phys = RB_ROOT;
+
+static int xen_dma_add_entry(struct xen_dma_info *new)
+{
+	struct rb_node **link = &bus_to_phys.rb_node;
+	struct rb_node *parent = NULL;
+	struct xen_dma_info *entry;
+	int rc = 0;
+
+	while (*link) {
+		parent = *link;
+		entry = rb_entry(parent, struct xen_dma_info, rbnode);
+
+		if (new->dma_addr == entry->dma_addr)
+			goto err_out;
+		if (new->phys_addr == entry->phys_addr)
+			goto err_out;
+
+		if (new->dma_addr < entry->dma_addr)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+	rb_link_node(&new->rbnode, parent, link);
+	rb_insert_color(&new->rbnode, &bus_to_phys);
+	goto out;
+
+err_out:
+	rc = -EINVAL;
+	pr_warn("%s: cannot add phys=0x%pa -> dma=0x%pa: phys=0x%pa -> dma=0x%pa already exists\n",
+			__func__, &new->phys_addr, &new->dma_addr, &entry->phys_addr, &entry->dma_addr);
+out:
+	return rc;
+}
+
+static struct xen_dma_info *xen_get_dma_info(dma_addr_t dma_addr)
+{
+	struct rb_node *n = bus_to_phys.rb_node;
+	struct xen_dma_info *entry;
+
+	while (n) {
+		entry = rb_entry(n, struct xen_dma_info, rbnode);
+		if (entry->dma_addr <= dma_addr &&
+				entry->dma_addr + entry->size > dma_addr) {
+			return entry;
+		}
+		if (dma_addr < entry->dma_addr)
+			n = n->rb_left;
+		else
+			n = n->rb_right;
+	}
+
+	return NULL;
+}
 
 static dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
 {
-	return phys_to_machine(XPADDR(paddr)).maddr;
+	int nr_seg;
+	unsigned long offset;
+	char *vaddr;
+
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		return phys_to_machine(XPADDR(paddr)).maddr;
+
+	vaddr = (char *)phys_to_virt(paddr);
+	if (vaddr >= xen_io_tlb_end || vaddr < xen_io_tlb_start)
+		return DMA_ERROR_CODE;
+
+	offset = vaddr - xen_io_tlb_start;
+	nr_seg = offset / (IO_TLB_SEGSIZE << IO_TLB_SHIFT);
+
+	return xen_dma_seg[nr_seg].dma_addr +
+		(paddr - xen_dma_seg[nr_seg].phys_addr);
 }
 
 static phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
 {
-	return machine_to_phys(XMADDR(baddr)).paddr;
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		struct xen_dma_info *dma = xen_get_dma_info(baddr);
+		if (dma == NULL)
+			return DMA_ERROR_CODE;
+		else
+			return dma->phys_addr + (baddr - dma->dma_addr);
+	} else
+		return machine_to_phys(XMADDR(baddr)).paddr;
 }
 
 static dma_addr_t xen_virt_to_bus(void *address)
@@ -107,6 +206,9 @@  static int is_xen_swiotlb_buffer(dma_addr_t dma_addr)
 	unsigned long pfn = mfn_to_local_pfn(mfn);
 	phys_addr_t paddr;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return 1;
+
 	/* If the address is outside our domain, it CAN
 	 * have the same virtual address as another address
 	 * in our domain. Therefore _only_ check address within our domain.
@@ -124,13 +226,12 @@  static int max_dma_bits = 32;
 static int
 xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
 {
-	int i, rc;
+	int i, j, rc;
 	int dma_bits;
-	dma_addr_t dma_handle;
 
 	dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
 
-	i = 0;
+	i = j = 0;
 	do {
 		int slabs = min(nslabs - i, (unsigned long)IO_TLB_SEGSIZE);
 
@@ -138,12 +239,18 @@  xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
 			rc = xen_create_contiguous_region(
 				(unsigned long)buf + (i << IO_TLB_SHIFT),
 				get_order(slabs << IO_TLB_SHIFT),
-				dma_bits, &dma_handle);
+				dma_bits, &xen_dma_seg[j].dma_addr);
 		} while (rc && dma_bits++ < max_dma_bits);
 		if (rc)
 			return rc;
 
+		xen_dma_seg[j].phys_addr = virt_to_phys(buf + (i << IO_TLB_SHIFT));
+		xen_dma_seg[j].size = slabs << IO_TLB_SHIFT;
+		rc = xen_dma_add_entry(&xen_dma_seg[j]);
+		if (rc != 0)
+			return rc;
 		i += slabs;
+		j++;
 	} while (i < nslabs);
 	return 0;
 }
@@ -193,9 +300,10 @@  retry:
 	/*
 	 * Get IO TLB memory from any location.
 	 */
-	if (early)
+	if (early) {
 		xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
-	else {
+		xen_dma_seg = alloc_bootmem(sizeof(struct xen_dma_info) * NR_DMA_SEGS);
+	} else {
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
 		while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
@@ -210,6 +318,8 @@  retry:
 			xen_io_tlb_nslabs = SLABS_PER_PAGE << order;
 			bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
 		}
+		xen_dma_seg = kzalloc(sizeof(struct xen_dma_info) * NR_DMA_SEGS,
+				GFP_KERNEL);
 	}
 	if (!xen_io_tlb_start) {
 		m_ret = XEN_SWIOTLB_ENOMEM;
@@ -232,7 +342,6 @@  retry:
 		m_ret = XEN_SWIOTLB_EFIXUP;
 		goto error;
 	}
-	start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
 	if (early) {
 		if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
 			 verbose))
@@ -290,7 +399,8 @@  xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 
 	phys = virt_to_phys(ret);
 	dev_addr = xen_phys_to_bus(phys);
-	if (((dev_addr + size - 1 <= dma_mask)) &&
+	if (!xen_feature(XENFEAT_auto_translated_physmap) &&
+	    ((dev_addr + size - 1 <= dma_mask)) &&
 	    !range_straddles_page_boundary(phys, size))
 		*dma_handle = dev_addr;
 	else {
@@ -321,8 +431,9 @@  xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 
 	phys = virt_to_phys(vaddr);
 
-	if (((dev_addr + size - 1 > dma_mask)) ||
-	    range_straddles_page_boundary(phys, size))
+	if (xen_feature(XENFEAT_auto_translated_physmap) ||
+		(((dev_addr + size - 1 > dma_mask)) ||
+		 range_straddles_page_boundary(phys, size)))
 		xen_destroy_contiguous_region((unsigned long)vaddr, order);
 
 	free_pages((unsigned long)vaddr, order);
@@ -351,14 +462,19 @@  dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (dma_capable(dev, dev_addr, size) &&
+	if (!xen_feature(XENFEAT_auto_translated_physmap) &&
+	    dma_capable(dev, dev_addr, size) &&
 	    !range_straddles_page_boundary(phys, size) && !swiotlb_force)
 		return dev_addr;
 
 	/*
 	 * Oh well, have to allocate and map a bounce buffer.
+	 * Pass the dma_addr of the first slab in the iotlb buffer as
+	 * argument so that swiotlb_tbl_map_single is free to allocate
+	 * the bounce buffer anywhere appropriate in io_tlb_start -
+	 * io_tlb_end.
 	 */
-	map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir);
+	map = swiotlb_tbl_map_single(dev, xen_dma_seg[0].dma_addr, phys, size, dir);
 	if (map == SWIOTLB_MAP_ERROR)
 		return DMA_ERROR_CODE;
 
@@ -494,10 +610,17 @@  xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 		dma_addr_t dev_addr = xen_phys_to_bus(paddr);
 
 		if (swiotlb_force ||
+		    xen_feature(XENFEAT_auto_translated_physmap) ||
 		    !dma_capable(hwdev, dev_addr, sg->length) ||
 		    range_straddles_page_boundary(paddr, sg->length)) {
+			/*
+			 * Pass the dma_addr of the first slab in the iotlb buffer as
+			 * argument so that swiotlb_tbl_map_single is free to allocate
+			 * the bounce buffer anywhere appropriate in io_tlb_start -
+			 * io_tlb_end.
+			 */
 			phys_addr_t map = swiotlb_tbl_map_single(hwdev,
-								 start_dma_addr,
+								 xen_dma_seg[0].dma_addr,
 								 sg_phys(sg),
 								 sg->length,
 								 dir);