
[v6,19/19] swiotlb-xen: instead of bouncing on the swiotlb, pin single pages

Message ID 1380298207-29151-19-git-send-email-stefano.stabellini@eu.citrix.com (mailing list archive)
State New, archived

Commit Message

Stefano Stabellini Sept. 27, 2013, 4:10 p.m. UTC
If we are dealing with single page mappings that don't cross page
boundaries, we can try to pin the page and get the corresponding mfn,
using xen_pin_page.  This avoids going through the swiotlb bounce
buffer.  If xen_pin_page fails (because the underlying mfn doesn't
respect the dma_mask), fall back to the swiotlb bounce buffer.
Add a ref count to xen_dma_info, so that we can avoid pinning pages that
are already pinned.
Use a spinlock to protect accesses, insertions and deletions in the
rbtrees.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 drivers/xen/swiotlb-xen.c |  152 ++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 143 insertions(+), 9 deletions(-)
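
In outline, the single-page fast path added to xen_swiotlb_map_page() works
as sketched below (a simplified illustration of the hunk further down, not
the literal code):

	if (xen_feature(XENFEAT_auto_translated_physmap) &&
	    size <= PAGE_SIZE &&
	    !range_straddles_page_boundary(phys, size) &&
	    !swiotlb_force) {
		/* Pin the page (or take a reference on an already pinned
		 * one) to get a device-reachable mfn, under swiotlb_lock. */
		if (xen_pin_dev_page(dev, phys, &dev_addr) == 0) {
			dma_mark_clean(phys_to_virt(phys), size);
			return dev_addr;	/* no bounce buffer needed */
		}
	}
	/* Otherwise fall back to the swiotlb bounce buffer as before. */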

Comments

Konrad Rzeszutek Wilk Sept. 30, 2013, 5:39 p.m. UTC | #1
On Fri, Sep 27, 2013 at 05:10:07PM +0100, Stefano Stabellini wrote:
> If we are dealing with single page mappings that don't cross page
> boundaries, we can try to pin the page and get the corresponding mfn,
> using xen_pin_page.  This avoids going through the swiotlb bounce
> buffer.  If xen_pin_page fails (because the underlying mfn doesn't
> respect the dma_mask), fall back to the swiotlb bounce buffer.
> Add a ref count to xen_dma_info, so that we can avoid pinning pages that
> are already pinned.
> Use a spinlock to protect accesses, insertions and deletions in the
> rbtrees.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> ---
>  drivers/xen/swiotlb-xen.c |  152 ++++++++++++++++++++++++++++++++++++++++++---
>  1 files changed, 143 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index 022bcaf..6f94285 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -57,6 +57,8 @@
>  #define NR_DMA_SEGS  ((xen_io_tlb_nslabs + IO_TLB_SEGSIZE - 1) / IO_TLB_SEGSIZE)
>  static char *xen_io_tlb_start, *xen_io_tlb_end;
>  static unsigned long xen_io_tlb_nslabs;
> +spinlock_t swiotlb_lock;
> +
>  /*
>   * Quick lookup value of the bus address of the IOTLB.
>   */
> @@ -79,6 +81,7 @@ struct xen_dma_info {
>  	dma_addr_t dma_addr;
>  	phys_addr_t phys_addr;
>  	size_t size;
> +	atomic_t refs;
>  	struct rb_node rbnode_dma;
>  	struct rb_node rbnode_phys;
>  };
> @@ -254,6 +257,48 @@ static dma_addr_t xen_virt_to_bus(void *address)
>  	return xen_phys_to_bus_quick(virt_to_phys(address));
>  }
>  
> +static int xen_pin_dev_page(struct device *dev,
> +							phys_addr_t phys,
> +							dma_addr_t *dev_addr)

Something is odd with your tabs.
> +{
> +	u64 dma_mask = DMA_BIT_MASK(32);

Why 32?

> +	xen_pfn_t in;
> +	struct xen_dma_info *dma_info = xen_get_dma_info_from_phys(phys);
> +
> +	if (dma_info != NULL) {
> +		atomic_inc(&dma_info->refs);
> +		*dev_addr = dma_info->dma_addr + (phys - dma_info->phys_addr);
> +		return 0;
> +	}
> +
> +	if (dev && dev->coherent_dma_mask)
> +		dma_mask = dma_alloc_coherent_mask(dev, GFP_KERNEL);
> +
> +	in = phys >> PAGE_SHIFT;
> +	if (!xen_pin_page(&in, fls64(dma_mask))) {

Why not just make xen_pin_page take a phys address and do the
appropriate bit shifting itself?

> +		*dev_addr = in << PAGE_SHIFT;
> +		dma_info = kzalloc(sizeof(struct xen_dma_info), GFP_NOWAIT);
> +		if (!dma_info) {
> +			pr_warn("cannot allocate xen_dma_info\n");
> +			xen_destroy_contiguous_region(phys & PAGE_MASK, 0);

Perhaps we should add an inline function for that, called 'xen_unpin_page'?
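
Something along these lines, perhaps (just a sketch of the suggested helper,
mirroring what the error paths in this function open-code):

	static inline void xen_unpin_page(phys_addr_t phys)
	{
		xen_destroy_contiguous_region(phys & PAGE_MASK, 0);
	}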

> +			return -ENOMEM;
> +		}
> +		dma_info->phys_addr = phys & PAGE_MASK;
> +		dma_info->size = PAGE_SIZE;
> +		dma_info->dma_addr = *dev_addr;
> +		if (xen_dma_add_entry(dma_info)) {
> +			pr_warn("cannot add new entry to bus_to_phys\n");
> +			xen_destroy_contiguous_region(phys & PAGE_MASK, 0);
> +			kfree(dma_info);
> +			return -EFAULT;
> +		}
> +		atomic_set(&dma_info->refs, 1);
> +		*dev_addr += (phys & ~PAGE_MASK);
> +		return 0;
> +	}

Don't you want to do the opposite of dma_alloc_coherent_mask?

> +	return -EFAULT;
> +}
> +
>  static int check_pages_physically_contiguous(unsigned long pfn,
>  					     unsigned int offset,
>  					     size_t length)
> @@ -434,6 +479,7 @@ retry:
>  		rc = 0;
>  	} else
>  		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
> +	spin_lock_init(&swiotlb_lock);
>  	return rc;
>  error:
>  	if (repeat--) {
> @@ -461,6 +507,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
>  	phys_addr_t phys;
>  	dma_addr_t dev_addr;
>  	struct xen_dma_info *dma_info = NULL;
> +	unsigned long irqflags;
>  
>  	/*
>  	* Ignore region specifiers - the kernel's ideas of
> @@ -497,7 +544,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
>  	    !range_straddles_page_boundary(phys, size))
>  		*dma_handle = dev_addr;
>  	else {
> -		if (xen_create_contiguous_region(phys, order,
> +		if (xen_create_contiguous_region(phys & PAGE_MASK, order,
>  						 fls64(dma_mask), dma_handle) != 0) {
>  			xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs);
>  			return NULL;
> @@ -509,15 +556,19 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
>  			xen_destroy_contiguous_region(phys, order);
>  			return NULL;
>  		}
> -		dma_info->phys_addr = phys;
> -		dma_info->size = size;
> +		dma_info->phys_addr = phys & PAGE_MASK;
> +		dma_info->size = (1U << order) << PAGE_SHIFT;
>  		dma_info->dma_addr = *dma_handle;
> +		atomic_set(&dma_info->refs, 1);
> +		spin_lock_irqsave(&swiotlb_lock, irqflags);
>  		if (xen_dma_add_entry(dma_info)) {
> +			spin_unlock_irqrestore(&swiotlb_lock, irqflags);
>  			pr_warn("cannot add new entry to bus_to_phys\n");
>  			xen_destroy_contiguous_region(phys, order);
>  			kfree(dma_info);
>  			return NULL;
>  		}
> +		spin_unlock_irqrestore(&swiotlb_lock, irqflags);
>  	}
>  	memset(ret, 0, size);
>  	return ret;
> @@ -532,6 +583,7 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
>  	phys_addr_t phys;
>  	u64 dma_mask = DMA_BIT_MASK(32);
>  	struct xen_dma_info *dma_info = NULL;
> +	unsigned long flags;
>  
>  	if (dma_release_from_coherent(hwdev, order, vaddr))
>  		return;
> @@ -539,6 +591,7 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
>  	if (hwdev && hwdev->coherent_dma_mask)
>  		dma_mask = hwdev->coherent_dma_mask;
>  
> +	spin_lock_irqsave(&swiotlb_lock, flags);
>  	/* do not use virt_to_phys because on ARM it doesn't return you the
>  	 * physical address */
>  	phys = xen_bus_to_phys(dev_addr);
> @@ -546,12 +599,16 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
>  	if (xen_feature(XENFEAT_auto_translated_physmap) ||
>  		(((dev_addr + size - 1 > dma_mask)) ||
>  		 range_straddles_page_boundary(phys, size))) {
> -		xen_destroy_contiguous_region(phys, order);
>  		dma_info = xen_get_dma_info_from_dma(dev_addr);
> -		rb_erase(&dma_info->rbnode, &bus_to_phys);
> -		kfree(dma_info);
> +		if (atomic_dec_and_test(&dma_info->refs)) {
> +			xen_destroy_contiguous_region(phys & PAGE_MASK, order);
> +			rb_erase(&dma_info->rbnode_dma, &bus_to_phys);
> +			rb_erase(&dma_info->rbnode_phys, &phys_to_bus);
> +			kfree(dma_info);
> +		}

If xen_pin_dev_page failed or was not called, we would still end up
calling this, and decrement a potentially garbage value? Or not.
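
Perhaps guard it, something like this (a sketch only, reusing the names
from the hunk above):

	dma_info = xen_get_dma_info_from_dma(dev_addr);
	/* Only drop a reference if a tracking entry actually exists. */
	if (dma_info && atomic_dec_and_test(&dma_info->refs)) {
		xen_destroy_contiguous_region(phys & PAGE_MASK, order);
		rb_erase(&dma_info->rbnode_dma, &bus_to_phys);
		rb_erase(&dma_info->rbnode_phys, &phys_to_bus);
		kfree(dma_info);
	}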
>  	}
>  
> +	spin_unlock_irqrestore(&swiotlb_lock, flags);
>  	xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
>  }
>  EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
> @@ -583,6 +640,23 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
>  	    !range_straddles_page_boundary(phys, size) && !swiotlb_force)
>  		return dev_addr;
>  
> +	if (xen_feature(XENFEAT_auto_translated_physmap) &&
> +		size <= PAGE_SIZE &&
> +		!range_straddles_page_boundary(phys, size) &&
> +		!swiotlb_force) {
> +		unsigned long flags;
> +		int rc;
> +
> +		spin_lock_irqsave(&swiotlb_lock, flags);
> +		rc = xen_pin_dev_page(dev, phys, &dev_addr);
> +		spin_unlock_irqrestore(&swiotlb_lock, flags);
> +
> +		if (!rc) {
> +			dma_mark_clean(phys_to_virt(phys), size);
> +			return dev_addr;
> +		}

And if there is a non-zero rc you should probably do a
		dev_warn(.., "RC ..")
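
For example (hypothetical wording, using the names from the hunk above):

		if (rc)
			dev_warn(dev, "xen_pin_dev_page failed (%d), using bounce buffer\n", rc);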


But more importantly - all of this code adds an extra lock on the X86 side,
where xen_pin_dev_page will just get an -ENOxxx anyway.

I am wondering if it makes sense to make most of this code dependent
on CONFIG_ARM? As the check for auto-xlat falls flat on X86 + PVH. Though
I have no idea what we want to do with PVH and X86 at this point.
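
Something like the below, perhaps (a hypothetical sketch; the helper name
is made up):

#ifdef CONFIG_ARM
static bool xen_swiotlb_can_pin(phys_addr_t phys, size_t size)
{
	return xen_feature(XENFEAT_auto_translated_physmap) &&
	       size <= PAGE_SIZE &&
	       !range_straddles_page_boundary(phys, size) &&
	       !swiotlb_force;
}
#else
/* On x86 the pin fast path would only add locking and then fail. */
static inline bool xen_swiotlb_can_pin(phys_addr_t phys, size_t size)
{
	return false;
}
#endif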

> +	}
> +
>  	/*
>  	 * Oh well, have to allocate and map a bounce buffer.
>  	 * Pass the dma_addr of the first slab in the iotlb buffer as
> @@ -618,10 +692,37 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);
>  static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
>  			     size_t size, enum dma_data_direction dir)
>  {
> -	phys_addr_t paddr = xen_bus_to_phys(dev_addr);
> +	struct xen_dma_info *dma_info;
> +	phys_addr_t paddr = DMA_ERROR_CODE;
> +	char *vaddr = NULL;
> +	unsigned long flags;
>  
>  	BUG_ON(dir == DMA_NONE);
>  
> +	spin_lock_irqsave(&swiotlb_lock, flags);
> +	dma_info = xen_get_dma_info_from_dma(dev_addr);
> +	if (dma_info != NULL) {
> +		paddr = dma_info->phys_addr + (dev_addr - dma_info->dma_addr);
> +		vaddr = phys_to_virt(paddr);
> +	}
> +
> +	if (xen_feature(XENFEAT_auto_translated_physmap) &&
> +		paddr != DMA_ERROR_CODE &&
> +		!(vaddr >= xen_io_tlb_start && vaddr < xen_io_tlb_end) &&
> +		!swiotlb_force) {
> +		if (atomic_dec_and_test(&dma_info->refs)) {
> +			xen_destroy_contiguous_region(paddr & PAGE_MASK, 0);
> +			rb_erase(&dma_info->rbnode_dma, &bus_to_phys);
> +			rb_erase(&dma_info->rbnode_phys, &phys_to_bus);
> +			kfree(dma_info);
> +		}
> +		spin_unlock_irqrestore(&swiotlb_lock, flags);
> +		if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))
> +			dma_mark_clean(vaddr, size);
> +		return;
> +	}
> +	spin_unlock_irqrestore(&swiotlb_lock, flags);
> +
>  	/* NOTE: We use dev_addr here, not paddr! */
>  	if (is_xen_swiotlb_buffer(dev_addr)) {
>  		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
> @@ -664,9 +765,19 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
>  			enum dma_sync_target target)
>  {
>  	phys_addr_t paddr = xen_bus_to_phys(dev_addr);
> +	char *vaddr = phys_to_virt(paddr);
>  
>  	BUG_ON(dir == DMA_NONE);
>  
> +	if (xen_feature(XENFEAT_auto_translated_physmap) &&
> +		paddr != DMA_ERROR_CODE &&
> +		size <= PAGE_SIZE &&
> +		!(vaddr >= xen_io_tlb_start && vaddr < xen_io_tlb_end) &&
> +		!range_straddles_page_boundary(paddr, size) && !swiotlb_force) {
> +		dma_mark_clean(vaddr, size);		
> +		return;
> +	}
> +
>  	/* NOTE: We use dev_addr here, not paddr! */
>  	if (is_xen_swiotlb_buffer(dev_addr)) {
>  		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
> @@ -717,13 +828,36 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
>  			 struct dma_attrs *attrs)
>  {
>  	struct scatterlist *sg;
> -	int i;
> +	int i, rc;
> +	u64 dma_mask = DMA_BIT_MASK(32);
> +	unsigned long flags;
>  
>  	BUG_ON(dir == DMA_NONE);
>  
> +	if (hwdev && hwdev->coherent_dma_mask)
> +		dma_mask = dma_alloc_coherent_mask(hwdev, GFP_KERNEL);
> +
>  	for_each_sg(sgl, sg, nelems, i) {
>  		phys_addr_t paddr = sg_phys(sg);
> -		dma_addr_t dev_addr = xen_phys_to_bus_quick(paddr);
> +		dma_addr_t dev_addr;
> +
> +		if (xen_feature(XENFEAT_auto_translated_physmap) &&
> +			!range_straddles_page_boundary(paddr, sg->length) &&
> +			sg->length <= PAGE_SIZE &&
> +			!swiotlb_force) {
> +
> +			spin_lock_irqsave(&swiotlb_lock, flags);
> +			rc = xen_pin_dev_page(hwdev, paddr, &dev_addr);
> +			spin_unlock_irqrestore(&swiotlb_lock, flags);
> +
> +			if (!rc) {
> +				dma_mark_clean(phys_to_virt(paddr), sg->length);
> +				sg_dma_len(sg) = sg->length;
> +				sg->dma_address = dev_addr;
> +				continue;
> +			}
> +		}
> +		dev_addr = xen_phys_to_bus_quick(paddr);
>  
>  		if (swiotlb_force ||
>  		    xen_feature(XENFEAT_auto_translated_physmap) ||
> -- 
> 1.7.2.5
>
Stefano Stabellini Oct. 9, 2013, 5:27 p.m. UTC | #2
On Mon, 30 Sep 2013, Konrad Rzeszutek Wilk wrote:
> On Fri, Sep 27, 2013 at 05:10:07PM +0100, Stefano Stabellini wrote:
> > If we are dealing with single page mappings that don't cross page
> > boundaries, we can try to pin the page and get the corresponding mfn,
> > using xen_pin_page.  This avoids going through the swiotlb bounce
> > buffer.  If xen_pin_page fails (because the underlying mfn doesn't
> > respect the dma_mask) fall back to the swiotlb bounce buffer.
> > Add a ref count to xen_dma_info, so that we can avoid pinnig pages that
> > are already pinned.
> > Use a spinlock to protect accesses, insertions and deletions in the
> > rbtrees.
> > 
> > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

Thanks for the review; however, I am dropping this patch because it
wasn't improving performance as much as I hoped it would.

I am taking a new approach now: I am keeping the 1:1 physical-to-machine
mapping for dom0 and using swiotlb-xen only to handle DMA requests
involving foreign grants.

The code is much nicer, and it runs much faster.
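
Roughly, the idea is that with a 1:1 p2m a local page's mfn equals its
pfn, so only foreign grant mappings need special handling. A sketch (not
the actual code, and the helper name is made up):

	static bool xen_page_is_foreign(phys_addr_t phys)
	{
		unsigned long pfn = PFN_DOWN(phys);

		/* With a 1:1 mapping, pfn != mfn only for pages granted
		 * by another domain. */
		return pfn_to_mfn(pfn) != pfn;
	}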


Patch

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 022bcaf..6f94285 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -57,6 +57,8 @@ 
 #define NR_DMA_SEGS  ((xen_io_tlb_nslabs + IO_TLB_SEGSIZE - 1) / IO_TLB_SEGSIZE)
 static char *xen_io_tlb_start, *xen_io_tlb_end;
 static unsigned long xen_io_tlb_nslabs;
+spinlock_t swiotlb_lock;
+
 /*
  * Quick lookup value of the bus address of the IOTLB.
  */
@@ -79,6 +81,7 @@  struct xen_dma_info {
 	dma_addr_t dma_addr;
 	phys_addr_t phys_addr;
 	size_t size;
+	atomic_t refs;
 	struct rb_node rbnode_dma;
 	struct rb_node rbnode_phys;
 };
@@ -254,6 +257,48 @@  static dma_addr_t xen_virt_to_bus(void *address)
 	return xen_phys_to_bus_quick(virt_to_phys(address));
 }
 
+static int xen_pin_dev_page(struct device *dev,
+							phys_addr_t phys,
+							dma_addr_t *dev_addr)
+{
+	u64 dma_mask = DMA_BIT_MASK(32);
+	xen_pfn_t in;
+	struct xen_dma_info *dma_info = xen_get_dma_info_from_phys(phys);
+
+	if (dma_info != NULL) {
+		atomic_inc(&dma_info->refs);
+		*dev_addr = dma_info->dma_addr + (phys - dma_info->phys_addr);
+		return 0;
+	}
+
+	if (dev && dev->coherent_dma_mask)
+		dma_mask = dma_alloc_coherent_mask(dev, GFP_KERNEL);
+
+	in = phys >> PAGE_SHIFT;
+	if (!xen_pin_page(&in, fls64(dma_mask))) {
+		*dev_addr = in << PAGE_SHIFT;
+		dma_info = kzalloc(sizeof(struct xen_dma_info), GFP_NOWAIT);
+		if (!dma_info) {
+			pr_warn("cannot allocate xen_dma_info\n");
+			xen_destroy_contiguous_region(phys & PAGE_MASK, 0);
+			return -ENOMEM;
+		}
+		dma_info->phys_addr = phys & PAGE_MASK;
+		dma_info->size = PAGE_SIZE;
+		dma_info->dma_addr = *dev_addr;
+		if (xen_dma_add_entry(dma_info)) {
+			pr_warn("cannot add new entry to bus_to_phys\n");
+			xen_destroy_contiguous_region(phys & PAGE_MASK, 0);
+			kfree(dma_info);
+			return -EFAULT;
+		}
+		atomic_set(&dma_info->refs, 1);
+		*dev_addr += (phys & ~PAGE_MASK);
+		return 0;
+	}
+	return -EFAULT;
+}
+
 static int check_pages_physically_contiguous(unsigned long pfn,
 					     unsigned int offset,
 					     size_t length)
@@ -434,6 +479,7 @@  retry:
 		rc = 0;
 	} else
 		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
+	spin_lock_init(&swiotlb_lock);
 	return rc;
 error:
 	if (repeat--) {
@@ -461,6 +507,7 @@  xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	phys_addr_t phys;
 	dma_addr_t dev_addr;
 	struct xen_dma_info *dma_info = NULL;
+	unsigned long irqflags;
 
 	/*
 	* Ignore region specifiers - the kernel's ideas of
@@ -497,7 +544,7 @@  xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	    !range_straddles_page_boundary(phys, size))
 		*dma_handle = dev_addr;
 	else {
-		if (xen_create_contiguous_region(phys, order,
+		if (xen_create_contiguous_region(phys & PAGE_MASK, order,
 						 fls64(dma_mask), dma_handle) != 0) {
 			xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs);
 			return NULL;
@@ -509,15 +556,19 @@  xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			xen_destroy_contiguous_region(phys, order);
 			return NULL;
 		}
-		dma_info->phys_addr = phys;
-		dma_info->size = size;
+		dma_info->phys_addr = phys & PAGE_MASK;
+		dma_info->size = (1U << order) << PAGE_SHIFT;
 		dma_info->dma_addr = *dma_handle;
+		atomic_set(&dma_info->refs, 1);
+		spin_lock_irqsave(&swiotlb_lock, irqflags);
 		if (xen_dma_add_entry(dma_info)) {
+			spin_unlock_irqrestore(&swiotlb_lock, irqflags);
 			pr_warn("cannot add new entry to bus_to_phys\n");
 			xen_destroy_contiguous_region(phys, order);
 			kfree(dma_info);
 			return NULL;
 		}
+		spin_unlock_irqrestore(&swiotlb_lock, irqflags);
 	}
 	memset(ret, 0, size);
 	return ret;
@@ -532,6 +583,7 @@  xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 	phys_addr_t phys;
 	u64 dma_mask = DMA_BIT_MASK(32);
 	struct xen_dma_info *dma_info = NULL;
+	unsigned long flags;
 
 	if (dma_release_from_coherent(hwdev, order, vaddr))
 		return;
@@ -539,6 +591,7 @@  xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 	if (hwdev && hwdev->coherent_dma_mask)
 		dma_mask = hwdev->coherent_dma_mask;
 
+	spin_lock_irqsave(&swiotlb_lock, flags);
 	/* do not use virt_to_phys because on ARM it doesn't return you the
 	 * physical address */
 	phys = xen_bus_to_phys(dev_addr);
@@ -546,12 +599,16 @@  xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 	if (xen_feature(XENFEAT_auto_translated_physmap) ||
 		(((dev_addr + size - 1 > dma_mask)) ||
 		 range_straddles_page_boundary(phys, size))) {
-		xen_destroy_contiguous_region(phys, order);
 		dma_info = xen_get_dma_info_from_dma(dev_addr);
-		rb_erase(&dma_info->rbnode, &bus_to_phys);
-		kfree(dma_info);
+		if (atomic_dec_and_test(&dma_info->refs)) {
+			xen_destroy_contiguous_region(phys & PAGE_MASK, order);
+			rb_erase(&dma_info->rbnode_dma, &bus_to_phys);
+			rb_erase(&dma_info->rbnode_phys, &phys_to_bus);
+			kfree(dma_info);
+		}
 	}
 
+	spin_unlock_irqrestore(&swiotlb_lock, flags);
 	xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
@@ -583,6 +640,23 @@  dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	    !range_straddles_page_boundary(phys, size) && !swiotlb_force)
 		return dev_addr;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap) &&
+		size <= PAGE_SIZE &&
+		!range_straddles_page_boundary(phys, size) &&
+		!swiotlb_force) {
+		unsigned long flags;
+		int rc;
+
+		spin_lock_irqsave(&swiotlb_lock, flags);
+		rc = xen_pin_dev_page(dev, phys, &dev_addr);
+		spin_unlock_irqrestore(&swiotlb_lock, flags);
+
+		if (!rc) {
+			dma_mark_clean(phys_to_virt(phys), size);
+			return dev_addr;
+		}
+	}
+
 	/*
 	 * Oh well, have to allocate and map a bounce buffer.
 	 * Pass the dma_addr of the first slab in the iotlb buffer as
@@ -618,10 +692,37 @@  EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);
 static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 			     size_t size, enum dma_data_direction dir)
 {
-	phys_addr_t paddr = xen_bus_to_phys(dev_addr);
+	struct xen_dma_info *dma_info;
+	phys_addr_t paddr = DMA_ERROR_CODE;
+	char *vaddr = NULL;
+	unsigned long flags;
 
 	BUG_ON(dir == DMA_NONE);
 
+	spin_lock_irqsave(&swiotlb_lock, flags);
+	dma_info = xen_get_dma_info_from_dma(dev_addr);
+	if (dma_info != NULL) {
+		paddr = dma_info->phys_addr + (dev_addr - dma_info->dma_addr);
+		vaddr = phys_to_virt(paddr);
+	}
+
+	if (xen_feature(XENFEAT_auto_translated_physmap) &&
+		paddr != DMA_ERROR_CODE &&
+		!(vaddr >= xen_io_tlb_start && vaddr < xen_io_tlb_end) &&
+		!swiotlb_force) {
+		if (atomic_dec_and_test(&dma_info->refs)) {
+			xen_destroy_contiguous_region(paddr & PAGE_MASK, 0);
+			rb_erase(&dma_info->rbnode_dma, &bus_to_phys);
+			rb_erase(&dma_info->rbnode_phys, &phys_to_bus);
+			kfree(dma_info);
+		}
+		spin_unlock_irqrestore(&swiotlb_lock, flags);
+		if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))
+			dma_mark_clean(vaddr, size);
+		return;
+	}
+	spin_unlock_irqrestore(&swiotlb_lock, flags);
+
 	/* NOTE: We use dev_addr here, not paddr! */
 	if (is_xen_swiotlb_buffer(dev_addr)) {
 		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
@@ -664,9 +765,19 @@  xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 			enum dma_sync_target target)
 {
 	phys_addr_t paddr = xen_bus_to_phys(dev_addr);
+	char *vaddr = phys_to_virt(paddr);
 
 	BUG_ON(dir == DMA_NONE);
 
+	if (xen_feature(XENFEAT_auto_translated_physmap) &&
+		paddr != DMA_ERROR_CODE &&
+		size <= PAGE_SIZE &&
+		!(vaddr >= xen_io_tlb_start && vaddr < xen_io_tlb_end) &&
+		!range_straddles_page_boundary(paddr, size) && !swiotlb_force) {
+		dma_mark_clean(vaddr, size);		
+		return;
+	}
+
 	/* NOTE: We use dev_addr here, not paddr! */
 	if (is_xen_swiotlb_buffer(dev_addr)) {
 		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
@@ -717,13 +828,36 @@  xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 			 struct dma_attrs *attrs)
 {
 	struct scatterlist *sg;
-	int i;
+	int i, rc;
+	u64 dma_mask = DMA_BIT_MASK(32);
+	unsigned long flags;
 
 	BUG_ON(dir == DMA_NONE);
 
+	if (hwdev && hwdev->coherent_dma_mask)
+		dma_mask = dma_alloc_coherent_mask(hwdev, GFP_KERNEL);
+
 	for_each_sg(sgl, sg, nelems, i) {
 		phys_addr_t paddr = sg_phys(sg);
-		dma_addr_t dev_addr = xen_phys_to_bus_quick(paddr);
+		dma_addr_t dev_addr;
+
+		if (xen_feature(XENFEAT_auto_translated_physmap) &&
+			!range_straddles_page_boundary(paddr, sg->length) &&
+			sg->length <= PAGE_SIZE &&
+			!swiotlb_force) {
+
+			spin_lock_irqsave(&swiotlb_lock, flags);
+			rc = xen_pin_dev_page(hwdev, paddr, &dev_addr);
+			spin_unlock_irqrestore(&swiotlb_lock, flags);
+
+			if (!rc) {
+				dma_mark_clean(phys_to_virt(paddr), sg->length);
+				sg_dma_len(sg) = sg->length;
+				sg->dma_address = dev_addr;
+				continue;
+			}
+		}
+		dev_addr = xen_phys_to_bus_quick(paddr);
 
 		if (swiotlb_force ||
 		    xen_feature(XENFEAT_auto_translated_physmap) ||