[v6,6/7] dma-reserved-iommu: iommu_get/put_single_reserved

Message ID 1459757222-2668-7-git-send-email-eric.auger@linaro.org (mailing list archive)
State New, archived

Commit Message

Eric Auger April 4, 2016, 8:07 a.m. UTC
This patch introduces iommu_get/put_single_reserved.

iommu_get_single_reserved allocates a new reserved iova page
and maps it onto the physical page that contains a given physical address.
The page size used is the IOMMU page size. It is the responsibility of the
system integrator to make sure the IOMMU page size in use corresponds
to the granularity of the MSI frame.

It returns the iova that is mapped onto the provided physical address;
hence the physical address passed as an argument does not need to be aligned.

If a mapping already exists between the two pages, the IOVA mapped
to the PA is returned directly.

Each time an iova is successfully returned, a binding ref count is
incremented.

iommu_put_single_reserved decrements the ref count; when it reaches
zero, the mapping is destroyed and the iova is released.
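
A rough usage sketch follows, from a hypothetical caller (the helper names
and the VFIO/MSI doorbell framing are illustrative only; it assumes the
reserved iova domain was previously set up with
iommu_alloc_reserved_iova_domain()):

#include <linux/iommu.h>
#include <linux/dma-reserved-iommu.h>

/* Map an MSI doorbell frame; *msi_iova can then be used as the MSI address. */
static int vfio_map_msi_doorbell(struct iommu_domain *domain,
				 phys_addr_t doorbell_pa, size_t size,
				 dma_addr_t *msi_iova)
{
	/*
	 * Allocates (or reuses) a reserved iova range and maps the doorbell
	 * page(s) onto it; each successful call takes a reference on the
	 * binding.
	 */
	return iommu_get_reserved_iova(domain, doorbell_pa, size,
				       IOMMU_WRITE, msi_iova);
}

static void vfio_unmap_msi_doorbell(struct iommu_domain *domain,
				    dma_addr_t msi_iova)
{
	/* Drops the reference; the mapping is destroyed when it reaches zero. */
	iommu_put_reserved_iova(domain, msi_iova);
}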

Signed-off-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Ankit Jindal <ajindal@apm.com>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>

---

v5 -> v6:
- revisit locking with spin_lock instead of mutex
- do not kref_get on 1st get
- add size parameter to the get function following Marc's request
- use the iova domain shift instead of using the smallest supported page size

v3 -> v4:
- formerly in iommu: iommu_get/put_single_reserved &
  iommu/arm-smmu: implement iommu_get/put_single_reserved
- Attempted to address Marc's doubts about missing size/alignment
  at VFIO level (user-space knows the IOMMU page size and the number
  of IOVA pages to provision)

v2 -> v3:
- remove static implementation of iommu_get_single_reserved &
  iommu_put_single_reserved when CONFIG_IOMMU_API is not set

v1 -> v2:
- previously a VFIO API, named vfio_alloc_map/unmap_free_reserved_iova
---
 drivers/iommu/dma-reserved-iommu.c | 146 +++++++++++++++++++++++++++++++++++++
 include/linux/dma-reserved-iommu.h |  28 +++++++
 2 files changed, 174 insertions(+)

Comments

Alex Williamson April 6, 2016, 11:12 p.m. UTC | #1
On Mon,  4 Apr 2016 08:07:01 +0000
Eric Auger <eric.auger@linaro.org> wrote:

> This patch introduces iommu_get/put_single_reserved.
> 
> iommu_get_single_reserved allows to allocate a new reserved iova page
> and map it onto the physical page that contains a given physical address.
> Page size is the IOMMU page one. It is the responsability of the
> system integrator to make sure the in use IOMMU page size corresponds
> to the granularity of the MSI frame.
> 
> It returns the iova that is mapped onto the provided physical address.
> Hence the physical address passed in argument does not need to be aligned.
> 
> In case a mapping already exists between both pages, the IOVA mapped
> to the PA is directly returned.
> 
> Each time an iova is successfully returned a binding ref count is
> incremented.
> 
> iommu_put_single_reserved decrements the ref count and when this latter
> is null, the mapping is destroyed and the iova is released.
> 
> Signed-off-by: Eric Auger <eric.auger@linaro.org>
> Signed-off-by: Ankit Jindal <ajindal@apm.com>
> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
> 
> ---
> 
> v5 -> v6:
> - revisit locking with spin_lock instead of mutex
> - do not kref_get on 1st get
> - add size parameter to the get function following Marc's request
> - use the iova domain shift instead of using the smallest supported page size
> 
> v3 -> v4:
> - formerly in iommu: iommu_get/put_single_reserved &
>   iommu/arm-smmu: implement iommu_get/put_single_reserved
> - Attempted to address Marc's doubts about missing size/alignment
>   at VFIO level (user-space knows the IOMMU page size and the number
>   of IOVA pages to provision)
> 
> v2 -> v3:
> - remove static implementation of iommu_get_single_reserved &
>   iommu_put_single_reserved when CONFIG_IOMMU_API is not set
> 
> v1 -> v2:
> - previously a VFIO API, named vfio_alloc_map/unmap_free_reserved_iova
> ---
>  drivers/iommu/dma-reserved-iommu.c | 146 +++++++++++++++++++++++++++++++++++++
>  include/linux/dma-reserved-iommu.h |  28 +++++++
>  2 files changed, 174 insertions(+)
> 
> diff --git a/drivers/iommu/dma-reserved-iommu.c b/drivers/iommu/dma-reserved-iommu.c
> index f592118..3c759d9 100644
> --- a/drivers/iommu/dma-reserved-iommu.c
> +++ b/drivers/iommu/dma-reserved-iommu.c
> @@ -136,3 +136,149 @@ void iommu_free_reserved_iova_domain(struct iommu_domain *domain)
>  	spin_unlock_irqrestore(&domain->reserved_lock, flags);
>  }
>  EXPORT_SYMBOL_GPL(iommu_free_reserved_iova_domain);
> +
> +static void delete_reserved_binding(struct iommu_domain *domain,
> +				    struct iommu_reserved_binding *b)
> +{
> +	struct iova_domain *iovad =
> +		(struct iova_domain *)domain->reserved_iova_cookie;
> +	unsigned long order = iova_shift(iovad);
> +
> +	iommu_unmap(domain, b->iova, b->size);
> +	free_iova(iovad, b->iova >> order);
> +	kfree(b);
> +}
> +
> +int iommu_get_reserved_iova(struct iommu_domain *domain,
> +			      phys_addr_t addr, size_t size, int prot,
> +			      dma_addr_t *iova)
> +{
> +	struct iova_domain *iovad =
> +		(struct iova_domain *)domain->reserved_iova_cookie;
> +	unsigned long order = iova_shift(iovad);
> +	unsigned long  base_pfn, end_pfn, nb_iommu_pages;
> +	size_t iommu_page_size = 1 << order, binding_size;
> +	phys_addr_t aligned_base, offset;
> +	struct iommu_reserved_binding *b, *newb;
> +	unsigned long flags;
> +	struct iova *p_iova;
> +	bool unmap = false;
> +	int ret;
> +
> +	base_pfn = addr >> order;
> +	end_pfn = (addr + size - 1) >> order;
> +	nb_iommu_pages = end_pfn - base_pfn + 1;
> +	aligned_base = base_pfn << order;
> +	offset = addr - aligned_base;
> +	binding_size = nb_iommu_pages * iommu_page_size;
> +
> +	if (!iovad)
> +		return -EINVAL;
> +
> +	spin_lock_irqsave(&domain->reserved_lock, flags);
> +
> +	b = find_reserved_binding(domain, aligned_base, binding_size);
> +	if (b) {
> +		*iova = b->iova + offset;
> +		kref_get(&b->kref);
> +		ret = 0;
> +		goto unlock;
> +	}
> +
> +	spin_unlock_irqrestore(&domain->reserved_lock, flags);
> +
> +	/*
> +	 * no reserved IOVA was found for this PA, start allocating and
> +	 * registering one while the spin-lock is not held. iommu_map/unmap
> +	 * are not supposed to be atomic
> +	 */
> +
> +	p_iova = alloc_iova(iovad, nb_iommu_pages, iovad->dma_32bit_pfn, true);
> +	if (!p_iova)
> +		return -ENOMEM;

Here we're using iovad, which we retrieved from reserved_iova_cookie outside
of the locking, which makes the locking ineffective. Isn't this lock also
protecting our iova domain? I'm confused.

> +
> +	*iova = iova_dma_addr(iovad, p_iova);
> +
> +	newb = kzalloc(sizeof(*b), GFP_KERNEL);
> +	if (!newb) {
> +		free_iova(iovad, p_iova->pfn_lo);
> +		return -ENOMEM;
> +	}
> +
> +	ret = iommu_map(domain, *iova, aligned_base, binding_size, prot);
> +	if (ret) {
> +		kfree(newb);
> +		free_iova(iovad, p_iova->pfn_lo);
> +		return ret;
> +	}
> +
> +	spin_lock_irqsave(&domain->reserved_lock, flags);
> +
> +	/* re-check the PA was not mapped in our back when lock was not held */
> +	b = find_reserved_binding(domain, aligned_base, binding_size);
> +	if (b) {
> +		*iova = b->iova + offset;
> +		kref_get(&b->kref);
> +		ret = 0;
> +		unmap = true;
> +		goto unlock;
> +	}
> +
> +	kref_init(&newb->kref);
> +	newb->domain = domain;
> +	newb->addr = aligned_base;
> +	newb->iova = *iova;
> +	newb->size = binding_size;
> +
> +	link_reserved_binding(domain, newb);
> +
> +	*iova += offset;
> +	goto unlock;

??

> +
> +unlock:
> +	spin_unlock_irqrestore(&domain->reserved_lock, flags);
> +	if (unmap)
> +		delete_reserved_binding(domain, newb);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(iommu_get_reserved_iova);
> +
> +void iommu_put_reserved_iova(struct iommu_domain *domain, dma_addr_t iova)
> +{
> +	struct iova_domain *iovad =
> +		(struct iova_domain *)domain->reserved_iova_cookie;
> +	unsigned long order;
> +	phys_addr_t aligned_addr;
> +	dma_addr_t aligned_iova, page_size, mask, offset;
> +	struct iommu_reserved_binding *b;
> +	unsigned long flags;
> +	bool unmap = false;
> +
> +	order = iova_shift(iovad);
> +	page_size = (uint64_t)1 << order;
> +	mask = page_size - 1;
> +
> +	aligned_iova = iova & ~mask;
> +	offset = iova - aligned_iova;
> +
> +	aligned_addr = iommu_iova_to_phys(domain, aligned_iova);
> +
> +	spin_lock_irqsave(&domain->reserved_lock, flags);
> +	b = find_reserved_binding(domain, aligned_addr, page_size);
> +	if (!b)
> +		goto unlock;
> +
> +	if (atomic_sub_and_test(1, &b->kref.refcount)) {
> +		unlink_reserved_binding(domain, b);
> +		unmap = true;
> +	}
> +
> +unlock:
> +	spin_unlock_irqrestore(&domain->reserved_lock, flags);
> +	if (unmap)
> +		delete_reserved_binding(domain, b);
> +}
> +EXPORT_SYMBOL_GPL(iommu_put_reserved_iova);
> +
> +
> +
> diff --git a/include/linux/dma-reserved-iommu.h b/include/linux/dma-reserved-iommu.h
> index 5bf863b..dedea56 100644
> --- a/include/linux/dma-reserved-iommu.h
> +++ b/include/linux/dma-reserved-iommu.h
> @@ -40,6 +40,34 @@ int iommu_alloc_reserved_iova_domain(struct iommu_domain *domain,
>   */
>  void iommu_free_reserved_iova_domain(struct iommu_domain *domain);
>  
> +/**
> + * iommu_get_reserved_iova: allocate a contiguous set of iova pages and
> + * map them to the physical range defined by @addr and @size.
> + *
> + * @domain: iommu domain handle
> + * @addr: physical address to bind
> + * @size: size of the binding
> + * @prot: mapping protection attribute
> + * @iova: returned iova
> + *
> + * Mapped physical pfns are within [@addr >> order, (@addr + size -1) >> order]
> + * where order corresponds to the iova domain order.
> + * This mapping is reference counted as a whole and cannot by split.
> + */
> +int iommu_get_reserved_iova(struct iommu_domain *domain,
> +			      phys_addr_t addr, size_t size, int prot,
> +			      dma_addr_t *iova);
> +
> +/**
> + * iommu_put_reserved_iova: decrement a ref count of the reserved mapping
> + *
> + * @domain: iommu domain handle
> + * @iova: reserved iova whose binding ref count is decremented
> + *
> + * if the binding ref count is null, destroy the reserved mapping
> + */
> +void iommu_put_reserved_iova(struct iommu_domain *domain, dma_addr_t iova);
> +
>  #endif	/* CONFIG_IOMMU_DMA_RESERVED */
>  #endif	/* __KERNEL__ */
>  #endif	/* __DMA_RESERVED_IOMMU_H */
Eric Auger April 7, 2016, 9:33 a.m. UTC | #2
Alex,
On 04/07/2016 01:12 AM, Alex Williamson wrote:
> On Mon,  4 Apr 2016 08:07:01 +0000
> Eric Auger <eric.auger@linaro.org> wrote:
> 
>> This patch introduces iommu_get/put_single_reserved.
>>
>> iommu_get_single_reserved allows to allocate a new reserved iova page
>> and map it onto the physical page that contains a given physical address.
>> Page size is the IOMMU page one. It is the responsability of the
>> system integrator to make sure the in use IOMMU page size corresponds
>> to the granularity of the MSI frame.
>>
>> It returns the iova that is mapped onto the provided physical address.
>> Hence the physical address passed in argument does not need to be aligned.
>>
>> In case a mapping already exists between both pages, the IOVA mapped
>> to the PA is directly returned.
>>
>> Each time an iova is successfully returned a binding ref count is
>> incremented.
>>
>> iommu_put_single_reserved decrements the ref count and when this latter
>> is null, the mapping is destroyed and the iova is released.
>>
>> Signed-off-by: Eric Auger <eric.auger@linaro.org>
>> Signed-off-by: Ankit Jindal <ajindal@apm.com>
>> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
>> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
>>
>> ---
>>
>> v5 -> v6:
>> - revisit locking with spin_lock instead of mutex
>> - do not kref_get on 1st get
>> - add size parameter to the get function following Marc's request
>> - use the iova domain shift instead of using the smallest supported page size
>>
>> v3 -> v4:
>> - formerly in iommu: iommu_get/put_single_reserved &
>>   iommu/arm-smmu: implement iommu_get/put_single_reserved
>> - Attempted to address Marc's doubts about missing size/alignment
>>   at VFIO level (user-space knows the IOMMU page size and the number
>>   of IOVA pages to provision)
>>
>> v2 -> v3:
>> - remove static implementation of iommu_get_single_reserved &
>>   iommu_put_single_reserved when CONFIG_IOMMU_API is not set
>>
>> v1 -> v2:
>> - previously a VFIO API, named vfio_alloc_map/unmap_free_reserved_iova
>> ---
>>  drivers/iommu/dma-reserved-iommu.c | 146 +++++++++++++++++++++++++++++++++++++
>>  include/linux/dma-reserved-iommu.h |  28 +++++++
>>  2 files changed, 174 insertions(+)
>>
>> diff --git a/drivers/iommu/dma-reserved-iommu.c b/drivers/iommu/dma-reserved-iommu.c
>> index f592118..3c759d9 100644
>> --- a/drivers/iommu/dma-reserved-iommu.c
>> +++ b/drivers/iommu/dma-reserved-iommu.c
>> @@ -136,3 +136,149 @@ void iommu_free_reserved_iova_domain(struct iommu_domain *domain)
>>  	spin_unlock_irqrestore(&domain->reserved_lock, flags);
>>  }
>>  EXPORT_SYMBOL_GPL(iommu_free_reserved_iova_domain);
>> +
>> +static void delete_reserved_binding(struct iommu_domain *domain,
>> +				    struct iommu_reserved_binding *b)
>> +{
>> +	struct iova_domain *iovad =
>> +		(struct iova_domain *)domain->reserved_iova_cookie;
>> +	unsigned long order = iova_shift(iovad);
>> +
>> +	iommu_unmap(domain, b->iova, b->size);
>> +	free_iova(iovad, b->iova >> order);
>> +	kfree(b);
>> +}
>> +
>> +int iommu_get_reserved_iova(struct iommu_domain *domain,
>> +			      phys_addr_t addr, size_t size, int prot,
>> +			      dma_addr_t *iova)
>> +{
>> +	struct iova_domain *iovad =
>> +		(struct iova_domain *)domain->reserved_iova_cookie;
>> +	unsigned long order = iova_shift(iovad);
>> +	unsigned long  base_pfn, end_pfn, nb_iommu_pages;
>> +	size_t iommu_page_size = 1 << order, binding_size;
>> +	phys_addr_t aligned_base, offset;
>> +	struct iommu_reserved_binding *b, *newb;
>> +	unsigned long flags;
>> +	struct iova *p_iova;
>> +	bool unmap = false;
>> +	int ret;
>> +
>> +	base_pfn = addr >> order;
>> +	end_pfn = (addr + size - 1) >> order;
>> +	nb_iommu_pages = end_pfn - base_pfn + 1;
>> +	aligned_base = base_pfn << order;
>> +	offset = addr - aligned_base;
>> +	binding_size = nb_iommu_pages * iommu_page_size;
>> +
>> +	if (!iovad)
>> +		return -EINVAL;
>> +
>> +	spin_lock_irqsave(&domain->reserved_lock, flags);
>> +
>> +	b = find_reserved_binding(domain, aligned_base, binding_size);
>> +	if (b) {
>> +		*iova = b->iova + offset;
>> +		kref_get(&b->kref);
>> +		ret = 0;
>> +		goto unlock;
>> +	}
>> +
>> +	spin_unlock_irqrestore(&domain->reserved_lock, flags);
>> +
>> +	/*
>> +	 * no reserved IOVA was found for this PA, start allocating and
>> +	 * registering one while the spin-lock is not held. iommu_map/unmap
>> +	 * are not supposed to be atomic
>> +	 */
>> +
>> +	p_iova = alloc_iova(iovad, nb_iommu_pages, iovad->dma_32bit_pfn, true);
>> +	if (!p_iova)
>> +		return -ENOMEM;
> 
> Here we're using iovad, which was the reserved_iova_cookie outside of
> the locking, which makes the locking ineffective.  Isn't this lock also
> protecting our iova domain, I'm confused.
I think I was too when I wrote that :-(
> 
>> +
>> +	*iova = iova_dma_addr(iovad, p_iova);
>> +
>> +	newb = kzalloc(sizeof(*b), GFP_KERNEL);
needs to be GFP_ATOMIC, as Jean-Philippe stated before.
>> +	if (!newb) {
>> +		free_iova(iovad, p_iova->pfn_lo);
>> +		return -ENOMEM;
>> +	}
>> +
>> +	ret = iommu_map(domain, *iova, aligned_base, binding_size, prot);
one problem I have is that I would need iommu_map to be atomic (because of
the call sequence reported by Jean-Philippe), and I have no guarantee that it
is in general. It is for arm-smmu(-v3).c, which covers the ARM use case, but
what about other SMMUs potentially used in that process?

Thanks
Eric
>> +	if (ret) {
>> +		kfree(newb);
>> +		free_iova(iovad, p_iova->pfn_lo);
>> +		return ret;
>> +	}
>> +
>> +	spin_lock_irqsave(&domain->reserved_lock, flags);
>> +
>> +	/* re-check the PA was not mapped in our back when lock was not held */
>> +	b = find_reserved_binding(domain, aligned_base, binding_size);
>> +	if (b) {
>> +		*iova = b->iova + offset;
>> +		kref_get(&b->kref);
>> +		ret = 0;
>> +		unmap = true;
>> +		goto unlock;
>> +	}
>> +
>> +	kref_init(&newb->kref);
>> +	newb->domain = domain;
>> +	newb->addr = aligned_base;
>> +	newb->iova = *iova;
>> +	newb->size = binding_size;
>> +
>> +	link_reserved_binding(domain, newb);
>> +
>> +	*iova += offset;
>> +	goto unlock;
> 
> ??
> 
>> +
>> +unlock:
>> +	spin_unlock_irqrestore(&domain->reserved_lock, flags);
>> +	if (unmap)
>> +		delete_reserved_binding(domain, newb);
>> +	return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(iommu_get_reserved_iova);
>> +
>> +void iommu_put_reserved_iova(struct iommu_domain *domain, dma_addr_t iova)
>> +{
>> +	struct iova_domain *iovad =
>> +		(struct iova_domain *)domain->reserved_iova_cookie;
>> +	unsigned long order;
>> +	phys_addr_t aligned_addr;
>> +	dma_addr_t aligned_iova, page_size, mask, offset;
>> +	struct iommu_reserved_binding *b;
>> +	unsigned long flags;
>> +	bool unmap = false;
>> +
>> +	order = iova_shift(iovad);
>> +	page_size = (uint64_t)1 << order;
>> +	mask = page_size - 1;
>> +
>> +	aligned_iova = iova & ~mask;
>> +	offset = iova - aligned_iova;
>> +
>> +	aligned_addr = iommu_iova_to_phys(domain, aligned_iova);
>> +
>> +	spin_lock_irqsave(&domain->reserved_lock, flags);
>> +	b = find_reserved_binding(domain, aligned_addr, page_size);
>> +	if (!b)
>> +		goto unlock;
>> +
>> +	if (atomic_sub_and_test(1, &b->kref.refcount)) {
>> +		unlink_reserved_binding(domain, b);
>> +		unmap = true;
>> +	}
>> +
>> +unlock:
>> +	spin_unlock_irqrestore(&domain->reserved_lock, flags);
>> +	if (unmap)
>> +		delete_reserved_binding(domain, b);
>> +}
>> +EXPORT_SYMBOL_GPL(iommu_put_reserved_iova);
>> +
>> +
>> +
>> diff --git a/include/linux/dma-reserved-iommu.h b/include/linux/dma-reserved-iommu.h
>> index 5bf863b..dedea56 100644
>> --- a/include/linux/dma-reserved-iommu.h
>> +++ b/include/linux/dma-reserved-iommu.h
>> @@ -40,6 +40,34 @@ int iommu_alloc_reserved_iova_domain(struct iommu_domain *domain,
>>   */
>>  void iommu_free_reserved_iova_domain(struct iommu_domain *domain);
>>  
>> +/**
>> + * iommu_get_reserved_iova: allocate a contiguous set of iova pages and
>> + * map them to the physical range defined by @addr and @size.
>> + *
>> + * @domain: iommu domain handle
>> + * @addr: physical address to bind
>> + * @size: size of the binding
>> + * @prot: mapping protection attribute
>> + * @iova: returned iova
>> + *
>> + * Mapped physical pfns are within [@addr >> order, (@addr + size -1) >> order]
>> + * where order corresponds to the iova domain order.
>> + * This mapping is reference counted as a whole and cannot by split.
>> + */
>> +int iommu_get_reserved_iova(struct iommu_domain *domain,
>> +			      phys_addr_t addr, size_t size, int prot,
>> +			      dma_addr_t *iova);
>> +
>> +/**
>> + * iommu_put_reserved_iova: decrement a ref count of the reserved mapping
>> + *
>> + * @domain: iommu domain handle
>> + * @iova: reserved iova whose binding ref count is decremented
>> + *
>> + * if the binding ref count is null, destroy the reserved mapping
>> + */
>> +void iommu_put_reserved_iova(struct iommu_domain *domain, dma_addr_t iova);
>> +
>>  #endif	/* CONFIG_IOMMU_DMA_RESERVED */
>>  #endif	/* __KERNEL__ */
>>  #endif	/* __DMA_RESERVED_IOMMU_H */
>
Jean-Philippe Brucker April 7, 2016, 2:38 p.m. UTC | #3
Hi Eric,

On Thu, Apr 07, 2016 at 11:33:42AM +0200, Eric Auger wrote:
> Alex,
> On 04/07/2016 01:12 AM, Alex Williamson wrote:
> > On Mon,  4 Apr 2016 08:07:01 +0000
> > Eric Auger <eric.auger@linaro.org> wrote:
> > 
> >> This patch introduces iommu_get/put_single_reserved.
> >>
> >> iommu_get_single_reserved allows to allocate a new reserved iova page
> >> and map it onto the physical page that contains a given physical address.
> >> Page size is the IOMMU page one. It is the responsability of the
> >> system integrator to make sure the in use IOMMU page size corresponds
> >> to the granularity of the MSI frame.
> >>
> >> It returns the iova that is mapped onto the provided physical address.
> >> Hence the physical address passed in argument does not need to be aligned.
> >>
> >> In case a mapping already exists between both pages, the IOVA mapped
> >> to the PA is directly returned.
> >>
> >> Each time an iova is successfully returned a binding ref count is
> >> incremented.
> >>
> >> iommu_put_single_reserved decrements the ref count and when this latter
> >> is null, the mapping is destroyed and the iova is released.
> >>
> >> Signed-off-by: Eric Auger <eric.auger@linaro.org>
> >> Signed-off-by: Ankit Jindal <ajindal@apm.com>
> >> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
> >> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
> >>
> >> ---
> >>
> >> v5 -> v6:
> >> - revisit locking with spin_lock instead of mutex
> >> - do not kref_get on 1st get
> >> - add size parameter to the get function following Marc's request
> >> - use the iova domain shift instead of using the smallest supported page size
> >>
> >> v3 -> v4:
> >> - formerly in iommu: iommu_get/put_single_reserved &
> >>   iommu/arm-smmu: implement iommu_get/put_single_reserved
> >> - Attempted to address Marc's doubts about missing size/alignment
> >>   at VFIO level (user-space knows the IOMMU page size and the number
> >>   of IOVA pages to provision)
> >>
> >> v2 -> v3:
> >> - remove static implementation of iommu_get_single_reserved &
> >>   iommu_put_single_reserved when CONFIG_IOMMU_API is not set
> >>
> >> v1 -> v2:
> >> - previously a VFIO API, named vfio_alloc_map/unmap_free_reserved_iova
> >> ---
> >>  drivers/iommu/dma-reserved-iommu.c | 146 +++++++++++++++++++++++++++++++++++++
> >>  include/linux/dma-reserved-iommu.h |  28 +++++++
> >>  2 files changed, 174 insertions(+)
> >>
> >> diff --git a/drivers/iommu/dma-reserved-iommu.c b/drivers/iommu/dma-reserved-iommu.c
> >> index f592118..3c759d9 100644
> >> --- a/drivers/iommu/dma-reserved-iommu.c
> >> +++ b/drivers/iommu/dma-reserved-iommu.c
> >> @@ -136,3 +136,149 @@ void iommu_free_reserved_iova_domain(struct iommu_domain *domain)
> >>  	spin_unlock_irqrestore(&domain->reserved_lock, flags);
> >>  }
> >>  EXPORT_SYMBOL_GPL(iommu_free_reserved_iova_domain);
> >> +
> >> +static void delete_reserved_binding(struct iommu_domain *domain,
> >> +				    struct iommu_reserved_binding *b)
> >> +{
> >> +	struct iova_domain *iovad =
> >> +		(struct iova_domain *)domain->reserved_iova_cookie;
> >> +	unsigned long order = iova_shift(iovad);
> >> +
> >> +	iommu_unmap(domain, b->iova, b->size);
> >> +	free_iova(iovad, b->iova >> order);
> >> +	kfree(b);
> >> +}
> >> +
> >> +int iommu_get_reserved_iova(struct iommu_domain *domain,
> >> +			      phys_addr_t addr, size_t size, int prot,
> >> +			      dma_addr_t *iova)
> >> +{
> >> +	struct iova_domain *iovad =
> >> +		(struct iova_domain *)domain->reserved_iova_cookie;
> >> +	unsigned long order = iova_shift(iovad);

Another nit: this call should be after the !iovad check below

> >> +	unsigned long  base_pfn, end_pfn, nb_iommu_pages;
> >> +	size_t iommu_page_size = 1 << order, binding_size;
> >> +	phys_addr_t aligned_base, offset;
> >> +	struct iommu_reserved_binding *b, *newb;
> >> +	unsigned long flags;
> >> +	struct iova *p_iova;
> >> +	bool unmap = false;
> >> +	int ret;
> >> +
> >> +	base_pfn = addr >> order;
> >> +	end_pfn = (addr + size - 1) >> order;
> >> +	nb_iommu_pages = end_pfn - base_pfn + 1;
> >> +	aligned_base = base_pfn << order;
> >> +	offset = addr - aligned_base;
> >> +	binding_size = nb_iommu_pages * iommu_page_size;
> >> +
> >> +	if (!iovad)
> >> +		return -EINVAL;
> >> +
> >> +	spin_lock_irqsave(&domain->reserved_lock, flags);
> >> +
> >> +	b = find_reserved_binding(domain, aligned_base, binding_size);
> >> +	if (b) {
> >> +		*iova = b->iova + offset;
> >> +		kref_get(&b->kref);
> >> +		ret = 0;
> >> +		goto unlock;
> >> +	}
> >> +
> >> +	spin_unlock_irqrestore(&domain->reserved_lock, flags);
> >> +
> >> +	/*
> >> +	 * no reserved IOVA was found for this PA, start allocating and
> >> +	 * registering one while the spin-lock is not held. iommu_map/unmap
> >> +	 * are not supposed to be atomic
> >> +	 */
> >> +
> >> +	p_iova = alloc_iova(iovad, nb_iommu_pages, iovad->dma_32bit_pfn, true);
> >> +	if (!p_iova)
> >> +		return -ENOMEM;
> > 
> > Here we're using iovad, which was the reserved_iova_cookie outside of
> > the locking, which makes the locking ineffective.  Isn't this lock also
> > protecting our iova domain, I'm confused.
> I think I was too when I wrote that :-(
> > 
> >> +
> >> +	*iova = iova_dma_addr(iovad, p_iova);
> >> +
> >> +	newb = kzalloc(sizeof(*b), GFP_KERNEL);
> needs to to be GPF_ATOMIC as Jean-Philippe stated before.
> >> +	if (!newb) {
> >> +		free_iova(iovad, p_iova->pfn_lo);
> >> +		return -ENOMEM;
> >> +	}
> >> +
> >> +	ret = iommu_map(domain, *iova, aligned_base, binding_size, prot);
> one problem I have is I would need iommu_map to be atomic (because of
> the call sequence reported by Jean-Philippe) and I have no guarantee it
> is in general I think . It is for arm-smmu(-v3).c which covers the ARM
> use case. But what about other smmus potentially used in that process?

Hmm, indeed. How about we move all the heavy mapping work to
msi_domain_prepare_irqs instead? It is allowed to sleep and, more
importantly, fail. It is called much earlier, by pci_enable_msi_range.

All we are missing is details about doorbells, which we could retrieve
from the MSI controller's driver, using one or more additional callbacks
in msi_domain_ops. Currently, we already need one such callback for
querying the number of doorbell pages; maybe we could also ask the
driver to provide their addresses? And in msi_domain_activate, simply
search for the IOVA already associated with the MSI message?
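
Just to sketch the idea (the structures and callback names below are
hypothetical, nothing of the sort exists in the tree today):

#include <linux/types.h>

struct irq_domain;

/* describes one doorbell frame exposed by the MSI controller */
struct msi_doorbell_info {
	phys_addr_t	base;	/* physical address of the doorbell frame */
	size_t		size;	/* size of the frame */
};

/*
 * Extra callbacks the MSI controller driver could provide through
 * msi_domain_ops, so the doorbells can be enumerated and mapped from
 * a sleeping context (e.g. msi_domain_prepare_irqs).
 */
struct msi_doorbell_ops {
	/* number of doorbell frames used by this controller */
	int (*get_doorbell_count)(struct irq_domain *domain);
	/* retrieve the doorbell frame at @index */
	int (*get_doorbell)(struct irq_domain *domain, unsigned int index,
			    struct msi_doorbell_info *db);
};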

I have only briefly thought about it from the host point of view; I'm not
sure how VFIO would cope with this method.

Thanks,
Jean-Philippe
Eric Auger April 7, 2016, 4:44 p.m. UTC | #4
On 04/07/2016 04:38 PM, Jean-Philippe Brucker wrote:
> Hi Eric,
> 
> On Thu, Apr 07, 2016 at 11:33:42AM +0200, Eric Auger wrote:
>> Alex,
>> On 04/07/2016 01:12 AM, Alex Williamson wrote:
>>> On Mon,  4 Apr 2016 08:07:01 +0000
>>> Eric Auger <eric.auger@linaro.org> wrote:
>>>
>>>> This patch introduces iommu_get/put_single_reserved.
>>>>
>>>> iommu_get_single_reserved allows to allocate a new reserved iova page
>>>> and map it onto the physical page that contains a given physical address.
>>>> Page size is the IOMMU page one. It is the responsability of the
>>>> system integrator to make sure the in use IOMMU page size corresponds
>>>> to the granularity of the MSI frame.
>>>>
>>>> It returns the iova that is mapped onto the provided physical address.
>>>> Hence the physical address passed in argument does not need to be aligned.
>>>>
>>>> In case a mapping already exists between both pages, the IOVA mapped
>>>> to the PA is directly returned.
>>>>
>>>> Each time an iova is successfully returned a binding ref count is
>>>> incremented.
>>>>
>>>> iommu_put_single_reserved decrements the ref count and when this latter
>>>> is null, the mapping is destroyed and the iova is released.
>>>>
>>>> Signed-off-by: Eric Auger <eric.auger@linaro.org>
>>>> Signed-off-by: Ankit Jindal <ajindal@apm.com>
>>>> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
>>>> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@freescale.com>
>>>>
>>>> ---
>>>>
>>>> v5 -> v6:
>>>> - revisit locking with spin_lock instead of mutex
>>>> - do not kref_get on 1st get
>>>> - add size parameter to the get function following Marc's request
>>>> - use the iova domain shift instead of using the smallest supported page size
>>>>
>>>> v3 -> v4:
>>>> - formerly in iommu: iommu_get/put_single_reserved &
>>>>   iommu/arm-smmu: implement iommu_get/put_single_reserved
>>>> - Attempted to address Marc's doubts about missing size/alignment
>>>>   at VFIO level (user-space knows the IOMMU page size and the number
>>>>   of IOVA pages to provision)
>>>>
>>>> v2 -> v3:
>>>> - remove static implementation of iommu_get_single_reserved &
>>>>   iommu_put_single_reserved when CONFIG_IOMMU_API is not set
>>>>
>>>> v1 -> v2:
>>>> - previously a VFIO API, named vfio_alloc_map/unmap_free_reserved_iova
>>>> ---
>>>>  drivers/iommu/dma-reserved-iommu.c | 146 +++++++++++++++++++++++++++++++++++++
>>>>  include/linux/dma-reserved-iommu.h |  28 +++++++
>>>>  2 files changed, 174 insertions(+)
>>>>
>>>> diff --git a/drivers/iommu/dma-reserved-iommu.c b/drivers/iommu/dma-reserved-iommu.c
>>>> index f592118..3c759d9 100644
>>>> --- a/drivers/iommu/dma-reserved-iommu.c
>>>> +++ b/drivers/iommu/dma-reserved-iommu.c
>>>> @@ -136,3 +136,149 @@ void iommu_free_reserved_iova_domain(struct iommu_domain *domain)
>>>>  	spin_unlock_irqrestore(&domain->reserved_lock, flags);
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(iommu_free_reserved_iova_domain);
>>>> +
>>>> +static void delete_reserved_binding(struct iommu_domain *domain,
>>>> +				    struct iommu_reserved_binding *b)
>>>> +{
>>>> +	struct iova_domain *iovad =
>>>> +		(struct iova_domain *)domain->reserved_iova_cookie;
>>>> +	unsigned long order = iova_shift(iovad);
>>>> +
>>>> +	iommu_unmap(domain, b->iova, b->size);
>>>> +	free_iova(iovad, b->iova >> order);
>>>> +	kfree(b);
>>>> +}
>>>> +
>>>> +int iommu_get_reserved_iova(struct iommu_domain *domain,
>>>> +			      phys_addr_t addr, size_t size, int prot,
>>>> +			      dma_addr_t *iova)
>>>> +{
>>>> +	struct iova_domain *iovad =
>>>> +		(struct iova_domain *)domain->reserved_iova_cookie;
>>>> +	unsigned long order = iova_shift(iovad);
> 
> Another nit: this call should be after the !iovad check below

Yes, definitely.
> 
>>>> +	unsigned long  base_pfn, end_pfn, nb_iommu_pages;
>>>> +	size_t iommu_page_size = 1 << order, binding_size;
>>>> +	phys_addr_t aligned_base, offset;
>>>> +	struct iommu_reserved_binding *b, *newb;
>>>> +	unsigned long flags;
>>>> +	struct iova *p_iova;
>>>> +	bool unmap = false;
>>>> +	int ret;
>>>> +
>>>> +	base_pfn = addr >> order;
>>>> +	end_pfn = (addr + size - 1) >> order;
>>>> +	nb_iommu_pages = end_pfn - base_pfn + 1;
>>>> +	aligned_base = base_pfn << order;
>>>> +	offset = addr - aligned_base;
>>>> +	binding_size = nb_iommu_pages * iommu_page_size;
>>>> +
>>>> +	if (!iovad)
>>>> +		return -EINVAL;
>>>> +
>>>> +	spin_lock_irqsave(&domain->reserved_lock, flags);
>>>> +
>>>> +	b = find_reserved_binding(domain, aligned_base, binding_size);
>>>> +	if (b) {
>>>> +		*iova = b->iova + offset;
>>>> +		kref_get(&b->kref);
>>>> +		ret = 0;
>>>> +		goto unlock;
>>>> +	}
>>>> +
>>>> +	spin_unlock_irqrestore(&domain->reserved_lock, flags);
>>>> +
>>>> +	/*
>>>> +	 * no reserved IOVA was found for this PA, start allocating and
>>>> +	 * registering one while the spin-lock is not held. iommu_map/unmap
>>>> +	 * are not supposed to be atomic
>>>> +	 */
>>>> +
>>>> +	p_iova = alloc_iova(iovad, nb_iommu_pages, iovad->dma_32bit_pfn, true);
>>>> +	if (!p_iova)
>>>> +		return -ENOMEM;
>>>
>>> Here we're using iovad, which was the reserved_iova_cookie outside of
>>> the locking, which makes the locking ineffective.  Isn't this lock also
>>> protecting our iova domain, I'm confused.
>> I think I was too when I wrote that :-(
>>>
>>>> +
>>>> +	*iova = iova_dma_addr(iovad, p_iova);
>>>> +
>>>> +	newb = kzalloc(sizeof(*b), GFP_KERNEL);
>> needs to to be GPF_ATOMIC as Jean-Philippe stated before.
>>>> +	if (!newb) {
>>>> +		free_iova(iovad, p_iova->pfn_lo);
>>>> +		return -ENOMEM;
>>>> +	}
>>>> +
>>>> +	ret = iommu_map(domain, *iova, aligned_base, binding_size, prot);
>> one problem I have is I would need iommu_map to be atomic (because of
>> the call sequence reported by Jean-Philippe) and I have no guarantee it
>> is in general I think . It is for arm-smmu(-v3).c which covers the ARM
>> use case. But what about other smmus potentially used in that process?
> 
> Hmm, indeed. How about we move all the heavy mapping work to
> msi_domain_prepare_irqs instead? It is allowed to sleep and, more
> importantly, fail. It is called much earlier, by pci_enable_msi_range.

Indeed this could be an option for setup.

However, a substitute for msi_domain_set_affinity should also be found, I
think, to handle a change in affinity (which can change the doorbell):

We have this path and irq_migrate_all_off_this_cpu takes the irq_desc
raw_spin_lock.

cpuhotplug.c:irq_migrate_all_off_this_cpu
cpuhotplug.c:migrate_one_irq
irq_do_set_affinity
chip->irq_set_affinity
msi_domain_set_affinity
../..
iommu_map/unmap

> 
> All we are missing is details about doorbells, which we could retrieve
> from the MSI controller's driver, using one or more additional callbacks
> in msi_domain_ops. Currently, we already need one such callbacks for
> querying the number of doorbell pages,
Yes, currently I assume a single page per MSI controller, which is
arbitrary. I can add such a callback in my next version.

Thank you for your time

Eric
> maybe we could also ask the
> driver to provide their addresses? And in msi_domain_activate, simply
> search for the IOVA already associated with the MSI message?
> 
> I only briefly though about it from the host point of view, not sure how
> VFIO would cope with this method.
> 
> Thanks,
> Jean-Philippe
>

Patch

diff --git a/drivers/iommu/dma-reserved-iommu.c b/drivers/iommu/dma-reserved-iommu.c
index f592118..3c759d9 100644
--- a/drivers/iommu/dma-reserved-iommu.c
+++ b/drivers/iommu/dma-reserved-iommu.c
@@ -136,3 +136,149 @@  void iommu_free_reserved_iova_domain(struct iommu_domain *domain)
 	spin_unlock_irqrestore(&domain->reserved_lock, flags);
 }
 EXPORT_SYMBOL_GPL(iommu_free_reserved_iova_domain);
+
+static void delete_reserved_binding(struct iommu_domain *domain,
+				    struct iommu_reserved_binding *b)
+{
+	struct iova_domain *iovad =
+		(struct iova_domain *)domain->reserved_iova_cookie;
+	unsigned long order = iova_shift(iovad);
+
+	iommu_unmap(domain, b->iova, b->size);
+	free_iova(iovad, b->iova >> order);
+	kfree(b);
+}
+
+int iommu_get_reserved_iova(struct iommu_domain *domain,
+			      phys_addr_t addr, size_t size, int prot,
+			      dma_addr_t *iova)
+{
+	struct iova_domain *iovad =
+		(struct iova_domain *)domain->reserved_iova_cookie;
+	unsigned long order = iova_shift(iovad);
+	unsigned long  base_pfn, end_pfn, nb_iommu_pages;
+	size_t iommu_page_size = 1 << order, binding_size;
+	phys_addr_t aligned_base, offset;
+	struct iommu_reserved_binding *b, *newb;
+	unsigned long flags;
+	struct iova *p_iova;
+	bool unmap = false;
+	int ret;
+
+	base_pfn = addr >> order;
+	end_pfn = (addr + size - 1) >> order;
+	nb_iommu_pages = end_pfn - base_pfn + 1;
+	aligned_base = base_pfn << order;
+	offset = addr - aligned_base;
+	binding_size = nb_iommu_pages * iommu_page_size;
+
+	if (!iovad)
+		return -EINVAL;
+
+	spin_lock_irqsave(&domain->reserved_lock, flags);
+
+	b = find_reserved_binding(domain, aligned_base, binding_size);
+	if (b) {
+		*iova = b->iova + offset;
+		kref_get(&b->kref);
+		ret = 0;
+		goto unlock;
+	}
+
+	spin_unlock_irqrestore(&domain->reserved_lock, flags);
+
+	/*
+	 * no reserved IOVA was found for this PA, start allocating and
+	 * registering one while the spin-lock is not held. iommu_map/unmap
+	 * are not supposed to be atomic
+	 */
+
+	p_iova = alloc_iova(iovad, nb_iommu_pages, iovad->dma_32bit_pfn, true);
+	if (!p_iova)
+		return -ENOMEM;
+
+	*iova = iova_dma_addr(iovad, p_iova);
+
+	newb = kzalloc(sizeof(*b), GFP_KERNEL);
+	if (!newb) {
+		free_iova(iovad, p_iova->pfn_lo);
+		return -ENOMEM;
+	}
+
+	ret = iommu_map(domain, *iova, aligned_base, binding_size, prot);
+	if (ret) {
+		kfree(newb);
+		free_iova(iovad, p_iova->pfn_lo);
+		return ret;
+	}
+
+	spin_lock_irqsave(&domain->reserved_lock, flags);
+
+	/* re-check the PA was not mapped behind our back while the lock was not held */
+	b = find_reserved_binding(domain, aligned_base, binding_size);
+	if (b) {
+		*iova = b->iova + offset;
+		kref_get(&b->kref);
+		ret = 0;
+		unmap = true;
+		goto unlock;
+	}
+
+	kref_init(&newb->kref);
+	newb->domain = domain;
+	newb->addr = aligned_base;
+	newb->iova = *iova;
+	newb->size = binding_size;
+
+	link_reserved_binding(domain, newb);
+
+	*iova += offset;
+	goto unlock;
+
+unlock:
+	spin_unlock_irqrestore(&domain->reserved_lock, flags);
+	if (unmap)
+		delete_reserved_binding(domain, newb);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_get_reserved_iova);
+
+void iommu_put_reserved_iova(struct iommu_domain *domain, dma_addr_t iova)
+{
+	struct iova_domain *iovad =
+		(struct iova_domain *)domain->reserved_iova_cookie;
+	unsigned long order;
+	phys_addr_t aligned_addr;
+	dma_addr_t aligned_iova, page_size, mask, offset;
+	struct iommu_reserved_binding *b;
+	unsigned long flags;
+	bool unmap = false;
+
+	order = iova_shift(iovad);
+	page_size = (uint64_t)1 << order;
+	mask = page_size - 1;
+
+	aligned_iova = iova & ~mask;
+	offset = iova - aligned_iova;
+
+	aligned_addr = iommu_iova_to_phys(domain, aligned_iova);
+
+	spin_lock_irqsave(&domain->reserved_lock, flags);
+	b = find_reserved_binding(domain, aligned_addr, page_size);
+	if (!b)
+		goto unlock;
+
+	if (atomic_sub_and_test(1, &b->kref.refcount)) {
+		unlink_reserved_binding(domain, b);
+		unmap = true;
+	}
+
+unlock:
+	spin_unlock_irqrestore(&domain->reserved_lock, flags);
+	if (unmap)
+		delete_reserved_binding(domain, b);
+}
+EXPORT_SYMBOL_GPL(iommu_put_reserved_iova);
+
+
+
diff --git a/include/linux/dma-reserved-iommu.h b/include/linux/dma-reserved-iommu.h
index 5bf863b..dedea56 100644
--- a/include/linux/dma-reserved-iommu.h
+++ b/include/linux/dma-reserved-iommu.h
@@ -40,6 +40,34 @@  int iommu_alloc_reserved_iova_domain(struct iommu_domain *domain,
  */
 void iommu_free_reserved_iova_domain(struct iommu_domain *domain);
 
+/**
+ * iommu_get_reserved_iova: allocate a contiguous set of iova pages and
+ * map them to the physical range defined by @addr and @size.
+ *
+ * @domain: iommu domain handle
+ * @addr: physical address to bind
+ * @size: size of the binding
+ * @prot: mapping protection attribute
+ * @iova: returned iova
+ *
+ * Mapped physical pfns are within [@addr >> order, (@addr + size -1) >> order]
+ * where order corresponds to the iova domain order.
+ * This mapping is reference counted as a whole and cannot be split.
+ */
+int iommu_get_reserved_iova(struct iommu_domain *domain,
+			      phys_addr_t addr, size_t size, int prot,
+			      dma_addr_t *iova);
+
+/**
+ * iommu_put_reserved_iova: decrement a ref count of the reserved mapping
+ *
+ * @domain: iommu domain handle
+ * @iova: reserved iova whose binding ref count is decremented
+ *
+ * If the binding ref count reaches zero, destroy the reserved mapping.
+ */
+void iommu_put_reserved_iova(struct iommu_domain *domain, dma_addr_t iova);
+
 #endif	/* CONFIG_IOMMU_DMA_RESERVED */
 #endif	/* __KERNEL__ */
 #endif	/* __DMA_RESERVED_IOMMU_H */