diff mbox

[01/37] iommu: Introduce Shared Virtual Addressing API

Message ID 20180212183352.22730-2-jean-philippe.brucker@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jean-Philippe Brucker Feb. 12, 2018, 6:33 p.m. UTC
Shared Virtual Addressing (SVA) provides a way for device drivers to bind
process address spaces to devices. This requires the IOMMU to support the
same page table format as CPUs, and requires the system to support I/O
Page Faults (IOPF) and Process Address Space ID (PASID). When all of these
are available, DMA can access virtual addresses of a process. A PASID is
allocated for each process, and the device driver programs it into the
device in an implementation-specific way.

Add a new API for sharing process page tables with devices. Introduce two
IOMMU operations, sva_device_init() and sva_device_shutdown(), that
prepare the IOMMU driver for SVA. For example allocate PASID tables and
fault queues. Subsequent patches will implement the bind() and unbind()
operations.

Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
---
 drivers/iommu/Kconfig     | 10 ++++++
 drivers/iommu/Makefile    |  1 +
 drivers/iommu/iommu-sva.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iommu.h     | 32 +++++++++++++++++
 4 files changed, 133 insertions(+)
 create mode 100644 drivers/iommu/iommu-sva.c

Comments

Tian, Kevin Feb. 13, 2018, 7:31 a.m. UTC | #1
> From: Jean-Philippe Brucker
> Sent: Tuesday, February 13, 2018 2:33 AM
> 
> Shared Virtual Addressing (SVA) provides a way for device drivers to bind
> process address spaces to devices. This requires the IOMMU to support the
> same page table format as CPUs, and requires the system to support I/O

"same" is a bit restrictive. "compatible" is better as you used in coverletter. :-)

> Page Faults (IOPF) and Process Address Space ID (PASID). When all of these
> are available, DMA can access virtual addresses of a process. A PASID is
> allocated for each process, and the device driver programs it into the
> device in an implementation-specific way.
> 
> Add a new API for sharing process page tables with devices. Introduce two
> IOMMU operations, sva_device_init() and sva_device_shutdown(), that
> prepare the IOMMU driver for SVA. For example allocate PASID tables and
> fault queues. Subsequent patches will implement the bind() and unbind()
> operations.
> 
> Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> ---
>  drivers/iommu/Kconfig     | 10 ++++++
>  drivers/iommu/Makefile    |  1 +
>  drivers/iommu/iommu-sva.c | 90
> +++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/iommu.h     | 32 +++++++++++++++++
>  4 files changed, 133 insertions(+)
>  create mode 100644 drivers/iommu/iommu-sva.c
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index f3a21343e636..555147a61f7c 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -74,6 +74,16 @@ config IOMMU_DMA
>  	select IOMMU_IOVA
>  	select NEED_SG_DMA_LENGTH
> 
> +config IOMMU_SVA
> +	bool "Shared Virtual Addressing API for the IOMMU"
> +	select IOMMU_API
> +	help
> +	  Enable process address space management for the IOMMU API. In
> systems
> +	  that support it, device drivers can bind process address spaces to
> +	  devices and share their page tables using this API.

"their page table" is a bit confusing here.

> +
> +	  If unsure, say N here.
> +
>  config FSL_PAMU
>  	bool "Freescale IOMMU support"
>  	depends on PCI
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index 1fb695854809..1dbcc89ebe4c 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -3,6 +3,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o
>  obj-$(CONFIG_IOMMU_API) += iommu-traces.o
>  obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
>  obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
> +obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
> diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
> new file mode 100644
> index 000000000000..cab5d723520f
> --- /dev/null
> +++ b/drivers/iommu/iommu-sva.c
> @@ -0,0 +1,90 @@
> +/*
> + * Track processes address spaces bound to devices and allocate PASIDs.
> + *
> + * Copyright (C) 2018 ARM Ltd.
> + * Author: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
> + *
> + * SPDX-License-Identifier: GPL-2.0
> + */
> +
> +#include <linux/iommu.h>
> +
> +/**
> + * iommu_sva_device_init() - Initialize Shared Virtual Addressing for a
> device
> + * @dev: the device
> + * @features: bitmask of features that need to be initialized
> + * @max_pasid: max PASID value supported by the device
> + *
> + * Users of the bind()/unbind() API must call this function to initialize all
> + * features required for SVA.
> + *
> + * - If the device should support multiple address spaces (e.g. PCI PASID),
> + *   IOMMU_SVA_FEAT_PASID must be requested.

I think it is by default assumed when using this API, based on definition of
SVA. Can you elaborate the situation where this flag can be cleared?

> + *
> + *   By default the PASID allocated during bind() is limited by the IOMMU
> + *   capacity, and by the device PASID width defined in the PCI capability or
> in
> + *   the firmware description. Setting @max_pasid to a non-zero value
> smaller
> + *   than this limit overrides it.
> + *
> + * - If the device should support I/O Page Faults (e.g. PCI PRI),
> + *   IOMMU_SVA_FEAT_IOPF must be requested.
> + *
> + * The device should not be be performing any DMA while this function is

remove double "be"

> + * running.

"otherwise the behavior is undefined"

> + *
> + * Return 0 if initialization succeeded, or an error.
> + */
> +int iommu_sva_device_init(struct device *dev, unsigned long features,
> +			  unsigned int max_pasid)
> +{
> +	int ret;
> +	unsigned int min_pasid = 0;
> +	struct iommu_param *dev_param = dev->iommu_param;
> +	struct iommu_domain *domain =
> iommu_get_domain_for_dev(dev);
> +
> +	if (!domain || !dev_param || !domain->ops->sva_device_init)
> +		return -ENODEV;
> +
> +	/*
> +	 * IOMMU driver updates the limits depending on the IOMMU and
> device
> +	 * capabilities.
> +	 */
> +	ret = domain->ops->sva_device_init(dev, features, &min_pasid,
> +					   &max_pasid);
> +	if (ret)
> +		return ret;
> +
> +	/* FIXME: racy. Next version should have a mutex (same as fault
> handler) */
> +	dev_param->sva_features = features;
> +	dev_param->min_pasid = min_pasid;
> +	dev_param->max_pasid = max_pasid;

what's the point of min_pasid here?

> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(iommu_sva_device_init);
> +
> +/**
> + * iommu_sva_device_shutdown() - Shutdown Shared Virtual Addressing
> for a device
> + * @dev: the device
> + *
> + * Disable SVA. The device should not be performing any DMA while this
> function
> + * is running.
> + */
> +int iommu_sva_device_shutdown(struct device *dev)
> +{
> +	struct iommu_param *dev_param = dev->iommu_param;
> +	struct iommu_domain *domain =
> iommu_get_domain_for_dev(dev);
> +
> +	if (!domain)
> +		return -ENODEV;
> +
> +	if (domain->ops->sva_device_shutdown)
> +		domain->ops->sva_device_shutdown(dev);
> +
> +	dev_param->sva_features = 0;
> +	dev_param->min_pasid = 0;
> +	dev_param->max_pasid = 0;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(iommu_sva_device_shutdown);
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 66ef406396e9..e9e09eecdece 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -60,6 +60,11 @@ typedef int (*iommu_fault_handler_t)(struct
> iommu_domain *,
>  			struct device *, unsigned long, int, void *);
>  typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault_event *,
> void *);
> 
> +/* Request PASID support */
> +#define IOMMU_SVA_FEAT_PASID		(1 << 0)
> +/* Request I/O page fault support */
> +#define IOMMU_SVA_FEAT_IOPF		(1 << 1)
> +
>  struct iommu_domain_geometry {
>  	dma_addr_t aperture_start; /* First address that can be mapped
> */
>  	dma_addr_t aperture_end;   /* Last address that can be mapped
> */
> @@ -197,6 +202,8 @@ struct page_response_msg {
>   * @domain_free: free iommu domain
>   * @attach_dev: attach device to an iommu domain
>   * @detach_dev: detach device from an iommu domain
> + * @sva_device_init: initialize Shared Virtual Adressing for a device
> + * @sva_device_shutdown: shutdown Shared Virtual Adressing for a
> device
>   * @map: map a physically contiguous memory region to an iommu
> domain
>   * @unmap: unmap a physically contiguous memory region from an
> iommu domain
>   * @map_sg: map a scatter-gather list of physically contiguous memory
> chunks
> @@ -230,6 +237,10 @@ struct iommu_ops {
> 
>  	int (*attach_dev)(struct iommu_domain *domain, struct device
> *dev);
>  	void (*detach_dev)(struct iommu_domain *domain, struct device
> *dev);
> +	int (*sva_device_init)(struct device *dev, unsigned long features,
> +			       unsigned int *min_pasid,
> +			       unsigned int *max_pasid);
> +	void (*sva_device_shutdown)(struct device *dev);
>  	int (*map)(struct iommu_domain *domain, unsigned long iova,
>  		   phys_addr_t paddr, size_t size, int prot);
>  	size_t (*unmap)(struct iommu_domain *domain, unsigned long
> iova,
> @@ -385,6 +396,9 @@ struct iommu_fault_param {
>   */
>  struct iommu_param {
>  	struct iommu_fault_param *fault_param;
> +	unsigned long sva_features;
> +	unsigned int min_pasid;
> +	unsigned int max_pasid;
>  };
> 
>  int  iommu_device_register(struct iommu_device *iommu);
> @@ -878,4 +892,22 @@ const struct iommu_ops
> *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
> 
>  #endif /* CONFIG_IOMMU_API */
> 
> +#ifdef CONFIG_IOMMU_SVA
> +extern int iommu_sva_device_init(struct device *dev, unsigned long
> features,
> +				 unsigned int max_pasid);
> +extern int iommu_sva_device_shutdown(struct device *dev);
> +#else /* CONFIG_IOMMU_SVA */
> +static inline int iommu_sva_device_init(struct device *dev,
> +					unsigned long features,
> +					unsigned int max_pasid)
> +{
> +	return -ENODEV;
> +}
> +
> +static inline int iommu_sva_device_shutdown(struct device *dev)
> +{
> +	return -ENODEV;
> +}
> +#endif /* CONFIG_IOMMU_SVA */
> +
>  #endif /* __LINUX_IOMMU_H */
> --
> 2.15.1
> 
> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
Jean-Philippe Brucker Feb. 13, 2018, 12:40 p.m. UTC | #2
Hi Kevin,

Thanks for taking a look!

On 13/02/18 07:31, Tian, Kevin wrote:
>> From: Jean-Philippe Brucker
>> Sent: Tuesday, February 13, 2018 2:33 AM
>>
>> Shared Virtual Addressing (SVA) provides a way for device drivers to bind
>> process address spaces to devices. This requires the IOMMU to support the
>> same page table format as CPUs, and requires the system to support I/O
> 
> "same" is a bit restrictive. "compatible" is better as you used in coverletter. :-)

Indeed

[..]
>> +config IOMMU_SVA
>> +	bool "Shared Virtual Addressing API for the IOMMU"
>> +	select IOMMU_API
>> +	help
>> +	  Enable process address space management for the IOMMU API. In
>> systems
>> +	  that support it, device drivers can bind process address spaces to
>> +	  devices and share their page tables using this API.
> 
> "their page table" is a bit confusing here.

Maybe this is sufficient:
"In systems that support it, drivers can share process address spaces with
their devices using this API."

[...]
>> +
>> +/**
>> + * iommu_sva_device_init() - Initialize Shared Virtual Addressing for a
>> device
>> + * @dev: the device
>> + * @features: bitmask of features that need to be initialized
>> + * @max_pasid: max PASID value supported by the device
>> + *
>> + * Users of the bind()/unbind() API must call this function to initialize all
>> + * features required for SVA.
>> + *
>> + * - If the device should support multiple address spaces (e.g. PCI PASID),
>> + *   IOMMU_SVA_FEAT_PASID must be requested.
> 
> I think it is by default assumed when using this API, based on definition of
> SVA. Can you elaborate the situation where this flag can be cleared?

When passing a device to userspace, you could also share its non-pasid
address space with the process. It requires a new domain type so is left
as a TODO in patch 2/37. I did get requests for this feature, though I
think it was mostly for prototyping. I guess I could remove the flag, and
reintroduce it as IOMMU_SVA_FEAT_NO_PASID later on.

>> + *
>> + *   By default the PASID allocated during bind() is limited by the IOMMU
>> + *   capacity, and by the device PASID width defined in the PCI capability or
>> in
>> + *   the firmware description. Setting @max_pasid to a non-zero value
>> smaller
>> + *   than this limit overrides it.
>> + *
>> + * - If the device should support I/O Page Faults (e.g. PCI PRI),
>> + *   IOMMU_SVA_FEAT_IOPF must be requested.
>> + *
>> + * The device should not be be performing any DMA while this function is
> 
> remove double "be"
> 
>> + * running.
> 
> "otherwise the behavior is undefined"

ok

[...]
>> +	ret = domain->ops->sva_device_init(dev, features, &min_pasid,
>> +					   &max_pasid);
>> +	if (ret)
>> +		return ret;
>> +
>> +	/* FIXME: racy. Next version should have a mutex (same as fault
>> handler) */
>> +	dev_param->sva_features = features;
>> +	dev_param->min_pasid = min_pasid;
>> +	dev_param->max_pasid = max_pasid;
> 
> what's the point of min_pasid here?

Arm SMMUv3 uses entry 0 of the PASID table for the default (non-pasid)
context, so it needs to set min_pasid to 1. AMD IOMMU recently added a
similar feature (GIoSup), if I understood correctly.

Thanks,
Jean
Tian, Kevin Feb. 13, 2018, 11:43 p.m. UTC | #3
> From: Jean-Philippe Brucker
> Sent: Tuesday, February 13, 2018 8:40 PM
> 
> 
> [...]
> >> +
> >> +/**
> >> + * iommu_sva_device_init() - Initialize Shared Virtual Addressing for a
> >> device
> >> + * @dev: the device
> >> + * @features: bitmask of features that need to be initialized
> >> + * @max_pasid: max PASID value supported by the device
> >> + *
> >> + * Users of the bind()/unbind() API must call this function to initialize all
> >> + * features required for SVA.
> >> + *
> >> + * - If the device should support multiple address spaces (e.g. PCI
> PASID),
> >> + *   IOMMU_SVA_FEAT_PASID must be requested.
> >
> > I think it is by default assumed when using this API, based on definition of
> > SVA. Can you elaborate the situation where this flag can be cleared?
> 
> When passing a device to userspace, you could also share its non-pasid
> address space with the process. It requires a new domain type so is left
> as a TODO in patch 2/37. I did get requests for this feature, though I
> think it was mostly for prototyping. I guess I could remove the flag, and
> reintroduce it as IOMMU_SVA_FEAT_NO_PASID later on.

sorry I still didn't get the definition of non-pasid address space. 
Did you mean the GPA/IOVA address space and no_pasid implies
actually some default PASID associated?

> 
> [...]
> >> +	ret = domain->ops->sva_device_init(dev, features, &min_pasid,
> >> +					   &max_pasid);
> >> +	if (ret)
> >> +		return ret;
> >> +
> >> +	/* FIXME: racy. Next version should have a mutex (same as fault
> >> handler) */
> >> +	dev_param->sva_features = features;
> >> +	dev_param->min_pasid = min_pasid;
> >> +	dev_param->max_pasid = max_pasid;
> >
> > what's the point of min_pasid here?
> 
> Arm SMMUv3 uses entry 0 of the PASID table for the default (non-pasid)
> context, so it needs to set min_pasid to 1. AMD IOMMU recently added a
> similar feature (GIoSup), if I understood correctly.
> 

just for such purpose maybe we should just define a reserved_pasid
otherwise there will be some waste if an implementation allows it
non-zero.

Thanks
Kevin
Joerg Roedel Feb. 15, 2018, 9:59 a.m. UTC | #4
On Mon, Feb 12, 2018 at 06:33:16PM +0000, Jean-Philippe Brucker wrote:
  
> +config IOMMU_SVA
> +	bool "Shared Virtual Addressing API for the IOMMU"
> +	select IOMMU_API
> +	help
> +	  Enable process address space management for the IOMMU API. In systems
> +	  that support it, device drivers can bind process address spaces to
> +	  devices and share their page tables using this API.
> +
> +	  If unsure, say N here.

I think this should be an option selected by IOMMU driver and not be
activly selectable by the user.

> +/**
> + * iommu_sva_device_shutdown() - Shutdown Shared Virtual Addressing for a device
> + * @dev: the device
> + *
> + * Disable SVA. The device should not be performing any DMA while this function
> + * is running.

Is this a good idea? How about devices that get hot-unplugged while
processes still use them and there is DMA going back and forth? This
function can be the point to shut down all ongoing stuff first and the
shutdown the device.
Jean-Philippe Brucker Feb. 15, 2018, 12:42 p.m. UTC | #5
On 13/02/18 23:43, Tian, Kevin wrote:
>> From: Jean-Philippe Brucker
>> Sent: Tuesday, February 13, 2018 8:40 PM
>>
>>
>> [...]
>>>> +
>>>> +/**
>>>> + * iommu_sva_device_init() - Initialize Shared Virtual Addressing for a
>>>> device
>>>> + * @dev: the device
>>>> + * @features: bitmask of features that need to be initialized
>>>> + * @max_pasid: max PASID value supported by the device
>>>> + *
>>>> + * Users of the bind()/unbind() API must call this function to initialize all
>>>> + * features required for SVA.
>>>> + *
>>>> + * - If the device should support multiple address spaces (e.g. PCI
>> PASID),
>>>> + *   IOMMU_SVA_FEAT_PASID must be requested.
>>>
>>> I think it is by default assumed when using this API, based on definition of
>>> SVA. Can you elaborate the situation where this flag can be cleared?
>>
>> When passing a device to userspace, you could also share its non-pasid
>> address space with the process. It requires a new domain type so is left
>> as a TODO in patch 2/37. I did get requests for this feature, though I
>> think it was mostly for prototyping. I guess I could remove the flag, and
>> reintroduce it as IOMMU_SVA_FEAT_NO_PASID later on.
> 
> sorry I still didn't get the definition of non-pasid address space. 
> Did you mean the GPA/IOVA address space and no_pasid implies
> actually some default PASID associated?

Yes I mean merging the process address space and IOVA space. There are no
PASIDs involved if the device or the IOMMU doesn't support it. Instead of
private DMA page tables you program the mm pgd into the IOMMU. A VFIO
userspace driver, instead of sending MAP/UNMAP ioctl, could simply issue a
BIND.

Technically nothing prevents it, but now the resv problem discussed on
patch 2/37 stands out. For example on x86 you'd probably need to carve the
IOAPIC MSI range out of the process address space. On Arm you'd need to
create a write-only mapping for MSIs (IOMMU translates it to the IRQ chip
address, but thankfully accessing the doorbell from CPU side doesn't
trigger an MSI.)

>> [...]
>>>> +	ret = domain->ops->sva_device_init(dev, features, &min_pasid,
>>>> +					   &max_pasid);
>>>> +	if (ret)
>>>> +		return ret;
>>>> +
>>>> +	/* FIXME: racy. Next version should have a mutex (same as fault
>>>> handler) */
>>>> +	dev_param->sva_features = features;
>>>> +	dev_param->min_pasid = min_pasid;
>>>> +	dev_param->max_pasid = max_pasid;
>>>
>>> what's the point of min_pasid here?
>>
>> Arm SMMUv3 uses entry 0 of the PASID table for the default (non-pasid)
>> context, so it needs to set min_pasid to 1. AMD IOMMU recently added a
>> similar feature (GIoSup), if I understood correctly.
>>
> 
> just for such purpose maybe we should just define a reserved_pasid
> otherwise there will be some waste if an implementation allows it
> non-zero.

What's wasted? It's slightly simpler to use min_pasid because we just pass
that limit to idr_alloc(). With a reserved_pasid we'll have to call
idr_alloc(reserved_pasid) once, for the same result.

Thanks,
Jean
Jean-Philippe Brucker Feb. 15, 2018, 12:43 p.m. UTC | #6
On 15/02/18 09:59, Joerg Roedel wrote:
> On Mon, Feb 12, 2018 at 06:33:16PM +0000, Jean-Philippe Brucker wrote:
>   
>> +config IOMMU_SVA
>> +	bool "Shared Virtual Addressing API for the IOMMU"
>> +	select IOMMU_API
>> +	help
>> +	  Enable process address space management for the IOMMU API. In systems
>> +	  that support it, device drivers can bind process address spaces to
>> +	  devices and share their page tables using this API.
>> +
>> +	  If unsure, say N here.
> 
> I think this should be an option selected by IOMMU driver and not be
> activly selectable by the user.

Ok

>> +/**
>> + * iommu_sva_device_shutdown() - Shutdown Shared Virtual Addressing for a device
>> + * @dev: the device
>> + *
>> + * Disable SVA. The device should not be performing any DMA while this function
>> + * is running.
> 
> Is this a good idea? How about devices that get hot-unplugged while
> processes still use them and there is DMA going back and forth? This
> function can be the point to shut down all ongoing stuff first and the
> shutdown the device.

To be honest I don't know how hot-unplug works. But sva_device_shutdown()
may be called, for instance, by the device driver before the device
disappears, so it has to know how to stop DMA before calling it. The IOMMU
driver can't really do anything more.

For hot-unplug I guess that device_driver::remove() is called first,
allowing it to stop all DMA and call sva_device_shutdown().

Then the IOMMU gets a BUS_NOTIFY_REMOVED_DEVICE notification and calls
iommu_ops::remove_device(), allowing to clean up SVA structure if the
device driver didn't call unbind_device() and sva_device_shutdown(). But
at that point we don't have a way to cooperate with the driver to stop DMA
anymore.

Thanks,
Jean
Tian, Kevin Feb. 27, 2018, 6:21 a.m. UTC | #7
> From: Jean-Philippe Brucker [mailto:jean-philippe.brucker@arm.com]
> Sent: Thursday, February 15, 2018 8:42 PM
> 
> On 13/02/18 23:43, Tian, Kevin wrote:
> >> From: Jean-Philippe Brucker
> >> Sent: Tuesday, February 13, 2018 8:40 PM
> >>
> >>
> >> [...]
> >>>> +
> >>>> +/**
> >>>> + * iommu_sva_device_init() - Initialize Shared Virtual Addressing for
> a
> >>>> device
> >>>> + * @dev: the device
> >>>> + * @features: bitmask of features that need to be initialized
> >>>> + * @max_pasid: max PASID value supported by the device
> >>>> + *
> >>>> + * Users of the bind()/unbind() API must call this function to initialize
> all
> >>>> + * features required for SVA.
> >>>> + *
> >>>> + * - If the device should support multiple address spaces (e.g. PCI
> >> PASID),
> >>>> + *   IOMMU_SVA_FEAT_PASID must be requested.
> >>>
> >>> I think it is by default assumed when using this API, based on definition
> of
> >>> SVA. Can you elaborate the situation where this flag can be cleared?
> >>
> >> When passing a device to userspace, you could also share its non-pasid
> >> address space with the process. It requires a new domain type so is left
> >> as a TODO in patch 2/37. I did get requests for this feature, though I
> >> think it was mostly for prototyping. I guess I could remove the flag, and
> >> reintroduce it as IOMMU_SVA_FEAT_NO_PASID later on.
> >
> > sorry I still didn't get the definition of non-pasid address space.
> > Did you mean the GPA/IOVA address space and no_pasid implies
> > actually some default PASID associated?
> 
> Yes I mean merging the process address space and IOVA space. There are
> no
> PASIDs involved if the device or the IOMMU doesn't support it. Instead of
> private DMA page tables you program the mm pgd into the IOMMU. A VFIO
> userspace driver, instead of sending MAP/UNMAP ioctl, could simply issue
> a
> BIND.

got it. yes it's better to remove it for now which can avoid
unnecessary confusion. :-)

> 
> Technically nothing prevents it, but now the resv problem discussed on
> patch 2/37 stands out. For example on x86 you'd probably need to carve
> the
> IOAPIC MSI range out of the process address space. On Arm you'd need to
> create a write-only mapping for MSIs (IOMMU translates it to the IRQ chip
> address, but thankfully accessing the doorbell from CPU side doesn't
> trigger an MSI.)

so if overlap already exists when binding a process address space
(since binding may happen much later than creating the process),
I assume the call will simply fail since carve out at this point is not
possible?

> 
> >> [...]
> >>>> +	ret = domain->ops->sva_device_init(dev, features, &min_pasid,
> >>>> +					   &max_pasid);
> >>>> +	if (ret)
> >>>> +		return ret;
> >>>> +
> >>>> +	/* FIXME: racy. Next version should have a mutex (same as fault
> >>>> handler) */
> >>>> +	dev_param->sva_features = features;
> >>>> +	dev_param->min_pasid = min_pasid;
> >>>> +	dev_param->max_pasid = max_pasid;
> >>>
> >>> what's the point of min_pasid here?
> >>
> >> Arm SMMUv3 uses entry 0 of the PASID table for the default (non-pasid)
> >> context, so it needs to set min_pasid to 1. AMD IOMMU recently added
> a
> >> similar feature (GIoSup), if I understood correctly.
> >>
> >
> > just for such purpose maybe we should just define a reserved_pasid
> > otherwise there will be some waste if an implementation allows it
> > non-zero.
> 
> What's wasted? It's slightly simpler to use min_pasid because we just pass
> that limit to idr_alloc(). With a reserved_pasid we'll have to call
> idr_alloc(reserved_pasid) once, for the same result.
> 

I'm thinking about the case where an implementation allows
software to define a random reserved_pasid, then banning
all pasids below reserved one could be a waste. But after
more thinking it is not a big problem. We can request such
driver to use 0 as reserved_pasid then same situation as
ARM side.

Thanks
Kevin
Jean-Philippe Brucker Feb. 28, 2018, 4:20 p.m. UTC | #8
On 27/02/18 06:21, Tian, Kevin wrote:
[...]
>> Technically nothing prevents it, but now the resv problem discussed on
>> patch 2/37 stands out. For example on x86 you'd probably need to carve
>> the
>> IOAPIC MSI range out of the process address space. On Arm you'd need to
>> create a write-only mapping for MSIs (IOMMU translates it to the IRQ chip
>> address, but thankfully accessing the doorbell from CPU side doesn't
>> trigger an MSI.)
> 
> so if overlap already exists when binding a process address space
> (since binding may happen much later than creating the process),
> I assume the call will simply fail since carve out at this point is not
> possible?

Yes in this case I think it's safer to abort the bind() call

Thanks,
Jean
diff mbox

Patch

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f3a21343e636..555147a61f7c 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -74,6 +74,16 @@  config IOMMU_DMA
 	select IOMMU_IOVA
 	select NEED_SG_DMA_LENGTH
 
+config IOMMU_SVA
+	bool "Shared Virtual Addressing API for the IOMMU"
+	select IOMMU_API
+	help
+	  Enable process address space management for the IOMMU API. In systems
+	  that support it, device drivers can bind process address spaces to
+	  devices and share their page tables using this API.
+
+	  If unsure, say N here.
+
 config FSL_PAMU
 	bool "Freescale IOMMU support"
 	depends on PCI
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 1fb695854809..1dbcc89ebe4c 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -3,6 +3,7 @@  obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
 obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
+obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
new file mode 100644
index 000000000000..cab5d723520f
--- /dev/null
+++ b/drivers/iommu/iommu-sva.c
@@ -0,0 +1,90 @@ 
+/*
+ * Track processes address spaces bound to devices and allocate PASIDs.
+ *
+ * Copyright (C) 2018 ARM Ltd.
+ * Author: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#include <linux/iommu.h>
+
+/**
+ * iommu_sva_device_init() - Initialize Shared Virtual Addressing for a device
+ * @dev: the device
+ * @features: bitmask of features that need to be initialized
+ * @max_pasid: max PASID value supported by the device
+ *
+ * Users of the bind()/unbind() API must call this function to initialize all
+ * features required for SVA.
+ *
+ * - If the device should support multiple address spaces (e.g. PCI PASID),
+ *   IOMMU_SVA_FEAT_PASID must be requested.
+ *
+ *   By default the PASID allocated during bind() is limited by the IOMMU
+ *   capacity, and by the device PASID width defined in the PCI capability or in
+ *   the firmware description. Setting @max_pasid to a non-zero value smaller
+ *   than this limit overrides it.
+ *
+ * - If the device should support I/O Page Faults (e.g. PCI PRI),
+ *   IOMMU_SVA_FEAT_IOPF must be requested.
+ *
+ * The device should not be be performing any DMA while this function is
+ * running.
+ *
+ * Return 0 if initialization succeeded, or an error.
+ */
+int iommu_sva_device_init(struct device *dev, unsigned long features,
+			  unsigned int max_pasid)
+{
+	int ret;
+	unsigned int min_pasid = 0;
+	struct iommu_param *dev_param = dev->iommu_param;
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+	if (!domain || !dev_param || !domain->ops->sva_device_init)
+		return -ENODEV;
+
+	/*
+	 * IOMMU driver updates the limits depending on the IOMMU and device
+	 * capabilities.
+	 */
+	ret = domain->ops->sva_device_init(dev, features, &min_pasid,
+					   &max_pasid);
+	if (ret)
+		return ret;
+
+	/* FIXME: racy. Next version should have a mutex (same as fault handler) */
+	dev_param->sva_features = features;
+	dev_param->min_pasid = min_pasid;
+	dev_param->max_pasid = max_pasid;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_sva_device_init);
+
+/**
+ * iommu_sva_device_shutdown() - Shutdown Shared Virtual Addressing for a device
+ * @dev: the device
+ *
+ * Disable SVA. The device should not be performing any DMA while this function
+ * is running.
+ */
+int iommu_sva_device_shutdown(struct device *dev)
+{
+	struct iommu_param *dev_param = dev->iommu_param;
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+	if (!domain)
+		return -ENODEV;
+
+	if (domain->ops->sva_device_shutdown)
+		domain->ops->sva_device_shutdown(dev);
+
+	dev_param->sva_features = 0;
+	dev_param->min_pasid = 0;
+	dev_param->max_pasid = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_sva_device_shutdown);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 66ef406396e9..e9e09eecdece 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -60,6 +60,11 @@  typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
 			struct device *, unsigned long, int, void *);
 typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault_event *, void *);
 
+/* Request PASID support */
+#define IOMMU_SVA_FEAT_PASID		(1 << 0)
+/* Request I/O page fault support */
+#define IOMMU_SVA_FEAT_IOPF		(1 << 1)
+
 struct iommu_domain_geometry {
 	dma_addr_t aperture_start; /* First address that can be mapped    */
 	dma_addr_t aperture_end;   /* Last address that can be mapped     */
@@ -197,6 +202,8 @@  struct page_response_msg {
  * @domain_free: free iommu domain
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
+ * @sva_device_init: initialize Shared Virtual Adressing for a device
+ * @sva_device_shutdown: shutdown Shared Virtual Adressing for a device
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @map_sg: map a scatter-gather list of physically contiguous memory chunks
@@ -230,6 +237,10 @@  struct iommu_ops {
 
 	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
 	void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+	int (*sva_device_init)(struct device *dev, unsigned long features,
+			       unsigned int *min_pasid,
+			       unsigned int *max_pasid);
+	void (*sva_device_shutdown)(struct device *dev);
 	int (*map)(struct iommu_domain *domain, unsigned long iova,
 		   phys_addr_t paddr, size_t size, int prot);
 	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
@@ -385,6 +396,9 @@  struct iommu_fault_param {
  */
 struct iommu_param {
 	struct iommu_fault_param *fault_param;
+	unsigned long sva_features;
+	unsigned int min_pasid;
+	unsigned int max_pasid;
 };
 
 int  iommu_device_register(struct iommu_device *iommu);
@@ -878,4 +892,22 @@  const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 
 #endif /* CONFIG_IOMMU_API */
 
+#ifdef CONFIG_IOMMU_SVA
+extern int iommu_sva_device_init(struct device *dev, unsigned long features,
+				 unsigned int max_pasid);
+extern int iommu_sva_device_shutdown(struct device *dev);
+#else /* CONFIG_IOMMU_SVA */
+static inline int iommu_sva_device_init(struct device *dev,
+					unsigned long features,
+					unsigned int max_pasid)
+{
+	return -ENODEV;
+}
+
+static inline int iommu_sva_device_shutdown(struct device *dev)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_IOMMU_SVA */
+
 #endif /* __LINUX_IOMMU_H */