
[RFC,v2,8/8] iommu/arm-smmu: implement add_reserved_regions callback

Message ID 1478258646-3117-9-git-send-email-eric.auger@redhat.com (mailing list archive)
State New, archived

Commit Message

Eric Auger Nov. 4, 2016, 11:24 a.m. UTC
The function populates the list of reserved regions with the
PCI host bridge windows and the MSI IOVA range.

At the moment an arbitrary MSI IOVA window is set at 0x8000000
of size 1MB.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

RFC v1 -> v2: use defines for MSI IOVA base and length
---
 drivers/iommu/arm-smmu.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

Comments

Robin Murphy Nov. 4, 2016, 2:16 p.m. UTC | #1
On 04/11/16 11:24, Eric Auger wrote:
> The function populates the list of reserved regions with the
> PCI host bridge windows and the MSI IOVA range.
> 
> At the moment an arbitrary MSI IOVA window is set at 0x8000000
> of size 1MB.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---
> 
> RFC v1 -> v2: use defines for MSI IOVA base and length
> ---
>  drivers/iommu/arm-smmu.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 66 insertions(+)
> 
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index c841eb7..c07ea41 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -278,6 +278,9 @@ enum arm_smmu_s2cr_privcfg {
>  
>  #define FSYNR0_WNR			(1 << 4)
>  
> +#define MSI_IOVA_BASE			0x8000000
> +#define MSI_IOVA_LENGTH			0x100000
> +
>  static int force_stage;
>  module_param(force_stage, int, S_IRUGO);
>  MODULE_PARM_DESC(force_stage,
> @@ -1533,6 +1536,68 @@ static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
>  	return iommu_fwspec_add_ids(dev, &fwid, 1);
>  }
>  
> +static int add_pci_window_reserved_regions(struct iommu_domain *domain,
> +					   struct pci_dev *dev)
> +{
> +	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
> +	struct iommu_reserved_region *region;
> +	struct resource_entry *window;
> +	phys_addr_t start;
> +	size_t length;
> +
> +	resource_list_for_each_entry(window, &bridge->windows) {
> +		if (resource_type(window->res) != IORESOURCE_MEM &&
> +		    resource_type(window->res) != IORESOURCE_IO)
> +			continue;
> +
> +		start = window->res->start - window->offset;
> +		length = window->res->end - window->res->start + 1;
> +
> +		iommu_reserved_region_for_each(region, domain) {
> +			if (region->start == start && region->length == length)
> +				continue;
> +		}
> +		region = kzalloc(sizeof(*region), GFP_KERNEL);
> +		if (!region)
> +			return -ENOMEM;
> +
> +		region->start = start;
> +		region->length = length;
> +
> +		list_add_tail(&region->list, &domain->reserved_regions);
> +	}
> +	return 0;
> +}

Per the previous observation, let's just convert
iova_reserve_pci_windows() into a public iommu_dma_get_dm_regions()
callback...
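
Roughly, a minimal sketch of what that could look like, assuming the existing
get_dm_regions() prototype and struct iommu_dm_region from include/linux/iommu.h
(error handling and the rest of the dma-iommu plumbing omitted; this is not the
actual implementation):

	/* Needs <linux/iommu.h>, <linux/pci.h>, <linux/slab.h>.
	 * Report the PCI host bridge windows through the existing
	 * get_dm_regions() hook rather than a new per-driver callback. */
	static void iommu_dma_get_dm_regions(struct device *dev, struct list_head *list)
	{
		struct pci_host_bridge *bridge;
		struct resource_entry *window;

		if (!dev_is_pci(dev))
			return;

		bridge = pci_find_host_bridge(to_pci_dev(dev)->bus);
		resource_list_for_each_entry(window, &bridge->windows) {
			struct iommu_dm_region *region;

			if (resource_type(window->res) != IORESOURCE_MEM)
				continue;

			region = kzalloc(sizeof(*region), GFP_KERNEL);
			if (!region)
				return;

			/* Reserve the window as seen from the PCI (DMA) side */
			region->start = window->res->start - window->offset;
			region->length = window->res->end - window->res->start + 1;
			list_add_tail(&region->list, list);
		}
	}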

> +static int arm_smmu_add_reserved_regions(struct iommu_domain *domain,
> +					 struct device *device)
> +{
> +	struct iommu_reserved_region *region;
> +	int ret = 0;
> +
> +	/* An arbitrary 1MB region starting at 0x8000000 is reserved for MSIs */
> +	if (!domain->iova_cookie) {
> +
> +		region = kzalloc(sizeof(*region), GFP_KERNEL);
> +		if (!region)
> +			return -ENOMEM;
> +
> +		region->start = MSI_IOVA_BASE;
> +		region->length = MSI_IOVA_LENGTH;
> +		list_add_tail(&region->list, &domain->reserved_regions);
> +
> +		ret = iommu_get_dma_msi_region_cookie(domain,
> +						region->start, region->length);
> +		if (ret)
> +			return ret;

...and stick this bit in there as well. Then we only need to add code to
individual IOMMU drivers if there are also regions which bypass
translation at the IOMMU itself (if someone does ever integrate an SMMU
with an upstream/parallel ITS, x86-style, I think we'd need to describe
that with a DT property on the SMMU, so it would have to be the SMMU
driver's responsibility).

Robin.

> +	}
> +
> +	if (dev_is_pci(device))
> +		ret =  add_pci_window_reserved_regions(domain,
> +						       to_pci_dev(device));
> +	return ret;
> +}
> +
>  static struct iommu_ops arm_smmu_ops = {
>  	.capable		= arm_smmu_capable,
>  	.domain_alloc		= arm_smmu_domain_alloc,
> @@ -1548,6 +1613,7 @@ static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
>  	.domain_get_attr	= arm_smmu_domain_get_attr,
>  	.domain_set_attr	= arm_smmu_domain_set_attr,
>  	.of_xlate		= arm_smmu_of_xlate,
> +	.add_reserved_regions	= arm_smmu_add_reserved_regions,
>  	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
>  };
>  
> 

Joerg Roedel Nov. 10, 2016, 3:46 p.m. UTC | #2
On Fri, Nov 04, 2016 at 11:24:06AM +0000, Eric Auger wrote:
> The function populates the list of reserved regions with the
> PCI host bridge windows and the MSI IOVA range.
> 
> At the moment an arbitrary MSI IOVA window is set at 0x8000000
> of size 1MB.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---
> 
> RFC v1 -> v2: use defines for MSI IOVA base and length
> ---
>  drivers/iommu/arm-smmu.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 66 insertions(+)
> 
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index c841eb7..c07ea41 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -278,6 +278,9 @@ enum arm_smmu_s2cr_privcfg {
>  
>  #define FSYNR0_WNR			(1 << 4)
>  
> +#define MSI_IOVA_BASE			0x8000000
> +#define MSI_IOVA_LENGTH			0x100000
> +
>  static int force_stage;
>  module_param(force_stage, int, S_IRUGO);
>  MODULE_PARM_DESC(force_stage,
> @@ -1533,6 +1536,68 @@ static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
>  	return iommu_fwspec_add_ids(dev, &fwid, 1);
>  }
>  
> +static int add_pci_window_reserved_regions(struct iommu_domain *domain,
> +					   struct pci_dev *dev)
> +{
> +	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
> +	struct iommu_reserved_region *region;
> +	struct resource_entry *window;
> +	phys_addr_t start;
> +	size_t length;
> +
> +	resource_list_for_each_entry(window, &bridge->windows) {
> +		if (resource_type(window->res) != IORESOURCE_MEM &&
> +		    resource_type(window->res) != IORESOURCE_IO)
> +			continue;

Why do you care about IO resources?

> +
> +		start = window->res->start - window->offset;
> +		length = window->res->end - window->res->start + 1;
> +
> +		iommu_reserved_region_for_each(region, domain) {
> +			if (region->start == start && region->length == length)
> +				continue;
> +		}
> +		region = kzalloc(sizeof(*region), GFP_KERNEL);
> +		if (!region)
> +			return -ENOMEM;
> +
> +		region->start = start;
> +		region->length = length;
> +
> +		list_add_tail(&region->list, &domain->reserved_regions);
> +	}
> +	return 0;
> +}
> +
> +static int arm_smmu_add_reserved_regions(struct iommu_domain *domain,
> +					 struct device *device)
> +{
> +	struct iommu_reserved_region *region;
> +	int ret = 0;
> +
> +	/* An arbitrary 1MB region starting at 0x8000000 is reserved for MSIs */
> +	if (!domain->iova_cookie) {
> +
> +		region = kzalloc(sizeof(*region), GFP_KERNEL);
> +		if (!region)
> +			return -ENOMEM;
> +
> +		region->start = MSI_IOVA_BASE;
> +		region->length = MSI_IOVA_LENGTH;
> +		list_add_tail(&region->list, &domain->reserved_regions);
> +
> +		ret = iommu_get_dma_msi_region_cookie(domain,
> +						region->start, region->length);
> +		if (ret)
> +			return ret;

Gah, I hate this. Is there no other, simpler way to get the MSI
region than allocating an iova-domain? In that regard, I also _hate_ the
previous patch, which introduces this weird iommu_get_dma_msi_region_cookie()
function.

Allocating an iova-domain is pretty expensive, as it also includes
per-cpu data-structures and all, so please don't do this just for the
purpose of compiling a list of reserved regions.



	Joerg

Eric Auger Nov. 10, 2016, 3:57 p.m. UTC | #3
Hi Joerg,

On 10/11/2016 16:46, Joerg Roedel wrote:
> On Fri, Nov 04, 2016 at 11:24:06AM +0000, Eric Auger wrote:
>> The function populates the list of reserved regions with the
>> PCI host bridge windows and the MSI IOVA range.
>>
>> At the moment an arbitrary MSI IOVA window is set at 0x8000000
>> of size 1MB.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> RFC v1 -> v2: use defines for MSI IOVA base and length
>> ---
>>  drivers/iommu/arm-smmu.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 66 insertions(+)
>>
>> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
>> index c841eb7..c07ea41 100644
>> --- a/drivers/iommu/arm-smmu.c
>> +++ b/drivers/iommu/arm-smmu.c
>> @@ -278,6 +278,9 @@ enum arm_smmu_s2cr_privcfg {
>>  
>>  #define FSYNR0_WNR			(1 << 4)
>>  
>> +#define MSI_IOVA_BASE			0x8000000
>> +#define MSI_IOVA_LENGTH			0x100000
>> +
>>  static int force_stage;
>>  module_param(force_stage, int, S_IRUGO);
>>  MODULE_PARM_DESC(force_stage,
>> @@ -1533,6 +1536,68 @@ static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
>>  	return iommu_fwspec_add_ids(dev, &fwid, 1);
>>  }
>>  
>> +static int add_pci_window_reserved_regions(struct iommu_domain *domain,
>> +					   struct pci_dev *dev)
>> +{
>> +	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
>> +	struct iommu_reserved_region *region;
>> +	struct resource_entry *window;
>> +	phys_addr_t start;
>> +	size_t length;
>> +
>> +	resource_list_for_each_entry(window, &bridge->windows) {
>> +		if (resource_type(window->res) != IORESOURCE_MEM &&
>> +		    resource_type(window->res) != IORESOURCE_IO)
>> +			continue;
> 
> Why do you care about IO resources?
Effectively that's a draft implementation inspired by "iommu/dma:
Avoid PCI host bridge windows". Also, not all PCI host bridge windows
cause issues; my understanding is that only those not supporting ACS are
a problem.
> 
>> +
>> +		start = window->res->start - window->offset;
>> +		length = window->res->end - window->res->start + 1;
>> +
>> +		iommu_reserved_region_for_each(region, domain) {
>> +			if (region->start == start && region->length == length)
>> +				continue;
>> +		}
>> +		region = kzalloc(sizeof(*region), GFP_KERNEL);
>> +		if (!region)
>> +			return -ENOMEM;
>> +
>> +		region->start = start;
>> +		region->length = length;
>> +
>> +		list_add_tail(&region->list, &domain->reserved_regions);
>> +	}
>> +	return 0;
>> +}
>> +
>> +static int arm_smmu_add_reserved_regions(struct iommu_domain *domain,
>> +					 struct device *device)
>> +{
>> +	struct iommu_reserved_region *region;
>> +	int ret = 0;
>> +
>> +	/* An arbitrary 1MB region starting at 0x8000000 is reserved for MSIs */
>> +	if (!domain->iova_cookie) {
>> +
>> +		region = kzalloc(sizeof(*region), GFP_KERNEL);
>> +		if (!region)
>> +			return -ENOMEM;
>> +
>> +		region->start = MSI_IOVA_BASE;
>> +		region->length = MSI_IOVA_LENGTH;
>> +		list_add_tail(&region->list, &domain->reserved_regions);
>> +
>> +		ret = iommu_get_dma_msi_region_cookie(domain,
>> +						region->start, region->length);
>> +		if (ret)
>> +			return ret;
> 
> Gah, I hate this. Is there no other and simpler way to get the MSI
> region than allocating an iova-domain? In that regard, I also _hate_ the
> patch before introducing this weird iommu_get_dma_msi_region_cookie()
> function.
> 
> Allocation an iova-domain is pretty expensive, as it also includes
> per-cpu data-structures and all, so please don't do this just for the
> purpose of compiling a list of reserved regions.
It does not only serve to register the MSI IOVA region. We also need
to allocate an iova_domain from which MSI IOVAs will be allocated at the
request of the relevant MSI controllers. Do you mean you would rather we
did not use the iova allocator for this purpose?

Thanks

Eric
> 
> 
> 
> 	Joerg
> 
Robin Murphy Nov. 10, 2016, 4:07 p.m. UTC | #4
On 10/11/16 15:46, Joerg Roedel wrote:
> On Fri, Nov 04, 2016 at 11:24:06AM +0000, Eric Auger wrote:
>> The function populates the list of reserved regions with the
>> PCI host bridge windows and the MSI IOVA range.
>>
>> At the moment an arbitrary MSI IOVA window is set at 0x8000000
>> of size 1MB.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> RFC v1 -> v2: use defines for MSI IOVA base and length
>> ---
>>  drivers/iommu/arm-smmu.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 66 insertions(+)
>>
>> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
>> index c841eb7..c07ea41 100644
>> --- a/drivers/iommu/arm-smmu.c
>> +++ b/drivers/iommu/arm-smmu.c
>> @@ -278,6 +278,9 @@ enum arm_smmu_s2cr_privcfg {
>>  
>>  #define FSYNR0_WNR			(1 << 4)
>>  
>> +#define MSI_IOVA_BASE			0x8000000
>> +#define MSI_IOVA_LENGTH			0x100000
>> +
>>  static int force_stage;
>>  module_param(force_stage, int, S_IRUGO);
>>  MODULE_PARM_DESC(force_stage,
>> @@ -1533,6 +1536,68 @@ static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
>>  	return iommu_fwspec_add_ids(dev, &fwid, 1);
>>  }
>>  
>> +static int add_pci_window_reserved_regions(struct iommu_domain *domain,
>> +					   struct pci_dev *dev)
>> +{
>> +	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
>> +	struct iommu_reserved_region *region;
>> +	struct resource_entry *window;
>> +	phys_addr_t start;
>> +	size_t length;
>> +
>> +	resource_list_for_each_entry(window, &bridge->windows) {
>> +		if (resource_type(window->res) != IORESOURCE_MEM &&
>> +		    resource_type(window->res) != IORESOURCE_IO)
>> +			continue;
> 
> Why do you care about IO resources?

[since this is essentially code I wrote]

Because they occupy some area of the PCI address space, I assumed
that, like memory windows, they would be treated as P2P. Is that not the
case?

>> +
>> +		start = window->res->start - window->offset;
>> +		length = window->res->end - window->res->start + 1;
>> +
>> +		iommu_reserved_region_for_each(region, domain) {
>> +			if (region->start == start && region->length == length)
>> +				continue;
>> +		}
>> +		region = kzalloc(sizeof(*region), GFP_KERNEL);
>> +		if (!region)
>> +			return -ENOMEM;
>> +
>> +		region->start = start;
>> +		region->length = length;
>> +
>> +		list_add_tail(&region->list, &domain->reserved_regions);
>> +	}
>> +	return 0;
>> +}
>> +
>> +static int arm_smmu_add_reserved_regions(struct iommu_domain *domain,
>> +					 struct device *device)
>> +{
>> +	struct iommu_reserved_region *region;
>> +	int ret = 0;
>> +
>> +	/* An arbitrary 1MB region starting at 0x8000000 is reserved for MSIs */
>> +	if (!domain->iova_cookie) {
>> +
>> +		region = kzalloc(sizeof(*region), GFP_KERNEL);
>> +		if (!region)
>> +			return -ENOMEM;
>> +
>> +		region->start = MSI_IOVA_BASE;
>> +		region->length = MSI_IOVA_LENGTH;
>> +		list_add_tail(&region->list, &domain->reserved_regions);
>> +
>> +		ret = iommu_get_dma_msi_region_cookie(domain,
>> +						region->start, region->length);
>> +		if (ret)
>> +			return ret;
> 
> Gah, I hate this. Is there no other and simpler way to get the MSI
> region than allocating an iova-domain? In that regard, I also _hate_ the
> patch before introducing this weird iommu_get_dma_msi_region_cookie()
> function.

Mea culpa ;)

> Allocation an iova-domain is pretty expensive, as it also includes
> per-cpu data-structures and all, so please don't do this just for the
> purpose of compiling a list of reserved regions.

Yeah, this was really more of a "get it working" thing - the automatic
MSI mapping stuff is already plumbed in for DMA domains, so faking up a
DMA domain is the easy way in. I'll have a look at factoring out the
relevant parts a bit so that MSI-only regions can have an alternate
cookie with a lightweight allocator.
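
To give a rough idea of the direction (names and layout are purely
hypothetical, not the eventual implementation), such an MSI-only cookie
could be little more than a fixed window plus a page bitmap:

	/* Hypothetical lightweight MSI cookie: a fixed IOVA window carved into
	 * page-sized slots and tracked with a bitmap, instead of a full iova_domain. */
	struct msi_iova_cookie {
		dma_addr_t	base;		/* e.g. MSI_IOVA_BASE   */
		size_t		size;		/* e.g. MSI_IOVA_LENGTH */
		unsigned long	*bitmap;	/* one bit per page     */
		spinlock_t	lock;
	};

	static dma_addr_t msi_cookie_alloc_page(struct msi_iova_cookie *cookie)
	{
		unsigned long nr_pages = cookie->size >> PAGE_SHIFT;
		unsigned long bit;

		spin_lock(&cookie->lock);
		bit = find_first_zero_bit(cookie->bitmap, nr_pages);
		if (bit < nr_pages)
			set_bit(bit, cookie->bitmap);
		spin_unlock(&cookie->lock);

		/* 0 means no free slot (the window itself never starts at 0 here) */
		return bit < nr_pages ? cookie->base + (bit << PAGE_SHIFT) : 0;
	}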

Robin.
Joerg Roedel Nov. 10, 2016, 4:13 p.m. UTC | #5
On Thu, Nov 10, 2016 at 04:57:51PM +0100, Auger Eric wrote:
> It does not only serve the purpose to register the MSI IOVA region. We
> also need to allocate an iova_domain where MSI IOVAs will be allocated
> upon the request of the relevant MSI controllers. Do you mean you don't
> like to use the iova allocator for this purpose?

Yes, it looks like the only purpose of iommu_get_dma_msi_region_cookie()
is to get the msi-region information into the reserved-list.

Why do you need to 'allocate' the MSI region after all? Except for
IOMMU_DOMAIN_DMA domains the iommu-core code does not care about address
allocation. This is up to the users of the domain, which includes that
the user has to take care of the MSI region.

Besides that, 'iommu_get_dma_msi_region_cookie' is a terrible function
name and does not describe at all what the function does or is supposed
to do.


	Joerg

Joerg Roedel Nov. 10, 2016, 4:16 p.m. UTC | #6
On Thu, Nov 10, 2016 at 04:07:08PM +0000, Robin Murphy wrote:
> On 10/11/16 15:46, Joerg Roedel wrote:
> > On Fri, Nov 04, 2016 at 11:24:06AM +0000, Eric Auger wrote:
> >> +	resource_list_for_each_entry(window, &bridge->windows) {
> >> +		if (resource_type(window->res) != IORESOURCE_MEM &&
> >> +		    resource_type(window->res) != IORESOURCE_IO)
> >> +			continue;
> > 
> > Why do you care about IO resources?
> 
> [since this is essentially code I wrote]
> 
> Because they occupy some area of the PCI address space, therefore I
> assumed that, like memory windows, they would be treated as P2P. Is that
> not the case?

No, not at all. The IO-space is completely separate from the MEM-space.
They are two different address-spaces, addressing different things. And
the IO-space is also not translated by any IOMMU I am aware of.


	Joerg

Eric Auger Nov. 10, 2016, 6 p.m. UTC | #7
Hi Joerg,

On 10/11/2016 17:13, Joerg Roedel wrote:
> On Thu, Nov 10, 2016 at 04:57:51PM +0100, Auger Eric wrote:
>> It does not only serve the purpose to register the MSI IOVA region. We
>> also need to allocate an iova_domain where MSI IOVAs will be allocated
>> upon the request of the relevant MSI controllers. Do you mean you don't
>> like to use the iova allocator for this purpose?
> 
> Yes, it looks like the only purpose of iommu_get_dma_msi_region_cookie()
> is to get the msi-region information into the reserved-list.
> 
> Why do you need to 'allocate' the MSI region after all? Except for
> IOMMU_DOMAIN_DMA domains the iommu-core code does not care about address
> allocation. This is up to the users of the domain, which includes that
> the user has to take care of the MSI region.
The purpose is to reuse the transparent MSI IOVA allocation scheme
introduced by Robin in [PATCH v7 00/22] Generic DT bindings for PCI
IOMMUs and ARM SMMU (commit 44bb7e243bd4b4e5c79de2452cd9762582f58925).

GICv2m and GICv3 ITS use the dma-mapping helper iommu_dma_map_msi_msg()
to allocate an MSI IOVA on demand. This relies on the existence of an
allocated iova_domain whose handle is stored in domain->iova_cookie.

The iommu_dma_init_domain() call could be done in VFIO instead of in the
IOMMU code, on the basis of the dm region info, but we would need to
differentiate the MSI window from the P2P windows.

The MSI region start/length are arbitrarily chosen, and the setup of the
reserved region list does not depend on iommu_get_dma_msi_region_cookie();
but maybe I completely misunderstand your question.
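
To make that dependency concrete, this is roughly the shape of the compose()
side (modelled on the GICv2m/ITS pattern; the callback name and my_doorbell_pa
are illustrative placeholders, not the actual GIC code):

	/* The MSI controller writes the doorbell PA into the message, then asks
	 * dma-iommu to remap it; this only works if domain->iova_cookie already
	 * holds an initialised iova_domain for the device's domain. */
	static void my_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
	{
		phys_addr_t doorbell = my_doorbell_pa;	/* illustrative */

		msg->address_hi = upper_32_bits(doorbell);
		msg->address_lo = lower_32_bits(doorbell);
		msg->data = data->hwirq;

		/* Rewrites address_hi/lo to an MSI IOVA when an IOMMU is in use */
		iommu_dma_map_msi_msg(data->irq, msg);
	}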


> Besides that, 'iommu_get_dma_msi_region_cookie' is a terrible function
> name and does not describe at all what the function does or is supposed
> to do.
Yes, this was already discussed with Alex & Robin
(https://patchwork.kernel.org/patch/9363989/). I proposed renaming it to
iommu_setup_dma_msi_region but that was not settled.

Thanks

Eric
> 
> 
> 	Joerg
> 
> 
Joerg Roedel Nov. 11, 2016, 11:42 a.m. UTC | #8
On Thu, Nov 10, 2016 at 07:00:52PM +0100, Auger Eric wrote:
> GICv2m and GICV3 ITS use dma-mapping iommu_dma_map_msi_msg to allocate
> an MSI IOVA on-demand.

Yes, and it is the right thing to do there because, as a DMA-API
implementation, the dma-iommu code cares about the address space
allocation.

As I understand it this is different in your case, as someone else is
defining the address space layout. So why do you need to allocate it
yourself?


	Joerg

Robin Murphy Nov. 11, 2016, 2:34 p.m. UTC | #9
On 10/11/16 16:16, Joerg Roedel wrote:
> On Thu, Nov 10, 2016 at 04:07:08PM +0000, Robin Murphy wrote:
>> On 10/11/16 15:46, Joerg Roedel wrote:
>>> On Fri, Nov 04, 2016 at 11:24:06AM +0000, Eric Auger wrote:
>>>> +	resource_list_for_each_entry(window, &bridge->windows) {
>>>> +		if (resource_type(window->res) != IORESOURCE_MEM &&
>>>> +		    resource_type(window->res) != IORESOURCE_IO)
>>>> +			continue;
>>>
>>> Why do you care about IO resources?
>>
>> [since this is essentially code I wrote]
>>
>> Because they occupy some area of the PCI address space, therefore I
>> assumed that, like memory windows, they would be treated as P2P. Is that
>> not the case?
> 
> No, not at all. The IO-space is completely separate from the MEM-space.
> They are two different address-spaces, addressing different things. And
> the IO-space is also not translated by any IOMMU I am aware of.

OK. On the particular root complex I have to hand, though, any DMA to
IOVAs between 0x5f800000 and 0x5fffffff sends an error back to the
endpoint, and that just so happens to be where the I/O window is placed
(both on the PCI side and the AXI (i.e. CPU MMIO) side). Whether it's
that the external MMIO view of the RC's I/O window is explicitly
duplicated in its PCI memory space as some side-effect of the PCI/AXI
bridge, or that the thing just doesn't actually respect the access type
on the PCI side, I don't know; but that's how it is (and I spent this
morning recreating it to make sure I wasn't mistaken).

This thing's ECAM space is similarly visible from the PCI side and
aborts DMA the same way - I've not tried decoding the "PCI hardware
error (0x1010)" that the sky2 network driver reports, but I do note it's
a slightly different number from the one it gives when trying to access
an address matching another device's BAR (actual peer-to-peer is
explicitly unsupported). Admittedly that's not something we can detect
from the host bridge windows at all.

Anyway, you are of course right that in the normal case this should only
matter if devices were doing I/O accesses in the first place, which makes
especially little sense in the context of the DMA API, so perhaps we
could drop the unintuitive IORESOURCE_IO check from here and consider
weird PCI controllers a separate problem to solve.
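
Concretely, dropping the I/O case would reduce the filter in the quoted hunk
to just the memory windows (a simplification of the code above, nothing more):

	resource_list_for_each_entry(window, &bridge->windows) {
		/* Only memory windows are treated as P2P regions */
		if (resource_type(window->res) != IORESOURCE_MEM)
			continue;

		start = window->res->start - window->offset;
		length = window->res->end - window->res->start + 1;
		/* ...reserve [start, start + length) as before... */
	}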

Robin.
Joerg Roedel Nov. 11, 2016, 3:03 p.m. UTC | #10
On Fri, Nov 11, 2016 at 02:34:39PM +0000, Robin Murphy wrote:
> On 10/11/16 16:16, Joerg Roedel wrote:
> > On Thu, Nov 10, 2016 at 04:07:08PM +0000, Robin Murphy wrote:
> >> On 10/11/16 15:46, Joerg Roedel wrote:
> >>> On Fri, Nov 04, 2016 at 11:24:06AM +0000, Eric Auger wrote:
> >>>> +	resource_list_for_each_entry(window, &bridge->windows) {
> >>>> +		if (resource_type(window->res) != IORESOURCE_MEM &&
> >>>> +		    resource_type(window->res) != IORESOURCE_IO)
> >>>> +			continue;
> >>>
> >>> Why do you care about IO resources?
> >>
> >> [since this is essentially code I wrote]
> >>
> >> Because they occupy some area of the PCI address space, therefore I
> >> assumed that, like memory windows, they would be treated as P2P. Is that
> >> not the case?
> > 
> > No, not at all. The IO-space is completly seperate from the MEM-space.
> > They are two different address-spaces, addressing different things. And
> > the IO-space is also not translated by any IOMMU I am aware of.
> 
> OK. On the particular root complex I have to hand, though, any DMA to
> IOVAs between 0x5f800000 and 0x5fffffff sends an error back to the
> endpoint, and that just so happens to be where the I/O window is placed
> (both on the PCI side and the AXI (i.e. CPU MMIO) side. Whether it's
> that the external MMIO view of the RC's I/O window is explicitly
> duplicated in its PCI memory space as some side-effect of the PCI/AXI
> bridge, or that the thing just doesn't actually respect the access type
> on the PCI side I don't know, but that's how it is (and I spent this
> morning recreating it to make sure I wasn't mistaken).

What you see is that on your platform the io-ports are accessed through
an mmio-window. On x86 you have dedicated instructions to access io-ports,
and those io-port ranges are what the io-resources describe. These
resources do not tell you where the mmio-region for that device's
io-ports is.


	Joerg

Eric Auger Nov. 11, 2016, 3:47 p.m. UTC | #11
Hi Joerg,

On 11/11/2016 12:42, Joerg Roedel wrote:
> On Thu, Nov 10, 2016 at 07:00:52PM +0100, Auger Eric wrote:
>> GICv2m and GICV3 ITS use dma-mapping iommu_dma_map_msi_msg to allocate
>> an MSI IOVA on-demand.
> 
> Yes, and it the right thing to do there because as a DMA-API
> implementation the dma-iommu code cares about the address space
> allocation.
> 
> As I understand it this is different in your case, as someone else is
> defining the address space layout. So why do you need to allocate it
> yourself?
Effectively, in the passthrough use case, userspace defines the address
space layout and maps guest RAM (IOVA = guest PA) to host PAs using
VFIO_IOMMU_MAP_DMA. But this address space does not include the MSI
IOVAs; userspace does not care about the MSI IOMMU mapping. So the MSI
IOVA region must be allocated by either the VFIO driver or the IOMMU
driver, I think. Who else could initialize the IOVA allocator domain?

It's true that we end up with a mix of unmanaged and "managed" addresses,
which is not neat. But how else could it be handled?
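
For context, the userspace side referred to above is the plain VFIO type1
map ioctl; a minimal illustration (container_fd, buf and guest_pa are
placeholder values, and the real user would be QEMU):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/vfio.h>

	/* Map a chunk of guest RAM so the device can DMA to it, with IOVA == guest PA.
	 * Nothing in this interface covers the MSI doorbell pages. */
	static int map_guest_ram(int container_fd, void *buf, uint64_t guest_pa,
				 uint64_t size)
	{
		struct vfio_iommu_type1_dma_map map = {
			.argsz = sizeof(map),
			.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
			.vaddr = (uint64_t)(uintptr_t)buf,	/* userspace backing memory */
			.iova  = guest_pa,			/* IOVA the device will use */
			.size  = size,
		};

		return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
	}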

Thanks

Eric
> 
> 
> 	Joerg
> 
Joerg Roedel Nov. 11, 2016, 4:22 p.m. UTC | #12
Hi Eric,

On Fri, Nov 11, 2016 at 04:47:01PM +0100, Auger Eric wrote:
> Effectively in passthrough use case, the userspace defines the address
> space layout and maps guest RAM PA=IOVA to PAs (using
> VFIO_IOMMU_MAP_DMA). But this address space does not comprise the MSI
> IOVAs. Userspace does not care about MSI IOMMU mapping. So the MSI IOVA
> region must be allocated by either the VFIO driver or the IOMMU driver I
> think. Who else could initialize the IOVA allocator domain?

So I think we need a way to tell userspace about the reserved regions
(per iommu-group) so that userspace knows where it cannot map anything,
and so that VFIO can enforce that. But the right structure here is not an
iova-allocator rb-tree; an ordered linked list should be sufficient.
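
A minimal sketch of that bookkeeping (the structure and helper names are made
up purely for illustration; needs <linux/list.h> and <linux/types.h>):

	/* Hypothetical reserved-region list, kept sorted by start address so
	 * that userspace/VFIO can walk the holes in order. */
	struct reserved_region {
		struct list_head	list;
		phys_addr_t		start;
		size_t			length;
	};

	static void reserved_region_insert(struct list_head *head,
					   struct reserved_region *new)
	{
		struct reserved_region *pos;

		list_for_each_entry(pos, head, list) {
			if (new->start < pos->start) {
				/* list_add_tail() on pos inserts new right before it */
				list_add_tail(&new->list, &pos->list);
				return;
			}
		}
		list_add_tail(&new->list, head);
	}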


	Joerg

Eric Auger Nov. 11, 2016, 4:45 p.m. UTC | #13
Hi Joerg,

On 11/11/2016 17:22, Joerg Roedel wrote:
> Hi Eric,
> 
> On Fri, Nov 11, 2016 at 04:47:01PM +0100, Auger Eric wrote:
>> Effectively in passthrough use case, the userspace defines the address
>> space layout and maps guest RAM PA=IOVA to PAs (using
>> VFIO_IOMMU_MAP_DMA). But this address space does not comprise the MSI
>> IOVAs. Userspace does not care about MSI IOMMU mapping. So the MSI IOVA
>> region must be allocated by either the VFIO driver or the IOMMU driver I
>> think. Who else could initialize the IOVA allocator domain?
> 
> So I think we need a way to tell userspace about the reserved regions
> (per iommu-group) so that userspace knows where it can not map anything,
Current plan is to expose that info through an iommu-group sysfs
attribute, as you and Robin advised.
> and VFIO can enforce that. But the right struct here is not an
> iova-allocator rb-tree; an ordered linked list should be sufficient.
I plan to use a linked list to store the reserved regions (P2P regions,
MSI region, ...). get_dm_regions() is currently called with a list that is
local to a function; we might need to move that list head into the
iommu_group to avoid calling get_dm_regions() again in the attribute show
function.

But to allocate the IOVAs within the MSI reserved region, I understand
you don't want us to use the iova.c allocator, is that correct? We need
an allocator though, even a very basic one based on a bitmap or whatever,
since there are potentially several different physical MSI frame pages to
map.

Best Regards

Eric

> 
> 
> 	Joerg
> 
Joerg Roedel Nov. 14, 2016, 3:31 p.m. UTC | #14
Hi Eric,

On Fri, Nov 11, 2016 at 05:45:19PM +0100, Auger Eric wrote:
> On 11/11/2016 17:22, Joerg Roedel wrote:
> > So I think we need a way to tell userspace about the reserved regions
> > (per iommu-group) so that userspace knows where it can not map anything,

> Current plan is to expose that info through an iommu-group sysfs
> attribute, as you and Robin advised.

Great.

> > and VFIO can enforce that. But the right struct here is not an
> > iova-allocator rb-tree, a ordered linked list should be sufficient.
> I plan a linked list to store the reserved regions (P2P regions, MSI
> region, ...). get_dma_regions is called with a list local to a function
> for that. Might be needed to move that list head in the iommu_group to
> avoid calling the get_dm_regions again in the attribute show function?

You can re-use the get_dm_regions() call-back available in the iommu-ops
already. Just rename it and add a flag to it which tells the iommu-core
whether that region needs to be mapped or not.
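
In other words (a sketch of the suggestion only; the extra field name is made
up, and this is not what iommu.h actually contains), something along these
lines:

	/* The existing dm-region descriptor, extended with a flag telling the
	 * core whether the region must be identity-mapped or merely kept
	 * reserved. "no_map" is a hypothetical field name for illustration. */
	struct iommu_dm_region {
		struct list_head	list;
		dma_addr_t		start;
		size_t			length;
		int			prot;
		bool			no_map;		/* hypothetical: reserve only */
	};

	/* iommu_ops would keep the same callback, now reporting both kinds:
	 *	void (*get_dm_regions)(struct device *dev, struct list_head *list);
	 */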

> But to allocate the IOVAs within the MSI reserved region, I understand
> you don't want us to use the iova.c allocator, is that correct? We need
> an allocator though, even a very basic one based on bitmap or whatever.
> There are potentially several different physical MSI frame pages to map.

I don't get this, what do you need an address-allocator for?



	Joerg

Eric Auger Nov. 14, 2016, 4:08 p.m. UTC | #15
Hi Joerg,

On 14/11/2016 16:31, Joerg Roedel wrote:
> Hi Eric,
> 
> On Fri, Nov 11, 2016 at 05:45:19PM +0100, Auger Eric wrote:
>> On 11/11/2016 17:22, Joerg Roedel wrote:
>>> So I think we need a way to tell userspace about the reserved regions
>>> (per iommu-group) so that userspace knows where it can not map anything,
> 
>> Current plan is to expose that info through an iommu-group sysfs
>> attribute, as you and Robin advised.
> 
> Great.
> 
>>> and VFIO can enforce that. But the right struct here is not an
>>> iova-allocator rb-tree, a ordered linked list should be sufficient.
>> I plan a linked list to store the reserved regions (P2P regions, MSI
>> region, ...). get_dma_regions is called with a list local to a function
>> for that. Might be needed to move that list head in the iommu_group to
>> avoid calling the get_dm_regions again in the attribute show function?
> 
> You can re-use the get_dm_regions() call-back available in the iommu-ops
> already. Just rename it and add a flag to it which tells the iommu-core
> whether that region needs to be mapped or not.
> 
>> But to allocate the IOVAs within the MSI reserved region, I understand
>> you don't want us to use the iova.c allocator, is that correct? We need
>> an allocator though, even a very basic one based on bitmap or whatever.
>> There are potentially several different physical MSI frame pages to map.
> 
> I don't get this, what do you need an address-allocator for?

There are potentially several MSI doorbell physical pages in the SoC
that are accessed through the IOMMU (i.e. translated). Each of those must
have a corresponding IOVA and an IOVA/PA mapping programmed in the IOMMU,
otherwise MSIs will fault.

- Step 1 is to define a usable IOVA range for MSI mapping. We have now
decided the base address and size will be hardcoded for ARM, and
get_dm_regions() can be used to retrieve that hardcoded region (sketched
below).
- Step 2 is to allocate IOVAs within that range and map them for each of
those MSI doorbells. This is done in the MSI controller's compose() callback.
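
A rough sketch of what step 1 could look like on the SMMU side (assumed shape
only, reusing the hardcoded window from the patch; not the actual
implementation):

	/* Report the fixed MSI window through get_dm_regions() so that users of
	 * the domain keep [MSI_IOVA_BASE, MSI_IOVA_BASE + MSI_IOVA_LENGTH) free. */
	static void arm_smmu_get_dm_regions(struct device *dev, struct list_head *head)
	{
		struct iommu_dm_region *region;

		region = kzalloc(sizeof(*region), GFP_KERNEL);
		if (!region)
			return;

		region->start = MSI_IOVA_BASE;		/* 0x8000000 */
		region->length = MSI_IOVA_LENGTH;	/* 1MB */
		region->prot = IOMMU_READ | IOMMU_WRITE;
		list_add_tail(&region->list, head);
	}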

I hope I succeeded in clarifying this time.

Robin sent a new version of his cookie rework today, using a dummy
allocator. I am currently integrating it.

Thanks

Eric
> 
> 
> 
> 	Joerg
> 
> 
Joerg Roedel Nov. 14, 2016, 4:20 p.m. UTC | #16
On Mon, Nov 14, 2016 at 05:08:16PM +0100, Auger Eric wrote:
> There are potentially several MSI doorbell physical pages in the SOC
> that are accessed through the IOMMU (translated). Each of those must
> have a corresponding IOVA and IOVA/PA mapping programmed in the IOMMU.
> Else MSI will fault.
> 
> - step 1 was to define a usable IOVA range for MSI mapping. So now we
> decided the base address and size would be hardcoded for ARM. The
> get_dm_region can be used to retrieve that hardcoded region.
> - Step2 is to allocate IOVAs within that range and map then for each of
> those MSI doorbells. This is done in the MSI controller compose() callback.
> 
> I hope I succeeded in clarifying this time.
> 
> Robin sent today a new version of its cookie think using a dummy
> allocator. I am currently integrating it.

Okay, I understand. A simple bitmap-allocator would probably be
sufficient, and doesn't have the overhead the iova allocator has. About
how many pages are we talking here?


	Joerg

Eric Auger Nov. 14, 2016, 4:57 p.m. UTC | #17
Hi Joerg,

On 14/11/2016 17:20, Joerg Roedel wrote:
> On Mon, Nov 14, 2016 at 05:08:16PM +0100, Auger Eric wrote:
>> There are potentially several MSI doorbell physical pages in the SOC
>> that are accessed through the IOMMU (translated). Each of those must
>> have a corresponding IOVA and IOVA/PA mapping programmed in the IOMMU.
>> Else MSI will fault.
>>
>> - step 1 was to define a usable IOVA range for MSI mapping. So now we
>> decided the base address and size would be hardcoded for ARM. The
>> get_dm_region can be used to retrieve that hardcoded region.
>> - Step2 is to allocate IOVAs within that range and map then for each of
>> those MSI doorbells. This is done in the MSI controller compose() callback.
>>
>> I hope I succeeded in clarifying this time.
>>
>> Robin sent today a new version of its cookie think using a dummy
>> allocator. I am currently integrating it.
> 
> Okay, I understand. A simple bitmap-allocator would probably be
> sufficient, and doesn't have the overhead the iova allocator has. About
> how many pages are we talking here?
Very few, actually. In the systems I have access to there is only a
single page. In more advanced systems we could imagine per-CPU doorbells,
but that does not exist yet as far as I know.

Thanks

Eric
> 
> 
> 	Joerg
> 

Patch

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index c841eb7..c07ea41 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -278,6 +278,9 @@  enum arm_smmu_s2cr_privcfg {
 
 #define FSYNR0_WNR			(1 << 4)
 
+#define MSI_IOVA_BASE			0x8000000
+#define MSI_IOVA_LENGTH			0x100000
+
 static int force_stage;
 module_param(force_stage, int, S_IRUGO);
 MODULE_PARM_DESC(force_stage,
@@ -1533,6 +1536,68 @@  static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
 	return iommu_fwspec_add_ids(dev, &fwid, 1);
 }
 
+static int add_pci_window_reserved_regions(struct iommu_domain *domain,
+					   struct pci_dev *dev)
+{
+	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
+	struct iommu_reserved_region *region;
+	struct resource_entry *window;
+	phys_addr_t start;
+	size_t length;
+
+	resource_list_for_each_entry(window, &bridge->windows) {
+		if (resource_type(window->res) != IORESOURCE_MEM &&
+		    resource_type(window->res) != IORESOURCE_IO)
+			continue;
+
+		start = window->res->start - window->offset;
+		length = window->res->end - window->res->start + 1;
+
+		iommu_reserved_region_for_each(region, domain) {
+			if (region->start == start && region->length == length)
+				continue;
+		}
+		region = kzalloc(sizeof(*region), GFP_KERNEL);
+		if (!region)
+			return -ENOMEM;
+
+		region->start = start;
+		region->length = length;
+
+		list_add_tail(&region->list, &domain->reserved_regions);
+	}
+	return 0;
+}
+
+static int arm_smmu_add_reserved_regions(struct iommu_domain *domain,
+					 struct device *device)
+{
+	struct iommu_reserved_region *region;
+	int ret = 0;
+
+	/* An arbitrary 1MB region starting at 0x8000000 is reserved for MSIs */
+	if (!domain->iova_cookie) {
+
+		region = kzalloc(sizeof(*region), GFP_KERNEL);
+		if (!region)
+			return -ENOMEM;
+
+		region->start = MSI_IOVA_BASE;
+		region->length = MSI_IOVA_LENGTH;
+		list_add_tail(&region->list, &domain->reserved_regions);
+
+		ret = iommu_get_dma_msi_region_cookie(domain,
+						region->start, region->length);
+		if (ret)
+			return ret;
+	}
+
+	if (dev_is_pci(device))
+		ret =  add_pci_window_reserved_regions(domain,
+						       to_pci_dev(device));
+	return ret;
+}
+
 static struct iommu_ops arm_smmu_ops = {
 	.capable		= arm_smmu_capable,
 	.domain_alloc		= arm_smmu_domain_alloc,
@@ -1548,6 +1613,7 @@  static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
 	.domain_get_attr	= arm_smmu_domain_get_attr,
 	.domain_set_attr	= arm_smmu_domain_set_attr,
 	.of_xlate		= arm_smmu_of_xlate,
+	.add_reserved_regions	= arm_smmu_add_reserved_regions,
 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
 };