diff mbox

[RFC,v3,15/15] irqchip/gicv2m/v3-its-pci-msi: IOMMU map the MSI frame when needed

Message ID 1455264797-2334-16-git-send-email-eric.auger@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

Eric Auger Feb. 12, 2016, 8:13 a.m. UTC
In case the msi_desc references a device attached to an iommu
domain, the msi address needs to be mapped in the IOMMU. Else any
MSI write transaction will cause a fault.

gic_set_msi_addr detects that case and allocates an iova bound
to the physical address page comprising the MSI frame. This iova
then is used as the msi_msg address. Unset operation decrements the
reference on the binding.

The functions are called in the irq_write_msi_msg ops implementation.
At that time we can recognize whether the msi is setup or teared down
looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
the fields.

Signed-off-by: Eric Auger <eric.auger@linaro.org>

---

v2 -> v3:
- protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
  CONFIG_PHYS_ADDR_T_64BIT
- only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
  CONFIG_PCI_MSI_IRQ_DOMAIN are set.
- gic_set/unset_msi_addr duly become static
---
 drivers/irqchip/irq-gic-common.c         | 69 ++++++++++++++++++++++++++++++++
 drivers/irqchip/irq-gic-common.h         |  5 +++
 drivers/irqchip/irq-gic-v2m.c            |  7 +++-
 drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
 4 files changed, 85 insertions(+), 1 deletion(-)

Comments

Marc Zyngier Feb. 18, 2016, 11:33 a.m. UTC | #1
On Fri, 12 Feb 2016 08:13:17 +0000
Eric Auger <eric.auger@linaro.org> wrote:

> In case the msi_desc references a device attached to an iommu
> domain, the msi address needs to be mapped in the IOMMU. Else any
> MSI write transaction will cause a fault.
> 
> gic_set_msi_addr detects that case and allocates an iova bound
> to the physical address page comprising the MSI frame. This iova
> then is used as the msi_msg address. Unset operation decrements the
> reference on the binding.
> 
> The functions are called in the irq_write_msi_msg ops implementation.
> At that time we can recognize whether the msi is setup or teared down
> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
> the fields.
> 
> Signed-off-by: Eric Auger <eric.auger@linaro.org>
> 
> ---
> 
> v2 -> v3:
> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
>   CONFIG_PHYS_ADDR_T_64BIT
> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
>   CONFIG_PCI_MSI_IRQ_DOMAIN are set.
> - gic_set/unset_msi_addr duly become static
> ---
>  drivers/irqchip/irq-gic-common.c         | 69 ++++++++++++++++++++++++++++++++
>  drivers/irqchip/irq-gic-common.h         |  5 +++
>  drivers/irqchip/irq-gic-v2m.c            |  7 +++-
>  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
>  4 files changed, 85 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
> index f174ce0..46cd06c 100644
> --- a/drivers/irqchip/irq-gic-common.c
> +++ b/drivers/irqchip/irq-gic-common.c
> @@ -18,6 +18,8 @@
>  #include <linux/io.h>
>  #include <linux/irq.h>
>  #include <linux/irqchip/arm-gic.h>
> +#include <linux/iommu.h>
> +#include <linux/msi.h>
>  
>  #include "irq-gic-common.h"
>  
> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void))
>  	if (sync_access)
>  		sync_access();
>  }
> +
> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
> +{
> +	struct msi_desc *desc = irq_data_get_msi_desc(data);
> +	struct device *dev = msi_desc_to_dev(desc);
> +	struct iommu_domain *d;
> +	phys_addr_t addr;
> +	dma_addr_t iova;
> +	int ret;
> +
> +	d = iommu_get_domain_for_dev(dev);
> +	if (!d)
> +		return 0;
> +#ifdef CONFIG_PHYS_ADDR_T_64BIT
> +	addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
> +#else
> +	addr = msg->address_lo;
> +#endif
> +
> +	ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova);
> +
> +	if (!ret) {
> +		msg->address_lo = lower_32_bits(iova);
> +		msg->address_hi = upper_32_bits(iova);
> +	}
> +	return ret;
> +}
> +
> +
> +static void gic_unset_msi_addr(struct irq_data *data)
> +{
> +	struct msi_desc *desc = irq_data_get_msi_desc(data);
> +	struct device *dev;
> +	struct iommu_domain *d;
> +	dma_addr_t iova;
> +
> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> +	iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
> +		desc->msg.address_lo;
> +#else
> +	iova = desc->msg.address_lo;
> +#endif
> +
> +	dev = msi_desc_to_dev(desc);
> +	if (!dev)
> +		return;
> +
> +	d = iommu_get_domain_for_dev(dev);
> +	if (!d)
> +		return;
> +
> +	iommu_put_single_reserved(d, iova);
> +}
> +
> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
> +				  struct msi_msg *msg)
> +{
> +	if (!msg->address_hi && !msg->address_lo && !msg->data)
> +		gic_unset_msi_addr(irq_data); /* deactivate */
> +	else
> +		gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
> +
> +	pci_msi_domain_write_msg(irq_data, msg);
> +}

So by doing that, you are specializing this infrastructure to PCI.
If you hijacked irq_compose_msi_msg() instead, you'd have both
platform and PCI MSI for the same price.

I can see a potential problem with the teardown of an MSI (I don't
think the compose method is called on teardown), but I think this could
be easily addressed.

> +#endif
> +
> diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
> index fff697d..98681fd 100644
> --- a/drivers/irqchip/irq-gic-common.h
> +++ b/drivers/irqchip/irq-gic-common.h
> @@ -35,4 +35,9 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void));
>  void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
>  		void *data);
>  
> +#if defined(CONFIG_PCI_MSI_IRQ_DOMAIN) && defined(CONFIG_IOMMU_API)
> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
> +				  struct msi_msg *msg);
> +#endif
> +
>  #endif /* _IRQ_GIC_COMMON_H */
> diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
> index c779f83..692d809 100644
> --- a/drivers/irqchip/irq-gic-v2m.c
> +++ b/drivers/irqchip/irq-gic-v2m.c
> @@ -24,6 +24,7 @@
>  #include <linux/of_pci.h>
>  #include <linux/slab.h>
>  #include <linux/spinlock.h>
> +#include "irq-gic-common.h"
>  
>  /*
>  * MSI_TYPER:
> @@ -83,7 +84,11 @@ static struct irq_chip gicv2m_msi_irq_chip = {
>  	.irq_mask		= gicv2m_mask_msi_irq,
>  	.irq_unmask		= gicv2m_unmask_msi_irq,
>  	.irq_eoi		= irq_chip_eoi_parent,
> -	.irq_write_msi_msg	= pci_msi_domain_write_msg,
> +#ifdef CONFIG_IOMMU_API
> +	.irq_write_msi_msg	= gic_pci_msi_domain_write_msg,
> +#else
> +	.irq_write_msi_msg      = pci_msi_domain_write_msg,
> +#endif

Irrespective of the way you implement the translation procedure, you
should make this unconditional, and have the #ifdefery in the code that
implements it.

>  };
>  
>  static struct msi_domain_info gicv2m_msi_domain_info = {
> diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
> index 8223765..690504e 100644
> --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c
> +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
> @@ -19,6 +19,7 @@
>  #include <linux/of.h>
>  #include <linux/of_irq.h>
>  #include <linux/of_pci.h>
> +#include "irq-gic-common.h"
>  
>  static void its_mask_msi_irq(struct irq_data *d)
>  {
> @@ -37,7 +38,11 @@ static struct irq_chip its_msi_irq_chip = {
>  	.irq_unmask		= its_unmask_msi_irq,
>  	.irq_mask		= its_mask_msi_irq,
>  	.irq_eoi		= irq_chip_eoi_parent,
> +#ifdef CONFIG_IOMMU_API
> +	.irq_write_msi_msg	= gic_pci_msi_domain_write_msg,
> +#else
>  	.irq_write_msi_msg	= pci_msi_domain_write_msg,
> +#endif
>  };
>  
>  struct its_pci_alias {

Thanks,

	M.
Eric Auger Feb. 18, 2016, 3:33 p.m. UTC | #2
Hi Marc,
On 02/18/2016 12:33 PM, Marc Zyngier wrote:
> On Fri, 12 Feb 2016 08:13:17 +0000
> Eric Auger <eric.auger@linaro.org> wrote:
> 
>> In case the msi_desc references a device attached to an iommu
>> domain, the msi address needs to be mapped in the IOMMU. Else any
>> MSI write transaction will cause a fault.
>>
>> gic_set_msi_addr detects that case and allocates an iova bound
>> to the physical address page comprising the MSI frame. This iova
>> then is used as the msi_msg address. Unset operation decrements the
>> reference on the binding.
>>
>> The functions are called in the irq_write_msi_msg ops implementation.
>> At that time we can recognize whether the msi is setup or teared down
>> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
>> the fields.
>>
>> Signed-off-by: Eric Auger <eric.auger@linaro.org>
>>
>> ---
>>
>> v2 -> v3:
>> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
>>   CONFIG_PHYS_ADDR_T_64BIT
>> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
>>   CONFIG_PCI_MSI_IRQ_DOMAIN are set.
>> - gic_set/unset_msi_addr duly become static
>> ---
>>  drivers/irqchip/irq-gic-common.c         | 69 ++++++++++++++++++++++++++++++++
>>  drivers/irqchip/irq-gic-common.h         |  5 +++
>>  drivers/irqchip/irq-gic-v2m.c            |  7 +++-
>>  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
>>  4 files changed, 85 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
>> index f174ce0..46cd06c 100644
>> --- a/drivers/irqchip/irq-gic-common.c
>> +++ b/drivers/irqchip/irq-gic-common.c
>> @@ -18,6 +18,8 @@
>>  #include <linux/io.h>
>>  #include <linux/irq.h>
>>  #include <linux/irqchip/arm-gic.h>
>> +#include <linux/iommu.h>
>> +#include <linux/msi.h>
>>  
>>  #include "irq-gic-common.h"
>>  
>> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void))
>>  	if (sync_access)
>>  		sync_access();
>>  }
>> +
>> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
>> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
>> +{
>> +	struct msi_desc *desc = irq_data_get_msi_desc(data);
>> +	struct device *dev = msi_desc_to_dev(desc);
>> +	struct iommu_domain *d;
>> +	phys_addr_t addr;
>> +	dma_addr_t iova;
>> +	int ret;
>> +
>> +	d = iommu_get_domain_for_dev(dev);
>> +	if (!d)
>> +		return 0;
>> +#ifdef CONFIG_PHYS_ADDR_T_64BIT
>> +	addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
>> +#else
>> +	addr = msg->address_lo;
>> +#endif
>> +
>> +	ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova);
>> +
>> +	if (!ret) {
>> +		msg->address_lo = lower_32_bits(iova);
>> +		msg->address_hi = upper_32_bits(iova);
>> +	}
>> +	return ret;
>> +}
>> +
>> +
>> +static void gic_unset_msi_addr(struct irq_data *data)
>> +{
>> +	struct msi_desc *desc = irq_data_get_msi_desc(data);
>> +	struct device *dev;
>> +	struct iommu_domain *d;
>> +	dma_addr_t iova;
>> +
>> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
>> +	iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
>> +		desc->msg.address_lo;
>> +#else
>> +	iova = desc->msg.address_lo;
>> +#endif
>> +
>> +	dev = msi_desc_to_dev(desc);
>> +	if (!dev)
>> +		return;
>> +
>> +	d = iommu_get_domain_for_dev(dev);
>> +	if (!d)
>> +		return;
>> +
>> +	iommu_put_single_reserved(d, iova);
>> +}
>> +
>> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
>> +				  struct msi_msg *msg)
>> +{
>> +	if (!msg->address_hi && !msg->address_lo && !msg->data)
>> +		gic_unset_msi_addr(irq_data); /* deactivate */
>> +	else
>> +		gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
>> +
>> +	pci_msi_domain_write_msg(irq_data, msg);
>> +}
> 
> So by doing that, you are specializing this infrastructure to PCI.
> If you hijacked irq_compose_msi_msg() instead, you'd have both
> platform and PCI MSI for the same price.
> 
> I can see a potential problem with the teardown of an MSI (I don't
> think the compose method is called on teardown), but I think this could
> be easily addressed.
Yes effectively this is the reason why I moved it from
irq_compose_msi_msg (my original implementation) to irq_write_msi_msg. I
noticed I had no way to detect the teardown whereas the
msi_domain_deactivate also calls irq_write_msi_msg which is quite
practical ;-) To be honest I need to further look at the non PCI
implementation.


> 
>> +#endif
>> +
>> diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
>> index fff697d..98681fd 100644
>> --- a/drivers/irqchip/irq-gic-common.h
>> +++ b/drivers/irqchip/irq-gic-common.h
>> @@ -35,4 +35,9 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void));
>>  void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
>>  		void *data);
>>  
>> +#if defined(CONFIG_PCI_MSI_IRQ_DOMAIN) && defined(CONFIG_IOMMU_API)
>> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
>> +				  struct msi_msg *msg);
>> +#endif
>> +
>>  #endif /* _IRQ_GIC_COMMON_H */
>> diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
>> index c779f83..692d809 100644
>> --- a/drivers/irqchip/irq-gic-v2m.c
>> +++ b/drivers/irqchip/irq-gic-v2m.c
>> @@ -24,6 +24,7 @@
>>  #include <linux/of_pci.h>
>>  #include <linux/slab.h>
>>  #include <linux/spinlock.h>
>> +#include "irq-gic-common.h"
>>  
>>  /*
>>  * MSI_TYPER:
>> @@ -83,7 +84,11 @@ static struct irq_chip gicv2m_msi_irq_chip = {
>>  	.irq_mask		= gicv2m_mask_msi_irq,
>>  	.irq_unmask		= gicv2m_unmask_msi_irq,
>>  	.irq_eoi		= irq_chip_eoi_parent,
>> -	.irq_write_msi_msg	= pci_msi_domain_write_msg,
>> +#ifdef CONFIG_IOMMU_API
>> +	.irq_write_msi_msg	= gic_pci_msi_domain_write_msg,
>> +#else
>> +	.irq_write_msi_msg      = pci_msi_domain_write_msg,
>> +#endif
> 
> Irrespective of the way you implement the translation procedure, you
> should make this unconditional, and have the #ifdefery in the code that
> implements it.

OK

Thanks

Eric
> 
>>  };
>>  
>>  static struct msi_domain_info gicv2m_msi_domain_info = {
>> diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
>> index 8223765..690504e 100644
>> --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c
>> +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
>> @@ -19,6 +19,7 @@
>>  #include <linux/of.h>
>>  #include <linux/of_irq.h>
>>  #include <linux/of_pci.h>
>> +#include "irq-gic-common.h"
>>  
>>  static void its_mask_msi_irq(struct irq_data *d)
>>  {
>> @@ -37,7 +38,11 @@ static struct irq_chip its_msi_irq_chip = {
>>  	.irq_unmask		= its_unmask_msi_irq,
>>  	.irq_mask		= its_mask_msi_irq,
>>  	.irq_eoi		= irq_chip_eoi_parent,
>> +#ifdef CONFIG_IOMMU_API
>> +	.irq_write_msi_msg	= gic_pci_msi_domain_write_msg,
>> +#else
>>  	.irq_write_msi_msg	= pci_msi_domain_write_msg,
>> +#endif
>>  };
>>  
>>  struct its_pci_alias {
> 
> Thanks,
> 
> 	M.
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier Feb. 18, 2016, 3:47 p.m. UTC | #3
On 18/02/16 15:33, Eric Auger wrote:
> Hi Marc,
> On 02/18/2016 12:33 PM, Marc Zyngier wrote:
>> On Fri, 12 Feb 2016 08:13:17 +0000
>> Eric Auger <eric.auger@linaro.org> wrote:
>>
>>> In case the msi_desc references a device attached to an iommu
>>> domain, the msi address needs to be mapped in the IOMMU. Else any
>>> MSI write transaction will cause a fault.
>>>
>>> gic_set_msi_addr detects that case and allocates an iova bound
>>> to the physical address page comprising the MSI frame. This iova
>>> then is used as the msi_msg address. Unset operation decrements the
>>> reference on the binding.
>>>
>>> The functions are called in the irq_write_msi_msg ops implementation.
>>> At that time we can recognize whether the msi is setup or teared down
>>> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
>>> the fields.
>>>
>>> Signed-off-by: Eric Auger <eric.auger@linaro.org>
>>>
>>> ---
>>>
>>> v2 -> v3:
>>> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
>>>   CONFIG_PHYS_ADDR_T_64BIT
>>> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
>>>   CONFIG_PCI_MSI_IRQ_DOMAIN are set.
>>> - gic_set/unset_msi_addr duly become static
>>> ---
>>>  drivers/irqchip/irq-gic-common.c         | 69 ++++++++++++++++++++++++++++++++
>>>  drivers/irqchip/irq-gic-common.h         |  5 +++
>>>  drivers/irqchip/irq-gic-v2m.c            |  7 +++-
>>>  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
>>>  4 files changed, 85 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
>>> index f174ce0..46cd06c 100644
>>> --- a/drivers/irqchip/irq-gic-common.c
>>> +++ b/drivers/irqchip/irq-gic-common.c
>>> @@ -18,6 +18,8 @@
>>>  #include <linux/io.h>
>>>  #include <linux/irq.h>
>>>  #include <linux/irqchip/arm-gic.h>
>>> +#include <linux/iommu.h>
>>> +#include <linux/msi.h>
>>>  
>>>  #include "irq-gic-common.h"
>>>  
>>> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void))
>>>  	if (sync_access)
>>>  		sync_access();
>>>  }
>>> +
>>> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
>>> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
>>> +{
>>> +	struct msi_desc *desc = irq_data_get_msi_desc(data);
>>> +	struct device *dev = msi_desc_to_dev(desc);
>>> +	struct iommu_domain *d;
>>> +	phys_addr_t addr;
>>> +	dma_addr_t iova;
>>> +	int ret;
>>> +
>>> +	d = iommu_get_domain_for_dev(dev);
>>> +	if (!d)
>>> +		return 0;
>>> +#ifdef CONFIG_PHYS_ADDR_T_64BIT
>>> +	addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
>>> +#else
>>> +	addr = msg->address_lo;
>>> +#endif
>>> +
>>> +	ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova);
>>> +
>>> +	if (!ret) {
>>> +		msg->address_lo = lower_32_bits(iova);
>>> +		msg->address_hi = upper_32_bits(iova);
>>> +	}
>>> +	return ret;
>>> +}
>>> +
>>> +
>>> +static void gic_unset_msi_addr(struct irq_data *data)
>>> +{
>>> +	struct msi_desc *desc = irq_data_get_msi_desc(data);
>>> +	struct device *dev;
>>> +	struct iommu_domain *d;
>>> +	dma_addr_t iova;
>>> +
>>> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
>>> +	iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
>>> +		desc->msg.address_lo;
>>> +#else
>>> +	iova = desc->msg.address_lo;
>>> +#endif
>>> +
>>> +	dev = msi_desc_to_dev(desc);
>>> +	if (!dev)
>>> +		return;
>>> +
>>> +	d = iommu_get_domain_for_dev(dev);
>>> +	if (!d)
>>> +		return;
>>> +
>>> +	iommu_put_single_reserved(d, iova);
>>> +}
>>> +
>>> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
>>> +				  struct msi_msg *msg)
>>> +{
>>> +	if (!msg->address_hi && !msg->address_lo && !msg->data)
>>> +		gic_unset_msi_addr(irq_data); /* deactivate */
>>> +	else
>>> +		gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
>>> +
>>> +	pci_msi_domain_write_msg(irq_data, msg);
>>> +}
>>
>> So by doing that, you are specializing this infrastructure to PCI.
>> If you hijacked irq_compose_msi_msg() instead, you'd have both
>> platform and PCI MSI for the same price.
>>
>> I can see a potential problem with the teardown of an MSI (I don't
>> think the compose method is called on teardown), but I think this could
>> be easily addressed.
> Yes effectively this is the reason why I moved it from
> irq_compose_msi_msg (my original implementation) to irq_write_msi_msg. I
> noticed I had no way to detect the teardown whereas the
> msi_domain_deactivate also calls irq_write_msi_msg which is quite
> practical ;-) To be honest I need to further look at the non PCI
> implementation.

Another thing to consider is that MSI controllers may use different
doorbells for different CPU affinities. With your implementation,
repeatedly changing the affinity from one CPU to another would increase
the refcounting, and never actually lower it (you don't necessarily go
via an "unmap"). Of course, none of that applies to GICv2m/GICv3-ITS,
but that's worth considering.

So I think we may need some better tracking of the IOVA we program in
the device, and offer a generic infrastructure for this instead of
hiding it in the MSI controller drivers.

Thanks,

	M.
Eric Auger Feb. 18, 2016, 4:58 p.m. UTC | #4
Hi Marc,
On 02/18/2016 04:47 PM, Marc Zyngier wrote:
> On 18/02/16 15:33, Eric Auger wrote:
>> Hi Marc,
>> On 02/18/2016 12:33 PM, Marc Zyngier wrote:
>>> On Fri, 12 Feb 2016 08:13:17 +0000
>>> Eric Auger <eric.auger@linaro.org> wrote:
>>>
>>>> In case the msi_desc references a device attached to an iommu
>>>> domain, the msi address needs to be mapped in the IOMMU. Else any
>>>> MSI write transaction will cause a fault.
>>>>
>>>> gic_set_msi_addr detects that case and allocates an iova bound
>>>> to the physical address page comprising the MSI frame. This iova
>>>> then is used as the msi_msg address. Unset operation decrements the
>>>> reference on the binding.
>>>>
>>>> The functions are called in the irq_write_msi_msg ops implementation.
>>>> At that time we can recognize whether the msi is setup or teared down
>>>> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
>>>> the fields.
>>>>
>>>> Signed-off-by: Eric Auger <eric.auger@linaro.org>
>>>>
>>>> ---
>>>>
>>>> v2 -> v3:
>>>> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
>>>>   CONFIG_PHYS_ADDR_T_64BIT
>>>> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
>>>>   CONFIG_PCI_MSI_IRQ_DOMAIN are set.
>>>> - gic_set/unset_msi_addr duly become static
>>>> ---
>>>>  drivers/irqchip/irq-gic-common.c         | 69 ++++++++++++++++++++++++++++++++
>>>>  drivers/irqchip/irq-gic-common.h         |  5 +++
>>>>  drivers/irqchip/irq-gic-v2m.c            |  7 +++-
>>>>  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
>>>>  4 files changed, 85 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
>>>> index f174ce0..46cd06c 100644
>>>> --- a/drivers/irqchip/irq-gic-common.c
>>>> +++ b/drivers/irqchip/irq-gic-common.c
>>>> @@ -18,6 +18,8 @@
>>>>  #include <linux/io.h>
>>>>  #include <linux/irq.h>
>>>>  #include <linux/irqchip/arm-gic.h>
>>>> +#include <linux/iommu.h>
>>>> +#include <linux/msi.h>
>>>>  
>>>>  #include "irq-gic-common.h"
>>>>  
>>>> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void))
>>>>  	if (sync_access)
>>>>  		sync_access();
>>>>  }
>>>> +
>>>> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
>>>> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
>>>> +{
>>>> +	struct msi_desc *desc = irq_data_get_msi_desc(data);
>>>> +	struct device *dev = msi_desc_to_dev(desc);
>>>> +	struct iommu_domain *d;
>>>> +	phys_addr_t addr;
>>>> +	dma_addr_t iova;
>>>> +	int ret;
>>>> +
>>>> +	d = iommu_get_domain_for_dev(dev);
>>>> +	if (!d)
>>>> +		return 0;
>>>> +#ifdef CONFIG_PHYS_ADDR_T_64BIT
>>>> +	addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
>>>> +#else
>>>> +	addr = msg->address_lo;
>>>> +#endif
>>>> +
>>>> +	ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova);
>>>> +
>>>> +	if (!ret) {
>>>> +		msg->address_lo = lower_32_bits(iova);
>>>> +		msg->address_hi = upper_32_bits(iova);
>>>> +	}
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +
>>>> +static void gic_unset_msi_addr(struct irq_data *data)
>>>> +{
>>>> +	struct msi_desc *desc = irq_data_get_msi_desc(data);
>>>> +	struct device *dev;
>>>> +	struct iommu_domain *d;
>>>> +	dma_addr_t iova;
>>>> +
>>>> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
>>>> +	iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
>>>> +		desc->msg.address_lo;
>>>> +#else
>>>> +	iova = desc->msg.address_lo;
>>>> +#endif
>>>> +
>>>> +	dev = msi_desc_to_dev(desc);
>>>> +	if (!dev)
>>>> +		return;
>>>> +
>>>> +	d = iommu_get_domain_for_dev(dev);
>>>> +	if (!d)
>>>> +		return;
>>>> +
>>>> +	iommu_put_single_reserved(d, iova);
>>>> +}
>>>> +
>>>> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
>>>> +				  struct msi_msg *msg)
>>>> +{
>>>> +	if (!msg->address_hi && !msg->address_lo && !msg->data)
>>>> +		gic_unset_msi_addr(irq_data); /* deactivate */
>>>> +	else
>>>> +		gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
>>>> +
>>>> +	pci_msi_domain_write_msg(irq_data, msg);
>>>> +}
>>>
>>> So by doing that, you are specializing this infrastructure to PCI.
>>> If you hijacked irq_compose_msi_msg() instead, you'd have both
>>> platform and PCI MSI for the same price.
>>>
>>> I can see a potential problem with the teardown of an MSI (I don't
>>> think the compose method is called on teardown), but I think this could
>>> be easily addressed.
>> Yes effectively this is the reason why I moved it from
>> irq_compose_msi_msg (my original implementation) to irq_write_msi_msg. I
>> noticed I had no way to detect the teardown whereas the
>> msi_domain_deactivate also calls irq_write_msi_msg which is quite
>> practical ;-) To be honest I need to further look at the non PCI
>> implementation.
> 
> Another thing to consider is that MSI controllers may use different
> doorbells for different CPU affinities.

OK thanks for pointing this out.

This is also a good confirmation that a single IOVA address is not
always sufficient (at some point we wondered if we could directly use
the MSI controller guest PA instead of having the user-space providing
an IOVA pool)

 With your implementation,
> repeatedly changing the affinity from one CPU to another would increase
> the refcounting, and never actually lower it (you don't necessarily go
> via an "unmap").

 Of course, none of that applies to GICv2m/GICv3-ITS,
> but that's worth considering.
> 
> So I think we may need some better tracking of the IOVA we program in
> the device, and offer a generic infrastructure for this instead of
> hiding it in the MSI controller drivers.
OK I will study that.

Thanks for your time!

Eric
> 
> Thanks,
> 
> 	M.
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index f174ce0..46cd06c 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -18,6 +18,8 @@ 
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/irqchip/arm-gic.h>
+#include <linux/iommu.h>
+#include <linux/msi.h>
 
 #include "irq-gic-common.h"
 
@@ -121,3 +123,70 @@  void gic_cpu_config(void __iomem *base, void (*sync_access)(void))
 	if (sync_access)
 		sync_access();
 }
+
+#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
+static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
+	struct device *dev = msi_desc_to_dev(desc);
+	struct iommu_domain *d;
+	phys_addr_t addr;
+	dma_addr_t iova;
+	int ret;
+
+	d = iommu_get_domain_for_dev(dev);
+	if (!d)
+		return 0;
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+	addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
+#else
+	addr = msg->address_lo;
+#endif
+
+	ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova);
+
+	if (!ret) {
+		msg->address_lo = lower_32_bits(iova);
+		msg->address_hi = upper_32_bits(iova);
+	}
+	return ret;
+}
+
+
+static void gic_unset_msi_addr(struct irq_data *data)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
+	struct device *dev;
+	struct iommu_domain *d;
+	dma_addr_t iova;
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
+		desc->msg.address_lo;
+#else
+	iova = desc->msg.address_lo;
+#endif
+
+	dev = msi_desc_to_dev(desc);
+	if (!dev)
+		return;
+
+	d = iommu_get_domain_for_dev(dev);
+	if (!d)
+		return;
+
+	iommu_put_single_reserved(d, iova);
+}
+
+void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
+				  struct msi_msg *msg)
+{
+	if (!msg->address_hi && !msg->address_lo && !msg->data)
+		gic_unset_msi_addr(irq_data); /* deactivate */
+	else
+		gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
+
+	pci_msi_domain_write_msg(irq_data, msg);
+}
+#endif
+
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index fff697d..98681fd 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -35,4 +35,9 @@  void gic_cpu_config(void __iomem *base, void (*sync_access)(void));
 void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
 		void *data);
 
+#if defined(CONFIG_PCI_MSI_IRQ_DOMAIN) && defined(CONFIG_IOMMU_API)
+void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
+				  struct msi_msg *msg);
+#endif
+
 #endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index c779f83..692d809 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -24,6 +24,7 @@ 
 #include <linux/of_pci.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include "irq-gic-common.h"
 
 /*
 * MSI_TYPER:
@@ -83,7 +84,11 @@  static struct irq_chip gicv2m_msi_irq_chip = {
 	.irq_mask		= gicv2m_mask_msi_irq,
 	.irq_unmask		= gicv2m_unmask_msi_irq,
 	.irq_eoi		= irq_chip_eoi_parent,
-	.irq_write_msi_msg	= pci_msi_domain_write_msg,
+#ifdef CONFIG_IOMMU_API
+	.irq_write_msi_msg	= gic_pci_msi_domain_write_msg,
+#else
+	.irq_write_msi_msg      = pci_msi_domain_write_msg,
+#endif
 };
 
 static struct msi_domain_info gicv2m_msi_domain_info = {
diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
index 8223765..690504e 100644
--- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
@@ -19,6 +19,7 @@ 
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_pci.h>
+#include "irq-gic-common.h"
 
 static void its_mask_msi_irq(struct irq_data *d)
 {
@@ -37,7 +38,11 @@  static struct irq_chip its_msi_irq_chip = {
 	.irq_unmask		= its_unmask_msi_irq,
 	.irq_mask		= its_mask_msi_irq,
 	.irq_eoi		= irq_chip_eoi_parent,
+#ifdef CONFIG_IOMMU_API
+	.irq_write_msi_msg	= gic_pci_msi_domain_write_msg,
+#else
 	.irq_write_msi_msg	= pci_msi_domain_write_msg,
+#endif
 };
 
 struct its_pci_alias {