diff mbox

[v3,1/4] PCI: X-Gene: Add the APM X-Gene v1 PCIe MSI/MSIX termination driver

Message ID 7923ef6ffd60a7c3de3c8c04fe746bbd3c15b5ac.1428564115.git.dhdang@apm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Duc Dang April 9, 2015, 5:05 p.m. UTC
X-Gene v1 SoC supports a total of 2688 MSI/MSIX vectors coalesced into
16 HW IRQ lines.

Signed-off-by: Duc Dang <dhdang@apm.com>
Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
---
 drivers/pci/host/Kconfig         |   6 +
 drivers/pci/host/Makefile        |   1 +
 drivers/pci/host/pci-xgene-msi.c | 407 +++++++++++++++++++++++++++++++++++++++
 drivers/pci/host/pci-xgene.c     |  21 ++
 4 files changed, 435 insertions(+)
 create mode 100644 drivers/pci/host/pci-xgene-msi.c

Comments

Bjorn Helgaas April 9, 2015, 8:11 p.m. UTC | #1
On Thu, Apr 09, 2015 at 10:05:03AM -0700, Duc Dang wrote:
> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
> 16 HW IRQ lines.
> 
> Signed-off-by: Duc Dang <dhdang@apm.com>
> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
> ...

> --- /dev/null
> +++ b/drivers/pci/host/pci-xgene-msi.c
> @@ -0,0 +1,407 @@
> ...

> +static void xgene_irq_domain_free(struct irq_domain *domain,
> +				  unsigned int virq, unsigned int nr_irqs)
> +{
> +	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
> +	struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
> +
> +	mutex_lock(&msi->bitmap_lock);
> +
> +	if (!test_bit(d->hwirq, msi->bitmap))
> +		pr_err("trying to free unused MSI#%lu\n", d->hwirq);

I'll let Marc comment on the substance of this patch, but I'd sure like to
see more information in this and the other pr_errs below.  The text "failed
to create parent MSI domain" in the dmesg log is not enough.  Ideally it
would identify the relevant host bridge.

> +	else
> +		clear_bit(d->hwirq, msi->bitmap);
> +
> +	mutex_unlock(&msi->bitmap_lock);
> +
> +	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
> +}
> +
> +static const struct irq_domain_ops msi_domain_ops = {
> +	.alloc  = xgene_irq_domain_alloc,
> +	.free   = xgene_irq_domain_free,
> +};
> +
> +static int xgene_allocate_domains(struct xgene_msi *msi)
> +{
> +	msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec,
> +					    &msi_domain_ops, msi);
> +
> +	if (!msi->domain) {
> +		pr_err("failed to create parent MSI domain\n");
> +		return -ENOMEM;
> +	}
> +
> +	msi->mchip.of_node = msi->node;
> +	msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node,
> +						      &xgene_msi_domain_info,
> +						      msi->domain);
> +
> +	if (!msi->mchip.domain) {
> +		pr_err("failed to create MSI domain\n");
> +		irq_domain_remove(msi->domain);
> +		return -ENOMEM;
> +	}
> +
> +	return 0;
> +}
> +
> +static void xgene_free_domains(struct xgene_msi *msi)
> +{
> +	if (msi->mchip.domain)
> +		irq_domain_remove(msi->mchip.domain);
> +	if (msi->domain)
> +		irq_domain_remove(msi->domain);
> +}
> +
> +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
> +{
> +	u32 msi_irq_count = xgene_msi->settings->nr_msi_vec;
> +	u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs;
> +	int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long);
> +
> +	xgene_msi->bitmap = kzalloc(size, GFP_KERNEL);
> +	if (!xgene_msi->bitmap)
> +		return -ENOMEM;
> +
> +	mutex_init(&xgene_msi->bitmap_lock);
> +
> +	xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL);
> +	if (!xgene_msi->msi_virqs)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +static irqreturn_t xgene_msi_isr(int irq, void *data)
> +{
> +	struct xgene_msi *xgene_msi = (struct xgene_msi *) data;
> +	unsigned int virq;
> +	int msir_index, msir_reg, msir_val, hw_irq;
> +	u32 intr_index, grp_select, msi_grp, processed = 0;
> +	u32 nr_hw_irqs, irqs_per_index, index_per_group;
> +
> +	msi_grp = irq - xgene_msi->msi_virqs[0];
> +	if (msi_grp >= xgene_msi->settings->nr_hw_irqs) {
> +		pr_err("invalid msi received\n");
> +		return IRQ_NONE;
> +	}

Bjorn
Duc Dang April 9, 2015, 9:52 p.m. UTC | #2
On Thu, Apr 9, 2015 at 1:11 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> On Thu, Apr 09, 2015 at 10:05:03AM -0700, Duc Dang wrote:
>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
>> 16 HW IRQ lines.
>>
>> Signed-off-by: Duc Dang <dhdang@apm.com>
>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
>> ...
>
>> --- /dev/null
>> +++ b/drivers/pci/host/pci-xgene-msi.c
>> @@ -0,0 +1,407 @@
>> ...
>
>> +static void xgene_irq_domain_free(struct irq_domain *domain,
>> +                               unsigned int virq, unsigned int nr_irqs)
>> +{
>> +     struct irq_data *d = irq_domain_get_irq_data(domain, virq);
>> +     struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
>> +
>> +     mutex_lock(&msi->bitmap_lock);
>> +
>> +     if (!test_bit(d->hwirq, msi->bitmap))
>> +             pr_err("trying to free unused MSI#%lu\n", d->hwirq);
>
> I'll let Marc comment on the substance of this patch, but I'd sure like to
> see more information in this and the other pr_errs below.  The text "failed
> to create parent MSI domain" in the dmesg log is not enough.  Ideally it
> would identify the relevant host bridge.
>
Hi Bjorn,

X-Gene has a single MSI block that serves 5 PCIe ports, so in this
implementation, 5 X-Gene PCIe host bridges share the same MSI domain.

I can add X-Gene 1 MSI driver name and device tree node name into
these messages, but the host bridge information is not available.

As I understand, in case of error, the upper layer MSI core and driver
code will check and report the error with additional device
information (bus number, function number, etc)?

Regards,
Duc Dang.

>> +     else
>> +             clear_bit(d->hwirq, msi->bitmap);
>> +
>> +     mutex_unlock(&msi->bitmap_lock);
>> +
>> +     irq_domain_free_irqs_parent(domain, virq, nr_irqs);
>> +}
>> +
>> +static const struct irq_domain_ops msi_domain_ops = {
>> +     .alloc  = xgene_irq_domain_alloc,
>> +     .free   = xgene_irq_domain_free,
>> +};
>> +
>> +static int xgene_allocate_domains(struct xgene_msi *msi)
>> +{
>> +     msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec,
>> +                                         &msi_domain_ops, msi);
>> +
>> +     if (!msi->domain) {
>> +             pr_err("failed to create parent MSI domain\n");
>> +             return -ENOMEM;
>> +     }
>> +
>> +     msi->mchip.of_node = msi->node;
>> +     msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node,
>> +                                                   &xgene_msi_domain_info,
>> +                                                   msi->domain);
>> +
>> +     if (!msi->mchip.domain) {
>> +             pr_err("failed to create MSI domain\n");
>> +             irq_domain_remove(msi->domain);
>> +             return -ENOMEM;
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>> +static void xgene_free_domains(struct xgene_msi *msi)
>> +{
>> +     if (msi->mchip.domain)
>> +             irq_domain_remove(msi->mchip.domain);
>> +     if (msi->domain)
>> +             irq_domain_remove(msi->domain);
>> +}
>> +
>> +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
>> +{
>> +     u32 msi_irq_count = xgene_msi->settings->nr_msi_vec;
>> +     u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs;
>> +     int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long);
>> +
>> +     xgene_msi->bitmap = kzalloc(size, GFP_KERNEL);
>> +     if (!xgene_msi->bitmap)
>> +             return -ENOMEM;
>> +
>> +     mutex_init(&xgene_msi->bitmap_lock);
>> +
>> +     xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL);
>> +     if (!xgene_msi->msi_virqs)
>> +             return -ENOMEM;
>> +
>> +     return 0;
>> +}
>> +
>> +static irqreturn_t xgene_msi_isr(int irq, void *data)
>> +{
>> +     struct xgene_msi *xgene_msi = (struct xgene_msi *) data;
>> +     unsigned int virq;
>> +     int msir_index, msir_reg, msir_val, hw_irq;
>> +     u32 intr_index, grp_select, msi_grp, processed = 0;
>> +     u32 nr_hw_irqs, irqs_per_index, index_per_group;
>> +
>> +     msi_grp = irq - xgene_msi->msi_virqs[0];
>> +     if (msi_grp >= xgene_msi->settings->nr_hw_irqs) {
>> +             pr_err("invalid msi received\n");
>> +             return IRQ_NONE;
>> +     }
>
> Bjorn
Bjorn Helgaas April 9, 2015, 10:39 p.m. UTC | #3
On Thu, Apr 9, 2015 at 4:52 PM, Duc Dang <dhdang@apm.com> wrote:
> On Thu, Apr 9, 2015 at 1:11 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>> On Thu, Apr 09, 2015 at 10:05:03AM -0700, Duc Dang wrote:
>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
>>> 16 HW IRQ lines.
>>>
>>> Signed-off-by: Duc Dang <dhdang@apm.com>
>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
>>> ...
>>
>>> --- /dev/null
>>> +++ b/drivers/pci/host/pci-xgene-msi.c
>>> @@ -0,0 +1,407 @@
>>> ...
>>
>>> +static void xgene_irq_domain_free(struct irq_domain *domain,
>>> +                               unsigned int virq, unsigned int nr_irqs)
>>> +{
>>> +     struct irq_data *d = irq_domain_get_irq_data(domain, virq);
>>> +     struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
>>> +
>>> +     mutex_lock(&msi->bitmap_lock);
>>> +
>>> +     if (!test_bit(d->hwirq, msi->bitmap))
>>> +             pr_err("trying to free unused MSI#%lu\n", d->hwirq);
>>
>> I'll let Marc comment on the substance of this patch, but I'd sure like to
>> see more information in this and the other pr_errs below.  The text "failed
>> to create parent MSI domain" in the dmesg log is not enough.  Ideally it
>> would identify the relevant host bridge.
>>
> Hi Bjorn,
>
> X-Gene has single MSI block that serves 5 PCIe ports. so in this
> implementation, 5 X-Gene PCIe host bridges share the same MSI domain.
>
> I can add X-Gene 1 MSI driver name and device tree node name into
> these messages, but the host bridge information is not available.
>
> As I understand, in case of error, the upper layer MSI core and driver
> code will check and report the error with additional device
> information (bus number, function number, etc)?

If you think the upper layers already report the error, maybe you
could just drop the output from the X-Gene code.

I just think it's pointless to print messages that are constant
strings with no hook for the user to associate them with a device or
event.

>>> +     else
>>> +             clear_bit(d->hwirq, msi->bitmap);
>>> +
>>> +     mutex_unlock(&msi->bitmap_lock);
>>> +
>>> +     irq_domain_free_irqs_parent(domain, virq, nr_irqs);
>>> +}
>>> +
>>> +static const struct irq_domain_ops msi_domain_ops = {
>>> +     .alloc  = xgene_irq_domain_alloc,
>>> +     .free   = xgene_irq_domain_free,
>>> +};
>>> +
>>> +static int xgene_allocate_domains(struct xgene_msi *msi)
>>> +{
>>> +     msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec,
>>> +                                         &msi_domain_ops, msi);
>>> +
>>> +     if (!msi->domain) {
>>> +             pr_err("failed to create parent MSI domain\n");
>>> +             return -ENOMEM;
>>> +     }
>>> +
>>> +     msi->mchip.of_node = msi->node;
>>> +     msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node,
>>> +                                                   &xgene_msi_domain_info,
>>> +                                                   msi->domain);
>>> +
>>> +     if (!msi->mchip.domain) {
>>> +             pr_err("failed to create MSI domain\n");
>>> +             irq_domain_remove(msi->domain);
>>> +             return -ENOMEM;
>>> +     }
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +static void xgene_free_domains(struct xgene_msi *msi)
>>> +{
>>> +     if (msi->mchip.domain)
>>> +             irq_domain_remove(msi->mchip.domain);
>>> +     if (msi->domain)
>>> +             irq_domain_remove(msi->domain);
>>> +}
>>> +
>>> +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
>>> +{
>>> +     u32 msi_irq_count = xgene_msi->settings->nr_msi_vec;
>>> +     u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs;
>>> +     int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long);
>>> +
>>> +     xgene_msi->bitmap = kzalloc(size, GFP_KERNEL);
>>> +     if (!xgene_msi->bitmap)
>>> +             return -ENOMEM;
>>> +
>>> +     mutex_init(&xgene_msi->bitmap_lock);
>>> +
>>> +     xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL);
>>> +     if (!xgene_msi->msi_virqs)
>>> +             return -ENOMEM;
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +static irqreturn_t xgene_msi_isr(int irq, void *data)
>>> +{
>>> +     struct xgene_msi *xgene_msi = (struct xgene_msi *) data;
>>> +     unsigned int virq;
>>> +     int msir_index, msir_reg, msir_val, hw_irq;
>>> +     u32 intr_index, grp_select, msi_grp, processed = 0;
>>> +     u32 nr_hw_irqs, irqs_per_index, index_per_group;
>>> +
>>> +     msi_grp = irq - xgene_msi->msi_virqs[0];
>>> +     if (msi_grp >= xgene_msi->settings->nr_hw_irqs) {
>>> +             pr_err("invalid msi received\n");
>>> +             return IRQ_NONE;
>>> +     }
>>
>> Bjorn
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
Duc Dang April 9, 2015, 11:26 p.m. UTC | #4
On Thu, Apr 9, 2015 at 3:39 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> On Thu, Apr 9, 2015 at 4:52 PM, Duc Dang <dhdang@apm.com> wrote:
>> On Thu, Apr 9, 2015 at 1:11 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>> On Thu, Apr 09, 2015 at 10:05:03AM -0700, Duc Dang wrote:
>>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
>>>> 16 HW IRQ lines.
>>>>
>>>> Signed-off-by: Duc Dang <dhdang@apm.com>
>>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
>>>> ...
>>>
>>>> --- /dev/null
>>>> +++ b/drivers/pci/host/pci-xgene-msi.c
>>>> @@ -0,0 +1,407 @@
>>>> ...
>>>
>>>> +static void xgene_irq_domain_free(struct irq_domain *domain,
>>>> +                               unsigned int virq, unsigned int nr_irqs)
>>>> +{
>>>> +     struct irq_data *d = irq_domain_get_irq_data(domain, virq);
>>>> +     struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
>>>> +
>>>> +     mutex_lock(&msi->bitmap_lock);
>>>> +
>>>> +     if (!test_bit(d->hwirq, msi->bitmap))
>>>> +             pr_err("trying to free unused MSI#%lu\n", d->hwirq);
>>>
>>> I'll let Marc comment on the substance of this patch, but I'd sure like to
>>> see more information in this and the other pr_errs below.  The text "failed
>>> to create parent MSI domain" in the dmesg log is not enough.  Ideally it
>>> would identify the relevant host bridge.
>>>
>> Hi Bjorn,
>>
>> X-Gene has single MSI block that serves 5 PCIe ports. so in this
>> implementation, 5 X-Gene PCIe host bridges share the same MSI domain.
>>
>> I can add X-Gene 1 MSI driver name and device tree node name into
>> these messages, but the host bridge information is not available.
>>
>> As I understand, in case of error, the upper layer MSI core and driver
>> code will check and report the error with additional device
>> information (bus number, function number, etc)?
>
> If you think the upper layers already report the error, maybe you
> could just drop the output from the X-Gene code.
>
> I just think it's pointless to print messages that are constant
> strings with no hook for the user to associate them with a device or
> event.

Hi Bjorn,

Yes, I will do so.

Regards,
Duc Dang.
>
>>>> +     else
>>>> +             clear_bit(d->hwirq, msi->bitmap);
>>>> +
>>>> +     mutex_unlock(&msi->bitmap_lock);
>>>> +
>>>> +     irq_domain_free_irqs_parent(domain, virq, nr_irqs);
>>>> +}
>>>> +
>>>> +static const struct irq_domain_ops msi_domain_ops = {
>>>> +     .alloc  = xgene_irq_domain_alloc,
>>>> +     .free   = xgene_irq_domain_free,
>>>> +};
>>>> +
>>>> +static int xgene_allocate_domains(struct xgene_msi *msi)
>>>> +{
>>>> +     msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec,
>>>> +                                         &msi_domain_ops, msi);
>>>> +
>>>> +     if (!msi->domain) {
>>>> +             pr_err("failed to create parent MSI domain\n");
>>>> +             return -ENOMEM;
>>>> +     }
>>>> +
>>>> +     msi->mchip.of_node = msi->node;
>>>> +     msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node,
>>>> +                                                   &xgene_msi_domain_info,
>>>> +                                                   msi->domain);
>>>> +
>>>> +     if (!msi->mchip.domain) {
>>>> +             pr_err("failed to create MSI domain\n");
>>>> +             irq_domain_remove(msi->domain);
>>>> +             return -ENOMEM;
>>>> +     }
>>>> +
>>>> +     return 0;
>>>> +}
>>>> +
>>>> +static void xgene_free_domains(struct xgene_msi *msi)
>>>> +{
>>>> +     if (msi->mchip.domain)
>>>> +             irq_domain_remove(msi->mchip.domain);
>>>> +     if (msi->domain)
>>>> +             irq_domain_remove(msi->domain);
>>>> +}
>>>> +
>>>> +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
>>>> +{
>>>> +     u32 msi_irq_count = xgene_msi->settings->nr_msi_vec;
>>>> +     u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs;
>>>> +     int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long);
>>>> +
>>>> +     xgene_msi->bitmap = kzalloc(size, GFP_KERNEL);
>>>> +     if (!xgene_msi->bitmap)
>>>> +             return -ENOMEM;
>>>> +
>>>> +     mutex_init(&xgene_msi->bitmap_lock);
>>>> +
>>>> +     xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL);
>>>> +     if (!xgene_msi->msi_virqs)
>>>> +             return -ENOMEM;
>>>> +
>>>> +     return 0;
>>>> +}
>>>> +
>>>> +static irqreturn_t xgene_msi_isr(int irq, void *data)
>>>> +{
>>>> +     struct xgene_msi *xgene_msi = (struct xgene_msi *) data;
>>>> +     unsigned int virq;
>>>> +     int msir_index, msir_reg, msir_val, hw_irq;
>>>> +     u32 intr_index, grp_select, msi_grp, processed = 0;
>>>> +     u32 nr_hw_irqs, irqs_per_index, index_per_group;
>>>> +
>>>> +     msi_grp = irq - xgene_msi->msi_virqs[0];
>>>> +     if (msi_grp >= xgene_msi->settings->nr_hw_irqs) {
>>>> +             pr_err("invalid msi received\n");
>>>> +             return IRQ_NONE;
>>>> +     }
>>>
>>> Bjorn
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
Marc Zyngier April 10, 2015, 5:20 p.m. UTC | #5
On 09/04/15 18:05, Duc Dang wrote:
> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
> 16 HW IRQ lines.
> 
> Signed-off-by: Duc Dang <dhdang@apm.com>
> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
> ---
>  drivers/pci/host/Kconfig         |   6 +
>  drivers/pci/host/Makefile        |   1 +
>  drivers/pci/host/pci-xgene-msi.c | 407 +++++++++++++++++++++++++++++++++++++++
>  drivers/pci/host/pci-xgene.c     |  21 ++
>  4 files changed, 435 insertions(+)
>  create mode 100644 drivers/pci/host/pci-xgene-msi.c
> 
> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
> index 7b892a9..c9b61fa 100644
> --- a/drivers/pci/host/Kconfig
> +++ b/drivers/pci/host/Kconfig
> @@ -89,11 +89,17 @@ config PCI_XGENE
>         depends on ARCH_XGENE
>         depends on OF
>         select PCIEPORTBUS
> +       select PCI_MSI_IRQ_DOMAIN if PCI_MSI
> +       select PCI_XGENE_MSI if PCI_MSI
>         help
>           Say Y here if you want internal PCI support on APM X-Gene SoC.
>           There are 5 internal PCIe ports available. Each port is GEN3 capable
>           and have varied lanes from x1 to x8.
> 
> +config PCI_XGENE_MSI
> +       bool "X-Gene v1 PCIe MSI feature"
> +       depends on PCI_XGENE && PCI_MSI
> +
>  config PCI_LAYERSCAPE
>         bool "Freescale Layerscape PCIe controller"
>         depends on OF && ARM
> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
> index e61d91c..f39bde3 100644
> --- a/drivers/pci/host/Makefile
> +++ b/drivers/pci/host/Makefile
> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
>  obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o
>  obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
>  obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
>  obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
>  obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
> diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c
> new file mode 100644
> index 0000000..4f0ff42
> --- /dev/null
> +++ b/drivers/pci/host/pci-xgene-msi.c
> @@ -0,0 +1,407 @@
> +/*
> + * APM X-Gene MSI Driver
> + *
> + * Copyright (c) 2014, Applied Micro Circuits Corporation
> + * Author: Tanmay Inamdar <tinamdar@apm.com>
> + *        Duc Dang <dhdang@apm.com>
> + *
> + * This program is free software; you can redistribute  it and/or modify it
> + * under  the terms of  the GNU General  Public License as published by the
> + * Free Software Foundation;  either version 2 of the  License, or (at your
> + * option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/msi.h>
> +#include <linux/of_irq.h>
> +#include <linux/pci.h>
> +#include <linux/platform_device.h>
> +#include <linux/of_pci.h>
> +
> +#define MSI_INDEX0             0x000000
> +#define MSI_INT0               0x800000
> +
> +struct xgene_msi_settings {
> +       u32     index_per_group;
> +       u32     irqs_per_index;
> +       u32     nr_msi_vec;
> +       u32     nr_hw_irqs;
> +};
> +
> +struct xgene_msi {
> +       struct device_node              *node;
> +       struct msi_controller           mchip;
> +       struct irq_domain               *domain;
> +       struct xgene_msi_settings       *settings;
> +       u32                             msi_addr_lo;
> +       u32                             msi_addr_hi;

I'd rather see the mailbox address directly, and only do the split when
assigning it to the message (you seem to play all kinds of tricks on the
address anyway).

> +       void __iomem                    *msi_regs;
> +       unsigned long                   *bitmap;
> +       struct mutex                    bitmap_lock;
> +       int                             *msi_virqs;
> +};
> +
> +struct xgene_msi_settings storm_msi_settings = {
> +       .index_per_group        = 8,
> +       .irqs_per_index         = 21,
> +       .nr_msi_vec             = 2688,
> +       .nr_hw_irqs             = 16,
> +};

It would really help to understand how index, group and hw irq lines are
structured. nr_msi_vec is obviously the product of these three numbers,
so maybe you can lose it (we have computers, remember... ;-).

Do you expect more than this single "storm" implementation? If so, maybe
they should be described in the DT. If not, why do we need a separate
structure with an indirection if we know these are constants?

> +
> +typedef int (*xgene_msi_initcall_t)(struct xgene_msi *);
> +
> +static int xgene_msi_init_storm_settings(struct xgene_msi *xgene_msi)
> +{
> +       xgene_msi->settings = &storm_msi_settings;
> +       return 0;
> +}
> +
> +static struct irq_chip xgene_msi_top_irq_chip = {
> +       .name           = "X-Gene1 MSI",
> +       .irq_enable     = pci_msi_unmask_irq,
> +       .irq_disable    = pci_msi_mask_irq,
> +       .irq_mask       = pci_msi_mask_irq,
> +       .irq_unmask     = pci_msi_unmask_irq,
> +};
> +
> +static struct  msi_domain_info xgene_msi_domain_info = {
> +       .flags  = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
> +                 MSI_FLAG_PCI_MSIX),
> +       .chip   = &xgene_msi_top_irq_chip,
> +};
> +
> +static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
> +{
> +       struct xgene_msi *msi = irq_data_get_irq_chip_data(data);
> +       u32 nr_hw_irqs = msi->settings->nr_hw_irqs;
> +       u32 irqs_per_index = msi->settings->irqs_per_index;
> +       u32 reg_set = data->hwirq / (nr_hw_irqs * irqs_per_index);
> +       u32 group = data->hwirq % nr_hw_irqs;
> +
> +       msg->address_hi = msi->msi_addr_hi;
> +       msg->address_lo = msi->msi_addr_lo +
> +                         (((8 * group) + reg_set) << 16);
> +       msg->data = (data->hwirq / nr_hw_irqs) % irqs_per_index;

... (sound of head exploding...). I hate it when I have to reverse
engineer the hardware by looking at the code...

Please give us some clue on how interrupts are spread across the various
pages, how the various indexes are combined to give an actual address.

> +}
> +
> +static int xgene_msi_set_affinity(struct irq_data *irq_data,
> +                                 const struct cpumask *mask, bool force)
> +{
> +       struct xgene_msi *msi = irq_data_get_irq_chip_data(irq_data);
> +       unsigned int gic_irq;
> +       int ret;
> +
> +       gic_irq = msi->msi_virqs[irq_data->hwirq % msi->settings->nr_hw_irqs];
> +       ret = irq_set_affinity(gic_irq, mask);

Erm... This has the effect of moving *all* the MSIs hanging off this
interrupt to another CPU. I'm not sure that's an acceptable effect...
What if another MSI requires a different affinity?

> +       if (ret == IRQ_SET_MASK_OK)
> +               ret = IRQ_SET_MASK_OK_DONE;
> +
> +       return ret;
> +}
> +
> +static struct irq_chip xgene_msi_bottom_irq_chip = {
> +       .name                   = "MSI",
> +       .irq_set_affinity       = xgene_msi_set_affinity,
> +       .irq_compose_msi_msg    = xgene_compose_msi_msg,
> +};
> +
> +static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
> +                                 unsigned int nr_irqs, void *args)
> +{
> +
> +       struct xgene_msi *msi = domain->host_data;
> +       u32 msi_irq_count = msi->settings->nr_msi_vec;
> +       int msi_irq;
> +
> +       mutex_lock(&msi->bitmap_lock);
> +
> +       msi_irq = find_first_zero_bit(msi->bitmap, msi_irq_count);
> +       if (msi_irq < msi_irq_count)
> +               set_bit(msi_irq, msi->bitmap);
> +       else
> +               msi_irq = -ENOSPC;
> +
> +       mutex_unlock(&msi->bitmap_lock);
> +
> +       if (msi_irq < 0)
> +               return msi_irq;
> +
> +       irq_domain_set_info(domain, virq, msi_irq, &xgene_msi_bottom_irq_chip,
> +                           domain->host_data, handle_simple_irq, NULL, NULL);
> +       set_irq_flags(virq, IRQF_VALID);
> +
> +       return 0;
> +}
> +
> +static void xgene_irq_domain_free(struct irq_domain *domain,
> +                                 unsigned int virq, unsigned int nr_irqs)
> +{
> +       struct irq_data *d = irq_domain_get_irq_data(domain, virq);
> +       struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
> +
> +       mutex_lock(&msi->bitmap_lock);
> +
> +       if (!test_bit(d->hwirq, msi->bitmap))
> +               pr_err("trying to free unused MSI#%lu\n", d->hwirq);
> +       else
> +               clear_bit(d->hwirq, msi->bitmap);
> +
> +       mutex_unlock(&msi->bitmap_lock);
> +
> +       irq_domain_free_irqs_parent(domain, virq, nr_irqs);
> +}
> +
> +static const struct irq_domain_ops msi_domain_ops = {
> +       .alloc  = xgene_irq_domain_alloc,
> +       .free   = xgene_irq_domain_free,
> +};
> +
> +static int xgene_allocate_domains(struct xgene_msi *msi)
> +{
> +       msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec,
> +                                           &msi_domain_ops, msi);
> +
> +       if (!msi->domain) {
> +               pr_err("failed to create parent MSI domain\n");
> +               return -ENOMEM;
> +       }
> +
> +       msi->mchip.of_node = msi->node;
> +       msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node,
> +                                                     &xgene_msi_domain_info,
> +                                                     msi->domain);
> +
> +       if (!msi->mchip.domain) {
> +               pr_err("failed to create MSI domain\n");
> +               irq_domain_remove(msi->domain);
> +               return -ENOMEM;
> +       }
> +
> +       return 0;
> +}
> +
> +static void xgene_free_domains(struct xgene_msi *msi)
> +{
> +       if (msi->mchip.domain)
> +               irq_domain_remove(msi->mchip.domain);
> +       if (msi->domain)
> +               irq_domain_remove(msi->domain);
> +}
> +
> +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
> +{
> +       u32 msi_irq_count = xgene_msi->settings->nr_msi_vec;
> +       u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs;
> +       int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long);
> +
> +       xgene_msi->bitmap = kzalloc(size, GFP_KERNEL);
> +       if (!xgene_msi->bitmap)
> +               return -ENOMEM;
> +
> +       mutex_init(&xgene_msi->bitmap_lock);
> +
> +       xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL);
> +       if (!xgene_msi->msi_virqs)
> +               return -ENOMEM;
> +
> +       return 0;
> +}
> +
> +static irqreturn_t xgene_msi_isr(int irq, void *data)

This is not a "normal" interrupt handler. This is really a chained
interrupt controller. Please use the proper infrastructure for this
(irq_set_chained_handler, chained_irq_enter, chained_irq_exit).
Otherwise, your usage of irq_find_mapping is illegal wrt RCU.

> +{
> +       struct xgene_msi *xgene_msi = (struct xgene_msi *) data;
> +       unsigned int virq;
> +       int msir_index, msir_reg, msir_val, hw_irq;
> +       u32 intr_index, grp_select, msi_grp, processed = 0;
> +       u32 nr_hw_irqs, irqs_per_index, index_per_group;
> +
> +       msi_grp = irq - xgene_msi->msi_virqs[0];
> +       if (msi_grp >= xgene_msi->settings->nr_hw_irqs) {
> +               pr_err("invalid msi received\n");
> +               return IRQ_NONE;
> +       }
> +
> +       nr_hw_irqs = xgene_msi->settings->nr_hw_irqs;
> +       irqs_per_index = xgene_msi->settings->irqs_per_index;
> +       index_per_group = xgene_msi->settings->index_per_group;
> +
> +       grp_select = readl(xgene_msi->msi_regs + MSI_INT0 + (msi_grp << 16));
> +       while (grp_select) {
> +               msir_index = ffs(grp_select) - 1;
> +               msir_reg = (msi_grp << 19) + (msir_index << 16);
> +               msir_val = readl(xgene_msi->msi_regs + MSI_INDEX0 + msir_reg);
> +               while (msir_val) {
> +                       intr_index = ffs(msir_val) - 1;
> +                       hw_irq = (((msir_index * irqs_per_index) + intr_index) *
> +                                nr_hw_irqs) + msi_grp;

Same thing here. This requires some explanation of how the HW is organised.

> +                       virq = irq_find_mapping(xgene_msi->domain, hw_irq);
> +                       if (virq != 0)
> +                               generic_handle_irq(virq);
> +                       msir_val &= ~(1 << intr_index);
> +                       processed++;
> +               }
> +               grp_select &= ~(1 << msir_index);
> +       }
> +
> +       return processed > 0 ? IRQ_HANDLED : IRQ_NONE;
> +}
> +
> +static int xgene_msi_remove(struct platform_device *pdev)
> +{
> +       int virq, i;
> +       struct xgene_msi *msi = platform_get_drvdata(pdev);
> +       u32 nr_hw_irqs = msi->settings->nr_hw_irqs;
> +
> +       for (i = 0; i < nr_hw_irqs; i++) {
> +               virq = msi->msi_virqs[i];
> +               if (virq != 0)
> +                       free_irq(virq, msi);
> +       }
> +
> +       kfree(msi->bitmap);
> +       msi->bitmap = NULL;
> +
> +       xgene_free_domains(msi);
> +
> +       return 0;
> +}
> +
> +static int xgene_msi_setup_hwirq(struct xgene_msi *msi,
> +                                struct platform_device *pdev,
> +                                int irq_index)
> +{
> +       int virt_msir;
> +       cpumask_var_t mask;
> +       int err;
> +
> +       virt_msir = platform_get_irq(pdev, irq_index);
> +       if (virt_msir < 0) {
> +               dev_err(&pdev->dev, "Cannot translate IRQ index %d\n",
> +                       irq_index);
> +               return -EINVAL;
> +       }
> +
> +       err = request_irq(virt_msir, xgene_msi_isr, 0,
> +                         xgene_msi_top_irq_chip.name, msi);

This is where you should use irq_set_chained_handler.

> +       if (err) {
> +               dev_err(&pdev->dev, "request irq failed\n");
> +               return err;
> +       }
> +
> +       if (alloc_cpumask_var(&mask, GFP_KERNEL)) {
> +               cpumask_setall(mask);
> +               irq_set_affinity(virt_msir, mask);
> +               free_cpumask_var(mask);
> +       }
> +
> +       msi->msi_virqs[irq_index] = virt_msir;
> +
> +       return 0;
> +}
> +
> +static const struct of_device_id xgene_msi_match_table[] = {
> +       {.compatible = "apm,xgene1-msi",
> +        .data = xgene_msi_init_storm_settings},
> +       {},
> +};
> +
> +static int xgene_msi_probe(struct platform_device *pdev)
> +{
> +       struct resource *res;
> +       int rc, irq_index;
> +       struct device_node *np;
> +       const struct of_device_id *matched_np;
> +       struct xgene_msi *xgene_msi;
> +       xgene_msi_initcall_t init_fn;
> +       u32 nr_hw_irqs, nr_msi_vecs;
> +
> +       np = of_find_matching_node_and_match(NULL,
> +                            xgene_msi_match_table, &matched_np);
> +       if (!np)
> +               return -ENODEV;
> +
> +       xgene_msi = kzalloc(sizeof(struct xgene_msi), GFP_KERNEL);
> +       if (!xgene_msi) {
> +               dev_err(&pdev->dev, "failed to allocate X-Gene MSI data\n");
> +               return -ENOMEM;
> +       }
> +
> +       xgene_msi->node = np;
> +
> +       init_fn = (xgene_msi_initcall_t) matched_np->data;
> +       rc = init_fn(xgene_msi);
> +       if (rc)
> +               return rc;
> +
> +       platform_set_drvdata(pdev, xgene_msi);
> +
> +       nr_msi_vecs = xgene_msi->settings->nr_msi_vec;
> +
> +       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +       xgene_msi->msi_regs = devm_ioremap_resource(&pdev->dev, res);
> +       if (IS_ERR(xgene_msi->msi_regs)) {
> +               dev_err(&pdev->dev, "no reg space\n");
> +               rc = -EINVAL;
> +               goto error;
> +       }
> +
> +       xgene_msi->msi_addr_hi = upper_32_bits(res->start);
> +       xgene_msi->msi_addr_lo = lower_32_bits(res->start);
> +
> +       rc = xgene_msi_init_allocator(xgene_msi);
> +       if (rc) {
> +               dev_err(&pdev->dev, "Error allocating MSI bitmap\n");
> +               goto error;
> +       }
> +
> +       rc = xgene_allocate_domains(xgene_msi);
> +       if (rc) {
> +               dev_err(&pdev->dev, "Failed to allocate MSI domain\n");
> +               goto error;
> +       }
> +
> +       nr_hw_irqs = xgene_msi->settings->nr_hw_irqs;
> +       for (irq_index = 0; irq_index < nr_hw_irqs; irq_index++) {
> +               rc = xgene_msi_setup_hwirq(xgene_msi, pdev, irq_index);
> +               if (rc)
> +                       goto error;
> +       }
> +
> +       rc = of_pci_msi_chip_add(&xgene_msi->mchip);
> +       if (rc) {
> +               dev_err(&pdev->dev, "failed to add MSI controller chip\n");
> +               goto error;
> +       }
> +
> +       dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n");
> +
> +       return 0;
> +error:
> +       xgene_msi_remove(pdev);
> +       return rc;
> +}
> +
> +static struct platform_driver xgene_msi_driver = {
> +       .driver = {
> +               .name = "xgene-msi",
> +               .owner = THIS_MODULE,
> +               .of_match_table = xgene_msi_match_table,
> +       },
> +       .probe = xgene_msi_probe,
> +       .remove = xgene_msi_remove,
> +};
> +
> +static int __init xgene_pcie_msi_init(void)
> +{
> +       return platform_driver_register(&xgene_msi_driver);
> +}
> +subsys_initcall(xgene_pcie_msi_init);
> +
> +MODULE_AUTHOR("Duc Dang <dhdang@apm.com>");
> +MODULE_DESCRIPTION("APM X-Gene PCIe MSI driver");
> +MODULE_LICENSE("GPL v2");
> diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
> index 22751ed..3e6faa1 100644
> --- a/drivers/pci/host/pci-xgene.c
> +++ b/drivers/pci/host/pci-xgene.c
> @@ -468,6 +468,23 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port,
>         return 0;
>  }
> 
> +static int xgene_pcie_msi_enable(struct pci_bus *bus)
> +{
> +       struct device_node *msi_node;
> +
> +       msi_node = of_parse_phandle(bus->dev.of_node,
> +                                       "msi-parent", 0);
> +       if (!msi_node)
> +               return -ENODEV;
> +
> +       bus->msi = of_pci_find_msi_chip_by_node(msi_node);
> +       if (bus->msi)
> +               bus->msi->dev = &bus->dev;
> +       else
> +               return -ENODEV;
> +       return 0;
> +}
> +
>  static int xgene_pcie_probe_bridge(struct platform_device *pdev)
>  {
>         struct device_node *dn = pdev->dev.of_node;
> @@ -504,6 +521,10 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev)
>         if (!bus)
>                 return -ENOMEM;
> 
> +       if (IS_ENABLED(CONFIG_PCI_MSI))
> +               if (xgene_pcie_msi_enable(bus))
> +                       dev_info(port->dev, "failed to enable MSI\n");
> +
>         pci_scan_child_bus(bus);
>         pci_assign_unassigned_bus_resources(bus);
>         pci_bus_add_devices(bus);
> --
> 1.9.1
> 

Thanks,

	M.
Paul Bolle April 10, 2015, 6:13 p.m. UTC | #6
Just a nit about a license mismatch.

On Thu, 2015-04-09 at 10:05 -0700, Duc Dang wrote:
> --- a/drivers/pci/host/Kconfig
> +++ b/drivers/pci/host/Kconfig
 
> +config PCI_XGENE_MSI
> +	bool "X-Gene v1 PCIe MSI feature"
> +	depends on PCI_XGENE && PCI_MSI

> --- a/drivers/pci/host/Makefile
> +++ b/drivers/pci/host/Makefile

> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o

> --- /dev/null
> +++ b/drivers/pci/host/pci-xgene-msi.c

> + * This program is free software; you can redistribute  it and/or modify it
> + * under  the terms of  the GNU General  Public License as published by the
> + * Free Software Foundation;  either version 2 of the  License, or (at your
> + * option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.

This states the license of this file is GPL v2 or later.

> +MODULE_LICENSE("GPL v2");

And, according to include/linux/module.h, this states the license of
this driver is GPL v2. So either the comment at the top of this file or
the license ident used in this macro needs to change.

(Now, as far as I can tell, this driver is built-in only. So I think
this macro - and all other module specific code in this file - is
actually unneeded. But I already know Bjorn's position here, so I won't
bother you about that.)

Thanks,


Paul Bolle
Duc Dang April 10, 2015, 11:42 p.m. UTC | #7
On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 09/04/15 18:05, Duc Dang wrote:
>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
>> 16 HW IRQ lines.
>>
>> Signed-off-by: Duc Dang <dhdang@apm.com>
>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
>> ---
>>  drivers/pci/host/Kconfig         |   6 +
>>  drivers/pci/host/Makefile        |   1 +
>>  drivers/pci/host/pci-xgene-msi.c | 407 +++++++++++++++++++++++++++++++++++++++
>>  drivers/pci/host/pci-xgene.c     |  21 ++
>>  4 files changed, 435 insertions(+)
>>  create mode 100644 drivers/pci/host/pci-xgene-msi.c
>>
>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>> index 7b892a9..c9b61fa 100644
>> --- a/drivers/pci/host/Kconfig
>> +++ b/drivers/pci/host/Kconfig
>> @@ -89,11 +89,17 @@ config PCI_XGENE
>>         depends on ARCH_XGENE
>>         depends on OF
>>         select PCIEPORTBUS
>> +       select PCI_MSI_IRQ_DOMAIN if PCI_MSI
>> +       select PCI_XGENE_MSI if PCI_MSI
>>         help
>>           Say Y here if you want internal PCI support on APM X-Gene SoC.
>>           There are 5 internal PCIe ports available. Each port is GEN3 capable
>>           and have varied lanes from x1 to x8.
>>
>> +config PCI_XGENE_MSI
>> +       bool "X-Gene v1 PCIe MSI feature"
>> +       depends on PCI_XGENE && PCI_MSI
>> +
>>  config PCI_LAYERSCAPE
>>         bool "Freescale Layerscape PCIe controller"
>>         depends on OF && ARM
>> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
>> index e61d91c..f39bde3 100644
>> --- a/drivers/pci/host/Makefile
>> +++ b/drivers/pci/host/Makefile
>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
>>  obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o
>>  obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
>>  obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
>>  obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
>>  obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
>> diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c
>> new file mode 100644
>> index 0000000..4f0ff42
>> --- /dev/null
>> +++ b/drivers/pci/host/pci-xgene-msi.c
>> @@ -0,0 +1,407 @@
>> +/*
>> + * APM X-Gene MSI Driver
>> + *
>> + * Copyright (c) 2014, Applied Micro Circuits Corporation
>> + * Author: Tanmay Inamdar <tinamdar@apm.com>
>> + *        Duc Dang <dhdang@apm.com>
>> + *
>> + * This program is free software; you can redistribute  it and/or modify it
>> + * under  the terms of  the GNU General  Public License as published by the
>> + * Free Software Foundation;  either version 2 of the  License, or (at your
>> + * option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +#include <linux/interrupt.h>
>> +#include <linux/module.h>
>> +#include <linux/msi.h>
>> +#include <linux/of_irq.h>
>> +#include <linux/pci.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/of_pci.h>
>> +
>> +#define MSI_INDEX0             0x000000
>> +#define MSI_INT0               0x800000
>> +
>> +struct xgene_msi_settings {
>> +       u32     index_per_group;
>> +       u32     irqs_per_index;
>> +       u32     nr_msi_vec;
>> +       u32     nr_hw_irqs;
>> +};
>> +
>> +struct xgene_msi {
>> +       struct device_node              *node;
>> +       struct msi_controller           mchip;
>> +       struct irq_domain               *domain;
>> +       struct xgene_msi_settings       *settings;
>> +       u32                             msi_addr_lo;
>> +       u32                             msi_addr_hi;
>
> I'd rather see the mailbox address directly, and only do the split when
> assigning it to the message (you seem to play all kind of tricks on the
> address anyway).

msi_addr_lo and msi_addr_hi store the physical base address of MSI
controller registers. I will add comment to clarify this.
>
>> +       void __iomem                    *msi_regs;
>> +       unsigned long                   *bitmap;
>> +       struct mutex                    bitmap_lock;
>> +       int                             *msi_virqs;
>> +};
>> +
>> +struct xgene_msi_settings storm_msi_settings = {
>> +       .index_per_group        = 8,
>> +       .irqs_per_index         = 21,
>> +       .nr_msi_vec             = 2688,
>> +       .nr_hw_irqs             = 16,
>> +};
>
> It would really help understanding how index, group and hw irq lines are
> structured. nr_msi_vec is obviously the product of these three numbers,
> so maybe you can loose it (we have computers, remember... ;-).
>
> Do you expect more than this single "storm" implementation? If so, maybe
> they should be described in the DT. If not, why do we need a separate
> structure with an indirection if we know these are constants?
>
Yes, I will add a description of the index, group, and hw irq line
structure, and also replace the fields in this storm_msi_settings
structure with constant definitions.

>> +
>> +typedef int (*xgene_msi_initcall_t)(struct xgene_msi *);
>> +
>> +static int xgene_msi_init_storm_settings(struct xgene_msi *xgene_msi)
>> +{
>> +       xgene_msi->settings = &storm_msi_settings;
>> +       return 0;
>> +}
>> +
>> +static struct irq_chip xgene_msi_top_irq_chip = {
>> +       .name           = "X-Gene1 MSI",
>> +       .irq_enable     = pci_msi_unmask_irq,
>> +       .irq_disable    = pci_msi_mask_irq,
>> +       .irq_mask       = pci_msi_mask_irq,
>> +       .irq_unmask     = pci_msi_unmask_irq,
>> +};
>> +
>> +static struct  msi_domain_info xgene_msi_domain_info = {
>> +       .flags  = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
>> +                 MSI_FLAG_PCI_MSIX),
>> +       .chip   = &xgene_msi_top_irq_chip,
>> +};
>> +
>> +static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>> +{
>> +       struct xgene_msi *msi = irq_data_get_irq_chip_data(data);
>> +       u32 nr_hw_irqs = msi->settings->nr_hw_irqs;
>> +       u32 irqs_per_index = msi->settings->irqs_per_index;
>> +       u32 reg_set = data->hwirq / (nr_hw_irqs * irqs_per_index);
>> +       u32 group = data->hwirq % nr_hw_irqs;
>> +
>> +       msg->address_hi = msi->msi_addr_hi;
>> +       msg->address_lo = msi->msi_addr_lo +
>> +                         (((8 * group) + reg_set) << 16);
>> +       msg->data = (data->hwirq / nr_hw_irqs) % irqs_per_index;
>
> ... (sound of head exploding...). I hate it when I have to reverse
> engineer the hardware by looking at the code...
>
> Please give us some clue on how interrupts are spread across the various
> pages, how the various indexes are combined to give an actual address.

I will add description about this.
>
>> +}
>> +
>> +static int xgene_msi_set_affinity(struct irq_data *irq_data,
>> +                                 const struct cpumask *mask, bool force)
>> +{
>> +       struct xgene_msi *msi = irq_data_get_irq_chip_data(irq_data);
>> +       unsigned int gic_irq;
>> +       int ret;
>> +
>> +       gic_irq = msi->msi_virqs[irq_data->hwirq % msi->settings->nr_hw_irqs];
>> +       ret = irq_set_affinity(gic_irq, mask);
>
> Erm... This as the effect of moving *all* the MSIs hanging off this
> interrupt to another CPU. I'm not sure that's an acceptable effect...
> What if another MSI requires a different affinity?

We have 16 'real' hardware IRQs. Each of these has multiple MSIs attached to it.
So this will move all MSIs hanging off this interrupt to another CPU;
and we don't support different affinity settings for different MSIs
that are attached to the same hardware IRQ.

>
>> +       if (ret == IRQ_SET_MASK_OK)
>> +               ret = IRQ_SET_MASK_OK_DONE;
>> +
>> +       return ret;
>> +}
>> +
>> +static struct irq_chip xgene_msi_bottom_irq_chip = {
>> +       .name                   = "MSI",
>> +       .irq_set_affinity       = xgene_msi_set_affinity,
>> +       .irq_compose_msi_msg    = xgene_compose_msi_msg,
>> +};
>> +
>> +static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
>> +                                 unsigned int nr_irqs, void *args)
>> +{
>> +
>> +       struct xgene_msi *msi = domain->host_data;
>> +       u32 msi_irq_count = msi->settings->nr_msi_vec;
>> +       int msi_irq;
>> +
>> +       mutex_lock(&msi->bitmap_lock);
>> +
>> +       msi_irq = find_first_zero_bit(msi->bitmap, msi_irq_count);
>> +       if (msi_irq < msi_irq_count)
>> +               set_bit(msi_irq, msi->bitmap);
>> +       else
>> +               msi_irq = -ENOSPC;
>> +
>> +       mutex_unlock(&msi->bitmap_lock);
>> +
>> +       if (msi_irq < 0)
>> +               return msi_irq;
>> +
>> +       irq_domain_set_info(domain, virq, msi_irq, &xgene_msi_bottom_irq_chip,
>> +                           domain->host_data, handle_simple_irq, NULL, NULL);
>> +       set_irq_flags(virq, IRQF_VALID);
>> +
>> +       return 0;
>> +}
>> +
>> +static void xgene_irq_domain_free(struct irq_domain *domain,
>> +                                 unsigned int virq, unsigned int nr_irqs)
>> +{
>> +       struct irq_data *d = irq_domain_get_irq_data(domain, virq);
>> +       struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
>> +
>> +       mutex_lock(&msi->bitmap_lock);
>> +
>> +       if (!test_bit(d->hwirq, msi->bitmap))
>> +               pr_err("trying to free unused MSI#%lu\n", d->hwirq);
>> +       else
>> +               clear_bit(d->hwirq, msi->bitmap);
>> +
>> +       mutex_unlock(&msi->bitmap_lock);
>> +
>> +       irq_domain_free_irqs_parent(domain, virq, nr_irqs);
>> +}
>> +
>> +static const struct irq_domain_ops msi_domain_ops = {
>> +       .alloc  = xgene_irq_domain_alloc,
>> +       .free   = xgene_irq_domain_free,
>> +};
>> +
>> +static int xgene_allocate_domains(struct xgene_msi *msi)
>> +{
>> +       msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec,
>> +                                           &msi_domain_ops, msi);
>> +
>> +       if (!msi->domain) {
>> +               pr_err("failed to create parent MSI domain\n");
>> +               return -ENOMEM;
>> +       }
>> +
>> +       msi->mchip.of_node = msi->node;
>> +       msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node,
>> +                                                     &xgene_msi_domain_info,
>> +                                                     msi->domain);
>> +
>> +       if (!msi->mchip.domain) {
>> +               pr_err("failed to create MSI domain\n");
>> +               irq_domain_remove(msi->domain);
>> +               return -ENOMEM;
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +static void xgene_free_domains(struct xgene_msi *msi)
>> +{
>> +       if (msi->mchip.domain)
>> +               irq_domain_remove(msi->mchip.domain);
>> +       if (msi->domain)
>> +               irq_domain_remove(msi->domain);
>> +}
>> +
>> +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
>> +{
>> +       u32 msi_irq_count = xgene_msi->settings->nr_msi_vec;
>> +       u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs;
>> +       int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long);
>> +
>> +       xgene_msi->bitmap = kzalloc(size, GFP_KERNEL);
>> +       if (!xgene_msi->bitmap)
>> +               return -ENOMEM;
>> +
>> +       mutex_init(&xgene_msi->bitmap_lock);
>> +
>> +       xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL);
>> +       if (!xgene_msi->msi_virqs)
>> +               return -ENOMEM;
>> +
>> +       return 0;
>> +}
>> +
>> +static irqreturn_t xgene_msi_isr(int irq, void *data)
>
> This is not a "normal" interrupt handler. This is really a chained
> interrupt controller. Please use the proper infrastructure for this
> (irq_set_chained_handler, chained_irq_enter, chained_irq_exit).
> Otherwise, your usage of irq_find_mapping is illegal wrt RCU.
>
Yes, I will change this function to protect it with chained_irq_enter
and chained_irq_exit.

>> +{
>> +       struct xgene_msi *xgene_msi = (struct xgene_msi *) data;
>> +       unsigned int virq;
>> +       int msir_index, msir_reg, msir_val, hw_irq;
>> +       u32 intr_index, grp_select, msi_grp, processed = 0;
>> +       u32 nr_hw_irqs, irqs_per_index, index_per_group;
>> +
>> +       msi_grp = irq - xgene_msi->msi_virqs[0];
>> +       if (msi_grp >= xgene_msi->settings->nr_hw_irqs) {
>> +               pr_err("invalid msi received\n");
>> +               return IRQ_NONE;
>> +       }
>> +
>> +       nr_hw_irqs = xgene_msi->settings->nr_hw_irqs;
>> +       irqs_per_index = xgene_msi->settings->irqs_per_index;
>> +       index_per_group = xgene_msi->settings->index_per_group;
>> +
>> +       grp_select = readl(xgene_msi->msi_regs + MSI_INT0 + (msi_grp << 16));
>> +       while (grp_select) {
>> +               msir_index = ffs(grp_select) - 1;
>> +               msir_reg = (msi_grp << 19) + (msir_index << 16);
>> +               msir_val = readl(xgene_msi->msi_regs + MSI_INDEX0 + msir_reg);
>> +               while (msir_val) {
>> +                       intr_index = ffs(msir_val) - 1;
>> +                       hw_irq = (((msir_index * irqs_per_index) + intr_index) *
>> +                                nr_hw_irqs) + msi_grp;
>
> Same thing here. This requires some explaination on how the HW is organised.

I will add a description for this.

>
>> +                       virq = irq_find_mapping(xgene_msi->domain, hw_irq);
>> +                       if (virq != 0)
>> +                               generic_handle_irq(virq);
>> +                       msir_val &= ~(1 << intr_index);
>> +                       processed++;
>> +               }
>> +               grp_select &= ~(1 << msir_index);
>> +       }
>> +
>> +       return processed > 0 ? IRQ_HANDLED : IRQ_NONE;
>> +}
>> +
>> +static int xgene_msi_remove(struct platform_device *pdev)
>> +{
>> +       int virq, i;
>> +       struct xgene_msi *msi = platform_get_drvdata(pdev);
>> +       u32 nr_hw_irqs = msi->settings->nr_hw_irqs;
>> +
>> +       for (i = 0; i < nr_hw_irqs; i++) {
>> +               virq = msi->msi_virqs[i];
>> +               if (virq != 0)
>> +                       free_irq(virq, msi);
>> +       }
>> +
>> +       kfree(msi->bitmap);
>> +       msi->bitmap = NULL;
>> +
>> +       xgene_free_domains(msi);
>> +
>> +       return 0;
>> +}
>> +
>> +static int xgene_msi_setup_hwirq(struct xgene_msi *msi,
>> +                                struct platform_device *pdev,
>> +                                int irq_index)
>> +{
>> +       int virt_msir;
>> +       cpumask_var_t mask;
>> +       int err;
>> +
>> +       virt_msir = platform_get_irq(pdev, irq_index);
>> +       if (virt_msir < 0) {
>> +               dev_err(&pdev->dev, "Cannot translate IRQ index %d\n",
>> +                       irq_index);
>> +               return -EINVAL;
>> +       }
>> +
>> +       err = request_irq(virt_msir, xgene_msi_isr, 0,
>> +                         xgene_msi_top_irq_chip.name, msi);
>
> This is where you should use irq_set_chained_handler.

I will replace request_irq with irq_set_chained_handler and irq_set_handler_data
>
>> +       if (err) {
>> +               dev_err(&pdev->dev, "request irq failed\n");
>> +               return err;
>> +       }
>> +
>> +       if (alloc_cpumask_var(&mask, GFP_KERNEL)) {
>> +               cpumask_setall(mask);
>> +               irq_set_affinity(virt_msir, mask);
>> +               free_cpumask_var(mask);
>> +       }
>> +
>> +       msi->msi_virqs[irq_index] = virt_msir;
>> +
>> +       return 0;
>> +}
>> +
>> +static const struct of_device_id xgene_msi_match_table[] = {
>> +       {.compatible = "apm,xgene1-msi",
>> +        .data = xgene_msi_init_storm_settings},
>> +       {},
>> +};
>> +
>> +static int xgene_msi_probe(struct platform_device *pdev)
>> +{
>> +       struct resource *res;
>> +       int rc, irq_index;
>> +       struct device_node *np;
>> +       const struct of_device_id *matched_np;
>> +       struct xgene_msi *xgene_msi;
>> +       xgene_msi_initcall_t init_fn;
>> +       u32 nr_hw_irqs, nr_msi_vecs;
>> +
>> +       np = of_find_matching_node_and_match(NULL,
>> +                            xgene_msi_match_table, &matched_np);
>> +       if (!np)
>> +               return -ENODEV;
>> +
>> +       xgene_msi = kzalloc(sizeof(struct xgene_msi), GFP_KERNEL);
>> +       if (!xgene_msi) {
>> +               dev_err(&pdev->dev, "failed to allocate X-Gene MSI data\n");
>> +               return -ENOMEM;
>> +       }
>> +
>> +       xgene_msi->node = np;
>> +
>> +       init_fn = (xgene_msi_initcall_t) matched_np->data;
>> +       rc = init_fn(xgene_msi);
>> +       if (rc)
>> +               return rc;
>> +
>> +       platform_set_drvdata(pdev, xgene_msi);
>> +
>> +       nr_msi_vecs = xgene_msi->settings->nr_msi_vec;
>> +
>> +       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>> +       xgene_msi->msi_regs = devm_ioremap_resource(&pdev->dev, res);
>> +       if (IS_ERR(xgene_msi->msi_regs)) {
>> +               dev_err(&pdev->dev, "no reg space\n");
>> +               rc = -EINVAL;
>> +               goto error;
>> +       }
>> +
>> +       xgene_msi->msi_addr_hi = upper_32_bits(res->start);
>> +       xgene_msi->msi_addr_lo = lower_32_bits(res->start);
>> +
>> +       rc = xgene_msi_init_allocator(xgene_msi);
>> +       if (rc) {
>> +               dev_err(&pdev->dev, "Error allocating MSI bitmap\n");
>> +               goto error;
>> +       }
>> +
>> +       rc = xgene_allocate_domains(xgene_msi);
>> +       if (rc) {
>> +               dev_err(&pdev->dev, "Failed to allocate MSI domain\n");
>> +               goto error;
>> +       }
>> +
>> +       nr_hw_irqs = xgene_msi->settings->nr_hw_irqs;
>> +       for (irq_index = 0; irq_index < nr_hw_irqs; irq_index++) {
>> +               rc = xgene_msi_setup_hwirq(xgene_msi, pdev, irq_index);
>> +               if (rc)
>> +                       goto error;
>> +       }
>> +
>> +       rc = of_pci_msi_chip_add(&xgene_msi->mchip);
>> +       if (rc) {
>> +               dev_err(&pdev->dev, "failed to add MSI controller chip\n");
>> +               goto error;
>> +       }
>> +
>> +       dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n");
>> +
>> +       return 0;
>> +error:
>> +       xgene_msi_remove(pdev);
>> +       return rc;
>> +}
>> +
>> +static struct platform_driver xgene_msi_driver = {
>> +       .driver = {
>> +               .name = "xgene-msi",
>> +               .owner = THIS_MODULE,
>> +               .of_match_table = xgene_msi_match_table,
>> +       },
>> +       .probe = xgene_msi_probe,
>> +       .remove = xgene_msi_remove,
>> +};
>> +
>> +static int __init xgene_pcie_msi_init(void)
>> +{
>> +       return platform_driver_register(&xgene_msi_driver);
>> +}
>> +subsys_initcall(xgene_pcie_msi_init);
>> +
>> +MODULE_AUTHOR("Duc Dang <dhdang@apm.com>");
>> +MODULE_DESCRIPTION("APM X-Gene PCIe MSI driver");
>> +MODULE_LICENSE("GPL v2");
>> diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
>> index 22751ed..3e6faa1 100644
>> --- a/drivers/pci/host/pci-xgene.c
>> +++ b/drivers/pci/host/pci-xgene.c
>> @@ -468,6 +468,23 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port,
>>         return 0;
>>  }
>>
>> +static int xgene_pcie_msi_enable(struct pci_bus *bus)
>> +{
>> +       struct device_node *msi_node;
>> +
>> +       msi_node = of_parse_phandle(bus->dev.of_node,
>> +                                       "msi-parent", 0);
>> +       if (!msi_node)
>> +               return -ENODEV;
>> +
>> +       bus->msi = of_pci_find_msi_chip_by_node(msi_node);
>> +       if (bus->msi)
>> +               bus->msi->dev = &bus->dev;
>> +       else
>> +               return -ENODEV;
>> +       return 0;
>> +}
>> +
>>  static int xgene_pcie_probe_bridge(struct platform_device *pdev)
>>  {
>>         struct device_node *dn = pdev->dev.of_node;
>> @@ -504,6 +521,10 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev)
>>         if (!bus)
>>                 return -ENOMEM;
>>
>> +       if (IS_ENABLED(CONFIG_PCI_MSI))
>> +               if (xgene_pcie_msi_enable(bus))
>> +                       dev_info(port->dev, "failed to enable MSI\n");
>> +
>>         pci_scan_child_bus(bus);
>>         pci_assign_unassigned_bus_resources(bus);
>>         pci_bus_add_devices(bus);
>> --
>> 1.9.1
>>
>
> Thanks,
>
>         M.
> --
> Jazz is not dead. It just smells funny...
Regards,
Duc Dang.
Duc Dang April 10, 2015, 11:55 p.m. UTC | #8
On Fri, Apr 10, 2015 at 11:13 AM, Paul Bolle <pebolle@tiscali.nl> wrote:
> Just a nit about a license mismatch.
>
> On Thu, 2015-04-09 at 10:05 -0700, Duc Dang wrote:
>> --- a/drivers/pci/host/Kconfig
>> +++ b/drivers/pci/host/Kconfig
>
>> +config PCI_XGENE_MSI
>> +     bool "X-Gene v1 PCIe MSI feature"
>> +     depends on PCI_XGENE && PCI_MSI
>
>> --- a/drivers/pci/host/Makefile
>> +++ b/drivers/pci/host/Makefile
>
>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
>
>> --- /dev/null
>> +++ b/drivers/pci/host/pci-xgene-msi.c
>
>> + * This program is free software; you can redistribute  it and/or modify it
>> + * under  the terms of  the GNU General  Public License as published by the
>> + * Free Software Foundation;  either version 2 of the  License, or (at your
>> + * option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>
> This states the license of this file is GPL v2 or later.
>
>> +MODULE_LICENSE("GPL v2");
>
> And, according to include/linux/module.h, this states the license of
> this driver is GPL v2. So either the comment at the top of this file or
> the license ident used in this macro needs to change.
>
Thanks, I will fix this.

> (Now, as far as I can tell, this driver is built-in only. So I think
> this macro - and all other module specific code in this file - is
> actually unneeded. But I already know Bjorn's position here, so I won't
> bother you about that.)
>
> Thanks,
>
>
> Paul Bolle
>

Regards,
Duc Dang.
Feng Kan April 11, 2015, 12:16 a.m. UTC | #9
Hi Marc:

Are there any plans to support ACPI for GICv2m MSI? Both this driver and
GICv2m seem to support the OF model of discovery for the MSI controller.
X-Gene1 uses this driver and X-Gene2 uses GICv2m, so there needs to be a
way to associate the MSI controller with the PCIe bus. I haven't found a
standard way of finding "msi-parent" for ACPI. Do you have any
suggestions?

Sorry for top posting.


On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 09/04/15 18:05, Duc Dang wrote:
>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
>> 16 HW IRQ lines.
>>
>> Signed-off-by: Duc Dang <dhdang@apm.com>
>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
>> ---
>>  drivers/pci/host/Kconfig         |   6 +
>>  drivers/pci/host/Makefile        |   1 +
>>  drivers/pci/host/pci-xgene-msi.c | 407 +++++++++++++++++++++++++++++++++++++++
>>  drivers/pci/host/pci-xgene.c     |  21 ++
>>  4 files changed, 435 insertions(+)
>>  create mode 100644 drivers/pci/host/pci-xgene-msi.c
>>
>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>> index 7b892a9..c9b61fa 100644
>> --- a/drivers/pci/host/Kconfig
>> +++ b/drivers/pci/host/Kconfig
>> @@ -89,11 +89,17 @@ config PCI_XGENE
>>         depends on ARCH_XGENE
>>         depends on OF
>>         select PCIEPORTBUS
>> +       select PCI_MSI_IRQ_DOMAIN if PCI_MSI
>> +       select PCI_XGENE_MSI if PCI_MSI
>>         help
>>           Say Y here if you want internal PCI support on APM X-Gene SoC.
>>           There are 5 internal PCIe ports available. Each port is GEN3 capable
>>           and have varied lanes from x1 to x8.
>>
>> +config PCI_XGENE_MSI
>> +       bool "X-Gene v1 PCIe MSI feature"
>> +       depends on PCI_XGENE && PCI_MSI
>> +
>>  config PCI_LAYERSCAPE
>>         bool "Freescale Layerscape PCIe controller"
>>         depends on OF && ARM
>> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
>> index e61d91c..f39bde3 100644
>> --- a/drivers/pci/host/Makefile
>> +++ b/drivers/pci/host/Makefile
>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
>>  obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o
>>  obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
>>  obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
>>  obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
>>  obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
>> diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c
>> new file mode 100644
>> index 0000000..4f0ff42
>> --- /dev/null
>> +++ b/drivers/pci/host/pci-xgene-msi.c
>> @@ -0,0 +1,407 @@
>> +/*
>> + * APM X-Gene MSI Driver
>> + *
>> + * Copyright (c) 2014, Applied Micro Circuits Corporation
>> + * Author: Tanmay Inamdar <tinamdar@apm.com>
>> + *        Duc Dang <dhdang@apm.com>
>> + *
>> + * This program is free software; you can redistribute  it and/or modify it
>> + * under  the terms of  the GNU General  Public License as published by the
>> + * Free Software Foundation;  either version 2 of the  License, or (at your
>> + * option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +#include <linux/interrupt.h>
>> +#include <linux/module.h>
>> +#include <linux/msi.h>
>> +#include <linux/of_irq.h>
>> +#include <linux/pci.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/of_pci.h>
>> +
>> +#define MSI_INDEX0             0x000000
>> +#define MSI_INT0               0x800000
>> +
>> +struct xgene_msi_settings {
>> +       u32     index_per_group;
>> +       u32     irqs_per_index;
>> +       u32     nr_msi_vec;
>> +       u32     nr_hw_irqs;
>> +};
>> +
>> +struct xgene_msi {
>> +       struct device_node              *node;
>> +       struct msi_controller           mchip;
>> +       struct irq_domain               *domain;
>> +       struct xgene_msi_settings       *settings;
>> +       u32                             msi_addr_lo;
>> +       u32                             msi_addr_hi;
>
> I'd rather see the mailbox address directly, and only do the split when
> assigning it to the message (you seem to play all kind of tricks on the
> address anyway).
>
>> +       void __iomem                    *msi_regs;
>> +       unsigned long                   *bitmap;
>> +       struct mutex                    bitmap_lock;
>> +       int                             *msi_virqs;
>> +};
>> +
>> +struct xgene_msi_settings storm_msi_settings = {
>> +       .index_per_group        = 8,
>> +       .irqs_per_index         = 21,
>> +       .nr_msi_vec             = 2688,
>> +       .nr_hw_irqs             = 16,
>> +};
>
> It would really help understanding how index, group and hw irq lines are
> structured. nr_msi_vec is obviously the product of these three numbers,
> so maybe you can loose it (we have computers, remember... ;-).
>
> Do you expect more than this single "storm" implementation? If so, maybe
> they should be described in the DT. If not, why do we need a separate
> structure with an indirection if we know these are constants?
>
>> +
>> +typedef int (*xgene_msi_initcall_t)(struct xgene_msi *);
>> +
>> +static int xgene_msi_init_storm_settings(struct xgene_msi *xgene_msi)
>> +{
>> +       xgene_msi->settings = &storm_msi_settings;
>> +       return 0;
>> +}
>> +
>> +static struct irq_chip xgene_msi_top_irq_chip = {
>> +       .name           = "X-Gene1 MSI",
>> +       .irq_enable     = pci_msi_unmask_irq,
>> +       .irq_disable    = pci_msi_mask_irq,
>> +       .irq_mask       = pci_msi_mask_irq,
>> +       .irq_unmask     = pci_msi_unmask_irq,
>> +};
>> +
>> +static struct  msi_domain_info xgene_msi_domain_info = {
>> +       .flags  = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
>> +                 MSI_FLAG_PCI_MSIX),
>> +       .chip   = &xgene_msi_top_irq_chip,
>> +};
>> +
>> +static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>> +{
>> +       struct xgene_msi *msi = irq_data_get_irq_chip_data(data);
>> +       u32 nr_hw_irqs = msi->settings->nr_hw_irqs;
>> +       u32 irqs_per_index = msi->settings->irqs_per_index;
>> +       u32 reg_set = data->hwirq / (nr_hw_irqs * irqs_per_index);
>> +       u32 group = data->hwirq % nr_hw_irqs;
>> +
>> +       msg->address_hi = msi->msi_addr_hi;
>> +       msg->address_lo = msi->msi_addr_lo +
>> +                         (((8 * group) + reg_set) << 16);
>> +       msg->data = (data->hwirq / nr_hw_irqs) % irqs_per_index;
>
> ... (sound of head exploding...). I hate it when I have to reverse
> engineer the hardware by looking at the code...
>
> Please give us some clue on how interrupts are spread across the various
> pages, how the various indexes are combined to give an actual address.
>
>> +}
>> +
>> +static int xgene_msi_set_affinity(struct irq_data *irq_data,
>> +                                 const struct cpumask *mask, bool force)
>> +{
>> +       struct xgene_msi *msi = irq_data_get_irq_chip_data(irq_data);
>> +       unsigned int gic_irq;
>> +       int ret;
>> +
>> +       gic_irq = msi->msi_virqs[irq_data->hwirq % msi->settings->nr_hw_irqs];
>> +       ret = irq_set_affinity(gic_irq, mask);
>
> Erm... This as the effect of moving *all* the MSIs hanging off this
> interrupt to another CPU. I'm not sure that's an acceptable effect...
> What if another MSI requires a different affinity?
>
>> +       if (ret == IRQ_SET_MASK_OK)
>> +               ret = IRQ_SET_MASK_OK_DONE;
>> +
>> +       return ret;
>> +}
>> +
>> +static struct irq_chip xgene_msi_bottom_irq_chip = {
>> +       .name                   = "MSI",
>> +       .irq_set_affinity       = xgene_msi_set_affinity,
>> +       .irq_compose_msi_msg    = xgene_compose_msi_msg,
>> +};
>> +
>> +static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
>> +                                 unsigned int nr_irqs, void *args)
>> +{
>> +
>> +       struct xgene_msi *msi = domain->host_data;
>> +       u32 msi_irq_count = msi->settings->nr_msi_vec;
>> +       int msi_irq;
>> +
>> +       mutex_lock(&msi->bitmap_lock);
>> +
>> +       msi_irq = find_first_zero_bit(msi->bitmap, msi_irq_count);
>> +       if (msi_irq < msi_irq_count)
>> +               set_bit(msi_irq, msi->bitmap);
>> +       else
>> +               msi_irq = -ENOSPC;
>> +
>> +       mutex_unlock(&msi->bitmap_lock);
>> +
>> +       if (msi_irq < 0)
>> +               return msi_irq;
>> +
>> +       irq_domain_set_info(domain, virq, msi_irq, &xgene_msi_bottom_irq_chip,
>> +                           domain->host_data, handle_simple_irq, NULL, NULL);
>> +       set_irq_flags(virq, IRQF_VALID);
>> +
>> +       return 0;
>> +}
>> +
>> +static void xgene_irq_domain_free(struct irq_domain *domain,
>> +                                 unsigned int virq, unsigned int nr_irqs)
>> +{
>> +       struct irq_data *d = irq_domain_get_irq_data(domain, virq);
>> +       struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
>> +
>> +       mutex_lock(&msi->bitmap_lock);
>> +
>> +       if (!test_bit(d->hwirq, msi->bitmap))
>> +               pr_err("trying to free unused MSI#%lu\n", d->hwirq);
>> +       else
>> +               clear_bit(d->hwirq, msi->bitmap);
>> +
>> +       mutex_unlock(&msi->bitmap_lock);
>> +
>> +       irq_domain_free_irqs_parent(domain, virq, nr_irqs);
>> +}
>> +
>> +static const struct irq_domain_ops msi_domain_ops = {
>> +       .alloc  = xgene_irq_domain_alloc,
>> +       .free   = xgene_irq_domain_free,
>> +};
>> +
>> +static int xgene_allocate_domains(struct xgene_msi *msi)
>> +{
>> +       msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec,
>> +                                           &msi_domain_ops, msi);
>> +
>> +       if (!msi->domain) {
>> +               pr_err("failed to create parent MSI domain\n");
>> +               return -ENOMEM;
>> +       }
>> +
>> +       msi->mchip.of_node = msi->node;
>> +       msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node,
>> +                                                     &xgene_msi_domain_info,
>> +                                                     msi->domain);
>> +
>> +       if (!msi->mchip.domain) {
>> +               pr_err("failed to create MSI domain\n");
>> +               irq_domain_remove(msi->domain);
>> +               return -ENOMEM;
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +static void xgene_free_domains(struct xgene_msi *msi)
>> +{
>> +       if (msi->mchip.domain)
>> +               irq_domain_remove(msi->mchip.domain);
>> +       if (msi->domain)
>> +               irq_domain_remove(msi->domain);
>> +}
>> +
>> +static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
>> +{
>> +       u32 msi_irq_count = xgene_msi->settings->nr_msi_vec;
>> +       u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs;
>> +       int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long);
>> +
>> +       xgene_msi->bitmap = kzalloc(size, GFP_KERNEL);
>> +       if (!xgene_msi->bitmap)
>> +               return -ENOMEM;
>> +
>> +       mutex_init(&xgene_msi->bitmap_lock);
>> +
>> +       xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL);
>> +       if (!xgene_msi->msi_virqs)
>> +               return -ENOMEM;
>> +
>> +       return 0;
>> +}
>> +
>> +static irqreturn_t xgene_msi_isr(int irq, void *data)
>
> This is not a "normal" interrupt handler. This is really a chained
> interrupt controller. Please use the proper infrastructure for this
> (irq_set_chained_handler, chained_irq_enter, chained_irq_exit).
> Otherwise, your usage of irq_find_mapping is illegal wrt RCU.
>
>> +{
>> +       struct xgene_msi *xgene_msi = (struct xgene_msi *) data;
>> +       unsigned int virq;
>> +       int msir_index, msir_reg, msir_val, hw_irq;
>> +       u32 intr_index, grp_select, msi_grp, processed = 0;
>> +       u32 nr_hw_irqs, irqs_per_index, index_per_group;
>> +
>> +       msi_grp = irq - xgene_msi->msi_virqs[0];
>> +       if (msi_grp >= xgene_msi->settings->nr_hw_irqs) {
>> +               pr_err("invalid msi received\n");
>> +               return IRQ_NONE;
>> +       }
>> +
>> +       nr_hw_irqs = xgene_msi->settings->nr_hw_irqs;
>> +       irqs_per_index = xgene_msi->settings->irqs_per_index;
>> +       index_per_group = xgene_msi->settings->index_per_group;
>> +
>> +       grp_select = readl(xgene_msi->msi_regs + MSI_INT0 + (msi_grp << 16));
>> +       while (grp_select) {
>> +               msir_index = ffs(grp_select) - 1;
>> +               msir_reg = (msi_grp << 19) + (msir_index << 16);
>> +               msir_val = readl(xgene_msi->msi_regs + MSI_INDEX0 + msir_reg);
>> +               while (msir_val) {
>> +                       intr_index = ffs(msir_val) - 1;
>> +                       hw_irq = (((msir_index * irqs_per_index) + intr_index) *
>> +                                nr_hw_irqs) + msi_grp;
>
> Same thing here. This requires some explaination on how the HW is organised.
>
>> +                       virq = irq_find_mapping(xgene_msi->domain, hw_irq);
>> +                       if (virq != 0)
>> +                               generic_handle_irq(virq);
>> +                       msir_val &= ~(1 << intr_index);
>> +                       processed++;
>> +               }
>> +               grp_select &= ~(1 << msir_index);
>> +       }
>> +
>> +       return processed > 0 ? IRQ_HANDLED : IRQ_NONE;
>> +}
>> +
>> +static int xgene_msi_remove(struct platform_device *pdev)
>> +{
>> +       int virq, i;
>> +       struct xgene_msi *msi = platform_get_drvdata(pdev);
>> +       u32 nr_hw_irqs = msi->settings->nr_hw_irqs;
>> +
>> +       for (i = 0; i < nr_hw_irqs; i++) {
>> +               virq = msi->msi_virqs[i];
>> +               if (virq != 0)
>> +                       free_irq(virq, msi);
>> +       }
>> +
>> +       kfree(msi->bitmap);
>> +       msi->bitmap = NULL;
>> +
>> +       xgene_free_domains(msi);
>> +
>> +       return 0;
>> +}
>> +
>> +static int xgene_msi_setup_hwirq(struct xgene_msi *msi,
>> +                                struct platform_device *pdev,
>> +                                int irq_index)
>> +{
>> +       int virt_msir;
>> +       cpumask_var_t mask;
>> +       int err;
>> +
>> +       virt_msir = platform_get_irq(pdev, irq_index);
>> +       if (virt_msir < 0) {
>> +               dev_err(&pdev->dev, "Cannot translate IRQ index %d\n",
>> +                       irq_index);
>> +               return -EINVAL;
>> +       }
>> +
>> +       err = request_irq(virt_msir, xgene_msi_isr, 0,
>> +                         xgene_msi_top_irq_chip.name, msi);
>
> This is where you should use irq_set_chained_handler.
>
>> +       if (err) {
>> +               dev_err(&pdev->dev, "request irq failed\n");
>> +               return err;
>> +       }
>> +
>> +       if (alloc_cpumask_var(&mask, GFP_KERNEL)) {
>> +               cpumask_setall(mask);
>> +               irq_set_affinity(virt_msir, mask);
>> +               free_cpumask_var(mask);
>> +       }
>> +
>> +       msi->msi_virqs[irq_index] = virt_msir;
>> +
>> +       return 0;
>> +}
>> +
>> +static const struct of_device_id xgene_msi_match_table[] = {
>> +       {.compatible = "apm,xgene1-msi",
>> +        .data = xgene_msi_init_storm_settings},
>> +       {},
>> +};
>> +
>> +static int xgene_msi_probe(struct platform_device *pdev)
>> +{
>> +       struct resource *res;
>> +       int rc, irq_index;
>> +       struct device_node *np;
>> +       const struct of_device_id *matched_np;
>> +       struct xgene_msi *xgene_msi;
>> +       xgene_msi_initcall_t init_fn;
>> +       u32 nr_hw_irqs, nr_msi_vecs;
>> +
>> +       np = of_find_matching_node_and_match(NULL,
>> +                            xgene_msi_match_table, &matched_np);
>> +       if (!np)
>> +               return -ENODEV;
>> +
>> +       xgene_msi = kzalloc(sizeof(struct xgene_msi), GFP_KERNEL);
>> +       if (!xgene_msi) {
>> +               dev_err(&pdev->dev, "failed to allocate X-Gene MSI data\n");
>> +               return -ENOMEM;
>> +       }
>> +
>> +       xgene_msi->node = np;
>> +
>> +       init_fn = (xgene_msi_initcall_t) matched_np->data;
>> +       rc = init_fn(xgene_msi);
>> +       if (rc)
>> +               return rc;
>> +
>> +       platform_set_drvdata(pdev, xgene_msi);
>> +
>> +       nr_msi_vecs = xgene_msi->settings->nr_msi_vec;
>> +
>> +       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>> +       xgene_msi->msi_regs = devm_ioremap_resource(&pdev->dev, res);
>> +       if (IS_ERR(xgene_msi->msi_regs)) {
>> +               dev_err(&pdev->dev, "no reg space\n");
>> +               rc = -EINVAL;
>> +               goto error;
>> +       }
>> +
>> +       xgene_msi->msi_addr_hi = upper_32_bits(res->start);
>> +       xgene_msi->msi_addr_lo = lower_32_bits(res->start);
>> +
>> +       rc = xgene_msi_init_allocator(xgene_msi);
>> +       if (rc) {
>> +               dev_err(&pdev->dev, "Error allocating MSI bitmap\n");
>> +               goto error;
>> +       }
>> +
>> +       rc = xgene_allocate_domains(xgene_msi);
>> +       if (rc) {
>> +               dev_err(&pdev->dev, "Failed to allocate MSI domain\n");
>> +               goto error;
>> +       }
>> +
>> +       nr_hw_irqs = xgene_msi->settings->nr_hw_irqs;
>> +       for (irq_index = 0; irq_index < nr_hw_irqs; irq_index++) {
>> +               rc = xgene_msi_setup_hwirq(xgene_msi, pdev, irq_index);
>> +               if (rc)
>> +                       goto error;
>> +       }
>> +
>> +       rc = of_pci_msi_chip_add(&xgene_msi->mchip);
>> +       if (rc) {
>> +               dev_err(&pdev->dev, "failed to add MSI controller chip\n");
>> +               goto error;
>> +       }
>> +
>> +       dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n");
>> +
>> +       return 0;
>> +error:
>> +       xgene_msi_remove(pdev);
>> +       return rc;
>> +}
>> +
>> +static struct platform_driver xgene_msi_driver = {
>> +       .driver = {
>> +               .name = "xgene-msi",
>> +               .owner = THIS_MODULE,
>> +               .of_match_table = xgene_msi_match_table,
>> +       },
>> +       .probe = xgene_msi_probe,
>> +       .remove = xgene_msi_remove,
>> +};
>> +
>> +static int __init xgene_pcie_msi_init(void)
>> +{
>> +       return platform_driver_register(&xgene_msi_driver);
>> +}
>> +subsys_initcall(xgene_pcie_msi_init);
>> +
>> +MODULE_AUTHOR("Duc Dang <dhdang@apm.com>");
>> +MODULE_DESCRIPTION("APM X-Gene PCIe MSI driver");
>> +MODULE_LICENSE("GPL v2");
>> diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
>> index 22751ed..3e6faa1 100644
>> --- a/drivers/pci/host/pci-xgene.c
>> +++ b/drivers/pci/host/pci-xgene.c
>> @@ -468,6 +468,23 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port,
>>         return 0;
>>  }
>>
>> +static int xgene_pcie_msi_enable(struct pci_bus *bus)
>> +{
>> +       struct device_node *msi_node;
>> +
>> +       msi_node = of_parse_phandle(bus->dev.of_node,
>> +                                       "msi-parent", 0);
>> +       if (!msi_node)
>> +               return -ENODEV;
>> +
>> +       bus->msi = of_pci_find_msi_chip_by_node(msi_node);
>> +       if (bus->msi)
>> +               bus->msi->dev = &bus->dev;
>> +       else
>> +               return -ENODEV;
>> +       return 0;
>> +}
>> +
>>  static int xgene_pcie_probe_bridge(struct platform_device *pdev)
>>  {
>>         struct device_node *dn = pdev->dev.of_node;
>> @@ -504,6 +521,10 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev)
>>         if (!bus)
>>                 return -ENOMEM;
>>
>> +       if (IS_ENABLED(CONFIG_PCI_MSI))
>> +               if (xgene_pcie_msi_enable(bus))
>> +                       dev_info(port->dev, "failed to enable MSI\n");
>> +
>>         pci_scan_child_bus(bus);
>>         pci_assign_unassigned_bus_resources(bus);
>>         pci_bus_add_devices(bus);
>> --
>> 1.9.1
>>
>
> Thanks,
>
>         M.
> --
> Jazz is not dead. It just smells funny...
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier April 11, 2015, 12:06 p.m. UTC | #10
On 2015-04-11 00:42, Duc Dang wrote:
> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier <marc.zyngier@arm.com> 
> wrote:
>> On 09/04/15 18:05, Duc Dang wrote:
>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
>>> 16 HW IRQ lines.
>>>
>>> Signed-off-by: Duc Dang <dhdang@apm.com>
>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
>>> ---
>>>  drivers/pci/host/Kconfig         |   6 +
>>>  drivers/pci/host/Makefile        |   1 +
>>>  drivers/pci/host/pci-xgene-msi.c | 407 
>>> +++++++++++++++++++++++++++++++++++++++
>>>  drivers/pci/host/pci-xgene.c     |  21 ++
>>>  4 files changed, 435 insertions(+)
>>>  create mode 100644 drivers/pci/host/pci-xgene-msi.c
>>>
>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>>> index 7b892a9..c9b61fa 100644
>>> --- a/drivers/pci/host/Kconfig
>>> +++ b/drivers/pci/host/Kconfig
>>> @@ -89,11 +89,17 @@ config PCI_XGENE
>>>         depends on ARCH_XGENE
>>>         depends on OF
>>>         select PCIEPORTBUS
>>> +       select PCI_MSI_IRQ_DOMAIN if PCI_MSI
>>> +       select PCI_XGENE_MSI if PCI_MSI
>>>         help
>>>           Say Y here if you want internal PCI support on APM X-Gene 
>>> SoC.
>>>           There are 5 internal PCIe ports available. Each port is 
>>> GEN3 capable
>>>           and have varied lanes from x1 to x8.
>>>
>>> +config PCI_XGENE_MSI
>>> +       bool "X-Gene v1 PCIe MSI feature"
>>> +       depends on PCI_XGENE && PCI_MSI
>>> +
>>>  config PCI_LAYERSCAPE
>>>         bool "Freescale Layerscape PCIe controller"
>>>         depends on OF && ARM
>>> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
>>> index e61d91c..f39bde3 100644
>>> --- a/drivers/pci/host/Makefile
>>> +++ b/drivers/pci/host/Makefile
>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
>>>  obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o
>>>  obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
>>>  obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
>>>  obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
>>>  obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
>>> diff --git a/drivers/pci/host/pci-xgene-msi.c 
>>> b/drivers/pci/host/pci-xgene-msi.c
>>> new file mode 100644
>>> index 0000000..4f0ff42
>>> --- /dev/null
>>> +++ b/drivers/pci/host/pci-xgene-msi.c
>>> @@ -0,0 +1,407 @@
>>> +/*
>>> + * APM X-Gene MSI Driver
>>> + *
>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation
>>> + * Author: Tanmay Inamdar <tinamdar@apm.com>
>>> + *        Duc Dang <dhdang@apm.com>
>>> + *
>>> + * This program is free software; you can redistribute  it and/or 
>>> modify it
>>> + * under  the terms of  the GNU General  Public License as 
>>> published by the
>>> + * Free Software Foundation;  either version 2 of the  License, or 
>>> (at your
>>> + * option) any later version.
>>> + *
>>> + * This program is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>> + * GNU General Public License for more details.
>>> + */
>>> +#include <linux/interrupt.h>
>>> +#include <linux/module.h>
>>> +#include <linux/msi.h>
>>> +#include <linux/of_irq.h>
>>> +#include <linux/pci.h>
>>> +#include <linux/platform_device.h>
>>> +#include <linux/of_pci.h>
>>> +
>>> +#define MSI_INDEX0             0x000000
>>> +#define MSI_INT0               0x800000
>>> +
>>> +struct xgene_msi_settings {
>>> +       u32     index_per_group;
>>> +       u32     irqs_per_index;
>>> +       u32     nr_msi_vec;
>>> +       u32     nr_hw_irqs;
>>> +};
>>> +
>>> +struct xgene_msi {
>>> +       struct device_node              *node;
>>> +       struct msi_controller           mchip;
>>> +       struct irq_domain               *domain;
>>> +       struct xgene_msi_settings       *settings;
>>> +       u32                             msi_addr_lo;
>>> +       u32                             msi_addr_hi;
>>
>> I'd rather see the mailbox address directly, and only do the split 
>> when
>> assigning it to the message (you seem to play all kind of tricks on 
>> the
>> address anyway).
>
> msi_addr_lo and msi_addr_hi store the physical base address of MSI
> controller registers. I will add comment to clarify this.

What I mean is that there is no point in keeping this around as a pair
of 32bit variables. You'd better keep it as a single 64bit, and do the
split when assigning it to the MSI message.

[...]

>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data,
>>> +                                 const struct cpumask *mask, bool 
>>> force)
>>> +{
>>> +       struct xgene_msi *msi = 
>>> irq_data_get_irq_chip_data(irq_data);
>>> +       unsigned int gic_irq;
>>> +       int ret;
>>> +
>>> +       gic_irq = msi->msi_virqs[irq_data->hwirq % 
>>> msi->settings->nr_hw_irqs];
>>> +       ret = irq_set_affinity(gic_irq, mask);
>>
>> Erm... This as the effect of moving *all* the MSIs hanging off this
>> interrupt to another CPU. I'm not sure that's an acceptable 
>> effect...
>> What if another MSI requires a different affinity?
>
> We have 16 'real' hardware IRQs. Each of these has multiple MSIs
> attached to it.
> So this will move all MSIs hanging off this interrupt to another CPU;
> and we don't support different affinity settings for different MSIs
> that are attached to the same hardware IRQ.

Well, that's a significant departure from the expected behaviour. In 
other
words, I wonder how useful this is. Could you instead reconfigure the 
MSI
itself to hit the right CPU (assuming you don't have more than 16 CPUs 
and if
that's only for XGene-1, this will only be 8 at most)? This would 
reduce
your number of possible MSIs, but at least the semantics of set_affinity
would be preserved.

Thanks,

         M.
Marc Zyngier April 11, 2015, 12:18 p.m. UTC | #11
Hi Feng,

On 2015-04-11 01:16, Feng Kan wrote:

> Is there any plans to support ACPI for GICv2m MSI? Both this driver 
> and the
> GICv2m seems to support OF model of discovery for msi controller. 
> X-Gene1
> uses this driver and X-Gene2 uses GICv2m, there needs to be a way to
> associate msi controller with the PCIe bus. I haven't
> found a standard way of doing finding "msi-parent" for ACPI. Do you 
> have
> any suggestion.

This is remarkably off topic! ;-)

ACPI only provides information for GICv2m and GICv3. No other interrupt 
controller,
MSI widget or fairy dust provider will be supported on arm64 with ACPI. 
So this driver
will be DT only.

GICv2m, being described in the ACPI spec, is growing some support. My 
understanding is
that this is done by AMD (Suravee Suthikulpanit) using my 
irq/msi-domain branch as a
base (I know that Suravee is looking at expanding the irqdomain 
matching code to
accept a pointer to an ACPI table instead of a device node to match the 
target
domain).

Hope this helps,

         M.
Arnd Bergmann April 11, 2015, 2:50 p.m. UTC | #12
On Friday 10 April 2015 17:16:32 Feng Kan wrote:
> Hi Marc:
> 
> Is there any plans to support ACPI for GICv2m MSI? Both this driver and the
> GICv2m seems to support OF model of discovery for msi controller. X-Gene1
> uses this driver and X-Gene2 uses GICv2m, there needs to be a way to
> associate msi controller with the PCIe bus. I haven't
> found a standard way of doing finding "msi-parent" for ACPI. Do you have
> any suggestion.
> 
> Sorry for top posting.

Doing gicv2m in ACPI should be straightforward, and the ACPI code can simply
assume that there is only one GIC instance (whichever version) in the system
and use that for all standard PCI hosts.

	Arnd
Duc Dang April 14, 2015, 6:20 p.m. UTC | #13
On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 2015-04-11 00:42, Duc Dang wrote:
>>
>> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier <marc.zyngier@arm.com>
>> wrote:
>>>
>>> On 09/04/15 18:05, Duc Dang wrote:
>>>>
>>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
>>>> 16 HW IRQ lines.
>>>>
>>>> Signed-off-by: Duc Dang <dhdang@apm.com>
>>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
>>>> ---
>>>>  drivers/pci/host/Kconfig         |   6 +
>>>>  drivers/pci/host/Makefile        |   1 +
>>>>  drivers/pci/host/pci-xgene-msi.c | 407
>>>> +++++++++++++++++++++++++++++++++++++++
>>>>  drivers/pci/host/pci-xgene.c     |  21 ++
>>>>  4 files changed, 435 insertions(+)
>>>>  create mode 100644 drivers/pci/host/pci-xgene-msi.c
>>>>
>>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>>>> index 7b892a9..c9b61fa 100644
>>>> --- a/drivers/pci/host/Kconfig
>>>> +++ b/drivers/pci/host/Kconfig
>>>> @@ -89,11 +89,17 @@ config PCI_XGENE
>>>>         depends on ARCH_XGENE
>>>>         depends on OF
>>>>         select PCIEPORTBUS
>>>> +       select PCI_MSI_IRQ_DOMAIN if PCI_MSI
>>>> +       select PCI_XGENE_MSI if PCI_MSI
>>>>         help
>>>>           Say Y here if you want internal PCI support on APM X-Gene SoC.
>>>>           There are 5 internal PCIe ports available. Each port is GEN3
>>>> capable
>>>>           and have varied lanes from x1 to x8.
>>>>
>>>> +config PCI_XGENE_MSI
>>>> +       bool "X-Gene v1 PCIe MSI feature"
>>>> +       depends on PCI_XGENE && PCI_MSI
>>>> +
>>>>  config PCI_LAYERSCAPE
>>>>         bool "Freescale Layerscape PCIe controller"
>>>>         depends on OF && ARM
>>>> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
>>>> index e61d91c..f39bde3 100644
>>>> --- a/drivers/pci/host/Makefile
>>>> +++ b/drivers/pci/host/Makefile
>>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
>>>>  obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o
>>>>  obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
>>>>  obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
>>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
>>>>  obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
>>>>  obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
>>>> diff --git a/drivers/pci/host/pci-xgene-msi.c
>>>> b/drivers/pci/host/pci-xgene-msi.c
>>>> new file mode 100644
>>>> index 0000000..4f0ff42
>>>> --- /dev/null
>>>> +++ b/drivers/pci/host/pci-xgene-msi.c
>>>> @@ -0,0 +1,407 @@
>>>> +/*
>>>> + * APM X-Gene MSI Driver
>>>> + *
>>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation
>>>> + * Author: Tanmay Inamdar <tinamdar@apm.com>
>>>> + *        Duc Dang <dhdang@apm.com>
>>>> + *
>>>> + * This program is free software; you can redistribute  it and/or
>>>> modify it
>>>> + * under  the terms of  the GNU General  Public License as published by
>>>> the
>>>> + * Free Software Foundation;  either version 2 of the  License, or (at
>>>> your
>>>> + * option) any later version.
>>>> + *
>>>> + * This program is distributed in the hope that it will be useful,
>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>>> + * GNU General Public License for more details.
>>>> + */
>>>> +#include <linux/interrupt.h>
>>>> +#include <linux/module.h>
>>>> +#include <linux/msi.h>
>>>> +#include <linux/of_irq.h>
>>>> +#include <linux/pci.h>
>>>> +#include <linux/platform_device.h>
>>>> +#include <linux/of_pci.h>
>>>> +
>>>> +#define MSI_INDEX0             0x000000
>>>> +#define MSI_INT0               0x800000
>>>> +
>>>> +struct xgene_msi_settings {
>>>> +       u32     index_per_group;
>>>> +       u32     irqs_per_index;
>>>> +       u32     nr_msi_vec;
>>>> +       u32     nr_hw_irqs;
>>>> +};
>>>> +
>>>> +struct xgene_msi {
>>>> +       struct device_node              *node;
>>>> +       struct msi_controller           mchip;
>>>> +       struct irq_domain               *domain;
>>>> +       struct xgene_msi_settings       *settings;
>>>> +       u32                             msi_addr_lo;
>>>> +       u32                             msi_addr_hi;
>>>
>>>
>>> I'd rather see the mailbox address directly, and only do the split when
>>> assigning it to the message (you seem to play all kind of tricks on the
>>> address anyway).
>>
>>
>> msi_addr_lo and msi_addr_hi store the physical base address of MSI
>> controller registers. I will add comment to clarify this.
>
>
> What I mean is that there is no point in keeping this around as a pair
> of 32bit variables. You'd better keep it as a single 64bit, and do the
> split when assigning it the the MSI message.

Hi Marc,

These came from the device tree (which describes a 64-bit address as
two 32-bit words).
If I store them this way, I don't need CPU cycles to do the split
every time they are assigned to the MSI message. Please let me know
what you think about it.

>
> [...]
>
>>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data,
>>>> +                                 const struct cpumask *mask, bool
>>>> force)
>>>> +{
>>>> +       struct xgene_msi *msi = irq_data_get_irq_chip_data(irq_data);
>>>> +       unsigned int gic_irq;
>>>> +       int ret;
>>>> +
>>>> +       gic_irq = msi->msi_virqs[irq_data->hwirq %
>>>> msi->settings->nr_hw_irqs];
>>>> +       ret = irq_set_affinity(gic_irq, mask);
>>>
>>>
>>> Erm... This as the effect of moving *all* the MSIs hanging off this
>>> interrupt to another CPU. I'm not sure that's an acceptable effect...
>>> What if another MSI requires a different affinity?
>>
>>
>> We have 16 'real' hardware IRQs. Each of these has multiple MSIs
>> attached to it.
>> So this will move all MSIs handing off this interrupt to another CPU;
>> and we don't support different affinity settings for different MSIs
>> that are attached to the same hardware IRQ.
>
>
> Well, that's a significant departure from the expected behaviour. In other
> words, I wonder how useful this is. Could you instead reconfigure the MSI
> itself to hit the right CPU (assuming you don't have more than 16 CPUs and
> if
> that's only for XGene-1, this will only be 8 at most)? This would reduce
> your number of possible MSIs, but at least the semantics of set_afinity
> would be preserved.

X-Gene-1 supports 2688 MSIs that are divided into 16 groups; each
group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have
16 hardware GIC IRQs for 2688 MSIs).

Setting the affinity of a single MSI to deliver it to a target CPU
will move all the MSIs mapped to the same GIC IRQ to that CPU as well.
This is not standard behavior, but limiting the total number of MSIs
will cause a lot of devices to fall back to INTx (with a huge
performance penalty) or even fail to load their driver, as these
devices request more than 16 MSIs during driver initialization.

I can document the limitation in affinity setting of X-Gene-1 MSI in
the driver so that people are hopefully not surprised, and I hope to
keep the total number of supported MSIs at 2688 so that we can support
as many cards that require MSI/MSI-X as possible.

Please let me know your opinion.

>
> Thanks,
>
>         M.
> --
> Fast, cheap, reliable. Pick two.
Marc Zyngier April 15, 2015, 8:16 a.m. UTC | #14
On Tue, 14 Apr 2015 19:20:19 +0100
Duc Dang <dhdang@apm.com> wrote:

> On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com>
> wrote:
> > On 2015-04-11 00:42, Duc Dang wrote:
> >>
> >> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier
> >> <marc.zyngier@arm.com> wrote:
> >>>
> >>> On 09/04/15 18:05, Duc Dang wrote:
> >>>>
> >>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
> >>>> 16 HW IRQ lines.
> >>>>
> >>>> Signed-off-by: Duc Dang <dhdang@apm.com>
> >>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
> >>>> ---
> >>>>  drivers/pci/host/Kconfig         |   6 +
> >>>>  drivers/pci/host/Makefile        |   1 +
> >>>>  drivers/pci/host/pci-xgene-msi.c | 407
> >>>> +++++++++++++++++++++++++++++++++++++++
> >>>>  drivers/pci/host/pci-xgene.c     |  21 ++
> >>>>  4 files changed, 435 insertions(+)
> >>>>  create mode 100644 drivers/pci/host/pci-xgene-msi.c
> >>>>
> >>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
> >>>> index 7b892a9..c9b61fa 100644
> >>>> --- a/drivers/pci/host/Kconfig
> >>>> +++ b/drivers/pci/host/Kconfig
> >>>> @@ -89,11 +89,17 @@ config PCI_XGENE
> >>>>         depends on ARCH_XGENE
> >>>>         depends on OF
> >>>>         select PCIEPORTBUS
> >>>> +       select PCI_MSI_IRQ_DOMAIN if PCI_MSI
> >>>> +       select PCI_XGENE_MSI if PCI_MSI
> >>>>         help
> >>>>           Say Y here if you want internal PCI support on APM
> >>>> X-Gene SoC. There are 5 internal PCIe ports available. Each port
> >>>> is GEN3 capable
> >>>>           and have varied lanes from x1 to x8.
> >>>>
> >>>> +config PCI_XGENE_MSI
> >>>> +       bool "X-Gene v1 PCIe MSI feature"
> >>>> +       depends on PCI_XGENE && PCI_MSI
> >>>> +
> >>>>  config PCI_LAYERSCAPE
> >>>>         bool "Freescale Layerscape PCIe controller"
> >>>>         depends on OF && ARM
> >>>> diff --git a/drivers/pci/host/Makefile
> >>>> b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644
> >>>> --- a/drivers/pci/host/Makefile
> >>>> +++ b/drivers/pci/host/Makefile
> >>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) +=
> >>>> pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o
> >>>> pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
> >>>>  obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
> >>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
> >>>>  obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
> >>>>  obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
> >>>> diff --git a/drivers/pci/host/pci-xgene-msi.c
> >>>> b/drivers/pci/host/pci-xgene-msi.c
> >>>> new file mode 100644
> >>>> index 0000000..4f0ff42
> >>>> --- /dev/null
> >>>> +++ b/drivers/pci/host/pci-xgene-msi.c
> >>>> @@ -0,0 +1,407 @@
> >>>> +/*
> >>>> + * APM X-Gene MSI Driver
> >>>> + *
> >>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation
> >>>> + * Author: Tanmay Inamdar <tinamdar@apm.com>
> >>>> + *        Duc Dang <dhdang@apm.com>
> >>>> + *
> >>>> + * This program is free software; you can redistribute  it
> >>>> and/or modify it
> >>>> + * under  the terms of  the GNU General  Public License as
> >>>> published by the
> >>>> + * Free Software Foundation;  either version 2 of the  License,
> >>>> or (at your
> >>>> + * option) any later version.
> >>>> + *
> >>>> + * This program is distributed in the hope that it will be
> >>>> useful,
> >>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty
> >>>> of
> >>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> >>>> + * GNU General Public License for more details.
> >>>> + */
> >>>> +#include <linux/interrupt.h>
> >>>> +#include <linux/module.h>
> >>>> +#include <linux/msi.h>
> >>>> +#include <linux/of_irq.h>
> >>>> +#include <linux/pci.h>
> >>>> +#include <linux/platform_device.h>
> >>>> +#include <linux/of_pci.h>
> >>>> +
> >>>> +#define MSI_INDEX0             0x000000
> >>>> +#define MSI_INT0               0x800000
> >>>> +
> >>>> +struct xgene_msi_settings {
> >>>> +       u32     index_per_group;
> >>>> +       u32     irqs_per_index;
> >>>> +       u32     nr_msi_vec;
> >>>> +       u32     nr_hw_irqs;
> >>>> +};
> >>>> +
> >>>> +struct xgene_msi {
> >>>> +       struct device_node              *node;
> >>>> +       struct msi_controller           mchip;
> >>>> +       struct irq_domain               *domain;
> >>>> +       struct xgene_msi_settings       *settings;
> >>>> +       u32                             msi_addr_lo;
> >>>> +       u32                             msi_addr_hi;
> >>>
> >>>
> >>> I'd rather see the mailbox address directly, and only do the
> >>> split when assigning it to the message (you seem to play all kind
> >>> of tricks on the address anyway).
> >>
> >>
> >> msi_addr_lo and msi_addr_hi store the physical base address of MSI
> >> controller registers. I will add comment to clarify this.
> >
> >
> > What I mean is that there is no point in keeping this around as a
> > pair of 32bit variables. You'd better keep it as a single 64bit,
> > and do the split when assigning it the the MSI message.
> 
> Hi Marc,
> 
> These came from device-tree (which describes 64-bit address number as
> 2 32-bit words).

... and converted to a resource as a 64bit word, on which you apply
{upper,lower}_32_bit(). So much for DT...

> If I store them this way, I don't need CPU cycles to do the split
> every time assigning them to the MSI message. Please let me know what
> do you think about it.

This is getting absolutely silly.

How many cycles does it take to execute "lsr x1, x0, #32" on X-Gene? If
it takes so long that it is considered to be a bottleneck, I suggest
you go and design a better CPU (hint: the answer is probably 1 cycle
absolutely everywhere).

How often are you configuring MSIs in the face of what is happening in
the rest of the kernel? Almost never!

So, given that "never" times 1 is still never,  I'll consider that
readability of the code trumps it anytime (I can't believe we're having
that kind of conversation...).

> >
> > [...]
> >
> >>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data,
> >>>> +                                 const struct cpumask *mask,
> >>>> bool force)
> >>>> +{
> >>>> +       struct xgene_msi *msi =
> >>>> irq_data_get_irq_chip_data(irq_data);
> >>>> +       unsigned int gic_irq;
> >>>> +       int ret;
> >>>> +
> >>>> +       gic_irq = msi->msi_virqs[irq_data->hwirq %
> >>>> msi->settings->nr_hw_irqs];
> >>>> +       ret = irq_set_affinity(gic_irq, mask);
> >>>
> >>>
> >>> Erm... This as the effect of moving *all* the MSIs hanging off
> >>> this interrupt to another CPU. I'm not sure that's an acceptable
> >>> effect... What if another MSI requires a different affinity?
> >>
> >>
> >> We have 16 'real' hardware IRQs. Each of these has multiple MSIs
> >> attached to it.
> >> So this will move all MSIs handing off this interrupt to another
> >> CPU; and we don't support different affinity settings for
> >> different MSIs that are attached to the same hardware IRQ.
> >
> >
> > Well, that's a significant departure from the expected behaviour.
> > In other words, I wonder how useful this is. Could you instead
> > reconfigure the MSI itself to hit the right CPU (assuming you don't
> > have more than 16 CPUs and if
> > that's only for XGene-1, this will only be 8 at most)? This would
> > reduce your number of possible MSIs, but at least the semantics of
> > set_afinity would be preserved.
> 
> X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each
> group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have
> 16 hardware GIC IRQs for 2688 MSIs).

We've already established that.

> Setting affinity of single MSI to deliver it to a target CPU will move
> all the MSIs mapped to the same GIC IRQ to that CPU as well. This is
> not a standard behavior, but limiting the total number of MSIs will
> cause a lot of devices to fall back to INTx (with huge performance
> penalty) or even fail to load their driver as these devices request
> more than 16 MSIs during driver initialization.

No, I think you got it wrong. If you have 168 MSIs per GIC IRQ, and
provided that you have 8 CPUs (XGene-1), you end up with 336 MSIs per
CPU (having statically assigned 2 IRQs per CPU).

Assuming you adopt my scheme, you still have a grand total of 336 MSIs
that can be freely moved around without breaking any userspace
expectations.

I think that 336 MSIs is a fair number (nowhere near the 16 you claim).
Most platforms are doing quite well with that kind of numbers. Also,
you don't have to allocate all the MSIs a device can possibly claim (up
to 2048 MSI-X per device), as they are all perfectly capable of using
less MSI without having to fallback to INTx).

> I can document the limitation in affinity setting of X-Gene-1 MSI in
> the driver to hopefully not make people surprise and hope to keep the
> total number of supported MSI as 2688 so that we can support as many
> cards that require MSI/MSI-X as possible.

I don't think this is a valid approach. This breaks userspace (think of
things like irqbalance), and breaks the SMP affinity model that Linux
uses. No amount of documentation is going to solve it, so I think you
just have to admit that the HW is mis-designed and do the best you can
to make it work like Linux expect it to work.

The alternative would to disable affinity setting altogether instead of
introducing these horrible side effects.

Thanks,

	M.
Duc Dang April 17, 2015, 9:50 a.m. UTC | #15
This patch set adds MSI/MSIX termination driver support for APM X-Gene v1 SoC.
APM X-Gene v1 SoC supports its own implementation of MSI, which is not compliant
to GIC V2M specification for MSI Termination.

There is a single MSI block in X-Gene v1 SoC which serves all 5 PCIe ports. This MSI
block supports 2048 MSI termination ports coalesced into 16 physical HW IRQ lines
and shared across all 5 PCIe ports.

v4 changes:
	1. Remove affinity setting for each MSI
	2. Add description about register layout, MSI termination address and data
	3. Correct total number of MSI vectors to 2048
	4. Clean up error messages
	5. Remove unused module code

v3 changes:
        1. Implement MSI support using PCI MSI IRQ domain
        2. Only use msi_controller to store IRQ domain
v2 changes:
        1. Use msi_controller structure
        2. Remove arch hooks arch_teardown_msi_irqs and arch_setup_msi_irqs
 


 .../devicetree/bindings/pci/xgene-pci-msi.txt      |  63 ++++
 MAINTAINERS                                        |   8 +
 arch/arm64/boot/dts/apm/apm-storm.dtsi             |  27 ++
 drivers/pci/host/Kconfig                           |   6 +
 drivers/pci/host/Makefile                          |   1 +
 drivers/pci/host/pci-xgene-msi.c                   | 410 +++++++++++++++++++++
 drivers/pci/host/pci-xgene.c                       |  21 ++
 7 files changed, 536 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pci/xgene-pci-msi.txt
 create mode 100644 drivers/pci/host/pci-xgene-msi.c
Duc Dang April 17, 2015, 10 a.m. UTC | #16
On Wed, Apr 15, 2015 at 1:16 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On Tue, 14 Apr 2015 19:20:19 +0100
> Duc Dang <dhdang@apm.com> wrote:
>
>> On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com>
>> wrote:
>> > On 2015-04-11 00:42, Duc Dang wrote:
>> >>
>> >> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier
>> >> <marc.zyngier@arm.com> wrote:
>> >>>
>> >>> On 09/04/15 18:05, Duc Dang wrote:
>> >>>>
>> >>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
>> >>>> 16 HW IRQ lines.
>> >>>>
>> >>>> Signed-off-by: Duc Dang <dhdang@apm.com>
>> >>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
>> >>>> ---
>> >>>>  drivers/pci/host/Kconfig         |   6 +
>> >>>>  drivers/pci/host/Makefile        |   1 +
>> >>>>  drivers/pci/host/pci-xgene-msi.c | 407
>> >>>> +++++++++++++++++++++++++++++++++++++++
>> >>>>  drivers/pci/host/pci-xgene.c     |  21 ++
>> >>>>  4 files changed, 435 insertions(+)
>> >>>>  create mode 100644 drivers/pci/host/pci-xgene-msi.c
>> >>>>
>> >>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>> >>>> index 7b892a9..c9b61fa 100644
>> >>>> --- a/drivers/pci/host/Kconfig
>> >>>> +++ b/drivers/pci/host/Kconfig
>> >>>> @@ -89,11 +89,17 @@ config PCI_XGENE
>> >>>>         depends on ARCH_XGENE
>> >>>>         depends on OF
>> >>>>         select PCIEPORTBUS
>> >>>> +       select PCI_MSI_IRQ_DOMAIN if PCI_MSI
>> >>>> +       select PCI_XGENE_MSI if PCI_MSI
>> >>>>         help
>> >>>>           Say Y here if you want internal PCI support on APM
>> >>>> X-Gene SoC. There are 5 internal PCIe ports available. Each port
>> >>>> is GEN3 capable
>> >>>>           and have varied lanes from x1 to x8.
>> >>>>
>> >>>> +config PCI_XGENE_MSI
>> >>>> +       bool "X-Gene v1 PCIe MSI feature"
>> >>>> +       depends on PCI_XGENE && PCI_MSI
>> >>>> +
>> >>>>  config PCI_LAYERSCAPE
>> >>>>         bool "Freescale Layerscape PCIe controller"
>> >>>>         depends on OF && ARM
>> >>>> diff --git a/drivers/pci/host/Makefile
>> >>>> b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644
>> >>>> --- a/drivers/pci/host/Makefile
>> >>>> +++ b/drivers/pci/host/Makefile
>> >>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) +=
>> >>>> pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o
>> >>>> pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
>> >>>>  obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
>> >>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
>> >>>>  obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
>> >>>>  obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
>> >>>> diff --git a/drivers/pci/host/pci-xgene-msi.c
>> >>>> b/drivers/pci/host/pci-xgene-msi.c
>> >>>> new file mode 100644
>> >>>> index 0000000..4f0ff42
>> >>>> --- /dev/null
>> >>>> +++ b/drivers/pci/host/pci-xgene-msi.c
>> >>>> @@ -0,0 +1,407 @@
>> >>>> +/*
>> >>>> + * APM X-Gene MSI Driver
>> >>>> + *
>> >>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation
>> >>>> + * Author: Tanmay Inamdar <tinamdar@apm.com>
>> >>>> + *        Duc Dang <dhdang@apm.com>
>> >>>> + *
>> >>>> + * This program is free software; you can redistribute  it
>> >>>> and/or modify it
>> >>>> + * under  the terms of  the GNU General  Public License as
>> >>>> published by the
>> >>>> + * Free Software Foundation;  either version 2 of the  License,
>> >>>> or (at your
>> >>>> + * option) any later version.
>> >>>> + *
>> >>>> + * This program is distributed in the hope that it will be
>> >>>> useful,
>> >>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty
>> >>>> of
>> >>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> >>>> + * GNU General Public License for more details.
>> >>>> + */
>> >>>> +#include <linux/interrupt.h>
>> >>>> +#include <linux/module.h>
>> >>>> +#include <linux/msi.h>
>> >>>> +#include <linux/of_irq.h>
>> >>>> +#include <linux/pci.h>
>> >>>> +#include <linux/platform_device.h>
>> >>>> +#include <linux/of_pci.h>
>> >>>> +
>> >>>> +#define MSI_INDEX0             0x000000
>> >>>> +#define MSI_INT0               0x800000
>> >>>> +
>> >>>> +struct xgene_msi_settings {
>> >>>> +       u32     index_per_group;
>> >>>> +       u32     irqs_per_index;
>> >>>> +       u32     nr_msi_vec;
>> >>>> +       u32     nr_hw_irqs;
>> >>>> +};
>> >>>> +
>> >>>> +struct xgene_msi {
>> >>>> +       struct device_node              *node;
>> >>>> +       struct msi_controller           mchip;
>> >>>> +       struct irq_domain               *domain;
>> >>>> +       struct xgene_msi_settings       *settings;
>> >>>> +       u32                             msi_addr_lo;
>> >>>> +       u32                             msi_addr_hi;
>> >>>
>> >>>
>> >>> I'd rather see the mailbox address directly, and only do the
>> >>> split when assigning it to the message (you seem to play all kind
>> >>> of tricks on the address anyway).
>> >>
>> >>
>> >> msi_addr_lo and msi_addr_hi store the physical base address of MSI
>> >> controller registers. I will add comment to clarify this.
>> >
>> >
>> > What I mean is that there is no point in keeping this around as a
>> > pair of 32bit variables. You'd better keep it as a single 64bit,
>> > and do the split when assigning it the the MSI message.
>>
>> Hi Marc,
>>
>> These came from device-tree (which describes 64-bit address number as
>> 2 32-bit words).
>
> ... and converted to a resource as a 64bit word, on which you apply
> {upper,lower}_32_bit(). So much for DT...
>
>> If I store them this way, I don't need CPU cycles to do the split
>> every time assigning them to the MSI message. Please let me know what
>> do you think about it.
>
> This is getting absolutely silly.
>
> How many cycles does it take to execute "lsr x1, x0, #32" on X-Gene? If
> it takes so long that it is considered to be a bottleneck, I suggest
> you go and design a better CPU (hint: the answer is probably 1 cycle
> absolutely everywhere).
>
> How often are you configuring MSIs in the face of what is happening in
> the rest of the kernel? Almost never!
>
> So, given that "never" times 1 is still never,  I'll consider that
> readability of the code trumps it anytime (I can't believe we're having
> that kind of conversation...).
>
I changed to use u64 for msi_addr and split it when composing MSI messages.
The change is in v4 of the patch set that I just posted.
>> >
>> > [...]
>> >
>> >>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data,
>> >>>> +                                 const struct cpumask *mask,
>> >>>> bool force)
>> >>>> +{
>> >>>> +       struct xgene_msi *msi =
>> >>>> irq_data_get_irq_chip_data(irq_data);
>> >>>> +       unsigned int gic_irq;
>> >>>> +       int ret;
>> >>>> +
>> >>>> +       gic_irq = msi->msi_virqs[irq_data->hwirq %
>> >>>> msi->settings->nr_hw_irqs];
>> >>>> +       ret = irq_set_affinity(gic_irq, mask);
>> >>>
>> >>>
>> >>> Erm... This as the effect of moving *all* the MSIs hanging off
>> >>> this interrupt to another CPU. I'm not sure that's an acceptable
>> >>> effect... What if another MSI requires a different affinity?
>> >>
>> >>
>> >> We have 16 'real' hardware IRQs. Each of these has multiple MSIs
>> >> attached to it.
>> >> So this will move all MSIs handing off this interrupt to another
>> >> CPU; and we don't support different affinity settings for
>> >> different MSIs that are attached to the same hardware IRQ.
>> >
>> >
>> > Well, that's a significant departure from the expected behaviour.
>> > In other words, I wonder how useful this is. Could you instead
>> > reconfigure the MSI itself to hit the right CPU (assuming you don't
>> > have more than 16 CPUs and if
>> > that's only for XGene-1, this will only be 8 at most)? This would
>> > reduce your number of possible MSIs, but at least the semantics of
>> > set_afinity would be preserved.
>>
>> X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each
>> group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have
>> 16 hardware GIC IRQs for 2688 MSIs).
>
> We've already established that.
>
>> Setting affinity of single MSI to deliver it to a target CPU will move
>> all the MSIs mapped to the same GIC IRQ to that CPU as well. This is
>> not a standard behavior, but limiting the total number of MSIs will
>> cause a lot of devices to fall back to INTx (with huge performance
>> penalty) or even fail to load their driver as these devices request
>> more than 16 MSIs during driver initialization.
>
> No, I think you got it wrong. If you have 168 MSIs per GIC IRQ, and
> provided that you have 8 CPUs (XGene-1), you end up with 336 MSIs per
> CPU (having statically assigned 2 IRQs per CPU).
>
> Assuming you adopt my scheme, you still have a grand total of 336 MSIs
> that can be freely moved around without breaking any userspace
> expectations.
>
Thanks Marc. This is a very good idea.

But to move MSIs around, I need to change the MSI termination address and data
and write them to the device configuration space. This may cause problems
if the device fires an interrupt at the same time as I do the config write.

What is your opinion here?

> I think that 336 MSIs is a fair number (nowhere near the 16 you claim).
> Most platforms are doing quite well with that kind of numbers. Also,
> you don't have to allocate all the MSIs a device can possibly claim (up
> to 2048 MSI-X per device), as they are all perfectly capable of using
> less MSI without having to fallback to INTx).
>
>> I can document the limitation in affinity setting of X-Gene-1 MSI in
>> the driver to hopefully not make people surprise and hope to keep the
>> total number of supported MSI as 2688 so that we can support as many
>> cards that require MSI/MSI-X as possible.
>
> I don't think this is a valid approach. This breaks userspace (think of
> things like irqbalance), and breaks the SMP affinity model that Linux
> uses. No amount of documentation is going to solve it, so I think you
> just have to admit that the HW is mis-designed and do the best you can
> to make it work like Linux expect it to work.
>
> The alternative would to disable affinity setting altogether instead of
> introducing these horrible side effects.
>
I have it disabled (set_affinity does nothing) in my v4 patch.

> Thanks,
>
>         M.
> --
> Jazz is not dead. It just smells funny.
Marc Zyngier April 17, 2015, 10:17 a.m. UTC | #17
On 17/04/15 11:00, Duc Dang wrote:
> On Wed, Apr 15, 2015 at 1:16 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
>> On Tue, 14 Apr 2015 19:20:19 +0100
>> Duc Dang <dhdang@apm.com> wrote:
>>
>>> On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com>
>>> wrote:
>>>> On 2015-04-11 00:42, Duc Dang wrote:
>>>>>
>>>>> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier
>>>>> <marc.zyngier@arm.com> wrote:
>>>>>>
>>>>>> On 09/04/15 18:05, Duc Dang wrote:
>>>>>>>
>>>>>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
>>>>>>> 16 HW IRQ lines.
>>>>>>>
>>>>>>> Signed-off-by: Duc Dang <dhdang@apm.com>
>>>>>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
>>>>>>> ---
>>>>>>>  drivers/pci/host/Kconfig         |   6 +
>>>>>>>  drivers/pci/host/Makefile        |   1 +
>>>>>>>  drivers/pci/host/pci-xgene-msi.c | 407
>>>>>>> +++++++++++++++++++++++++++++++++++++++
>>>>>>>  drivers/pci/host/pci-xgene.c     |  21 ++
>>>>>>>  4 files changed, 435 insertions(+)
>>>>>>>  create mode 100644 drivers/pci/host/pci-xgene-msi.c
>>>>>>>
>>>>>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>>>>>>> index 7b892a9..c9b61fa 100644
>>>>>>> --- a/drivers/pci/host/Kconfig
>>>>>>> +++ b/drivers/pci/host/Kconfig
>>>>>>> @@ -89,11 +89,17 @@ config PCI_XGENE
>>>>>>>         depends on ARCH_XGENE
>>>>>>>         depends on OF
>>>>>>>         select PCIEPORTBUS
>>>>>>> +       select PCI_MSI_IRQ_DOMAIN if PCI_MSI
>>>>>>> +       select PCI_XGENE_MSI if PCI_MSI
>>>>>>>         help
>>>>>>>           Say Y here if you want internal PCI support on APM
>>>>>>> X-Gene SoC. There are 5 internal PCIe ports available. Each port
>>>>>>> is GEN3 capable
>>>>>>>           and have varied lanes from x1 to x8.
>>>>>>>
>>>>>>> +config PCI_XGENE_MSI
>>>>>>> +       bool "X-Gene v1 PCIe MSI feature"
>>>>>>> +       depends on PCI_XGENE && PCI_MSI
>>>>>>> +
>>>>>>>  config PCI_LAYERSCAPE
>>>>>>>         bool "Freescale Layerscape PCIe controller"
>>>>>>>         depends on OF && ARM
>>>>>>> diff --git a/drivers/pci/host/Makefile
>>>>>>> b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644
>>>>>>> --- a/drivers/pci/host/Makefile
>>>>>>> +++ b/drivers/pci/host/Makefile
>>>>>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) +=
>>>>>>> pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o
>>>>>>> pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
>>>>>>>  obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
>>>>>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
>>>>>>>  obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
>>>>>>>  obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
>>>>>>> diff --git a/drivers/pci/host/pci-xgene-msi.c
>>>>>>> b/drivers/pci/host/pci-xgene-msi.c
>>>>>>> new file mode 100644
>>>>>>> index 0000000..4f0ff42
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/pci/host/pci-xgene-msi.c
>>>>>>> @@ -0,0 +1,407 @@
>>>>>>> +/*
>>>>>>> + * APM X-Gene MSI Driver
>>>>>>> + *
>>>>>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation
>>>>>>> + * Author: Tanmay Inamdar <tinamdar@apm.com>
>>>>>>> + *        Duc Dang <dhdang@apm.com>
>>>>>>> + *
>>>>>>> + * This program is free software; you can redistribute  it
>>>>>>> and/or modify it
>>>>>>> + * under  the terms of  the GNU General  Public License as
>>>>>>> published by the
>>>>>>> + * Free Software Foundation;  either version 2 of the  License,
>>>>>>> or (at your
>>>>>>> + * option) any later version.
>>>>>>> + *
>>>>>>> + * This program is distributed in the hope that it will be
>>>>>>> useful,
>>>>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty
>>>>>>> of
>>>>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>>>>>> + * GNU General Public License for more details.
>>>>>>> + */
>>>>>>> +#include <linux/interrupt.h>
>>>>>>> +#include <linux/module.h>
>>>>>>> +#include <linux/msi.h>
>>>>>>> +#include <linux/of_irq.h>
>>>>>>> +#include <linux/pci.h>
>>>>>>> +#include <linux/platform_device.h>
>>>>>>> +#include <linux/of_pci.h>
>>>>>>> +
>>>>>>> +#define MSI_INDEX0             0x000000
>>>>>>> +#define MSI_INT0               0x800000
>>>>>>> +
>>>>>>> +struct xgene_msi_settings {
>>>>>>> +       u32     index_per_group;
>>>>>>> +       u32     irqs_per_index;
>>>>>>> +       u32     nr_msi_vec;
>>>>>>> +       u32     nr_hw_irqs;
>>>>>>> +};
>>>>>>> +
>>>>>>> +struct xgene_msi {
>>>>>>> +       struct device_node              *node;
>>>>>>> +       struct msi_controller           mchip;
>>>>>>> +       struct irq_domain               *domain;
>>>>>>> +       struct xgene_msi_settings       *settings;
>>>>>>> +       u32                             msi_addr_lo;
>>>>>>> +       u32                             msi_addr_hi;
>>>>>>
>>>>>>
>>>>>> I'd rather see the mailbox address directly, and only do the
>>>>>> split when assigning it to the message (you seem to play all kind
>>>>>> of tricks on the address anyway).
>>>>>
>>>>>
>>>>> msi_addr_lo and msi_addr_hi store the physical base address of MSI
>>>>> controller registers. I will add comment to clarify this.
>>>>
>>>>
>>>> What I mean is that there is no point in keeping this around as a
>>>> pair of 32bit variables. You'd better keep it as a single 64bit,
>>>> and do the split when assigning it the the MSI message.
>>>
>>> Hi Marc,
>>>
>>> These came from device-tree (which describes 64-bit address number as
>>> 2 32-bit words).
>>
>> ... and converted to a resource as a 64bit word, on which you apply
>> {upper,lower}_32_bit(). So much for DT...
>>
>>> If I store them this way, I don't need CPU cycles to do the split
>>> every time assigning them to the MSI message. Please let me know what
>>> do you think about it.
>>
>> This is getting absolutely silly.
>>
>> How many cycles does it take to execute "lsr x1, x0, #32" on X-Gene? If
>> it takes so long that it is considered to be a bottleneck, I suggest
>> you go and design a better CPU (hint: the answer is probably 1 cycle
>> absolutely everywhere).
>>
>> How often are you configuring MSIs in the face of what is happening in
>> the rest of the kernel? Almost never!
>>
>> So, given that "never" times 1 is still never,  I'll consider that
>> readability of the code trumps it anytime (I can't believe we're having
>> that kind of conversation...).
>>
> I changed to use u64 for msi_addr and split it when composing MSI messages.
> The change is in v4 of the patch set that I just posted.
>>>>
>>>> [...]
>>>>
>>>>>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data,
>>>>>>> +                                 const struct cpumask *mask,
>>>>>>> bool force)
>>>>>>> +{
>>>>>>> +       struct xgene_msi *msi =
>>>>>>> irq_data_get_irq_chip_data(irq_data);
>>>>>>> +       unsigned int gic_irq;
>>>>>>> +       int ret;
>>>>>>> +
>>>>>>> +       gic_irq = msi->msi_virqs[irq_data->hwirq %
>>>>>>> msi->settings->nr_hw_irqs];
>>>>>>> +       ret = irq_set_affinity(gic_irq, mask);
>>>>>>
>>>>>>
>>>>>> Erm... This as the effect of moving *all* the MSIs hanging off
>>>>>> this interrupt to another CPU. I'm not sure that's an acceptable
>>>>>> effect... What if another MSI requires a different affinity?
>>>>>
>>>>>
>>>>> We have 16 'real' hardware IRQs. Each of these has multiple MSIs
>>>>> attached to it.
>>>>> So this will move all MSIs handing off this interrupt to another
>>>>> CPU; and we don't support different affinity settings for
>>>>> different MSIs that are attached to the same hardware IRQ.
>>>>
>>>>
>>>> Well, that's a significant departure from the expected behaviour.
>>>> In other words, I wonder how useful this is. Could you instead
>>>> reconfigure the MSI itself to hit the right CPU (assuming you don't
>>>> have more than 16 CPUs and if
>>>> that's only for XGene-1, this will only be 8 at most)? This would
>>>> reduce your number of possible MSIs, but at least the semantics of
>>>> set_afinity would be preserved.
>>>
>>> X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each
>>> group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have
>>> 16 hardware GIC IRQs for 2688 MSIs).
>>
>> We've already established that.
>>
>>> Setting affinity of single MSI to deliver it to a target CPU will move
>>> all the MSIs mapped to the same GIC IRQ to that CPU as well. This is
>>> not a standard behavior, but limiting the total number of MSIs will
>>> cause a lot of devices to fall back to INTx (with huge performance
>>> penalty) or even fail to load their driver as these devices request
>>> more than 16 MSIs during driver initialization.
>>
>> No, I think you got it wrong. If you have 168 MSIs per GIC IRQ, and
>> provided that you have 8 CPUs (XGene-1), you end up with 336 MSIs per
>> CPU (having statically assigned 2 IRQs per CPU).
>>
>> Assuming you adopt my scheme, you still have a grand total of 336 MSIs
>> that can be freely moved around without breaking any userspace
>> expectations.
>>
> Thanks Marc. This is a very good idea.
> 
> But to  move MSIs around, I need to change MSI termination address and data
> and write them to device configuration space. This may cause problems
> if the device
> fires an interrupt at the same time when I do the config write?
> 
> What is your opinion here?

There is an inherent race when changing the affinity of any interrupt,
whether that's an MSI or not. The kernel is perfectly prepared to handle
such a situation (to be honest, the kernel doesn't really care).

The write to the config space shouldn't be a concern. You will either
hit the old *or* the new CPU, but that race is only during the write
itself (you can read back from the config space if you're really
paranoid).  By the time you return from this read/write, the device will
be reconfigured.

>> I think that 336 MSIs is a fair number (nowhere near the 16 you claim).
>> Most platforms are doing quite well with that kind of numbers. Also,
>> you don't have to allocate all the MSIs a device can possibly claim (up
>> to 2048 MSI-X per device), as they are all perfectly capable of using
>> less MSI without having to fallback to INTx).
>>
>>> I can document the limitation in affinity setting of X-Gene-1 MSI in
>>> the driver to hopefully not make people surprised and hope to keep the
>>> total number of supported MSI as 2688 so that we can support as many
>>> cards that require MSI/MSI-X as possible.
>>
>> I don't think this is a valid approach. This breaks userspace (think of
>> things like irqbalance), and breaks the SMP affinity model that Linux
>> uses. No amount of documentation is going to solve it, so I think you
>> just have to admit that the HW is mis-designed and do the best you can
>> to make it work like Linux expect it to work.
>>
>> The alternative would be to disable affinity setting altogether instead of
>> introducing these horrible side effects.
>>
> I have it disabled (set_affinity does nothing) in my v4 patch.

It would be good if you could try the above approach. It shouldn't be
hard to write, and it would be a lot better than just ignoring the problem.

I'll try to review the patches soon(-ish)...

	M.
Duc Dang April 17, 2015, 12:37 p.m. UTC | #18
On Fri, Apr 17, 2015 at 3:17 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 17/04/15 11:00, Duc Dang wrote:
>> On Wed, Apr 15, 2015 at 1:16 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>> On Tue, 14 Apr 2015 19:20:19 +0100
>>> Duc Dang <dhdang@apm.com> wrote:
>>>
>>>> On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com>
>>>> wrote:
>>>>> On 2015-04-11 00:42, Duc Dang wrote:
>>>>>>
>>>>>> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier
>>>>>> <marc.zyngier@arm.com> wrote:
>>>>>>>
>>>>>>> On 09/04/15 18:05, Duc Dang wrote:
>>>>>>>>
>>>>>>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
>>>>>>>> 16 HW IRQ lines.
>>>>>>>>
>>>>>>>> Signed-off-by: Duc Dang <dhdang@apm.com>
>>>>>>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
>>>>>>>> ---
>>>>>>>>  drivers/pci/host/Kconfig         |   6 +
>>>>>>>>  drivers/pci/host/Makefile        |   1 +
>>>>>>>>  drivers/pci/host/pci-xgene-msi.c | 407
>>>>>>>> +++++++++++++++++++++++++++++++++++++++
>>>>>>>>  drivers/pci/host/pci-xgene.c     |  21 ++
>>>>>>>>  4 files changed, 435 insertions(+)
>>>>>>>>  create mode 100644 drivers/pci/host/pci-xgene-msi.c
>>>>>>>>
>>>>>>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>>>>>>>> index 7b892a9..c9b61fa 100644
>>>>>>>> --- a/drivers/pci/host/Kconfig
>>>>>>>> +++ b/drivers/pci/host/Kconfig
>>>>>>>> @@ -89,11 +89,17 @@ config PCI_XGENE
>>>>>>>>         depends on ARCH_XGENE
>>>>>>>>         depends on OF
>>>>>>>>         select PCIEPORTBUS
>>>>>>>> +       select PCI_MSI_IRQ_DOMAIN if PCI_MSI
>>>>>>>> +       select PCI_XGENE_MSI if PCI_MSI
>>>>>>>>         help
>>>>>>>>           Say Y here if you want internal PCI support on APM
>>>>>>>> X-Gene SoC. There are 5 internal PCIe ports available. Each port
>>>>>>>> is GEN3 capable
>>>>>>>>           and have varied lanes from x1 to x8.
>>>>>>>>
>>>>>>>> +config PCI_XGENE_MSI
>>>>>>>> +       bool "X-Gene v1 PCIe MSI feature"
>>>>>>>> +       depends on PCI_XGENE && PCI_MSI
>>>>>>>> +
>>>>>>>>  config PCI_LAYERSCAPE
>>>>>>>>         bool "Freescale Layerscape PCIe controller"
>>>>>>>>         depends on OF && ARM
>>>>>>>> diff --git a/drivers/pci/host/Makefile
>>>>>>>> b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644
>>>>>>>> --- a/drivers/pci/host/Makefile
>>>>>>>> +++ b/drivers/pci/host/Makefile
>>>>>>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) +=
>>>>>>>> pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o
>>>>>>>> pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
>>>>>>>>  obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
>>>>>>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
>>>>>>>>  obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
>>>>>>>>  obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
>>>>>>>> diff --git a/drivers/pci/host/pci-xgene-msi.c
>>>>>>>> b/drivers/pci/host/pci-xgene-msi.c
>>>>>>>> new file mode 100644
>>>>>>>> index 0000000..4f0ff42
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/drivers/pci/host/pci-xgene-msi.c
>>>>>>>> @@ -0,0 +1,407 @@
>>>>>>>> +/*
>>>>>>>> + * APM X-Gene MSI Driver
>>>>>>>> + *
>>>>>>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation
>>>>>>>> + * Author: Tanmay Inamdar <tinamdar@apm.com>
>>>>>>>> + *        Duc Dang <dhdang@apm.com>
>>>>>>>> + *
>>>>>>>> + * This program is free software; you can redistribute  it
>>>>>>>> and/or modify it
>>>>>>>> + * under  the terms of  the GNU General  Public License as
>>>>>>>> published by the
>>>>>>>> + * Free Software Foundation;  either version 2 of the  License,
>>>>>>>> or (at your
>>>>>>>> + * option) any later version.
>>>>>>>> + *
>>>>>>>> + * This program is distributed in the hope that it will be
>>>>>>>> useful,
>>>>>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty
>>>>>>>> of
>>>>>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>>>>>>> + * GNU General Public License for more details.
>>>>>>>> + */
>>>>>>>> +#include <linux/interrupt.h>
>>>>>>>> +#include <linux/module.h>
>>>>>>>> +#include <linux/msi.h>
>>>>>>>> +#include <linux/of_irq.h>
>>>>>>>> +#include <linux/pci.h>
>>>>>>>> +#include <linux/platform_device.h>
>>>>>>>> +#include <linux/of_pci.h>
>>>>>>>> +
>>>>>>>> +#define MSI_INDEX0             0x000000
>>>>>>>> +#define MSI_INT0               0x800000
>>>>>>>> +
>>>>>>>> +struct xgene_msi_settings {
>>>>>>>> +       u32     index_per_group;
>>>>>>>> +       u32     irqs_per_index;
>>>>>>>> +       u32     nr_msi_vec;
>>>>>>>> +       u32     nr_hw_irqs;
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +struct xgene_msi {
>>>>>>>> +       struct device_node              *node;
>>>>>>>> +       struct msi_controller           mchip;
>>>>>>>> +       struct irq_domain               *domain;
>>>>>>>> +       struct xgene_msi_settings       *settings;
>>>>>>>> +       u32                             msi_addr_lo;
>>>>>>>> +       u32                             msi_addr_hi;
>>>>>>>
>>>>>>>
>>>>>>> I'd rather see the mailbox address directly, and only do the
>>>>>>> split when assigning it to the message (you seem to play all kind
>>>>>>> of tricks on the address anyway).
>>>>>>
>>>>>>
>>>>>> msi_addr_lo and msi_addr_hi store the physical base address of MSI
>>>>>> controller registers. I will add comment to clarify this.
>>>>>
>>>>>
>>>>> What I mean is that there is no point in keeping this around as a
>>>>> pair of 32bit variables. You'd better keep it as a single 64bit,
>>>>> and do the split when assigning it to the MSI message.
>>>>
>>>> Hi Marc,
>>>>
>>>> These came from device-tree (which describes 64-bit address number as
>>>> 2 32-bit words).
>>>
>>> ... and converted to a resource as a 64bit word, on which you apply
>>> {upper,lower}_32_bit(). So much for DT...
>>>
>>>> If I store them this way, I don't need CPU cycles to do the split
>>>> every time assigning them to the MSI message. Please let me know what
>>>> do you think about it.
>>>
>>> This is getting absolutely silly.
>>>
>>> How many cycles does it take to execute "lsr x1, x0, #32" on X-Gene? If
>>> it takes so long that it is considered to be a bottleneck, I suggest
>>> you go and design a better CPU (hint: the answer is probably 1 cycle
>>> absolutely everywhere).
>>>
>>> How often are you configuring MSIs in the face of what is happening in
>>> the rest of the kernel? Almost never!
>>>
>>> So, given that "never" times 1 is still never,  I'll consider that
>>> readability of the code trumps it anytime (I can't believe we're having
>>> that kind of conversation...).
>>>
>> I changed to use u64 for msi_addr and split it when composing MSI messages.
>> The change is in v4 of the patch set that I just posted.
>>>>>
>>>>> [...]
>>>>>
>>>>>>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data,
>>>>>>>> +                                 const struct cpumask *mask,
>>>>>>>> bool force)
>>>>>>>> +{
>>>>>>>> +       struct xgene_msi *msi =
>>>>>>>> irq_data_get_irq_chip_data(irq_data);
>>>>>>>> +       unsigned int gic_irq;
>>>>>>>> +       int ret;
>>>>>>>> +
>>>>>>>> +       gic_irq = msi->msi_virqs[irq_data->hwirq %
>>>>>>>> msi->settings->nr_hw_irqs];
>>>>>>>> +       ret = irq_set_affinity(gic_irq, mask);
>>>>>>>
>>>>>>>
>>>>>>> Erm... This has the effect of moving *all* the MSIs hanging off
>>>>>>> this interrupt to another CPU. I'm not sure that's an acceptable
>>>>>>> effect... What if another MSI requires a different affinity?
>>>>>>
>>>>>>
>>>>>> We have 16 'real' hardware IRQs. Each of these has multiple MSIs
>>>>>> attached to it.
>>>>>> So this will move all MSIs hanging off this interrupt to another
>>>>>> CPU; and we don't support different affinity settings for
>>>>>> different MSIs that are attached to the same hardware IRQ.
>>>>>
>>>>>
>>>>> Well, that's a significant departure from the expected behaviour.
>>>>> In other words, I wonder how useful this is. Could you instead
>>>>> reconfigure the MSI itself to hit the right CPU (assuming you don't
>>>>> have more than 16 CPUs and if
>>>>> that's only for XGene-1, this will only be 8 at most)? This would
>>>>> reduce your number of possible MSIs, but at least the semantics of
>>>>> set_affinity would be preserved.
>>>>
>>>> X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each
>>>> group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have
>>>> 16 hardware GIC IRQs for 2688 MSIs).
>>>
>>> We've already established that.
>>>
>>>> Setting affinity of single MSI to deliver it to a target CPU will move
>>>> all the MSIs mapped to the same GIC IRQ to that CPU as well. This is
>>>> not a standard behavior, but limiting the total number of MSIs will
>>>> cause a lot of devices to fall back to INTx (with huge performance
>>>> penalty) or even fail to load their driver as these devices request
>>>> more than 16 MSIs during driver initialization.
>>>
>>> No, I think you got it wrong. If you have 168 MSIs per GIC IRQ, and
>>> provided that you have 8 CPUs (XGene-1), you end up with 336 MSIs per
>>> CPU (having statically assigned 2 IRQs per CPU).
>>>
>>> Assuming you adopt my scheme, you still have a grand total of 336 MSIs
>>> that can be freely moved around without breaking any userspace
>>> expectations.
>>>
>> Thanks Marc. This is a very good idea.
>>
>> But to  move MSIs around, I need to change MSI termination address and data
>> and write them to device configuration space. This may cause problems
>> if the device
>> fires an interrupt at the same time when I do the config write?
>>
>> What is your opinion here?
>
> There is an inherent race when changing the affinity of any interrupt,
> whether that's an MSI or not. The kernel is perfectly prepared to handle
> such a situation (to be honest, the kernel doesn't really care).
>
> The write to the config space shouldn't be a concern. You will either
> hit the old *or* the new CPU, but that race is only during the write
> itself (you can read back from the config space if you're really
> paranoid).  By the time you return from this read/write, the device will
> be reconfigured.
>
>>> I think that 336 MSIs is a fair number (nowhere near the 16 you claim).
>>> Most platforms are doing quite well with that kind of numbers. Also,
>>> you don't have to allocate all the MSIs a device can possibly claim (up
>>> to 2048 MSI-X per device), as they are all perfectly capable of using
>>>> less MSI without having to fall back to INTx).
>>>
>>>> I can document the limitation in affinity setting of X-Gene-1 MSI in
>>>> the driver to hopefully not make people surprised and hope to keep the
>>>> total number of supported MSI as 2688 so that we can support as many
>>>> cards that require MSI/MSI-X as possible.
>>>
>>> I don't think this is a valid approach. This breaks userspace (think of
>>> things like irqbalance), and breaks the SMP affinity model that Linux
>>> uses. No amount of documentation is going to solve it, so I think you
>>> just have to admit that the HW is mis-designed and do the best you can
>>> to make it work like Linux expect it to work.
>>>
>>> The alternative would be to disable affinity setting altogether instead of
>>> introducing these horrible side effects.
>>>
>> I have it disabled (set_affinity does nothing) in my v4 patch.
>
> It would be good if you could try the above approach. It shouldn't be
> hard to write, and it would be a lot better than just ignoring the problem.
>
Yes. I am working on this change.

> I'll try to review the patches soon(-ish)...
>
>         M.
> --
> Jazz is not dead. It just smells funny...
Regards,
Duc Dang.
Marc Zyngier April 17, 2015, 12:45 p.m. UTC | #19
On 17/04/15 13:37, Duc Dang wrote:
> On Fri, Apr 17, 2015 at 3:17 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
>> On 17/04/15 11:00, Duc Dang wrote:
>>> On Wed, Apr 15, 2015 at 1:16 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>>> On Tue, 14 Apr 2015 19:20:19 +0100
>>>> Duc Dang <dhdang@apm.com> wrote:
>>>>
>>>>> On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com>
>>>>> wrote:
>>>>>> On 2015-04-11 00:42, Duc Dang wrote:
>>>>>>>
>>>>>>> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier
>>>>>>> <marc.zyngier@arm.com> wrote:
>>>>>>>>
>>>>>>>> On 09/04/15 18:05, Duc Dang wrote:
>>>>>>>>>
>>>>>>>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
>>>>>>>>> 16 HW IRQ lines.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Duc Dang <dhdang@apm.com>
>>>>>>>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
>>>>>>>>> ---
>>>>>>>>>  drivers/pci/host/Kconfig         |   6 +
>>>>>>>>>  drivers/pci/host/Makefile        |   1 +
>>>>>>>>>  drivers/pci/host/pci-xgene-msi.c | 407
>>>>>>>>> +++++++++++++++++++++++++++++++++++++++
>>>>>>>>>  drivers/pci/host/pci-xgene.c     |  21 ++
>>>>>>>>>  4 files changed, 435 insertions(+)
>>>>>>>>>  create mode 100644 drivers/pci/host/pci-xgene-msi.c
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>>>>>>>>> index 7b892a9..c9b61fa 100644
>>>>>>>>> --- a/drivers/pci/host/Kconfig
>>>>>>>>> +++ b/drivers/pci/host/Kconfig
>>>>>>>>> @@ -89,11 +89,17 @@ config PCI_XGENE
>>>>>>>>>         depends on ARCH_XGENE
>>>>>>>>>         depends on OF
>>>>>>>>>         select PCIEPORTBUS
>>>>>>>>> +       select PCI_MSI_IRQ_DOMAIN if PCI_MSI
>>>>>>>>> +       select PCI_XGENE_MSI if PCI_MSI
>>>>>>>>>         help
>>>>>>>>>           Say Y here if you want internal PCI support on APM
>>>>>>>>> X-Gene SoC. There are 5 internal PCIe ports available. Each port
>>>>>>>>> is GEN3 capable
>>>>>>>>>           and have varied lanes from x1 to x8.
>>>>>>>>>
>>>>>>>>> +config PCI_XGENE_MSI
>>>>>>>>> +       bool "X-Gene v1 PCIe MSI feature"
>>>>>>>>> +       depends on PCI_XGENE && PCI_MSI
>>>>>>>>> +
>>>>>>>>>  config PCI_LAYERSCAPE
>>>>>>>>>         bool "Freescale Layerscape PCIe controller"
>>>>>>>>>         depends on OF && ARM
>>>>>>>>> diff --git a/drivers/pci/host/Makefile
>>>>>>>>> b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644
>>>>>>>>> --- a/drivers/pci/host/Makefile
>>>>>>>>> +++ b/drivers/pci/host/Makefile
>>>>>>>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) +=
>>>>>>>>> pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o
>>>>>>>>> pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
>>>>>>>>>  obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
>>>>>>>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
>>>>>>>>>  obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
>>>>>>>>>  obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
>>>>>>>>> diff --git a/drivers/pci/host/pci-xgene-msi.c
>>>>>>>>> b/drivers/pci/host/pci-xgene-msi.c
>>>>>>>>> new file mode 100644
>>>>>>>>> index 0000000..4f0ff42
>>>>>>>>> --- /dev/null
>>>>>>>>> +++ b/drivers/pci/host/pci-xgene-msi.c
>>>>>>>>> @@ -0,0 +1,407 @@
>>>>>>>>> +/*
>>>>>>>>> + * APM X-Gene MSI Driver
>>>>>>>>> + *
>>>>>>>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation
>>>>>>>>> + * Author: Tanmay Inamdar <tinamdar@apm.com>
>>>>>>>>> + *        Duc Dang <dhdang@apm.com>
>>>>>>>>> + *
>>>>>>>>> + * This program is free software; you can redistribute  it
>>>>>>>>> and/or modify it
>>>>>>>>> + * under  the terms of  the GNU General  Public License as
>>>>>>>>> published by the
>>>>>>>>> + * Free Software Foundation;  either version 2 of the  License,
>>>>>>>>> or (at your
>>>>>>>>> + * option) any later version.
>>>>>>>>> + *
>>>>>>>>> + * This program is distributed in the hope that it will be
>>>>>>>>> useful,
>>>>>>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty
>>>>>>>>> of
>>>>>>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>>>>>>>> + * GNU General Public License for more details.
>>>>>>>>> + */
>>>>>>>>> +#include <linux/interrupt.h>
>>>>>>>>> +#include <linux/module.h>
>>>>>>>>> +#include <linux/msi.h>
>>>>>>>>> +#include <linux/of_irq.h>
>>>>>>>>> +#include <linux/pci.h>
>>>>>>>>> +#include <linux/platform_device.h>
>>>>>>>>> +#include <linux/of_pci.h>
>>>>>>>>> +
>>>>>>>>> +#define MSI_INDEX0             0x000000
>>>>>>>>> +#define MSI_INT0               0x800000
>>>>>>>>> +
>>>>>>>>> +struct xgene_msi_settings {
>>>>>>>>> +       u32     index_per_group;
>>>>>>>>> +       u32     irqs_per_index;
>>>>>>>>> +       u32     nr_msi_vec;
>>>>>>>>> +       u32     nr_hw_irqs;
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +struct xgene_msi {
>>>>>>>>> +       struct device_node              *node;
>>>>>>>>> +       struct msi_controller           mchip;
>>>>>>>>> +       struct irq_domain               *domain;
>>>>>>>>> +       struct xgene_msi_settings       *settings;
>>>>>>>>> +       u32                             msi_addr_lo;
>>>>>>>>> +       u32                             msi_addr_hi;
>>>>>>>>
>>>>>>>>
>>>>>>>> I'd rather see the mailbox address directly, and only do the
>>>>>>>> split when assigning it to the message (you seem to play all kind
>>>>>>>> of tricks on the address anyway).
>>>>>>>
>>>>>>>
>>>>>>> msi_addr_lo and msi_addr_hi store the physical base address of MSI
>>>>>>> controller registers. I will add comment to clarify this.
>>>>>>
>>>>>>
>>>>>> What I mean is that there is no point in keeping this around as a
>>>>>> pair of 32bit variables. You'd better keep it as a single 64bit,
>>>>>> and do the split when assigning it to the MSI message.
>>>>>
>>>>> Hi Marc,
>>>>>
>>>>> These came from device-tree (which describes 64-bit address number as
>>>>> 2 32-bit words).
>>>>
>>>> ... and converted to a resource as a 64bit word, on which you apply
>>>> {upper,lower}_32_bit(). So much for DT...
>>>>
>>>>> If I store them this way, I don't need CPU cycles to do the split
>>>>> every time assigning them to the MSI message. Please let me know what
>>>>> do you think about it.
>>>>
>>>> This is getting absolutely silly.
>>>>
>>>> How many cycles does it take to execute "lsr x1, x0, #32" on X-Gene? If
>>>> it takes so long that it is considered to be a bottleneck, I suggest
>>>> you go and design a better CPU (hint: the answer is probably 1 cycle
>>>> absolutely everywhere).
>>>>
>>>> How often are you configuring MSIs in the face of what is happening in
>>>> the rest of the kernel? Almost never!
>>>>
>>>> So, given that "never" times 1 is still never,  I'll consider that
>>>> readability of the code trumps it anytime (I can't believe we're having
>>>> that kind of conversation...).
>>>>
>>> I changed to use u64 for msi_addr and split it when composing MSI messages.
>>> The change is in v4 of the patch set that I just posted.
>>>>>>
>>>>>> [...]
>>>>>>
>>>>>>>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data,
>>>>>>>>> +                                 const struct cpumask *mask,
>>>>>>>>> bool force)
>>>>>>>>> +{
>>>>>>>>> +       struct xgene_msi *msi =
>>>>>>>>> irq_data_get_irq_chip_data(irq_data);
>>>>>>>>> +       unsigned int gic_irq;
>>>>>>>>> +       int ret;
>>>>>>>>> +
>>>>>>>>> +       gic_irq = msi->msi_virqs[irq_data->hwirq %
>>>>>>>>> msi->settings->nr_hw_irqs];
>>>>>>>>> +       ret = irq_set_affinity(gic_irq, mask);
>>>>>>>>
>>>>>>>>
>>>>>>>> Erm... This has the effect of moving *all* the MSIs hanging off
>>>>>>>> this interrupt to another CPU. I'm not sure that's an acceptable
>>>>>>>> effect... What if another MSI requires a different affinity?
>>>>>>>
>>>>>>>
>>>>>>> We have 16 'real' hardware IRQs. Each of these has multiple MSIs
>>>>>>> attached to it.
>>>>>>> So this will move all MSIs hanging off this interrupt to another
>>>>>>> CPU; and we don't support different affinity settings for
>>>>>>> different MSIs that are attached to the same hardware IRQ.
>>>>>>
>>>>>>
>>>>>> Well, that's a significant departure from the expected behaviour.
>>>>>> In other words, I wonder how useful this is. Could you instead
>>>>>> reconfigure the MSI itself to hit the right CPU (assuming you don't
>>>>>> have more than 16 CPUs and if
>>>>>> that's only for XGene-1, this will only be 8 at most)? This would
>>>>>> reduce your number of possible MSIs, but at least the semantics of
>>>>>> set_affinity would be preserved.
>>>>>
>>>>> X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each
>>>>> group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have
>>>>> 16 hardware GIC IRQs for 2688 MSIs).
>>>>
>>>> We've already established that.
>>>>
>>>>> Setting affinity of single MSI to deliver it to a target CPU will move
>>>>> all the MSIs mapped to the same GIC IRQ to that CPU as well. This is
>>>>> not a standard behavior, but limiting the total number of MSIs will
>>>>> cause a lot of devices to fall back to INTx (with huge performance
>>>>> penalty) or even fail to load their driver as these devices request
>>>>> more than 16 MSIs during driver initialization.
>>>>
>>>> No, I think you got it wrong. If you have 168 MSIs per GIC IRQ, and
>>>> provided that you have 8 CPUs (XGene-1), you end up with 336 MSIs per
>>>> CPU (having statically assigned 2 IRQs per CPU).
>>>>
>>>> Assuming you adopt my scheme, you still have a grand total of 336 MSIs
>>>> that can be freely moved around without breaking any userspace
>>>> expectations.
>>>>
>>> Thanks Marc. This is a very good idea.
>>>
>>> But to  move MSIs around, I need to change MSI termination address and data
>>> and write them to device configuration space. This may cause problems
>>> if the device
>>> fires an interrupt at the same time when I do the config write?
>>>
>>> What is your opinion here?
>>
>> There is an inherent race when changing the affinity of any interrupt,
>> whether that's an MSI or not. The kernel is perfectly prepared to handle
>> such a situation (to be honest, the kernel doesn't really care).
>>
>> The write to the config space shouldn't be a concern. You will either
>> hit the old *or* the new CPU, but that race is only during the write
>> itself (you can read back from the config space if you're really
>> paranoid).  By the time you return from this read/write, the device will
>> be reconfigured.
>>
>>>> I think that 336 MSIs is a fair number (nowhere near the 16 you claim).
>>>> Most platforms are doing quite well with that kind of numbers. Also,
>>>> you don't have to allocate all the MSIs a device can possibly claim (up
>>>> to 2048 MSI-X per device), as they are all perfectly capable of using
>>>>> less MSI without having to fall back to INTx).
>>>>
>>>>> I can document the limitation in affinity setting of X-Gene-1 MSI in
>>>>> the driver to hopefully not make people surprised and hope to keep the
>>>>> total number of supported MSI as 2688 so that we can support as many
>>>>> cards that require MSI/MSI-X as possible.
>>>>
>>>> I don't think this is a valid approach. This breaks userspace (think of
>>>> things like irqbalance), and breaks the SMP affinity model that Linux
>>>> uses. No amount of documentation is going to solve it, so I think you
>>>> just have to admit that the HW is mis-designed and do the best you can
>>>> to make it work like Linux expect it to work.
>>>>
>>>> The alternative would be to disable affinity setting altogether instead of
>>>> introducing these horrible side effects.
>>>>
>>> I have it disabled (set_affinity does nothing) in my v4 patch.
>>
>> It would be good if you could try the above approach. It shouldn't be
>> hard to write, and it would be a lot better than just ignoring the problem.
>>
> Yes. I am working on this change.

In which case, I'll wait for a v5. No need to spend time on something
that is going to change anyway.

Thanks,

	M.
Feng Kan April 20, 2015, 6:51 p.m. UTC | #20
On Fri, Apr 17, 2015 at 5:45 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 17/04/15 13:37, Duc Dang wrote:
>> On Fri, Apr 17, 2015 at 3:17 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>> On 17/04/15 11:00, Duc Dang wrote:
>>>> On Wed, Apr 15, 2015 at 1:16 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>>>> On Tue, 14 Apr 2015 19:20:19 +0100
>>>>> Duc Dang <dhdang@apm.com> wrote:
>>>>>
>>>>>> On Sat, Apr 11, 2015 at 5:06 AM, Marc Zyngier <marc.zyngier@arm.com>
>>>>>> wrote:
>>>>>>> On 2015-04-11 00:42, Duc Dang wrote:
>>>>>>>>
>>>>>>>> On Fri, Apr 10, 2015 at 10:20 AM, Marc Zyngier
>>>>>>>> <marc.zyngier@arm.com> wrote:
>>>>>>>>>
>>>>>>>>> On 09/04/15 18:05, Duc Dang wrote:
>>>>>>>>>>
>>>>>>>>>> X-Gene v1 SoC supports total 2688 MSI/MSIX vectors coalesced into
>>>>>>>>>> 16 HW IRQ lines.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Duc Dang <dhdang@apm.com>
>>>>>>>>>> Signed-off-by: Tanmay Inamdar <tinamdar@apm.com>
>>>>>>>>>> ---
>>>>>>>>>>  drivers/pci/host/Kconfig         |   6 +
>>>>>>>>>>  drivers/pci/host/Makefile        |   1 +
>>>>>>>>>>  drivers/pci/host/pci-xgene-msi.c | 407
>>>>>>>>>> +++++++++++++++++++++++++++++++++++++++
>>>>>>>>>>  drivers/pci/host/pci-xgene.c     |  21 ++
>>>>>>>>>>  4 files changed, 435 insertions(+)
>>>>>>>>>>  create mode 100644 drivers/pci/host/pci-xgene-msi.c
>>>>>>>>>>
>>>>>>>>>> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
>>>>>>>>>> index 7b892a9..c9b61fa 100644
>>>>>>>>>> --- a/drivers/pci/host/Kconfig
>>>>>>>>>> +++ b/drivers/pci/host/Kconfig
>>>>>>>>>> @@ -89,11 +89,17 @@ config PCI_XGENE
>>>>>>>>>>         depends on ARCH_XGENE
>>>>>>>>>>         depends on OF
>>>>>>>>>>         select PCIEPORTBUS
>>>>>>>>>> +       select PCI_MSI_IRQ_DOMAIN if PCI_MSI
>>>>>>>>>> +       select PCI_XGENE_MSI if PCI_MSI
>>>>>>>>>>         help
>>>>>>>>>>           Say Y here if you want internal PCI support on APM
>>>>>>>>>> X-Gene SoC. There are 5 internal PCIe ports available. Each port
>>>>>>>>>> is GEN3 capable
>>>>>>>>>>           and have varied lanes from x1 to x8.
>>>>>>>>>>
>>>>>>>>>> +config PCI_XGENE_MSI
>>>>>>>>>> +       bool "X-Gene v1 PCIe MSI feature"
>>>>>>>>>> +       depends on PCI_XGENE && PCI_MSI
>>>>>>>>>> +
>>>>>>>>>>  config PCI_LAYERSCAPE
>>>>>>>>>>         bool "Freescale Layerscape PCIe controller"
>>>>>>>>>>         depends on OF && ARM
>>>>>>>>>> diff --git a/drivers/pci/host/Makefile
>>>>>>>>>> b/drivers/pci/host/Makefile index e61d91c..f39bde3 100644
>>>>>>>>>> --- a/drivers/pci/host/Makefile
>>>>>>>>>> +++ b/drivers/pci/host/Makefile
>>>>>>>>>> @@ -11,5 +11,6 @@ obj-$(CONFIG_PCIE_SPEAR13XX) +=
>>>>>>>>>> pcie-spear13xx.o obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o
>>>>>>>>>> pci-keystone.o obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
>>>>>>>>>>  obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
>>>>>>>>>> +obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
>>>>>>>>>>  obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
>>>>>>>>>>  obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
>>>>>>>>>> diff --git a/drivers/pci/host/pci-xgene-msi.c
>>>>>>>>>> b/drivers/pci/host/pci-xgene-msi.c
>>>>>>>>>> new file mode 100644
>>>>>>>>>> index 0000000..4f0ff42
>>>>>>>>>> --- /dev/null
>>>>>>>>>> +++ b/drivers/pci/host/pci-xgene-msi.c
>>>>>>>>>> @@ -0,0 +1,407 @@
>>>>>>>>>> +/*
>>>>>>>>>> + * APM X-Gene MSI Driver
>>>>>>>>>> + *
>>>>>>>>>> + * Copyright (c) 2014, Applied Micro Circuits Corporation
>>>>>>>>>> + * Author: Tanmay Inamdar <tinamdar@apm.com>
>>>>>>>>>> + *        Duc Dang <dhdang@apm.com>
>>>>>>>>>> + *
>>>>>>>>>> + * This program is free software; you can redistribute  it
>>>>>>>>>> and/or modify it
>>>>>>>>>> + * under  the terms of  the GNU General  Public License as
>>>>>>>>>> published by the
>>>>>>>>>> + * Free Software Foundation;  either version 2 of the  License,
>>>>>>>>>> or (at your
>>>>>>>>>> + * option) any later version.
>>>>>>>>>> + *
>>>>>>>>>> + * This program is distributed in the hope that it will be
>>>>>>>>>> useful,
>>>>>>>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty
>>>>>>>>>> of
>>>>>>>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>>>>>>>>> + * GNU General Public License for more details.
>>>>>>>>>> + */
>>>>>>>>>> +#include <linux/interrupt.h>
>>>>>>>>>> +#include <linux/module.h>
>>>>>>>>>> +#include <linux/msi.h>
>>>>>>>>>> +#include <linux/of_irq.h>
>>>>>>>>>> +#include <linux/pci.h>
>>>>>>>>>> +#include <linux/platform_device.h>
>>>>>>>>>> +#include <linux/of_pci.h>
>>>>>>>>>> +
>>>>>>>>>> +#define MSI_INDEX0             0x000000
>>>>>>>>>> +#define MSI_INT0               0x800000
>>>>>>>>>> +
>>>>>>>>>> +struct xgene_msi_settings {
>>>>>>>>>> +       u32     index_per_group;
>>>>>>>>>> +       u32     irqs_per_index;
>>>>>>>>>> +       u32     nr_msi_vec;
>>>>>>>>>> +       u32     nr_hw_irqs;
>>>>>>>>>> +};
>>>>>>>>>> +
>>>>>>>>>> +struct xgene_msi {
>>>>>>>>>> +       struct device_node              *node;
>>>>>>>>>> +       struct msi_controller           mchip;
>>>>>>>>>> +       struct irq_domain               *domain;
>>>>>>>>>> +       struct xgene_msi_settings       *settings;
>>>>>>>>>> +       u32                             msi_addr_lo;
>>>>>>>>>> +       u32                             msi_addr_hi;
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> I'd rather see the mailbox address directly, and only do the
>>>>>>>>> split when assigning it to the message (you seem to play all kind
>>>>>>>>> of tricks on the address anyway).
>>>>>>>>
>>>>>>>>
>>>>>>>> msi_addr_lo and msi_addr_hi store the physical base address of MSI
>>>>>>>> controller registers. I will add comment to clarify this.
>>>>>>>
>>>>>>>
>>>>>>> What I mean is that there is no point in keeping this around as a
>>>>>>> pair of 32bit variables. You'd better keep it as a single 64bit,
>>>>>>> and do the split when assigning it to the MSI message.
>>>>>>
>>>>>> Hi Marc,
>>>>>>
>>>>>> These came from device-tree (which describes 64-bit address number as
>>>>>> 2 32-bit words).
>>>>>
>>>>> ... and converted to a resource as a 64bit word, on which you apply
>>>>> {upper,lower}_32_bit(). So much for DT...
>>>>>
>>>>>> If I store them this way, I don't need CPU cycles to do the split
>>>>>> every time assigning them to the MSI message. Please let me know what
>>>>>> do you think about it.
>>>>>
>>>>> This is getting absolutely silly.
>>>>>
>>>>> How many cycles does it take to execute "lsr x1, x0, #32" on X-Gene? If
>>>>> it takes so long that it is considered to be a bottleneck, I suggest
>>>>> you go and design a better CPU (hint: the answer is probably 1 cycle
>>>>> absolutely everywhere).
>>>>>
>>>>> How often are you configuring MSIs in the face of what is happening in
>>>>> the rest of the kernel? Almost never!
>>>>>
>>>>> So, given that "never" times 1 is still never,  I'll consider that
>>>>> readability of the code trumps it anytime (I can't believe we're having
>>>>> that kind of conversation...).
>>>>>
>>>> I changed to use u64 for msi_addr and split it when composing MSI messages.
>>>> The change is in v4 of the patch set that I just posted.
>>>>>>>
>>>>>>> [...]
>>>>>>>
>>>>>>>>>> +static int xgene_msi_set_affinity(struct irq_data *irq_data,
>>>>>>>>>> +                                 const struct cpumask *mask,
>>>>>>>>>> bool force)
>>>>>>>>>> +{
>>>>>>>>>> +       struct xgene_msi *msi =
>>>>>>>>>> irq_data_get_irq_chip_data(irq_data);
>>>>>>>>>> +       unsigned int gic_irq;
>>>>>>>>>> +       int ret;
>>>>>>>>>> +
>>>>>>>>>> +       gic_irq = msi->msi_virqs[irq_data->hwirq %
>>>>>>>>>> msi->settings->nr_hw_irqs];
>>>>>>>>>> +       ret = irq_set_affinity(gic_irq, mask);
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Erm... This as the effect of moving *all* the MSIs hanging off
>>>>>>>>> this interrupt to another CPU. I'm not sure that's an acceptable
>>>>>>>>> effect... What if another MSI requires a different affinity?
>>>>>>>>
>>>>>>>>
>>>>>>>> We have 16 'real' hardware IRQs. Each of these has multiple MSIs
>>>>>>>> attached to it.
>>>>>>>> So this will move all MSIs handing off this interrupt to another
>>>>>>>> CPU; and we don't support different affinity settings for
>>>>>>>> different MSIs that are attached to the same hardware IRQ.
>>>>>>>
>>>>>>>
>>>>>>> Well, that's a significant departure from the expected behaviour.
>>>>>>> In other words, I wonder how useful this is. Could you instead
>>>>>>> reconfigure the MSI itself to hit the right CPU (assuming you don't
>>>>>>> have more than 16 CPUs and if
>>>>>>> that's only for XGene-1, this will only be 8 at most)? This would
>>>>>>> reduce your number of possible MSIs, but at least the semantics of
>>>>>>> set_afinity would be preserved.
>>>>>>
>>>>>> X-Gene-1 supports 2688 MSIs that are divided into 16 groups, each
>>>>>> group has 168 MSIs that are mapped to 1 hardware GIC IRQ (so we have
>>>>>> 16 hardware GIC IRQs for 2688 MSIs).
>>>>>
>>>>> We've already established that.
>>>>>
>>>>>> Setting affinity of single MSI to deliver it to a target CPU will move
>>>>>> all the MSIs mapped to the same GIC IRQ to that CPU as well. This is
>>>>>> not a standard behavior, but limiting the total number of MSIs will
>>>>>> cause a lot of devices to fall back to INTx (with huge performance
>>>>>> penalty) or even fail to load their driver as these devices request
>>>>>> more than 16 MSIs during driver initialization.
>>>>>
>>>>> No, I think you got it wrong. If you have 168 MSIs per GIC IRQ, and
>>>>> provided that you have 8 CPUs (XGene-1), you end up with 336 MSIs per
>>>>> CPU (having statically assigned 2 IRQs per CPU).
>>>>>
>>>>> Assuming you adopt my scheme, you still have a grand total of 336 MSIs
>>>>> that can be freely moved around without breaking any userspace
>>>>> expectations.
>>>>>
>>>> Thanks Marc. This is a very good idea.
>>>>
>>>> But to  move MSIs around, I need to change MSI termination address and data
>>>> and write them to device configuration space. This may cause problems
>>>> if the device
>>>> fires an interrupt at the same time when I do the config write?
>>>>
>>>> What is your opinion here?
>>>
>>> There is an inherent race when changing the affinity of any interrupt,
>>> whether that's an MSI or not. The kernel is perfectly prepared to handle
>>> such a situation (to be honest, the kernel doesn't really care).
>>>
>>> The write to the config space shouldn't be a concern. You will either
>>> hit the old *or* the new CPU, but that race is only during the write
>>> itself (you can read back from the config space if you're really
>>> paranoid).  By the time you return from this read/write, the device will
>>> be reconfigured.
>>>
>>>>> I think that 336 MSIs is a fair number (nowhere near the 16 you claim).
>>>>> Most platforms are doing quite well with that kind of numbers. Also,
>>>>> you don't have to allocate all the MSIs a device can possibly claim (up
>>>>> to 2048 MSI-X per device), as they are all perfectly capable of using
>>>>> less MSI without having to fallback to INTx).
>>>>>
>>>>>> I can document the limitation in affinity setting of X-Gene-1 MSI in
>>>>>> the driver to hopefully not make people surprise and hope to keep the
>>>>>> total number of supported MSI as 2688 so that we can support as many
>>>>>> cards that require MSI/MSI-X as possible.
>>>>>
>>>>> I don't think this is a valid approach. This breaks userspace (think of
>>>>> things like irqbalance), and breaks the SMP affinity model that Linux
>>>>> uses. No amount of documentation is going to solve it, so I think you
>>>>> just have to admit that the HW is mis-designed and do the best you can
>>>>> to make it work like Linux expect it to work.
>>>>>
>>>>> The alternative would to disable affinity setting altogether instead of
>>>>> introducing these horrible side effects.
>>>>>
>>>> I have it disabled (set_affinity does nothing) in my v4 patch.
>>>
>>> It would be good if you could try the above approach. It shouldn't be
>>> hard to write, and it would be a lot better than just ignoring the problem.
>>>
>> Yes. I am working on this change.
>
> In which case, I'll wait for a v5. No need to spend time on something
> that is going to change anyway.

Marc:

Is it possible to use pci_msi_create_default_irq_domain for the X-Gene MSI
driver? Or is this going to be removed in the future? Thanks

>
> Thanks,
>
>         M.
> --
> Jazz is not dead. It just smells funny...
Duc Dang April 21, 2015, 4:04 a.m. UTC | #21
This patch set adds MSI/MSIX termination driver support for the APM X-Gene v1 SoC.
The APM X-Gene v1 SoC has its own implementation of MSI, which is not compliant
with the GIC V2M specification for MSI termination.

There is a single MSI block in the X-Gene v1 SoC which serves all 5 PCIe ports. This MSI
block supports 2048 MSI termination ports coalesced into 16 physical HW IRQ lines,
shared across all 5 PCIe ports. As of version 5 of this patch set, the total number of
MSI vectors this driver supports is reduced to 256 to maintain correct set_affinity
behavior for each MSI.

v5 changes:
	1. Implement set_affinity for each MSI by statically allocating 2 MSI GIC IRQs
	for each X-Gene CPU core and moving MSI vectors around these GIC IRQs to steer
	them to target CPU core. As a consequence, the total MSI vectors that X-Gene v1
	supports is reduced to 256.

v4 changes:
        1. Remove affinity setting for each MSI
        2. Add description about register layout, MSI termination address and data
        3. Correct total number of MSI vectors to 2048
        4. Clean up error messages
        5. Remove unused module code

v3 changes:
        1. Implement MSI support using PCI MSI IRQ domain
        2. Only use msi_controller to store IRQ domain
v2 changes:
        1. Use msi_controller structure
        2. Remove arch hooks arch_teardown_msi_irqs and arch_setup_msi_irqs

 .../devicetree/bindings/pci/xgene-pci-msi.txt      |  63 +++
 MAINTAINERS                                        |   8 +
 arch/arm64/boot/dts/apm/apm-storm.dtsi             |  27 ++
 drivers/pci/host/Kconfig                           |   6 +
 drivers/pci/host/Makefile                          |   1 +
 drivers/pci/host/pci-xgene-msi.c                   | 477 +++++++++++++++++++++
 drivers/pci/host/pci-xgene.c                       |  21 +
 7 files changed, 603 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pci/xgene-pci-msi.txt
 create mode 100644 drivers/pci/host/pci-xgene-msi.c
Marc Zyngier April 21, 2015, 8:32 a.m. UTC | #22
On 20/04/15 19:51, Feng Kan wrote:

> Is it possible to use the pci_msi_create_default_irq_domain for the X-Gene MSI
> driver. Or is this going to be removed in the future. Thanks

This is not the preferred way. pci_msi_create_default_irq_domain is only
there for legacy purposes, so that we can lookup an MSI without
specifying the domain. New code shouldn't have to rely on it.

Thanks,

	M.
Jon Masters April 22, 2015, 3:02 a.m. UTC | #23
On 04/21/2015 12:04 AM, Duc Dang wrote:
> This patch set adds MSI/MSIX termination driver support for APM X-Gene v1 SoC.
> APM X-Gene v1 SoC supports its own implementation of MSI, which is not compliant
> to GIC V2M specification for MSI Termination.
> 
> There is single MSI block in X-Gene v1 SOC which serves all 5 PCIe ports. This MSI
> block supports 2048 MSI termination ports coalesced into 16 physical HW IRQ lines
> and shared across all 5 PCIe ports. As the version 5 of this patch, the total MSI
> vectors this driver supports is reduced to 256 to maintain the correct set_affinity 
> behavior for each MSI.

Quick note that I have tested the previous several versions of this
patch series on several platforms and will test v6 once available with
the pending feedback addressed.

Jon.
Jon Masters April 22, 2015, 3:02 a.m. UTC | #24
On 04/21/2015 12:04 AM, Duc Dang wrote:
> This patch set adds MSI/MSIX termination driver support for APM X-Gene v1 SoC.
> APM X-Gene v1 SoC supports its own implementation of MSI, which is not compliant
> to GIC V2M specification for MSI Termination.
> 
> There is single MSI block in X-Gene v1 SOC which serves all 5 PCIe ports. This MSI
> block supports 2048 MSI termination ports coalesced into 16 physical HW IRQ lines
> and shared across all 5 PCIe ports. As the version 5 of this patch, the total MSI
> vectors this driver supports is reduced to 256 to maintain the correct set_affinity 
> behavior for each MSI.

Quick note that I have tested the previous several versions of this
patch series on several platforms and will test v6 once available with
the pending feedback addressed.

Jon.
diff mbox

Patch

diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 7b892a9..c9b61fa 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -89,11 +89,17 @@  config PCI_XGENE
 	depends on ARCH_XGENE
 	depends on OF
 	select PCIEPORTBUS
+	select PCI_MSI_IRQ_DOMAIN if PCI_MSI
+	select PCI_XGENE_MSI if PCI_MSI
 	help
 	  Say Y here if you want internal PCI support on APM X-Gene SoC.
 	  There are 5 internal PCIe ports available. Each port is GEN3 capable
 	  and have varied lanes from x1 to x8.
 
+config PCI_XGENE_MSI
+	bool "X-Gene v1 PCIe MSI feature"
+	depends on PCI_XGENE && PCI_MSI
+
 config PCI_LAYERSCAPE
 	bool "Freescale Layerscape PCIe controller"
 	depends on OF && ARM
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index e61d91c..f39bde3 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -11,5 +11,6 @@  obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
 obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o
 obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
 obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
+obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
 obj-$(CONFIG_PCI_LAYERSCAPE) += pci-layerscape.o
 obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c
new file mode 100644
index 0000000..4f0ff42
--- /dev/null
+++ b/drivers/pci/host/pci-xgene-msi.c
@@ -0,0 +1,407 @@ 
+/*
+ * APM X-Gene MSI Driver
+ *
+ * Copyright (c) 2014, Applied Micro Circuits Corporation
+ * Author: Tanmay Inamdar <tinamdar@apm.com>
+ *	   Duc Dang <dhdang@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_irq.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/of_pci.h>
+
+#define MSI_INDEX0		0x000000
+#define MSI_INT0		0x800000
+
+/*
+ * Per-SoC MSI controller geometry.
+ *
+ * @index_per_group: MSI index registers per HW IRQ group (8 on X-Gene v1)
+ * @irqs_per_index:  MSI vectors reported by one index register (21) --
+ *                   presumably fixed by the HW register layout; confirm
+ *                   against the controller documentation
+ * @nr_msi_vec:      total MSI vectors supported (16 * 8 * 21 = 2688)
+ * @nr_hw_irqs:      GIC HW IRQ lines the vectors are coalesced into
+ */
+struct xgene_msi_settings {
+	u32	index_per_group;
+	u32	irqs_per_index;
+	u32	nr_msi_vec;
+	u32	nr_hw_irqs;
+};
+
+/*
+ * Driver-private state for the X-Gene MSI controller.
+ */
+struct xgene_msi {
+	struct device_node		*node;		/* MSI controller DT node */
+	struct msi_controller		mchip;		/* holds the PCI MSI domain */
+	struct irq_domain		*domain;	/* inner (parent) IRQ domain */
+	struct xgene_msi_settings	*settings;	/* SoC-specific geometry */
+	u32				msi_addr_lo;	/* MSI termination addr, low 32 bits */
+	u32				msi_addr_hi;	/* MSI termination addr, high 32 bits */
+	void __iomem			*msi_regs;	/* mapped controller registers */
+	unsigned long			*bitmap;	/* one bit per MSI vector */
+	struct mutex			bitmap_lock;	/* protects @bitmap */
+	int				*msi_virqs;	/* Linux IRQ per HW IRQ line */
+};
+
+/*
+ * Geometry for the X-Gene v1 ("Storm") SoC: 2688 vectors over 16 HW IRQ
+ * lines, 8 index registers per group, 21 vectors per index register.
+ *
+ * Made static: the table is only referenced from this file and should
+ * not leak into the kernel's global namespace.
+ */
+static struct xgene_msi_settings storm_msi_settings = {
+	.index_per_group	= 8,
+	.irqs_per_index		= 21,
+	.nr_msi_vec		= 2688,
+	.nr_hw_irqs		= 16,
+};
+
+typedef int (*xgene_msi_initcall_t)(struct xgene_msi *);
+
+/* DT match callback: bind the X-Gene v1 geometry to this controller. */
+static int xgene_msi_init_storm_settings(struct xgene_msi *xgene_msi)
+{
+	xgene_msi->settings = &storm_msi_settings;
+	return 0;
+}
+
+/*
+ * irq_chip presented to the PCI core for the outer MSI IRQs; all
+ * mask/unmask operations delegate to the standard PCI MSI helpers.
+ */
+static struct irq_chip xgene_msi_top_irq_chip = {
+	.name		= "X-Gene1 MSI",
+	.irq_enable	= pci_msi_unmask_irq,
+	.irq_disable	= pci_msi_mask_irq,
+	.irq_mask	= pci_msi_mask_irq,
+	.irq_unmask	= pci_msi_unmask_irq,
+};
+
+/* PCI MSI domain info: default ops/chip plus MSI-X support. */
+static struct  msi_domain_info xgene_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		  MSI_FLAG_PCI_MSIX),
+	.chip	= &xgene_msi_top_irq_chip,
+};
+
+/*
+ * Fill in the MSI message (termination address + data) for data->hwirq.
+ *
+ * The hwirq number encodes the controller coordinates as:
+ *   group   = hwirq % nr_hw_irqs                  (which HW IRQ line)
+ *   reg_set = hwirq / (nr_hw_irqs * irqs_per_index)  (index register)
+ *   data    = (hwirq / nr_hw_irqs) % irqs_per_index  (bit in register)
+ * This is the exact inverse of the decode performed in xgene_msi_isr().
+ * The register bank is selected via address bits [16+] -- presumably
+ * per the controller's register map; confirm against HW documentation.
+ */
+static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct xgene_msi *msi = irq_data_get_irq_chip_data(data);
+	u32 nr_hw_irqs = msi->settings->nr_hw_irqs;
+	u32 irqs_per_index = msi->settings->irqs_per_index;
+	u32 reg_set = data->hwirq / (nr_hw_irqs * irqs_per_index);
+	u32 group = data->hwirq % nr_hw_irqs;
+
+	msg->address_hi = msi->msi_addr_hi;
+	msg->address_lo = msi->msi_addr_lo +
+			  (((8 * group) + reg_set) << 16);
+	msg->data = (data->hwirq / nr_hw_irqs) % irqs_per_index;
+}
+
+/*
+ * Steer an MSI to the CPUs in @mask by moving the affinity of the
+ * underlying GIC IRQ line.
+ *
+ * NOTE(review): as discussed in this thread, this moves *all* MSI
+ * vectors that share the same HW IRQ line (hwirq % nr_hw_irqs), which
+ * departs from the usual per-IRQ affinity semantics.
+ */
+static int xgene_msi_set_affinity(struct irq_data *irq_data,
+				  const struct cpumask *mask, bool force)
+{
+	struct xgene_msi *msi = irq_data_get_irq_chip_data(irq_data);
+	unsigned int gic_irq;
+	int ret;
+
+	gic_irq = msi->msi_virqs[irq_data->hwirq % msi->settings->nr_hw_irqs];
+	ret = irq_set_affinity(gic_irq, mask);
+	/* Report the change as fully handled by this chip. */
+	if (ret == IRQ_SET_MASK_OK)
+		ret = IRQ_SET_MASK_OK_DONE;
+
+	return ret;
+}
+
+/* Inner irq_chip: composes the MSI message and handles affinity. */
+static struct irq_chip xgene_msi_bottom_irq_chip = {
+	.name			= "MSI",
+	.irq_set_affinity       = xgene_msi_set_affinity,
+	.irq_compose_msi_msg	= xgene_compose_msi_msg,
+};
+
+/*
+ * Allocate one MSI vector from the controller-wide bitmap and bind it
+ * to @virq in the parent domain.
+ *
+ * NOTE(review): @nr_irqs is ignored -- only a single vector is set up
+ * per call.  Presumably multi-MSI allocation is not supported here
+ * (the domain only advertises MSI_FLAG_PCI_MSIX); confirm.
+ */
+static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *args)
+{
+
+	struct xgene_msi *msi = domain->host_data;
+	u32 msi_irq_count = msi->settings->nr_msi_vec;
+	int msi_irq;
+
+	mutex_lock(&msi->bitmap_lock);
+
+	/* The first free bit becomes the hwirq number. */
+	msi_irq = find_first_zero_bit(msi->bitmap, msi_irq_count);
+	if (msi_irq < msi_irq_count)
+		set_bit(msi_irq, msi->bitmap);
+	else
+		msi_irq = -ENOSPC;
+
+	mutex_unlock(&msi->bitmap_lock);
+
+	if (msi_irq < 0)
+		return msi_irq;
+
+	irq_domain_set_info(domain, virq, msi_irq, &xgene_msi_bottom_irq_chip,
+			    domain->host_data, handle_simple_irq, NULL, NULL);
+	set_irq_flags(virq, IRQF_VALID);
+
+	return 0;
+}
+
+/*
+ * Release the MSI vector bound to @virq back to the bitmap, then free
+ * the mapping in the parent domain.
+ */
+static void xgene_irq_domain_free(struct irq_domain *domain,
+				  unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
+
+	mutex_lock(&msi->bitmap_lock);
+
+	if (!test_bit(d->hwirq, msi->bitmap))
+		/* Identify the controller so the log line is actionable. */
+		pr_err("%s: trying to free unused MSI#%lu\n",
+		       msi->node->full_name, d->hwirq);
+	else
+		clear_bit(d->hwirq, msi->bitmap);
+
+	mutex_unlock(&msi->bitmap_lock);
+
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+/* Parent-domain ops: per-vector allocation from the MSI bitmap. */
+static const struct irq_domain_ops msi_domain_ops = {
+	.alloc  = xgene_irq_domain_alloc,
+	.free   = xgene_irq_domain_free,
+};
+
+/*
+ * Build the two-level IRQ domain hierarchy: an inner linear domain
+ * handing out hwirq numbers from the bitmap, and the PCI MSI domain on
+ * top of it that the PCI core allocates from.
+ *
+ * Error messages include the DT node name so the failing controller
+ * can be identified (requested during review).
+ */
+static int xgene_allocate_domains(struct xgene_msi *msi)
+{
+	msi->domain = irq_domain_add_linear(NULL, msi->settings->nr_msi_vec,
+					    &msi_domain_ops, msi);
+
+	if (!msi->domain) {
+		pr_err("%s: failed to create parent MSI domain\n",
+		       msi->node->full_name);
+		return -ENOMEM;
+	}
+
+	msi->mchip.of_node = msi->node;
+	msi->mchip.domain = pci_msi_create_irq_domain(msi->mchip.of_node,
+						      &xgene_msi_domain_info,
+						      msi->domain);
+
+	if (!msi->mchip.domain) {
+		pr_err("%s: failed to create MSI domain\n",
+		       msi->node->full_name);
+		irq_domain_remove(msi->domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/* Tear down the PCI MSI domain first, then its parent domain. */
+static void xgene_free_domains(struct xgene_msi *msi)
+{
+	if (msi->mchip.domain)
+		irq_domain_remove(msi->mchip.domain);
+	if (msi->domain)
+		irq_domain_remove(msi->domain);
+}
+
+/*
+ * Allocate the per-vector bitmap and the HW-IRQ-to-Linux-IRQ table.
+ * On failure everything allocated here is released again, so the
+ * caller sees all-or-nothing (previously the bitmap leaked when the
+ * msi_virqs allocation failed).
+ */
+static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
+{
+	u32 msi_irq_count = xgene_msi->settings->nr_msi_vec;
+	u32 hw_irq_count = xgene_msi->settings->nr_hw_irqs;
+	int size = BITS_TO_LONGS(msi_irq_count) * sizeof(long);
+
+	xgene_msi->bitmap = kzalloc(size, GFP_KERNEL);
+	if (!xgene_msi->bitmap)
+		return -ENOMEM;
+
+	mutex_init(&xgene_msi->bitmap_lock);
+
+	xgene_msi->msi_virqs = kcalloc(hw_irq_count, sizeof(int), GFP_KERNEL);
+	if (!xgene_msi->msi_virqs) {
+		kfree(xgene_msi->bitmap);
+		xgene_msi->bitmap = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Chained handler for one of the HW GIC IRQ lines.  Walks the group
+ * select register, then each pending index register, decoding every
+ * pending MSI vector and dispatching it to its virtual IRQ.  The hwirq
+ * encoding built here is the inverse of xgene_compose_msi_msg().
+ *
+ * Changes vs. v3: dropped the set-but-unused index_per_group local,
+ * and made the error message identify the offending IRQ (review ask).
+ */
+static irqreturn_t xgene_msi_isr(int irq, void *data)
+{
+	struct xgene_msi *xgene_msi = (struct xgene_msi *) data;
+	unsigned int virq;
+	int msir_index, msir_reg, msir_val, hw_irq;
+	u32 intr_index, grp_select, msi_grp, processed = 0;
+	u32 nr_hw_irqs, irqs_per_index;
+
+	/*
+	 * NOTE(review): assumes the nr_hw_irqs Linux IRQ numbers are
+	 * contiguous starting at msi_virqs[0] -- confirm this holds for
+	 * the platform IRQ mapping.
+	 */
+	msi_grp = irq - xgene_msi->msi_virqs[0];
+	if (msi_grp >= xgene_msi->settings->nr_hw_irqs) {
+		pr_err("xgene-msi: invalid msi group %u for irq %d\n",
+		       msi_grp, irq);
+		return IRQ_NONE;
+	}
+
+	nr_hw_irqs = xgene_msi->settings->nr_hw_irqs;
+	irqs_per_index = xgene_msi->settings->irqs_per_index;
+
+	/* One group register per HW IRQ line, spaced 64K apart. */
+	grp_select = readl(xgene_msi->msi_regs + MSI_INT0 + (msi_grp << 16));
+	while (grp_select) {
+		msir_index = ffs(grp_select) - 1;
+		msir_reg = (msi_grp << 19) + (msir_index << 16);
+		msir_val = readl(xgene_msi->msi_regs + MSI_INDEX0 + msir_reg);
+		while (msir_val) {
+			intr_index = ffs(msir_val) - 1;
+			hw_irq = (((msir_index * irqs_per_index) + intr_index) *
+				 nr_hw_irqs) + msi_grp;
+			virq = irq_find_mapping(xgene_msi->domain, hw_irq);
+			if (virq != 0)
+				generic_handle_irq(virq);
+			msir_val &= ~(1 << intr_index);
+			processed++;
+		}
+		grp_select &= ~(1 << msir_index);
+	}
+
+	return processed > 0 ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*
+ * Driver remove / probe-error teardown: release the chained IRQs, the
+ * allocator state and the IRQ domains.
+ *
+ * Fixes vs. v3: guard against msi_virqs being NULL (this function is
+ * reached from the probe error path before the allocator ran, which
+ * previously dereferenced NULL), and free the msi_virqs array itself,
+ * which used to leak.
+ */
+static int xgene_msi_remove(struct platform_device *pdev)
+{
+	int virq, i;
+	struct xgene_msi *msi = platform_get_drvdata(pdev);
+	u32 nr_hw_irqs = msi->settings->nr_hw_irqs;
+
+	if (msi->msi_virqs) {
+		for (i = 0; i < nr_hw_irqs; i++) {
+			virq = msi->msi_virqs[i];
+			if (virq != 0)
+				free_irq(virq, msi);
+		}
+		kfree(msi->msi_virqs);
+		msi->msi_virqs = NULL;
+	}
+
+	kfree(msi->bitmap);
+	msi->bitmap = NULL;
+
+	xgene_free_domains(msi);
+
+	return 0;
+}
+
+/*
+ * Wire up one HW IRQ line: translate the platform IRQ at @irq_index,
+ * install xgene_msi_isr() on it, allow it on every CPU, and record the
+ * Linux IRQ number for later lookups.
+ */
+static int xgene_msi_setup_hwirq(struct xgene_msi *msi,
+				 struct platform_device *pdev,
+				 int irq_index)
+{
+	cpumask_var_t cpus;
+	int ret;
+	int irq = platform_get_irq(pdev, irq_index);
+
+	if (irq < 0) {
+		dev_err(&pdev->dev, "Cannot translate IRQ index %d\n",
+			irq_index);
+		return -EINVAL;
+	}
+
+	ret = request_irq(irq, xgene_msi_isr, 0,
+			  xgene_msi_top_irq_chip.name, msi);
+	if (ret) {
+		dev_err(&pdev->dev, "request irq failed\n");
+		return ret;
+	}
+
+	/* Best effort: let the chained handler run on any CPU. */
+	if (alloc_cpumask_var(&cpus, GFP_KERNEL)) {
+		cpumask_setall(cpus);
+		irq_set_affinity(irq, cpus);
+		free_cpumask_var(cpus);
+	}
+
+	msi->msi_virqs[irq_index] = irq;
+
+	return 0;
+}
+
+/* DT match table: .data holds the per-SoC settings init callback. */
+static const struct of_device_id xgene_msi_match_table[] = {
+	{.compatible = "apm,xgene1-msi",
+	 .data = xgene_msi_init_storm_settings},
+	{},
+};
+
+/*
+ * Probe: map the controller registers, derive the MSI termination
+ * address from the register resource, set up the allocator, the IRQ
+ * domains and the 16 chained HW IRQs, then publish the msi_controller.
+ *
+ * Fix vs. v3: xgene_msi was kzalloc'd and leaked on every error path
+ * (early returns and the error label never freed it); devm_kzalloc()
+ * ties its lifetime to the device instead.
+ */
+static int xgene_msi_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int rc, irq_index;
+	struct device_node *np;
+	const struct of_device_id *matched_np;
+	struct xgene_msi *xgene_msi;
+	xgene_msi_initcall_t init_fn;
+	u32 nr_hw_irqs, nr_msi_vecs;
+
+	np = of_find_matching_node_and_match(NULL,
+			     xgene_msi_match_table, &matched_np);
+	if (!np)
+		return -ENODEV;
+
+	xgene_msi = devm_kzalloc(&pdev->dev, sizeof(*xgene_msi), GFP_KERNEL);
+	if (!xgene_msi)
+		return -ENOMEM;
+
+	xgene_msi->node = np;
+
+	init_fn = (xgene_msi_initcall_t) matched_np->data;
+	rc = init_fn(xgene_msi);
+	if (rc)
+		return rc;
+
+	platform_set_drvdata(pdev, xgene_msi);
+
+	nr_msi_vecs = xgene_msi->settings->nr_msi_vec;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	xgene_msi->msi_regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(xgene_msi->msi_regs)) {
+		dev_err(&pdev->dev, "no reg space\n");
+		rc = -EINVAL;
+		goto error;
+	}
+
+	/* MSI termination address advertised to endpoints. */
+	xgene_msi->msi_addr_hi = upper_32_bits(res->start);
+	xgene_msi->msi_addr_lo = lower_32_bits(res->start);
+
+	rc = xgene_msi_init_allocator(xgene_msi);
+	if (rc) {
+		dev_err(&pdev->dev, "Error allocating MSI bitmap\n");
+		goto error;
+	}
+
+	rc = xgene_allocate_domains(xgene_msi);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to allocate MSI domain\n");
+		goto error;
+	}
+
+	nr_hw_irqs = xgene_msi->settings->nr_hw_irqs;
+	for (irq_index = 0; irq_index < nr_hw_irqs; irq_index++) {
+		rc = xgene_msi_setup_hwirq(xgene_msi, pdev, irq_index);
+		if (rc)
+			goto error;
+	}
+
+	rc = of_pci_msi_chip_add(&xgene_msi->mchip);
+	if (rc) {
+		dev_err(&pdev->dev, "failed to add MSI controller chip\n");
+		goto error;
+	}
+
+	dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n");
+
+	return 0;
+error:
+	xgene_msi_remove(pdev);
+	return rc;
+}
+
+static struct platform_driver xgene_msi_driver = {
+	.driver = {
+		.name = "xgene-msi",
+		/*
+		 * .owner is filled in by platform_driver_register(),
+		 * so it is not set here.
+		 */
+		.of_match_table = xgene_msi_match_table,
+	},
+	.probe = xgene_msi_probe,
+	.remove = xgene_msi_remove,
+};
+
+/*
+ * Register at subsys_initcall time -- presumably so the MSI controller
+ * is available before the PCIe host driver probes and looks up its
+ * "msi-parent" (see pci-xgene.c); confirm the init ordering.
+ */
+static int __init xgene_pcie_msi_init(void)
+{
+	return platform_driver_register(&xgene_msi_driver);
+}
+subsys_initcall(xgene_pcie_msi_init);
+
+MODULE_AUTHOR("Duc Dang <dhdang@apm.com>");
+MODULE_DESCRIPTION("APM X-Gene PCIe MSI driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
index 22751ed..3e6faa1 100644
--- a/drivers/pci/host/pci-xgene.c
+++ b/drivers/pci/host/pci-xgene.c
@@ -468,6 +468,23 @@  static int xgene_pcie_setup(struct xgene_pcie_port *port,
 	return 0;
 }
 
+/*
+ * Attach the MSI controller referenced by the bus's "msi-parent"
+ * phandle to the root bus, so the PCI core routes MSI setup to it.
+ */
+static int xgene_pcie_msi_enable(struct pci_bus *bus)
+{
+	struct device_node *msi_node;
+
+	msi_node = of_parse_phandle(bus->dev.of_node, "msi-parent", 0);
+	if (!msi_node)
+		return -ENODEV;
+
+	bus->msi = of_pci_find_msi_chip_by_node(msi_node);
+	if (!bus->msi)
+		return -ENODEV;
+
+	bus->msi->dev = &bus->dev;
+	return 0;
+}
+
 static int xgene_pcie_probe_bridge(struct platform_device *pdev)
 {
 	struct device_node *dn = pdev->dev.of_node;
@@ -504,6 +521,10 @@  static int xgene_pcie_probe_bridge(struct platform_device *pdev)
 	if (!bus)
 		return -ENOMEM;
 
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		if (xgene_pcie_msi_enable(bus))
+			dev_info(port->dev, "failed to enable MSI\n");
+
 	pci_scan_child_bus(bus);
 	pci_assign_unassigned_bus_resources(bus);
 	pci_bus_add_devices(bus);