diff mbox

[Part2,v4,21/31] PCI/MSI: enhance PCI MSI core to support hierarchy irqdomain

Message ID 1415102525-9898-22-git-send-email-jiang.liu@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jiang Liu Nov. 4, 2014, 12:01 p.m. UTC
Enhance PCI MSI core to support hierarchy irqdomain, so the common
code could be shared among architectures.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
---
 drivers/pci/Kconfig |    4 ++
 drivers/pci/msi.c   |  126 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/msi.h |   11 +++++
 3 files changed, 141 insertions(+)

Comments

Bjorn Helgaas Nov. 5, 2014, 11:09 p.m. UTC | #1
On Tue, Nov 04, 2014 at 08:01:55PM +0800, Jiang Liu wrote:

In your topic:

  PCI/MSI: enhance PCI MSI core to support hierarchy irqdomain

There's no need to repeat "PCI MSI".  Please run "git log --oneline
drivers/pci/msi.c" and make your similar (capitalize the first word).

> Enhance PCI MSI core to support hierarchy irqdomain, so the common
> code could be shared among architectures.
> 
> Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
> ---
>  drivers/pci/Kconfig |    4 ++
>  drivers/pci/msi.c   |  126 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/msi.h |   11 +++++
>  3 files changed, 141 insertions(+)
> 
> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
> index b9db0f2ce11f..022e89745f86 100644
> --- a/drivers/pci/Kconfig
> +++ b/drivers/pci/Kconfig
> @@ -16,6 +16,10 @@ config PCI_MSI
>  
>  	   If you don't know what to do here, say Y.
>  
> +config PCI_MSI_IRQ_DOMAIN
> +	bool
> +	depends on PCI_MSI && IRQ_DOMAIN_HIERARCHY
> +
>  config PCI_DEBUG
>  	bool "PCI Debugging"
>  	depends on PCI && DEBUG_KERNEL
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index da181c59394b..7423ee16972f 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -19,6 +19,7 @@
>  #include <linux/errno.h>
>  #include <linux/io.h>
>  #include <linux/slab.h>
> +#include <linux/irqdomain.h>
>  
>  #include "pci.h"
>  
> @@ -1098,3 +1099,128 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
>  	return nvec;
>  }
>  EXPORT_SYMBOL(pci_enable_msix_range);
> +
> +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN

Space, not tab.

> +static inline irq_hw_number_t
> +msi_get_hwirq(struct pci_dev *pdev, struct msi_desc *msidesc)

The convention in this file is "struct pci_dev *dev".  And "struct msi_desc
*desc" (or maybe "*entry").  Try to converge things, not diverge them.

> +{
> +	return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
> +		PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
> +		(pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27;

Where does this bit layout come from?  Is this defined in the spec
somewhere?  A reference would help.

> +}
> +
> +static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
> +			    unsigned int nr_irqs, void *arg)
> +{
> +	int i, ret;
> +	irq_hw_number_t hwirq = arch_msi_irq_domain_get_hwirq(arg);
> +
> +	if (irq_find_mapping(domain, hwirq) > 0)
> +		return -EEXIST;
> +
> +	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
> +	if (ret >= 0)

	if (ret < 0)
		return ret;

and un-indent the mainline code below.  Then it's obvious that this is the
normal case, not the error case.

> +		for (i = 0; i < nr_irqs; i++) {
> +			irq_domain_set_hwirq_and_chip(domain, virq + i,
> +					hwirq + i, &msi_chip, (void *)(long)i);
> +			__irq_set_handler(virq + i, handle_edge_irq, 0, "edge");
> +		}
> +
> +	return ret;
> +}
> +
> +static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
> +			    unsigned int nr_irqs)
> +{
> +	int i;
> +
> +	for (i = 0; i < nr_irqs; i++) {
> +		struct msi_desc *msidesc = irq_get_msi_desc(virq);
> +
> +		if (msidesc)
> +			msidesc->irq = 0;
> +	}
> +	irq_domain_free_irqs_top(domain, virq, nr_irqs);
> +}
> +
> +static int msi_domain_activate(struct irq_domain *domain,
> +			       struct irq_data *irq_data)
> +{
> +	int ret = 0;
> +	struct msi_msg msg;
> +
> +	/*
> +	 * irq_data->chip_data is MSI/MSIx offset.

"MSI-X", as you wrote on the next line.

> +	 * MSI-X message is written per-IRQ, the offset is always 0.
> +	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
> +	 */
> +	if (!irq_data->chip_data) {

	if (irq_data->chip_data)
		return 0;

and un-indent the mainline code below, and drop the "ret = 0" init above.

> +		ret = irq_chip_compose_msi_msg(irq_data, &msg);
> +		if (ret == 0)

	if (ret)
		return ret;

> +			write_msi_msg(irq_data->irq, &msg);
> +	}
> +
> +	return ret;
	return 0;
> +}
> +
> +static int msi_domain_deactivate(struct irq_domain *domain,
> +				 struct irq_data *irq_data)
> +{
> +	struct msi_msg msg;
> +
> +	if (irq_data->chip_data) {
> +		memset(&msg, 0, sizeof(msg));
> +		write_msi_msg(irq_data->irq, &msg);
> +	}
> +
> +	return 0;
> +}
> +
> +static struct irq_domain_ops msi_domain_ops = {
> +	.alloc = msi_domain_alloc,
> +	.free = msi_domain_free,
> +	.activate = msi_domain_activate,
> +	.deactivate = msi_domain_deactivate,
> +};
> +
> +struct irq_domain *msi_create_irq_domain(struct irq_domain *parent)
> +{
> +	struct irq_domain *domain;
> +
> +	domain = irq_domain_add_tree(NULL, &msi_domain_ops, NULL);
> +	if (domain)

	if (!domain)
		return NULL;

and un-indent this:

> +		domain->parent = parent;
> +
> +	return domain;
> +}
> +
> +int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
> +			      struct pci_dev *dev, void *arg)
> +{
> +	int i, virq;
> +	struct msi_desc *msidesc;
> +	int node = dev_to_node(&dev->dev);
> +
> +	list_for_each_entry(msidesc, &dev->msi_list, list) {
> +		arch_msi_irq_domain_set_hwirq(arg, msi_get_hwirq(dev, msidesc));
> +		virq = irq_domain_alloc_irqs(domain, msidesc->nvec_used,
> +					     node, arg);
> +		if (virq < 0) {
> +			/* Special handling for pci_enable_msi_range(). */
> +			return (type == PCI_CAP_ID_MSI &&
> +				msidesc->nvec_used > 1) ?  1 : -ENOSPC;	

I think "if" would be easier to read than this ternary expression.

> +		}
> +		for (i = 0; i < msidesc->nvec_used; i++)
> +			irq_set_msi_desc_off(virq + i, i, msidesc);
> +	}
> +
> +	list_for_each_entry(msidesc, &dev->msi_list, list)
> +		if (msidesc->nvec_used == 1)
> +			dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", virq);
> +		else
> +			dev_dbg(&dev->dev, "irq [%d-%d] for MSI/MSI-X\n",
> +				virq, virq + msidesc->nvec_used - 1);
> +
> +	return 0;
> +}
> +#endif	/* CONFIG_PCI_MSI_IRQ_DOMAIN */
> diff --git a/include/linux/msi.h b/include/linux/msi.h
> index 44f4746d033b..05dcd425f82b 100644
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -75,4 +75,15 @@ struct msi_chip {
>  	void (*teardown_irq)(struct msi_chip *chip, unsigned int irq);
>  };
>  
> +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN

Use a space here, not a tab.

> +extern struct irq_chip msi_chip;

I don't think "msi_chip" is a good name.  "Chip" only hints that it's a
semiconductor integrated circuit; it doesn't say anything about what it
does.  I've suggested "msi_controller" elsewhere.

Why does this need to be exported?  And why should there be only one in a
system?

> +extern struct irq_domain *msi_create_irq_domain(struct irq_domain *parent);
> +extern int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
> +				     struct pci_dev *dev, void *arg);
> +
> +extern irq_hw_number_t arch_msi_irq_domain_get_hwirq(void *arg);
> +extern void arch_msi_irq_domain_set_hwirq(void *arg, irq_hw_number_t hwirq);

Look at the rest of the file and notice that the existing code does not use
"extern" on function declarations.

> +#endif	/* CONFIG_PCI_MSI_IRQ_DOMAIN */

Use a space here (not a tab), like the #endif just below.

>  #endif /* LINUX_MSI_H */
> -- 
> 1.7.10.4
>
Yijing Wang Nov. 6, 2014, 1:58 a.m. UTC | #2
>>  
>> @@ -1098,3 +1099,128 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
>>  	return nvec;
>>  }
>>  EXPORT_SYMBOL(pci_enable_msix_range);
>> +
>> +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
> 
> Space, not tab.
> 
>> +static inline irq_hw_number_t
>> +msi_get_hwirq(struct pci_dev *pdev, struct msi_desc *msidesc)
> 
> The convention in this file is "struct pci_dev *dev".  And "struct msi_desc
> *desc" (or maybe "*entry").  Try to converge things, not diverge them.
> 
>> +{
>> +	return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
>> +		PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
>> +		(pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27;
> 
> Where does this bit layout come from?  Is this defined in the spec
> somewhere?  A reference would help.

Currently, more and more Non-PCI device use MSI(or similar MSI mechanism), like DMAR fault irq
and HPET FSB irq. And we have to add additional code to support the MSI capability.
So I hope we can decouple MSI code and PCI code, then we can unify all MSI(or Message Based interrupt)
in one framework.

Thanks!
Yijing.

> 
>> +}
>> +
>> +static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
>> +			    unsigned int nr_irqs, void *arg)
>> +{
>> +	int i, ret;
>> +	irq_hw_number_t hwirq = arch_msi_irq_domain_get_hwirq(arg);
>> +
>> +	if (irq_find_mapping(domain, hwirq) > 0)
>> +		return -EEXIST;
>> +
>> +	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
>> +	if (ret >= 0)
> 
> 	if (ret < 0)
> 		return ret;
> 
> and un-indent the mainline code below.  Then it's obvious that this is the
> normal case, not the error case.
> 
>> +		for (i = 0; i < nr_irqs; i++) {
>> +			irq_domain_set_hwirq_and_chip(domain, virq + i,
>> +					hwirq + i, &msi_chip, (void *)(long)i);
>> +			__irq_set_handler(virq + i, handle_edge_irq, 0, "edge");
>> +		}
>> +
>> +	return ret;
>> +}
>> +
>> +static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
>> +			    unsigned int nr_irqs)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < nr_irqs; i++) {
>> +		struct msi_desc *msidesc = irq_get_msi_desc(virq);
>> +
>> +		if (msidesc)
>> +			msidesc->irq = 0;
>> +	}
>> +	irq_domain_free_irqs_top(domain, virq, nr_irqs);
>> +}
>> +
>> +static int msi_domain_activate(struct irq_domain *domain,
>> +			       struct irq_data *irq_data)
>> +{
>> +	int ret = 0;
>> +	struct msi_msg msg;
>> +
>> +	/*
>> +	 * irq_data->chip_data is MSI/MSIx offset.
> 
> "MSI-X", as you wrote on the next line.
> 
>> +	 * MSI-X message is written per-IRQ, the offset is always 0.
>> +	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
>> +	 */
>> +	if (!irq_data->chip_data) {
> 
> 	if (irq_data->chip_data)
> 		return 0;
> 
> and un-indent the mainline code below, and drop the "ret = 0" init above.
> 
>> +		ret = irq_chip_compose_msi_msg(irq_data, &msg);
>> +		if (ret == 0)
> 
> 	if (ret)
> 		return ret;
> 
>> +			write_msi_msg(irq_data->irq, &msg);
>> +	}
>> +
>> +	return ret;
> 	return 0;
>> +}
>> +
>> +static int msi_domain_deactivate(struct irq_domain *domain,
>> +				 struct irq_data *irq_data)
>> +{
>> +	struct msi_msg msg;
>> +
>> +	if (irq_data->chip_data) {
>> +		memset(&msg, 0, sizeof(msg));
>> +		write_msi_msg(irq_data->irq, &msg);
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static struct irq_domain_ops msi_domain_ops = {
>> +	.alloc = msi_domain_alloc,
>> +	.free = msi_domain_free,
>> +	.activate = msi_domain_activate,
>> +	.deactivate = msi_domain_deactivate,
>> +};
>> +
>> +struct irq_domain *msi_create_irq_domain(struct irq_domain *parent)
>> +{
>> +	struct irq_domain *domain;
>> +
>> +	domain = irq_domain_add_tree(NULL, &msi_domain_ops, NULL);
>> +	if (domain)
> 
> 	if (!domain)
> 		return NULL;
> 
> and un-indent this:
> 
>> +		domain->parent = parent;
>> +
>> +	return domain;
>> +}
>> +
>> +int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
>> +			      struct pci_dev *dev, void *arg)
>> +{
>> +	int i, virq;
>> +	struct msi_desc *msidesc;
>> +	int node = dev_to_node(&dev->dev);
>> +
>> +	list_for_each_entry(msidesc, &dev->msi_list, list) {
>> +		arch_msi_irq_domain_set_hwirq(arg, msi_get_hwirq(dev, msidesc));
>> +		virq = irq_domain_alloc_irqs(domain, msidesc->nvec_used,
>> +					     node, arg);
>> +		if (virq < 0) {
>> +			/* Special handling for pci_enable_msi_range(). */
>> +			return (type == PCI_CAP_ID_MSI &&
>> +				msidesc->nvec_used > 1) ?  1 : -ENOSPC;	
> 
> I think "if" would be easier to read than this ternary expression.
> 
>> +		}
>> +		for (i = 0; i < msidesc->nvec_used; i++)
>> +			irq_set_msi_desc_off(virq + i, i, msidesc);
>> +	}
>> +
>> +	list_for_each_entry(msidesc, &dev->msi_list, list)
>> +		if (msidesc->nvec_used == 1)
>> +			dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", virq);
>> +		else
>> +			dev_dbg(&dev->dev, "irq [%d-%d] for MSI/MSI-X\n",
>> +				virq, virq + msidesc->nvec_used - 1);
>> +
>> +	return 0;
>> +}
>> +#endif	/* CONFIG_PCI_MSI_IRQ_DOMAIN */
>> diff --git a/include/linux/msi.h b/include/linux/msi.h
>> index 44f4746d033b..05dcd425f82b 100644
>> --- a/include/linux/msi.h
>> +++ b/include/linux/msi.h
>> @@ -75,4 +75,15 @@ struct msi_chip {
>>  	void (*teardown_irq)(struct msi_chip *chip, unsigned int irq);
>>  };
>>  
>> +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
> 
> Use a space here, not a tab.
> 
>> +extern struct irq_chip msi_chip;
> 
> I don't think "msi_chip" is a good name.  "Chip" only hints that it's a
> semiconductor integrated circuit; it doesn't say anything about what it
> does.  I've suggested "msi_controller" elsewhere.
> 
> Why does this need to be exported?  And why should there be only one in a
> system?
> 
>> +extern struct irq_domain *msi_create_irq_domain(struct irq_domain *parent);
>> +extern int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
>> +				     struct pci_dev *dev, void *arg);
>> +
>> +extern irq_hw_number_t arch_msi_irq_domain_get_hwirq(void *arg);
>> +extern void arch_msi_irq_domain_set_hwirq(void *arg, irq_hw_number_t hwirq);
> 
> Look at the rest of the file and notice that the existing code does not use
> "extern" on function declarations.
> 
>> +#endif	/* CONFIG_PCI_MSI_IRQ_DOMAIN */
> 
> Use a space here (not a tab), like the #endif just below.
> 
>>  #endif /* LINUX_MSI_H */
>> -- 
>> 1.7.10.4
>>
> 
> .
>
Bjorn Helgaas Nov. 6, 2014, 4:10 a.m. UTC | #3
On Wed, Nov 5, 2014 at 6:58 PM, Yijing Wang <wangyijing@huawei.com> wrote:

>>> +{
>>> +    return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
>>> +            PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
>>> +            (pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27;
>>
>> Where does this bit layout come from?  Is this defined in the spec
>> somewhere?  A reference would help.
>
> Currently, more and more Non-PCI device use MSI(or similar MSI mechanism), like DMAR fault irq
> and HPET FSB irq. And we have to add additional code to support the MSI capability.
> So I hope we can decouple MSI code and PCI code, then we can unify all MSI(or Message Based interrupt)
> in one framework.

Was that supposed to answer my question?  If so, I didn't understand
how it explains where the bit layout came from.

Bjorn
Yijing Wang Nov. 6, 2014, 4:54 a.m. UTC | #4
On 2014/11/6 12:10, Bjorn Helgaas wrote:
> On Wed, Nov 5, 2014 at 6:58 PM, Yijing Wang <wangyijing@huawei.com> wrote:
> 
>>>> +{
>>>> +    return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
>>>> +            PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
>>>> +            (pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27;
>>>
>>> Where does this bit layout come from?  Is this defined in the spec
>>> somewhere?  A reference would help.
>>
>> Currently, more and more Non-PCI device use MSI(or similar MSI mechanism), like DMAR fault irq
>> and HPET FSB irq. And we have to add additional code to support the MSI capability.
>> So I hope we can decouple MSI code and PCI code, then we can unify all MSI(or Message Based interrupt)
>> in one framework.
> 
> Was that supposed to answer my question?  If so, I didn't understand
> how it explains where the bit layout came from.

No, that's just my concern. Because this function uses the pci device id,
but more and more Non-PCI devices use MSI.

> 
> Bjorn
> --
> To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> .
>
Jiang Liu Nov. 6, 2014, 4:58 a.m. UTC | #5
On 2014/11/6 7:09, Bjorn Helgaas wrote:
> On Tue, Nov 04, 2014 at 08:01:55PM +0800, Jiang Liu wrote:
> 
> In your topic:
> 
>   PCI/MSI: enhance PCI MSI core to support hierarchy irqdomain
> 
> There's no need to repeat "PCI MSI".  Please run "git log --oneline
> drivers/pci/msi.c" and make your similar (capitalize the first word).
Hi Bjornm
	I'm already very carefully with your education about commit
log messages, but still missed this one:(. Will be even more careful
next time.

> 
>> Enhance PCI MSI core to support hierarchy irqdomain, so the common
>> code could be shared among architectures.
>>
>> Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
>> ---
>>  drivers/pci/Kconfig |    4 ++
>>  drivers/pci/msi.c   |  126 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/linux/msi.h |   11 +++++
>>  3 files changed, 141 insertions(+)
>>
>> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
>> index b9db0f2ce11f..022e89745f86 100644
>> --- a/drivers/pci/Kconfig
>> +++ b/drivers/pci/Kconfig
>> @@ -16,6 +16,10 @@ config PCI_MSI
>>  
>>  	   If you don't know what to do here, say Y.
>>  
>> +config PCI_MSI_IRQ_DOMAIN
>> +	bool
>> +	depends on PCI_MSI && IRQ_DOMAIN_HIERARCHY
>> +
>>  config PCI_DEBUG
>>  	bool "PCI Debugging"
>>  	depends on PCI && DEBUG_KERNEL
>> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
>> index da181c59394b..7423ee16972f 100644
>> --- a/drivers/pci/msi.c
>> +++ b/drivers/pci/msi.c
>> @@ -19,6 +19,7 @@
>>  #include <linux/errno.h>
>>  #include <linux/io.h>
>>  #include <linux/slab.h>
>> +#include <linux/irqdomain.h>
>>  
>>  #include "pci.h"
>>  
>> @@ -1098,3 +1099,128 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
>>  	return nvec;
>>  }
>>  EXPORT_SYMBOL(pci_enable_msix_range);
>> +
>> +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
> 
> Space, not tab.
Will fix it in next version.

> 
>> +static inline irq_hw_number_t
>> +msi_get_hwirq(struct pci_dev *pdev, struct msi_desc *msidesc)
> 
> The convention in this file is "struct pci_dev *dev".  And "struct msi_desc
> *desc" (or maybe "*entry").  Try to converge things, not diverge them.
Thanks for reminder. Adding another check item to my list before
sending out patches:)

> 
>> +{
>> +	return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
>> +		PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
>> +		(pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27;
> 
> Where does this bit layout come from?  Is this defined in the spec
> somewhere?  A reference would help.
We need a unique number to identify every possible MSI source,
and this ID number is only used within the irqdomain subsystem.
So we used above algorithm to generate the ID number, there's
no specification for it.

> 
>> +}
>> +
>> +static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
>> +			    unsigned int nr_irqs, void *arg)
>> +{
>> +	int i, ret;
>> +	irq_hw_number_t hwirq = arch_msi_irq_domain_get_hwirq(arg);
>> +
>> +	if (irq_find_mapping(domain, hwirq) > 0)
>> +		return -EEXIST;
>> +
>> +	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
>> +	if (ret >= 0)
> 
> 	if (ret < 0)
> 		return ret;
> 
> and un-indent the mainline code below.  Then it's obvious that this is the
> normal case, not the error case.
Sure, I want to only use one return statement, but didn't realized that
syntax seems like error handling:)

> 
>> +		for (i = 0; i < nr_irqs; i++) {
>> +			irq_domain_set_hwirq_and_chip(domain, virq + i,
>> +					hwirq + i, &msi_chip, (void *)(long)i);
>> +			__irq_set_handler(virq + i, handle_edge_irq, 0, "edge");
>> +		}
>> +
>> +	return ret;
>> +}
>> +
>> +static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
>> +			    unsigned int nr_irqs)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < nr_irqs; i++) {
>> +		struct msi_desc *msidesc = irq_get_msi_desc(virq);
>> +
>> +		if (msidesc)
>> +			msidesc->irq = 0;
>> +	}
>> +	irq_domain_free_irqs_top(domain, virq, nr_irqs);
>> +}
>> +
>> +static int msi_domain_activate(struct irq_domain *domain,
>> +			       struct irq_data *irq_data)
>> +{
>> +	int ret = 0;
>> +	struct msi_msg msg;
>> +
>> +	/*
>> +	 * irq_data->chip_data is MSI/MSIx offset.
> 
> "MSI-X", as you wrote on the next line.
Sure.

> 
>> +	 * MSI-X message is written per-IRQ, the offset is always 0.
>> +	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
>> +	 */
>> +	if (!irq_data->chip_data) {
> 
> 	if (irq_data->chip_data)
> 		return 0;
> 
> and un-indent the mainline code below, and drop the "ret = 0" init above.
> 
>> +		ret = irq_chip_compose_msi_msg(irq_data, &msg);
>> +		if (ret == 0)
> 
> 	if (ret)
> 		return ret;
> 
>> +			write_msi_msg(irq_data->irq, &msg);
>> +	}
>> +
>> +	return ret;
> 	return 0;
>> +}
>> +
>> +static int msi_domain_deactivate(struct irq_domain *domain,
>> +				 struct irq_data *irq_data)
>> +{
>> +	struct msi_msg msg;
>> +
>> +	if (irq_data->chip_data) {
>> +		memset(&msg, 0, sizeof(msg));
>> +		write_msi_msg(irq_data->irq, &msg);
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static struct irq_domain_ops msi_domain_ops = {
>> +	.alloc = msi_domain_alloc,
>> +	.free = msi_domain_free,
>> +	.activate = msi_domain_activate,
>> +	.deactivate = msi_domain_deactivate,
>> +};
>> +
>> +struct irq_domain *msi_create_irq_domain(struct irq_domain *parent)
>> +{
>> +	struct irq_domain *domain;
>> +
>> +	domain = irq_domain_add_tree(NULL, &msi_domain_ops, NULL);
>> +	if (domain)
> 
> 	if (!domain)
> 		return NULL;
> 
> and un-indent this:
> 
>> +		domain->parent = parent;
>> +
>> +	return domain;
>> +}
>> +
>> +int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
>> +			      struct pci_dev *dev, void *arg)
>> +{
>> +	int i, virq;
>> +	struct msi_desc *msidesc;
>> +	int node = dev_to_node(&dev->dev);
>> +
>> +	list_for_each_entry(msidesc, &dev->msi_list, list) {
>> +		arch_msi_irq_domain_set_hwirq(arg, msi_get_hwirq(dev, msidesc));
>> +		virq = irq_domain_alloc_irqs(domain, msidesc->nvec_used,
>> +					     node, arg);
>> +		if (virq < 0) {
>> +			/* Special handling for pci_enable_msi_range(). */
>> +			return (type == PCI_CAP_ID_MSI &&
>> +				msidesc->nvec_used > 1) ?  1 : -ENOSPC;	
> 
> I think "if" would be easier to read than this ternary expression.
Sure.

> 
>> +		}
>> +		for (i = 0; i < msidesc->nvec_used; i++)
>> +			irq_set_msi_desc_off(virq + i, i, msidesc);
>> +	}
>> +
>> +	list_for_each_entry(msidesc, &dev->msi_list, list)
>> +		if (msidesc->nvec_used == 1)
>> +			dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", virq);
>> +		else
>> +			dev_dbg(&dev->dev, "irq [%d-%d] for MSI/MSI-X\n",
>> +				virq, virq + msidesc->nvec_used - 1);
>> +
>> +	return 0;
>> +}
>> +#endif	/* CONFIG_PCI_MSI_IRQ_DOMAIN */
>> diff --git a/include/linux/msi.h b/include/linux/msi.h
>> index 44f4746d033b..05dcd425f82b 100644
>> --- a/include/linux/msi.h
>> +++ b/include/linux/msi.h
>> @@ -75,4 +75,15 @@ struct msi_chip {
>>  	void (*teardown_irq)(struct msi_chip *chip, unsigned int irq);
>>  };
>>  
>> +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
> 
> Use a space here, not a tab.
Sure.

> 
>> +extern struct irq_chip msi_chip;
> 
> I don't think "msi_chip" is a good name.  "Chip" only hints that it's a
> semiconductor integrated circuit; it doesn't say anything about what it
> does.  I've suggested "msi_controller" elsewhere.
> 
> Why does this need to be exported?  And why should there be only one in a
> system?
I have changed the interfaces as below in next version, so we could
hide "msi_chip" private and support different irq_chip for different
irqdomains.

struct irq_domain *msi_create_irq_domain(struct device_node *of_node,
                                         struct irq_chip *chip,
                                         struct irq_domain *parent);

> 
>> +extern struct irq_domain *msi_create_irq_domain(struct irq_domain *parent);
>> +extern int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
>> +				     struct pci_dev *dev, void *arg);
>> +
>> +extern irq_hw_number_t arch_msi_irq_domain_get_hwirq(void *arg);
>> +extern void arch_msi_irq_domain_set_hwirq(void *arg, irq_hw_number_t hwirq);
> 
> Look at the rest of the file and notice that the existing code does not use
> "extern" on function declarations.
Sure.

> 
>> +#endif	/* CONFIG_PCI_MSI_IRQ_DOMAIN */
> 
> Use a space here (not a tab), like the #endif just below.
Sure.

Thanks for your review and great comments!
Regards!
Gerry
> 
>>  #endif /* LINUX_MSI_H */
>> -- 
>> 1.7.10.4
>>
Jiang Liu Nov. 6, 2014, 5:06 a.m. UTC | #6
On 2014/11/6 9:58, Yijing Wang wrote:
>>>  
>>> @@ -1098,3 +1099,128 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
>>>  	return nvec;
>>>  }
>>>  EXPORT_SYMBOL(pci_enable_msix_range);
>>> +
>>> +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
>>
>> Space, not tab.
>>
>>> +static inline irq_hw_number_t
>>> +msi_get_hwirq(struct pci_dev *pdev, struct msi_desc *msidesc)
>>
>> The convention in this file is "struct pci_dev *dev".  And "struct msi_desc
>> *desc" (or maybe "*entry").  Try to converge things, not diverge them.
>>
>>> +{
>>> +	return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
>>> +		PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
>>> +		(pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27;
>>
>> Where does this bit layout come from?  Is this defined in the spec
>> somewhere?  A reference would help.
> 
> Currently, more and more Non-PCI device use MSI(or similar MSI mechanism), like DMAR fault irq
> and HPET FSB irq. And we have to add additional code to support the MSI capability.
> So I hope we can decouple MSI code and PCI code, then we can unify all MSI(or Message Based interrupt)
> in one framework.
Hi Yijing,
	I have a following patch to share more code among MSI/DMAR/HPET,
which is one step forward as you suggested. Will send out that patch set
soon.
Regards!
Gerry

> 
> Thanks!
> Yijing.
> 
>>
>>> +}
>>> +
>>> +static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
>>> +			    unsigned int nr_irqs, void *arg)
>>> +{
>>> +	int i, ret;
>>> +	irq_hw_number_t hwirq = arch_msi_irq_domain_get_hwirq(arg);
>>> +
>>> +	if (irq_find_mapping(domain, hwirq) > 0)
>>> +		return -EEXIST;
>>> +
>>> +	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
>>> +	if (ret >= 0)
>>
>> 	if (ret < 0)
>> 		return ret;
>>
>> and un-indent the mainline code below.  Then it's obvious that this is the
>> normal case, not the error case.
>>
>>> +		for (i = 0; i < nr_irqs; i++) {
>>> +			irq_domain_set_hwirq_and_chip(domain, virq + i,
>>> +					hwirq + i, &msi_chip, (void *)(long)i);
>>> +			__irq_set_handler(virq + i, handle_edge_irq, 0, "edge");
>>> +		}
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
>>> +			    unsigned int nr_irqs)
>>> +{
>>> +	int i;
>>> +
>>> +	for (i = 0; i < nr_irqs; i++) {
>>> +		struct msi_desc *msidesc = irq_get_msi_desc(virq);
>>> +
>>> +		if (msidesc)
>>> +			msidesc->irq = 0;
>>> +	}
>>> +	irq_domain_free_irqs_top(domain, virq, nr_irqs);
>>> +}
>>> +
>>> +static int msi_domain_activate(struct irq_domain *domain,
>>> +			       struct irq_data *irq_data)
>>> +{
>>> +	int ret = 0;
>>> +	struct msi_msg msg;
>>> +
>>> +	/*
>>> +	 * irq_data->chip_data is MSI/MSIx offset.
>>
>> "MSI-X", as you wrote on the next line.
>>
>>> +	 * MSI-X message is written per-IRQ, the offset is always 0.
>>> +	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
>>> +	 */
>>> +	if (!irq_data->chip_data) {
>>
>> 	if (irq_data->chip_data)
>> 		return 0;
>>
>> and un-indent the mainline code below, and drop the "ret = 0" init above.
>>
>>> +		ret = irq_chip_compose_msi_msg(irq_data, &msg);
>>> +		if (ret == 0)
>>
>> 	if (ret)
>> 		return ret;
>>
>>> +			write_msi_msg(irq_data->irq, &msg);
>>> +	}
>>> +
>>> +	return ret;
>> 	return 0;
>>> +}
>>> +
>>> +static int msi_domain_deactivate(struct irq_domain *domain,
>>> +				 struct irq_data *irq_data)
>>> +{
>>> +	struct msi_msg msg;
>>> +
>>> +	if (irq_data->chip_data) {
>>> +		memset(&msg, 0, sizeof(msg));
>>> +		write_msi_msg(irq_data->irq, &msg);
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static struct irq_domain_ops msi_domain_ops = {
>>> +	.alloc = msi_domain_alloc,
>>> +	.free = msi_domain_free,
>>> +	.activate = msi_domain_activate,
>>> +	.deactivate = msi_domain_deactivate,
>>> +};
>>> +
>>> +struct irq_domain *msi_create_irq_domain(struct irq_domain *parent)
>>> +{
>>> +	struct irq_domain *domain;
>>> +
>>> +	domain = irq_domain_add_tree(NULL, &msi_domain_ops, NULL);
>>> +	if (domain)
>>
>> 	if (!domain)
>> 		return NULL;
>>
>> and un-indent this:
>>
>>> +		domain->parent = parent;
>>> +
>>> +	return domain;
>>> +}
>>> +
>>> +int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
>>> +			      struct pci_dev *dev, void *arg)
>>> +{
>>> +	int i, virq;
>>> +	struct msi_desc *msidesc;
>>> +	int node = dev_to_node(&dev->dev);
>>> +
>>> +	list_for_each_entry(msidesc, &dev->msi_list, list) {
>>> +		arch_msi_irq_domain_set_hwirq(arg, msi_get_hwirq(dev, msidesc));
>>> +		virq = irq_domain_alloc_irqs(domain, msidesc->nvec_used,
>>> +					     node, arg);
>>> +		if (virq < 0) {
>>> +			/* Special handling for pci_enable_msi_range(). */
>>> +			return (type == PCI_CAP_ID_MSI &&
>>> +				msidesc->nvec_used > 1) ?  1 : -ENOSPC;	
>>
>> I think "if" would be easier to read than this ternary expression.
>>
>>> +		}
>>> +		for (i = 0; i < msidesc->nvec_used; i++)
>>> +			irq_set_msi_desc_off(virq + i, i, msidesc);
>>> +	}
>>> +
>>> +	list_for_each_entry(msidesc, &dev->msi_list, list)
>>> +		if (msidesc->nvec_used == 1)
>>> +			dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", virq);
>>> +		else
>>> +			dev_dbg(&dev->dev, "irq [%d-%d] for MSI/MSI-X\n",
>>> +				virq, virq + msidesc->nvec_used - 1);
>>> +
>>> +	return 0;
>>> +}
>>> +#endif	/* CONFIG_PCI_MSI_IRQ_DOMAIN */
>>> diff --git a/include/linux/msi.h b/include/linux/msi.h
>>> index 44f4746d033b..05dcd425f82b 100644
>>> --- a/include/linux/msi.h
>>> +++ b/include/linux/msi.h
>>> @@ -75,4 +75,15 @@ struct msi_chip {
>>>  	void (*teardown_irq)(struct msi_chip *chip, unsigned int irq);
>>>  };
>>>  
>>> +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
>>
>> Use a space here, not a tab.
>>
>>> +extern struct irq_chip msi_chip;
>>
>> I don't think "msi_chip" is a good name.  "Chip" only hints that it's a
>> semiconductor integrated circuit; it doesn't say anything about what it
>> does.  I've suggested "msi_controller" elsewhere.
>>
>> Why does this need to be exported?  And why should there be only one in a
>> system?
>>
>>> +extern struct irq_domain *msi_create_irq_domain(struct irq_domain *parent);
>>> +extern int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
>>> +				     struct pci_dev *dev, void *arg);
>>> +
>>> +extern irq_hw_number_t arch_msi_irq_domain_get_hwirq(void *arg);
>>> +extern void arch_msi_irq_domain_set_hwirq(void *arg, irq_hw_number_t hwirq);
>>
>> Look at the rest of the file and notice that the existing code does not use
>> "extern" on function declarations.
>>
>>> +#endif	/* CONFIG_PCI_MSI_IRQ_DOMAIN */
>>
>> Use a space here (not a tab), like the #endif just below.
>>
>>>  #endif /* LINUX_MSI_H */
>>> -- 
>>> 1.7.10.4
>>>
>>
>> .
>>
> 
>
Bjorn Helgaas Nov. 6, 2014, 5:28 a.m. UTC | #7
On Wed, Nov 5, 2014 at 9:58 PM, Jiang Liu <jiang.liu@linux.intel.com> wrote:
> On 2014/11/6 7:09, Bjorn Helgaas wrote:
>> On Tue, Nov 04, 2014 at 08:01:55PM +0800, Jiang Liu wrote:

>>> +{
>>> +    return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
>>> +            PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
>>> +            (pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27;
>>
>> Where does this bit layout come from?  Is this defined in the spec
>> somewhere?  A reference would help.
> We need a unique number to identify every possible MSI source,
> and this ID number is only used within the irqdomain subsystem.
> So we used above algorithm to generate the ID number, there's
> no specification for it.

A comment to that effect would be great.

Bjorn
Yijing Wang Nov. 6, 2014, 5:42 a.m. UTC | #8
On 2014/11/6 13:06, Jiang Liu wrote:
> On 2014/11/6 9:58, Yijing Wang wrote:
>>>>  
>>>> @@ -1098,3 +1099,128 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
>>>>  	return nvec;
>>>>  }
>>>>  EXPORT_SYMBOL(pci_enable_msix_range);
>>>> +
>>>> +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
>>>
>>> Space, not tab.
>>>
>>>> +static inline irq_hw_number_t
>>>> +msi_get_hwirq(struct pci_dev *pdev, struct msi_desc *msidesc)
>>>
>>> The convention in this file is "struct pci_dev *dev".  And "struct msi_desc
>>> *desc" (or maybe "*entry").  Try to converge things, not diverge them.
>>>
>>>> +{
>>>> +	return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
>>>> +		PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
>>>> +		(pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27;
>>>
>>> Where does this bit layout come from?  Is this defined in the spec
>>> somewhere?  A reference would help.
>>
>> Currently, more and more Non-PCI device use MSI(or similar MSI mechanism), like DMAR fault irq
>> and HPET FSB irq. And we have to add additional code to support the MSI capability.
>> So I hope we can decouple MSI code and PCI code, then we can unify all MSI(or Message Based interrupt)
>> in one framework.
> Hi Yijing,
> 	I have a following patch to share more code among MSI/DMAR/HPET,
> which is one step forward as you suggested. Will send out that patch set
> soon.

That's Great! :)

> Regards!
> Gerry
> 
>>
>> Thanks!
>> Yijing.
>>
>>>
>>>> +}
>>>> +
>>>> +static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
>>>> +			    unsigned int nr_irqs, void *arg)
>>>> +{
>>>> +	int i, ret;
>>>> +	irq_hw_number_t hwirq = arch_msi_irq_domain_get_hwirq(arg);
>>>> +
>>>> +	if (irq_find_mapping(domain, hwirq) > 0)
>>>> +		return -EEXIST;
>>>> +
>>>> +	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
>>>> +	if (ret >= 0)
>>>
>>> 	if (ret < 0)
>>> 		return ret;
>>>
>>> and un-indent the mainline code below.  Then it's obvious that this is the
>>> normal case, not the error case.
>>>
>>>> +		for (i = 0; i < nr_irqs; i++) {
>>>> +			irq_domain_set_hwirq_and_chip(domain, virq + i,
>>>> +					hwirq + i, &msi_chip, (void *)(long)i);
>>>> +			__irq_set_handler(virq + i, handle_edge_irq, 0, "edge");
>>>> +		}
>>>> +
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
>>>> +			    unsigned int nr_irqs)
>>>> +{
>>>> +	int i;
>>>> +
>>>> +	for (i = 0; i < nr_irqs; i++) {
>>>> +		struct msi_desc *msidesc = irq_get_msi_desc(virq);
>>>> +
>>>> +		if (msidesc)
>>>> +			msidesc->irq = 0;
>>>> +	}
>>>> +	irq_domain_free_irqs_top(domain, virq, nr_irqs);
>>>> +}
>>>> +
>>>> +static int msi_domain_activate(struct irq_domain *domain,
>>>> +			       struct irq_data *irq_data)
>>>> +{
>>>> +	int ret = 0;
>>>> +	struct msi_msg msg;
>>>> +
>>>> +	/*
>>>> +	 * irq_data->chip_data is MSI/MSIx offset.
>>>
>>> "MSI-X", as you wrote on the next line.
>>>
>>>> +	 * MSI-X message is written per-IRQ, the offset is always 0.
>>>> +	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
>>>> +	 */
>>>> +	if (!irq_data->chip_data) {
>>>
>>> 	if (irq_data->chip_data)
>>> 		return 0;
>>>
>>> and un-indent the mainline code below, and drop the "ret = 0" init above.
>>>
>>>> +		ret = irq_chip_compose_msi_msg(irq_data, &msg);
>>>> +		if (ret == 0)
>>>
>>> 	if (ret)
>>> 		return ret;
>>>
>>>> +			write_msi_msg(irq_data->irq, &msg);
>>>> +	}
>>>> +
>>>> +	return ret;
>>> 	return 0;
>>>> +}
>>>> +
>>>> +static int msi_domain_deactivate(struct irq_domain *domain,
>>>> +				 struct irq_data *irq_data)
>>>> +{
>>>> +	struct msi_msg msg;
>>>> +
>>>> +	if (irq_data->chip_data) {
>>>> +		memset(&msg, 0, sizeof(msg));
>>>> +		write_msi_msg(irq_data->irq, &msg);
>>>> +	}
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static struct irq_domain_ops msi_domain_ops = {
>>>> +	.alloc = msi_domain_alloc,
>>>> +	.free = msi_domain_free,
>>>> +	.activate = msi_domain_activate,
>>>> +	.deactivate = msi_domain_deactivate,
>>>> +};
>>>> +
>>>> +struct irq_domain *msi_create_irq_domain(struct irq_domain *parent)
>>>> +{
>>>> +	struct irq_domain *domain;
>>>> +
>>>> +	domain = irq_domain_add_tree(NULL, &msi_domain_ops, NULL);
>>>> +	if (domain)
>>>
>>> 	if (!domain)
>>> 		return NULL;
>>>
>>> and un-indent this:
>>>
>>>> +		domain->parent = parent;
>>>> +
>>>> +	return domain;
>>>> +}
>>>> +
>>>> +int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
>>>> +			      struct pci_dev *dev, void *arg)
>>>> +{
>>>> +	int i, virq;
>>>> +	struct msi_desc *msidesc;
>>>> +	int node = dev_to_node(&dev->dev);
>>>> +
>>>> +	list_for_each_entry(msidesc, &dev->msi_list, list) {
>>>> +		arch_msi_irq_domain_set_hwirq(arg, msi_get_hwirq(dev, msidesc));
>>>> +		virq = irq_domain_alloc_irqs(domain, msidesc->nvec_used,
>>>> +					     node, arg);
>>>> +		if (virq < 0) {
>>>> +			/* Special handling for pci_enable_msi_range(). */
>>>> +			return (type == PCI_CAP_ID_MSI &&
>>>> +				msidesc->nvec_used > 1) ?  1 : -ENOSPC;	
>>>
>>> I think "if" would be easier to read than this ternary expression.
>>>
>>>> +		}
>>>> +		for (i = 0; i < msidesc->nvec_used; i++)
>>>> +			irq_set_msi_desc_off(virq + i, i, msidesc);
>>>> +	}
>>>> +
>>>> +	list_for_each_entry(msidesc, &dev->msi_list, list)
>>>> +		if (msidesc->nvec_used == 1)
>>>> +			dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", virq);
>>>> +		else
>>>> +			dev_dbg(&dev->dev, "irq [%d-%d] for MSI/MSI-X\n",
>>>> +				virq, virq + msidesc->nvec_used - 1);
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +#endif	/* CONFIG_PCI_MSI_IRQ_DOMAIN */
>>>> diff --git a/include/linux/msi.h b/include/linux/msi.h
>>>> index 44f4746d033b..05dcd425f82b 100644
>>>> --- a/include/linux/msi.h
>>>> +++ b/include/linux/msi.h
>>>> @@ -75,4 +75,15 @@ struct msi_chip {
>>>>  	void (*teardown_irq)(struct msi_chip *chip, unsigned int irq);
>>>>  };
>>>>  
>>>> +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
>>>
>>> Use a space here, not a tab.
>>>
>>>> +extern struct irq_chip msi_chip;
>>>
>>> I don't think "msi_chip" is a good name.  "Chip" only hints that it's a
>>> semiconductor integrated circuit; it doesn't say anything about what it
>>> does.  I've suggested "msi_controller" elsewhere.
>>>
>>> Why does this need to be exported?  And why should there be only one in a
>>> system?
>>>
>>>> +extern struct irq_domain *msi_create_irq_domain(struct irq_domain *parent);
>>>> +extern int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
>>>> +				     struct pci_dev *dev, void *arg);
>>>> +
>>>> +extern irq_hw_number_t arch_msi_irq_domain_get_hwirq(void *arg);
>>>> +extern void arch_msi_irq_domain_set_hwirq(void *arg, irq_hw_number_t hwirq);
>>>
>>> Look at the rest of the file and notice that the existing code does not use
>>> "extern" on function declarations.
>>>
>>>> +#endif	/* CONFIG_PCI_MSI_IRQ_DOMAIN */
>>>
>>> Use a space here (not a tab), like the #endif just below.
>>>
>>>>  #endif /* LINUX_MSI_H */
>>>> -- 
>>>> 1.7.10.4
>>>>
>>>
>>> .
>>>
>>
>>
> 
> .
>
Thomas Gleixner Nov. 6, 2014, 10:01 a.m. UTC | #9
On Tue, 4 Nov 2014, Jiang Liu wrote:
> +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
> +static inline irq_hw_number_t
> +msi_get_hwirq(struct pci_dev *pdev, struct msi_desc *msidesc)
> +{
> +	return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
> +		PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
> +		(pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27;
> +}
> +
> +static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
> +			    unsigned int nr_irqs, void *arg)
> +{
> +	int i, ret;
> +	irq_hw_number_t hwirq = arch_msi_irq_domain_get_hwirq(arg);
> +
> +	if (irq_find_mapping(domain, hwirq) > 0)
> +		return -EEXIST;
> +
> +	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
> +	if (ret >= 0)
> +		for (i = 0; i < nr_irqs; i++) {
> +			irq_domain_set_hwirq_and_chip(domain, virq + i,
> +					hwirq + i, &msi_chip, (void *)(long)i);

I think msi_chip being a global unique thing is problematic. It does
not allow multi platform kernels to select a chip at boot time and it
does not allow per domain chip implementations when you have multiple
msi domains. Aside of that msi_chip is a pretty bad name for a global.

The solution is rather simple and msi is wide spread enough to justify
that.

struct irqdomain_msi_data {
       struct irq_chip       *irq_chip;
};

We make that a struct so we can accomodate for other special things
which might be domain rather than architecture specific. One
obvious use case would be to hold the arch_msi_irq_domain_get/set_hwirq
callbacks.

struct irq_domain *msi_create_irq_domain(struct irq_domain *parent,
       		  			 struct irqdomain_msi_data *data)
{
        struct irq_domain *domain;

        domain = irq_domain_add_tree(NULL, &msi_domain_ops, NULL);
        if (domain) {
                domain->parent = parent;
		domain->msi_data = data;
	}
        return domain;
}

Now the above becomes:

    	struct irq_chip *msi_chip = domain->msi_data->irq_chip;

	irq_domain_set_hwirq_and_chip(domain, virq + i,
				      hwirq + i, msi_chip, (void *)(long)i);

> +int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
> +			      struct pci_dev *dev, void *arg)
> +{
> +	int i, virq;
> +	struct msi_desc *msidesc;
> +	int node = dev_to_node(&dev->dev);
> +
> +	list_for_each_entry(msidesc, &dev->msi_list, list) {
> +		arch_msi_irq_domain_set_hwirq(arg, msi_get_hwirq(dev, msidesc));

The arch_xxx callbacks want to be documented. It's not obvious what
they are supposed to do.

Thanks,

	tglx
Thomas Gleixner Nov. 6, 2014, 10:30 a.m. UTC | #10
On Thu, 6 Nov 2014, Thomas Gleixner wrote:
> On Tue, 4 Nov 2014, Jiang Liu wrote:
> > +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
> > +static inline irq_hw_number_t
> > +msi_get_hwirq(struct pci_dev *pdev, struct msi_desc *msidesc)
> > +{
> > +	return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
> > +		PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
> > +		(pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27;
> > +}
> > +
> > +static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
> > +			    unsigned int nr_irqs, void *arg)
> > +{
> > +	int i, ret;
> > +	irq_hw_number_t hwirq = arch_msi_irq_domain_get_hwirq(arg);
> > +
> > +	if (irq_find_mapping(domain, hwirq) > 0)
> > +		return -EEXIST;
> > +
> > +	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
> > +	if (ret >= 0)
> > +		for (i = 0; i < nr_irqs; i++) {
> > +			irq_domain_set_hwirq_and_chip(domain, virq + i,
> > +					hwirq + i, &msi_chip, (void *)(long)i);
> 
> I think msi_chip being a global unique thing is problematic. It does
> not allow multi platform kernels to select a chip at boot time and it
> does not allow per domain chip implementations when you have multiple
> msi domains. Aside of that msi_chip is a pretty bad name for a global.
> 
> The solution is rather simple and msi is wide spread enough to justify
> that.
> 
> struct irqdomain_msi_data {
>        struct irq_chip       *irq_chip;
> };
> 
> We make that a struct so we can accomodate for other special things
> which might be domain rather than architecture specific. One
> obvious use case would be to hold the arch_msi_irq_domain_get/set_hwirq
> callbacks.

That needs to hand in the domain as an argument as well.

Thanks,

	tglx
Jiang Liu Nov. 6, 2014, 11:41 a.m. UTC | #11
On 2014/11/6 18:01, Thomas Gleixner wrote:
> On Tue, 4 Nov 2014, Jiang Liu wrote:
>> +#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
>> +static inline irq_hw_number_t
>> +msi_get_hwirq(struct pci_dev *pdev, struct msi_desc *msidesc)
>> +{
>> +	return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
>> +		PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
>> +		(pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27;
>> +}
>> +
>> +static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
>> +			    unsigned int nr_irqs, void *arg)
>> +{
>> +	int i, ret;
>> +	irq_hw_number_t hwirq = arch_msi_irq_domain_get_hwirq(arg);
>> +
>> +	if (irq_find_mapping(domain, hwirq) > 0)
>> +		return -EEXIST;
>> +
>> +	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
>> +	if (ret >= 0)
>> +		for (i = 0; i < nr_irqs; i++) {
>> +			irq_domain_set_hwirq_and_chip(domain, virq + i,
>> +					hwirq + i, &msi_chip, (void *)(long)i);
> 
> I think msi_chip being a global unique thing is problematic. It does
> not allow multi platform kernels to select a chip at boot time and it
> does not allow per domain chip implementations when you have multiple
> msi domains. Aside of that msi_chip is a pretty bad name for a global.
> 
> The solution is rather simple and msi is wide spread enough to justify
> that.
> 
> struct irqdomain_msi_data {
>        struct irq_chip       *irq_chip;
> };
> 
> We make that a struct so we can accomodate for other special things
> which might be domain rather than architecture specific. One
> obvious use case would be to hold the arch_msi_irq_domain_get/set_hwirq
> callbacks.
> 
> struct irq_domain *msi_create_irq_domain(struct irq_domain *parent,
>        		  			 struct irqdomain_msi_data *data)
> {
>         struct irq_domain *domain;
> 
>         domain = irq_domain_add_tree(NULL, &msi_domain_ops, NULL);
>         if (domain) {
>                 domain->parent = parent;
> 		domain->msi_data = data;
> 	}
>         return domain;
> }
> 
> Now the above becomes:
> 
>     	struct irq_chip *msi_chip = domain->msi_data->irq_chip;
> 
> 	irq_domain_set_hwirq_and_chip(domain, virq + i,
> 				      hwirq + i, msi_chip, (void *)(long)i);
Hi Thomas,
	Actually I'm working on a patch set to improve MSI support in
the way you described above this afternoon. And I'm also trying to
split MSI code into PCI dependent part and PCI independent part.
I plan to add a file kernel/irq/msi.c to host PCI independent part,
is that OK? Or should I put it under something like drivers/msi/?
The PCI indepenent part will be used to support DMAR/HPET/HTIRQ and
some ARM/ARM64 interrupts.
Regards!
Gerry
> 
>> +int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
>> +			      struct pci_dev *dev, void *arg)
>> +{
>> +	int i, virq;
>> +	struct msi_desc *msidesc;
>> +	int node = dev_to_node(&dev->dev);
>> +
>> +	list_for_each_entry(msidesc, &dev->msi_list, list) {
>> +		arch_msi_irq_domain_set_hwirq(arg, msi_get_hwirq(dev, msidesc));
> 
> The arch_xxx callbacks want to be documented. It's not obvious what
> they are supposed to do.
> 
> Thanks,
> 
> 	tglx
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
Thomas Gleixner Nov. 6, 2014, 11:59 a.m. UTC | #12
On Thu, 6 Nov 2014, Jiang Liu wrote:
> On 2014/11/6 18:01, Thomas Gleixner wrote:
> Hi Thomas,
> 	Actually I'm working on a patch set to improve MSI support in
> the way you described above this afternoon. And I'm also trying to
> split MSI code into PCI dependent part and PCI independent part.
> I plan to add a file kernel/irq/msi.c to host PCI independent part,

kernel/irq/msi.c is fine.

Thanks,

	tglx
diff mbox

Patch

diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index b9db0f2ce11f..022e89745f86 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -16,6 +16,10 @@  config PCI_MSI
 
 	   If you don't know what to do here, say Y.
 
+config PCI_MSI_IRQ_DOMAIN
+	bool
+	depends on PCI_MSI && IRQ_DOMAIN_HIERARCHY
+
 config PCI_DEBUG
 	bool "PCI Debugging"
 	depends on PCI && DEBUG_KERNEL
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index da181c59394b..7423ee16972f 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -19,6 +19,7 @@ 
 #include <linux/errno.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/irqdomain.h>
 
 #include "pci.h"
 
@@ -1098,3 +1099,128 @@  int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
 	return nvec;
 }
 EXPORT_SYMBOL(pci_enable_msix_range);
+
+#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
+static inline irq_hw_number_t
+msi_get_hwirq(struct pci_dev *pdev, struct msi_desc *msidesc)
+{
+	return (irq_hw_number_t)msidesc->msi_attrib.entry_nr |
+		PCI_DEVID(pdev->bus->number, pdev->devfn) << 11 |
+		(pci_domain_nr(pdev->bus) & 0xFFFFFFFF) << 27;
+}
+
+static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+			    unsigned int nr_irqs, void *arg)
+{
+	int i, ret;
+	irq_hw_number_t hwirq = arch_msi_irq_domain_get_hwirq(arg);
+
+	if (irq_find_mapping(domain, hwirq) > 0)
+		return -EEXIST;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+	if (ret >= 0)
+		for (i = 0; i < nr_irqs; i++) {
+			irq_domain_set_hwirq_and_chip(domain, virq + i,
+					hwirq + i, &msi_chip, (void *)(long)i);
+			__irq_set_handler(virq + i, handle_edge_irq, 0, "edge");
+		}
+
+	return ret;
+}
+
+static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
+			    unsigned int nr_irqs)
+{
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		struct msi_desc *msidesc = irq_get_msi_desc(virq);
+
+		if (msidesc)
+			msidesc->irq = 0;
+	}
+	irq_domain_free_irqs_top(domain, virq, nr_irqs);
+}
+
+static int msi_domain_activate(struct irq_domain *domain,
+			       struct irq_data *irq_data)
+{
+	int ret = 0;
+	struct msi_msg msg;
+
+	/*
+	 * irq_data->chip_data is MSI/MSIx offset.
+	 * MSI-X message is written per-IRQ, the offset is always 0.
+	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
+	 */
+	if (!irq_data->chip_data) {
+		ret = irq_chip_compose_msi_msg(irq_data, &msg);
+		if (ret == 0)
+			write_msi_msg(irq_data->irq, &msg);
+	}
+
+	return ret;
+}
+
+static int msi_domain_deactivate(struct irq_domain *domain,
+				 struct irq_data *irq_data)
+{
+	struct msi_msg msg;
+
+	if (irq_data->chip_data) {
+		memset(&msg, 0, sizeof(msg));
+		write_msi_msg(irq_data->irq, &msg);
+	}
+
+	return 0;
+}
+
+static struct irq_domain_ops msi_domain_ops = {
+	.alloc = msi_domain_alloc,
+	.free = msi_domain_free,
+	.activate = msi_domain_activate,
+	.deactivate = msi_domain_deactivate,
+};
+
+struct irq_domain *msi_create_irq_domain(struct irq_domain *parent)
+{
+	struct irq_domain *domain;
+
+	domain = irq_domain_add_tree(NULL, &msi_domain_ops, NULL);
+	if (domain)
+		domain->parent = parent;
+
+	return domain;
+}
+
+int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
+			      struct pci_dev *dev, void *arg)
+{
+	int i, virq;
+	struct msi_desc *msidesc;
+	int node = dev_to_node(&dev->dev);
+
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+		arch_msi_irq_domain_set_hwirq(arg, msi_get_hwirq(dev, msidesc));
+		virq = irq_domain_alloc_irqs(domain, msidesc->nvec_used,
+					     node, arg);
+		if (virq < 0) {
+			/* Special handling for pci_enable_msi_range(). */
+			return (type == PCI_CAP_ID_MSI &&
+				msidesc->nvec_used > 1) ?  1 : -ENOSPC;
+		}
+		for (i = 0; i < msidesc->nvec_used; i++)
+			irq_set_msi_desc_off(virq + i, i, msidesc);
+	}
+
+	list_for_each_entry(msidesc, &dev->msi_list, list)
+		if (msidesc->nvec_used == 1)
+			dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", virq);
+		else
+			dev_dbg(&dev->dev, "irq [%d-%d] for MSI/MSI-X\n",
+				virq, virq + msidesc->nvec_used - 1);
+
+	return 0;
+}
+#endif	/* CONFIG_PCI_MSI_IRQ_DOMAIN */
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 44f4746d033b..05dcd425f82b 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -75,4 +75,15 @@  struct msi_chip {
 	void (*teardown_irq)(struct msi_chip *chip, unsigned int irq);
 };
 
+#ifdef	CONFIG_PCI_MSI_IRQ_DOMAIN
+extern struct irq_chip msi_chip;
+
+extern struct irq_domain *msi_create_irq_domain(struct irq_domain *parent);
+extern int msi_irq_domain_alloc_irqs(struct irq_domain *domain, int type,
+				     struct pci_dev *dev, void *arg);
+
+extern irq_hw_number_t arch_msi_irq_domain_get_hwirq(void *arg);
+extern void arch_msi_irq_domain_set_hwirq(void *arg, irq_hw_number_t hwirq);
+#endif	/* CONFIG_PCI_MSI_IRQ_DOMAIN */
+
 #endif /* LINUX_MSI_H */