diff mbox

[6/8] pci: provide sensible irq vector alloc/free routines

Message ID 1460770552-31260-7-git-send-email-hch@lst.de (mailing list archive)
State New, archived
Headers show

Commit Message

Christoph Hellwig April 16, 2016, 1:35 a.m. UTC
Hide all the MSI-X vs MSI vs legacy bullshit, and provide an array of
interrupt vectors in the pci_dev structure, and ensure we get proper
interrupt affinity by default.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/pci/irq.c   | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/pci/msi.c   |  2 +-
 drivers/pci/pci.h   |  5 +++
 include/linux/pci.h |  5 +++
 4 files changed, 99 insertions(+), 2 deletions(-)

Comments

Bjorn Helgaas April 29, 2016, 9:16 p.m. UTC | #1
[+cc Alexander]

Sorry to be a pedant, but can you please edit the subject to be:

  PCI: Provide sensible IRQ vector alloc/free routines

so it matches the drivers/pci convention?

I like this idea a lot.  The MSI-X/MSI interfaces are much better than
they used to be, and I think this would be another significant
improvement.  What do you think, Alexander?  Here's the whole series
in case you don't have it handy:
http://lkml.kernel.org/r/1460770552-31260-1-git-send-email-hch@lst.de

On Fri, Apr 15, 2016 at 06:35:50PM -0700, Christoph Hellwig wrote:
> Hide all the MSI-X vs MSI vs legacy bullshit, and provide an array of
> interrupt vectors in the pci_dev structure, and ensure we get proper
> interrupt affinity by default.

This patch doesn't do anything for affinity by itself.

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/pci/irq.c   | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  drivers/pci/msi.c   |  2 +-
>  drivers/pci/pci.h   |  5 +++
>  include/linux/pci.h |  5 +++
>  4 files changed, 99 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c
> index 6684f15..b683465 100644
> --- a/drivers/pci/irq.c
> +++ b/drivers/pci/irq.c
> @@ -1,7 +1,8 @@
>  /*
> - * PCI IRQ failure handing code
> + * PCI IRQ handing code

s/handing/handling/ :)

>   *
>   * Copyright (c) 2008 James Bottomley <James.Bottomley@HansenPartnership.com>
> + * Copyright (c) 2016 Christoph Hellwig.
>   */
>  
>  #include <linux/acpi.h>
> @@ -9,6 +10,92 @@
>  #include <linux/kernel.h>
>  #include <linux/export.h>
>  #include <linux/pci.h>
> +#include <linux/interrupt.h>
> +#include "pci.h"
> +
> +static int pci_nr_irq_vectors(struct pci_dev *pdev)
> +{
> +	int nr_entries;
> +
> +	nr_entries = pci_msix_vec_count(pdev);
> +	if (nr_entries <= 0 && pci_msi_supported(pdev, 1))
> +		nr_entries = pci_msi_vec_count(pdev);
> +	if (nr_entries <= 0)
> +		nr_entries = 1;
> +	return nr_entries;
> +}
> +
> +static int pci_enable_msix_range_wrapper(struct pci_dev *pdev, u32 *irqs,
> +		int nr_vecs)
> +{
> +	struct msix_entry *msix_entries;
> +	int vecs, i;
> +
> +	msix_entries = kcalloc(nr_vecs, sizeof(struct msix_entry), GFP_KERNEL);
> +	if (!msix_entries)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < nr_vecs; i++)
> +		msix_entries[i].entry = i;
> +
> +	vecs = pci_enable_msix_range(pdev, msix_entries, 1, nr_vecs);
> +	if (vecs > 0) {
> +		for (i = 0; i < vecs; i++)
> +			irqs[i] = msix_entries[i].vector;
> +	}
> +
> +	kfree(msix_entries);
> +	return vecs;
> +}
> +
> +int pci_alloc_irq_vectors(struct pci_dev *pdev, int nr_vecs)
> +{
> +	int vecs, ret, i;
> +	u32 *irqs;
> +
> +	nr_vecs = min(nr_vecs, pci_nr_irq_vectors(pdev));
> +
> +	irqs = kcalloc(nr_vecs, sizeof(u32), GFP_KERNEL);
> +	if (!irqs)
> +		return -ENOMEM;
> +
> +	vecs = pci_enable_msix_range_wrapper(pdev, irqs, nr_vecs);
> +	if (vecs <= 0) {
> +		vecs = pci_enable_msi_range(pdev, 1, min(nr_vecs, 32));

I don't see one, but seems like we should have a #define for this
"32".  I guess pci_enable_msi_range() already protects itself, so this
min() is probably not strictly necessary anyway.

> +		if (vecs <= 0) {
> +			ret = -EIO;
> +			if (!pdev->irq)
> +				goto out_free_irqs;
> +
> +			/* use legacy irq */
> +			vecs = 1;
> +		}
> +
> +		for (i = 0; i < vecs; i++)
> +			irqs[i] = pdev->irq + i;
> +	}
> +
> +	pdev->irqs = irqs;
> +	return vecs;
> +
> +out_free_irqs:
> +	kfree(irqs);
> +	return ret;

  return -EIO;

and remove "ret".

> +}
> +EXPORT_SYMBOL(pci_alloc_irq_vectors);
> +
> +void pci_free_irq_vectors(struct pci_dev *pdev)
> +{
> +	if (pdev->msi_enabled)
> +		pci_disable_msi(pdev);
> +	else if (pdev->msix_enabled)
> +		pci_disable_msix(pdev);
> +
> +	kfree(pdev->dev.irq_affinity);
> +	pdev->dev.irq_affinity = NULL;

These two lines belong in a different patch.

> +	kfree(pdev->irqs);
> +}
> +EXPORT_SYMBOL(pci_free_irq_vectors);
>  
>  static void pci_note_irq_problem(struct pci_dev *pdev, const char *reason)
>  {
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index a080f44..544d306 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -815,7 +815,7 @@ out_free:
>   * to determine if MSI/-X are supported for the device. If MSI/-X is
>   * supported return 1, else return 0.
>   **/
> -static int pci_msi_supported(struct pci_dev *dev, int nvec)
> +int pci_msi_supported(struct pci_dev *dev, int nvec)
>  {
>  	struct pci_bus *bus;
>  
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index d0fb934..263422c 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -144,8 +144,13 @@ extern unsigned int pci_pm_d3_delay;
>  
>  #ifdef CONFIG_PCI_MSI
>  void pci_no_msi(void);
> +int pci_msi_supported(struct pci_dev *dev, int nvec);
>  #else
>  static inline void pci_no_msi(void) { }
> +static int pci_msi_supported(struct pci_dev *dev, int nvec)
> +{
> +	return 0;
> +}
>  #endif
>  
>  static inline void pci_msi_set_enable(struct pci_dev *dev, int enable)
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 004b813..4fbc14f 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -322,6 +322,7 @@ struct pci_dev {
>  	 * directly, use the values stored here. They might be different!
>  	 */
>  	unsigned int	irq;
> +	unsigned int	*irqs;
>  	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
>  
>  	bool match_driver;		/* Skip attaching driver */
> @@ -1235,6 +1236,9 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno);
>  int pci_set_vga_state(struct pci_dev *pdev, bool decode,
>  		      unsigned int command_bits, u32 flags);
>  
> +int pci_alloc_irq_vectors(struct pci_dev *dev, int nr_vecs);
> +void pci_free_irq_vectors(struct pci_dev *pdev);
> +
>  /* kmem_cache style wrapper around pci_alloc_consistent() */
>  
>  #include <linux/pci-dma.h>
> @@ -1282,6 +1286,7 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev,
>  		return rc;
>  	return 0;
>  }
> +
>  #else
>  static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
>  static inline void pci_msi_shutdown(struct pci_dev *dev) { }
> -- 
> 2.1.4
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 1, 2016, 6:01 p.m. UTC | #2
On Fri, Apr 29, 2016 at 04:16:39PM -0500, Bjorn Helgaas wrote:
> Sorry to be a pedant, but can you please edit the subject to be:
> 
>   PCI: Provide sensible IRQ vector alloc/free routines

sure.

> 
> so it matches the drivers/pci convention?
> 
> I like this idea a lot.  The MSI-X/MSI interfaces are much better than
> they used to be, and I think this would be another significant
> improvement.  What do you think, Alexander?  Here's the whole series
> in case you don't have it handy:
> http://lkml.kernel.org/r/1460770552-31260-1-git-send-email-hch@lst.de

FYI, I spent some time trying to convert more drivers to this, and
I think we'll need an additional flag to skip MSI or MSI-X as there
is plenty of hardware claiming support in the capabilities flag,
but not actually supporting one of them.

> > Hide all the MSI-X vs MSI vs legacy bullshit, and provide an array of
> > interrupt vectors in the pci_dev structure, and ensure we get proper
> > interrupt affinity by default.
> 
> This patch doesn't do anything for affinity by itself.

it used to in an earlier incarnation before I split that out.  But yes,
the changelog should be updated.

> > +	vecs = pci_enable_msix_range_wrapper(pdev, irqs, nr_vecs);
> > +	if (vecs <= 0) {
> > +		vecs = pci_enable_msi_range(pdev, 1, min(nr_vecs, 32));
> 
> I don't see one, but seems like we should have a #define for this
> "32".  I guess pci_enable_msi_range() already protects itself, so this
> min() is probably not strictly necessary anyway.

Ok, I'll take a look and will either remove it entirely or add a
define depending on the audit.
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas May 2, 2016, 1:11 p.m. UTC | #3
On Sun, May 01, 2016 at 08:01:49PM +0200, Christoph Hellwig wrote:
> FYI, I spent some time trying to convert more drivers to this, and
> I think we'll need an additional flag to skip MSI or MSI-X as there
> is plenty of hardware claiming support in the capabilities flag,
> but not actually supporting one of them.

Or maybe add a "pdev->msix_broken" bit and quirks to set it?  Or if
pci_fixup_final quirks merely cleared pdev->msix_cap, I think the PCI
core would never try to enable MSI-X.
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 2, 2016, 2:42 p.m. UTC | #4
On Mon, May 02, 2016 at 08:11:24AM -0500, Bjorn Helgaas wrote:
> On Sun, May 01, 2016 at 08:01:49PM +0200, Christoph Hellwig wrote:
> > FYI, I spent some time trying to convert more drivers to this, and
> > I think we'll need an additional flag to skip MSI or MSI-X as there
> > is plenty of hardware claiming support in the capabilities flag,
> > but not actually supporting one of them.
> 
> Or maybe add a "pdev->msix_broken" bit and quirks to set it?  Or if
> pci_fixup_final quirks merely cleared pdev->msix_cap, I think the PCI
> core would never try to enable MSI-X.

Can't say I'm excited about quirks - now we'd have to patch core
code for something that previously was entirely in the driver.
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas May 2, 2016, 3:29 p.m. UTC | #5
On Mon, May 02, 2016 at 04:42:03PM +0200, Christoph Hellwig wrote:
> On Mon, May 02, 2016 at 08:11:24AM -0500, Bjorn Helgaas wrote:
> > On Sun, May 01, 2016 at 08:01:49PM +0200, Christoph Hellwig wrote:
> > > FYI, I spent some time trying to convert more drivers to this, and
> > > I think we'll need an additional flag to skip MSI or MSI-X as there
> > > is plenty of hardware claiming support in the capabilities flag,
> > > but not actually supporting one of them.
> > 
> > Or maybe add a "pdev->msix_broken" bit and quirks to set it?  Or if
> > pci_fixup_final quirks merely cleared pdev->msix_cap, I think the PCI
> > core would never try to enable MSI-X.
> 
> Can't say I'm excited about quirks - now we'd have to patch core
> code for something that previously was entirely in the driver.

Yeah, you're right.  I was imagining a quirk in the driver itself, but
now that I look at it, I don't see any infrastructure for that.  I
think there are a lot of existing quirks that could be moved from the
core to a driver if we had support for quirks in drivers.

It just seems a shame to complicate the pci_alloc_irq_vectors()
interface with flags about broken devices.

I guess if we added a "pdev->msix_broken" bit, it would be visible to
drivers, and they could easily set it themselves in their .probe()
methods even without any actual quirk mechanism.  But a flag to
pci_alloc_irq_vectors() would certainly be more direct.
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 3, 2016, 9:19 p.m. UTC | #6
Hi Bjorn,

I've implemented your suggestion and I'm getting ready to send out
a new version.  One thing that came to mind is:  do you prefer this
code in irq.c or would you rather have it in msi.c?  While it
also has a legacy irq fallback, most of it is tied pretty closely to
the msi.c code, so I wonder if we should group them together.
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas May 3, 2016, 9:37 p.m. UTC | #7
On Tue, May 03, 2016 at 11:19:46PM +0200, Christoph Hellwig wrote:
> Hi Bjorn,
> 
> I've implemented your suggestion and I'm getting ready to send out
> a new version.  One thing that came to mind is:  do you prefer this
> code in irq.c or would you rather have it in msi.c?  While it
> also has a legacy irq fallback, most of it is tied pretty closely to
> the msi.c code, so I wonder if we should group them together.

Good question.  There isn't much in irq.c, and the interesting bits
are the MSI-related things, so maybe msi.c would make more sense.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c
index 6684f15..b683465 100644
--- a/drivers/pci/irq.c
+++ b/drivers/pci/irq.c
@@ -1,7 +1,8 @@ 
 /*
- * PCI IRQ failure handing code
+ * PCI IRQ handing code
  *
  * Copyright (c) 2008 James Bottomley <James.Bottomley@HansenPartnership.com>
+ * Copyright (c) 2016 Christoph Hellwig.
  */
 
 #include <linux/acpi.h>
@@ -9,6 +10,92 @@ 
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/pci.h>
+#include <linux/interrupt.h>
+#include "pci.h"
+
+static int pci_nr_irq_vectors(struct pci_dev *pdev)
+{
+	int nr_entries;
+
+	nr_entries = pci_msix_vec_count(pdev);
+	if (nr_entries <= 0 && pci_msi_supported(pdev, 1))
+		nr_entries = pci_msi_vec_count(pdev);
+	if (nr_entries <= 0)
+		nr_entries = 1;
+	return nr_entries;
+}
+
+static int pci_enable_msix_range_wrapper(struct pci_dev *pdev, u32 *irqs,
+		int nr_vecs)
+{
+	struct msix_entry *msix_entries;
+	int vecs, i;
+
+	msix_entries = kcalloc(nr_vecs, sizeof(struct msix_entry), GFP_KERNEL);
+	if (!msix_entries)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_vecs; i++)
+		msix_entries[i].entry = i;
+
+	vecs = pci_enable_msix_range(pdev, msix_entries, 1, nr_vecs);
+	if (vecs > 0) {
+		for (i = 0; i < vecs; i++)
+			irqs[i] = msix_entries[i].vector;
+	}
+
+	kfree(msix_entries);
+	return vecs;
+}
+
+int pci_alloc_irq_vectors(struct pci_dev *pdev, int nr_vecs)
+{
+	int vecs, ret, i;
+	u32 *irqs;
+
+	nr_vecs = min(nr_vecs, pci_nr_irq_vectors(pdev));
+
+	irqs = kcalloc(nr_vecs, sizeof(u32), GFP_KERNEL);
+	if (!irqs)
+		return -ENOMEM;
+
+	vecs = pci_enable_msix_range_wrapper(pdev, irqs, nr_vecs);
+	if (vecs <= 0) {
+		vecs = pci_enable_msi_range(pdev, 1, min(nr_vecs, 32));
+		if (vecs <= 0) {
+			ret = -EIO;
+			if (!pdev->irq)
+				goto out_free_irqs;
+
+			/* use legacy irq */
+			vecs = 1;
+		}
+
+		for (i = 0; i < vecs; i++)
+			irqs[i] = pdev->irq + i;
+	}
+
+	pdev->irqs = irqs;
+	return vecs;
+
+out_free_irqs:
+	kfree(irqs);
+	return ret;
+}
+EXPORT_SYMBOL(pci_alloc_irq_vectors);
+
+void pci_free_irq_vectors(struct pci_dev *pdev)
+{
+	if (pdev->msi_enabled)
+		pci_disable_msi(pdev);
+	else if (pdev->msix_enabled)
+		pci_disable_msix(pdev);
+
+	kfree(pdev->dev.irq_affinity);
+	pdev->dev.irq_affinity = NULL;
+	kfree(pdev->irqs);
+}
+EXPORT_SYMBOL(pci_free_irq_vectors);
 
 static void pci_note_irq_problem(struct pci_dev *pdev, const char *reason)
 {
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index a080f44..544d306 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -815,7 +815,7 @@  out_free:
  * to determine if MSI/-X are supported for the device. If MSI/-X is
  * supported return 1, else return 0.
  **/
-static int pci_msi_supported(struct pci_dev *dev, int nvec)
+int pci_msi_supported(struct pci_dev *dev, int nvec)
 {
 	struct pci_bus *bus;
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index d0fb934..263422c 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -144,8 +144,13 @@  extern unsigned int pci_pm_d3_delay;
 
 #ifdef CONFIG_PCI_MSI
 void pci_no_msi(void);
+int pci_msi_supported(struct pci_dev *dev, int nvec);
 #else
 static inline void pci_no_msi(void) { }
+static int pci_msi_supported(struct pci_dev *dev, int nvec)
+{
+	return 0;
+}
 #endif
 
 static inline void pci_msi_set_enable(struct pci_dev *dev, int enable)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 004b813..4fbc14f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -322,6 +322,7 @@  struct pci_dev {
 	 * directly, use the values stored here. They might be different!
 	 */
 	unsigned int	irq;
+	unsigned int	*irqs;
 	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
 
 	bool match_driver;		/* Skip attaching driver */
@@ -1235,6 +1236,9 @@  resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno);
 int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 		      unsigned int command_bits, u32 flags);
 
+int pci_alloc_irq_vectors(struct pci_dev *dev, int nr_vecs);
+void pci_free_irq_vectors(struct pci_dev *pdev);
+
 /* kmem_cache style wrapper around pci_alloc_consistent() */
 
 #include <linux/pci-dma.h>
@@ -1282,6 +1286,7 @@  static inline int pci_enable_msix_exact(struct pci_dev *dev,
 		return rc;
 	return 0;
 }
+
 #else
 static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
 static inline void pci_msi_shutdown(struct pci_dev *dev) { }