diff mbox

[2/2] PCIe native PME support

Message ID 20090901023549.GB3695@sli10-desk.sh.intel.com (mailing list archive)
State RFC, archived
Headers show

Commit Message

Shaohua Li Sept. 1, 2009, 2:35 a.m. UTC
On Sun, Aug 30, 2009 at 04:48:00AM +0800, Rafael J. Wysocki wrote:
> Hi,
> 
> On Thursday 27 August 2009, Shaohua Li wrote:
> ...
> > > > I have different understanding. The root port can buffer several PME requests from
> > > > different devices. If the PME pending bit is set and after the status is clear,
> > > > the status bit will get set soon. We can do one request one time for each interrupt,
> > > > but seems not necessary. And the loop makes OS more quickly handle PME request without
> > > > overflow root port buffer.
> > >
> > > OK, but in that case it probably is a good idea to use the PME Pending bit as
> > > well.
> > Sounds not required. If the pending bit is set, the status bit will get re-set soon after
> > it's cleared.
> 
> That's exactly why I'd like to check it. :-)
> 
> The whole point of the loop in pcie_pme_work_handle() is to avoid being
> interrupted again after it's returned, but if the pending bit is set, we _know_
> we're going to get interrupted immediately, so the loop should continue in
> that case.  Otherwise there's no point in looping at all.
> 
> > If pending isn't set, the loop will exit in one circle since the status bit
> > is cleared after the circle. Even we missed one PME in the loop, we will get a new interrupt
> > soon after PME interrupt is reeabled. So we should be fine here in any case.
> >
> > Changes since last post:
> > 1. Add kerneldoc for functions/remove config option/add boot option
> > 2. Handle the pcie-to-pci bridge case more robust
> > 3. don't disable PME report interrupt even spurios interrupt occurs. I saw
> > root port receives an extra interrupt when device resumes and sounds harmless.
> >
> >
> >
> > PCIe defines a native PME detection mechanism. When a PCIe endpoint
> > invokes PME, PCIe root port has a set of regisets to detect the
> > endpoint's bus/device/function number and root port will send out
> > interrupt when PME is received. After getting interrupt, OS can identify
> > which device invokes PME according to such information. See PCIe
> > spec for detail. This patch implements this feature.
> >
> > Signed-off-by: Shaohua Li <shaohua.li@intel.com>
> > ---
> >  drivers/pci/pcie/Kconfig    |    4
> >  drivers/pci/pcie/Makefile   |    2
> >  drivers/pci/pcie/pcie_pme.c |  383 ++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 389 insertions(+)
> >
> > Index: linux/drivers/pci/pcie/Kconfig
> > ===================================================================
> > --- linux.orig/drivers/pci/pcie/Kconfig       2009-08-27 14:07:35.000000000 +0800
> > +++ linux/drivers/pci/pcie/Kconfig    2009-08-27 14:15:41.000000000 +0800
> > @@ -46,3 +46,7 @@ config PCIEASPM_DEBUG
> >       help
> >         This enables PCI Express ASPM debug support. It will add per-device
> >         interface to control ASPM.
> > +
> > +config PCIE_PME
> > +     def_bool y
> > +     depends on PCIEPORTBUS && PM_RUNTIME && EXPERIMENTAL
> > Index: linux/drivers/pci/pcie/Makefile
> > ===================================================================
> > --- linux.orig/drivers/pci/pcie/Makefile      2009-08-27 14:07:35.000000000 +0800
> > +++ linux/drivers/pci/pcie/Makefile   2009-08-27 14:30:20.000000000 +0800
> > @@ -11,3 +11,5 @@ obj-$(CONFIG_PCIEPORTBUS)   += pcieportdrv
> >
> >  # Build PCI Express AER if needed
> >  obj-$(CONFIG_PCIEAER)                += aer/
> > +
> > +obj-$(CONFIG_PCIE_PME) += pcie_pme.o
> > Index: linux/drivers/pci/pcie/pcie_pme.c
> > ===================================================================
> > --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> > +++ linux/drivers/pci/pcie/pcie_pme.c 2009-08-27 14:15:41.000000000 +0800
> > @@ -0,0 +1,383 @@
> > +/*
> > + * PCIe Native PME support
> > + *
> > + * Copyright (C) 2007 - 2009 Intel Corp
> > + *  Shaohua Li <shaohua.li@intel.com>
> > + *
> > + * This file is subject to the terms and conditions of the GNU General Public
> > + * License.  See the file "COPYING" in the main directory of this archive
> > + * for more details.
> > + */
> > +
> > +#include <linux/module.h>
> > +#include <linux/pci.h>
> > +#include <linux/kernel.h>
> > +#include <linux/errno.h>
> > +#include <linux/init.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/device.h>
> > +#include <linux/pcieport_if.h>
> > +#include <linux/acpi.h>
> > +#include <linux/pci-acpi.h>
> > +#include <linux/pm_runtime.h>
> > +#include "../pci.h"
> > +
> > +#define PCI_EXP_RTSTA_PME 0x10000 /* PME status */
> > +
> > +static int disabled;
> > +/*
> > + * It appears a lot of BIOS doesn't implement ACPI _OSC correctly, so let's
> > + * force to enable PCIe PME by default
> > + */
> > +static int force = 1;
> > +
> > +static int __init pcie_pme_setup(char *str)
> > +{
> > +     if (!strcmp(str, "off"))
> > +             disabled = 1;
> > +     else if (!strcmp(str, "strict"))
> > +             force = 0;
> > +     return 1;
> > +}
> > +__setup("pcie_pme=", pcie_pme_setup);
> > +
> > +struct pcie_pme_service_data {
> > +     spinlock_t lock;
> > +     struct pcie_device *dev;
> > +     struct work_struct work;
> > +     int in_exiting; /* driver is exiting, don't reenable interrupt */
> 
> That should be bool.
> 
> > +};
> > +
> > +/*
> > + * pcie_pme_enable_interrupt - enable/disable PCIe PME report interrupt
> > + * @pdev: PCI root port
> > + * @enable: enable or disable interrupt
> > + */
> > +static inline void pcie_pme_enable_interrupt(struct pci_dev *pdev,
> > +     bool enable)
> > +{
> > +     int pos;
> > +     u16 rtctl;
> > +
> > +     pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
> > +
> > +     pci_read_config_word(pdev, pos + PCI_EXP_RTCTL, &rtctl);
> > +     if (enable)
> > +             rtctl |= PCI_EXP_RTCTL_PMEIE;
> > +     else
> > +             rtctl &= ~PCI_EXP_RTCTL_PMEIE;
> > +     pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, rtctl);
> > +}
> > +
> > +/*
> > + * pcie_pme_clear_status - clear root port PME report status
> > + * @pdev: the root port
> > + */
> > +static inline void pcie_pme_clear_status(struct pci_dev *pdev)
> > +{
> > +     int pos;
> > +     u32 rtsta;
> > +
> > +     pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
> > +
> > +     pci_read_config_dword(pdev, pos + PCI_EXP_RTSTA, &rtsta);
> > +     rtsta |= PCI_EXP_RTSTA_PME;
> > +     pci_write_config_dword(pdev, pos + PCI_EXP_RTSTA, rtsta);
> > +}
> > +
> > +/*
> > + * pcie_pme_handle_request - handle one PCI device's PME
> > + * @root_port: the root port which gets PME report
> > + * @requester_id: the device ID of PCI device which invokes PME event
> > + */
> > +static bool pcie_pme_handle_request(struct pci_dev *root_port,
> > +     u16 requester_id)
> 
> Do we care about the return value?
> 
> > +{
> > +     struct pci_dev *target;
> > +     bool device_found = false;
> > +     u8 busnr = requester_id >> 8, devfn = requester_id & 0xff;
> > +
> > +     target = pci_get_bus_and_slot(busnr, devfn);
> > +
> > +     /* the PCIe-to-PCI bridge case, see below comment */
> > +     if (!target->is_pcie) {
> > +             pci_dev_put(target);
> > +             target = NULL;
> > +             WARN_ON(devfn); /* devfn should be zero */
> 
> Perhaps reset devfn after printing the warning?
> 
> > +     }
> 
> You shouldn't dereference target before checking it's not NULL.
> 
> > +     if (target) {
> > +             device_found = pci_pm_handle_wakeup_event_native(target);
> > +             pci_dev_put(target);
> > +             return device_found;
> > +     }
> > +
> > +     /*
> > +      * PME from PCI devices under a PCIe-to-PCI bridge may be converted to
> > +      * a PCIe in-band PME message. In such case, bridge will assign the
> > +      * message a new request id using bridge's secondary bus numer and
> > +      * device number/function number 0
> > +      */
> > +     if (!target && !devfn) {
> > +             struct pci_bus *bus;
> > +
> > +             bus = pci_find_bus(pci_domain_nr(root_port->bus), busnr);
> > +             if (bus) {
> > +                     target = bus->self;
> > +                     if (!target->is_pcie || target->pcie_type !=
> > +                                     PCI_EXP_TYPE_PCI_BRIDGE)
> 
> Please don't break the line like this.  Is it necessary to break it at all?
> 
> > +                             target = NULL;
> > +             }
> > +     }
> > +
> > +     if (target)
> > +             device_found = pci_pm_handle_wakeup_event_native(target);
> > +     else
> > +             printk(KERN_ERR"Can't find device %02d:%d.%d which invokes PME\n",
> > +                     busnr, PCI_SLOT(devfn), PCI_FUNC(devfn));
> 
> If you print the message about the spurious interrupt here, the return value
> won't be necessary any more and you'll be able to simplify the next function.
> 
> > +
> > +     return device_found;
> > +}
> > +
> > +/* pcie_pme_work_handle - work handler for root port PME report interrupt */
> > +static void pcie_pme_work_handle(struct work_struct *work)
> > +{
> > +     struct pcie_pme_service_data *data =
> > +                     container_of(work, struct pcie_pme_service_data, work);
> > +     struct pci_dev *root_port = data->dev->port;
> > +     unsigned long flags;
> > +     bool spurious = true;
> > +     u32 rtsta;
> > +     int pos;
> > +
> > +     pos = pci_find_capability(root_port, PCI_CAP_ID_EXP);
> > +     while (1) {
> > +             spin_lock_irqsave(&data->lock, flags);
> > +             pci_read_config_dword(root_port, pos + PCI_EXP_RTSTA, &rtsta);
> > +
> > +             if (!(rtsta & PCI_EXP_RTSTA_PME)) {
> > +                     spin_unlock_irqrestore(&data->lock, flags);
> > +                     break;
> > +             }
> 
> As I said above, I'd check PME_Pending here as well.
> 
> > +             /* clear PME, if there are other pending PME, the status will
> > +              * get set again
> > +              */
> > +             pcie_pme_clear_status(root_port);
> > +             spin_unlock_irqrestore(&data->lock, flags);
> > +
> > +             if (pcie_pme_handle_request(root_port, rtsta & 0xffff))
> > +                     spurious = false;
> > +     }
> > +
> > +     if (!data->in_exiting) {
> 
> Perhaps
> 
> +       if (!data->in_exiting)
> +               return;
> 
> and save an indentation level?
> 
> Also data->in_exiting should be checked under the spinlock, shouldn't it?
> 
> > +             spin_lock_irqsave(&data->lock, flags);
> > +             /* reenable native PME */
> > +             pcie_pme_enable_interrupt(root_port, true);
> > +             spin_unlock_irqrestore(&data->lock, flags);
> > +     }
> > +
> > +     if (spurious)
> > +             printk(KERN_ERR"Spurious Native PME interrupt %d received\n",
> > +                     root_port->irq);
> > +}
> > +
> > +/* pcie_pme_irq - interrupt handler for root port PME report interrupt */
> > +static irqreturn_t pcie_pme_irq(int irq, void *context)
> > +{
> > +     int pos;
> > +     struct pci_dev *pdev;
> > +     u32 rtsta;
> > +     struct pcie_pme_service_data *data;
> > +     unsigned long flags;
> > +
> > +     pdev = ((struct pcie_device *)context)->port;
> > +     data = get_service_data((struct pcie_device *)context);
> > +
> > +     pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
> > +
> > +     spin_lock_irqsave(&data->lock, flags);
> > +     pci_read_config_dword(pdev, pos + PCI_EXP_RTSTA, &rtsta);
> > +
> > +     if (!(rtsta & PCI_EXP_RTSTA_PME)) {
> > +             spin_unlock_irqrestore(&data->lock, flags);
> > +             return IRQ_NONE;
> > +     }
> > +
> > +     /* disable PME to avoid interrupt flood */
> > +     pcie_pme_enable_interrupt(pdev, false);
> > +     spin_unlock_irqrestore(&data->lock, flags);
> > +
> > +     queue_work(pm_wq, &data->work);
> > +
> > +     return IRQ_HANDLED;
> > +}
> > +
> > +#ifdef CONFIG_ACPI
> > +/*
> > + * pcie_pme_osc_setup - declare OS support PCIe native PME feature to BIOS
> > + * @pciedev - pcie device struct for root port
> > + */
> > +static int pcie_pme_osc_setup(struct pcie_device *pciedev)
> > +{
> > +     acpi_status status = AE_NOT_FOUND;
> > +     struct pci_dev *pdev = pciedev->port;
> > +     acpi_handle handle = DEVICE_ACPI_HANDLE(&pdev->dev);
> > +
> > +     if (!handle)
> > +             return -EINVAL;
> > +     status = acpi_pci_osc_control_set(handle,
> > +                     OSC_PCI_EXPRESS_PME_CONTROL |
> > +                     OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
> > +
> > +     if (ACPI_FAILURE(status)) {
> > +             printk(KERN_DEBUG "Native PME service couldn't init device "
> > +                     "%s - %s\n", dev_name(&pciedev->device),
> > +                     (status == AE_SUPPORT || status == AE_NOT_FOUND) ?
> > +                     "no _OSC support" : "Run ACPI _OSC fails");
> > +             return -EINVAL;
> > +     }
> > +
> > +     return 0;
> > +}
> > +#else
> > +static inline int pcie_pme_osc_setup(struct pcie_device *pciedev)
> > +{
> > +     return 0;
> > +}
> > +#endif
> > +
> > +/*
> > + * pcie_pme_probe - initialize PCIe PME report for a root port
> > + * @dev - pcie device struct for root port
> > + */
> > +static int pcie_pme_probe(struct pcie_device *dev)
> > +{
> > +     struct pci_dev *pdev;
> > +     int status;
> > +     struct pcie_pme_service_data *data;
> > +
> > +     if (pcie_pme_osc_setup(dev) && !force)
> > +             return -EINVAL;
> > +
> > +     data = kzalloc(sizeof(*data), GFP_KERNEL);
> > +     if (!data)
> > +             return -ENOMEM;
> > +     spin_lock_init(&data->lock);
> > +     INIT_WORK(&data->work, pcie_pme_work_handle);
> > +     data->dev = dev;
> > +     set_service_data(dev, data);
> > +
> > +     pdev = dev->port;
> > +
> > +     /* clear pending PME */
> > +     pcie_pme_clear_status(pdev);
> > +
> > +     status = request_irq(dev->irq, pcie_pme_irq, IRQF_SHARED,
> > +             "pcie_pme", dev);
> > +     if (status) {
> > +             kfree(data);
> > +             return status;
> > +     }
> > +
> > +     /* enable PME interrupt */
> > +     pcie_pme_enable_interrupt(pdev, true);
> > +
> > +     return 0;
> > +}
> > +
> > +/*
> > + * pcie_pme_remove - finialize PCIe PME report for a root port
> > + * @dev - pcie device struct for root port
> > + */
> > +static void pcie_pme_remove(struct pcie_device *dev)
> > +{
> > +     struct pci_dev *pdev;
> > +     unsigned long flags;
> > +     struct pcie_pme_service_data *data = get_service_data(dev);
> > +
> > +     pdev = dev->port;
> > +
> > +     /* disable PME interrupt */
> > +     spin_lock_irqsave(&data->lock, flags);
> > +     data->in_exiting = 1;
> > +     pcie_pme_enable_interrupt(pdev, false);
> > +     spin_unlock_irqrestore(&data->lock, flags);
> > +
> > +     flush_scheduled_work();
> > +     free_irq(dev->irq, dev);
> > +
> > +     /* clear pending PME */
> > +     pcie_pme_clear_status(pdev);
> > +
> > +     set_service_data(dev, NULL);
> > +     kfree(data);
> > +}
> > +
> > +/*
> > + * pcie_pme_suspend - suspend PCIe PME report for a root port
> > + * @dev - pcie device struct for root port
> > + */
> > +static int pcie_pme_suspend(struct pcie_device *dev)
> > +{
> > +     struct pci_dev *pdev;
> > +     struct pcie_pme_service_data *data;
> > +     unsigned long flags;
> > +
> > +     pdev = dev->port;
> > +     data = get_service_data(dev);
> > +
> > +     spin_lock_irqsave(&data->lock, flags);
> > +     /* disable PME to avoid further interrupt */
> > +     pcie_pme_enable_interrupt(pdev, false);
> > +
> > +     /* clear pending PME */
> > +     pcie_pme_clear_status(pdev);
> > +     spin_unlock_irqrestore(&data->lock, flags);
> > +
> > +     return 0;
> > +}
> > +
> > +/*
> > + * pcie_pme_resume - resume PCIe PME report for a root port
> > + * @dev - pcie device struct for root port
> > + */
> > +static int pcie_pme_resume(struct pcie_device *dev)
> > +{
> > +     struct pci_dev *pdev = dev->port;
> > +     unsigned long flags;
> > +     struct pcie_pme_service_data *data = get_service_data(dev);
> > +
> > +     spin_lock_irqsave(&data->lock, flags);
> > +     pcie_pme_clear_status(pdev);
> > +     pcie_pme_enable_interrupt(pdev, true);
> > +     spin_unlock_irqrestore(&data->lock, flags);
> > +
> > +     return 0;
> > +}
> > +
> > +static struct pcie_port_service_driver pcie_pme_driver = {
> > +     .name           = "pcie_pme",
> > +     .port_type      = PCIE_RC_PORT,
> > +     .service        = PCIE_PORT_SERVICE_PME,
> > +
> > +     .probe          = pcie_pme_probe,
> > +     .remove         = pcie_pme_remove,
> > +     .suspend        = pcie_pme_suspend,
> > +     .resume         = pcie_pme_resume,
> > +};
> > +
> > +static int __init pcie_pme_service_init(void)
> > +{
> > +     if (disabled)
> > +             return -EINVAL;
> > +     return pcie_port_service_register(&pcie_pme_driver);
> > +}
> > +
> > +static void __exit pcie_pme_service_exit(void)
> > +{
> > +     pcie_port_service_unregister(&pcie_pme_driver);
> > +}
> > +
> > +module_init(pcie_pme_service_init);
> > +module_exit(pcie_pme_service_exit);
Updated patch.


PCIe defines a native PME detection mechanism. When a PCIe endpoint
invokes PME, PCIe root port has a set of regisets to detect the
endpoint's bus/device/function number and root port will send out
interrupt when PME is received. After getting interrupt, OS can identify
which device invokes PME according to such information. See PCIe
spec for detail. This patch implements this feature.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
---
 drivers/pci/pcie/Kconfig    |    4 
 drivers/pci/pcie/Makefile   |    2 
 drivers/pci/pcie/pcie_pme.c |  427 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 433 insertions(+)


--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: linux/drivers/pci/pcie/Kconfig
===================================================================
--- linux.orig/drivers/pci/pcie/Kconfig	2009-08-31 17:26:23.000000000 +0800
+++ linux/drivers/pci/pcie/Kconfig	2009-08-31 17:28:05.000000000 +0800
@@ -46,3 +46,7 @@  config PCIEASPM_DEBUG
 	help
 	  This enables PCI Express ASPM debug support. It will add per-device
 	  interface to control ASPM.
+
+config PCIE_PME
+	def_bool y
+	depends on PCIEPORTBUS && PM_RUNTIME && EXPERIMENTAL
Index: linux/drivers/pci/pcie/Makefile
===================================================================
--- linux.orig/drivers/pci/pcie/Makefile	2009-08-31 17:26:23.000000000 +0800
+++ linux/drivers/pci/pcie/Makefile	2009-08-31 17:28:05.000000000 +0800
@@ -11,3 +11,5 @@  obj-$(CONFIG_PCIEPORTBUS)	+= pcieportdrv
 
 # Build PCI Express AER if needed
 obj-$(CONFIG_PCIEAER)		+= aer/
+
+obj-$(CONFIG_PCIE_PME) += pcie_pme.o
Index: linux/drivers/pci/pcie/pcie_pme.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/pci/pcie/pcie_pme.c	2009-09-01 10:29:35.000000000 +0800
@@ -0,0 +1,427 @@ 
+/*
+ * PCIe Native PME support
+ *
+ * Copyright (C) 2007 - 2009 Intel Corp
+ *  Shaohua Li <shaohua.li@intel.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/pcieport_if.h>
+#include <linux/acpi.h>
+#include <linux/pci-acpi.h>
+#include <linux/pm_runtime.h>
+#include "../pci.h"
+
+#define PCI_EXP_RTSTA_PME 0x10000 /* PME status */
+#define PCI_EXP_RTSTA_PEND 0x20000 /* PME pending */
+
+static int disabled;
+/*
+ * It appears a lot of BIOS doesn't implement ACPI _OSC correctly, so let's
+ * force to enable PCIe PME by default
+ */
+static int force = 1;
+
+static int __init pcie_pme_setup(char *str)
+{
+	if (!strcmp(str, "off"))
+		disabled = 1;
+	else if (!strcmp(str, "strict"))
+		force = 0;
+	return 1;
+}
+__setup("pcie_pme=", pcie_pme_setup);
+
+struct pcie_pme_service_data {
+	spinlock_t lock;
+	struct pcie_device *dev;
+	struct work_struct work;
+	bool in_exiting; /* driver is exiting, don't reenable interrupt */
+};
+
+/*
+ * pcie_pme_enable_interrupt - enable/disable PCIe PME report interrupt
+ * @pdev: PCI root port
+ * @enable: enable or disable interrupt
+ */
+static inline void pcie_pme_enable_interrupt(struct pci_dev *pdev,
+	bool enable)
+{
+	int pos;
+	u16 rtctl;
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+
+	pci_read_config_word(pdev, pos + PCI_EXP_RTCTL, &rtctl);
+	if (enable)
+		rtctl |= PCI_EXP_RTCTL_PMEIE;
+	else
+		rtctl &= ~PCI_EXP_RTCTL_PMEIE;
+	pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, rtctl);
+}
+
+/*
+ * pcie_pme_clear_status - clear root port PME report status
+ * @pdev: the root port
+ */
+static inline void pcie_pme_clear_status(struct pci_dev *pdev)
+{
+	int pos;
+	u32 rtsta;
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+
+	pci_read_config_dword(pdev, pos + PCI_EXP_RTSTA, &rtsta);
+	rtsta |= PCI_EXP_RTSTA_PME;
+	pci_write_config_dword(pdev, pos + PCI_EXP_RTSTA, rtsta);
+}
+
+/*
+ * pcie_pme_do_wakeup_event - handle PCIe native PME event
+ * @target is suspected to invoke a wakeup event (PME)
+ * Note @target should be a PCIe device
+ */
+static bool pcie_pme_do_wakeup_event(struct pci_dev *target)
+{
+	bool ret;
+	struct pci_dev *tmp = NULL;
+	int domain_nr, bus_start, bus_end;
+
+	BUG_ON(!target->is_pcie);
+
+	/* For PCIe PME, if the target isn't the source, do nothing */
+	ret = pci_pm_check_wakeup_event(target);
+
+	if (!target->subordinate || (target->pcie_type != PCI_EXP_TYPE_ROOT_PORT
+		&& target->pcie_type != PCI_EXP_TYPE_PCI_BRIDGE)) {
+		if (ret)
+			pm_request_resume(&target->dev);
+		return ret;
+	}
+
+	if (ret)
+		pm_request_resume(&target->dev);
+
+	/* scan legacy PCI devices under the bridge or root port */
+	domain_nr = pci_domain_nr(target->bus);
+	bus_start = target->subordinate->secondary;
+	bus_end = target->subordinate->subordinate;
+	while ((tmp = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, tmp)) != NULL) {
+		if (pci_domain_nr(tmp->bus) != domain_nr ||
+		   tmp->bus->number < bus_start ||
+		   tmp->bus->number > bus_end)
+			continue;
+		if (!tmp->is_pcie && pci_pm_check_wakeup_event(tmp)) {
+			ret = true;
+			pm_request_resume(&tmp->dev);
+		}
+	}
+	return ret;
+}
+
+/*
+ * pcie_pme_handle_request - handle one PCI device's PME
+ * @root_port: the root port which gets PME report
+ * @requester_id: the device ID of PCI device which invokes PME event
+ */
+static void pcie_pme_handle_request(struct pci_dev *root_port,
+	u16 requester_id)
+{
+	struct pci_dev *target;
+	bool device_found = false;
+	u8 busnr = requester_id >> 8, devfn = requester_id & 0xff;
+
+	target = pci_get_bus_and_slot(busnr, devfn);
+
+	/* the PCIe-to-PCI bridge case, see below comment */
+	if (target && !target->is_pcie) {
+		pci_dev_put(target);
+		target = NULL;
+		printk(KERN_WARNING"%s: devfn should be zero\n", __func__);
+		devfn = 0; /* devfn should be zero */
+	}
+
+	if (target) {
+		device_found = pcie_pme_do_wakeup_event(target);
+		pci_dev_put(target);
+		goto out;
+	}
+
+	/*
+	 * PME from PCI devices under a PCIe-to-PCI bridge may be converted to
+	 * a PCIe in-band PME message. In such case, bridge will assign the
+	 * message a new request id using bridge's secondary bus numer and
+	 * device number/function number 0
+	 */
+	if (!target && !devfn) {
+		struct pci_bus *bus;
+
+		bus = pci_find_bus(pci_domain_nr(root_port->bus), busnr);
+		if (bus) {
+			target = bus->self;
+			if (!target->is_pcie ||
+			   target->pcie_type != PCI_EXP_TYPE_PCI_BRIDGE)
+				target = NULL;
+		}
+	}
+
+	if (target)
+		device_found = pcie_pme_do_wakeup_event(target);
+
+out:
+	if (!device_found)
+		printk(KERN_ERR"%s: Spurious Native PME interrupt received\n",
+			pci_name(root_port));
+}
+
+/* pcie_pme_work_handle - work handler for root port PME report interrupt */
+static void pcie_pme_work_handle(struct work_struct *work)
+{
+	struct pcie_pme_service_data *data =
+			container_of(work, struct pcie_pme_service_data, work);
+	struct pci_dev *root_port = data->dev->port;
+	unsigned long flags;
+	u32 rtsta;
+	int pos;
+
+	pos = pci_find_capability(root_port, PCI_CAP_ID_EXP);
+	while (1) {
+		spin_lock_irqsave(&data->lock, flags);
+		pci_read_config_dword(root_port, pos + PCI_EXP_RTSTA, &rtsta);
+
+		if (!(rtsta & PCI_EXP_RTSTA_PME)) {
+			spin_unlock_irqrestore(&data->lock, flags);
+
+			/* Has pending bit set but no status bit set, slow HW? */
+			if (rtsta & PCI_EXP_RTSTA_PEND) {
+				cpu_relax();
+				continue;
+			} else
+				break;
+		}
+
+		/* clear PME, if there are other pending PME, the status will
+		 * get set again
+		 */
+		pcie_pme_clear_status(root_port);
+		spin_unlock_irqrestore(&data->lock, flags);
+
+		pcie_pme_handle_request(root_port, rtsta & 0xffff);
+	}
+
+	spin_lock_irqsave(&data->lock, flags);
+	if (!data->in_exiting)
+		/* reenable native PME */
+		pcie_pme_enable_interrupt(root_port, true);
+	spin_unlock_irqrestore(&data->lock, flags);
+}
+
+/* pcie_pme_irq - interrupt handler for root port PME report interrupt */
+static irqreturn_t pcie_pme_irq(int irq, void *context)
+{
+	int pos;
+	struct pci_dev *pdev;
+	u32 rtsta;
+	struct pcie_pme_service_data *data;
+	unsigned long flags;
+
+	pdev = ((struct pcie_device *)context)->port;
+	data = get_service_data((struct pcie_device *)context);
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
+
+	spin_lock_irqsave(&data->lock, flags);
+	pci_read_config_dword(pdev, pos + PCI_EXP_RTSTA, &rtsta);
+
+	if (!(rtsta & PCI_EXP_RTSTA_PME)) {
+		spin_unlock_irqrestore(&data->lock, flags);
+		return IRQ_NONE;
+	}
+
+	/* disable PME to avoid interrupt flood */
+	pcie_pme_enable_interrupt(pdev, false);
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	queue_work(pm_wq, &data->work);
+
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_ACPI
+/*
+ * pcie_pme_osc_setup - declare OS support PCIe native PME feature to BIOS
+ * @pciedev - pcie device struct for root port
+ */
+static int pcie_pme_osc_setup(struct pcie_device *pciedev)
+{
+	acpi_status status = AE_NOT_FOUND;
+	struct pci_dev *pdev = pciedev->port;
+	acpi_handle handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+
+	if (!handle)
+		return -EINVAL;
+	status = acpi_pci_osc_control_set(handle,
+			OSC_PCI_EXPRESS_PME_CONTROL |
+			OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
+
+	if (ACPI_FAILURE(status)) {
+		printk(KERN_DEBUG "Native PME service couldn't init device "
+			"%s - %s\n", dev_name(&pciedev->device),
+			(status == AE_SUPPORT || status == AE_NOT_FOUND) ?
+			"no _OSC support" : "Run ACPI _OSC fails");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+static inline int pcie_pme_osc_setup(struct pcie_device *pciedev)
+{
+	return 0;
+}
+#endif
+
+/*
+ * pcie_pme_probe - initialize PCIe PME report for a root port
+ * @dev - pcie device struct for root port
+ */
+static int pcie_pme_probe(struct pcie_device *dev)
+{
+	struct pci_dev *pdev;
+	int status;
+	struct pcie_pme_service_data *data;
+
+	if (pcie_pme_osc_setup(dev) && !force)
+		return -EINVAL;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	spin_lock_init(&data->lock);
+	INIT_WORK(&data->work, pcie_pme_work_handle);
+	data->dev = dev;
+	set_service_data(dev, data);
+
+	pdev = dev->port;
+
+	/* clear pending PME */
+	pcie_pme_clear_status(pdev);
+
+	status = request_irq(dev->irq, pcie_pme_irq, IRQF_SHARED,
+		"pcie_pme", dev);
+	if (status) {
+		kfree(data);
+		return status;
+	}
+
+	/* enable PME interrupt */
+	pcie_pme_enable_interrupt(pdev, true);
+
+	return 0;
+}
+
+/*
+ * pcie_pme_remove - finialize PCIe PME report for a root port
+ * @dev - pcie device struct for root port
+ */
+static void pcie_pme_remove(struct pcie_device *dev)
+{
+	struct pci_dev *pdev;
+	unsigned long flags;
+	struct pcie_pme_service_data *data = get_service_data(dev);
+
+	pdev = dev->port;
+
+	/* disable PME interrupt */
+	spin_lock_irqsave(&data->lock, flags);
+	data->in_exiting = true;
+	pcie_pme_enable_interrupt(pdev, false);
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	flush_scheduled_work();
+	free_irq(dev->irq, dev);
+
+	/* clear pending PME */
+	pcie_pme_clear_status(pdev);
+
+	set_service_data(dev, NULL);
+	kfree(data);
+}
+
+/*
+ * pcie_pme_suspend - suspend PCIe PME report for a root port
+ * @dev - pcie device struct for root port
+ */
+static int pcie_pme_suspend(struct pcie_device *dev)
+{
+	struct pci_dev *pdev;
+	struct pcie_pme_service_data *data;
+	unsigned long flags;
+
+	pdev = dev->port;
+	data = get_service_data(dev);
+
+	spin_lock_irqsave(&data->lock, flags);
+	/* disable PME to avoid further interrupt */
+	pcie_pme_enable_interrupt(pdev, false);
+
+	/* clear pending PME */
+	pcie_pme_clear_status(pdev);
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	return 0;
+}
+
+/*
+ * pcie_pme_resume - resume PCIe PME report for a root port
+ * @dev - pcie device struct for root port
+ */
+static int pcie_pme_resume(struct pcie_device *dev)
+{
+	struct pci_dev *pdev = dev->port;
+	unsigned long flags;
+	struct pcie_pme_service_data *data = get_service_data(dev);
+
+	spin_lock_irqsave(&data->lock, flags);
+	pcie_pme_clear_status(pdev);
+	pcie_pme_enable_interrupt(pdev, true);
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	return 0;
+}
+
+static struct pcie_port_service_driver pcie_pme_driver = {
+	.name		= "pcie_pme",
+	.port_type 	= PCIE_RC_PORT,
+	.service 	= PCIE_PORT_SERVICE_PME,
+
+	.probe		= pcie_pme_probe,
+	.remove		= pcie_pme_remove,
+	.suspend	= pcie_pme_suspend,
+	.resume		= pcie_pme_resume,
+};
+
+static int __init pcie_pme_service_init(void)
+{
+	if (disabled)
+		return -EINVAL;
+	return pcie_port_service_register(&pcie_pme_driver);
+}
+
+static void __exit pcie_pme_service_exit(void)
+{
+	pcie_port_service_unregister(&pcie_pme_driver);
+}
+
+module_init(pcie_pme_service_init);
+module_exit(pcie_pme_service_exit);