diff mbox

[PATCHv4,next,1/3] pci: Add is_removed state

Message ID 1477695497-6207-2-git-send-email-keith.busch@intel.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Keith Busch Oct. 28, 2016, 10:58 p.m. UTC
This adds a new state for devices that were once in the system but were
unexpectedly removed. This lets device tear-down functions observe that
the device is no longer accessible, so they may skip attempting to
initialize the hardware.

The pciehp and pcie-dpc drivers are aware of when the link is down,
so they explicitly set this flag when their handlers detect the device
is gone.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Cc: Lukas Wunner <lukas@wunner.de>
---
 drivers/pci/hotplug/pciehp_pci.c | 5 +++++
 drivers/pci/pcie/pcie-dpc.c      | 4 ++++
 include/linux/pci.h              | 7 +++++++
 3 files changed, 16 insertions(+)

Comments

Lukas Wunner Oct. 31, 2016, 10:41 a.m. UTC | #1
On Fri, Oct 28, 2016 at 06:58:15PM -0400, Keith Busch wrote:
> This adds a new state for devices that were once in the system, but
> unexpectedly removed. This is so device tear down functions can observe
> the device is not accessible so it may skip attempting to initialize
> the hardware.
> 
> The pciehp and pcie-dpc drivers are aware of when the link is down,
> so these explicitly set this flag when its handlers detect the device
> is gone.
> 
> Signed-off-by: Keith Busch <keith.busch@intel.com>

Reviewed-by: Lukas Wunner <lukas@wunner.de>

I'll send a follow-up patch in a few minutes which leverages the
is_removed flag to fix a soft lockup on surprise removal of the
Apple Thunderbolt Gigabit Ethernet adapter.

Thanks,

Lukas

> ---
>  drivers/pci/hotplug/pciehp_pci.c | 5 +++++
>  drivers/pci/pcie/pcie-dpc.c      | 4 ++++
>  include/linux/pci.h              | 7 +++++++
>  3 files changed, 16 insertions(+)
> 
> diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
> index 9e69403..7560961 100644
> --- a/drivers/pci/hotplug/pciehp_pci.c
> +++ b/drivers/pci/hotplug/pciehp_pci.c
> @@ -109,6 +109,11 @@ int pciehp_unconfigure_device(struct slot *p_slot)
>  				break;
>  			}
>  		}
> +		if (!presence) {
> +			pci_set_removed(dev, NULL);
> +			if (pci_has_subordinate(dev))
> +				pci_walk_bus(dev->subordinate, pci_set_removed, NULL);
> +		}
>  		pci_stop_and_remove_bus_device(dev);
>  		/*
>  		 * Ensure that no new Requests will be generated from
> diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c
> index 9811b14..7818c88 100644
> --- a/drivers/pci/pcie/pcie-dpc.c
> +++ b/drivers/pci/pcie/pcie-dpc.c
> @@ -14,6 +14,7 @@
>  #include <linux/init.h>
>  #include <linux/pci.h>
>  #include <linux/pcieport_if.h>
> +#include "../pci.h"
>  
>  struct dpc_dev {
>  	struct pcie_device	*dev;
> @@ -46,6 +47,9 @@ static void interrupt_event_handler(struct work_struct *work)
>  	list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
>  					 bus_list) {
>  		pci_dev_get(dev);
> +		pci_set_removed(dev, NULL);
> +		if (pci_has_subordinate(dev))
> +			pci_walk_bus(dev->subordinate, pci_set_removed, NULL);
>  		pci_stop_and_remove_bus_device(dev);
>  		pci_dev_put(dev);
>  	}
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 0e49f70..2115d19 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -341,6 +341,7 @@ struct pci_dev {
>  	unsigned int	multifunction:1;/* Part of multi-function device */
>  	/* keep track of device state */
>  	unsigned int	is_added:1;
> +	unsigned int	is_removed:1;	/* device was surprise removed */
>  	unsigned int	is_busmaster:1; /* device is busmaster */
>  	unsigned int	no_msi:1;	/* device may not use msi */
>  	unsigned int	no_64bit_msi:1; /* device may only use 32-bit MSIs */
> @@ -417,6 +418,12 @@ static inline int pci_channel_offline(struct pci_dev *pdev)
>  	return (pdev->error_state != pci_channel_io_normal);
>  }
>  
> +static inline int pci_set_removed(struct pci_dev *pdev, void *unused)
> +{
> +	pdev->is_removed = 1;
> +	return 0;
> +}
> +
>  struct pci_host_bridge {
>  	struct device dev;
>  	struct pci_bus *bus;		/* root bus */
> -- 
> 2.7.2
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Dec. 13, 2016, 8:56 p.m. UTC | #2
On Fri, Oct 28, 2016 at 06:58:15PM -0400, Keith Busch wrote:
> This adds a new state for devices that were once in the system, but
> unexpectedly removed. This is so device tear down functions can observe
> the device is not accessible so it may skip attempting to initialize
> the hardware.
> 
> The pciehp and pcie-dpc drivers are aware of when the link is down,
> so these explicitly set this flag when its handlers detect the device
> is gone.
> 
> Signed-off-by: Keith Busch <keith.busch@intel.com>
> Cc: Lukas Wunner <lukas@wunner.de>
> ---
>  drivers/pci/hotplug/pciehp_pci.c | 5 +++++
>  drivers/pci/pcie/pcie-dpc.c      | 4 ++++
>  include/linux/pci.h              | 7 +++++++
>  3 files changed, 16 insertions(+)
> 
> diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
> index 9e69403..7560961 100644
> --- a/drivers/pci/hotplug/pciehp_pci.c
> +++ b/drivers/pci/hotplug/pciehp_pci.c
> @@ -109,6 +109,11 @@ int pciehp_unconfigure_device(struct slot *p_slot)
>  				break;
>  			}
>  		}
> +		if (!presence) {
> +			pci_set_removed(dev, NULL);
> +			if (pci_has_subordinate(dev))
> +				pci_walk_bus(dev->subordinate, pci_set_removed, NULL);
> +		}
>  		pci_stop_and_remove_bus_device(dev);
>  		/*
>  		 * Ensure that no new Requests will be generated from
> diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c
> index 9811b14..7818c88 100644
> --- a/drivers/pci/pcie/pcie-dpc.c
> +++ b/drivers/pci/pcie/pcie-dpc.c
> @@ -14,6 +14,7 @@
>  #include <linux/init.h>
>  #include <linux/pci.h>
>  #include <linux/pcieport_if.h>
> +#include "../pci.h"
>  
>  struct dpc_dev {
>  	struct pcie_device	*dev;
> @@ -46,6 +47,9 @@ static void interrupt_event_handler(struct work_struct *work)
>  	list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
>  					 bus_list) {
>  		pci_dev_get(dev);
> +		pci_set_removed(dev, NULL);
> +		if (pci_has_subordinate(dev))
> +			pci_walk_bus(dev->subordinate, pci_set_removed, NULL);
>  		pci_stop_and_remove_bus_device(dev);
>  		pci_dev_put(dev);
>  	}
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 0e49f70..2115d19 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -341,6 +341,7 @@ struct pci_dev {
>  	unsigned int	multifunction:1;/* Part of multi-function device */
>  	/* keep track of device state */
>  	unsigned int	is_added:1;
> +	unsigned int	is_removed:1;	/* device was surprise removed */
>  	unsigned int	is_busmaster:1; /* device is busmaster */
>  	unsigned int	no_msi:1;	/* device may not use msi */
>  	unsigned int	no_64bit_msi:1; /* device may only use 32-bit MSIs */
> @@ -417,6 +418,12 @@ static inline int pci_channel_offline(struct pci_dev *pdev)
>  	return (pdev->error_state != pci_channel_io_normal);
>  }
>  
> +static inline int pci_set_removed(struct pci_dev *pdev, void *unused)
> +{
> +	pdev->is_removed = 1;

This makes me slightly worried because this is a bitfield and there's
no locking.  A concurrent write to some nearby field can corrupt
things.  It doesn't look *likely*, but it's a lot of work to be
convinced that this is completely safe, especially since the writer is
running on behalf of the bridge, and the target is a child of the
bridge.

The USB HCD_FLAG_DEAD and HCD_FLAG_HW_ACCESSIBLE flags are somewhat
similar.  Maybe we can leverage some of that design?

> +	return 0;
> +}
> +
>  struct pci_host_bridge {
>  	struct device dev;
>  	struct pci_bus *bus;		/* root bus */
> -- 
> 2.7.2
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Keith Busch Dec. 13, 2016, 11:07 p.m. UTC | #3
On Tue, Dec 13, 2016 at 02:56:14PM -0600, Bjorn Helgaas wrote:
> On Fri, Oct 28, 2016 at 06:58:15PM -0400, Keith Busch wrote:
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 0e49f70..2115d19 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -341,6 +341,7 @@ struct pci_dev {
> >  	unsigned int	multifunction:1;/* Part of multi-function device */
> >  	/* keep track of device state */
> >  	unsigned int	is_added:1;
> > +	unsigned int	is_removed:1;	/* device was surprise removed */
> >  	unsigned int	is_busmaster:1; /* device is busmaster */
> >  	unsigned int	no_msi:1;	/* device may not use msi */
> >  	unsigned int	no_64bit_msi:1; /* device may only use 32-bit MSIs */
> > @@ -417,6 +418,12 @@ static inline int pci_channel_offline(struct pci_dev *pdev)
> >  	return (pdev->error_state != pci_channel_io_normal);
> >  }
> >  
> > +static inline int pci_set_removed(struct pci_dev *pdev, void *unused)
> > +{
> > +	pdev->is_removed = 1;
> 
> This makes me slightly worried because this is a bitfield and there's
> no locking.  A concurrent write to some nearby field can corrupt
> things.  It doesn't look *likely*, but it's a lot of work to be
> convinced that this is completely safe, especially since the writer is
> running on behalf of the bridge, and the target is a child of the
> bridge.
> 
> The USB HCD_FLAG_DEAD and HCD_FLAG_HW_ACCESSIBLE flags are somewhat
> similar.  Maybe we can leverage some of that design?

A bit field with atomic accessors sounds good to me. Do you want to
see all the struct pci_dev bit flags converted to that model? If so,
I can send you a prep patch that does that first.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lukas Wunner Dec. 13, 2016, 11:54 p.m. UTC | #4
On Tue, Dec 13, 2016 at 02:56:14PM -0600, Bjorn Helgaas wrote:
> On Fri, Oct 28, 2016 at 06:58:15PM -0400, Keith Busch wrote:
[snip]
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 0e49f70..2115d19 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -341,6 +341,7 @@ struct pci_dev {
> >  	unsigned int	multifunction:1;/* Part of multi-function device */
> >  	/* keep track of device state */
> >  	unsigned int	is_added:1;
> > +	unsigned int	is_removed:1;	/* device was surprise removed */
> >  	unsigned int	is_busmaster:1; /* device is busmaster */
> >  	unsigned int	no_msi:1;	/* device may not use msi */
> >  	unsigned int	no_64bit_msi:1; /* device may only use 32-bit MSIs */
> > @@ -417,6 +418,12 @@ static inline int pci_channel_offline(struct pci_dev *pdev)
> >  	return (pdev->error_state != pci_channel_io_normal);
> >  }
> >  
> > +static inline int pci_set_removed(struct pci_dev *pdev, void *unused)
> > +{
> > +	pdev->is_removed = 1;
> 
> This makes me slightly worried because this is a bitfield and there's
> no locking.  A concurrent write to some nearby field can corrupt
> things.  It doesn't look *likely*, but it's a lot of work to be
> convinced that this is completely safe, especially since the writer is
> running on behalf of the bridge, and the target is a child of the
> bridge.
> 
> The USB HCD_FLAG_DEAD and HCD_FLAG_HW_ACCESSIBLE flags are somewhat
> similar.  Maybe we can leverage some of that design?

Back in October I suggested leveraging the error_state field in struct
pci_dev.  That's an enum defined at the top of include/linux/pci.h
with values pci_channel_io_normal, pci_channel_io_frozen and
pci_channel_io_perm_failure.  I suggested adding a removed state.
The benefit is that lots of drivers already check pci_channel_offline()
before accessing a device, so without any further changes they would
treat surprise-removed devices properly.

However Keith responded:
"I'd be happy if we can reuse that, but concerned about overloading
error_state's intended purpose for AER. The conditions under which an
'is_removed' may be set can also create AER events, and the aer driver
overrides the error_state."
(http://www.spinics.net/lists/linux-pci/msg55417.html)

So it would seem to require at least a modification of the AER driver
to not overwrite a pci_channel_io_removed state.

Best regards,

Lukas
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Dec. 14, 2016, 2:50 a.m. UTC | #5
[+cc Alan, Greg]

On Tue, Dec 13, 2016 at 06:07:31PM -0500, Keith Busch wrote:
> On Tue, Dec 13, 2016 at 02:56:14PM -0600, Bjorn Helgaas wrote:
> > On Fri, Oct 28, 2016 at 06:58:15PM -0400, Keith Busch wrote:
> > > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > > index 0e49f70..2115d19 100644
> > > --- a/include/linux/pci.h
> > > +++ b/include/linux/pci.h
> > > @@ -341,6 +341,7 @@ struct pci_dev {
> > >  	unsigned int	multifunction:1;/* Part of multi-function device */
> > >  	/* keep track of device state */
> > >  	unsigned int	is_added:1;
> > > +	unsigned int	is_removed:1;	/* device was surprise removed */
> > >  	unsigned int	is_busmaster:1; /* device is busmaster */
> > >  	unsigned int	no_msi:1;	/* device may not use msi */
> > >  	unsigned int	no_64bit_msi:1; /* device may only use 32-bit MSIs */
> > > @@ -417,6 +418,12 @@ static inline int pci_channel_offline(struct pci_dev *pdev)
> > >  	return (pdev->error_state != pci_channel_io_normal);
> > >  }
> > >  
> > > +static inline int pci_set_removed(struct pci_dev *pdev, void *unused)
> > > +{
> > > +	pdev->is_removed = 1;
> > 
> > This makes me slightly worried because this is a bitfield and there's
> > no locking.  A concurrent write to some nearby field can corrupt
> > things.  It doesn't look *likely*, but it's a lot of work to be
> > convinced that this is completely safe, especially since the writer is
> > running on behalf of the bridge, and the target is a child of the
> > bridge.
> > 
> > The USB HCD_FLAG_DEAD and HCD_FLAG_HW_ACCESSIBLE flags are somewhat
> > similar.  Maybe we can leverage some of that design?
> 
> A bit field with atomic accessors sounds good to me. Do you want to
> see all the struct pci_dev bit flags converted to that model? If so,
> I can send you a prep patch that does that first.

This is still blue-sky, "what if?" thinking on my part.  I was
starting to wonder if we could make something generic that could be
used for your is_removed work and also for the USB stuff.

The USB HCD_FLAG_DEAD is set by usb_hc_died(), which is frequently
used by drivers after an MMIO read returns ~0.  HCD_FLAG_HW_ACCESSIBLE
seems to encode part of the idea of "this device is powered up enough
to respond", which is sort of similar to what PCI does with
dev->current_state.

If there were generic PCI interfaces like pci_dev_died() and a
pci_dev_dead() predicate, I wonder if USB could use them instead of
rolling their own?

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas Dec. 14, 2016, 2:54 a.m. UTC | #6
On Tue, Dec 13, 2016 at 08:50:07PM -0600, Bjorn Helgaas wrote:
> [+cc Alan, Greg]
> 
> On Tue, Dec 13, 2016 at 06:07:31PM -0500, Keith Busch wrote:
> > On Tue, Dec 13, 2016 at 02:56:14PM -0600, Bjorn Helgaas wrote:
> > > On Fri, Oct 28, 2016 at 06:58:15PM -0400, Keith Busch wrote:
> > > > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > > > index 0e49f70..2115d19 100644
> > > > --- a/include/linux/pci.h
> > > > +++ b/include/linux/pci.h
> > > > @@ -341,6 +341,7 @@ struct pci_dev {
> > > >  	unsigned int	multifunction:1;/* Part of multi-function device */
> > > >  	/* keep track of device state */
> > > >  	unsigned int	is_added:1;
> > > > +	unsigned int	is_removed:1;	/* device was surprise removed */
> > > >  	unsigned int	is_busmaster:1; /* device is busmaster */
> > > >  	unsigned int	no_msi:1;	/* device may not use msi */
> > > >  	unsigned int	no_64bit_msi:1; /* device may only use 32-bit MSIs */
> > > > @@ -417,6 +418,12 @@ static inline int pci_channel_offline(struct pci_dev *pdev)
> > > >  	return (pdev->error_state != pci_channel_io_normal);
> > > >  }
> > > >  
> > > > +static inline int pci_set_removed(struct pci_dev *pdev, void *unused)
> > > > +{
> > > > +	pdev->is_removed = 1;
> > > 
> > > This makes me slightly worried because this is a bitfield and there's
> > > no locking.  A concurrent write to some nearby field can corrupt
> > > things.  It doesn't look *likely*, but it's a lot of work to be
> > > convinced that this is completely safe, especially since the writer is
> > > running on behalf of the bridge, and the target is a child of the
> > > bridge.
> > > 
> > > The USB HCD_FLAG_DEAD and HCD_FLAG_HW_ACCESSIBLE flags are somewhat
> > > similar.  Maybe we can leverage some of that design?
> > 
> > A bit field with atomic accessors sounds good to me. Do you want to
> > see all the struct pci_dev bit flags converted to that model? If so,
> > I can send you a prep patch that does that first.
> 
> This is still blue-sky, "what if?" thinking on my part.  I was
> starting to wonder if we could make something generic that could be
> used for your is_removed work and also for the USB stuff.
> 
> The USB HCD_FLAG_DEAD is set by usb_hc_died(), which is frequently
> used by drivers after an MMIO read returns ~0.  HCD_FLAG_HW_ACCESSIBLE
> seems to encode part of the idea of "this device is powered up enough
> to respond", which is sort of similar to what PCI does with
> dev->current_state.
> 
> If there were generic PCI interfaces like pci_dev_died() and a
> pci_dev_dead() predicate, I wonder if USB could use them instead of
> rolling their own?

But I guess all the world is not PCI -- there are non-PCI USB host
controllers, so it's not quite that simple.
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index 9e69403..7560961 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -109,6 +109,11 @@  int pciehp_unconfigure_device(struct slot *p_slot)
 				break;
 			}
 		}
+		if (!presence) {
+			pci_set_removed(dev, NULL);
+			if (pci_has_subordinate(dev))
+				pci_walk_bus(dev->subordinate, pci_set_removed, NULL);
+		}
 		pci_stop_and_remove_bus_device(dev);
 		/*
 		 * Ensure that no new Requests will be generated from
diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c
index 9811b14..7818c88 100644
--- a/drivers/pci/pcie/pcie-dpc.c
+++ b/drivers/pci/pcie/pcie-dpc.c
@@ -14,6 +14,7 @@ 
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/pcieport_if.h>
+#include "../pci.h"
 
 struct dpc_dev {
 	struct pcie_device	*dev;
@@ -46,6 +47,9 @@  static void interrupt_event_handler(struct work_struct *work)
 	list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
 					 bus_list) {
 		pci_dev_get(dev);
+		pci_set_removed(dev, NULL);
+		if (pci_has_subordinate(dev))
+			pci_walk_bus(dev->subordinate, pci_set_removed, NULL);
 		pci_stop_and_remove_bus_device(dev);
 		pci_dev_put(dev);
 	}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0e49f70..2115d19 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -341,6 +341,7 @@  struct pci_dev {
 	unsigned int	multifunction:1;/* Part of multi-function device */
 	/* keep track of device state */
 	unsigned int	is_added:1;
+	unsigned int	is_removed:1;	/* device was surprise removed */
 	unsigned int	is_busmaster:1; /* device is busmaster */
 	unsigned int	no_msi:1;	/* device may not use msi */
 	unsigned int	no_64bit_msi:1; /* device may only use 32-bit MSIs */
@@ -417,6 +418,12 @@  static inline int pci_channel_offline(struct pci_dev *pdev)
 	return (pdev->error_state != pci_channel_io_normal);
 }
 
+static inline int pci_set_removed(struct pci_dev *pdev, void *unused)
+{
+	pdev->is_removed = 1;
+	return 0;
+}
+
 struct pci_host_bridge {
 	struct device dev;
 	struct pci_bus *bus;		/* root bus */