Message ID | 60e02b87b526cdf2930400059d98704bf0a147d1.1585000084.git.sathyanarayanan.kuppuswamy@linux.intel.com (mailing list archive) |
---|---|
State | Accepted, archived |
Delegated to: | Bjorn Helgaas |
Headers | show |
Series | Add Error Disconnect Recover (EDR) support | expand |
Hi Bjorn, On 3/23/20 5:26 PM, sathyanarayanan.kuppuswamy@linux.intel.com wrote: > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > +void pcie_do_recovery(struct pci_dev *dev, > + enum pci_channel_state state, > + pci_ers_result_t (*reset_link)(struct pci_dev *pdev)) > { > pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; > struct pci_bus *bus; > @@ -206,9 +165,12 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, > pci_dbg(dev, "broadcast error_detected message\n"); > if (state == pci_channel_io_frozen) { > pci_walk_bus(bus, report_frozen_detected, &status); > - status = reset_link(dev, service); > - if if (reset_link) status = reset_link(dev);(status == PCI_ERS_RESULT_DISCONNECT > + status = reset_link(dev); Above line needs to be replaced as below. Since there is a possibility reset_link can NULL (eventhough currently its not true). if (reset_link) status = reset_link(dev); Shall I submit another version to add above fix on top of our pci/edr branch ? > + if ((status != PCI_ERS_RESULT_RECOVERED) && > + (status != PCI_ERS_RESULT_NEED_RESET)) { > + pci_dbg(dev, "link reset at upstream device failed\n"); > goto failed; > + } > } else { > pci_walk_bus(bus, report_normal_detected, &status); > } > diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h > index 1e673619b101..64b5e081cdb2 100644 > --- a/drivers/pci/pcie/portdrv.h > +++ b/drivers/pci/pcie/portdrv.h > @@ -92,9 +92,6 @@ struct pcie_port_service_driver { > /* Device driver may resume normal operations */ > void (*error_resume)(struct pci_dev *dev); > > - /* Link Reset Capability - AER service driver specific */ > - pci_ers_result_t (*reset_link)(struct pci_dev *dev); > - > int port_type; /* Type of the port this driver can handle */ > u32 service; /* Port service this device represents */ > > @@ -161,7 +158,5 @@ static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev) > } > #endif > > -struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, > - u32 service); > struct device *pcie_port_find_device(struct pci_dev *dev, u32 service); > #endif /* _PORTDRV_H_ */ > diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c > index 5075cb9e850c..50a9522ab07d 100644 > --- a/drivers/pci/pcie/portdrv_core.c > +++ b/drivers/pci/pcie/portdrv_core.c > @@ -458,27 +458,6 @@ static int find_service_iter(struct device *device, void *data) > return 0; > } > > -/** > - * pcie_port_find_service - find the service driver > - * @dev: PCI Express port the service is associated with > - * @service: Service to find > - * > - * Find PCI Express port service driver associated with given service > - */ > -struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, > - u32 service) > -{ > - struct pcie_port_service_driver *drv; > - struct portdrv_service_data pdrvs; > - > - pdrvs.drv = NULL; > - pdrvs.service = service; > - device_for_each_child(&dev->dev, &pdrvs, find_service_iter); > - > - drv = pdrvs.drv; > - return drv; > -} > - > /** > * pcie_port_find_device - find the struct device > * @dev: PCI Express port the service is associated with >
On Sat, Mar 28, 2020 at 02:12:48PM -0700, Kuppuswamy, Sathyanarayanan wrote: > On 3/23/20 5:26 PM, sathyanarayanan.kuppuswamy@linux.intel.com wrote: > > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > > > > +void pcie_do_recovery(struct pci_dev *dev, > > + enum pci_channel_state state, > > + pci_ers_result_t (*reset_link)(struct pci_dev *pdev)) > > { > > pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; > > struct pci_bus *bus; > > @@ -206,9 +165,12 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, > > pci_dbg(dev, "broadcast error_detected message\n"); > > if (state == pci_channel_io_frozen) { > > pci_walk_bus(bus, report_frozen_detected, &status); > > - status = reset_link(dev, service); > > - if if (reset_link) > status = reset_link(dev);(status == PCI_ERS_RESULT_DISCONNECT > > + status = reset_link(dev); > Above line needs to be replaced as below. Since there is a > possibility reset_link can NULL (eventhough currently its > not true). > if (reset_link) > status = reset_link(dev); > Shall I submit another version to add above fix on top of > our pci/edr branch ? No, I can squash that in if needed. But I don't actually think we *do* need it. All the callers supply a valid reset_link function pointer, and if somebody changes or adds a new one that doesn't, I'd rather take the null pointer exception and find out about it than silently ignore it. Bjorn
Hi Bjorn, On 3/28/20 2:32 PM, Bjorn Helgaas wrote: > On Sat, Mar 28, 2020 at 02:12:48PM -0700, Kuppuswamy, Sathyanarayanan wrote: >> On 3/23/20 5:26 PM, sathyanarayanan.kuppuswamy@linux.intel.com wrote: >>> From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> >>> >> >>> +void pcie_do_recovery(struct pci_dev *dev, >>> + enum pci_channel_state state, >>> + pci_ers_result_t (*reset_link)(struct pci_dev *pdev)) >>> { >>> pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; >>> struct pci_bus *bus; >>> @@ -206,9 +165,12 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, >>> pci_dbg(dev, "broadcast error_detected message\n"); >>> if (state == pci_channel_io_frozen) { >>> pci_walk_bus(bus, report_frozen_detected, &status); >>> - status = reset_link(dev, service); >>> - if if (reset_link) >> status = reset_link(dev);(status == PCI_ERS_RESULT_DISCONNECT >>> + status = reset_link(dev); >> Above line needs to be replaced as below. Since there is a >> possibility reset_link can NULL (eventhough currently its >> not true). >> if (reset_link) >> status = reset_link(dev); >> Shall I submit another version to add above fix on top of >> our pci/edr branch ? > > No, I can squash that in if needed. > > But I don't actually think we *do* need it. All the callers supply a > valid reset_link function pointer, and if somebody changes or adds a > new one that doesn't, I'd rather take the null pointer exception and > find out about it than silently ignore it. But the documentation says "If reset_link is not NULL, recovery function will use it to reset the link." It considers NULL as a possible case. So I think its better to allow that case with a pci_warn() message. > > Bjorn >
On Sat, Mar 28, 2020 at 02:55:50PM -0700, Kuppuswamy, Sathyanarayanan wrote: > On 3/28/20 2:32 PM, Bjorn Helgaas wrote: > > On Sat, Mar 28, 2020 at 02:12:48PM -0700, Kuppuswamy, Sathyanarayanan wrote: > > > On 3/23/20 5:26 PM, sathyanarayanan.kuppuswamy@linux.intel.com wrote: > > > > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > > > > > > > > > > +void pcie_do_recovery(struct pci_dev *dev, > > > > + enum pci_channel_state state, > > > > + pci_ers_result_t (*reset_link)(struct pci_dev *pdev)) > > > > { > > > > pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; > > > > struct pci_bus *bus; > > > > @@ -206,9 +165,12 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, > > > > pci_dbg(dev, "broadcast error_detected message\n"); > > > > if (state == pci_channel_io_frozen) { > > > > pci_walk_bus(bus, report_frozen_detected, &status); > > > > - status = reset_link(dev, service); > > > > - if if (reset_link) > > > status = reset_link(dev);(status == PCI_ERS_RESULT_DISCONNECT > > > > + status = reset_link(dev); > > > Above line needs to be replaced as below. Since there is a > > > possibility reset_link can NULL (eventhough currently its > > > not true). > > > if (reset_link) > > > status = reset_link(dev); > > > Shall I submit another version to add above fix on top of > > > our pci/edr branch ? > > > > No, I can squash that in if needed. > > > > But I don't actually think we *do* need it. All the callers supply a > > valid reset_link function pointer, and if somebody changes or adds a > > new one that doesn't, I'd rather take the null pointer exception and > > find out about it than silently ignore it. > > But the documentation says "If reset_link is not NULL, recovery function > will use it to reset the link." It considers NULL as a possible case. > So I think its better to allow that case with a pci_warn() message. I think we should rework the documentation to remove that. pcie_do_recovery() is internal to the PCI core and not directly relevant to drivers.
diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst index 18bdefaafd1a..afbd8c1c321d 100644 --- a/Documentation/PCI/pcieaer-howto.rst +++ b/Documentation/PCI/pcieaer-howto.rst @@ -156,12 +156,6 @@ default reset_link function, but different upstream ports might have different specifications to reset pci express link, so all upstream ports should provide their own reset_link functions. -In struct pcie_port_service_driver, a new pointer, reset_link, is -added. -:: - - pci_ers_result_t (*reset_link) (struct pci_dev *dev); - Section 3.2.2.2 provides more detailed info on when to call reset_link. @@ -212,15 +206,10 @@ error_detected(dev, pci_channel_io_frozen) to all drivers within a hierarchy in question. Then, performing link reset at upstream is necessary. As different kinds of devices might use different approaches to reset link, AER port service driver is required to provide the -function to reset link. Firstly, kernel looks for if the upstream -component has an aer driver. If it has, kernel uses the reset_link -callback of the aer driver. If the upstream component has no aer driver -and the port is downstream port, we will perform a hot reset as the -default by setting the Secondary Bus Reset bit of the Bridge Control -register associated with the downstream port. As for upstream ports, -they should provide their own aer service drivers with reset_link -function. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER and -reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes +function to reset link via callback parameter of pcie_do_recovery() +function. If reset_link is not NULL, recovery function will use it +to reset the link. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER +and reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes to mmio_enabled. helper functions diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 6394e7746fb5..3e5efb83e9a2 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -548,7 +548,7 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) /* PCI error reporting and recovery */ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, - u32 service); + pci_ers_result_t (*reset_link)(struct pci_dev *pdev)); bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 4a818b07a1af..c0540c3761dc 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -102,6 +102,7 @@ struct aer_stats { #define ERR_UNCOR_ID(d) (d >> 16) static int pcie_aer_disable; +static pci_ers_result_t aer_root_reset(struct pci_dev *dev); void pci_no_aer(void) { @@ -1053,11 +1054,9 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) info->status); pci_aer_clear_device_status(dev); } else if (info->severity == AER_NONFATAL) - pcie_do_recovery(dev, pci_channel_io_normal, - PCIE_PORT_SERVICE_AER); + pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset); else if (info->severity == AER_FATAL) - pcie_do_recovery(dev, pci_channel_io_frozen, - PCIE_PORT_SERVICE_AER); + pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset); pci_dev_put(dev); } @@ -1094,10 +1093,10 @@ static void aer_recover_work_func(struct work_struct *work) cper_print_aer(pdev, entry.severity, entry.regs); if (entry.severity == AER_NONFATAL) pcie_do_recovery(pdev, pci_channel_io_normal, - PCIE_PORT_SERVICE_AER); + aer_root_reset); else if (entry.severity == AER_FATAL) pcie_do_recovery(pdev, pci_channel_io_frozen, - PCIE_PORT_SERVICE_AER); + aer_root_reset); pci_dev_put(pdev); } } @@ -1501,7 +1500,6 @@ static struct pcie_port_service_driver aerdriver = { .probe = aer_probe, .remove = aer_remove, - .reset_link = aer_root_reset, }; /** diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 5c2e9d45a269..0c45133a9a91 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -235,7 +235,7 @@ static irqreturn_t dpc_handler(int irq, void *context) } /* We configure DPC so it only triggers on ERR_FATAL */ - pcie_do_recovery(pdev, pci_channel_io_frozen, PCIE_PORT_SERVICE_DPC); + pcie_do_recovery(pdev, pci_channel_io_frozen, dpc_reset_link); return IRQ_HANDLED; } @@ -321,7 +321,6 @@ static struct pcie_port_service_driver dpcdriver = { .service = PCIE_PORT_SERVICE_DPC, .probe = dpc_probe, .remove = dpc_remove, - .reset_link = dpc_reset_link, }; int __init pcie_dpc_init(void) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 6e52591a4722..caeb6f5d9970 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -146,50 +146,9 @@ static int report_resume(struct pci_dev *dev, void *data) return 0; } -/** - * default_reset_link - default reset function - * @dev: pointer to pci_dev data structure - * - * Invoked when performing link reset on a Downstream Port or a - * Root Port with no aer driver. - */ -static pci_ers_result_t default_reset_link(struct pci_dev *dev) -{ - int rc; - - rc = pci_bus_error_reset(dev); - pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n"); - return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) -{ - pci_ers_result_t status; - struct pcie_port_service_driver *driver = NULL; - - driver = pcie_port_find_service(dev, service); - if (driver && driver->reset_link) { - status = driver->reset_link(dev); - } else if (pcie_downstream_port(dev)) { - status = default_reset_link(dev); - } else { - pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n", - pci_name(dev)); - return PCI_ERS_RESULT_DISCONNECT; - } - - if ((status != PCI_ERS_RESULT_RECOVERED) && - (status != PCI_ERS_RESULT_NEED_RESET)) { - pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", - pci_name(dev)); - return PCI_ERS_RESULT_DISCONNECT; - } - - return status; -} - -void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, - u32 service) +void pcie_do_recovery(struct pci_dev *dev, + enum pci_channel_state state, + pci_ers_result_t (*reset_link)(struct pci_dev *pdev)) { pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; struct pci_bus *bus; @@ -206,9 +165,12 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, pci_dbg(dev, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { pci_walk_bus(bus, report_frozen_detected, &status); - status = reset_link(dev, service); - if (status == PCI_ERS_RESULT_DISCONNECT) + status = reset_link(dev); + if ((status != PCI_ERS_RESULT_RECOVERED) && + (status != PCI_ERS_RESULT_NEED_RESET)) { + pci_dbg(dev, "link reset at upstream device failed\n"); goto failed; + } } else { pci_walk_bus(bus, report_normal_detected, &status); } diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 1e673619b101..64b5e081cdb2 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -92,9 +92,6 @@ struct pcie_port_service_driver { /* Device driver may resume normal operations */ void (*error_resume)(struct pci_dev *dev); - /* Link Reset Capability - AER service driver specific */ - pci_ers_result_t (*reset_link)(struct pci_dev *dev); - int port_type; /* Type of the port this driver can handle */ u32 service; /* Port service this device represents */ @@ -161,7 +158,5 @@ static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev) } #endif -struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, - u32 service); struct device *pcie_port_find_device(struct pci_dev *dev, u32 service); #endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 5075cb9e850c..50a9522ab07d 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -458,27 +458,6 @@ static int find_service_iter(struct device *device, void *data) return 0; } -/** - * pcie_port_find_service - find the service driver - * @dev: PCI Express port the service is associated with - * @service: Service to find - * - * Find PCI Express port service driver associated with given service - */ -struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, - u32 service) -{ - struct pcie_port_service_driver *drv; - struct portdrv_service_data pdrvs; - - pdrvs.drv = NULL; - pdrvs.service = service; - device_for_each_child(&dev->dev, &pdrvs, find_service_iter); - - drv = pdrvs.drv; - return drv; -} - /** * pcie_port_find_device - find the struct device * @dev: PCI Express port the service is associated with