Message ID | b2494cde6711c0157cbd93b4fc4ec85e987af8fe.1581617873.git.sathyanarayanan.kuppuswamy@linux.intel.com (mailing list archive) |
---|---|
State | Superseded, archived |
Delegated to: | Bjorn Helgaas |
Headers | show |
Series | Add Error Disconnect Recover (EDR) support | expand |
On Thu, Feb 13, 2020 at 10:20:15AM -0800, sathyanarayanan.kuppuswamy@linux.intel.com wrote: > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > This is a preparatory patch for adding EDR support. > > As per the Downstream Port Containment Related Enhancements ECN to the > PCI Firmware Specification r3.2, sec 4.5.1, table 4-6, If DPC is > controlled by firmware, firmware is responsible for initializing > Downstream Port Containment Extended Capability Structures per firmware > policy. Further, the OS is permitted to read or write DPC Control and > Status registers of a port while processing an Error Disconnect Recover > notification from firmware on that port. Error Disconnect Recover > notification processing begins with the Error Disconnect Recover notify > from Firmware, and ends when the OS releases DPC by clearing the DPC > Trigger Status bit.Firmware can read DPC Trigger Status bit to determine > the ownership of DPC Control and Status registers. Firmware is not > permitted to write to DPC Control and Status registers if DPC Trigger > Status is set i.e. the link is in DPC state. Outside of the Error > Disconnect Recover notification processing window, the OS is not > permitted to modify DPC Control or Status registers; only firmware > is allowed to. > > To add EDR support we need to re-use some of the existing DPC, > AER and pCIE error recovery functions. So add necessary interfaces. > > Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > --- > drivers/pci/pci.h | 8 ++++ > drivers/pci/pcie/aer.c | 39 ++++++++++++++------ > drivers/pci/pcie/dpc.c | 84 +++++++++++++++++++++++++----------------- > drivers/pci/pcie/dpc.h | 20 ++++++++++ > drivers/pci/pcie/err.c | 30 ++++++++++++--- > 5 files changed, 131 insertions(+), 50 deletions(-) > create mode 100644 drivers/pci/pcie/dpc.h > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h > index 6394e7746fb5..136f27cf3871 100644 > --- a/drivers/pci/pci.h > +++ b/drivers/pci/pci.h > @@ -443,6 +443,9 @@ struct aer_err_info { > > int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); > void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); > +int pci_aer_clear_err_uncor_status(struct pci_dev *dev); > +void pci_aer_clear_err_fatal_status(struct pci_dev *dev); > +int pci_aer_clear_err_status_regs(struct pci_dev *dev); > #endif /* CONFIG_PCIEAER */ > > #ifdef CONFIG_PCIE_DPC > @@ -549,6 +552,11 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) > /* PCI error reporting and recovery */ > void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, > u32 service); > +pci_ers_result_t pcie_do_recovery_common(struct pci_dev *dev, > + enum pci_channel_state state, > + u32 service, > + pci_ers_result_t (*reset_cb)(void *cb_data), > + void *cb_data); > > bool pcie_wait_for_link(struct pci_dev *pdev, bool active); > #ifdef CONFIG_PCIEASPM > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c > index 4a818b07a1af..399836aa07f4 100644 > --- a/drivers/pci/pcie/aer.c > +++ b/drivers/pci/pcie/aer.c > @@ -376,7 +376,7 @@ void pci_aer_clear_device_status(struct pci_dev *dev) > pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); > } > > -int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) > +int pci_aer_clear_err_uncor_status(struct pci_dev *dev) > { > int pos; > u32 status, sev; > @@ -385,9 +385,6 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) > if (!pos) > return -EIO; > > - if (pcie_aer_get_firmware_first(dev)) > - return -EIO; > - > /* Clear status bits for ERR_NONFATAL errors only */ > pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); > pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); > @@ -397,9 +394,17 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) > > return 0; > } > + > +int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) > +{ > + if (pcie_aer_get_firmware_first(dev)) > + return -EIO; > + > + return pci_aer_clear_err_uncor_status(dev); > +} > EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); > > -void pci_aer_clear_fatal_status(struct pci_dev *dev) > +void pci_aer_clear_err_fatal_status(struct pci_dev *dev) > { > int pos; > u32 status, sev; > @@ -408,9 +413,6 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev) > if (!pos) > return; > > - if (pcie_aer_get_firmware_first(dev)) > - return; > - > /* Clear status bits for ERR_FATAL errors only */ > pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); > pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); > @@ -419,7 +421,15 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev) > pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); > } > > -int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) > +void pci_aer_clear_fatal_status(struct pci_dev *dev) > +{ > + if (pcie_aer_get_firmware_first(dev)) > + return; > + > + return pci_aer_clear_err_fatal_status(dev); > +} > + > +int pci_aer_clear_err_status_regs(struct pci_dev *dev) > { > int pos; > u32 status; > @@ -432,9 +442,6 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) > if (!pos) > return -EIO; > > - if (pcie_aer_get_firmware_first(dev)) > - return -EIO; > - > port_type = pci_pcie_type(dev); > if (port_type == PCI_EXP_TYPE_ROOT_PORT) { > pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); > @@ -450,6 +457,14 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) > return 0; > } > > +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) > +{ > + if (pcie_aer_get_firmware_first(dev)) > + return -EIO; > + > + return pci_aer_clear_err_status_regs(dev); > +} We started with these: pci_cleanup_aer_uncorrect_error_status() pci_aer_clear_fatal_status() pci_cleanup_aer_error_status_regs() This was already a mess. They do basically similar things, but the function names are a complete jumble. Let's start with preliminary patches to name them consistently, e.g., pci_aer_clear_nonfatal_status() pci_aer_clear_fatal_status() pci_aer_clear_status() Now, for EDR you eventually add this in edr_handle_event(): edr_handle_event() { ... pci_aer_clear_err_uncor_status(pdev); pci_aer_clear_err_fatal_status(pdev); pci_aer_clear_err_status_regs(pdev); I see that this path needs to clear the status even in the firmware-first case, so you do need some change for that. But pci_aer_clear_err_status_regs() does exactly the same thing as pci_aer_clear_err_uncor_status() and pci_aer_clear_err_fatal_status() plus a little more (clearing PCI_ERR_ROOT_STATUS), so it should be sufficient to just call pci_aer_clear_err_status_regs(). So I don't think you need to make wrappers for pci_aer_clear_nonfatal_status() and pci_aer_clear_fatal_status() at all since they're not needed by the EDR path. You *do* need a wrapper for pci_aer_clear_status(), but instead of just adding random words ("err", "regs") to the name, please name it something like pci_aer_raw_clear_status() as a hint that it skips some sort of check. I would split this into a separate patch. This patch contains a bunch of things that aren't really related except that they're needed for EDR. I think it will be more readable if each patch just does one piece of it. > void pci_save_aer_state(struct pci_dev *dev) > { > struct pci_cap_saved_state *save_state; > diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c > index 99fca8400956..acae12dbf9ff 100644 > --- a/drivers/pci/pcie/dpc.c > +++ b/drivers/pci/pcie/dpc.c > @@ -15,15 +15,9 @@ > #include <linux/pci.h> > > #include "portdrv.h" > +#include "dpc.h" > #include "../pci.h" > > -struct dpc_dev { > - struct pci_dev *pdev; > - u16 cap_pos; > - bool rp_extensions; > - u8 rp_log_size; > -}; Adding dpc.h shouldn't be in this patch because there's no need for a separate dpc.h file yet -- it's only included this one place. I'm not positive a dpc.h is needed at all -- see comments on patch [4/5]. > static const char * const rp_pio_error_string[] = { > "Configuration Request received UR Completion", /* Bit Position 0 */ > "Configuration Request received CA Completion", /* Bit Position 1 */ > @@ -117,36 +111,44 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc) > return 0; > } > > -static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) > +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc) > { I don't see why you need to split this into dpc_reset_link_common() and dpc_reset_link(). IIUC, you split it so the DPC driver path can supply a pci_dev * via dpc_handler pcie_do_recovery pcie_do_recovery_common(..., NULL, NULL) reset_link(..., NULL, NULL) driver->reset_link(pdev) dpc_reset_link(pdev) dpc = to_dpc_dev(pdev) dpc_reset_link_common(dpc) while the EDR path can supply a dpc_dev * via edr_handle_event pcie_do_recovery_common(..., edr_dpc_reset_link, dpc) reset_link(..., edr_dpc_reset_link, dpc) edr_dpc_reset_link(dpc) dpc_reset_link_common(dpc) But it looks like you *could* make these paths look like: dpc_handler pcie_do_recovery pcie_do_recovery_common(..., NULL, NULL) reset_link(..., NULL, NULL) driver->reset_link(pdev) dpc_reset_link(pdev) edr_handle_event pcie_do_recovery_common(..., dpc_reset_link, pdev) reset_link(..., dpc_reset_link, pdev) dpc_reset_link(pdev) and you wouldn't need edr_dpc_reset_link() at all and you wouldn't have to split dpc_reset_link_common() out. > - struct dpc_dev *dpc; > u16 cap; > > - /* > - * DPC disables the Link automatically in hardware, so it has > - * already been reset by the time we get here. > - */ > - dpc = to_dpc_dev(pdev); > cap = dpc->cap_pos; > > /* > * Wait until the Link is inactive, then clear DPC Trigger Status > * to allow the Port to leave DPC. > */ > - pcie_wait_for_link(pdev, false); > + pcie_wait_for_link(dpc->pdev, false); I'm hoping you don't need to split this out at all, but if you do, adding struct pci_dev *pdev = dpc->pdev; at the top will avoid these needless diffs. > if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc)) > return PCI_ERS_RESULT_DISCONNECT; > > - pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, > + pci_write_config_word(dpc->pdev, cap + PCI_EXP_DPC_STATUS, > PCI_EXP_DPC_STATUS_TRIGGER); > > - if (!pcie_wait_for_link(pdev, true)) > + if (!pcie_wait_for_link(dpc->pdev, true)) > return PCI_ERS_RESULT_DISCONNECT; > > return PCI_ERS_RESULT_RECOVERED; > } > > +static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) > +{ > + struct dpc_dev *dpc; > + > + /* > + * DPC disables the Link automatically in hardware, so it has > + * already been reset by the time we get here. > + */ > + dpc = to_dpc_dev(pdev); > + > + return dpc_reset_link_common(dpc); > + > +} > + > static void dpc_process_rp_pio_error(struct dpc_dev *dpc) > { > struct pci_dev *pdev = dpc->pdev; > @@ -224,10 +226,9 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev, > return 1; > } > > -static irqreturn_t dpc_handler(int irq, void *context) > +void dpc_process_error(struct dpc_dev *dpc) > { > struct aer_err_info info; > - struct dpc_dev *dpc = context; > struct pci_dev *pdev = dpc->pdev; > u16 cap = dpc->cap_pos, status, source, reason, ext_reason; > > @@ -257,6 +258,14 @@ static irqreturn_t dpc_handler(int irq, void *context) > pci_cleanup_aer_uncorrect_error_status(pdev); > pci_aer_clear_fatal_status(pdev); > } > +} > + > +static irqreturn_t dpc_handler(int irq, void *context) > +{ > + struct dpc_dev *dpc = context; > + struct pci_dev *pdev = dpc->pdev; > + > + dpc_process_error(dpc); > > /* We configure DPC so it only triggers on ERR_FATAL */ > pcie_do_recovery(pdev, pci_channel_io_frozen, PCIE_PORT_SERVICE_DPC); > @@ -282,6 +291,25 @@ static irqreturn_t dpc_irq(int irq, void *context) > return IRQ_HANDLED; > } > > +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc) > +{ Can you include the kzalloc() here so we don't have to do the alloc in pci_acpi_add_edr_notifier()? I think there's also a leak there: pci_acpi_add_edr_notifier() kzallocs a struct dpc_dev, but I don't see a corresponding kfree(). > + dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC); I think we need to check cap_pos for non-zero here. It's arguably safe today because portdrv only calls dpc_probe() when PCIE_PORT_SERVICE_DPC is set and we only set that if there's a DPC capability. But that's a long ways away from here and it's convoluted, so it's pretty hard to verify that it's safe. When we add EDR into the picture and call this from pci_acpi_add_edr_notifier() and edr_handle_event(), I'm pretty sure it's not safe at all because we have no idea whether the device has a DPC capability. Factoring out dpc_dev_init() and the kzalloc() would be a simple cleanup-type patch all by itself, so it could be separated out for ease of reviewing. > + dpc->pdev = pdev; > + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &dpc->cap); > + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &dpc->ctl); > + > + dpc->rp_extensions = (dpc->cap & PCI_EXP_DPC_CAP_RP_EXT); > + if (dpc->rp_extensions) { > + dpc->rp_log_size = > + (dpc->cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; > + if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) { > + pci_err(pdev, "RP PIO log size %u is invalid\n", > + dpc->rp_log_size); > + dpc->rp_log_size = 0; > + } > + } > +} > + > #define FLAG(x, y) (((x) & (y)) ? '+' : '-') > static int dpc_probe(struct pcie_device *dev) > { > @@ -298,8 +326,8 @@ static int dpc_probe(struct pcie_device *dev) > if (!dpc) > return -ENOMEM; > > - dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC); > - dpc->pdev = pdev; > + dpc_dev_init(pdev, dpc); > + > set_service_data(dev, dpc); > > status = devm_request_threaded_irq(device, dev->irq, dpc_irq, > @@ -311,18 +339,8 @@ static int dpc_probe(struct pcie_device *dev) > return status; > } > > - pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap); > - pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); > - > - dpc->rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT); > - if (dpc->rp_extensions) { > - dpc->rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; > - if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) { > - pci_err(pdev, "RP PIO log size %u is invalid\n", > - dpc->rp_log_size); > - dpc->rp_log_size = 0; > - } > - } > + ctl = dpc->ctl; > + cap = dpc->cap; > > ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; > pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); > diff --git a/drivers/pci/pcie/dpc.h b/drivers/pci/pcie/dpc.h > new file mode 100644 > index 000000000000..2d82bc917fcb > --- /dev/null > +++ b/drivers/pci/pcie/dpc.h > @@ -0,0 +1,20 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef DRIVERS_PCIE_DPC_H > +#define DRIVERS_PCIE_DPC_H > + > +#include <linux/pci.h> > + > +struct dpc_dev { > + struct pci_dev *pdev; > + u16 cap_pos; > + bool rp_extensions; > + u8 rp_log_size; > + u16 ctl; > + u16 cap; > +}; > + > +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc); > +void dpc_process_error(struct dpc_dev *dpc); > +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc); > + > +#endif /* DRIVERS_PCIE_DPC_H */ > diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c > index eefefe03857a..e7b9dfae9035 100644 > --- a/drivers/pci/pcie/err.c > +++ b/drivers/pci/pcie/err.c > @@ -162,11 +162,18 @@ static pci_ers_result_t default_reset_link(struct pci_dev *dev) > return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; > } > > -static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) > +static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service, > + pci_ers_result_t (*reset_cb)(void *cb_data), > + void *cb_data) This rejiggering of reset_link() and pcie_do_recovery() is unrelated to the rest (except that it's needed for EDR, of course), so maybe could be a separate patch. We could also consider changing the interface of pcie_do_recovery() to just add reset_cb and cb_data, and change the existing callers to pass NULL, NULL. Then we wouldn't need pcie_do_recovery_common(). I'm not sure which way would be better. > { > pci_ers_result_t status; > struct pcie_port_service_driver *driver = NULL; > > + if (reset_cb) { > + status = reset_cb(cb_data); > + goto done; > + } > + > driver = pcie_port_find_service(dev, service); > if (driver && driver->reset_link) { > status = driver->reset_link(dev); > @@ -178,6 +185,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) > return PCI_ERS_RESULT_DISCONNECT; > } > > +done: > if (status != PCI_ERS_RESULT_RECOVERED) { > pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", > pci_name(dev)); > @@ -187,8 +195,11 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) > return status; > } > > -void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, > - u32 service) > +pci_ers_result_t pcie_do_recovery_common(struct pci_dev *dev, > + enum pci_channel_state state, > + u32 service, > + pci_ers_result_t (*reset_cb)(void *cb_data), > + void *cb_data) > { > pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; > struct pci_bus *bus; > @@ -209,7 +220,7 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, > pci_walk_bus(bus, report_normal_detected, &status); > > if (state == pci_channel_io_frozen) { > - status = reset_link(dev, service); > + status = reset_link(dev, service, reset_cb, cb_data); > if (status != PCI_ERS_RESULT_RECOVERED) > goto failed; > } > @@ -240,11 +251,20 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, > pci_aer_clear_device_status(dev); > pci_cleanup_aer_uncorrect_error_status(dev); > pci_info(dev, "device recovery successful\n"); > - return; > + > + return status; > > failed: > pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); > > /* TODO: Should kernel panic here? */ > pci_info(dev, "device recovery failed\n"); > + > + return status; > +} > + > +void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, > + u32 service) > +{ > + pcie_do_recovery_common(dev, state, service, NULL, NULL); > } > -- > 2.21.0 >
Hi Bjorn, On 2/25/20 5:02 PM, Bjorn Helgaas wrote: > On Thu, Feb 13, 2020 at 10:20:15AM -0800, sathyanarayanan.kuppuswamy@linux.intel.com wrote: >> From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> >> >> This is a preparatory patch for adding EDR support. >> >> As per the Downstream Port Containment Related Enhancements ECN to the >> PCI Firmware Specification r3.2, sec 4.5.1, table 4-6, If DPC is >> controlled by firmware, firmware is responsible for initializing >> Downstream Port Containment Extended Capability Structures per firmware >> policy. Further, the OS is permitted to read or write DPC Control and >> Status registers of a port while processing an Error Disconnect Recover >> notification from firmware on that port. Error Disconnect Recover >> notification processing begins with the Error Disconnect Recover notify >> from Firmware, and ends when the OS releases DPC by clearing the DPC >> Trigger Status bit.Firmware can read DPC Trigger Status bit to determine >> the ownership of DPC Control and Status registers. Firmware is not >> permitted to write to DPC Control and Status registers if DPC Trigger >> Status is set i.e. the link is in DPC state. Outside of the Error >> Disconnect Recover notification processing window, the OS is not >> permitted to modify DPC Control or Status registers; only firmware >> is allowed to. >> >> To add EDR support we need to re-use some of the existing DPC, >> AER and pCIE error recovery functions. So add necessary interfaces. >> >> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> >> --- >> drivers/pci/pci.h | 8 ++++ >> drivers/pci/pcie/aer.c | 39 ++++++++++++++------ >> drivers/pci/pcie/dpc.c | 84 +++++++++++++++++++++++++----------------- >> drivers/pci/pcie/dpc.h | 20 ++++++++++ >> drivers/pci/pcie/err.c | 30 ++++++++++++--- >> 5 files changed, 131 insertions(+), 50 deletions(-) >> create mode 100644 drivers/pci/pcie/dpc.h >> >> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h >> index 6394e7746fb5..136f27cf3871 100644 >> --- a/drivers/pci/pci.h >> +++ b/drivers/pci/pci.h >> @@ -443,6 +443,9 @@ struct aer_err_info { >> >> int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); >> void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); >> +int pci_aer_clear_err_uncor_status(struct pci_dev *dev); >> +void pci_aer_clear_err_fatal_status(struct pci_dev *dev); >> +int pci_aer_clear_err_status_regs(struct pci_dev *dev); >> #endif /* CONFIG_PCIEAER */ >> >> #ifdef CONFIG_PCIE_DPC >> @@ -549,6 +552,11 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) >> /* PCI error reporting and recovery */ >> void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, >> u32 service); >> +pci_ers_result_t pcie_do_recovery_common(struct pci_dev *dev, >> + enum pci_channel_state state, >> + u32 service, >> + pci_ers_result_t (*reset_cb)(void *cb_data), >> + void *cb_data); >> >> bool pcie_wait_for_link(struct pci_dev *pdev, bool active); >> #ifdef CONFIG_PCIEASPM >> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c >> index 4a818b07a1af..399836aa07f4 100644 >> --- a/drivers/pci/pcie/aer.c >> +++ b/drivers/pci/pcie/aer.c >> @@ -376,7 +376,7 @@ void pci_aer_clear_device_status(struct pci_dev *dev) >> pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); >> } >> >> -int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) >> +int pci_aer_clear_err_uncor_status(struct pci_dev *dev) >> { >> int pos; >> u32 status, sev; >> @@ -385,9 +385,6 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) >> if (!pos) >> return -EIO; >> >> - if (pcie_aer_get_firmware_first(dev)) >> - return -EIO; >> - >> /* Clear status bits for ERR_NONFATAL errors only */ >> pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); >> pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); >> @@ -397,9 +394,17 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) >> >> return 0; >> } >> + >> +int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) >> +{ >> + if (pcie_aer_get_firmware_first(dev)) >> + return -EIO; >> + >> + return pci_aer_clear_err_uncor_status(dev); >> +} >> EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); >> >> -void pci_aer_clear_fatal_status(struct pci_dev *dev) >> +void pci_aer_clear_err_fatal_status(struct pci_dev *dev) >> { >> int pos; >> u32 status, sev; >> @@ -408,9 +413,6 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev) >> if (!pos) >> return; >> >> - if (pcie_aer_get_firmware_first(dev)) >> - return; >> - >> /* Clear status bits for ERR_FATAL errors only */ >> pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); >> pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); >> @@ -419,7 +421,15 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev) >> pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); >> } >> >> -int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) >> +void pci_aer_clear_fatal_status(struct pci_dev *dev) >> +{ >> + if (pcie_aer_get_firmware_first(dev)) >> + return; >> + >> + return pci_aer_clear_err_fatal_status(dev); >> +} >> + >> +int pci_aer_clear_err_status_regs(struct pci_dev *dev) >> { >> int pos; >> u32 status; >> @@ -432,9 +442,6 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) >> if (!pos) >> return -EIO; >> >> - if (pcie_aer_get_firmware_first(dev)) >> - return -EIO; >> - >> port_type = pci_pcie_type(dev); >> if (port_type == PCI_EXP_TYPE_ROOT_PORT) { >> pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); >> @@ -450,6 +457,14 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) >> return 0; >> } >> >> +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) >> +{ >> + if (pcie_aer_get_firmware_first(dev)) >> + return -EIO; >> + >> + return pci_aer_clear_err_status_regs(dev); >> +} > We started with these: > > pci_cleanup_aer_uncorrect_error_status() > pci_aer_clear_fatal_status() > pci_cleanup_aer_error_status_regs() > > This was already a mess. They do basically similar things, but the > function names are a complete jumble. Let's start with preliminary > patches to name them consistently, e.g., > > pci_aer_clear_nonfatal_status() > pci_aer_clear_fatal_status() > pci_aer_clear_status() > > Now, for EDR you eventually add this in edr_handle_event(): > > edr_handle_event() > { > ... > pci_aer_clear_err_uncor_status(pdev); > pci_aer_clear_err_fatal_status(pdev); > pci_aer_clear_err_status_regs(pdev); > > I see that this path needs to clear the status even in the > firmware-first case, so you do need some change for that. But > pci_aer_clear_err_status_regs() does exactly the same thing as > pci_aer_clear_err_uncor_status() and pci_aer_clear_err_fatal_status() > plus a little more (clearing PCI_ERR_ROOT_STATUS), so it should be > sufficient to just call pci_aer_clear_err_status_regs(). > > So I don't think you need to make wrappers for > pci_aer_clear_nonfatal_status() and pci_aer_clear_fatal_status() at > all since they're not needed by the EDR path. > > You *do* need a wrapper for pci_aer_clear_status(), but instead of > just adding random words ("err", "regs") to the name, please name it > something like pci_aer_raw_clear_status() as a hint that it skips > some sort of check. > > I would split this into a separate patch. This patch contains a bunch > of things that aren't really related except that they're needed for > EDR. I think it will be more readable if each patch just does one > piece of it. Ok. I will split it into multiple patches. I just grouped them together as a preparatory patch for adding EDR support. > >> void pci_save_aer_state(struct pci_dev *dev) >> { >> struct pci_cap_saved_state *save_state; >> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c >> index 99fca8400956..acae12dbf9ff 100644 >> --- a/drivers/pci/pcie/dpc.c >> +++ b/drivers/pci/pcie/dpc.c >> @@ -15,15 +15,9 @@ >> #include <linux/pci.h> >> >> #include "portdrv.h" >> +#include "dpc.h" >> #include "../pci.h" >> >> -struct dpc_dev { >> - struct pci_dev *pdev; >> - u16 cap_pos; >> - bool rp_extensions; >> - u8 rp_log_size; >> -}; > Adding dpc.h shouldn't be in this patch because there's no need for a > separate dpc.h file yet -- it's only included this one place. I'm not > positive a dpc.h is needed at all -- see comments on patch [4/5]. Yes, if we use pdev in dpc functions used by EDR code, we should not need it. > >> static const char * const rp_pio_error_string[] = { >> "Configuration Request received UR Completion", /* Bit Position 0 */ >> "Configuration Request received CA Completion", /* Bit Position 1 */ >> @@ -117,36 +111,44 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc) >> return 0; >> } >> >> -static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) >> +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc) >> { > I don't see why you need to split this into dpc_reset_link_common() > and dpc_reset_link(). IIUC, you split it so the DPC driver path can > supply a pci_dev * via > > dpc_handler > pcie_do_recovery > pcie_do_recovery_common(..., NULL, NULL) > reset_link(..., NULL, NULL) > driver->reset_link(pdev) > dpc_reset_link(pdev) > dpc = to_dpc_dev(pdev) I have split it mainly because of dpc_dev dependency. If we use dpc_reset_link(pdev) directly it will try to find related dpc_dev using to_dpc_dev() function. But this will not work in FF mode where DPC is handled by firmware and hence we will not have DPC pcie_port service driver enumerated for this device. > dpc_reset_link_common(dpc) > > while the EDR path can supply a dpc_dev * via > > edr_handle_event > pcie_do_recovery_common(..., edr_dpc_reset_link, dpc) > reset_link(..., edr_dpc_reset_link, dpc) > edr_dpc_reset_link(dpc) > dpc_reset_link_common(dpc) > > But it looks like you *could* make these paths look like: > > dpc_handler > pcie_do_recovery > pcie_do_recovery_common(..., NULL, NULL) > reset_link(..., NULL, NULL) > driver->reset_link(pdev) > dpc_reset_link(pdev) > > edr_handle_event > pcie_do_recovery_common(..., dpc_reset_link, pdev) > reset_link(..., dpc_reset_link, pdev) > dpc_reset_link(pdev) > > and you wouldn't need edr_dpc_reset_link() at all and you wouldn't > have to split dpc_reset_link_common() out. > >> - struct dpc_dev *dpc; >> u16 cap; >> >> - /* >> - * DPC disables the Link automatically in hardware, so it has >> - * already been reset by the time we get here. >> - */ >> - dpc = to_dpc_dev(pdev); >> cap = dpc->cap_pos; >> >> /* >> * Wait until the Link is inactive, then clear DPC Trigger Status >> * to allow the Port to leave DPC. >> */ >> - pcie_wait_for_link(pdev, false); >> + pcie_wait_for_link(dpc->pdev, false); > I'm hoping you don't need to split this out at all, but if you do, > adding > > struct pci_dev *pdev = dpc->pdev; > > at the top will avoid these needless diffs. > >> if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc)) >> return PCI_ERS_RESULT_DISCONNECT; >> >> - pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, >> + pci_write_config_word(dpc->pdev, cap + PCI_EXP_DPC_STATUS, >> PCI_EXP_DPC_STATUS_TRIGGER); >> >> - if (!pcie_wait_for_link(pdev, true)) >> + if (!pcie_wait_for_link(dpc->pdev, true)) >> return PCI_ERS_RESULT_DISCONNECT; >> >> return PCI_ERS_RESULT_RECOVERED; >> } >> >> +static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) >> +{ >> + struct dpc_dev *dpc; >> + >> + /* >> + * DPC disables the Link automatically in hardware, so it has >> + * already been reset by the time we get here. >> + */ >> + dpc = to_dpc_dev(pdev); >> + >> + return dpc_reset_link_common(dpc); >> + >> +} >> + >> static void dpc_process_rp_pio_error(struct dpc_dev *dpc) >> { >> struct pci_dev *pdev = dpc->pdev; >> @@ -224,10 +226,9 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev, >> return 1; >> } >> >> -static irqreturn_t dpc_handler(int irq, void *context) >> +void dpc_process_error(struct dpc_dev *dpc) >> { >> struct aer_err_info info; >> - struct dpc_dev *dpc = context; >> struct pci_dev *pdev = dpc->pdev; >> u16 cap = dpc->cap_pos, status, source, reason, ext_reason; >> >> @@ -257,6 +258,14 @@ static irqreturn_t dpc_handler(int irq, void *context) >> pci_cleanup_aer_uncorrect_error_status(pdev); >> pci_aer_clear_fatal_status(pdev); >> } >> +} >> + >> +static irqreturn_t dpc_handler(int irq, void *context) >> +{ >> + struct dpc_dev *dpc = context; >> + struct pci_dev *pdev = dpc->pdev; >> + >> + dpc_process_error(dpc); >> >> /* We configure DPC so it only triggers on ERR_FATAL */ >> pcie_do_recovery(pdev, pci_channel_io_frozen, PCIE_PORT_SERVICE_DPC); >> @@ -282,6 +291,25 @@ static irqreturn_t dpc_irq(int irq, void *context) >> return IRQ_HANDLED; >> } >> >> +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc) >> +{ > Can you include the kzalloc() here so we don't have to do the alloc in > pci_acpi_add_edr_notifier()? Currently dpc driver allocates dpc_dev structure using pcie_port->device reference in its devm_alloc* calls. But if we allocate dpc_dev inside this function we should use pci_dev->dev reference for it. Is it OK to us pci_dev->dev reference for DPC driver allocations ? > > I think there's also a leak there: pci_acpi_add_edr_notifier() > kzallocs a struct dpc_dev, but I don't see a corresponding kfree(). since we are using devm_allocs, It should be freed when removing the PCI device right? > >> + dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC); > I think we need to check cap_pos for non-zero here. It's arguably > safe today because portdrv only calls dpc_probe() when > PCIE_PORT_SERVICE_DPC is set and we only set that if there's a DPC > capability. But that's a long ways away from here and it's > convoluted, so it's pretty hard to verify that it's safe. ok. makes sense. > > When we add EDR into the picture and call this from > pci_acpi_add_edr_notifier() and edr_handle_event(), I'm pretty sure > it's not safe at all because we have no idea whether the device has a > DPC capability. > > Factoring out dpc_dev_init() and the kzalloc() would be a simple > cleanup-type patch all by itself, so it could be separated out for > ease of reviewing. > >> + dpc->pdev = pdev; >> + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &dpc->cap); >> + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &dpc->ctl); >> + >> + dpc->rp_extensions = (dpc->cap & PCI_EXP_DPC_CAP_RP_EXT); >> + if (dpc->rp_extensions) { >> + dpc->rp_log_size = >> + (dpc->cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; >> + if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) { >> + pci_err(pdev, "RP PIO log size %u is invalid\n", >> + dpc->rp_log_size); >> + dpc->rp_log_size = 0; >> + } >> + } >> +} >> + >> #define FLAG(x, y) (((x) & (y)) ? '+' : '-') >> static int dpc_probe(struct pcie_device *dev) >> { >> @@ -298,8 +326,8 @@ static int dpc_probe(struct pcie_device *dev) >> if (!dpc) >> return -ENOMEM; >> >> - dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC); >> - dpc->pdev = pdev; >> + dpc_dev_init(pdev, dpc); >> + >> set_service_data(dev, dpc); >> >> status = devm_request_threaded_irq(device, dev->irq, dpc_irq, >> @@ -311,18 +339,8 @@ static int dpc_probe(struct pcie_device *dev) >> return status; >> } >> >> - pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap); >> - pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); >> - >> - dpc->rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT); >> - if (dpc->rp_extensions) { >> - dpc->rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; >> - if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) { >> - pci_err(pdev, "RP PIO log size %u is invalid\n", >> - dpc->rp_log_size); >> - dpc->rp_log_size = 0; >> - } >> - } >> + ctl = dpc->ctl; >> + cap = dpc->cap; >> >> ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; >> pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); >> diff --git a/drivers/pci/pcie/dpc.h b/drivers/pci/pcie/dpc.h >> new file mode 100644 >> index 000000000000..2d82bc917fcb >> --- /dev/null >> +++ b/drivers/pci/pcie/dpc.h >> @@ -0,0 +1,20 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +#ifndef DRIVERS_PCIE_DPC_H >> +#define DRIVERS_PCIE_DPC_H >> + >> +#include <linux/pci.h> >> + >> +struct dpc_dev { >> + struct pci_dev *pdev; >> + u16 cap_pos; >> + bool rp_extensions; >> + u8 rp_log_size; >> + u16 ctl; >> + u16 cap; >> +}; >> + >> +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc); >> +void dpc_process_error(struct dpc_dev *dpc); >> +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc); >> + >> +#endif /* DRIVERS_PCIE_DPC_H */ >> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c >> index eefefe03857a..e7b9dfae9035 100644 >> --- a/drivers/pci/pcie/err.c >> +++ b/drivers/pci/pcie/err.c >> @@ -162,11 +162,18 @@ static pci_ers_result_t default_reset_link(struct pci_dev *dev) >> return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; >> } >> >> -static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) >> +static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service, >> + pci_ers_result_t (*reset_cb)(void *cb_data), >> + void *cb_data) > This rejiggering of reset_link() and pcie_do_recovery() is unrelated > to the rest (except that it's needed for EDR, of course), so maybe > could be a separate patch. ok > > We could also consider changing the interface of pcie_do_recovery() to > just add reset_cb and cb_data, and change the existing callers to pass > NULL, NULL. Then we wouldn't need pcie_do_recovery_common(). I'm not > sure which way would be better. I will change the pcie_do_recovery. Currently only aer and dpc drivers uses it. > >> { >> pci_ers_result_t status; >> struct pcie_port_service_driver *driver = NULL; >> >> + if (reset_cb) { >> + status = reset_cb(cb_data); >> + goto done; >> + } >> + >> driver = pcie_port_find_service(dev, service); >> if (driver && driver->reset_link) { >> status = driver->reset_link(dev); >> @@ -178,6 +185,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) >> return PCI_ERS_RESULT_DISCONNECT; >> } >> >> +done: >> if (status != PCI_ERS_RESULT_RECOVERED) { >> pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", >> pci_name(dev)); >> @@ -187,8 +195,11 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) >> return status; >> } >> >> -void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, >> - u32 service) >> +pci_ers_result_t pcie_do_recovery_common(struct pci_dev *dev, >> + enum pci_channel_state state, >> + u32 service, >> + pci_ers_result_t (*reset_cb)(void *cb_data), >> + void *cb_data) >> { >> pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; >> struct pci_bus *bus; >> @@ -209,7 +220,7 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, >> pci_walk_bus(bus, report_normal_detected, &status); >> >> if (state == pci_channel_io_frozen) { >> - status = reset_link(dev, service); >> + status = reset_link(dev, service, reset_cb, cb_data); >> if (status != PCI_ERS_RESULT_RECOVERED) >> goto failed; >> } >> @@ -240,11 +251,20 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, >> pci_aer_clear_device_status(dev); >> pci_cleanup_aer_uncorrect_error_status(dev); >> pci_info(dev, "device recovery successful\n"); >> - return; >> + >> + return status; >> >> failed: >> pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); >> >> /* TODO: Should kernel panic here? */ >> pci_info(dev, "device recovery failed\n"); >> + >> + return status; >> +} >> + >> +void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, >> + u32 service) >> +{ >> + pcie_do_recovery_common(dev, state, service, NULL, NULL); >> } >> -- >> 2.21.0 >>
On Wed, Feb 26, 2020 at 11:30:43AM -0800, Kuppuswamy Sathyanarayanan wrote: > On 2/25/20 5:02 PM, Bjorn Helgaas wrote: > > On Thu, Feb 13, 2020 at 10:20:15AM -0800, sathyanarayanan.kuppuswamy@linux.intel.com wrote: > > > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > > ... > > > +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) > > > +{ > > > + if (pcie_aer_get_firmware_first(dev)) > > > + return -EIO; > > > + > > > + return pci_aer_clear_err_status_regs(dev); > > > +} > > We started with these: > > > > pci_cleanup_aer_uncorrect_error_status() > > pci_aer_clear_fatal_status() > > pci_cleanup_aer_error_status_regs() > > > > This was already a mess. They do basically similar things, but the > > function names are a complete jumble. Let's start with preliminary > > patches to name them consistently, e.g., > > > > pci_aer_clear_nonfatal_status() > > pci_aer_clear_fatal_status() > > pci_aer_clear_status() > > > > Now, for EDR you eventually add this in edr_handle_event(): > > > > edr_handle_event() > > { > > ... > > pci_aer_clear_err_uncor_status(pdev); > > pci_aer_clear_err_fatal_status(pdev); > > pci_aer_clear_err_status_regs(pdev); > > > > I see that this path needs to clear the status even in the > > firmware-first case, so you do need some change for that. But > > pci_aer_clear_err_status_regs() does exactly the same thing as > > pci_aer_clear_err_uncor_status() and pci_aer_clear_err_fatal_status() > > plus a little more (clearing PCI_ERR_ROOT_STATUS), so it should be > > sufficient to just call pci_aer_clear_err_status_regs(). > > > > So I don't think you need to make wrappers for > > pci_aer_clear_nonfatal_status() and pci_aer_clear_fatal_status() at > > all since they're not needed by the EDR path. > > > > You *do* need a wrapper for pci_aer_clear_status(), but instead of > > just adding random words ("err", "regs") to the name, please name it > > something like pci_aer_raw_clear_status() as a hint that it skips > > some sort of check. What do you think about the above? > > I would split this into a separate patch. This patch contains a bunch > > of things that aren't really related except that they're needed for > > EDR. I think it will be more readable if each patch just does one > > piece of it. > > Ok. I will split it into multiple patches. I just grouped them together > as a preparatory patch for adding EDR support. Sounds good. > > > diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c > > > index 99fca8400956..acae12dbf9ff 100644 > > > --- a/drivers/pci/pcie/dpc.c > > > +++ b/drivers/pci/pcie/dpc.c > > > @@ -15,15 +15,9 @@ > > > #include <linux/pci.h> > > > #include "portdrv.h" > > > +#include "dpc.h" > > > #include "../pci.h" > > > -struct dpc_dev { > > > - struct pci_dev *pdev; > > > - u16 cap_pos; > > > - bool rp_extensions; > > > - u8 rp_log_size; > > > -}; > > Adding dpc.h shouldn't be in this patch because there's no need for a > > separate dpc.h file yet -- it's only included this one place. I'm not > > positive a dpc.h is needed at all -- see comments on patch [4/5]. > Yes, if we use pdev in dpc functions used by EDR code, we should > not need it. I think struct dpc_dev might be more trouble than it's worth. I attached a possible patch at the end that folds the 25 bits of DPC-related info into the struct pci_dev and gets rid of struct dpc_dev completely. I compiled it but haven't tested it. > > > -static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) > > > +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc) > > > { > > I don't see why you need to split this into dpc_reset_link_common() > > and dpc_reset_link(). IIUC, you split it so the DPC driver path can > > supply a pci_dev * via > > > > dpc_handler > > pcie_do_recovery > > pcie_do_recovery_common(..., NULL, NULL) > > reset_link(..., NULL, NULL) > > driver->reset_link(pdev) > > dpc_reset_link(pdev) > > dpc = to_dpc_dev(pdev) > > I have split it mainly because of dpc_dev dependency. If we use > dpc_reset_link(pdev) directly it will try to find related dpc_dev using > to_dpc_dev() function. But this will not work in FF mode where DPC > is handled by firmware and hence we will not have DPC pcie_port > service driver enumerated for this device. The patch below might help this case. > > > +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc) > > > +{ > > Can you include the kzalloc() here so we don't have to do the alloc in > > pci_acpi_add_edr_notifier()? > Currently dpc driver allocates dpc_dev structure using pcie_port->device > reference in its devm_alloc* calls. But if we allocate dpc_dev inside > this function we should use pci_dev->dev reference for it. Is it OK to us > pci_dev->dev reference for DPC driver allocations ? I think the patch below would solve this issue too because we don't need the alloc at all. > > I think there's also a leak there: pci_acpi_add_edr_notifier() > > kzallocs a struct dpc_dev, but I don't see a corresponding kfree(). > since we are using devm_allocs, It should be freed when removing > the PCI device right? Oh, right, sorry. commit 7fb9f4495711 ("PCI/DPC: Move data to struct pci_dev") Author: Bjorn Helgaas <bhelgaas@google.com> Date: Wed Feb 26 08:00:55 2020 -0600 PCI/DPC: Move data to struct pci_dev diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index e06f42f58d3d..6b116d7fdb89 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -17,13 +17,6 @@ #include "portdrv.h" #include "../pci.h" -struct dpc_dev { - struct pcie_device *dev; - u16 cap_pos; - bool rp_extensions; - u8 rp_log_size; -}; - static const char * const rp_pio_error_string[] = { "Configuration Request received UR Completion", /* Bit Position 0 */ "Configuration Request received CA Completion", /* Bit Position 1 */ @@ -46,63 +39,42 @@ static const char * const rp_pio_error_string[] = { "Memory Request Completion Timeout", /* Bit Position 18 */ }; -static struct dpc_dev *to_dpc_dev(struct pci_dev *dev) -{ - struct device *device; - - device = pcie_port_find_device(dev, PCIE_PORT_SERVICE_DPC); - if (!device) - return NULL; - return get_service_data(to_pcie_device(device)); -} - void pci_save_dpc_state(struct pci_dev *dev) { - struct dpc_dev *dpc; struct pci_cap_saved_state *save_state; u16 *cap; if (!pci_is_pcie(dev)) return; - dpc = to_dpc_dev(dev); - if (!dpc) - return; - save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC); if (!save_state) return; cap = (u16 *)&save_state->cap.data[0]; - pci_read_config_word(dev, dpc->cap_pos + PCI_EXP_DPC_CTL, cap); + pci_read_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, cap); } void pci_restore_dpc_state(struct pci_dev *dev) { - struct dpc_dev *dpc; struct pci_cap_saved_state *save_state; u16 *cap; if (!pci_is_pcie(dev)) return; - dpc = to_dpc_dev(dev); - if (!dpc) - return; - save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC); if (!save_state) return; cap = (u16 *)&save_state->cap.data[0]; - pci_write_config_word(dev, dpc->cap_pos + PCI_EXP_DPC_CTL, *cap); + pci_write_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, *cap); } -static int dpc_wait_rp_inactive(struct dpc_dev *dpc) +static int dpc_wait_rp_inactive(struct pci_dev *pdev) { unsigned long timeout = jiffies + HZ; - struct pci_dev *pdev = dpc->dev->port; - u16 cap = dpc->cap_pos, status; + u16 cap = pdev->dpc_cap, status; pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); while (status & PCI_EXP_DPC_RP_BUSY && @@ -119,15 +91,13 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc) static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) { - struct dpc_dev *dpc; u16 cap; /* * DPC disables the Link automatically in hardware, so it has * already been reset by the time we get here. */ - dpc = to_dpc_dev(pdev); - cap = dpc->cap_pos; + cap = pdev->dpc_cap; /* * Wait until the Link is inactive, then clear DPC Trigger Status @@ -135,7 +105,7 @@ static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) */ pcie_wait_for_link(pdev, false); - if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc)) + if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) return PCI_ERS_RESULT_DISCONNECT; pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, @@ -147,10 +117,9 @@ static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) return PCI_ERS_RESULT_RECOVERED; } -static void dpc_process_rp_pio_error(struct dpc_dev *dpc) +static void dpc_process_rp_pio_error(struct pci_dev *pdev) { - struct pci_dev *pdev = dpc->dev->port; - u16 cap = dpc->cap_pos, dpc_status, first_error; + u16 cap = pdev->dpc_cap, dpc_status, first_error; u32 status, mask, sev, syserr, exc, dw0, dw1, dw2, dw3, log, prefix; int i; @@ -175,7 +144,7 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc) first_error == i ? " (First)" : ""); } - if (dpc->rp_log_size < 4) + if (pdev->dpc_rp_log_size < 4) goto clear_status; pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, &dw0); @@ -188,12 +157,12 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc) pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n", dw0, dw1, dw2, dw3); - if (dpc->rp_log_size < 5) + if (pdev->dpc_rp_log_size < 5) goto clear_status; pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log); pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log); - for (i = 0; i < dpc->rp_log_size - 5; i++) { + for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) { pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix); pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix); @@ -226,10 +195,9 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev, static irqreturn_t dpc_handler(int irq, void *context) { + struct pci_dev *pdev = context; + u16 cap = pdev->dpc_cap, status, source, reason, ext_reason; struct aer_err_info info; - struct dpc_dev *dpc = context; - struct pci_dev *pdev = dpc->dev->port; - u16 cap = dpc->cap_pos, status, source, reason, ext_reason; pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source); @@ -248,8 +216,8 @@ static irqreturn_t dpc_handler(int irq, void *context) "reserved error"); /* show RP PIO error detail information */ - if (dpc->rp_extensions && reason == 3 && ext_reason == 0) - dpc_process_rp_pio_error(dpc); + if (pdev->dpc_rp_extensions && reason == 3 && ext_reason == 0) + dpc_process_rp_pio_error(pdev); else if (reason == 0 && dpc_get_aer_uncorrect_severity(pdev, &info) && aer_get_device_error_info(pdev, &info)) { @@ -266,9 +234,8 @@ static irqreturn_t dpc_handler(int irq, void *context) static irqreturn_t dpc_irq(int irq, void *context) { - struct dpc_dev *dpc = (struct dpc_dev *)context; - struct pci_dev *pdev = dpc->dev->port; - u16 cap = dpc->cap_pos, status; + struct pci_dev *pdev = context; + u16 cap = pdev->dpc_cap, status; pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); @@ -285,7 +252,6 @@ static irqreturn_t dpc_irq(int irq, void *context) #define FLAG(x, y) (((x) & (y)) ? '+' : '-') static int dpc_probe(struct pcie_device *dev) { - struct dpc_dev *dpc; struct pci_dev *pdev = dev->port; struct device *device = &dev->device; int status; @@ -294,43 +260,37 @@ static int dpc_probe(struct pcie_device *dev) if (pcie_aer_get_firmware_first(pdev) && !pcie_ports_dpc_native) return -ENOTSUPP; - dpc = devm_kzalloc(device, sizeof(*dpc), GFP_KERNEL); - if (!dpc) - return -ENOMEM; - - dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC); - dpc->dev = dev; - set_service_data(dev, dpc); + pdev->dpc_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC); status = devm_request_threaded_irq(device, dev->irq, dpc_irq, dpc_handler, IRQF_SHARED, - "pcie-dpc", dpc); + "pcie-dpc", pdev); if (status) { pci_warn(pdev, "request IRQ%d failed: %d\n", dev->irq, status); return status; } - pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap); - pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap); + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl); - dpc->rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT); - if (dpc->rp_extensions) { - dpc->rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; - if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) { + pdev->dpc_rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT) ? 1 : 0; + if (pdev->dpc_rp_extensions) { + pdev->dpc_rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; + if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) { pci_err(pdev, "RP PIO log size %u is invalid\n", - dpc->rp_log_size); - dpc->rp_log_size = 0; + pdev->dpc_rp_log_size); + pdev->dpc_rp_log_size = 0; } } ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; - pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); + pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl); pci_info(pdev, "error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n", cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT), FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP), - FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), dpc->rp_log_size, + FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), pdev->dpc_rp_log_size, FLAG(cap, PCI_EXP_DPC_CAP_DL_ACTIVE)); pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_DPC, sizeof(u16)); @@ -339,13 +299,12 @@ static int dpc_probe(struct pcie_device *dev) static void dpc_remove(struct pcie_device *dev) { - struct dpc_dev *dpc = get_service_data(dev); struct pci_dev *pdev = dev->port; u16 ctl; - pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl); ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN); - pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); + pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl); } static struct pcie_port_service_driver dpcdriver = { diff --git a/include/linux/pci.h b/include/linux/pci.h index 3840a541a9de..a0b7e7a53741 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -444,6 +444,11 @@ struct pci_dev { const struct attribute_group **msi_irq_groups; #endif struct pci_vpd *vpd; +#ifdef CONFIG_PCIE_DPC + u16 dpc_cap; + unsigned int dpc_rp_extensions:1; + u8 dpc_rp_log_size; +#endif #ifdef CONFIG_PCI_ATS union { struct pci_sriov *sriov; /* PF: SR-IOV info */
Hi Bjorn, On 2/26/20 1:13 PM, Bjorn Helgaas wrote: > On Wed, Feb 26, 2020 at 11:30:43AM -0800, Kuppuswamy Sathyanarayanan wrote: >> On 2/25/20 5:02 PM, Bjorn Helgaas wrote: >>> On Thu, Feb 13, 2020 at 10:20:15AM -0800, sathyanarayanan.kuppuswamy@linux.intel.com wrote: >>>> From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> >>>> ... >>>> +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) >>>> +{ >>>> + if (pcie_aer_get_firmware_first(dev)) >>>> + return -EIO; >>>> + >>>> + return pci_aer_clear_err_status_regs(dev); >>>> +} >>> We started with these: >>> >>> pci_cleanup_aer_uncorrect_error_status() >>> pci_aer_clear_fatal_status() >>> pci_cleanup_aer_error_status_regs() >>> >>> This was already a mess. They do basically similar things, but the >>> function names are a complete jumble. Let's start with preliminary >>> patches to name them consistently, e.g., >>> >>> pci_aer_clear_nonfatal_status() >>> pci_aer_clear_fatal_status() >>> pci_aer_clear_status() >>> >>> Now, for EDR you eventually add this in edr_handle_event(): >>> >>> edr_handle_event() >>> { >>> ... >>> pci_aer_clear_err_uncor_status(pdev); >>> pci_aer_clear_err_fatal_status(pdev); >>> pci_aer_clear_err_status_regs(pdev); >>> >>> I see that this path needs to clear the status even in the >>> firmware-first case, so you do need some change for that. But >>> pci_aer_clear_err_status_regs() does exactly the same thing as >>> pci_aer_clear_err_uncor_status() and pci_aer_clear_err_fatal_status() >>> plus a little more (clearing PCI_ERR_ROOT_STATUS), so it should be >>> sufficient to just call pci_aer_clear_err_status_regs(). >>> >>> So I don't think you need to make wrappers for >>> pci_aer_clear_nonfatal_status() and pci_aer_clear_fatal_status() at >>> all since they're not needed by the EDR path. >>> >>> You *do* need a wrapper for pci_aer_clear_status(), but instead of >>> just adding random words ("err", "regs") to the name, please name it >>> something like pci_aer_raw_clear_status() as a hint that it skips >>> some sort of check. > What do you think about the above? I agree with your comments.. I will use your recommendation in naming the wrappers I need. > >>> I would split this into a separate patch. This patch contains a bunch >>> of things that aren't really related except that they're needed for >>> EDR. I think it will be more readable if each patch just does one >>> piece of it. >> Ok. I will split it into multiple patches. I just grouped them together >> as a preparatory patch for adding EDR support. > Sounds good. > >>>> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c >>>> index 99fca8400956..acae12dbf9ff 100644 >>>> --- a/drivers/pci/pcie/dpc.c >>>> +++ b/drivers/pci/pcie/dpc.c >>>> @@ -15,15 +15,9 @@ >>>> #include <linux/pci.h> >>>> #include "portdrv.h" >>>> +#include "dpc.h" >>>> #include "../pci.h" >>>> -struct dpc_dev { >>>> - struct pci_dev *pdev; >>>> - u16 cap_pos; >>>> - bool rp_extensions; >>>> - u8 rp_log_size; >>>> -}; >>> Adding dpc.h shouldn't be in this patch because there's no need for a >>> separate dpc.h file yet -- it's only included this one place. I'm not >>> positive a dpc.h is needed at all -- see comments on patch [4/5]. >> Yes, if we use pdev in dpc functions used by EDR code, we should >> not need it. > I think struct dpc_dev might be more trouble than it's worth. I > attached a possible patch at the end that folds the 25 bits of > DPC-related info into the struct pci_dev and gets rid of struct > dpc_dev completely. I compiled it but haven't tested it. That would solve most of my export issues. Do you want me to add it to my patch list or merge it separately. > >>>> -static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) >>>> +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc) >>>> { >>> I don't see why you need to split this into dpc_reset_link_common() >>> and dpc_reset_link(). IIUC, you split it so the DPC driver path can >>> supply a pci_dev * via >>> >>> dpc_handler >>> pcie_do_recovery >>> pcie_do_recovery_common(..., NULL, NULL) >>> reset_link(..., NULL, NULL) >>> driver->reset_link(pdev) >>> dpc_reset_link(pdev) >>> dpc = to_dpc_dev(pdev) >> I have split it mainly because of dpc_dev dependency. If we use >> dpc_reset_link(pdev) directly it will try to find related dpc_dev using >> to_dpc_dev() function. But this will not work in FF mode where DPC >> is handled by firmware and hence we will not have DPC pcie_port >> service driver enumerated for this device. > The patch below might help this case. Yes. > >>>> +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc) >>>> +{ >>> Can you include the kzalloc() here so we don't have to do the alloc in >>> pci_acpi_add_edr_notifier()? >> Currently dpc driver allocates dpc_dev structure using pcie_port->device >> reference in its devm_alloc* calls. But if we allocate dpc_dev inside >> this function we should use pci_dev->dev reference for it. Is it OK to us >> pci_dev->dev reference for DPC driver allocations ? > I think the patch below would solve this issue too because we don't > need the alloc at all. Agree. > >>> I think there's also a leak there: pci_acpi_add_edr_notifier() >>> kzallocs a struct dpc_dev, but I don't see a corresponding kfree(). >> since we are using devm_allocs, It should be freed when removing >> the PCI device right? > Oh, right, sorry. > > > commit 7fb9f4495711 ("PCI/DPC: Move data to struct pci_dev") > Author: Bjorn Helgaas <bhelgaas@google.com> > Date: Wed Feb 26 08:00:55 2020 -0600 > > PCI/DPC: Move data to struct pci_dev > > > diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c > index e06f42f58d3d..6b116d7fdb89 100644 > --- a/drivers/pci/pcie/dpc.c > +++ b/drivers/pci/pcie/dpc.c > @@ -17,13 +17,6 @@ > #include "portdrv.h" > #include "../pci.h" > > -struct dpc_dev { > - struct pcie_device *dev; > - u16 cap_pos; > - bool rp_extensions; > - u8 rp_log_size; > -}; > - > static const char * const rp_pio_error_string[] = { > "Configuration Request received UR Completion", /* Bit Position 0 */ > "Configuration Request received CA Completion", /* Bit Position 1 */ > @@ -46,63 +39,42 @@ static const char * const rp_pio_error_string[] = { > "Memory Request Completion Timeout", /* Bit Position 18 */ > }; > > -static struct dpc_dev *to_dpc_dev(struct pci_dev *dev) > -{ > - struct device *device; > - > - device = pcie_port_find_device(dev, PCIE_PORT_SERVICE_DPC); > - if (!device) > - return NULL; > - return get_service_data(to_pcie_device(device)); > -} > - > void pci_save_dpc_state(struct pci_dev *dev) > { > - struct dpc_dev *dpc; > struct pci_cap_saved_state *save_state; > u16 *cap; > > if (!pci_is_pcie(dev)) > return; > > - dpc = to_dpc_dev(dev); > - if (!dpc) > - return; > - > save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC); > if (!save_state) > return; > > cap = (u16 *)&save_state->cap.data[0]; > - pci_read_config_word(dev, dpc->cap_pos + PCI_EXP_DPC_CTL, cap); > + pci_read_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, cap); > } > > void pci_restore_dpc_state(struct pci_dev *dev) > { > - struct dpc_dev *dpc; > struct pci_cap_saved_state *save_state; > u16 *cap; > > if (!pci_is_pcie(dev)) > return; > > - dpc = to_dpc_dev(dev); > - if (!dpc) > - return; > - > save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC); > if (!save_state) > return; > > cap = (u16 *)&save_state->cap.data[0]; > - pci_write_config_word(dev, dpc->cap_pos + PCI_EXP_DPC_CTL, *cap); > + pci_write_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, *cap); > } > > -static int dpc_wait_rp_inactive(struct dpc_dev *dpc) > +static int dpc_wait_rp_inactive(struct pci_dev *pdev) > { > unsigned long timeout = jiffies + HZ; > - struct pci_dev *pdev = dpc->dev->port; > - u16 cap = dpc->cap_pos, status; > + u16 cap = pdev->dpc_cap, status; > > pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); > while (status & PCI_EXP_DPC_RP_BUSY && > @@ -119,15 +91,13 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc) > > static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) > { > - struct dpc_dev *dpc; > u16 cap; > > /* > * DPC disables the Link automatically in hardware, so it has > * already been reset by the time we get here. > */ > - dpc = to_dpc_dev(pdev); > - cap = dpc->cap_pos; > + cap = pdev->dpc_cap; > > /* > * Wait until the Link is inactive, then clear DPC Trigger Status > @@ -135,7 +105,7 @@ static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) > */ > pcie_wait_for_link(pdev, false); > > - if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc)) > + if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) > return PCI_ERS_RESULT_DISCONNECT; > > pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, > @@ -147,10 +117,9 @@ static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) > return PCI_ERS_RESULT_RECOVERED; > } > > -static void dpc_process_rp_pio_error(struct dpc_dev *dpc) > +static void dpc_process_rp_pio_error(struct pci_dev *pdev) > { > - struct pci_dev *pdev = dpc->dev->port; > - u16 cap = dpc->cap_pos, dpc_status, first_error; > + u16 cap = pdev->dpc_cap, dpc_status, first_error; > u32 status, mask, sev, syserr, exc, dw0, dw1, dw2, dw3, log, prefix; > int i; > > @@ -175,7 +144,7 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc) > first_error == i ? " (First)" : ""); > } > > - if (dpc->rp_log_size < 4) > + if (pdev->dpc_rp_log_size < 4) > goto clear_status; > pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, > &dw0); > @@ -188,12 +157,12 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc) > pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n", > dw0, dw1, dw2, dw3); > > - if (dpc->rp_log_size < 5) > + if (pdev->dpc_rp_log_size < 5) > goto clear_status; > pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log); > pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log); > > - for (i = 0; i < dpc->rp_log_size - 5; i++) { > + for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) { > pci_read_config_dword(pdev, > cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix); > pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix); > @@ -226,10 +195,9 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev, > > static irqreturn_t dpc_handler(int irq, void *context) > { > + struct pci_dev *pdev = context; > + u16 cap = pdev->dpc_cap, status, source, reason, ext_reason; > struct aer_err_info info; > - struct dpc_dev *dpc = context; > - struct pci_dev *pdev = dpc->dev->port; > - u16 cap = dpc->cap_pos, status, source, reason, ext_reason; > > pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); > pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source); > @@ -248,8 +216,8 @@ static irqreturn_t dpc_handler(int irq, void *context) > "reserved error"); > > /* show RP PIO error detail information */ > - if (dpc->rp_extensions && reason == 3 && ext_reason == 0) > - dpc_process_rp_pio_error(dpc); > + if (pdev->dpc_rp_extensions && reason == 3 && ext_reason == 0) > + dpc_process_rp_pio_error(pdev); > else if (reason == 0 && > dpc_get_aer_uncorrect_severity(pdev, &info) && > aer_get_device_error_info(pdev, &info)) { > @@ -266,9 +234,8 @@ static irqreturn_t dpc_handler(int irq, void *context) > > static irqreturn_t dpc_irq(int irq, void *context) > { > - struct dpc_dev *dpc = (struct dpc_dev *)context; > - struct pci_dev *pdev = dpc->dev->port; > - u16 cap = dpc->cap_pos, status; > + struct pci_dev *pdev = context; > + u16 cap = pdev->dpc_cap, status; > > pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); > > @@ -285,7 +252,6 @@ static irqreturn_t dpc_irq(int irq, void *context) > #define FLAG(x, y) (((x) & (y)) ? '+' : '-') > static int dpc_probe(struct pcie_device *dev) > { > - struct dpc_dev *dpc; > struct pci_dev *pdev = dev->port; > struct device *device = &dev->device; > int status; > @@ -294,43 +260,37 @@ static int dpc_probe(struct pcie_device *dev) > if (pcie_aer_get_firmware_first(pdev) && !pcie_ports_dpc_native) > return -ENOTSUPP; > > - dpc = devm_kzalloc(device, sizeof(*dpc), GFP_KERNEL); > - if (!dpc) > - return -ENOMEM; > - > - dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC); > - dpc->dev = dev; > - set_service_data(dev, dpc); > + pdev->dpc_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC); > > status = devm_request_threaded_irq(device, dev->irq, dpc_irq, > dpc_handler, IRQF_SHARED, > - "pcie-dpc", dpc); > + "pcie-dpc", pdev); > if (status) { > pci_warn(pdev, "request IRQ%d failed: %d\n", dev->irq, > status); > return status; > } > > - pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap); > - pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); > + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap); > + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl); > > - dpc->rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT); > - if (dpc->rp_extensions) { > - dpc->rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; > - if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) { > + pdev->dpc_rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT) ? 1 : 0; > + if (pdev->dpc_rp_extensions) { > + pdev->dpc_rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; > + if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) { > pci_err(pdev, "RP PIO log size %u is invalid\n", > - dpc->rp_log_size); > - dpc->rp_log_size = 0; > + pdev->dpc_rp_log_size); > + pdev->dpc_rp_log_size = 0; > } > } > > ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; > - pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); > + pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl); > > pci_info(pdev, "error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n", > cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT), > FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP), > - FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), dpc->rp_log_size, > + FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), pdev->dpc_rp_log_size, > FLAG(cap, PCI_EXP_DPC_CAP_DL_ACTIVE)); > > pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_DPC, sizeof(u16)); > @@ -339,13 +299,12 @@ static int dpc_probe(struct pcie_device *dev) > > static void dpc_remove(struct pcie_device *dev) > { > - struct dpc_dev *dpc = get_service_data(dev); > struct pci_dev *pdev = dev->port; > u16 ctl; > > - pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); > + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl); > ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN); > - pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); > + pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl); > } > > static struct pcie_port_service_driver dpcdriver = { > diff --git a/include/linux/pci.h b/include/linux/pci.h > index 3840a541a9de..a0b7e7a53741 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -444,6 +444,11 @@ struct pci_dev { > const struct attribute_group **msi_irq_groups; > #endif > struct pci_vpd *vpd; > +#ifdef CONFIG_PCIE_DPC > + u16 dpc_cap; > + unsigned int dpc_rp_extensions:1; > + u8 dpc_rp_log_size; > +#endif > #ifdef CONFIG_PCI_ATS > union { > struct pci_sriov *sriov; /* PF: SR-IOV info */ >
On Wed, Feb 26, 2020 at 01:26:09PM -0800, Kuppuswamy Sathyanarayanan wrote: > On 2/26/20 1:13 PM, Bjorn Helgaas wrote: > > On Wed, Feb 26, 2020 at 11:30:43AM -0800, Kuppuswamy Sathyanarayanan wrote: > > > On 2/25/20 5:02 PM, Bjorn Helgaas wrote: > > > > On Thu, Feb 13, 2020 at 10:20:15AM -0800, sathyanarayanan.kuppuswamy@linux.intel.com wrote: > > > > > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com> > > > > > ... > > > > > diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c > > > > > index 99fca8400956..acae12dbf9ff 100644 > > > > > --- a/drivers/pci/pcie/dpc.c > > > > > +++ b/drivers/pci/pcie/dpc.c > > > > > @@ -15,15 +15,9 @@ > > > > > #include <linux/pci.h> > > > > > #include "portdrv.h" > > > > > +#include "dpc.h" > > > > > #include "../pci.h" > > > > > -struct dpc_dev { > > > > > - struct pci_dev *pdev; > > > > > - u16 cap_pos; > > > > > - bool rp_extensions; > > > > > - u8 rp_log_size; > > > > > -}; > > > > Adding dpc.h shouldn't be in this patch because there's no need for a > > > > separate dpc.h file yet -- it's only included this one place. I'm not > > > > positive a dpc.h is needed at all -- see comments on patch [4/5]. > > > Yes, if we use pdev in dpc functions used by EDR code, we should > > > not need it. > > I think struct dpc_dev might be more trouble than it's worth. I > > attached a possible patch at the end that folds the 25 bits of > > DPC-related info into the struct pci_dev and gets rid of struct > > dpc_dev completely. I compiled it but haven't tested it. > That would solve most of my export issues. Do you want me > to add it to my patch list or merge it separately. If you want to use that, I would merge it first, followed by the error clearing cleanup, followed by the other EDR stuff. Bjorn
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 6394e7746fb5..136f27cf3871 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -443,6 +443,9 @@ struct aer_err_info { int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); +int pci_aer_clear_err_uncor_status(struct pci_dev *dev); +void pci_aer_clear_err_fatal_status(struct pci_dev *dev); +int pci_aer_clear_err_status_regs(struct pci_dev *dev); #endif /* CONFIG_PCIEAER */ #ifdef CONFIG_PCIE_DPC @@ -549,6 +552,11 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) /* PCI error reporting and recovery */ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, u32 service); +pci_ers_result_t pcie_do_recovery_common(struct pci_dev *dev, + enum pci_channel_state state, + u32 service, + pci_ers_result_t (*reset_cb)(void *cb_data), + void *cb_data); bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 4a818b07a1af..399836aa07f4 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -376,7 +376,7 @@ void pci_aer_clear_device_status(struct pci_dev *dev) pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); } -int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) +int pci_aer_clear_err_uncor_status(struct pci_dev *dev) { int pos; u32 status, sev; @@ -385,9 +385,6 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) if (!pos) return -EIO; - if (pcie_aer_get_firmware_first(dev)) - return -EIO; - /* Clear status bits for ERR_NONFATAL errors only */ pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); @@ -397,9 +394,17 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) return 0; } + +int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) +{ + if (pcie_aer_get_firmware_first(dev)) + return -EIO; + + return pci_aer_clear_err_uncor_status(dev); +} EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); -void pci_aer_clear_fatal_status(struct pci_dev *dev) +void pci_aer_clear_err_fatal_status(struct pci_dev *dev) { int pos; u32 status, sev; @@ -408,9 +413,6 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev) if (!pos) return; - if (pcie_aer_get_firmware_first(dev)) - return; - /* Clear status bits for ERR_FATAL errors only */ pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev); @@ -419,7 +421,15 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev) pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); } -int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) +void pci_aer_clear_fatal_status(struct pci_dev *dev) +{ + if (pcie_aer_get_firmware_first(dev)) + return; + + return pci_aer_clear_err_fatal_status(dev); +} + +int pci_aer_clear_err_status_regs(struct pci_dev *dev) { int pos; u32 status; @@ -432,9 +442,6 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) if (!pos) return -EIO; - if (pcie_aer_get_firmware_first(dev)) - return -EIO; - port_type = pci_pcie_type(dev); if (port_type == PCI_EXP_TYPE_ROOT_PORT) { pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); @@ -450,6 +457,14 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) return 0; } +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) +{ + if (pcie_aer_get_firmware_first(dev)) + return -EIO; + + return pci_aer_clear_err_status_regs(dev); +} + void pci_save_aer_state(struct pci_dev *dev) { struct pci_cap_saved_state *save_state; diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 99fca8400956..acae12dbf9ff 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -15,15 +15,9 @@ #include <linux/pci.h> #include "portdrv.h" +#include "dpc.h" #include "../pci.h" -struct dpc_dev { - struct pci_dev *pdev; - u16 cap_pos; - bool rp_extensions; - u8 rp_log_size; -}; - static const char * const rp_pio_error_string[] = { "Configuration Request received UR Completion", /* Bit Position 0 */ "Configuration Request received CA Completion", /* Bit Position 1 */ @@ -117,36 +111,44 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc) return 0; } -static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc) { - struct dpc_dev *dpc; u16 cap; - /* - * DPC disables the Link automatically in hardware, so it has - * already been reset by the time we get here. - */ - dpc = to_dpc_dev(pdev); cap = dpc->cap_pos; /* * Wait until the Link is inactive, then clear DPC Trigger Status * to allow the Port to leave DPC. */ - pcie_wait_for_link(pdev, false); + pcie_wait_for_link(dpc->pdev, false); if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc)) return PCI_ERS_RESULT_DISCONNECT; - pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, + pci_write_config_word(dpc->pdev, cap + PCI_EXP_DPC_STATUS, PCI_EXP_DPC_STATUS_TRIGGER); - if (!pcie_wait_for_link(pdev, true)) + if (!pcie_wait_for_link(dpc->pdev, true)) return PCI_ERS_RESULT_DISCONNECT; return PCI_ERS_RESULT_RECOVERED; } +static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) +{ + struct dpc_dev *dpc; + + /* + * DPC disables the Link automatically in hardware, so it has + * already been reset by the time we get here. + */ + dpc = to_dpc_dev(pdev); + + return dpc_reset_link_common(dpc); + +} + static void dpc_process_rp_pio_error(struct dpc_dev *dpc) { struct pci_dev *pdev = dpc->pdev; @@ -224,10 +226,9 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev, return 1; } -static irqreturn_t dpc_handler(int irq, void *context) +void dpc_process_error(struct dpc_dev *dpc) { struct aer_err_info info; - struct dpc_dev *dpc = context; struct pci_dev *pdev = dpc->pdev; u16 cap = dpc->cap_pos, status, source, reason, ext_reason; @@ -257,6 +258,14 @@ static irqreturn_t dpc_handler(int irq, void *context) pci_cleanup_aer_uncorrect_error_status(pdev); pci_aer_clear_fatal_status(pdev); } +} + +static irqreturn_t dpc_handler(int irq, void *context) +{ + struct dpc_dev *dpc = context; + struct pci_dev *pdev = dpc->pdev; + + dpc_process_error(dpc); /* We configure DPC so it only triggers on ERR_FATAL */ pcie_do_recovery(pdev, pci_channel_io_frozen, PCIE_PORT_SERVICE_DPC); @@ -282,6 +291,25 @@ static irqreturn_t dpc_irq(int irq, void *context) return IRQ_HANDLED; } +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc) +{ + dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC); + dpc->pdev = pdev; + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &dpc->cap); + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &dpc->ctl); + + dpc->rp_extensions = (dpc->cap & PCI_EXP_DPC_CAP_RP_EXT); + if (dpc->rp_extensions) { + dpc->rp_log_size = + (dpc->cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; + if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) { + pci_err(pdev, "RP PIO log size %u is invalid\n", + dpc->rp_log_size); + dpc->rp_log_size = 0; + } + } +} + #define FLAG(x, y) (((x) & (y)) ? '+' : '-') static int dpc_probe(struct pcie_device *dev) { @@ -298,8 +326,8 @@ static int dpc_probe(struct pcie_device *dev) if (!dpc) return -ENOMEM; - dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC); - dpc->pdev = pdev; + dpc_dev_init(pdev, dpc); + set_service_data(dev, dpc); status = devm_request_threaded_irq(device, dev->irq, dpc_irq, @@ -311,18 +339,8 @@ static int dpc_probe(struct pcie_device *dev) return status; } - pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap); - pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); - - dpc->rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT); - if (dpc->rp_extensions) { - dpc->rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8; - if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) { - pci_err(pdev, "RP PIO log size %u is invalid\n", - dpc->rp_log_size); - dpc->rp_log_size = 0; - } - } + ctl = dpc->ctl; + cap = dpc->cap; ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); diff --git a/drivers/pci/pcie/dpc.h b/drivers/pci/pcie/dpc.h new file mode 100644 index 000000000000..2d82bc917fcb --- /dev/null +++ b/drivers/pci/pcie/dpc.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef DRIVERS_PCIE_DPC_H +#define DRIVERS_PCIE_DPC_H + +#include <linux/pci.h> + +struct dpc_dev { + struct pci_dev *pdev; + u16 cap_pos; + bool rp_extensions; + u8 rp_log_size; + u16 ctl; + u16 cap; +}; + +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc); +void dpc_process_error(struct dpc_dev *dpc); +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc); + +#endif /* DRIVERS_PCIE_DPC_H */ diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index eefefe03857a..e7b9dfae9035 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -162,11 +162,18 @@ static pci_ers_result_t default_reset_link(struct pci_dev *dev) return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } -static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) +static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service, + pci_ers_result_t (*reset_cb)(void *cb_data), + void *cb_data) { pci_ers_result_t status; struct pcie_port_service_driver *driver = NULL; + if (reset_cb) { + status = reset_cb(cb_data); + goto done; + } + driver = pcie_port_find_service(dev, service); if (driver && driver->reset_link) { status = driver->reset_link(dev); @@ -178,6 +185,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) return PCI_ERS_RESULT_DISCONNECT; } +done: if (status != PCI_ERS_RESULT_RECOVERED) { pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", pci_name(dev)); @@ -187,8 +195,11 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) return status; } -void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, - u32 service) +pci_ers_result_t pcie_do_recovery_common(struct pci_dev *dev, + enum pci_channel_state state, + u32 service, + pci_ers_result_t (*reset_cb)(void *cb_data), + void *cb_data) { pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; struct pci_bus *bus; @@ -209,7 +220,7 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, pci_walk_bus(bus, report_normal_detected, &status); if (state == pci_channel_io_frozen) { - status = reset_link(dev, service); + status = reset_link(dev, service, reset_cb, cb_data); if (status != PCI_ERS_RESULT_RECOVERED) goto failed; } @@ -240,11 +251,20 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, pci_aer_clear_device_status(dev); pci_cleanup_aer_uncorrect_error_status(dev); pci_info(dev, "device recovery successful\n"); - return; + + return status; failed: pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ pci_info(dev, "device recovery failed\n"); + + return status; +} + +void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, + u32 service) +{ + pcie_do_recovery_common(dev, state, service, NULL, NULL); }