diff mbox series

[v15,3/5] PCI/EDR: Export AER, DPC and error recovery functions

Message ID b2494cde6711c0157cbd93b4fc4ec85e987af8fe.1581617873.git.sathyanarayanan.kuppuswamy@linux.intel.com (mailing list archive)
State Superseded, archived
Delegated to: Bjorn Helgaas
Headers show
Series Add Error Disconnect Recover (EDR) support | expand

Commit Message

Kuppuswamy Sathyanarayanan Feb. 13, 2020, 6:20 p.m. UTC
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>

This is a preparatory patch for adding EDR support.

As per the Downstream Port Containment Related Enhancements ECN to the
PCI Firmware Specification r3.2, sec 4.5.1, table 4-6, If DPC is
controlled by firmware, firmware is responsible for initializing
Downstream Port Containment Extended Capability Structures per firmware
policy. Further, the OS is permitted to read or write DPC Control and
Status registers of a port while processing an Error Disconnect Recover
notification from firmware on that port. Error Disconnect Recover
notification processing begins with the Error Disconnect Recover notify
from Firmware, and ends when the OS releases DPC by clearing the DPC
Trigger Status bit.Firmware can read DPC Trigger Status bit to determine
the ownership of DPC Control and Status registers. Firmware is not
permitted to write to DPC Control and Status registers if DPC Trigger
Status is set i.e. the link is in DPC state. Outside of the Error
Disconnect Recover notification processing window, the OS is not
permitted to modify DPC Control or Status registers; only firmware
is allowed to.

To add EDR support we need to re-use some of the existing DPC,
AER and pCIE error recovery functions. So add necessary interfaces.

Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
---
 drivers/pci/pci.h      |  8 ++++
 drivers/pci/pcie/aer.c | 39 ++++++++++++++------
 drivers/pci/pcie/dpc.c | 84 +++++++++++++++++++++++++-----------------
 drivers/pci/pcie/dpc.h | 20 ++++++++++
 drivers/pci/pcie/err.c | 30 ++++++++++++---
 5 files changed, 131 insertions(+), 50 deletions(-)
 create mode 100644 drivers/pci/pcie/dpc.h

Comments

Bjorn Helgaas Feb. 26, 2020, 1:02 a.m. UTC | #1
On Thu, Feb 13, 2020 at 10:20:15AM -0800, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
> From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> 
> This is a preparatory patch for adding EDR support.
> 
> As per the Downstream Port Containment Related Enhancements ECN to the
> PCI Firmware Specification r3.2, sec 4.5.1, table 4-6, If DPC is
> controlled by firmware, firmware is responsible for initializing
> Downstream Port Containment Extended Capability Structures per firmware
> policy. Further, the OS is permitted to read or write DPC Control and
> Status registers of a port while processing an Error Disconnect Recover
> notification from firmware on that port. Error Disconnect Recover
> notification processing begins with the Error Disconnect Recover notify
> from Firmware, and ends when the OS releases DPC by clearing the DPC
> Trigger Status bit.Firmware can read DPC Trigger Status bit to determine
> the ownership of DPC Control and Status registers. Firmware is not
> permitted to write to DPC Control and Status registers if DPC Trigger
> Status is set i.e. the link is in DPC state. Outside of the Error
> Disconnect Recover notification processing window, the OS is not
> permitted to modify DPC Control or Status registers; only firmware
> is allowed to.
> 
> To add EDR support we need to re-use some of the existing DPC,
> AER and pCIE error recovery functions. So add necessary interfaces.
> 
> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> ---
>  drivers/pci/pci.h      |  8 ++++
>  drivers/pci/pcie/aer.c | 39 ++++++++++++++------
>  drivers/pci/pcie/dpc.c | 84 +++++++++++++++++++++++++-----------------
>  drivers/pci/pcie/dpc.h | 20 ++++++++++
>  drivers/pci/pcie/err.c | 30 ++++++++++++---
>  5 files changed, 131 insertions(+), 50 deletions(-)
>  create mode 100644 drivers/pci/pcie/dpc.h
> 
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 6394e7746fb5..136f27cf3871 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -443,6 +443,9 @@ struct aer_err_info {
>  
>  int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
>  void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
> +int pci_aer_clear_err_uncor_status(struct pci_dev *dev);
> +void pci_aer_clear_err_fatal_status(struct pci_dev *dev);
> +int pci_aer_clear_err_status_regs(struct pci_dev *dev);
>  #endif	/* CONFIG_PCIEAER */
>  
>  #ifdef CONFIG_PCIE_DPC
> @@ -549,6 +552,11 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
>  /* PCI error reporting and recovery */
>  void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
>  		      u32 service);
> +pci_ers_result_t pcie_do_recovery_common(struct pci_dev *dev,
> +				enum pci_channel_state state,
> +				u32 service,
> +				pci_ers_result_t (*reset_cb)(void *cb_data),
> +				void *cb_data);
>  
>  bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
>  #ifdef CONFIG_PCIEASPM
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index 4a818b07a1af..399836aa07f4 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -376,7 +376,7 @@ void pci_aer_clear_device_status(struct pci_dev *dev)
>  	pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
>  }
>  
> -int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
> +int pci_aer_clear_err_uncor_status(struct pci_dev *dev)
>  {
>  	int pos;
>  	u32 status, sev;
> @@ -385,9 +385,6 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
>  	if (!pos)
>  		return -EIO;
>  
> -	if (pcie_aer_get_firmware_first(dev))
> -		return -EIO;
> -
>  	/* Clear status bits for ERR_NONFATAL errors only */
>  	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
>  	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
> @@ -397,9 +394,17 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
>  
>  	return 0;
>  }
> +
> +int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
> +{
> +	if (pcie_aer_get_firmware_first(dev))
> +		return -EIO;
> +
> +	return pci_aer_clear_err_uncor_status(dev);
> +}
>  EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
>  
> -void pci_aer_clear_fatal_status(struct pci_dev *dev)
> +void pci_aer_clear_err_fatal_status(struct pci_dev *dev)
>  {
>  	int pos;
>  	u32 status, sev;
> @@ -408,9 +413,6 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev)
>  	if (!pos)
>  		return;
>  
> -	if (pcie_aer_get_firmware_first(dev))
> -		return;
> -
>  	/* Clear status bits for ERR_FATAL errors only */
>  	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
>  	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
> @@ -419,7 +421,15 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev)
>  		pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
>  }
>  
> -int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
> +void pci_aer_clear_fatal_status(struct pci_dev *dev)
> +{
> +	if (pcie_aer_get_firmware_first(dev))
> +		return;
> +
> +	return pci_aer_clear_err_fatal_status(dev);
> +}
> +
> +int pci_aer_clear_err_status_regs(struct pci_dev *dev)
>  {
>  	int pos;
>  	u32 status;
> @@ -432,9 +442,6 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
>  	if (!pos)
>  		return -EIO;
>  
> -	if (pcie_aer_get_firmware_first(dev))
> -		return -EIO;
> -
>  	port_type = pci_pcie_type(dev);
>  	if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
>  		pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);
> @@ -450,6 +457,14 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
>  	return 0;
>  }
>  
> +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
> +{
> +	if (pcie_aer_get_firmware_first(dev))
> +		return -EIO;
> +
> +	return pci_aer_clear_err_status_regs(dev);
> +}

We started with these:

  pci_cleanup_aer_uncorrect_error_status()
  pci_aer_clear_fatal_status()
  pci_cleanup_aer_error_status_regs()

This was already a mess.  They do basically similar things, but the
function names are a complete jumble.  Let's start with preliminary
patches to name them consistently, e.g.,

  pci_aer_clear_nonfatal_status()
  pci_aer_clear_fatal_status()
  pci_aer_clear_status()

Now, for EDR you eventually add this in edr_handle_event():

  edr_handle_event()
  {
    ...
    pci_aer_clear_err_uncor_status(pdev);
    pci_aer_clear_err_fatal_status(pdev);
    pci_aer_clear_err_status_regs(pdev);

I see that this path needs to clear the status even in the
firmware-first case, so you do need some change for that.  But
pci_aer_clear_err_status_regs() does exactly the same thing as
pci_aer_clear_err_uncor_status() and pci_aer_clear_err_fatal_status()
plus a little more (clearing PCI_ERR_ROOT_STATUS), so it should be
sufficient to just call pci_aer_clear_err_status_regs().

So I don't think you need to make wrappers for
pci_aer_clear_nonfatal_status() and pci_aer_clear_fatal_status() at
all since they're not needed by the EDR path.

You *do* need a wrapper for pci_aer_clear_status(), but instead of
just adding random words ("err", "regs") to the name, please name it
something like pci_aer_raw_clear_status() as a hint that it skips
some sort of check.

I would split this into a separate patch.  This patch contains a bunch
of things that aren't really related except that they're needed for
EDR.  I think it will be more readable if each patch just does one
piece of it.

>  void pci_save_aer_state(struct pci_dev *dev)
>  {
>  	struct pci_cap_saved_state *save_state;
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index 99fca8400956..acae12dbf9ff 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -15,15 +15,9 @@
>  #include <linux/pci.h>
>  
>  #include "portdrv.h"
> +#include "dpc.h"
>  #include "../pci.h"
>  
> -struct dpc_dev {
> -	struct pci_dev		*pdev;
> -	u16			cap_pos;
> -	bool			rp_extensions;
> -	u8			rp_log_size;
> -};

Adding dpc.h shouldn't be in this patch because there's no need for a
separate dpc.h file yet -- it's only included this one place.  I'm not
positive a dpc.h is needed at all -- see comments on patch [4/5].

>  static const char * const rp_pio_error_string[] = {
>  	"Configuration Request received UR Completion",	 /* Bit Position 0  */
>  	"Configuration Request received CA Completion",	 /* Bit Position 1  */
> @@ -117,36 +111,44 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
>  	return 0;
>  }
>  
> -static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
> +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc)
>  {

I don't see why you need to split this into dpc_reset_link_common()
and dpc_reset_link().  IIUC, you split it so the DPC driver path can
supply a pci_dev * via

  dpc_handler
    pcie_do_recovery
      pcie_do_recovery_common(..., NULL, NULL)
        reset_link(..., NULL, NULL)
          driver->reset_link(pdev)
            dpc_reset_link(pdev)
              dpc = to_dpc_dev(pdev)
              dpc_reset_link_common(dpc)

while the EDR path can supply a dpc_dev * via

  edr_handle_event
    pcie_do_recovery_common(..., edr_dpc_reset_link, dpc)
      reset_link(..., edr_dpc_reset_link, dpc)
        edr_dpc_reset_link(dpc)
          dpc_reset_link_common(dpc)

But it looks like you *could* make these paths look like:

  dpc_handler
    pcie_do_recovery
      pcie_do_recovery_common(..., NULL, NULL)
        reset_link(..., NULL, NULL)
          driver->reset_link(pdev)
            dpc_reset_link(pdev)

  edr_handle_event
    pcie_do_recovery_common(..., dpc_reset_link, pdev)
      reset_link(..., dpc_reset_link, pdev)
        dpc_reset_link(pdev)

and you wouldn't need edr_dpc_reset_link() at all and you wouldn't
have to split dpc_reset_link_common() out.

> -	struct dpc_dev *dpc;
>  	u16 cap;
>  
> -	/*
> -	 * DPC disables the Link automatically in hardware, so it has
> -	 * already been reset by the time we get here.
> -	 */
> -	dpc = to_dpc_dev(pdev);
>  	cap = dpc->cap_pos;
>  
>  	/*
>  	 * Wait until the Link is inactive, then clear DPC Trigger Status
>  	 * to allow the Port to leave DPC.
>  	 */
> -	pcie_wait_for_link(pdev, false);
> +	pcie_wait_for_link(dpc->pdev, false);

I'm hoping you don't need to split this out at all, but if you do,
adding

  struct pci_dev *pdev = dpc->pdev;

at the top will avoid these needless diffs.

>  	if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc))
>  		return PCI_ERS_RESULT_DISCONNECT;
>  
> -	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
> +	pci_write_config_word(dpc->pdev, cap + PCI_EXP_DPC_STATUS,
>  			      PCI_EXP_DPC_STATUS_TRIGGER);
>  
> -	if (!pcie_wait_for_link(pdev, true))
> +	if (!pcie_wait_for_link(dpc->pdev, true))
>  		return PCI_ERS_RESULT_DISCONNECT;
>  
>  	return PCI_ERS_RESULT_RECOVERED;
>  }
>  
> +static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
> +{
> +	struct dpc_dev *dpc;
> +
> +	/*
> +	 * DPC disables the Link automatically in hardware, so it has
> +	 * already been reset by the time we get here.
> + 	 */
> +	dpc = to_dpc_dev(pdev);
> +
> +	return dpc_reset_link_common(dpc);
> +
> +}
> +
>  static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
>  {
>  	struct pci_dev *pdev = dpc->pdev;
> @@ -224,10 +226,9 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
>  	return 1;
>  }
>  
> -static irqreturn_t dpc_handler(int irq, void *context)
> +void dpc_process_error(struct dpc_dev *dpc)
>  {
>  	struct aer_err_info info;
> -	struct dpc_dev *dpc = context;
>  	struct pci_dev *pdev = dpc->pdev;
>  	u16 cap = dpc->cap_pos, status, source, reason, ext_reason;
>  
> @@ -257,6 +258,14 @@ static irqreturn_t dpc_handler(int irq, void *context)
>  		pci_cleanup_aer_uncorrect_error_status(pdev);
>  		pci_aer_clear_fatal_status(pdev);
>  	}
> +}
> +
> +static irqreturn_t dpc_handler(int irq, void *context)
> +{
> +	struct dpc_dev *dpc = context;
> +	struct pci_dev *pdev = dpc->pdev;
> +
> +	dpc_process_error(dpc);
>  
>  	/* We configure DPC so it only triggers on ERR_FATAL */
>  	pcie_do_recovery(pdev, pci_channel_io_frozen, PCIE_PORT_SERVICE_DPC);
> @@ -282,6 +291,25 @@ static irqreturn_t dpc_irq(int irq, void *context)
>  	return IRQ_HANDLED;
>  }
>  
> +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc)
> +{

Can you include the kzalloc() here so we don't have to do the alloc in
pci_acpi_add_edr_notifier()?

I think there's also a leak there: pci_acpi_add_edr_notifier()
kzallocs a struct dpc_dev, but I don't see a corresponding kfree().

> +	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);

I think we need to check cap_pos for non-zero here.  It's arguably
safe today because portdrv only calls dpc_probe() when
PCIE_PORT_SERVICE_DPC is set and we only set that if there's a DPC
capability.  But that's a long ways away from here and it's
convoluted, so it's pretty hard to verify that it's safe.

When we add EDR into the picture and call this from
pci_acpi_add_edr_notifier() and edr_handle_event(), I'm pretty sure
it's not safe at all because we have no idea whether the device has a
DPC capability.

Factoring out dpc_dev_init() and the kzalloc() would be a simple
cleanup-type patch all by itself, so it could be separated out for
ease of reviewing.

> +	dpc->pdev = pdev;
> +	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &dpc->cap);
> +	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &dpc->ctl);
> +
> +	dpc->rp_extensions = (dpc->cap & PCI_EXP_DPC_CAP_RP_EXT);
> +	if (dpc->rp_extensions) {
> +		dpc->rp_log_size =
> +			(dpc->cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
> +		if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) {
> +			pci_err(pdev, "RP PIO log size %u is invalid\n",
> +				dpc->rp_log_size);
> +			dpc->rp_log_size = 0;
> +		}
> +	}
> +}
> +
>  #define FLAG(x, y) (((x) & (y)) ? '+' : '-')
>  static int dpc_probe(struct pcie_device *dev)
>  {
> @@ -298,8 +326,8 @@ static int dpc_probe(struct pcie_device *dev)
>  	if (!dpc)
>  		return -ENOMEM;
>  
> -	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
> -	dpc->pdev = pdev;
> +	dpc_dev_init(pdev, dpc);
> +
>  	set_service_data(dev, dpc);
>  
>  	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
> @@ -311,18 +339,8 @@ static int dpc_probe(struct pcie_device *dev)
>  		return status;
>  	}
>  
> -	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap);
> -	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
> -
> -	dpc->rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT);
> -	if (dpc->rp_extensions) {
> -		dpc->rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
> -		if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) {
> -			pci_err(pdev, "RP PIO log size %u is invalid\n",
> -				dpc->rp_log_size);
> -			dpc->rp_log_size = 0;
> -		}
> -	}
> +	ctl = dpc->ctl;
> +	cap = dpc->cap;
>  
>  	ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
>  	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
> diff --git a/drivers/pci/pcie/dpc.h b/drivers/pci/pcie/dpc.h
> new file mode 100644
> index 000000000000..2d82bc917fcb
> --- /dev/null
> +++ b/drivers/pci/pcie/dpc.h
> @@ -0,0 +1,20 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef DRIVERS_PCIE_DPC_H
> +#define DRIVERS_PCIE_DPC_H
> +
> +#include <linux/pci.h>
> +
> +struct dpc_dev {
> +	struct pci_dev		*pdev;
> +	u16			cap_pos;
> +	bool			rp_extensions;
> +	u8			rp_log_size;
> +	u16			ctl;
> +	u16			cap;
> +};
> +
> +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc);
> +void dpc_process_error(struct dpc_dev *dpc);
> +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc);
> +
> +#endif /* DRIVERS_PCIE_DPC_H */
> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> index eefefe03857a..e7b9dfae9035 100644
> --- a/drivers/pci/pcie/err.c
> +++ b/drivers/pci/pcie/err.c
> @@ -162,11 +162,18 @@ static pci_ers_result_t default_reset_link(struct pci_dev *dev)
>  	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
>  }
>  
> -static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
> +static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service,
> +				   pci_ers_result_t (*reset_cb)(void *cb_data),
> +				   void *cb_data)

This rejiggering of reset_link() and pcie_do_recovery() is unrelated
to the rest (except that it's needed for EDR, of course), so maybe
could be a separate patch.

We could also consider changing the interface of pcie_do_recovery() to
just add reset_cb and cb_data, and change the existing callers to pass
NULL, NULL.  Then we wouldn't need pcie_do_recovery_common().  I'm not
sure which way would be better.

>  {
>  	pci_ers_result_t status;
>  	struct pcie_port_service_driver *driver = NULL;
>  
> +	if (reset_cb) {
> +		status = reset_cb(cb_data);
> +		goto done;
> +	}
> +
>  	driver = pcie_port_find_service(dev, service);
>  	if (driver && driver->reset_link) {
>  		status = driver->reset_link(dev);
> @@ -178,6 +185,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
>  		return PCI_ERS_RESULT_DISCONNECT;
>  	}
>  
> +done:
>  	if (status != PCI_ERS_RESULT_RECOVERED) {
>  		pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
>  			pci_name(dev));
> @@ -187,8 +195,11 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
>  	return status;
>  }
>  
> -void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
> -		      u32 service)
> +pci_ers_result_t pcie_do_recovery_common(struct pci_dev *dev,
> +				enum pci_channel_state state,
> +				u32 service,
> +				pci_ers_result_t (*reset_cb)(void *cb_data),
> +				void *cb_data)
>  {
>  	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
>  	struct pci_bus *bus;
> @@ -209,7 +220,7 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
>  		pci_walk_bus(bus, report_normal_detected, &status);
>  
>  	if (state == pci_channel_io_frozen) {
> -		status = reset_link(dev, service);
> +		status = reset_link(dev, service, reset_cb, cb_data);
>  		if (status != PCI_ERS_RESULT_RECOVERED)
>  			goto failed;
>  	}
> @@ -240,11 +251,20 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
>  	pci_aer_clear_device_status(dev);
>  	pci_cleanup_aer_uncorrect_error_status(dev);
>  	pci_info(dev, "device recovery successful\n");
> -	return;
> +
> +	return status;
>  
>  failed:
>  	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
>  
>  	/* TODO: Should kernel panic here? */
>  	pci_info(dev, "device recovery failed\n");
> +
> +	return status;
> +}
> +
> +void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
> +		      u32 service)
> +{
> +	pcie_do_recovery_common(dev, state, service, NULL, NULL);
>  }
> -- 
> 2.21.0
>
Kuppuswamy Sathyanarayanan Feb. 26, 2020, 7:30 p.m. UTC | #2
Hi Bjorn,

On 2/25/20 5:02 PM, Bjorn Helgaas wrote:
> On Thu, Feb 13, 2020 at 10:20:15AM -0800, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
>> From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
>>
>> This is a preparatory patch for adding EDR support.
>>
>> As per the Downstream Port Containment Related Enhancements ECN to the
>> PCI Firmware Specification r3.2, sec 4.5.1, table 4-6, If DPC is
>> controlled by firmware, firmware is responsible for initializing
>> Downstream Port Containment Extended Capability Structures per firmware
>> policy. Further, the OS is permitted to read or write DPC Control and
>> Status registers of a port while processing an Error Disconnect Recover
>> notification from firmware on that port. Error Disconnect Recover
>> notification processing begins with the Error Disconnect Recover notify
>> from Firmware, and ends when the OS releases DPC by clearing the DPC
>> Trigger Status bit.Firmware can read DPC Trigger Status bit to determine
>> the ownership of DPC Control and Status registers. Firmware is not
>> permitted to write to DPC Control and Status registers if DPC Trigger
>> Status is set i.e. the link is in DPC state. Outside of the Error
>> Disconnect Recover notification processing window, the OS is not
>> permitted to modify DPC Control or Status registers; only firmware
>> is allowed to.
>>
>> To add EDR support we need to re-use some of the existing DPC,
>> AER and pCIE error recovery functions. So add necessary interfaces.
>>
>> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
>> ---
>>   drivers/pci/pci.h      |  8 ++++
>>   drivers/pci/pcie/aer.c | 39 ++++++++++++++------
>>   drivers/pci/pcie/dpc.c | 84 +++++++++++++++++++++++++-----------------
>>   drivers/pci/pcie/dpc.h | 20 ++++++++++
>>   drivers/pci/pcie/err.c | 30 ++++++++++++---
>>   5 files changed, 131 insertions(+), 50 deletions(-)
>>   create mode 100644 drivers/pci/pcie/dpc.h
>>
>> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>> index 6394e7746fb5..136f27cf3871 100644
>> --- a/drivers/pci/pci.h
>> +++ b/drivers/pci/pci.h
>> @@ -443,6 +443,9 @@ struct aer_err_info {
>>   
>>   int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
>>   void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
>> +int pci_aer_clear_err_uncor_status(struct pci_dev *dev);
>> +void pci_aer_clear_err_fatal_status(struct pci_dev *dev);
>> +int pci_aer_clear_err_status_regs(struct pci_dev *dev);
>>   #endif	/* CONFIG_PCIEAER */
>>   
>>   #ifdef CONFIG_PCIE_DPC
>> @@ -549,6 +552,11 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
>>   /* PCI error reporting and recovery */
>>   void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
>>   		      u32 service);
>> +pci_ers_result_t pcie_do_recovery_common(struct pci_dev *dev,
>> +				enum pci_channel_state state,
>> +				u32 service,
>> +				pci_ers_result_t (*reset_cb)(void *cb_data),
>> +				void *cb_data);
>>   
>>   bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
>>   #ifdef CONFIG_PCIEASPM
>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>> index 4a818b07a1af..399836aa07f4 100644
>> --- a/drivers/pci/pcie/aer.c
>> +++ b/drivers/pci/pcie/aer.c
>> @@ -376,7 +376,7 @@ void pci_aer_clear_device_status(struct pci_dev *dev)
>>   	pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
>>   }
>>   
>> -int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
>> +int pci_aer_clear_err_uncor_status(struct pci_dev *dev)
>>   {
>>   	int pos;
>>   	u32 status, sev;
>> @@ -385,9 +385,6 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
>>   	if (!pos)
>>   		return -EIO;
>>   
>> -	if (pcie_aer_get_firmware_first(dev))
>> -		return -EIO;
>> -
>>   	/* Clear status bits for ERR_NONFATAL errors only */
>>   	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
>>   	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
>> @@ -397,9 +394,17 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
>>   
>>   	return 0;
>>   }
>> +
>> +int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
>> +{
>> +	if (pcie_aer_get_firmware_first(dev))
>> +		return -EIO;
>> +
>> +	return pci_aer_clear_err_uncor_status(dev);
>> +}
>>   EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
>>   
>> -void pci_aer_clear_fatal_status(struct pci_dev *dev)
>> +void pci_aer_clear_err_fatal_status(struct pci_dev *dev)
>>   {
>>   	int pos;
>>   	u32 status, sev;
>> @@ -408,9 +413,6 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev)
>>   	if (!pos)
>>   		return;
>>   
>> -	if (pcie_aer_get_firmware_first(dev))
>> -		return;
>> -
>>   	/* Clear status bits for ERR_FATAL errors only */
>>   	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
>>   	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
>> @@ -419,7 +421,15 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev)
>>   		pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
>>   }
>>   
>> -int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
>> +void pci_aer_clear_fatal_status(struct pci_dev *dev)
>> +{
>> +	if (pcie_aer_get_firmware_first(dev))
>> +		return;
>> +
>> +	return pci_aer_clear_err_fatal_status(dev);
>> +}
>> +
>> +int pci_aer_clear_err_status_regs(struct pci_dev *dev)
>>   {
>>   	int pos;
>>   	u32 status;
>> @@ -432,9 +442,6 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
>>   	if (!pos)
>>   		return -EIO;
>>   
>> -	if (pcie_aer_get_firmware_first(dev))
>> -		return -EIO;
>> -
>>   	port_type = pci_pcie_type(dev);
>>   	if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
>>   		pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);
>> @@ -450,6 +457,14 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
>>   	return 0;
>>   }
>>   
>> +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
>> +{
>> +	if (pcie_aer_get_firmware_first(dev))
>> +		return -EIO;
>> +
>> +	return pci_aer_clear_err_status_regs(dev);
>> +}
> We started with these:
>
>    pci_cleanup_aer_uncorrect_error_status()
>    pci_aer_clear_fatal_status()
>    pci_cleanup_aer_error_status_regs()
>
> This was already a mess.  They do basically similar things, but the
> function names are a complete jumble.  Let's start with preliminary
> patches to name them consistently, e.g.,
>
>    pci_aer_clear_nonfatal_status()
>    pci_aer_clear_fatal_status()
>    pci_aer_clear_status()
>
> Now, for EDR you eventually add this in edr_handle_event():
>
>    edr_handle_event()
>    {
>      ...
>      pci_aer_clear_err_uncor_status(pdev);
>      pci_aer_clear_err_fatal_status(pdev);
>      pci_aer_clear_err_status_regs(pdev);
>
> I see that this path needs to clear the status even in the
> firmware-first case, so you do need some change for that.  But
> pci_aer_clear_err_status_regs() does exactly the same thing as
> pci_aer_clear_err_uncor_status() and pci_aer_clear_err_fatal_status()
> plus a little more (clearing PCI_ERR_ROOT_STATUS), so it should be
> sufficient to just call pci_aer_clear_err_status_regs().
>
> So I don't think you need to make wrappers for
> pci_aer_clear_nonfatal_status() and pci_aer_clear_fatal_status() at
> all since they're not needed by the EDR path.
>
> You *do* need a wrapper for pci_aer_clear_status(), but instead of
> just adding random words ("err", "regs") to the name, please name it
> something like pci_aer_raw_clear_status() as a hint that it skips
> some sort of check.
>
> I would split this into a separate patch.  This patch contains a bunch
> of things that aren't really related except that they're needed for
> EDR.  I think it will be more readable if each patch just does one
> piece of it.
Ok. I will split it into multiple patches. I just grouped them together
as a preparatory patch for adding EDR support.
>
>>   void pci_save_aer_state(struct pci_dev *dev)
>>   {
>>   	struct pci_cap_saved_state *save_state;
>> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
>> index 99fca8400956..acae12dbf9ff 100644
>> --- a/drivers/pci/pcie/dpc.c
>> +++ b/drivers/pci/pcie/dpc.c
>> @@ -15,15 +15,9 @@
>>   #include <linux/pci.h>
>>   
>>   #include "portdrv.h"
>> +#include "dpc.h"
>>   #include "../pci.h"
>>   
>> -struct dpc_dev {
>> -	struct pci_dev		*pdev;
>> -	u16			cap_pos;
>> -	bool			rp_extensions;
>> -	u8			rp_log_size;
>> -};
> Adding dpc.h shouldn't be in this patch because there's no need for a
> separate dpc.h file yet -- it's only included this one place.  I'm not
> positive a dpc.h is needed at all -- see comments on patch [4/5].
Yes, if we use pdev in dpc functions used by EDR code, we should
not need it.
>
>>   static const char * const rp_pio_error_string[] = {
>>   	"Configuration Request received UR Completion",	 /* Bit Position 0  */
>>   	"Configuration Request received CA Completion",	 /* Bit Position 1  */
>> @@ -117,36 +111,44 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
>>   	return 0;
>>   }
>>   
>> -static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
>> +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc)
>>   {
> I don't see why you need to split this into dpc_reset_link_common()
> and dpc_reset_link().  IIUC, you split it so the DPC driver path can
> supply a pci_dev * via
>
>    dpc_handler
>      pcie_do_recovery
>        pcie_do_recovery_common(..., NULL, NULL)
>          reset_link(..., NULL, NULL)
>            driver->reset_link(pdev)
>              dpc_reset_link(pdev)
>                dpc = to_dpc_dev(pdev)
I have split it mainly because of dpc_dev dependency. If we use
dpc_reset_link(pdev) directly it will try to find related dpc_dev using
to_dpc_dev() function. But this will not work in FF mode where DPC
is handled by firmware and hence we will not have DPC pcie_port
service driver enumerated for this device.
>                dpc_reset_link_common(dpc)
>
> while the EDR path can supply a dpc_dev * via
>
>    edr_handle_event
>      pcie_do_recovery_common(..., edr_dpc_reset_link, dpc)
>        reset_link(..., edr_dpc_reset_link, dpc)
>          edr_dpc_reset_link(dpc)
>            dpc_reset_link_common(dpc)
>
> But it looks like you *could* make these paths look like:
>
>    dpc_handler
>      pcie_do_recovery
>        pcie_do_recovery_common(..., NULL, NULL)
>          reset_link(..., NULL, NULL)
>            driver->reset_link(pdev)
>              dpc_reset_link(pdev)
>
>    edr_handle_event
>      pcie_do_recovery_common(..., dpc_reset_link, pdev)
>        reset_link(..., dpc_reset_link, pdev)
>          dpc_reset_link(pdev)
>
> and you wouldn't need edr_dpc_reset_link() at all and you wouldn't
> have to split dpc_reset_link_common() out.
>
>> -	struct dpc_dev *dpc;
>>   	u16 cap;
>>   
>> -	/*
>> -	 * DPC disables the Link automatically in hardware, so it has
>> -	 * already been reset by the time we get here.
>> -	 */
>> -	dpc = to_dpc_dev(pdev);
>>   	cap = dpc->cap_pos;
>>   
>>   	/*
>>   	 * Wait until the Link is inactive, then clear DPC Trigger Status
>>   	 * to allow the Port to leave DPC.
>>   	 */
>> -	pcie_wait_for_link(pdev, false);
>> +	pcie_wait_for_link(dpc->pdev, false);
> I'm hoping you don't need to split this out at all, but if you do,
> adding
>
>    struct pci_dev *pdev = dpc->pdev;
>
> at the top will avoid these needless diffs.
>
>>   	if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc))
>>   		return PCI_ERS_RESULT_DISCONNECT;
>>   
>> -	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
>> +	pci_write_config_word(dpc->pdev, cap + PCI_EXP_DPC_STATUS,
>>   			      PCI_EXP_DPC_STATUS_TRIGGER);
>>   
>> -	if (!pcie_wait_for_link(pdev, true))
>> +	if (!pcie_wait_for_link(dpc->pdev, true))
>>   		return PCI_ERS_RESULT_DISCONNECT;
>>   
>>   	return PCI_ERS_RESULT_RECOVERED;
>>   }
>>   
>> +static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
>> +{
>> +	struct dpc_dev *dpc;
>> +
>> +	/*
>> +	 * DPC disables the Link automatically in hardware, so it has
>> +	 * already been reset by the time we get here.
>> + 	 */
>> +	dpc = to_dpc_dev(pdev);
>> +
>> +	return dpc_reset_link_common(dpc);
>> +
>> +}
>> +
>>   static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
>>   {
>>   	struct pci_dev *pdev = dpc->pdev;
>> @@ -224,10 +226,9 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
>>   	return 1;
>>   }
>>   
>> -static irqreturn_t dpc_handler(int irq, void *context)
>> +void dpc_process_error(struct dpc_dev *dpc)
>>   {
>>   	struct aer_err_info info;
>> -	struct dpc_dev *dpc = context;
>>   	struct pci_dev *pdev = dpc->pdev;
>>   	u16 cap = dpc->cap_pos, status, source, reason, ext_reason;
>>   
>> @@ -257,6 +258,14 @@ static irqreturn_t dpc_handler(int irq, void *context)
>>   		pci_cleanup_aer_uncorrect_error_status(pdev);
>>   		pci_aer_clear_fatal_status(pdev);
>>   	}
>> +}
>> +
>> +static irqreturn_t dpc_handler(int irq, void *context)
>> +{
>> +	struct dpc_dev *dpc = context;
>> +	struct pci_dev *pdev = dpc->pdev;
>> +
>> +	dpc_process_error(dpc);
>>   
>>   	/* We configure DPC so it only triggers on ERR_FATAL */
>>   	pcie_do_recovery(pdev, pci_channel_io_frozen, PCIE_PORT_SERVICE_DPC);
>> @@ -282,6 +291,25 @@ static irqreturn_t dpc_irq(int irq, void *context)
>>   	return IRQ_HANDLED;
>>   }
>>   
>> +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc)
>> +{
> Can you include the kzalloc() here so we don't have to do the alloc in
> pci_acpi_add_edr_notifier()?
Currently dpc driver allocates dpc_dev structure using pcie_port->device
reference in its devm_alloc* calls. But if we allocate dpc_dev inside
this function we should use pci_dev->dev reference for it. Is it OK to us
pci_dev->dev reference for DPC driver allocations ?
>
> I think there's also a leak there: pci_acpi_add_edr_notifier()
> kzallocs a struct dpc_dev, but I don't see a corresponding kfree().
since we are using devm_allocs, It should be freed when removing
the PCI device right?
>
>> +	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
> I think we need to check cap_pos for non-zero here.  It's arguably
> safe today because portdrv only calls dpc_probe() when
> PCIE_PORT_SERVICE_DPC is set and we only set that if there's a DPC
> capability.  But that's a long ways away from here and it's
> convoluted, so it's pretty hard to verify that it's safe.
ok. makes sense.
>
> When we add EDR into the picture and call this from
> pci_acpi_add_edr_notifier() and edr_handle_event(), I'm pretty sure
> it's not safe at all because we have no idea whether the device has a
> DPC capability.
>
> Factoring out dpc_dev_init() and the kzalloc() would be a simple
> cleanup-type patch all by itself, so it could be separated out for
> ease of reviewing.
>
>> +	dpc->pdev = pdev;
>> +	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &dpc->cap);
>> +	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &dpc->ctl);
>> +
>> +	dpc->rp_extensions = (dpc->cap & PCI_EXP_DPC_CAP_RP_EXT);
>> +	if (dpc->rp_extensions) {
>> +		dpc->rp_log_size =
>> +			(dpc->cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
>> +		if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) {
>> +			pci_err(pdev, "RP PIO log size %u is invalid\n",
>> +				dpc->rp_log_size);
>> +			dpc->rp_log_size = 0;
>> +		}
>> +	}
>> +}
>> +
>>   #define FLAG(x, y) (((x) & (y)) ? '+' : '-')
>>   static int dpc_probe(struct pcie_device *dev)
>>   {
>> @@ -298,8 +326,8 @@ static int dpc_probe(struct pcie_device *dev)
>>   	if (!dpc)
>>   		return -ENOMEM;
>>   
>> -	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
>> -	dpc->pdev = pdev;
>> +	dpc_dev_init(pdev, dpc);
>> +
>>   	set_service_data(dev, dpc);
>>   
>>   	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
>> @@ -311,18 +339,8 @@ static int dpc_probe(struct pcie_device *dev)
>>   		return status;
>>   	}
>>   
>> -	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap);
>> -	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
>> -
>> -	dpc->rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT);
>> -	if (dpc->rp_extensions) {
>> -		dpc->rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
>> -		if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) {
>> -			pci_err(pdev, "RP PIO log size %u is invalid\n",
>> -				dpc->rp_log_size);
>> -			dpc->rp_log_size = 0;
>> -		}
>> -	}
>> +	ctl = dpc->ctl;
>> +	cap = dpc->cap;
>>   
>>   	ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
>>   	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
>> diff --git a/drivers/pci/pcie/dpc.h b/drivers/pci/pcie/dpc.h
>> new file mode 100644
>> index 000000000000..2d82bc917fcb
>> --- /dev/null
>> +++ b/drivers/pci/pcie/dpc.h
>> @@ -0,0 +1,20 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef DRIVERS_PCIE_DPC_H
>> +#define DRIVERS_PCIE_DPC_H
>> +
>> +#include <linux/pci.h>
>> +
>> +struct dpc_dev {
>> +	struct pci_dev		*pdev;
>> +	u16			cap_pos;
>> +	bool			rp_extensions;
>> +	u8			rp_log_size;
>> +	u16			ctl;
>> +	u16			cap;
>> +};
>> +
>> +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc);
>> +void dpc_process_error(struct dpc_dev *dpc);
>> +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc);
>> +
>> +#endif /* DRIVERS_PCIE_DPC_H */
>> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
>> index eefefe03857a..e7b9dfae9035 100644
>> --- a/drivers/pci/pcie/err.c
>> +++ b/drivers/pci/pcie/err.c
>> @@ -162,11 +162,18 @@ static pci_ers_result_t default_reset_link(struct pci_dev *dev)
>>   	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
>>   }
>>   
>> -static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
>> +static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service,
>> +				   pci_ers_result_t (*reset_cb)(void *cb_data),
>> +				   void *cb_data)
> This rejiggering of reset_link() and pcie_do_recovery() is unrelated
> to the rest (except that it's needed for EDR, of course), so maybe
> could be a separate patch.
ok
>
> We could also consider changing the interface of pcie_do_recovery() to
> just add reset_cb and cb_data, and change the existing callers to pass
> NULL, NULL.  Then we wouldn't need pcie_do_recovery_common().  I'm not
> sure which way would be better.
I will change the pcie_do_recovery. Currently only aer and dpc drivers
uses it.
>
>>   {
>>   	pci_ers_result_t status;
>>   	struct pcie_port_service_driver *driver = NULL;
>>   
>> +	if (reset_cb) {
>> +		status = reset_cb(cb_data);
>> +		goto done;
>> +	}
>> +
>>   	driver = pcie_port_find_service(dev, service);
>>   	if (driver && driver->reset_link) {
>>   		status = driver->reset_link(dev);
>> @@ -178,6 +185,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
>>   		return PCI_ERS_RESULT_DISCONNECT;
>>   	}
>>   
>> +done:
>>   	if (status != PCI_ERS_RESULT_RECOVERED) {
>>   		pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
>>   			pci_name(dev));
>> @@ -187,8 +195,11 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
>>   	return status;
>>   }
>>   
>> -void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
>> -		      u32 service)
>> +pci_ers_result_t pcie_do_recovery_common(struct pci_dev *dev,
>> +				enum pci_channel_state state,
>> +				u32 service,
>> +				pci_ers_result_t (*reset_cb)(void *cb_data),
>> +				void *cb_data)
>>   {
>>   	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
>>   	struct pci_bus *bus;
>> @@ -209,7 +220,7 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
>>   		pci_walk_bus(bus, report_normal_detected, &status);
>>   
>>   	if (state == pci_channel_io_frozen) {
>> -		status = reset_link(dev, service);
>> +		status = reset_link(dev, service, reset_cb, cb_data);
>>   		if (status != PCI_ERS_RESULT_RECOVERED)
>>   			goto failed;
>>   	}
>> @@ -240,11 +251,20 @@ void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
>>   	pci_aer_clear_device_status(dev);
>>   	pci_cleanup_aer_uncorrect_error_status(dev);
>>   	pci_info(dev, "device recovery successful\n");
>> -	return;
>> +
>> +	return status;
>>   
>>   failed:
>>   	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
>>   
>>   	/* TODO: Should kernel panic here? */
>>   	pci_info(dev, "device recovery failed\n");
>> +
>> +	return status;
>> +}
>> +
>> +void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
>> +		      u32 service)
>> +{
>> +	pcie_do_recovery_common(dev, state, service, NULL, NULL);
>>   }
>> -- 
>> 2.21.0
>>
Bjorn Helgaas Feb. 26, 2020, 9:13 p.m. UTC | #3
On Wed, Feb 26, 2020 at 11:30:43AM -0800, Kuppuswamy Sathyanarayanan wrote:
> On 2/25/20 5:02 PM, Bjorn Helgaas wrote:
> > On Thu, Feb 13, 2020 at 10:20:15AM -0800, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
> > > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> > > ...

> > > +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
> > > +{
> > > +	if (pcie_aer_get_firmware_first(dev))
> > > +		return -EIO;
> > > +
> > > +	return pci_aer_clear_err_status_regs(dev);
> > > +}
> > We started with these:
> > 
> >    pci_cleanup_aer_uncorrect_error_status()
> >    pci_aer_clear_fatal_status()
> >    pci_cleanup_aer_error_status_regs()
> > 
> > This was already a mess.  They do basically similar things, but the
> > function names are a complete jumble.  Let's start with preliminary
> > patches to name them consistently, e.g.,
> > 
> >    pci_aer_clear_nonfatal_status()
> >    pci_aer_clear_fatal_status()
> >    pci_aer_clear_status()
> > 
> > Now, for EDR you eventually add this in edr_handle_event():
> > 
> >    edr_handle_event()
> >    {
> >      ...
> >      pci_aer_clear_err_uncor_status(pdev);
> >      pci_aer_clear_err_fatal_status(pdev);
> >      pci_aer_clear_err_status_regs(pdev);
> > 
> > I see that this path needs to clear the status even in the
> > firmware-first case, so you do need some change for that.  But
> > pci_aer_clear_err_status_regs() does exactly the same thing as
> > pci_aer_clear_err_uncor_status() and pci_aer_clear_err_fatal_status()
> > plus a little more (clearing PCI_ERR_ROOT_STATUS), so it should be
> > sufficient to just call pci_aer_clear_err_status_regs().
> > 
> > So I don't think you need to make wrappers for
> > pci_aer_clear_nonfatal_status() and pci_aer_clear_fatal_status() at
> > all since they're not needed by the EDR path.
> > 
> > You *do* need a wrapper for pci_aer_clear_status(), but instead of
> > just adding random words ("err", "regs") to the name, please name it
> > something like pci_aer_raw_clear_status() as a hint that it skips
> > some sort of check.

What do you think about the above?

> > I would split this into a separate patch.  This patch contains a bunch
> > of things that aren't really related except that they're needed for
> > EDR.  I think it will be more readable if each patch just does one
> > piece of it.
>
> Ok. I will split it into multiple patches. I just grouped them together
> as a preparatory patch for adding EDR support.

Sounds good.

> > > diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> > > index 99fca8400956..acae12dbf9ff 100644
> > > --- a/drivers/pci/pcie/dpc.c
> > > +++ b/drivers/pci/pcie/dpc.c
> > > @@ -15,15 +15,9 @@
> > >   #include <linux/pci.h>
> > >   #include "portdrv.h"
> > > +#include "dpc.h"
> > >   #include "../pci.h"
> > > -struct dpc_dev {
> > > -	struct pci_dev		*pdev;
> > > -	u16			cap_pos;
> > > -	bool			rp_extensions;
> > > -	u8			rp_log_size;
> > > -};

> > Adding dpc.h shouldn't be in this patch because there's no need for a
> > separate dpc.h file yet -- it's only included this one place.  I'm not
> > positive a dpc.h is needed at all -- see comments on patch [4/5].

> Yes, if we use pdev in dpc functions used by EDR code, we should
> not need it.

I think struct dpc_dev might be more trouble than it's worth.  I
attached a possible patch at the end that folds the 25 bits of
DPC-related info into the struct pci_dev and gets rid of struct
dpc_dev completely.  I compiled it but haven't tested it.

> > > -static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
> > > +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc)
> > >   {
> > I don't see why you need to split this into dpc_reset_link_common()
> > and dpc_reset_link().  IIUC, you split it so the DPC driver path can
> > supply a pci_dev * via
> > 
> >    dpc_handler
> >      pcie_do_recovery
> >        pcie_do_recovery_common(..., NULL, NULL)
> >          reset_link(..., NULL, NULL)
> >            driver->reset_link(pdev)
> >              dpc_reset_link(pdev)
> >                dpc = to_dpc_dev(pdev)
>
> I have split it mainly because of dpc_dev dependency. If we use
> dpc_reset_link(pdev) directly it will try to find related dpc_dev using
> to_dpc_dev() function. But this will not work in FF mode where DPC
> is handled by firmware and hence we will not have DPC pcie_port
> service driver enumerated for this device.

The patch below might help this case.

> > > +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc)
> > > +{
> > Can you include the kzalloc() here so we don't have to do the alloc in
> > pci_acpi_add_edr_notifier()?

> Currently dpc driver allocates dpc_dev structure using pcie_port->device
> reference in its devm_alloc* calls. But if we allocate dpc_dev inside
> this function we should use pci_dev->dev reference for it. Is it OK to us
> pci_dev->dev reference for DPC driver allocations ?

I think the patch below would solve this issue too because we don't
need the alloc at all.

> > I think there's also a leak there: pci_acpi_add_edr_notifier()
> > kzallocs a struct dpc_dev, but I don't see a corresponding kfree().

> since we are using devm_allocs, It should be freed when removing
> the PCI device right?

Oh, right, sorry.


commit 7fb9f4495711 ("PCI/DPC: Move data to struct pci_dev")
Author: Bjorn Helgaas <bhelgaas@google.com>
Date:   Wed Feb 26 08:00:55 2020 -0600

    PCI/DPC: Move data to struct pci_dev
    

diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index e06f42f58d3d..6b116d7fdb89 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -17,13 +17,6 @@
 #include "portdrv.h"
 #include "../pci.h"
 
-struct dpc_dev {
-	struct pcie_device	*dev;
-	u16			cap_pos;
-	bool			rp_extensions;
-	u8			rp_log_size;
-};
-
 static const char * const rp_pio_error_string[] = {
 	"Configuration Request received UR Completion",	 /* Bit Position 0  */
 	"Configuration Request received CA Completion",	 /* Bit Position 1  */
@@ -46,63 +39,42 @@ static const char * const rp_pio_error_string[] = {
 	"Memory Request Completion Timeout",		 /* Bit Position 18 */
 };
 
-static struct dpc_dev *to_dpc_dev(struct pci_dev *dev)
-{
-	struct device *device;
-
-	device = pcie_port_find_device(dev, PCIE_PORT_SERVICE_DPC);
-	if (!device)
-		return NULL;
-	return get_service_data(to_pcie_device(device));
-}
-
 void pci_save_dpc_state(struct pci_dev *dev)
 {
-	struct dpc_dev *dpc;
 	struct pci_cap_saved_state *save_state;
 	u16 *cap;
 
 	if (!pci_is_pcie(dev))
 		return;
 
-	dpc = to_dpc_dev(dev);
-	if (!dpc)
-		return;
-
 	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
 	if (!save_state)
 		return;
 
 	cap = (u16 *)&save_state->cap.data[0];
-	pci_read_config_word(dev, dpc->cap_pos + PCI_EXP_DPC_CTL, cap);
+	pci_read_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, cap);
 }
 
 void pci_restore_dpc_state(struct pci_dev *dev)
 {
-	struct dpc_dev *dpc;
 	struct pci_cap_saved_state *save_state;
 	u16 *cap;
 
 	if (!pci_is_pcie(dev))
 		return;
 
-	dpc = to_dpc_dev(dev);
-	if (!dpc)
-		return;
-
 	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
 	if (!save_state)
 		return;
 
 	cap = (u16 *)&save_state->cap.data[0];
-	pci_write_config_word(dev, dpc->cap_pos + PCI_EXP_DPC_CTL, *cap);
+	pci_write_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, *cap);
 }
 
-static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
+static int dpc_wait_rp_inactive(struct pci_dev *pdev)
 {
 	unsigned long timeout = jiffies + HZ;
-	struct pci_dev *pdev = dpc->dev->port;
-	u16 cap = dpc->cap_pos, status;
+	u16 cap = pdev->dpc_cap, status;
 
 	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
 	while (status & PCI_EXP_DPC_RP_BUSY &&
@@ -119,15 +91,13 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
 
 static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
 {
-	struct dpc_dev *dpc;
 	u16 cap;
 
 	/*
 	 * DPC disables the Link automatically in hardware, so it has
 	 * already been reset by the time we get here.
 	 */
-	dpc = to_dpc_dev(pdev);
-	cap = dpc->cap_pos;
+	cap = pdev->dpc_cap;
 
 	/*
 	 * Wait until the Link is inactive, then clear DPC Trigger Status
@@ -135,7 +105,7 @@ static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
 	 */
 	pcie_wait_for_link(pdev, false);
 
-	if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc))
+	if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev))
 		return PCI_ERS_RESULT_DISCONNECT;
 
 	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
@@ -147,10 +117,9 @@ static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
-static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
+static void dpc_process_rp_pio_error(struct pci_dev *pdev)
 {
-	struct pci_dev *pdev = dpc->dev->port;
-	u16 cap = dpc->cap_pos, dpc_status, first_error;
+	u16 cap = pdev->dpc_cap, dpc_status, first_error;
 	u32 status, mask, sev, syserr, exc, dw0, dw1, dw2, dw3, log, prefix;
 	int i;
 
@@ -175,7 +144,7 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
 				first_error == i ? " (First)" : "");
 	}
 
-	if (dpc->rp_log_size < 4)
+	if (pdev->dpc_rp_log_size < 4)
 		goto clear_status;
 	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG,
 			      &dw0);
@@ -188,12 +157,12 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
 	pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n",
 		dw0, dw1, dw2, dw3);
 
-	if (dpc->rp_log_size < 5)
+	if (pdev->dpc_rp_log_size < 5)
 		goto clear_status;
 	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log);
 	pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log);
 
-	for (i = 0; i < dpc->rp_log_size - 5; i++) {
+	for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) {
 		pci_read_config_dword(pdev,
 			cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix);
 		pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix);
@@ -226,10 +195,9 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
 
 static irqreturn_t dpc_handler(int irq, void *context)
 {
+	struct pci_dev *pdev = context;
+	u16 cap = pdev->dpc_cap, status, source, reason, ext_reason;
 	struct aer_err_info info;
-	struct dpc_dev *dpc = context;
-	struct pci_dev *pdev = dpc->dev->port;
-	u16 cap = dpc->cap_pos, status, source, reason, ext_reason;
 
 	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
 	pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source);
@@ -248,8 +216,8 @@ static irqreturn_t dpc_handler(int irq, void *context)
 				     "reserved error");
 
 	/* show RP PIO error detail information */
-	if (dpc->rp_extensions && reason == 3 && ext_reason == 0)
-		dpc_process_rp_pio_error(dpc);
+	if (pdev->dpc_rp_extensions && reason == 3 && ext_reason == 0)
+		dpc_process_rp_pio_error(pdev);
 	else if (reason == 0 &&
 		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
 		 aer_get_device_error_info(pdev, &info)) {
@@ -266,9 +234,8 @@ static irqreturn_t dpc_handler(int irq, void *context)
 
 static irqreturn_t dpc_irq(int irq, void *context)
 {
-	struct dpc_dev *dpc = (struct dpc_dev *)context;
-	struct pci_dev *pdev = dpc->dev->port;
-	u16 cap = dpc->cap_pos, status;
+	struct pci_dev *pdev = context;
+	u16 cap = pdev->dpc_cap, status;
 
 	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
 
@@ -285,7 +252,6 @@ static irqreturn_t dpc_irq(int irq, void *context)
 #define FLAG(x, y) (((x) & (y)) ? '+' : '-')
 static int dpc_probe(struct pcie_device *dev)
 {
-	struct dpc_dev *dpc;
 	struct pci_dev *pdev = dev->port;
 	struct device *device = &dev->device;
 	int status;
@@ -294,43 +260,37 @@ static int dpc_probe(struct pcie_device *dev)
 	if (pcie_aer_get_firmware_first(pdev) && !pcie_ports_dpc_native)
 		return -ENOTSUPP;
 
-	dpc = devm_kzalloc(device, sizeof(*dpc), GFP_KERNEL);
-	if (!dpc)
-		return -ENOMEM;
-
-	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
-	dpc->dev = dev;
-	set_service_data(dev, dpc);
+	pdev->dpc_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
 
 	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
 					   dpc_handler, IRQF_SHARED,
-					   "pcie-dpc", dpc);
+					   "pcie-dpc", pdev);
 	if (status) {
 		pci_warn(pdev, "request IRQ%d failed: %d\n", dev->irq,
 			 status);
 		return status;
 	}
 
-	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap);
-	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
+	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap);
+	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);
 
-	dpc->rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT);
-	if (dpc->rp_extensions) {
-		dpc->rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
-		if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) {
+	pdev->dpc_rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT) ? 1 : 0;
+	if (pdev->dpc_rp_extensions) {
+		pdev->dpc_rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
+		if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) {
 			pci_err(pdev, "RP PIO log size %u is invalid\n",
-				dpc->rp_log_size);
-			dpc->rp_log_size = 0;
+				pdev->dpc_rp_log_size);
+			pdev->dpc_rp_log_size = 0;
 		}
 	}
 
 	ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
-	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
+	pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl);
 
 	pci_info(pdev, "error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
 		 cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT),
 		 FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP),
-		 FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), dpc->rp_log_size,
+		 FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), pdev->dpc_rp_log_size,
 		 FLAG(cap, PCI_EXP_DPC_CAP_DL_ACTIVE));
 
 	pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_DPC, sizeof(u16));
@@ -339,13 +299,12 @@ static int dpc_probe(struct pcie_device *dev)
 
 static void dpc_remove(struct pcie_device *dev)
 {
-	struct dpc_dev *dpc = get_service_data(dev);
 	struct pci_dev *pdev = dev->port;
 	u16 ctl;
 
-	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
+	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);
 	ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN);
-	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
+	pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl);
 }
 
 static struct pcie_port_service_driver dpcdriver = {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 3840a541a9de..a0b7e7a53741 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -444,6 +444,11 @@ struct pci_dev {
 	const struct attribute_group **msi_irq_groups;
 #endif
 	struct pci_vpd *vpd;
+#ifdef CONFIG_PCIE_DPC
+	u16		dpc_cap;
+	unsigned int	dpc_rp_extensions:1;
+	u8		dpc_rp_log_size;
+#endif
 #ifdef CONFIG_PCI_ATS
 	union {
 		struct pci_sriov	*sriov;		/* PF: SR-IOV info */
Kuppuswamy Sathyanarayanan Feb. 26, 2020, 9:26 p.m. UTC | #4
Hi Bjorn,

On 2/26/20 1:13 PM, Bjorn Helgaas wrote:
> On Wed, Feb 26, 2020 at 11:30:43AM -0800, Kuppuswamy Sathyanarayanan wrote:
>> On 2/25/20 5:02 PM, Bjorn Helgaas wrote:
>>> On Thu, Feb 13, 2020 at 10:20:15AM -0800, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
>>>> From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
>>>> ...
>>>> +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
>>>> +{
>>>> +	if (pcie_aer_get_firmware_first(dev))
>>>> +		return -EIO;
>>>> +
>>>> +	return pci_aer_clear_err_status_regs(dev);
>>>> +}
>>> We started with these:
>>>
>>>     pci_cleanup_aer_uncorrect_error_status()
>>>     pci_aer_clear_fatal_status()
>>>     pci_cleanup_aer_error_status_regs()
>>>
>>> This was already a mess.  They do basically similar things, but the
>>> function names are a complete jumble.  Let's start with preliminary
>>> patches to name them consistently, e.g.,
>>>
>>>     pci_aer_clear_nonfatal_status()
>>>     pci_aer_clear_fatal_status()
>>>     pci_aer_clear_status()
>>>
>>> Now, for EDR you eventually add this in edr_handle_event():
>>>
>>>     edr_handle_event()
>>>     {
>>>       ...
>>>       pci_aer_clear_err_uncor_status(pdev);
>>>       pci_aer_clear_err_fatal_status(pdev);
>>>       pci_aer_clear_err_status_regs(pdev);
>>>
>>> I see that this path needs to clear the status even in the
>>> firmware-first case, so you do need some change for that.  But
>>> pci_aer_clear_err_status_regs() does exactly the same thing as
>>> pci_aer_clear_err_uncor_status() and pci_aer_clear_err_fatal_status()
>>> plus a little more (clearing PCI_ERR_ROOT_STATUS), so it should be
>>> sufficient to just call pci_aer_clear_err_status_regs().
>>>
>>> So I don't think you need to make wrappers for
>>> pci_aer_clear_nonfatal_status() and pci_aer_clear_fatal_status() at
>>> all since they're not needed by the EDR path.
>>>
>>> You *do* need a wrapper for pci_aer_clear_status(), but instead of
>>> just adding random words ("err", "regs") to the name, please name it
>>> something like pci_aer_raw_clear_status() as a hint that it skips
>>> some sort of check.
> What do you think about the above?
I agree with your comments.. I will use your recommendation in
naming the wrappers I need.
>
>>> I would split this into a separate patch.  This patch contains a bunch
>>> of things that aren't really related except that they're needed for
>>> EDR.  I think it will be more readable if each patch just does one
>>> piece of it.
>> Ok. I will split it into multiple patches. I just grouped them together
>> as a preparatory patch for adding EDR support.
> Sounds good.
>
>>>> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
>>>> index 99fca8400956..acae12dbf9ff 100644
>>>> --- a/drivers/pci/pcie/dpc.c
>>>> +++ b/drivers/pci/pcie/dpc.c
>>>> @@ -15,15 +15,9 @@
>>>>    #include <linux/pci.h>
>>>>    #include "portdrv.h"
>>>> +#include "dpc.h"
>>>>    #include "../pci.h"
>>>> -struct dpc_dev {
>>>> -	struct pci_dev		*pdev;
>>>> -	u16			cap_pos;
>>>> -	bool			rp_extensions;
>>>> -	u8			rp_log_size;
>>>> -};
>>> Adding dpc.h shouldn't be in this patch because there's no need for a
>>> separate dpc.h file yet -- it's only included this one place.  I'm not
>>> positive a dpc.h is needed at all -- see comments on patch [4/5].
>> Yes, if we use pdev in dpc functions used by EDR code, we should
>> not need it.
> I think struct dpc_dev might be more trouble than it's worth.  I
> attached a possible patch at the end that folds the 25 bits of
> DPC-related info into the struct pci_dev and gets rid of struct
> dpc_dev completely.  I compiled it but haven't tested it.
That would solve most of my export issues. Do you want me
to add it to my patch list or merge it separately.
>
>>>> -static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
>>>> +pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc)
>>>>    {
>>> I don't see why you need to split this into dpc_reset_link_common()
>>> and dpc_reset_link().  IIUC, you split it so the DPC driver path can
>>> supply a pci_dev * via
>>>
>>>     dpc_handler
>>>       pcie_do_recovery
>>>         pcie_do_recovery_common(..., NULL, NULL)
>>>           reset_link(..., NULL, NULL)
>>>             driver->reset_link(pdev)
>>>               dpc_reset_link(pdev)
>>>                 dpc = to_dpc_dev(pdev)
>> I have split it mainly because of dpc_dev dependency. If we use
>> dpc_reset_link(pdev) directly it will try to find related dpc_dev using
>> to_dpc_dev() function. But this will not work in FF mode where DPC
>> is handled by firmware and hence we will not have DPC pcie_port
>> service driver enumerated for this device.
> The patch below might help this case.
Yes.
>
>>>> +void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc)
>>>> +{
>>> Can you include the kzalloc() here so we don't have to do the alloc in
>>> pci_acpi_add_edr_notifier()?
>> Currently dpc driver allocates dpc_dev structure using pcie_port->device
>> reference in its devm_alloc* calls. But if we allocate dpc_dev inside
>> this function we should use pci_dev->dev reference for it. Is it OK to us
>> pci_dev->dev reference for DPC driver allocations ?
> I think the patch below would solve this issue too because we don't
> need the alloc at all.
Agree.
>
>>> I think there's also a leak there: pci_acpi_add_edr_notifier()
>>> kzallocs a struct dpc_dev, but I don't see a corresponding kfree().
>> since we are using devm_allocs, It should be freed when removing
>> the PCI device right?
> Oh, right, sorry.
>
>
> commit 7fb9f4495711 ("PCI/DPC: Move data to struct pci_dev")
> Author: Bjorn Helgaas <bhelgaas@google.com>
> Date:   Wed Feb 26 08:00:55 2020 -0600
>
>      PCI/DPC: Move data to struct pci_dev
>      
>
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index e06f42f58d3d..6b116d7fdb89 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -17,13 +17,6 @@
>   #include "portdrv.h"
>   #include "../pci.h"
>   
> -struct dpc_dev {
> -	struct pcie_device	*dev;
> -	u16			cap_pos;
> -	bool			rp_extensions;
> -	u8			rp_log_size;
> -};
> -
>   static const char * const rp_pio_error_string[] = {
>   	"Configuration Request received UR Completion",	 /* Bit Position 0  */
>   	"Configuration Request received CA Completion",	 /* Bit Position 1  */
> @@ -46,63 +39,42 @@ static const char * const rp_pio_error_string[] = {
>   	"Memory Request Completion Timeout",		 /* Bit Position 18 */
>   };
>   
> -static struct dpc_dev *to_dpc_dev(struct pci_dev *dev)
> -{
> -	struct device *device;
> -
> -	device = pcie_port_find_device(dev, PCIE_PORT_SERVICE_DPC);
> -	if (!device)
> -		return NULL;
> -	return get_service_data(to_pcie_device(device));
> -}
> -
>   void pci_save_dpc_state(struct pci_dev *dev)
>   {
> -	struct dpc_dev *dpc;
>   	struct pci_cap_saved_state *save_state;
>   	u16 *cap;
>   
>   	if (!pci_is_pcie(dev))
>   		return;
>   
> -	dpc = to_dpc_dev(dev);
> -	if (!dpc)
> -		return;
> -
>   	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
>   	if (!save_state)
>   		return;
>   
>   	cap = (u16 *)&save_state->cap.data[0];
> -	pci_read_config_word(dev, dpc->cap_pos + PCI_EXP_DPC_CTL, cap);
> +	pci_read_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, cap);
>   }
>   
>   void pci_restore_dpc_state(struct pci_dev *dev)
>   {
> -	struct dpc_dev *dpc;
>   	struct pci_cap_saved_state *save_state;
>   	u16 *cap;
>   
>   	if (!pci_is_pcie(dev))
>   		return;
>   
> -	dpc = to_dpc_dev(dev);
> -	if (!dpc)
> -		return;
> -
>   	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
>   	if (!save_state)
>   		return;
>   
>   	cap = (u16 *)&save_state->cap.data[0];
> -	pci_write_config_word(dev, dpc->cap_pos + PCI_EXP_DPC_CTL, *cap);
> +	pci_write_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, *cap);
>   }
>   
> -static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
> +static int dpc_wait_rp_inactive(struct pci_dev *pdev)
>   {
>   	unsigned long timeout = jiffies + HZ;
> -	struct pci_dev *pdev = dpc->dev->port;
> -	u16 cap = dpc->cap_pos, status;
> +	u16 cap = pdev->dpc_cap, status;
>   
>   	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
>   	while (status & PCI_EXP_DPC_RP_BUSY &&
> @@ -119,15 +91,13 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
>   
>   static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
>   {
> -	struct dpc_dev *dpc;
>   	u16 cap;
>   
>   	/*
>   	 * DPC disables the Link automatically in hardware, so it has
>   	 * already been reset by the time we get here.
>   	 */
> -	dpc = to_dpc_dev(pdev);
> -	cap = dpc->cap_pos;
> +	cap = pdev->dpc_cap;
>   
>   	/*
>   	 * Wait until the Link is inactive, then clear DPC Trigger Status
> @@ -135,7 +105,7 @@ static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
>   	 */
>   	pcie_wait_for_link(pdev, false);
>   
> -	if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc))
> +	if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev))
>   		return PCI_ERS_RESULT_DISCONNECT;
>   
>   	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
> @@ -147,10 +117,9 @@ static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
>   	return PCI_ERS_RESULT_RECOVERED;
>   }
>   
> -static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
> +static void dpc_process_rp_pio_error(struct pci_dev *pdev)
>   {
> -	struct pci_dev *pdev = dpc->dev->port;
> -	u16 cap = dpc->cap_pos, dpc_status, first_error;
> +	u16 cap = pdev->dpc_cap, dpc_status, first_error;
>   	u32 status, mask, sev, syserr, exc, dw0, dw1, dw2, dw3, log, prefix;
>   	int i;
>   
> @@ -175,7 +144,7 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
>   				first_error == i ? " (First)" : "");
>   	}
>   
> -	if (dpc->rp_log_size < 4)
> +	if (pdev->dpc_rp_log_size < 4)
>   		goto clear_status;
>   	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG,
>   			      &dw0);
> @@ -188,12 +157,12 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
>   	pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n",
>   		dw0, dw1, dw2, dw3);
>   
> -	if (dpc->rp_log_size < 5)
> +	if (pdev->dpc_rp_log_size < 5)
>   		goto clear_status;
>   	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log);
>   	pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log);
>   
> -	for (i = 0; i < dpc->rp_log_size - 5; i++) {
> +	for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) {
>   		pci_read_config_dword(pdev,
>   			cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix);
>   		pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix);
> @@ -226,10 +195,9 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
>   
>   static irqreturn_t dpc_handler(int irq, void *context)
>   {
> +	struct pci_dev *pdev = context;
> +	u16 cap = pdev->dpc_cap, status, source, reason, ext_reason;
>   	struct aer_err_info info;
> -	struct dpc_dev *dpc = context;
> -	struct pci_dev *pdev = dpc->dev->port;
> -	u16 cap = dpc->cap_pos, status, source, reason, ext_reason;
>   
>   	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
>   	pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source);
> @@ -248,8 +216,8 @@ static irqreturn_t dpc_handler(int irq, void *context)
>   				     "reserved error");
>   
>   	/* show RP PIO error detail information */
> -	if (dpc->rp_extensions && reason == 3 && ext_reason == 0)
> -		dpc_process_rp_pio_error(dpc);
> +	if (pdev->dpc_rp_extensions && reason == 3 && ext_reason == 0)
> +		dpc_process_rp_pio_error(pdev);
>   	else if (reason == 0 &&
>   		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
>   		 aer_get_device_error_info(pdev, &info)) {
> @@ -266,9 +234,8 @@ static irqreturn_t dpc_handler(int irq, void *context)
>   
>   static irqreturn_t dpc_irq(int irq, void *context)
>   {
> -	struct dpc_dev *dpc = (struct dpc_dev *)context;
> -	struct pci_dev *pdev = dpc->dev->port;
> -	u16 cap = dpc->cap_pos, status;
> +	struct pci_dev *pdev = context;
> +	u16 cap = pdev->dpc_cap, status;
>   
>   	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
>   
> @@ -285,7 +252,6 @@ static irqreturn_t dpc_irq(int irq, void *context)
>   #define FLAG(x, y) (((x) & (y)) ? '+' : '-')
>   static int dpc_probe(struct pcie_device *dev)
>   {
> -	struct dpc_dev *dpc;
>   	struct pci_dev *pdev = dev->port;
>   	struct device *device = &dev->device;
>   	int status;
> @@ -294,43 +260,37 @@ static int dpc_probe(struct pcie_device *dev)
>   	if (pcie_aer_get_firmware_first(pdev) && !pcie_ports_dpc_native)
>   		return -ENOTSUPP;
>   
> -	dpc = devm_kzalloc(device, sizeof(*dpc), GFP_KERNEL);
> -	if (!dpc)
> -		return -ENOMEM;
> -
> -	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
> -	dpc->dev = dev;
> -	set_service_data(dev, dpc);
> +	pdev->dpc_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
>   
>   	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
>   					   dpc_handler, IRQF_SHARED,
> -					   "pcie-dpc", dpc);
> +					   "pcie-dpc", pdev);
>   	if (status) {
>   		pci_warn(pdev, "request IRQ%d failed: %d\n", dev->irq,
>   			 status);
>   		return status;
>   	}
>   
> -	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap);
> -	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
> +	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap);
> +	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);
>   
> -	dpc->rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT);
> -	if (dpc->rp_extensions) {
> -		dpc->rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
> -		if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) {
> +	pdev->dpc_rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT) ? 1 : 0;
> +	if (pdev->dpc_rp_extensions) {
> +		pdev->dpc_rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
> +		if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) {
>   			pci_err(pdev, "RP PIO log size %u is invalid\n",
> -				dpc->rp_log_size);
> -			dpc->rp_log_size = 0;
> +				pdev->dpc_rp_log_size);
> +			pdev->dpc_rp_log_size = 0;
>   		}
>   	}
>   
>   	ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
> -	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
> +	pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl);
>   
>   	pci_info(pdev, "error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
>   		 cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT),
>   		 FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP),
> -		 FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), dpc->rp_log_size,
> +		 FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), pdev->dpc_rp_log_size,
>   		 FLAG(cap, PCI_EXP_DPC_CAP_DL_ACTIVE));
>   
>   	pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_DPC, sizeof(u16));
> @@ -339,13 +299,12 @@ static int dpc_probe(struct pcie_device *dev)
>   
>   static void dpc_remove(struct pcie_device *dev)
>   {
> -	struct dpc_dev *dpc = get_service_data(dev);
>   	struct pci_dev *pdev = dev->port;
>   	u16 ctl;
>   
> -	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
> +	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);
>   	ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN);
> -	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
> +	pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl);
>   }
>   
>   static struct pcie_port_service_driver dpcdriver = {
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 3840a541a9de..a0b7e7a53741 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -444,6 +444,11 @@ struct pci_dev {
>   	const struct attribute_group **msi_irq_groups;
>   #endif
>   	struct pci_vpd *vpd;
> +#ifdef CONFIG_PCIE_DPC
> +	u16		dpc_cap;
> +	unsigned int	dpc_rp_extensions:1;
> +	u8		dpc_rp_log_size;
> +#endif
>   #ifdef CONFIG_PCI_ATS
>   	union {
>   		struct pci_sriov	*sriov;		/* PF: SR-IOV info */
>
Bjorn Helgaas Feb. 26, 2020, 9:48 p.m. UTC | #5
On Wed, Feb 26, 2020 at 01:26:09PM -0800, Kuppuswamy Sathyanarayanan wrote:
> On 2/26/20 1:13 PM, Bjorn Helgaas wrote:
> > On Wed, Feb 26, 2020 at 11:30:43AM -0800, Kuppuswamy Sathyanarayanan wrote:
> > > On 2/25/20 5:02 PM, Bjorn Helgaas wrote:
> > > > On Thu, Feb 13, 2020 at 10:20:15AM -0800, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
> > > > > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> > > > > ...

> > > > > diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> > > > > index 99fca8400956..acae12dbf9ff 100644
> > > > > --- a/drivers/pci/pcie/dpc.c
> > > > > +++ b/drivers/pci/pcie/dpc.c
> > > > > @@ -15,15 +15,9 @@
> > > > >    #include <linux/pci.h>
> > > > >    #include "portdrv.h"
> > > > > +#include "dpc.h"
> > > > >    #include "../pci.h"
> > > > > -struct dpc_dev {
> > > > > -	struct pci_dev		*pdev;
> > > > > -	u16			cap_pos;
> > > > > -	bool			rp_extensions;
> > > > > -	u8			rp_log_size;
> > > > > -};

> > > > Adding dpc.h shouldn't be in this patch because there's no need for a
> > > > separate dpc.h file yet -- it's only included this one place.  I'm not
> > > > positive a dpc.h is needed at all -- see comments on patch [4/5].

> > > Yes, if we use pdev in dpc functions used by EDR code, we should
> > > not need it.

> > I think struct dpc_dev might be more trouble than it's worth.  I
> > attached a possible patch at the end that folds the 25 bits of
> > DPC-related info into the struct pci_dev and gets rid of struct
> > dpc_dev completely.  I compiled it but haven't tested it.

> That would solve most of my export issues. Do you want me
> to add it to my patch list or merge it separately.

If you want to use that, I would merge it first, followed by the error
clearing cleanup, followed by the other EDR stuff.

Bjorn
diff mbox series

Patch

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 6394e7746fb5..136f27cf3871 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -443,6 +443,9 @@  struct aer_err_info {
 
 int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
+int pci_aer_clear_err_uncor_status(struct pci_dev *dev);
+void pci_aer_clear_err_fatal_status(struct pci_dev *dev);
+int pci_aer_clear_err_status_regs(struct pci_dev *dev);
 #endif	/* CONFIG_PCIEAER */
 
 #ifdef CONFIG_PCIE_DPC
@@ -549,6 +552,11 @@  static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
 /* PCI error reporting and recovery */
 void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
 		      u32 service);
+pci_ers_result_t pcie_do_recovery_common(struct pci_dev *dev,
+				enum pci_channel_state state,
+				u32 service,
+				pci_ers_result_t (*reset_cb)(void *cb_data),
+				void *cb_data);
 
 bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
 #ifdef CONFIG_PCIEASPM
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 4a818b07a1af..399836aa07f4 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -376,7 +376,7 @@  void pci_aer_clear_device_status(struct pci_dev *dev)
 	pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
 }
 
-int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
+int pci_aer_clear_err_uncor_status(struct pci_dev *dev)
 {
 	int pos;
 	u32 status, sev;
@@ -385,9 +385,6 @@  int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 	if (!pos)
 		return -EIO;
 
-	if (pcie_aer_get_firmware_first(dev))
-		return -EIO;
-
 	/* Clear status bits for ERR_NONFATAL errors only */
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
@@ -397,9 +394,17 @@  int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 
 	return 0;
 }
+
+int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
+{
+	if (pcie_aer_get_firmware_first(dev))
+		return -EIO;
+
+	return pci_aer_clear_err_uncor_status(dev);
+}
 EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
 
-void pci_aer_clear_fatal_status(struct pci_dev *dev)
+void pci_aer_clear_err_fatal_status(struct pci_dev *dev)
 {
 	int pos;
 	u32 status, sev;
@@ -408,9 +413,6 @@  void pci_aer_clear_fatal_status(struct pci_dev *dev)
 	if (!pos)
 		return;
 
-	if (pcie_aer_get_firmware_first(dev))
-		return;
-
 	/* Clear status bits for ERR_FATAL errors only */
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
@@ -419,7 +421,15 @@  void pci_aer_clear_fatal_status(struct pci_dev *dev)
 		pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
 }
 
-int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
+void pci_aer_clear_fatal_status(struct pci_dev *dev)
+{
+	if (pcie_aer_get_firmware_first(dev))
+		return;
+
+	return pci_aer_clear_err_fatal_status(dev);
+}
+
+int pci_aer_clear_err_status_regs(struct pci_dev *dev)
 {
 	int pos;
 	u32 status;
@@ -432,9 +442,6 @@  int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
 	if (!pos)
 		return -EIO;
 
-	if (pcie_aer_get_firmware_first(dev))
-		return -EIO;
-
 	port_type = pci_pcie_type(dev);
 	if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
 		pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);
@@ -450,6 +457,14 @@  int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
 	return 0;
 }
 
+int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
+{
+	if (pcie_aer_get_firmware_first(dev))
+		return -EIO;
+
+	return pci_aer_clear_err_status_regs(dev);
+}
+
 void pci_save_aer_state(struct pci_dev *dev)
 {
 	struct pci_cap_saved_state *save_state;
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index 99fca8400956..acae12dbf9ff 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -15,15 +15,9 @@ 
 #include <linux/pci.h>
 
 #include "portdrv.h"
+#include "dpc.h"
 #include "../pci.h"
 
-struct dpc_dev {
-	struct pci_dev		*pdev;
-	u16			cap_pos;
-	bool			rp_extensions;
-	u8			rp_log_size;
-};
-
 static const char * const rp_pio_error_string[] = {
 	"Configuration Request received UR Completion",	 /* Bit Position 0  */
 	"Configuration Request received CA Completion",	 /* Bit Position 1  */
@@ -117,36 +111,44 @@  static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
 	return 0;
 }
 
-static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
+pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc)
 {
-	struct dpc_dev *dpc;
 	u16 cap;
 
-	/*
-	 * DPC disables the Link automatically in hardware, so it has
-	 * already been reset by the time we get here.
-	 */
-	dpc = to_dpc_dev(pdev);
 	cap = dpc->cap_pos;
 
 	/*
 	 * Wait until the Link is inactive, then clear DPC Trigger Status
 	 * to allow the Port to leave DPC.
 	 */
-	pcie_wait_for_link(pdev, false);
+	pcie_wait_for_link(dpc->pdev, false);
 
 	if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc))
 		return PCI_ERS_RESULT_DISCONNECT;
 
-	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
+	pci_write_config_word(dpc->pdev, cap + PCI_EXP_DPC_STATUS,
 			      PCI_EXP_DPC_STATUS_TRIGGER);
 
-	if (!pcie_wait_for_link(pdev, true))
+	if (!pcie_wait_for_link(dpc->pdev, true))
 		return PCI_ERS_RESULT_DISCONNECT;
 
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
+static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
+{
+	struct dpc_dev *dpc;
+
+	/*
+	 * DPC disables the Link automatically in hardware, so it has
+	 * already been reset by the time we get here.
+	 */
+	dpc = to_dpc_dev(pdev);
+
+	return dpc_reset_link_common(dpc);
+
+}
+
 static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
 {
 	struct pci_dev *pdev = dpc->pdev;
@@ -224,10 +226,9 @@  static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
 	return 1;
 }
 
-static irqreturn_t dpc_handler(int irq, void *context)
+void dpc_process_error(struct dpc_dev *dpc)
 {
 	struct aer_err_info info;
-	struct dpc_dev *dpc = context;
 	struct pci_dev *pdev = dpc->pdev;
 	u16 cap = dpc->cap_pos, status, source, reason, ext_reason;
 
@@ -257,6 +258,14 @@  static irqreturn_t dpc_handler(int irq, void *context)
 		pci_cleanup_aer_uncorrect_error_status(pdev);
 		pci_aer_clear_fatal_status(pdev);
 	}
+}
+
+static irqreturn_t dpc_handler(int irq, void *context)
+{
+	struct dpc_dev *dpc = context;
+	struct pci_dev *pdev = dpc->pdev;
+
+	dpc_process_error(dpc);
 
 	/* We configure DPC so it only triggers on ERR_FATAL */
 	pcie_do_recovery(pdev, pci_channel_io_frozen, PCIE_PORT_SERVICE_DPC);
@@ -282,6 +291,25 @@  static irqreturn_t dpc_irq(int irq, void *context)
 	return IRQ_HANDLED;
 }
 
+void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc)
+{
+	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
+	dpc->pdev = pdev;
+	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &dpc->cap);
+	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &dpc->ctl);
+
+	dpc->rp_extensions = (dpc->cap & PCI_EXP_DPC_CAP_RP_EXT);
+	if (dpc->rp_extensions) {
+		dpc->rp_log_size =
+			(dpc->cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
+		if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) {
+			pci_err(pdev, "RP PIO log size %u is invalid\n",
+				dpc->rp_log_size);
+			dpc->rp_log_size = 0;
+		}
+	}
+}
+
 #define FLAG(x, y) (((x) & (y)) ? '+' : '-')
 static int dpc_probe(struct pcie_device *dev)
 {
@@ -298,8 +326,8 @@  static int dpc_probe(struct pcie_device *dev)
 	if (!dpc)
 		return -ENOMEM;
 
-	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
-	dpc->pdev = pdev;
+	dpc_dev_init(pdev, dpc);
+
 	set_service_data(dev, dpc);
 
 	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
@@ -311,18 +339,8 @@  static int dpc_probe(struct pcie_device *dev)
 		return status;
 	}
 
-	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap);
-	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
-
-	dpc->rp_extensions = (cap & PCI_EXP_DPC_CAP_RP_EXT);
-	if (dpc->rp_extensions) {
-		dpc->rp_log_size = (cap & PCI_EXP_DPC_RP_PIO_LOG_SIZE) >> 8;
-		if (dpc->rp_log_size < 4 || dpc->rp_log_size > 9) {
-			pci_err(pdev, "RP PIO log size %u is invalid\n",
-				dpc->rp_log_size);
-			dpc->rp_log_size = 0;
-		}
-	}
+	ctl = dpc->ctl;
+	cap = dpc->cap;
 
 	ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
 	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
diff --git a/drivers/pci/pcie/dpc.h b/drivers/pci/pcie/dpc.h
new file mode 100644
index 000000000000..2d82bc917fcb
--- /dev/null
+++ b/drivers/pci/pcie/dpc.h
@@ -0,0 +1,20 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef DRIVERS_PCIE_DPC_H
+#define DRIVERS_PCIE_DPC_H
+
+#include <linux/pci.h>
+
+struct dpc_dev {
+	struct pci_dev		*pdev;
+	u16			cap_pos;
+	bool			rp_extensions;
+	u8			rp_log_size;
+	u16			ctl;
+	u16			cap;
+};
+
+pci_ers_result_t dpc_reset_link_common(struct dpc_dev *dpc);
+void dpc_process_error(struct dpc_dev *dpc);
+void dpc_dev_init(struct pci_dev *pdev, struct dpc_dev *dpc);
+
+#endif /* DRIVERS_PCIE_DPC_H */
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index eefefe03857a..e7b9dfae9035 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -162,11 +162,18 @@  static pci_ers_result_t default_reset_link(struct pci_dev *dev)
 	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
 }
 
-static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
+static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service,
+				   pci_ers_result_t (*reset_cb)(void *cb_data),
+				   void *cb_data)
 {
 	pci_ers_result_t status;
 	struct pcie_port_service_driver *driver = NULL;
 
+	if (reset_cb) {
+		status = reset_cb(cb_data);
+		goto done;
+	}
+
 	driver = pcie_port_find_service(dev, service);
 	if (driver && driver->reset_link) {
 		status = driver->reset_link(dev);
@@ -178,6 +185,7 @@  static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
 		return PCI_ERS_RESULT_DISCONNECT;
 	}
 
+done:
 	if (status != PCI_ERS_RESULT_RECOVERED) {
 		pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
 			pci_name(dev));
@@ -187,8 +195,11 @@  static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
 	return status;
 }
 
-void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
-		      u32 service)
+pci_ers_result_t pcie_do_recovery_common(struct pci_dev *dev,
+				enum pci_channel_state state,
+				u32 service,
+				pci_ers_result_t (*reset_cb)(void *cb_data),
+				void *cb_data)
 {
 	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
 	struct pci_bus *bus;
@@ -209,7 +220,7 @@  void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
 		pci_walk_bus(bus, report_normal_detected, &status);
 
 	if (state == pci_channel_io_frozen) {
-		status = reset_link(dev, service);
+		status = reset_link(dev, service, reset_cb, cb_data);
 		if (status != PCI_ERS_RESULT_RECOVERED)
 			goto failed;
 	}
@@ -240,11 +251,20 @@  void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
 	pci_aer_clear_device_status(dev);
 	pci_cleanup_aer_uncorrect_error_status(dev);
 	pci_info(dev, "device recovery successful\n");
-	return;
+
+	return status;
 
 failed:
 	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
 
 	/* TODO: Should kernel panic here? */
 	pci_info(dev, "device recovery failed\n");
+
+	return status;
+}
+
+void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
+		      u32 service)
+{
+	pcie_do_recovery_common(dev, state, service, NULL, NULL);
 }