diff mbox series

[v2,2/2] PCI/DPC: Add Error Disconnect Recover (EDR) support

Message ID 4818f4dfa41bddc3968da4717c453897d4ad5717.1553027719.git.sathyanarayanan.kuppuswamy@linux.intel.com (mailing list archive)
State Not Applicable, archived
Headers show
Series Add Error Disconnect Recover (EDR) support. | expand

Commit Message

Kuppuswamy Sathyanarayanan March 19, 2019, 8:47 p.m. UTC
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>

As per PCI firmware specification v3.2 ECN
(https://members.pcisig.com/wg/PCI-SIG/document/12614), when firmware
owns Downstream Port Containment (DPC), its expected to use the "Error
Disconnect Recover" (EDR) notification to alert OSPM of a DPC event and
if OS supports EDR, its expected to handle the software state invalidation
and port recovery in OS and let firmware know the recovery status via _OST
ACPI call.

Since EDR is a hybrid service, DPC service shall be enabled in OS even
if AER is not natively enabled in OS.

Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
---
 drivers/pci/pcie/Kconfig        |  10 +
 drivers/pci/pcie/dpc.c          | 326 ++++++++++++++++++++++++++++++--
 drivers/pci/pcie/portdrv_core.c |   9 +-
 3 files changed, 329 insertions(+), 16 deletions(-)

Comments

Bjorn Helgaas April 10, 2019, 6:41 p.m. UTC | #1
On Tue, Mar 19, 2019 at 01:47:29PM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
> From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> 
> As per PCI firmware specification v3.2 ECN
> (https://members.pcisig.com/wg/PCI-SIG/document/12614), when firmware
> owns Downstream Port Containment (DPC), its expected to use the "Error
> Disconnect Recover" (EDR) notification to alert OSPM of a DPC event and
> if OS supports EDR, its expected to handle the software state invalidation
> and port recovery in OS and let firmware know the recovery status via _OST
> ACPI call.
> 
> Since EDR is a hybrid service, DPC service shall be enabled in OS even
> if AER is not natively enabled in OS.
> 
> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> ---
>  drivers/pci/pcie/Kconfig        |  10 +
>  drivers/pci/pcie/dpc.c          | 326 ++++++++++++++++++++++++++++++--

I'll be looking for Keith's reviewed-by for this eventually.

It looks like this could be split into some smaller patches:

  - Add dpc_dev.native_dpc (hopefully we can find a less confusing name)
  - Convert dpc_handler() to dpc_handler() + dpc_process_error()
  - Add EDR support

>  drivers/pci/pcie/portdrv_core.c |   9 +-
>  3 files changed, 329 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
> index 5cbdbca904ac..55f65d1cbc9e 100644
> --- a/drivers/pci/pcie/Kconfig
> +++ b/drivers/pci/pcie/Kconfig
> @@ -142,3 +142,13 @@ config PCIE_PTM
>  
>  	  This is only useful if you have devices that support PTM, but it
>  	  is safe to enable even if you don't.
> +
> +config PCIE_EDR
> +	bool "PCI Express Error Disconnect Recover support"
> +	default n
> +	depends on PCIE_DPC && ACPI
> +	help
> +	  This options adds Error Disconnect Recover support as specified
> +	  in PCI firmware specification v3.2 Downstream Port Containment
> +	  Related Enhancements ECN. Enable this if you want to support hybrid
> +	  DPC model which uses both firmware and OS to implement DPC.
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index 7b77754a82de..bdf4ca8a0acb 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -11,6 +11,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/init.h>
>  #include <linux/pci.h>
> +#include <linux/acpi.h>
>  
>  #include "portdrv.h"
>  #include "../pci.h"
> @@ -20,8 +21,23 @@ struct dpc_dev {
>  	u16			cap_pos;
>  	bool			rp_extensions;
>  	u8			rp_log_size;
> +	bool			native_dpc;

This is going to be way too confusing with a "native_dpc" in both the
struct pci_host_bridge and the struct dpc_dev.

> +	pci_ers_result_t	error_state;
> +#ifdef CONFIG_ACPI
> +	struct acpi_device	*adev;
> +#endif
>  };
>  
> +#ifdef CONFIG_ACPI
> +
> +#define EDR_PORT_ENABLE_DSM     0x0C
> +#define EDR_PORT_LOCATE_DSM     0x0D
> +
> +static const guid_t pci_acpi_dsm_guid =
> +		GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a,
> +			  0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d);
> +#endif
> +
>  static const char * const rp_pio_error_string[] = {
>  	"Configuration Request received UR Completion",	 /* Bit Position 0  */
>  	"Configuration Request received CA Completion",	 /* Bit Position 1  */
> @@ -67,6 +83,9 @@ void pci_save_dpc_state(struct pci_dev *dev)
>  	if (!dpc)
>  		return;
>  
> +	if (!dpc->native_dpc)
> +		return;
> +
>  	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
>  	if (!save_state)
>  		return;
> @@ -88,6 +107,9 @@ void pci_restore_dpc_state(struct pci_dev *dev)
>  	if (!dpc)
>  		return;
>  
> +	if (!dpc->native_dpc)
> +		return;
> +
>  	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
>  	if (!save_state)
>  		return;
> @@ -224,10 +246,9 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
>  	return 1;
>  }
>  
> -static irqreturn_t dpc_handler(int irq, void *context)
> +static void dpc_process_error(struct dpc_dev *dpc)
>  {
>  	struct aer_err_info info;
> -	struct dpc_dev *dpc = context;
>  	struct pci_dev *pdev = dpc->dev->port;
>  	struct device *dev = &dpc->dev->device;
>  	u16 cap = dpc->cap_pos, status, source, reason, ext_reason;
> @@ -261,6 +282,13 @@ static irqreturn_t dpc_handler(int irq, void *context)
>  
>  	/* We configure DPC so it only triggers on ERR_FATAL */
>  	pcie_do_recovery(pdev, pci_channel_io_frozen, PCIE_PORT_SERVICE_DPC);
> +}
> +
> +static irqreturn_t dpc_handler(int irq, void *context)
> +{
> +	struct dpc_dev *dpc = context;
> +
> +	dpc_process_error(dpc);
>  
>  	return IRQ_HANDLED;
>  }
> @@ -283,6 +311,230 @@ static irqreturn_t dpc_irq(int irq, void *context)
>  	return IRQ_HANDLED;
>  }
>  
> +void dpc_error_resume(struct pci_dev *dev)

Looks like this should be static?

> +{
> +	struct dpc_dev *dpc;
> +
> +	dpc = to_dpc_dev(dev);
> +	if (!dpc)
> +		return;

I don't think it's possible for dpc to be NULL, is it?  Remove the
test if not.

> +	dpc->error_state = PCI_ERS_RESULT_RECOVERED;
> +}
> +
> +#ifdef CONFIG_ACPI
> +
> +/*
> + * _DSM wrapper function to enable/disable DPC port.
> + * @dpc   : DPC device structure
> + * @enable: status of DPC port (0 or 1).
> + *
> + * returns 0 on success or errno on failure.
> + */
> +static int acpi_enable_dpc_port(struct dpc_dev *dpc, bool enable)
> +{
> +	union acpi_object *obj;
> +	int status;
> +	union acpi_object argv4;
> +
> +	/* Check whether EDR_PORT_ENABLE_DSM is supported in firmware */
> +	status = acpi_check_dsm(dpc->adev->handle, &pci_acpi_dsm_guid, 1,
> +				1 << EDR_PORT_ENABLE_DSM);

I don't think this acpi_check_dsm() is necessary, is it?  I expect
acpi_evaluate_dsm() to fail nicely if there's no _DSM or it doesn't
support the function.

> +	if (!status)
> +		return -ENOTSUPP;
> +
> +	argv4.type = ACPI_TYPE_INTEGER;
> +	argv4.integer.value = enable;
> +
> +	obj = acpi_evaluate_dsm(dpc->adev->handle, &pci_acpi_dsm_guid, 1,
> +				EDR_PORT_ENABLE_DSM, &argv4);
> +	if (!obj)
> +		return -EIO;
> +
> +	if (obj->type == ACPI_TYPE_INTEGER && obj->integer.value == enable)
> +		status = 0;
> +	else
> +		status = -EIO;
> +
> +	ACPI_FREE(obj);
> +
> +	return status;
> +}
> +
> +/*
> + * _DSM wrapper function to locate DPC port.
> + * @dpc   : DPC device structure
> + *
> + * returns pci_dev or NULL.
> + */
> +static struct pci_dev *acpi_locate_dpc_port(struct dpc_dev *dpc)
> +{
> +	union acpi_object *obj;
> +	int status;
> +	u16 port;
> +
> +	/* Check whether EDR_PORT_LOCATE_DSM is supported in firmware */
> +	status = acpi_check_dsm(dpc->adev->handle, &pci_acpi_dsm_guid, 1,
> +				1 << EDR_PORT_LOCATE_DSM);

Unnecessary?

> +	if (!status)
> +		return dpc->dev->port;

If you *do* need the acpi_check_dsm(), I would have expected to return
NULL in this error case?

> +
> +

Extra blank line here.

> +	obj = acpi_evaluate_dsm(dpc->adev->handle, &pci_acpi_dsm_guid, 1,
> +				EDR_PORT_LOCATE_DSM, NULL);
> +	if (!obj)
> +		return NULL;
> +
> +	if (obj->type == ACPI_TYPE_INTEGER) {
> +		/*
> +		 * Firmware returns DPC port BDF details in following format:
> +		 *	15:8 = bus
> +		 *	7:3 = device
> +		 *	2:0 = function
> +		 */
> +		port = obj->integer.value;
> +		ACPI_FREE(obj);
> +	} else {
> +		ACPI_FREE(obj);
> +		return NULL;
> +	}
> +
> +	return pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(port), port & 0xff);
> +}
> +
> +/*
> + * _OST wrapper function to let firmware know the status of EDR event.
> + * @dpc   : DPC device structure.
> + * @status: Status of EDR event.
> + *

Spurious blank line in the comment.

> + */
> +static int acpi_send_edr_status(struct dpc_dev *dpc,  u16 status)
> +{
> +	u32 ost_status;
> +	struct pci_dev *pdev = dpc->dev->port;
> +
> +	dev_dbg(&pdev->dev, "Sending EDR status :%x\n", status);
> +
> +	ost_status =  PCI_DEVID(pdev->bus->number, pdev->devfn);
> +	ost_status = (ost_status << 16) | status;
> +
> +	if (!acpi_has_method(dpc->adev->handle, "_OST"))
> +		return -ENOTSUPP;

This acpi_has_method() check is superfluous, isn't it?  I would expect
acpi_evaluate_ost() to fail gracefully if there's no _OST.

I do see several other instances of code of the form:

  if (!acpi_has_method(handle, "XXXX"))
    return false;

  status = acpi_evaluate_*(handle, "XXXX", ...);

But I think that's a pointless pattern.  I think we could just try to
evaluate the method directly, since the evaluation will fail if the
method doesn't exist:

  status = acpi_evaluate_*(handle, "XXXX", ...);

> +	status = acpi_evaluate_ost(dpc->adev->handle,
> +				   ACPI_NOTIFY_DISCONNECT_RECOVER,
> +				   ost_status, NULL);
> +	if (ACPI_FAILURE(status))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +/*
> + * Helper function used for disconnecting the child devices when EDR event is
> + * received from firmware.
> + */
> +static void dpc_disconnect_devices(struct pci_dev *dev)
> +{
> +	struct pci_dev *udev;
> +	struct pci_bus *parent;
> +	struct pci_dev *pdev, *temp;
> +
> +	dev_dbg(&dev->dev, "Disconnecting the child devices\n");
> +
> +	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
> +		udev = dev;
> +	else
> +		udev = dev->bus->self;
> +
> +	parent = udev->subordinate;
> +	pci_walk_bus(parent, pci_dev_set_disconnected, NULL);
> +
> +	pci_lock_rescan_remove();
> +	pci_dev_get(dev);
> +	list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
> +					 bus_list) {
> +		pci_stop_and_remove_bus_device(pdev);
> +	}
> +	pci_dev_put(dev);
> +	pci_unlock_rescan_remove();
> +}
> +
> +static void edr_handle_event(acpi_handle handle, u32 event, void *data)
> +{
> +	struct dpc_dev *dpc = (struct dpc_dev *) data;
> +	struct pci_dev *pdev;
> +	u16 status, cap;
> +
> +	if (event != ACPI_NOTIFY_DISCONNECT_RECOVER)
> +		return;
> +
> +	if (!data) {
> +		pr_err("Invalid EDR event\n");

In what instance can "data" be NULL?  I think *never*, unless ACPI
screwed up and lost the context you supplied to
acpi_install_notify_handler().  In that case, we should do something
more significant than print a message and just return.  I think we
should just omit this test, and if ACPI screwed up, we'll take a null
pointer dereference oops, we'll see exactly where, and we'll have a
chance to fix the problem.

> +		return;
> +	}
> +
> +	dev_dbg(&dpc->dev->port->dev, "Valid EDR event received\n");

Set "pdev" at declaration and use it in these printks.

> +
> +	/*
> +	 * Check if _DSM(0xD) is available, and if present locate the
> +	 * port which issued EDR event.
> +	 */
> +	pdev = acpi_locate_dpc_port(dpc);

Can this be done at probe-time instead of event-time?

> +	if (!pdev) {
> +		dev_err(&dpc->dev->port->dev, "No valid port found\n");
> +		return;
> +	}
> +
> +	/*
> +	 * Get DPC priv data for given pdev
> +	 */
> +	dpc = to_dpc_dev(pdev);
> +	dpc->error_state = PCI_ERS_RESULT_DISCONNECT;
> +	pdev = dpc->dev->port;

It's quite confusing to reassign pdev here.  The comment about "Get
DPC priv data for given pdev" is really superfluous since that much is
obvious from the code.  What's *not* obvious is whether this "pdev =
dpc->dev->port" changes anything and if so, why.

> +	cap = dpc->cap_pos;
> +
> +	/*
> +	 * Check if the port supports DPC:
> +	 *
> +	 * if port does not support DPC, then let firmware handle
> +	 * the error recovery and OS is responsible for cleaning
> +	 * up the child devices.
> +	 *
> +	 * if port supports DPC, then fall back to default error
> +	 * recovery.

Capitalize first letter of sentences.

> +	 *
> +	 */
> +	if (cap) {
> +		/* Check if there is a valid DPC trigger */
> +		pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
> +		if (!(status & PCI_EXP_DPC_STATUS_TRIGGER)) {
> +			dev_err(&pdev->dev, "Invalid DPC trigger\n");

Include "status" value in the printk.

> +			return;
> +		}
> +		dpc_process_error(dpc);
> +	}
> +
> +	if (dpc->error_state == PCI_ERS_RESULT_RECOVERED) {
> +		/*
> +		 * Recovery is successful, so send
> +		 * _OST(0xF, BDF << 16 | 0x80, "") to firmware.
> +		 */
> +		status = 0x80;
> +	} else {
> +		/*
> +		 * Recovery is not successful, so disconnect child devices
> +		 * and send _OST(0xF, BDF << 16 | 0x81, "") to firmware.
> +		 */
> +		dpc_disconnect_devices(pdev);
> +		status = 0x81;
> +	}
> +
> +	acpi_send_edr_status(dpc, status);
> +}
> +
> +#endif
> +
>  #define FLAG(x, y) (((x) & (y)) ? '+' : '-')
>  static int dpc_probe(struct pcie_device *dev)
>  {
> @@ -291,9 +543,10 @@ static int dpc_probe(struct pcie_device *dev)
>  	struct device *device = &dev->device;
>  	int status;
>  	u16 ctl, cap;
> -
> -	if (pcie_aer_get_firmware_first(pdev))
> -		return -ENOTSUPP;
> +#ifdef CONFIG_ACPI
> +	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
> +	acpi_status astatus;
> +#endif
>  
>  	dpc = devm_kzalloc(device, sizeof(*dpc), GFP_KERNEL);
>  	if (!dpc)
> @@ -302,16 +555,53 @@ static int dpc_probe(struct pcie_device *dev)
>  	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
>  	dpc->dev = dev;
>  	set_service_data(dev, dpc);
> +	dpc->error_state = PCI_ERS_RESULT_NONE;
> +
> +	if (!pcie_aer_get_firmware_first(pdev))
> +		if (pci_aer_available() && dpc->cap_pos)
> +			dpc->native_dpc = 1;
>  
> -	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
> -					   dpc_handler, IRQF_SHARED,
> -					   "pcie-dpc", dpc);
> -	if (status) {
> -		dev_warn(device, "request IRQ%d failed: %d\n", dev->irq,
> -			 status);
> -		return status;
> +	/*
> +	 * If native support is not enabled and ACPI is not
> +	 * enabled then return error.
> +	 */
> +	if (!dpc->native_dpc && !IS_ENABLED(CONFIG_APCI))
> +		return -ENODEV;
> +
> +	if (dpc->native_dpc) {
> +		status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
> +						   dpc_handler, IRQF_SHARED,
> +						   "pcie-dpc", dpc);
> +		if (status) {
> +			dev_warn(device, "request IRQ%d failed: %d\n", dev->irq,
> +				 status);
> +			return status;
> +		}
>  	}
>  
> +#ifdef CONFIG_ACPI
> +	if (!dpc->native_dpc) {
> +		if (!adev) {
> +			dev_err(device, "No valid acpi device found\n");

s/acpi/ACPI/ (in comments, printk, other English text)

> +			return -ENODEV;
> +		}
> +
> +		dpc->adev = adev;
> +
> +		/* Register ACPI notifier for EDR event */

"Register handler for System events, one of which is the EDR event"?

> +		astatus = acpi_install_notify_handler(adev->handle,
> +						      ACPI_SYSTEM_NOTIFY,
> +						      edr_handle_event,
> +						      dpc);
> +
> +		if (ACPI_FAILURE(astatus)) {
> +			dev_err(device, "Install notifier failed\n");

Mention "ACPI notify handler" here.

> +			return -EBUSY;
> +		}
> +
> +		acpi_enable_dpc_port(dpc, true);
> +	}
> +#endif
>  	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap);
>  	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
>  
> @@ -325,8 +615,12 @@ static int dpc_probe(struct pcie_device *dev)
>  		}
>  	}
>  
> -	ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
> -	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
> +	if (dpc->native_dpc) {
> +		ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL |
> +			PCI_EXP_DPC_CTL_INT_EN;
> +		pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL,
> +				      ctl);
> +	}
>  
>  	dev_info(device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
>  		cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT),
> @@ -344,6 +638,9 @@ static void dpc_remove(struct pcie_device *dev)
>  	struct pci_dev *pdev = dev->port;
>  	u16 ctl;
>  
> +	if (!dpc->native_dpc)
> +		return;
> +
>  	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
>  	ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN);
>  	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
> @@ -356,6 +653,7 @@ static struct pcie_port_service_driver dpcdriver = {
>  	.probe		= dpc_probe,
>  	.remove		= dpc_remove,
>  	.reset_link	= dpc_reset_link,
> +	.error_resume   = dpc_error_resume,
>  };
>  
>  int __init pcie_dpc_init(void)
> diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
> index 865f12f4b314..050cbb7a5083 100644
> --- a/drivers/pci/pcie/portdrv_core.c
> +++ b/drivers/pci/pcie/portdrv_core.c
> @@ -249,9 +249,14 @@ static int get_port_device_capability(struct pci_dev *dev)
>  		pcie_pme_interrupt_enable(dev, false);
>  	}
>  
> +	/*
> +	 * If EDR support is enabled in OS, then even if AER is not handled in
> +	 * OS, DPC service can be enabled.
> +	 */
>  	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC) &&
> -	    pci_aer_available() && services & PCIE_PORT_SERVICE_AER &&
> -	    (pcie_ports_native || host->native_dpc))
> +	    ((IS_ENABLED(CONFIG_PCIE_EDR) && !host->native_dpc) ||
> +	    (pci_aer_available() && services & PCIE_PORT_SERVICE_AER &&
> +	    (pcie_ports_native || host->native_dpc))))

Holy cow, I think I'll have to schedule an hour sometime to figure
out what's going on here :)

>  		services |= PCIE_PORT_SERVICE_DPC;
>  
>  	if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM || -- 2.20.1
>
Kuppuswamy Sathyanarayanan April 10, 2019, 10:12 p.m. UTC | #2
Hi ,

On 4/10/19 11:41 AM, Bjorn Helgaas wrote:
> On Tue, Mar 19, 2019 at 01:47:29PM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
>> From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
>>
>> As per PCI firmware specification v3.2 ECN
>> (https://members.pcisig.com/wg/PCI-SIG/document/12614), when firmware
>> owns Downstream Port Containment (DPC), its expected to use the "Error
>> Disconnect Recover" (EDR) notification to alert OSPM of a DPC event and
>> if OS supports EDR, its expected to handle the software state invalidation
>> and port recovery in OS and let firmware know the recovery status via _OST
>> ACPI call.
>>
>> Since EDR is a hybrid service, DPC service shall be enabled in OS even
>> if AER is not natively enabled in OS.
>>
>> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
>> ---
>>   drivers/pci/pcie/Kconfig        |  10 +
>>   drivers/pci/pcie/dpc.c          | 326 ++++++++++++++++++++++++++++++--
Thanks for the review comments.
> I'll be looking for Keith's reviewed-by for this eventually.
>
> It looks like this could be split into some smaller patches:
>
>    - Add dpc_dev.native_dpc (hopefully we can find a less confusing name)
>    - Convert dpc_handler() to dpc_handler() + dpc_process_error()
>    - Add EDR support
Ok. I will split them in next patch set.
>
>>   drivers/pci/pcie/portdrv_core.c |   9 +-
>>   3 files changed, 329 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
>> index 5cbdbca904ac..55f65d1cbc9e 100644
>> --- a/drivers/pci/pcie/Kconfig
>> +++ b/drivers/pci/pcie/Kconfig
>> @@ -142,3 +142,13 @@ config PCIE_PTM
>>   
>>   	  This is only useful if you have devices that support PTM, but it
>>   	  is safe to enable even if you don't.
>> +
>> +config PCIE_EDR
>> +	bool "PCI Express Error Disconnect Recover support"
>> +	default n
>> +	depends on PCIE_DPC && ACPI
>> +	help
>> +	  This options adds Error Disconnect Recover support as specified
>> +	  in PCI firmware specification v3.2 Downstream Port Containment
>> +	  Related Enhancements ECN. Enable this if you want to support hybrid
>> +	  DPC model which uses both firmware and OS to implement DPC.
>> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
>> index 7b77754a82de..bdf4ca8a0acb 100644
>> --- a/drivers/pci/pcie/dpc.c
>> +++ b/drivers/pci/pcie/dpc.c
>> @@ -11,6 +11,7 @@
>>   #include <linux/interrupt.h>
>>   #include <linux/init.h>
>>   #include <linux/pci.h>
>> +#include <linux/acpi.h>
>>   
>>   #include "portdrv.h"
>>   #include "../pci.h"
>> @@ -20,8 +21,23 @@ struct dpc_dev {
>>   	u16			cap_pos;
>>   	bool			rp_extensions;
>>   	u8			rp_log_size;
>> +	bool			native_dpc;
> This is going to be way too confusing with a "native_dpc" in both the
> struct pci_host_bridge and the struct dpc_dev.
Any suggestions? what about native_mode ?
>
>> +	pci_ers_result_t	error_state;
>> +#ifdef CONFIG_ACPI
>> +	struct acpi_device	*adev;
>> +#endif
>>   };
>>   
>> +#ifdef CONFIG_ACPI
>> +
>> +#define EDR_PORT_ENABLE_DSM     0x0C
>> +#define EDR_PORT_LOCATE_DSM     0x0D
>> +
>> +static const guid_t pci_acpi_dsm_guid =
>> +		GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a,
>> +			  0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d);
>> +#endif
>> +
>>   static const char * const rp_pio_error_string[] = {
>>   	"Configuration Request received UR Completion",	 /* Bit Position 0  */
>>   	"Configuration Request received CA Completion",	 /* Bit Position 1  */
>> @@ -67,6 +83,9 @@ void pci_save_dpc_state(struct pci_dev *dev)
>>   	if (!dpc)
>>   		return;
>>   
>> +	if (!dpc->native_dpc)
>> +		return;
>> +
>>   	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
>>   	if (!save_state)
>>   		return;
>> @@ -88,6 +107,9 @@ void pci_restore_dpc_state(struct pci_dev *dev)
>>   	if (!dpc)
>>   		return;
>>   
>> +	if (!dpc->native_dpc)
>> +		return;
>> +
>>   	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
>>   	if (!save_state)
>>   		return;
>> @@ -224,10 +246,9 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
>>   	return 1;
>>   }
>>   
>> -static irqreturn_t dpc_handler(int irq, void *context)
>> +static void dpc_process_error(struct dpc_dev *dpc)
>>   {
>>   	struct aer_err_info info;
>> -	struct dpc_dev *dpc = context;
>>   	struct pci_dev *pdev = dpc->dev->port;
>>   	struct device *dev = &dpc->dev->device;
>>   	u16 cap = dpc->cap_pos, status, source, reason, ext_reason;
>> @@ -261,6 +282,13 @@ static irqreturn_t dpc_handler(int irq, void *context)
>>   
>>   	/* We configure DPC so it only triggers on ERR_FATAL */
>>   	pcie_do_recovery(pdev, pci_channel_io_frozen, PCIE_PORT_SERVICE_DPC);
>> +}
>> +
>> +static irqreturn_t dpc_handler(int irq, void *context)
>> +{
>> +	struct dpc_dev *dpc = context;
>> +
>> +	dpc_process_error(dpc);
>>   
>>   	return IRQ_HANDLED;
>>   }
>> @@ -283,6 +311,230 @@ static irqreturn_t dpc_irq(int irq, void *context)
>>   	return IRQ_HANDLED;
>>   }
>>   
>> +void dpc_error_resume(struct pci_dev *dev)
> Looks like this should be static?
yes. I will fix it in next version.
>
>> +{
>> +	struct dpc_dev *dpc;
>> +
>> +	dpc = to_dpc_dev(dev);
>> +	if (!dpc)
>> +		return;
> I don't think it's possible for dpc to be NULL, is it?  Remove the
> test if not.
ok.
>
>> +	dpc->error_state = PCI_ERS_RESULT_RECOVERED;
>> +}
>> +
>> +#ifdef CONFIG_ACPI
>> +
>> +/*
>> + * _DSM wrapper function to enable/disable DPC port.
>> + * @dpc   : DPC device structure
>> + * @enable: status of DPC port (0 or 1).
>> + *
>> + * returns 0 on success or errno on failure.
>> + */
>> +static int acpi_enable_dpc_port(struct dpc_dev *dpc, bool enable)
>> +{
>> +	union acpi_object *obj;
>> +	int status;
>> +	union acpi_object argv4;
>> +
>> +	/* Check whether EDR_PORT_ENABLE_DSM is supported in firmware */
>> +	status = acpi_check_dsm(dpc->adev->handle, &pci_acpi_dsm_guid, 1,
>> +				1 << EDR_PORT_ENABLE_DSM);
> I don't think this acpi_check_dsm() is necessary, is it?  I expect
> acpi_evaluate_dsm() to fail nicely if there's no _DSM or it doesn't
> support the function.
will remove it next version.
>
>> +	if (!status)
>> +		return -ENOTSUPP;
>> +
>> +	argv4.type = ACPI_TYPE_INTEGER;
>> +	argv4.integer.value = enable;
>> +
>> +	obj = acpi_evaluate_dsm(dpc->adev->handle, &pci_acpi_dsm_guid, 1,
>> +				EDR_PORT_ENABLE_DSM, &argv4);
>> +	if (!obj)
>> +		return -EIO;
>> +
>> +	if (obj->type == ACPI_TYPE_INTEGER && obj->integer.value == enable)
>> +		status = 0;
>> +	else
>> +		status = -EIO;
>> +
>> +	ACPI_FREE(obj);
>> +
>> +	return status;
>> +}
>> +
>> +/*
>> + * _DSM wrapper function to locate DPC port.
>> + * @dpc   : DPC device structure
>> + *
>> + * returns pci_dev or NULL.
>> + */
>> +static struct pci_dev *acpi_locate_dpc_port(struct dpc_dev *dpc)
>> +{
>> +	union acpi_object *obj;
>> +	int status;
>> +	u16 port;
>> +
>> +	/* Check whether EDR_PORT_LOCATE_DSM is supported in firmware */
>> +	status = acpi_check_dsm(dpc->adev->handle, &pci_acpi_dsm_guid, 1,
>> +				1 << EDR_PORT_LOCATE_DSM);
> Unnecessary?
Agreed
>
>> +	if (!status)
>> +		return dpc->dev->port;
> If you *do* need the acpi_check_dsm(), I would have expected to return
> NULL in this error case?
>
>> +
>> +
> Extra blank line here.
>
>> +	obj = acpi_evaluate_dsm(dpc->adev->handle, &pci_acpi_dsm_guid, 1,
>> +				EDR_PORT_LOCATE_DSM, NULL);
>> +	if (!obj)
>> +		return NULL;
>> +
>> +	if (obj->type == ACPI_TYPE_INTEGER) {
>> +		/*
>> +		 * Firmware returns DPC port BDF details in following format:
>> +		 *	15:8 = bus
>> +		 *	7:3 = device
>> +		 *	2:0 = function
>> +		 */
>> +		port = obj->integer.value;
>> +		ACPI_FREE(obj);
>> +	} else {
>> +		ACPI_FREE(obj);
>> +		return NULL;
>> +	}
>> +
>> +	return pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(port), port & 0xff);
>> +}
>> +
>> +/*
>> + * _OST wrapper function to let firmware know the status of EDR event.
>> + * @dpc   : DPC device structure.
>> + * @status: Status of EDR event.
>> + *
> Spurious blank line in the comment.
>
>> + */
>> +static int acpi_send_edr_status(struct dpc_dev *dpc,  u16 status)
>> +{
>> +	u32 ost_status;
>> +	struct pci_dev *pdev = dpc->dev->port;
>> +
>> +	dev_dbg(&pdev->dev, "Sending EDR status :%x\n", status);
>> +
>> +	ost_status =  PCI_DEVID(pdev->bus->number, pdev->devfn);
>> +	ost_status = (ost_status << 16) | status;
>> +
>> +	if (!acpi_has_method(dpc->adev->handle, "_OST"))
>> +		return -ENOTSUPP;
> This acpi_has_method() check is superfluous, isn't it?  I would expect
> acpi_evaluate_ost() to fail gracefully if there's no _OST.
>
> I do see several other instances of code of the form:
>
>    if (!acpi_has_method(handle, "XXXX"))
>      return false;
>
>    status = acpi_evaluate_*(handle, "XXXX", ...);
>
> But I think that's a pointless pattern.  I think we could just try to
> evaluate the method directly, since the evaluation will fail if the
> method doesn't exist:
>
>    status = acpi_evaluate_*(handle, "XXXX", ...);
Agreed.
>
>> +	status = acpi_evaluate_ost(dpc->adev->handle,
>> +				   ACPI_NOTIFY_DISCONNECT_RECOVER,
>> +				   ost_status, NULL);
>> +	if (ACPI_FAILURE(status))
>> +		return -EINVAL;
>> +
>> +	return 0;
>> +}
>> +
>> +/*
>> + * Helper function used for disconnecting the child devices when EDR event is
>> + * received from firmware.
>> + */
>> +static void dpc_disconnect_devices(struct pci_dev *dev)
>> +{
>> +	struct pci_dev *udev;
>> +	struct pci_bus *parent;
>> +	struct pci_dev *pdev, *temp;
>> +
>> +	dev_dbg(&dev->dev, "Disconnecting the child devices\n");
>> +
>> +	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
>> +		udev = dev;
>> +	else
>> +		udev = dev->bus->self;
>> +
>> +	parent = udev->subordinate;
>> +	pci_walk_bus(parent, pci_dev_set_disconnected, NULL);
>> +
>> +	pci_lock_rescan_remove();
>> +	pci_dev_get(dev);
>> +	list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
>> +					 bus_list) {
>> +		pci_stop_and_remove_bus_device(pdev);
>> +	}
>> +	pci_dev_put(dev);
>> +	pci_unlock_rescan_remove();
>> +}
>> +
>> +static void edr_handle_event(acpi_handle handle, u32 event, void *data)
>> +{
>> +	struct dpc_dev *dpc = (struct dpc_dev *) data;
>> +	struct pci_dev *pdev;
>> +	u16 status, cap;
>> +
>> +	if (event != ACPI_NOTIFY_DISCONNECT_RECOVER)
>> +		return;
>> +
>> +	if (!data) {
>> +		pr_err("Invalid EDR event\n");
> In what instance can "data" be NULL?  I think *never*, unless ACPI
> screwed up and lost the context you supplied to
> acpi_install_notify_handler().  In that case, we should do something
> more significant than print a message and just return.  I think we
> should just omit this test, and if ACPI screwed up, we'll take a null
> pointer dereference oops, we'll see exactly where, and we'll have a
> chance to fix the problem.
Makes sense. I will fix it in next version.
>
>> +		return;
>> +	}
>> +
>> +	dev_dbg(&dpc->dev->port->dev, "Valid EDR event received\n");
> Set "pdev" at declaration and use it in these printks.
>
>> +
>> +	/*
>> +	 * Check if _DSM(0xD) is available, and if present locate the
>> +	 * port which issued EDR event.
>> +	 */
>> +	pdev = acpi_locate_dpc_port(dpc);
> Can this be done at probe-time instead of event-time?
>
>> +	if (!pdev) {
>> +		dev_err(&dpc->dev->port->dev, "No valid port found\n");
>> +		return;
>> +	}
>> +
>> +	/*
>> +	 * Get DPC priv data for given pdev
>> +	 */
>> +	dpc = to_dpc_dev(pdev);
>> +	dpc->error_state = PCI_ERS_RESULT_DISCONNECT;
>> +	pdev = dpc->dev->port;
> It's quite confusing to reassign pdev here.  The comment about "Get
> DPC priv data for given pdev" is really superfluous since that much is
> obvious from the code.  What's *not* obvious is whether this "pdev =
> dpc->dev->port" changes anything and if so, why.
No its not required. It does not add any value. I will remove it in next 
version.
>
>> +	cap = dpc->cap_pos;
>> +
>> +	/*
>> +	 * Check if the port supports DPC:
>> +	 *
>> +	 * if port does not support DPC, then let firmware handle
>> +	 * the error recovery and OS is responsible for cleaning
>> +	 * up the child devices.
>> +	 *
>> +	 * if port supports DPC, then fall back to default error
>> +	 * recovery.
> Capitalize first letter of sentences.
ok
>
>> +	 *
>> +	 */
>> +	if (cap) {
>> +		/* Check if there is a valid DPC trigger */
>> +		pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
>> +		if (!(status & PCI_EXP_DPC_STATUS_TRIGGER)) {
>> +			dev_err(&pdev->dev, "Invalid DPC trigger\n");
> Include "status" value in the printk.
ok
>
>> +			return;
>> +		}
>> +		dpc_process_error(dpc);
>> +	}
>> +
>> +	if (dpc->error_state == PCI_ERS_RESULT_RECOVERED) {
>> +		/*
>> +		 * Recovery is successful, so send
>> +		 * _OST(0xF, BDF << 16 | 0x80, "") to firmware.
>> +		 */
>> +		status = 0x80;
>> +	} else {
>> +		/*
>> +		 * Recovery is not successful, so disconnect child devices
>> +		 * and send _OST(0xF, BDF << 16 | 0x81, "") to firmware.
>> +		 */
>> +		dpc_disconnect_devices(pdev);
>> +		status = 0x81;
>> +	}
>> +
>> +	acpi_send_edr_status(dpc, status);
>> +}
>> +
>> +#endif
>> +
>>   #define FLAG(x, y) (((x) & (y)) ? '+' : '-')
>>   static int dpc_probe(struct pcie_device *dev)
>>   {
>> @@ -291,9 +543,10 @@ static int dpc_probe(struct pcie_device *dev)
>>   	struct device *device = &dev->device;
>>   	int status;
>>   	u16 ctl, cap;
>> -
>> -	if (pcie_aer_get_firmware_first(pdev))
>> -		return -ENOTSUPP;
>> +#ifdef CONFIG_ACPI
>> +	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
>> +	acpi_status astatus;
>> +#endif
>>   
>>   	dpc = devm_kzalloc(device, sizeof(*dpc), GFP_KERNEL);
>>   	if (!dpc)
>> @@ -302,16 +555,53 @@ static int dpc_probe(struct pcie_device *dev)
>>   	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
>>   	dpc->dev = dev;
>>   	set_service_data(dev, dpc);
>> +	dpc->error_state = PCI_ERS_RESULT_NONE;
>> +
>> +	if (!pcie_aer_get_firmware_first(pdev))
>> +		if (pci_aer_available() && dpc->cap_pos)
>> +			dpc->native_dpc = 1;
>>   
>> -	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
>> -					   dpc_handler, IRQF_SHARED,
>> -					   "pcie-dpc", dpc);
>> -	if (status) {
>> -		dev_warn(device, "request IRQ%d failed: %d\n", dev->irq,
>> -			 status);
>> -		return status;
>> +	/*
>> +	 * If native support is not enabled and ACPI is not
>> +	 * enabled then return error.
>> +	 */
>> +	if (!dpc->native_dpc && !IS_ENABLED(CONFIG_APCI))
>> +		return -ENODEV;
>> +
>> +	if (dpc->native_dpc) {
>> +		status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
>> +						   dpc_handler, IRQF_SHARED,
>> +						   "pcie-dpc", dpc);
>> +		if (status) {
>> +			dev_warn(device, "request IRQ%d failed: %d\n", dev->irq,
>> +				 status);
>> +			return status;
>> +		}
>>   	}
>>   
>> +#ifdef CONFIG_ACPI
>> +	if (!dpc->native_dpc) {
>> +		if (!adev) {
>> +			dev_err(device, "No valid acpi device found\n");
> s/acpi/ACPI/ (in comments, printk, other English text)
ok
>
>> +			return -ENODEV;
>> +		}
>> +
>> +		dpc->adev = adev;
>> +
>> +		/* Register ACPI notifier for EDR event */
> "Register handler for System events, one of which is the EDR event"?
In our case, we only handle EDR event. So I just explicitly mentioned it.
>
>> +		astatus = acpi_install_notify_handler(adev->handle,
>> +						      ACPI_SYSTEM_NOTIFY,
>> +						      edr_handle_event,
>> +						      dpc);
>> +
>> +		if (ACPI_FAILURE(astatus)) {
>> +			dev_err(device, "Install notifier failed\n");
> Mention "ACPI notify handler" here.
ok
>
>> +			return -EBUSY;
>> +		}
>> +
>> +		acpi_enable_dpc_port(dpc, true);
>> +	}
>> +#endif
>>   	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap);
>>   	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
>>   
>> @@ -325,8 +615,12 @@ static int dpc_probe(struct pcie_device *dev)
>>   		}
>>   	}
>>   
>> -	ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
>> -	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
>> +	if (dpc->native_dpc) {
>> +		ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL |
>> +			PCI_EXP_DPC_CTL_INT_EN;
>> +		pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL,
>> +				      ctl);
>> +	}
>>   
>>   	dev_info(device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
>>   		cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT),
>> @@ -344,6 +638,9 @@ static void dpc_remove(struct pcie_device *dev)
>>   	struct pci_dev *pdev = dev->port;
>>   	u16 ctl;
>>   
>> +	if (!dpc->native_dpc)
>> +		return;
>> +
>>   	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
>>   	ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN);
>>   	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
>> @@ -356,6 +653,7 @@ static struct pcie_port_service_driver dpcdriver = {
>>   	.probe		= dpc_probe,
>>   	.remove		= dpc_remove,
>>   	.reset_link	= dpc_reset_link,
>> +	.error_resume   = dpc_error_resume,
>>   };
>>   
>>   int __init pcie_dpc_init(void)
>> diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
>> index 865f12f4b314..050cbb7a5083 100644
>> --- a/drivers/pci/pcie/portdrv_core.c
>> +++ b/drivers/pci/pcie/portdrv_core.c
>> @@ -249,9 +249,14 @@ static int get_port_device_capability(struct pci_dev *dev)
>>   		pcie_pme_interrupt_enable(dev, false);
>>   	}
>>   
>> +	/*
>> +	 * If EDR support is enabled in OS, then even if AER is not handled in
>> +	 * OS, DPC service can be enabled.
>> +	 */
>>   	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC) &&
>> -	    pci_aer_available() && services & PCIE_PORT_SERVICE_AER &&
>> -	    (pcie_ports_native || host->native_dpc))
>> +	    ((IS_ENABLED(CONFIG_PCIE_EDR) && !host->native_dpc) ||
>> +	    (pci_aer_available() && services & PCIE_PORT_SERVICE_AER &&
>> +	    (pcie_ports_native || host->native_dpc))))
> Holy cow, I think I'll have to schedule an hour sometime to figure
> out what's going on here :)
Please check the previous patch in this series for details related to 
host->native_dpc.

host->native_dpc is set/unset based on _OSC negotiation in 
drivers/acpi/pci_root.c

Previously if BIOS handles AER and DPC then we don't need to enable 
AER/DPC services in OS. But with EDR support (hybrid mode), even if BIOS 
handles DPC support, there is way for it let OS handle the port recovery 
and error handling. So we should enable the DPC service independently. 
That's why I have added additional if condition for non-native DPC mode 
with EDR support enabled.

If we expand the if condition it would look like,

if (port has dpc capability) {

     if (EDR support is supported/enabled in OS and bios_handles_dpc) then
         services |= PCIE_PORT_SERVICE_DPC;

         if ( AER/DPC is handled in OS)
         services |= PCIE_PORT_SERVICE_DPC;

}

>
>>   		services |= PCIE_PORT_SERVICE_DPC;
>>   
>>   	if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM || -- 2.20.1
>>
Bjorn Helgaas April 10, 2019, 11:08 p.m. UTC | #3
On Wed, Apr 10, 2019 at 03:12:05PM -0700, sathyanarayanan kuppuswamy wrote:
> On 4/10/19 11:41 AM, Bjorn Helgaas wrote:
> > On Tue, Mar 19, 2019 at 01:47:29PM -0700, sathyanarayanan.kuppuswamy@linux.intel.com wrote:
> > > From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
> > > 
> > > As per PCI firmware specification v3.2 ECN
> > > (https://members.pcisig.com/wg/PCI-SIG/document/12614), when firmware
> > > owns Downstream Port Containment (DPC), its expected to use the "Error
> > > Disconnect Recover" (EDR) notification to alert OSPM of a DPC event and
> > > if OS supports EDR, its expected to handle the software state invalidation
> > > and port recovery in OS and let firmware know the recovery status via _OST
> > > ACPI call.
> > > 
> > > Since EDR is a hybrid service, DPC service shall be enabled in OS even
> > > if AER is not natively enabled in OS.

> > > +	bool			native_dpc;
> > This is going to be way too confusing with a "native_dpc" in both the
> > struct pci_host_bridge and the struct dpc_dev.
> Any suggestions? what about native_mode ?

"native_mode" is different but doesn't contain any additional
information.  I haven't worked out what this new symbol actually does;
if it's related to EDR, maybe that could be incorporated somehow?

> > > +		/* Register ACPI notifier for EDR event */
> > "Register handler for System events, one of which is the EDR event"?
> In our case, we only handle EDR event. So I just explicitly mentioned it.

Right.  As a courtesy to the reader, I think it's worth making
the comment match the code, i.e., you can't pass an event type to
acpi_install_notify_handler(), so obviously the handler will be called
for *all* System events.  By saying "one of which is the EDR event",
you give the reader a hint that the handler will have to filter out
the others.

The reason I noticed this is because I read "Register notifier for EDR
event" and wondered to myself "Hmmm, how does this work?  I don't see
anything passed to acpi_install_notify_handler() that would identify
an EDR event."

> > > +	/*
> > > +	 * If EDR support is enabled in OS, then even if AER is not handled in
> > > +	 * OS, DPC service can be enabled.
> > > +	 */
> > >   	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC) &&
> > > -	    pci_aer_available() && services & PCIE_PORT_SERVICE_AER &&
> > > -	    (pcie_ports_native || host->native_dpc))
> > > +	    ((IS_ENABLED(CONFIG_PCIE_EDR) && !host->native_dpc) ||
> > > +	    (pci_aer_available() && services & PCIE_PORT_SERVICE_AER &&
> > > +	    (pcie_ports_native || host->native_dpc))))
> > Holy cow, I think I'll have to schedule an hour sometime to figure
> > out what's going on here :)
> Please check the previous patch in this series for details related to
> host->native_dpc.

Yeah, I'm just saying that conditional is starting to look pretty
gnarly.  Possibly there's nothing to do about it.

Bjorn
diff mbox series

Patch

diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 5cbdbca904ac..55f65d1cbc9e 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -142,3 +142,13 @@  config PCIE_PTM
 
 	  This is only useful if you have devices that support PTM, but it
 	  is safe to enable even if you don't.
+
+config PCIE_EDR
+	bool "PCI Express Error Disconnect Recover support"
+	default n
+	depends on PCIE_DPC && ACPI
+	help
+	  This options adds Error Disconnect Recover support as specified
+	  in PCI firmware specification v3.2 Downstream Port Containment
+	  Related Enhancements ECN. Enable this if you want to support hybrid
+	  DPC model which uses both firmware and OS to implement DPC.
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index 7b77754a82de..bdf4ca8a0acb 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -11,6 +11,7 @@ 
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/acpi.h>
 
 #include "portdrv.h"
 #include "../pci.h"
@@ -20,8 +21,23 @@  struct dpc_dev {
 	u16			cap_pos;
 	bool			rp_extensions;
 	u8			rp_log_size;
+	bool			native_dpc;
+	pci_ers_result_t	error_state;
+#ifdef CONFIG_ACPI
+	struct acpi_device	*adev;
+#endif
 };
 
+#ifdef CONFIG_ACPI
+
+#define EDR_PORT_ENABLE_DSM     0x0C
+#define EDR_PORT_LOCATE_DSM     0x0D
+
+static const guid_t pci_acpi_dsm_guid =
+		GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a,
+			  0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d);
+#endif
+
 static const char * const rp_pio_error_string[] = {
 	"Configuration Request received UR Completion",	 /* Bit Position 0  */
 	"Configuration Request received CA Completion",	 /* Bit Position 1  */
@@ -67,6 +83,9 @@  void pci_save_dpc_state(struct pci_dev *dev)
 	if (!dpc)
 		return;
 
+	if (!dpc->native_dpc)
+		return;
+
 	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
 	if (!save_state)
 		return;
@@ -88,6 +107,9 @@  void pci_restore_dpc_state(struct pci_dev *dev)
 	if (!dpc)
 		return;
 
+	if (!dpc->native_dpc)
+		return;
+
 	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
 	if (!save_state)
 		return;
@@ -224,10 +246,9 @@  static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
 	return 1;
 }
 
-static irqreturn_t dpc_handler(int irq, void *context)
+static void dpc_process_error(struct dpc_dev *dpc)
 {
 	struct aer_err_info info;
-	struct dpc_dev *dpc = context;
 	struct pci_dev *pdev = dpc->dev->port;
 	struct device *dev = &dpc->dev->device;
 	u16 cap = dpc->cap_pos, status, source, reason, ext_reason;
@@ -261,6 +282,13 @@  static irqreturn_t dpc_handler(int irq, void *context)
 
 	/* We configure DPC so it only triggers on ERR_FATAL */
 	pcie_do_recovery(pdev, pci_channel_io_frozen, PCIE_PORT_SERVICE_DPC);
+}
+
+static irqreturn_t dpc_handler(int irq, void *context)
+{
+	struct dpc_dev *dpc = context;
+
+	dpc_process_error(dpc);
 
 	return IRQ_HANDLED;
 }
@@ -283,6 +311,230 @@  static irqreturn_t dpc_irq(int irq, void *context)
 	return IRQ_HANDLED;
 }
 
+void dpc_error_resume(struct pci_dev *dev)
+{
+	struct dpc_dev *dpc;
+
+	dpc = to_dpc_dev(dev);
+	if (!dpc)
+		return;
+
+	dpc->error_state = PCI_ERS_RESULT_RECOVERED;
+}
+
+#ifdef CONFIG_ACPI
+
+/*
+ * _DSM wrapper function to enable/disable DPC port.
+ * @dpc   : DPC device structure
+ * @enable: status of DPC port (0 or 1).
+ *
+ * returns 0 on success or errno on failure.
+ */
+static int acpi_enable_dpc_port(struct dpc_dev *dpc, bool enable)
+{
+	union acpi_object *obj;
+	int status;
+	union acpi_object argv4;
+
+	/* Check whether EDR_PORT_ENABLE_DSM is supported in firmware */
+	status = acpi_check_dsm(dpc->adev->handle, &pci_acpi_dsm_guid, 1,
+				1 << EDR_PORT_ENABLE_DSM);
+	if (!status)
+		return -ENOTSUPP;
+
+	argv4.type = ACPI_TYPE_INTEGER;
+	argv4.integer.value = enable;
+
+	obj = acpi_evaluate_dsm(dpc->adev->handle, &pci_acpi_dsm_guid, 1,
+				EDR_PORT_ENABLE_DSM, &argv4);
+	if (!obj)
+		return -EIO;
+
+	if (obj->type == ACPI_TYPE_INTEGER && obj->integer.value == enable)
+		status = 0;
+	else
+		status = -EIO;
+
+	ACPI_FREE(obj);
+
+	return status;
+}
+
+/*
+ * _DSM wrapper function to locate DPC port.
+ * @dpc   : DPC device structure
+ *
+ * returns pci_dev or NULL.
+ */
+static struct pci_dev *acpi_locate_dpc_port(struct dpc_dev *dpc)
+{
+	union acpi_object *obj;
+	int status;
+	u16 port;
+
+	/* Check whether EDR_PORT_LOCATE_DSM is supported in firmware */
+	status = acpi_check_dsm(dpc->adev->handle, &pci_acpi_dsm_guid, 1,
+				1 << EDR_PORT_LOCATE_DSM);
+	if (!status)
+		return dpc->dev->port;
+
+
+	obj = acpi_evaluate_dsm(dpc->adev->handle, &pci_acpi_dsm_guid, 1,
+				EDR_PORT_LOCATE_DSM, NULL);
+	if (!obj)
+		return NULL;
+
+	if (obj->type == ACPI_TYPE_INTEGER) {
+		/*
+		 * Firmware returns DPC port BDF details in following format:
+		 *	15:8 = bus
+		 *	7:3 = device
+		 *	2:0 = function
+		 */
+		port = obj->integer.value;
+		ACPI_FREE(obj);
+	} else {
+		ACPI_FREE(obj);
+		return NULL;
+	}
+
+	return pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(port), port & 0xff);
+}
+
+/*
+ * _OST wrapper function to let firmware know the status of EDR event.
+ * @dpc   : DPC device structure.
+ * @status: Status of EDR event.
+ *
+ */
+static int acpi_send_edr_status(struct dpc_dev *dpc,  u16 status)
+{
+	u32 ost_status;
+	struct pci_dev *pdev = dpc->dev->port;
+
+	dev_dbg(&pdev->dev, "Sending EDR status :%x\n", status);
+
+	ost_status =  PCI_DEVID(pdev->bus->number, pdev->devfn);
+	ost_status = (ost_status << 16) | status;
+
+	if (!acpi_has_method(dpc->adev->handle, "_OST"))
+		return -ENOTSUPP;
+
+	status = acpi_evaluate_ost(dpc->adev->handle,
+				   ACPI_NOTIFY_DISCONNECT_RECOVER,
+				   ost_status, NULL);
+	if (ACPI_FAILURE(status))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Helper function used for disconnecting the child devices when EDR event is
+ * received from firmware.
+ */
+static void dpc_disconnect_devices(struct pci_dev *dev)
+{
+	struct pci_dev *udev;
+	struct pci_bus *parent;
+	struct pci_dev *pdev, *temp;
+
+	dev_dbg(&dev->dev, "Disconnecting the child devices\n");
+
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		udev = dev;
+	else
+		udev = dev->bus->self;
+
+	parent = udev->subordinate;
+	pci_walk_bus(parent, pci_dev_set_disconnected, NULL);
+
+	pci_lock_rescan_remove();
+	pci_dev_get(dev);
+	list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
+					 bus_list) {
+		pci_stop_and_remove_bus_device(pdev);
+	}
+	pci_dev_put(dev);
+	pci_unlock_rescan_remove();
+}
+
+static void edr_handle_event(acpi_handle handle, u32 event, void *data)
+{
+	struct dpc_dev *dpc = (struct dpc_dev *) data;
+	struct pci_dev *pdev;
+	u16 status, cap;
+
+	if (event != ACPI_NOTIFY_DISCONNECT_RECOVER)
+		return;
+
+	if (!data) {
+		pr_err("Invalid EDR event\n");
+		return;
+	}
+
+	dev_dbg(&dpc->dev->port->dev, "Valid EDR event received\n");
+
+	/*
+	 * Check if _DSM(0xD) is available, and if present locate the
+	 * port which issued EDR event.
+	 */
+	pdev = acpi_locate_dpc_port(dpc);
+	if (!pdev) {
+		dev_err(&dpc->dev->port->dev, "No valid port found\n");
+		return;
+	}
+
+	/*
+	 * Get DPC priv data for given pdev
+	 */
+	dpc = to_dpc_dev(pdev);
+	dpc->error_state = PCI_ERS_RESULT_DISCONNECT;
+	pdev = dpc->dev->port;
+	cap = dpc->cap_pos;
+
+	/*
+	 * Check if the port supports DPC:
+	 *
+	 * if port does not support DPC, then let firmware handle
+	 * the error recovery and OS is responsible for cleaning
+	 * up the child devices.
+	 *
+	 * if port supports DPC, then fall back to default error
+	 * recovery.
+	 *
+	 */
+	if (cap) {
+		/* Check if there is a valid DPC trigger */
+		pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
+		if (!(status & PCI_EXP_DPC_STATUS_TRIGGER)) {
+			dev_err(&pdev->dev, "Invalid DPC trigger\n");
+			return;
+		}
+		dpc_process_error(dpc);
+	}
+
+	if (dpc->error_state == PCI_ERS_RESULT_RECOVERED) {
+		/*
+		 * Recovery is successful, so send
+		 * _OST(0xF, BDF << 16 | 0x80, "") to firmware.
+		 */
+		status = 0x80;
+	} else {
+		/*
+		 * Recovery is not successful, so disconnect child devices
+		 * and send _OST(0xF, BDF << 16 | 0x81, "") to firmware.
+		 */
+		dpc_disconnect_devices(pdev);
+		status = 0x81;
+	}
+
+	acpi_send_edr_status(dpc, status);
+}
+
+#endif
+
 #define FLAG(x, y) (((x) & (y)) ? '+' : '-')
 static int dpc_probe(struct pcie_device *dev)
 {
@@ -291,9 +543,10 @@  static int dpc_probe(struct pcie_device *dev)
 	struct device *device = &dev->device;
 	int status;
 	u16 ctl, cap;
-
-	if (pcie_aer_get_firmware_first(pdev))
-		return -ENOTSUPP;
+#ifdef CONFIG_ACPI
+	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+	acpi_status astatus;
+#endif
 
 	dpc = devm_kzalloc(device, sizeof(*dpc), GFP_KERNEL);
 	if (!dpc)
@@ -302,16 +555,53 @@  static int dpc_probe(struct pcie_device *dev)
 	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
 	dpc->dev = dev;
 	set_service_data(dev, dpc);
+	dpc->error_state = PCI_ERS_RESULT_NONE;
+
+	if (!pcie_aer_get_firmware_first(pdev))
+		if (pci_aer_available() && dpc->cap_pos)
+			dpc->native_dpc = 1;
 
-	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
-					   dpc_handler, IRQF_SHARED,
-					   "pcie-dpc", dpc);
-	if (status) {
-		dev_warn(device, "request IRQ%d failed: %d\n", dev->irq,
-			 status);
-		return status;
+	/*
+	 * If native support is not enabled and ACPI is not
+	 * enabled then return error.
+	 */
+	if (!dpc->native_dpc && !IS_ENABLED(CONFIG_APCI))
+		return -ENODEV;
+
+	if (dpc->native_dpc) {
+		status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
+						   dpc_handler, IRQF_SHARED,
+						   "pcie-dpc", dpc);
+		if (status) {
+			dev_warn(device, "request IRQ%d failed: %d\n", dev->irq,
+				 status);
+			return status;
+		}
 	}
 
+#ifdef CONFIG_ACPI
+	if (!dpc->native_dpc) {
+		if (!adev) {
+			dev_err(device, "No valid acpi device found\n");
+			return -ENODEV;
+		}
+
+		dpc->adev = adev;
+
+		/* Register ACPI notifier for EDR event */
+		astatus = acpi_install_notify_handler(adev->handle,
+						      ACPI_SYSTEM_NOTIFY,
+						      edr_handle_event,
+						      dpc);
+
+		if (ACPI_FAILURE(astatus)) {
+			dev_err(device, "Install notifier failed\n");
+			return -EBUSY;
+		}
+
+		acpi_enable_dpc_port(dpc, true);
+	}
+#endif
 	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CAP, &cap);
 	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
 
@@ -325,8 +615,12 @@  static int dpc_probe(struct pcie_device *dev)
 		}
 	}
 
-	ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
-	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
+	if (dpc->native_dpc) {
+		ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL |
+			PCI_EXP_DPC_CTL_INT_EN;
+		pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL,
+				      ctl);
+	}
 
 	dev_info(device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
 		cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT),
@@ -344,6 +638,9 @@  static void dpc_remove(struct pcie_device *dev)
 	struct pci_dev *pdev = dev->port;
 	u16 ctl;
 
+	if (!dpc->native_dpc)
+		return;
+
 	pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl);
 	ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN);
 	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl);
@@ -356,6 +653,7 @@  static struct pcie_port_service_driver dpcdriver = {
 	.probe		= dpc_probe,
 	.remove		= dpc_remove,
 	.reset_link	= dpc_reset_link,
+	.error_resume   = dpc_error_resume,
 };
 
 int __init pcie_dpc_init(void)
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 865f12f4b314..050cbb7a5083 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -249,9 +249,14 @@  static int get_port_device_capability(struct pci_dev *dev)
 		pcie_pme_interrupt_enable(dev, false);
 	}
 
+	/*
+	 * If EDR support is enabled in OS, then even if AER is not handled in
+	 * OS, DPC service can be enabled.
+	 */
 	if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DPC) &&
-	    pci_aer_available() && services & PCIE_PORT_SERVICE_AER &&
-	    (pcie_ports_native || host->native_dpc))
+	    ((IS_ENABLED(CONFIG_PCIE_EDR) && !host->native_dpc) ||
+	    (pci_aer_available() && services & PCIE_PORT_SERVICE_AER &&
+	    (pcie_ports_native || host->native_dpc))))
 		services |= PCIE_PORT_SERVICE_DPC;
 
 	if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||