diff mbox series

[v5,05/10] PCI/AER: Apply function level reset to RCiEP on fatal error

Message ID 20200918204603.62100-6-sean.v.kelley@intel.com
State Superseded
Delegated to: Bjorn Helgaas
Headers show
Series None | expand

Commit Message

Sean V Kelley Sept. 18, 2020, 8:45 p.m. UTC
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>

Attempt to do function level reset for an RCiEP associated with an
RCEC device on fatal error.

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
---
 drivers/pci/pcie/err.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

Comments

Jonathan Cameron Sept. 21, 2020, 11:13 a.m. UTC | #1
On Fri, 18 Sep 2020 13:45:58 -0700
Sean V Kelley <sean.v.kelley@intel.com> wrote:

> From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> 
> Attempt to do function level reset for an RCiEP associated with an
> RCEC device on fatal error.

I'm not sure the description is correct. Looks like it will do
the reset even if not associated with an RCEC.
I'd just cut this down to:

"Attempt to do a function level reset for an RCiEP on fatal error."

I'm not 100% sure doing an FLR will actually help in most cases if you've
reported a fatal error, but I suppose it does no harm!

So with description changed.
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> 
> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> ---
>  drivers/pci/pcie/err.c | 31 ++++++++++++++++++++++---------
>  1 file changed, 22 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> index e575fa6cee63..5380ecc41506 100644
> --- a/drivers/pci/pcie/err.c
> +++ b/drivers/pci/pcie/err.c
> @@ -169,6 +169,17 @@ static void pci_bridge_walk(struct pci_dev *bridge, int (*cb)(struct pci_dev *,
>  		cb(bridge, userdata);
>  }
>  
> +static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
> +{
> +	if (!pcie_has_flr(dev))
> +		return PCI_ERS_RESULT_NONE;
> +
> +	if (pcie_flr(dev))
> +		return PCI_ERS_RESULT_DISCONNECT;
> +
> +	return PCI_ERS_RESULT_RECOVERED;
> +}
> +
>  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>  			pci_channel_state_t state,	
>  			pci_ers_result_t (*reset_subordinate_devices)(struct pci_dev *pdev))
> @@ -195,15 +206,17 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>  	if (state == pci_channel_io_frozen) {
>  		pci_bridge_walk(bridge, report_frozen_detected, &status);
>  		if (type == PCI_EXP_TYPE_RC_END) {
> -			pci_warn(dev, "link reset not possible for RCiEP\n");
> -			status = PCI_ERS_RESULT_NONE;
> -			goto failed;
> -		}
> -
> -		status = reset_subordinate_devices(bridge);
> -		if (status != PCI_ERS_RESULT_RECOVERED) {
> -			pci_warn(dev, "subordinate device reset failed\n");
> -			goto failed;
> +			status = flr_on_rciep(dev);
> +			if (status != PCI_ERS_RESULT_RECOVERED) {
> +				pci_warn(dev, "function level reset failed\n");
> +				goto failed;
> +			}
> +		} else {
> +			status = reset_subordinate_devices(bridge);
> +			if (status != PCI_ERS_RESULT_RECOVERED) {
> +				pci_warn(dev, "subordinate device reset failed\n");
> +				goto failed;
> +			}
>  		}
>  	} else {
>  		pci_bridge_walk(bridge, report_normal_detected, &status);
Sean V Kelley Sept. 23, 2020, 2:49 a.m. UTC | #2
On Mon, Sep 21, 2020 at 4:15 AM Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> On Fri, 18 Sep 2020 13:45:58 -0700
> Sean V Kelley <sean.v.kelley@intel.com> wrote:
>
> > From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> >
> > Attempt to do function level reset for an RCiEP associated with an
> > RCEC device on fatal error.
>
> I'm not sure the description is correct. Looks like it will do
> the reset even if not associated with an RCEC.
> I'd just cut this down to:
>
> "Attempt to do a function level reset for an RCiEP on fatal error."

Agree. Will change.

>
> I'm not 100% sure doing an FLR will actually help in most cases if you've
> reported a fatal error, but I suppose it does no harm!
>
> So with description changed.
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Will do, thanks.

Sean

>
> >
> > Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
> > ---
> >  drivers/pci/pcie/err.c | 31 ++++++++++++++++++++++---------
> >  1 file changed, 22 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> > index e575fa6cee63..5380ecc41506 100644
> > --- a/drivers/pci/pcie/err.c
> > +++ b/drivers/pci/pcie/err.c
> > @@ -169,6 +169,17 @@ static void pci_bridge_walk(struct pci_dev *bridge, int (*cb)(struct pci_dev *,
> >               cb(bridge, userdata);
> >  }
> >
> > +static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
> > +{
> > +     if (!pcie_has_flr(dev))
> > +             return PCI_ERS_RESULT_NONE;
> > +
> > +     if (pcie_flr(dev))
> > +             return PCI_ERS_RESULT_DISCONNECT;
> > +
> > +     return PCI_ERS_RESULT_RECOVERED;
> > +}
> > +
> >  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
> >                       pci_channel_state_t state,
> >                       pci_ers_result_t (*reset_subordinate_devices)(struct pci_dev *pdev))
> > @@ -195,15 +206,17 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
> >       if (state == pci_channel_io_frozen) {
> >               pci_bridge_walk(bridge, report_frozen_detected, &status);
> >               if (type == PCI_EXP_TYPE_RC_END) {
> > -                     pci_warn(dev, "link reset not possible for RCiEP\n");
> > -                     status = PCI_ERS_RESULT_NONE;
> > -                     goto failed;
> > -             }
> > -
> > -             status = reset_subordinate_devices(bridge);
> > -             if (status != PCI_ERS_RESULT_RECOVERED) {
> > -                     pci_warn(dev, "subordinate device reset failed\n");
> > -                     goto failed;
> > +                     status = flr_on_rciep(dev);
> > +                     if (status != PCI_ERS_RESULT_RECOVERED) {
> > +                             pci_warn(dev, "function level reset failed\n");
> > +                             goto failed;
> > +                     }
> > +             } else {
> > +                     status = reset_subordinate_devices(bridge);
> > +                     if (status != PCI_ERS_RESULT_RECOVERED) {
> > +                             pci_warn(dev, "subordinate device reset failed\n");
> > +                             goto failed;
> > +                     }
> >               }
> >       } else {
> >               pci_bridge_walk(bridge, report_normal_detected, &status);
>
>
diff mbox series

Patch

diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index e575fa6cee63..5380ecc41506 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -169,6 +169,17 @@  static void pci_bridge_walk(struct pci_dev *bridge, int (*cb)(struct pci_dev *,
 		cb(bridge, userdata);
 }
 
+static pci_ers_result_t flr_on_rciep(struct pci_dev *dev)
+{
+	if (!pcie_has_flr(dev))
+		return PCI_ERS_RESULT_NONE;
+
+	if (pcie_flr(dev))
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
 pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 			pci_channel_state_t state,
 			pci_ers_result_t (*reset_subordinate_devices)(struct pci_dev *pdev))
@@ -195,15 +206,17 @@  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 	if (state == pci_channel_io_frozen) {
 		pci_bridge_walk(bridge, report_frozen_detected, &status);
 		if (type == PCI_EXP_TYPE_RC_END) {
-			pci_warn(dev, "link reset not possible for RCiEP\n");
-			status = PCI_ERS_RESULT_NONE;
-			goto failed;
-		}
-
-		status = reset_subordinate_devices(bridge);
-		if (status != PCI_ERS_RESULT_RECOVERED) {
-			pci_warn(dev, "subordinate device reset failed\n");
-			goto failed;
+			status = flr_on_rciep(dev);
+			if (status != PCI_ERS_RESULT_RECOVERED) {
+				pci_warn(dev, "function level reset failed\n");
+				goto failed;
+			}
+		} else {
+			status = reset_subordinate_devices(bridge);
+			if (status != PCI_ERS_RESULT_RECOVERED) {
+				pci_warn(dev, "subordinate device reset failed\n");
+				goto failed;
+			}
 		}
 	} else {
 		pci_bridge_walk(bridge, report_normal_detected, &status);