diff mbox

[v15,3/9] PCI/AER: Handle ERR_FATAL with removal and re-enumeration of devices

Message ID 1525323838-1735-4-git-send-email-poza@codeaurora.org (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Oza Pawandeep May 3, 2018, 5:03 a.m. UTC
This patch alters the behavior of handling of ERR_FATAL, where removal
of devices is initiated, followed by reset link, followed by
re-enumeration.

So the errors are handled in a different way as follows:
ERR_NONFATAL => call driver recovery entry points
ERR_FATAL    => remove and re-enumerate

please refer to Documentation/PCI/pci-error-recovery.txt for more details.

Signed-off-by: Oza Pawandeep <poza@codeaurora.org>

Comments

Bjorn Helgaas May 8, 2018, 11:53 p.m. UTC | #1
On Thu, May 03, 2018 at 01:03:52AM -0400, Oza Pawandeep wrote:
> This patch alters the behavior of handling of ERR_FATAL, where removal
> of devices is initiated, followed by reset link, followed by
> re-enumeration.
> 
> So the errors are handled in a different way as follows:
> ERR_NONFATAL => call driver recovery entry points
> ERR_FATAL    => remove and re-enumerate
> 
> please refer to Documentation/PCI/pci-error-recovery.txt for more details.
> 
> Signed-off-by: Oza Pawandeep <poza@codeaurora.org>
> 
> diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
> index 779b387..206f590 100644
> --- a/drivers/pci/pcie/aer/aerdrv.c
> +++ b/drivers/pci/pcie/aer/aerdrv.c
> @@ -330,6 +330,13 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>  	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
>  	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
>  
> +	/*
> +	 * This function is called only on ERR_FATAL now, and since
> +	 * the pci_report_resume is called only in ERR_NONFATAL case,
> +	 * the clearing part has to be taken care here.
> +	 */
> +	aer_error_resume(dev);

I don't understand this part.  Previously the ERR_FATAL path looked like
this:

  do_recovery
    reset_link
      driver->reset_link
        aer_root_reset
          pci_reset_bridge_secondary_bus                # <-- reset
    broadcast_error_message(..., report_resume)
      pci_walk_bus(..., report_resume, ...)
        report_resume
      if (cb == report_resume)
        pci_cleanup_aer_uncorrect_error_status
          pci_write_config_dword(PCI_ERR_UNCOR_STATUS)  # <-- clear status

After this patch, it will look like this:

  do_recovery
    do_fatal_recovery
      pci_cleanup_aer_uncorrect_error_status
        pci_write_config_dword(PCI_ERR_UNCOR_STATUS)    # <-- clear status
      reset_link
        driver->reset_link
          aer_root_reset
            pci_reset_bridge_secondary_bus              # <-- reset
            aer_error_resume
              pcie_capability_write_word(PCI_EXP_DEVSTA)        # <-- clear more
              pci_write_config_dword(PCI_ERR_UNCOR_STATUS)      # <-- clear status

So if I'm understanding correctly, the new path clears the status too
early, then clears it again (plus clearing DEVSTA, which we didn't do
before) later.

I would think we would want to leave aer_root_reset() alone, and just move
the pci_cleanup_aer_uncorrect_error_status() in do_fatal_recovery() down so
it happens after we call reset_link().  That way the reset/clear sequence
would be the same as it was before.

>  	return PCI_ERS_RESULT_RECOVERED;
>  }
>  
> diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
> index 0ea5acc..655d4e8 100644
> --- a/drivers/pci/pcie/aer/aerdrv_core.c
> +++ b/drivers/pci/pcie/aer/aerdrv_core.c
> @@ -20,6 +20,7 @@
>  #include <linux/slab.h>
>  #include <linux/kfifo.h>
>  #include "aerdrv.h"
> +#include "../../pci.h"
>  
>  #define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
>  				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
> @@ -474,6 +475,44 @@ static pci_ers_result_t reset_link(struct pci_dev *dev)
>  	return status;
>  }
>  
> +static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity)
> +{
> +	struct pci_dev *udev;
> +	struct pci_bus *parent;
> +	struct pci_dev *pdev, *temp;
> +	pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
> +
> +	if (severity == AER_FATAL)
> +		pci_cleanup_aer_uncorrect_error_status(dev);
> +
> +	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
> +		udev = dev;
> +	else
> +		udev = dev->bus->self;
> +
> +	parent = udev->subordinate;
> +	pci_lock_rescan_remove();
> +	list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
> +				 bus_list) {
> +		pci_dev_get(pdev);
> +		pci_dev_set_disconnected(pdev, NULL);
> +		if (pci_has_subordinate(pdev))
> +			pci_walk_bus(pdev->subordinate,
> +				     pci_dev_set_disconnected, NULL);
> +		pci_stop_and_remove_bus_device(pdev);
> +		pci_dev_put(pdev);
> +	}
> +
> +	result = reset_link(udev);
> +	if (result == PCI_ERS_RESULT_RECOVERED)
> +		if (pcie_wait_for_link(udev, true))
> +			pci_rescan_bus(udev->bus);
> +
> +	pci_unlock_rescan_remove();
> +
> +	return result;
> +}
> +
>  /**
>   * do_recovery - handle nonfatal/fatal error recovery process
>   * @dev: pointer to a pci_dev data structure of agent detecting an error
> @@ -485,11 +524,15 @@ static pci_ers_result_t reset_link(struct pci_dev *dev)
>   */
>  static void do_recovery(struct pci_dev *dev, int severity)
>  {
> -	pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
> +	pci_ers_result_t status;
>  	enum pci_channel_state state;
>  
> -	if (severity == AER_FATAL)
> -		state = pci_channel_io_frozen;
> +	if (severity == AER_FATAL) {
> +		status = do_fatal_recovery(dev, severity);
> +		if (status != PCI_ERS_RESULT_RECOVERED)
> +			goto failed;
> +		return;
> +	}
>  	else
>  		state = pci_channel_io_normal;
>  
> @@ -498,12 +541,6 @@ static void do_recovery(struct pci_dev *dev, int severity)
>  			"error_detected",
>  			report_error_detected);
>  
> -	if (severity == AER_FATAL) {
> -		result = reset_link(dev);
> -		if (result != PCI_ERS_RESULT_RECOVERED)
> -			goto failed;
> -	}
> -
>  	if (status == PCI_ERS_RESULT_CAN_RECOVER)
>  		status = broadcast_error_message(dev,
>  				state,
> -- 
> 2.7.4
>
Bjorn Helgaas May 9, 2018, 1:07 p.m. UTC | #2
On Tue, May 08, 2018 at 06:53:30PM -0500, Bjorn Helgaas wrote:
> On Thu, May 03, 2018 at 01:03:52AM -0400, Oza Pawandeep wrote:
> > This patch alters the behavior of handling of ERR_FATAL, where removal
> > of devices is initiated, followed by reset link, followed by
> > re-enumeration.
> > 
> > So the errors are handled in a different way as follows:
> > ERR_NONFATAL => call driver recovery entry points
> > ERR_FATAL    => remove and re-enumerate
> > 
> > please refer to Documentation/PCI/pci-error-recovery.txt for more details.
> > 
> > Signed-off-by: Oza Pawandeep <poza@codeaurora.org>
> > 
> > diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
> > index 779b387..206f590 100644
> > --- a/drivers/pci/pcie/aer/aerdrv.c
> > +++ b/drivers/pci/pcie/aer/aerdrv.c
> > @@ -330,6 +330,13 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> >  	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
> >  	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
> >  
> > +	/*
> > +	 * This function is called only on ERR_FATAL now, and since
> > +	 * the pci_report_resume is called only in ERR_NONFATAL case,
> > +	 * the clearing part has to be taken care here.
> > +	 */
> > +	aer_error_resume(dev);
> 
> I don't understand this part.  Previously the ERR_FATAL path looked like
> this:
> 
>   do_recovery
>     reset_link
>       driver->reset_link
>         aer_root_reset
>           pci_reset_bridge_secondary_bus                # <-- reset
>     broadcast_error_message(..., report_resume)
>       pci_walk_bus(..., report_resume, ...)
>         report_resume
>       if (cb == report_resume)
>         pci_cleanup_aer_uncorrect_error_status
>           pci_write_config_dword(PCI_ERR_UNCOR_STATUS)  # <-- clear status
> 
> After this patch, it will look like this:
> 
>   do_recovery
>     do_fatal_recovery
>       pci_cleanup_aer_uncorrect_error_status
>         pci_write_config_dword(PCI_ERR_UNCOR_STATUS)    # <-- clear status
>       reset_link
>         driver->reset_link
>           aer_root_reset
>             pci_reset_bridge_secondary_bus              # <-- reset
>             aer_error_resume
>               pcie_capability_write_word(PCI_EXP_DEVSTA)        # <-- clear more
>               pci_write_config_dword(PCI_ERR_UNCOR_STATUS)      # <-- clear status
> 
> So if I'm understanding correctly, the new path clears the status too
> early, then clears it again (plus clearing DEVSTA, which we didn't do
> before) later.
> 
> I would think we would want to leave aer_root_reset() alone, and just move
> the pci_cleanup_aer_uncorrect_error_status() in do_fatal_recovery() down so
> it happens after we call reset_link().  That way the reset/clear sequence
> would be the same as it was before.

I've been fiddling with this a bit myself and will post the results to see
what you think.
Oza Pawandeep May 9, 2018, 1:14 p.m. UTC | #3
On 2018-05-09 18:37, Bjorn Helgaas wrote:
> On Tue, May 08, 2018 at 06:53:30PM -0500, Bjorn Helgaas wrote:
>> On Thu, May 03, 2018 at 01:03:52AM -0400, Oza Pawandeep wrote:
>> > This patch alters the behavior of handling of ERR_FATAL, where removal
>> > of devices is initiated, followed by reset link, followed by
>> > re-enumeration.
>> >
>> > So the errors are handled in a different way as follows:
>> > ERR_NONFATAL => call driver recovery entry points
>> > ERR_FATAL    => remove and re-enumerate
>> >
>> > please refer to Documentation/PCI/pci-error-recovery.txt for more details.
>> >
>> > Signed-off-by: Oza Pawandeep <poza@codeaurora.org>
>> >
>> > diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
>> > index 779b387..206f590 100644
>> > --- a/drivers/pci/pcie/aer/aerdrv.c
>> > +++ b/drivers/pci/pcie/aer/aerdrv.c
>> > @@ -330,6 +330,13 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>> >  	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
>> >  	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
>> >
>> > +	/*
>> > +	 * This function is called only on ERR_FATAL now, and since
>> > +	 * the pci_report_resume is called only in ERR_NONFATAL case,
>> > +	 * the clearing part has to be taken care here.
>> > +	 */
>> > +	aer_error_resume(dev);
>> 
>> I don't understand this part.  Previously the ERR_FATAL path looked 
>> like
>> this:
>> 
>>   do_recovery
>>     reset_link
>>       driver->reset_link
>>         aer_root_reset
>>           pci_reset_bridge_secondary_bus                # <-- reset
>>     broadcast_error_message(..., report_resume)
>>       pci_walk_bus(..., report_resume, ...)
>>         report_resume
>>       if (cb == report_resume)
>>         pci_cleanup_aer_uncorrect_error_status
>>           pci_write_config_dword(PCI_ERR_UNCOR_STATUS)  # <-- clear 
>> status
>> 
>> After this patch, it will look like this:
>> 
>>   do_recovery
>>     do_fatal_recovery
>>       pci_cleanup_aer_uncorrect_error_status
>>         pci_write_config_dword(PCI_ERR_UNCOR_STATUS)    # <-- clear 
>> status
>>       reset_link
>>         driver->reset_link
>>           aer_root_reset
>>             pci_reset_bridge_secondary_bus              # <-- reset
>>             aer_error_resume
>>               pcie_capability_write_word(PCI_EXP_DEVSTA)        # <-- 
>> clear more
>>               pci_write_config_dword(PCI_ERR_UNCOR_STATUS)      # <-- 
>> clear status
>> 
>> So if I'm understanding correctly, the new path clears the status too
>> early, then clears it again (plus clearing DEVSTA, which we didn't do
>> before) later.
>> 
>> I would think we would want to leave aer_root_reset() alone, and just 
>> move
>> the pci_cleanup_aer_uncorrect_error_status() in do_fatal_recovery() 
>> down so
>> it happens after we call reset_link().  That way the reset/clear 
>> sequence
>> would be the same as it was before.
> 
> I've been fiddling with this a bit myself and will post the results to 
> see
> what you think.


ok so you are suggesting to move pci_cleanup_aer_uncorrect_error_status 
down which I can do.

And not to call aer_error_resume, because you think its clearing the 
status again.

following code: calls aer_error_resume.
pci_broadcast_error_message()
  /*
                  * If the error is reported by an end point, we think 
this
                  * error is related to the upstream link of the end 
point.
                  */
                 if (state == pci_channel_io_normal)
                         /*
                          * the error is non fatal so the bus is ok, just 
invoke
                          * the callback for the function that logged the 
error.
                          */
                         cb(dev, &result_data);
                 else
                         pci_walk_bus(dev->bus, cb, &result_data);


besides aer_error_resume does following things in addition to clearing 
PCI_ERR_UNCOR_STATUS

/* Clean up Root device status */
	pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &reg16);
	pcie_capability_write_word(dev, PCI_EXP_DEVSTA, reg16);

if (dev->error_state == pci_channel_io_normal)
		status &= ~mask; /* Clear corresponding nonfatal bits */
	else
		status &= mask; /* Clear corresponding fatal bits */
	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);


so we have to have conditional call
such as
if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
            error_resume


so the code might look like this..

do_recovery
    do_fatal_recovery
        reset_link
          driver->reset_link
            aer_root_reset
                pci_reset_bridge_secondary_bus              # <-- reset
            if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
            {
                aer_error_resume
                    pcie_capability_write_word(PCI_EXP_DEVSTA)        # 
<-- clear more
                    pci_write_config_dword(PCI_ERR_UNCOR_STATUS)      # 
<--
            }
            pci_cleanup_aer_uncorrect_error_status(dev);


does it make sense ?

Regards,
Oza.
Bjorn Helgaas May 9, 2018, 11:21 p.m. UTC | #4
On Wed, May 09, 2018 at 06:44:53PM +0530, poza@codeaurora.org wrote:
> On 2018-05-09 18:37, Bjorn Helgaas wrote:
> > On Tue, May 08, 2018 at 06:53:30PM -0500, Bjorn Helgaas wrote:
> > > On Thu, May 03, 2018 at 01:03:52AM -0400, Oza Pawandeep wrote:
> > > > This patch alters the behavior of handling of ERR_FATAL, where removal
> > > > of devices is initiated, followed by reset link, followed by
> > > > re-enumeration.
> > > >
> > > > So the errors are handled in a different way as follows:
> > > > ERR_NONFATAL => call driver recovery entry points
> > > > ERR_FATAL    => remove and re-enumerate
> > > >
> > > > please refer to Documentation/PCI/pci-error-recovery.txt for more details.
> > > >
> > > > Signed-off-by: Oza Pawandeep <poza@codeaurora.org>
> > > >
> > > > diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
> > > > index 779b387..206f590 100644
> > > > --- a/drivers/pci/pcie/aer/aerdrv.c
> > > > +++ b/drivers/pci/pcie/aer/aerdrv.c
> > > > @@ -330,6 +330,13 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> > > >         reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
> > > >         pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
> > > >
> > > > +       /*
> > > > +        * This function is called only on ERR_FATAL now, and since
> > > > +        * the pci_report_resume is called only in ERR_NONFATAL case,
> > > > +        * the clearing part has to be taken care here.
> > > > +        */
> > > > +       aer_error_resume(dev);
> > > 
> > > I don't understand this part.  Previously the ERR_FATAL path looked
> > > like
> > > this:
> > > 
> > >   do_recovery
> > >     reset_link
> > >       driver->reset_link
> > >         aer_root_reset
> > >           pci_reset_bridge_secondary_bus                # <-- reset
> > >     broadcast_error_message(..., report_resume)
> > >       pci_walk_bus(..., report_resume, ...)
> > >         report_resume
> > >       if (cb == report_resume)
> > >         pci_cleanup_aer_uncorrect_error_status
> > >           pci_write_config_dword(PCI_ERR_UNCOR_STATUS)  # <-- clear
> > > status
> > > 
> > > After this patch, it will look like this:
> > > 
> > >   do_recovery
> > >     do_fatal_recovery
> > >       pci_cleanup_aer_uncorrect_error_status
> > >         pci_write_config_dword(PCI_ERR_UNCOR_STATUS)    # <-- clear
> > > status
> > >       reset_link
> > >         driver->reset_link
> > >           aer_root_reset
> > >             pci_reset_bridge_secondary_bus              # <-- reset
> > >             aer_error_resume
> > >               pcie_capability_write_word(PCI_EXP_DEVSTA)        #
> > > <-- clear more
> > >               pci_write_config_dword(PCI_ERR_UNCOR_STATUS)      #
> > > <-- clear status
> > > 
> > > So if I'm understanding correctly, the new path clears the status too
> > > early, then clears it again (plus clearing DEVSTA, which we didn't do
> > > before) later.
> > > 
> > > I would think we would want to leave aer_root_reset() alone, and
> > > just move
> > > the pci_cleanup_aer_uncorrect_error_status() in do_fatal_recovery()
> > > down so
> > > it happens after we call reset_link().  That way the reset/clear
> > > sequence
> > > would be the same as it was before.
> > 
> > I've been fiddling with this a bit myself and will post the results to
> > see
> > what you think.
> 
> 
> ok so you are suggesting to move pci_cleanup_aer_uncorrect_error_status down
> which I can do.
> 
> And not to call aer_error_resume, because you think its clearing the status
> again.
> 
> following code: calls aer_error_resume.
> pci_broadcast_error_message()
>  /*
>                  * If the error is reported by an end point, we think this
>                  * error is related to the upstream link of the end point.
>                  */
>                 if (state == pci_channel_io_normal)
>                         /*
>                          * the error is non fatal so the bus is ok, just
> invoke
>                          * the callback for the function that logged the
> error.
>                          */
>                         cb(dev, &result_data);
>                 else
>                         pci_walk_bus(dev->bus, cb, &result_data);

Holy crap, I thought this could not possibly get any more complicated,
but you're right; we do actually call aer_error_resume() today via an
extremely convoluted path:

  do_recovery(pci_dev)
    broadcast_error_message(..., error_detected, ...)
    if (AER_FATAL)
      reset_link(pci_dev)
        udev = BRIDGE ? pci_dev : pci_dev->bus->self
        driver->reset_link(udev)
          aer_root_reset(udev)
    if (CAN_RECOVER)
      broadcast_error_message(..., mmio_enabled, ...)
    if (NEED_RESET)
      broadcast_error_message(..., slot_reset, ...)
    broadcast_error_message(dev, ..., report_resume, ...)
      if (BRIDGE)
        report_resume
          driver->resume
            pcie_portdrv_err_resume
              device_for_each_child(..., resume_iter)
                resume_iter
                  driver->error_resume
                    aer_error_resume
        pci_cleanup_aer_uncorrect_error_status(pci_dev)       # only if BRIDGE
          pci_write_config_dword(PCI_ERR_UNCOR_STATUS)

aerdriver is the only port service driver that implements
.error_resume(), and aerdriver only binds to root ports.  I can't
really believe all these device_for_each_child()/resume_iter()
gyrations are necessary when this is AER code calling AER code.

Bjorn
Oza Pawandeep May 10, 2018, 7:01 a.m. UTC | #5
On 2018-05-10 04:51, Bjorn Helgaas wrote:
> On Wed, May 09, 2018 at 06:44:53PM +0530, poza@codeaurora.org wrote:
>> On 2018-05-09 18:37, Bjorn Helgaas wrote:
>> > On Tue, May 08, 2018 at 06:53:30PM -0500, Bjorn Helgaas wrote:
>> > > On Thu, May 03, 2018 at 01:03:52AM -0400, Oza Pawandeep wrote:
>> > > > This patch alters the behavior of handling of ERR_FATAL, where removal
>> > > > of devices is initiated, followed by reset link, followed by
>> > > > re-enumeration.
>> > > >
>> > > > So the errors are handled in a different way as follows:
>> > > > ERR_NONFATAL => call driver recovery entry points
>> > > > ERR_FATAL    => remove and re-enumerate
>> > > >
>> > > > please refer to Documentation/PCI/pci-error-recovery.txt for more details.
>> > > >
>> > > > Signed-off-by: Oza Pawandeep <poza@codeaurora.org>
>> > > >
>> > > > diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
>> > > > index 779b387..206f590 100644
>> > > > --- a/drivers/pci/pcie/aer/aerdrv.c
>> > > > +++ b/drivers/pci/pcie/aer/aerdrv.c
>> > > > @@ -330,6 +330,13 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>> > > >         reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
>> > > >         pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
>> > > >
>> > > > +       /*
>> > > > +        * This function is called only on ERR_FATAL now, and since
>> > > > +        * the pci_report_resume is called only in ERR_NONFATAL case,
>> > > > +        * the clearing part has to be taken care here.
>> > > > +        */
>> > > > +       aer_error_resume(dev);
>> > >
>> > > I don't understand this part.  Previously the ERR_FATAL path looked
>> > > like
>> > > this:
>> > >
>> > >   do_recovery
>> > >     reset_link
>> > >       driver->reset_link
>> > >         aer_root_reset
>> > >           pci_reset_bridge_secondary_bus                # <-- reset
>> > >     broadcast_error_message(..., report_resume)
>> > >       pci_walk_bus(..., report_resume, ...)
>> > >         report_resume
>> > >       if (cb == report_resume)
>> > >         pci_cleanup_aer_uncorrect_error_status
>> > >           pci_write_config_dword(PCI_ERR_UNCOR_STATUS)  # <-- clear
>> > > status
>> > >
>> > > After this patch, it will look like this:
>> > >
>> > >   do_recovery
>> > >     do_fatal_recovery
>> > >       pci_cleanup_aer_uncorrect_error_status
>> > >         pci_write_config_dword(PCI_ERR_UNCOR_STATUS)    # <-- clear
>> > > status
>> > >       reset_link
>> > >         driver->reset_link
>> > >           aer_root_reset
>> > >             pci_reset_bridge_secondary_bus              # <-- reset
>> > >             aer_error_resume
>> > >               pcie_capability_write_word(PCI_EXP_DEVSTA)        #
>> > > <-- clear more
>> > >               pci_write_config_dword(PCI_ERR_UNCOR_STATUS)      #
>> > > <-- clear status
>> > >
>> > > So if I'm understanding correctly, the new path clears the status too
>> > > early, then clears it again (plus clearing DEVSTA, which we didn't do
>> > > before) later.
>> > >
>> > > I would think we would want to leave aer_root_reset() alone, and
>> > > just move
>> > > the pci_cleanup_aer_uncorrect_error_status() in do_fatal_recovery()
>> > > down so
>> > > it happens after we call reset_link().  That way the reset/clear
>> > > sequence
>> > > would be the same as it was before.
>> >
>> > I've been fiddling with this a bit myself and will post the results to
>> > see
>> > what you think.
>> 
>> 
>> ok so you are suggesting to move 
>> pci_cleanup_aer_uncorrect_error_status down
>> which I can do.
>> 
>> And not to call aer_error_resume, because you think its clearing the 
>> status
>> again.
>> 
>> following code: calls aer_error_resume.
>> pci_broadcast_error_message()
>>  /*
>>                  * If the error is reported by an end point, we think 
>> this
>>                  * error is related to the upstream link of the end 
>> point.
>>                  */
>>                 if (state == pci_channel_io_normal)
>>                         /*
>>                          * the error is non fatal so the bus is ok, 
>> just
>> invoke
>>                          * the callback for the function that logged 
>> the
>> error.
>>                          */
>>                         cb(dev, &result_data);
>>                 else
>>                         pci_walk_bus(dev->bus, cb, &result_data);
> 
> Holy crap, I thought this could not possibly get any more complicated,
> but you're right; we do actually call aer_error_resume() today via an
> extremely convoluted path:
> 
>   do_recovery(pci_dev)
>     broadcast_error_message(..., error_detected, ...)
>     if (AER_FATAL)
>       reset_link(pci_dev)
>         udev = BRIDGE ? pci_dev : pci_dev->bus->self
>         driver->reset_link(udev)
>           aer_root_reset(udev)
>     if (CAN_RECOVER)
>       broadcast_error_message(..., mmio_enabled, ...)
>     if (NEED_RESET)
>       broadcast_error_message(..., slot_reset, ...)
>     broadcast_error_message(dev, ..., report_resume, ...)
>       if (BRIDGE)
>         report_resume
>           driver->resume
>             pcie_portdrv_err_resume
>               device_for_each_child(..., resume_iter)
>                 resume_iter
>                   driver->error_resume
>                     aer_error_resume
>         pci_cleanup_aer_uncorrect_error_status(pci_dev)       # only if 
> BRIDGE
>           pci_write_config_dword(PCI_ERR_UNCOR_STATUS)
> 
> aerdriver is the only port service driver that implements
> .error_resume(), and aerdriver only binds to root ports.  I can't
> really believe all these device_for_each_child()/resume_iter()
> gyrations are necessary when this is AER code calling AER code.
> 
> Bjorn

here is the code of do_fatal_recovery, where I have moved the things 
down and doing only if it is bridge.
let me know how this looks to you, so then I can post v16.


static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int 
severity)
{
         struct pci_dev *udev;
         struct pci_bus *parent;
         struct pci_dev *pdev, *temp;
         struct aer_broadcast_data result_data;
         pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;


         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
                 udev = dev;
         else
                 udev = dev->bus->self;

         parent = udev->subordinate;
         pci_lock_rescan_remove();
         list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
                                  bus_list) {
                 pci_dev_get(pdev);
                 pci_dev_set_disconnected(pdev, NULL);
                 if (pci_has_subordinate(pdev))
                         pci_walk_bus(pdev->subordinate,
                                      pci_dev_set_disconnected, NULL);
                 pci_stop_and_remove_bus_device(pdev);
                 pci_dev_put(pdev);
         }

         result = reset_link(udev, severity);
         if (severity == AER_FATAL && dev->hdr_type == 
PCI_HEADER_TYPE_BRIDGE) {
                 pci_walk_bus(dev->subordinate, report_resume, 
&result_data);
                 pci_cleanup_aer_uncorrect_error_status(dev);
                 dev->error_state = pci_channel_io_normal;
         }
         if (result == PCI_ERS_RESULT_RECOVERED)
                 if (pcie_wait_for_link(udev, true))
                         pci_rescan_bus(udev->bus);

         pci_unlock_rescan_remove();

         return result;
}

Regards,
Oza.
Bjorn Helgaas May 10, 2018, 1:10 p.m. UTC | #6
On Thu, May 10, 2018 at 12:31:16PM +0530, poza@codeaurora.org wrote:
> On 2018-05-10 04:51, Bjorn Helgaas wrote:
> > On Wed, May 09, 2018 at 06:44:53PM +0530, poza@codeaurora.org wrote:
> > > On 2018-05-09 18:37, Bjorn Helgaas wrote:
> > > > On Tue, May 08, 2018 at 06:53:30PM -0500, Bjorn Helgaas wrote:
> > > > > On Thu, May 03, 2018 at 01:03:52AM -0400, Oza Pawandeep wrote:
> > > > > > This patch alters the behavior of handling of ERR_FATAL, where removal
> > > > > > of devices is initiated, followed by reset link, followed by
> > > > > > re-enumeration.
> > > > > >
> > > > > > So the errors are handled in a different way as follows:
> > > > > > ERR_NONFATAL => call driver recovery entry points
> > > > > > ERR_FATAL    => remove and re-enumerate
> > > > > >
> > > > > > please refer to Documentation/PCI/pci-error-recovery.txt for more details.
> > > > > >
> > > > > > Signed-off-by: Oza Pawandeep <poza@codeaurora.org>
> > > > > >
> > > > > > diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
> > > > > > index 779b387..206f590 100644
> > > > > > --- a/drivers/pci/pcie/aer/aerdrv.c
> > > > > > +++ b/drivers/pci/pcie/aer/aerdrv.c
> > > > > > @@ -330,6 +330,13 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
> > > > > >         reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
> > > > > >         pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
> > > > > >
> > > > > > +       /*
> > > > > > +        * This function is called only on ERR_FATAL now, and since
> > > > > > +        * the pci_report_resume is called only in ERR_NONFATAL case,
> > > > > > +        * the clearing part has to be taken care here.
> > > > > > +        */
> > > > > > +       aer_error_resume(dev);
> > > > >
> > > > > I don't understand this part.  Previously the ERR_FATAL path looked
> > > > > like
> > > > > this:
> > > > >
> > > > >   do_recovery
> > > > >     reset_link
> > > > >       driver->reset_link
> > > > >         aer_root_reset
> > > > >           pci_reset_bridge_secondary_bus                # <-- reset
> > > > >     broadcast_error_message(..., report_resume)
> > > > >       pci_walk_bus(..., report_resume, ...)
> > > > >         report_resume
> > > > >       if (cb == report_resume)
> > > > >         pci_cleanup_aer_uncorrect_error_status
> > > > >           pci_write_config_dword(PCI_ERR_UNCOR_STATUS)  # <-- clear
> > > > > status
> > > > >
> > > > > After this patch, it will look like this:
> > > > >
> > > > >   do_recovery
> > > > >     do_fatal_recovery
> > > > >       pci_cleanup_aer_uncorrect_error_status
> > > > >         pci_write_config_dword(PCI_ERR_UNCOR_STATUS)    # <-- clear
> > > > > status
> > > > >       reset_link
> > > > >         driver->reset_link
> > > > >           aer_root_reset
> > > > >             pci_reset_bridge_secondary_bus              # <-- reset
> > > > >             aer_error_resume
> > > > >               pcie_capability_write_word(PCI_EXP_DEVSTA)        #
> > > > > <-- clear more
> > > > >               pci_write_config_dword(PCI_ERR_UNCOR_STATUS)      #
> > > > > <-- clear status
> > > > >
> > > > > So if I'm understanding correctly, the new path clears the status too
> > > > > early, then clears it again (plus clearing DEVSTA, which we didn't do
> > > > > before) later.
> > > > >
> > > > > I would think we would want to leave aer_root_reset() alone, and
> > > > > just move
> > > > > the pci_cleanup_aer_uncorrect_error_status() in do_fatal_recovery()
> > > > > down so
> > > > > it happens after we call reset_link().  That way the reset/clear
> > > > > sequence
> > > > > would be the same as it was before.
> > > >
> > > > I've been fiddling with this a bit myself and will post the results to
> > > > see
> > > > what you think.
> > > 
> > > 
> > > ok so you are suggesting to move
> > > pci_cleanup_aer_uncorrect_error_status down
> > > which I can do.
> > > 
> > > And not to call aer_error_resume, because you think its clearing the
> > > status
> > > again.
> > > 
> > > following code: calls aer_error_resume.
> > > pci_broadcast_error_message()
> > >  /*
> > >                  * If the error is reported by an end point, we
> > > think this
> > >                  * error is related to the upstream link of the end
> > > point.
> > >                  */
> > >                 if (state == pci_channel_io_normal)
> > >                         /*
> > >                          * the error is non fatal so the bus is ok,
> > > just
> > > invoke
> > >                          * the callback for the function that logged
> > > the
> > > error.
> > >                          */
> > >                         cb(dev, &result_data);
> > >                 else
> > >                         pci_walk_bus(dev->bus, cb, &result_data);
> > 
> > Holy crap, I thought this could not possibly get any more complicated,
> > but you're right; we do actually call aer_error_resume() today via an
> > extremely convoluted path:
> > 
> >   do_recovery(pci_dev)
> >     broadcast_error_message(..., error_detected, ...)
> >     if (AER_FATAL)
> >       reset_link(pci_dev)
> >         udev = BRIDGE ? pci_dev : pci_dev->bus->self
> >         driver->reset_link(udev)
> >           aer_root_reset(udev)
> >     if (CAN_RECOVER)
> >       broadcast_error_message(..., mmio_enabled, ...)
> >     if (NEED_RESET)
> >       broadcast_error_message(..., slot_reset, ...)
> >     broadcast_error_message(dev, ..., report_resume, ...)
> >       if (BRIDGE)
> >         report_resume
> >           driver->resume
> >             pcie_portdrv_err_resume
> >               device_for_each_child(..., resume_iter)
> >                 resume_iter
> >                   driver->error_resume
> >                     aer_error_resume
> >         pci_cleanup_aer_uncorrect_error_status(pci_dev)       # only if
> > BRIDGE
> >           pci_write_config_dword(PCI_ERR_UNCOR_STATUS)
> > 
> > aerdriver is the only port service driver that implements
> > .error_resume(), and aerdriver only binds to root ports.  I can't
> > really believe all these device_for_each_child()/resume_iter()
> > gyrations are necessary when this is AER code calling AER code.
> > 
> > Bjorn
> 
> here is the code of do_fatal_recovery, where I have moved the things down
> and doing only if it is bridge.
> let me know how this looks to you, so then I can post v16.

This looks superficially OK.  It is very difficult for me to verify that
the behavior is equivalent to the current code, but that's not your fault;
it's just a consequence of the existing design.

I have a couple trivial comments elsewhere, and I'll respond to those
patches individually.

> static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity)
> {
>         struct pci_dev *udev;
>         struct pci_bus *parent;
>         struct pci_dev *pdev, *temp;
>         struct aer_broadcast_data result_data;
>         pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
> 
> 
>         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
>                 udev = dev;
>         else
>                 udev = dev->bus->self;
> 
>         parent = udev->subordinate;
>         pci_lock_rescan_remove();
>         list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
>                                  bus_list) {
>                 pci_dev_get(pdev);
>                 pci_dev_set_disconnected(pdev, NULL);
>                 if (pci_has_subordinate(pdev))
>                         pci_walk_bus(pdev->subordinate,
>                                      pci_dev_set_disconnected, NULL);
>                 pci_stop_and_remove_bus_device(pdev);
>                 pci_dev_put(pdev);
>         }
> 
>         result = reset_link(udev, severity);
>         if (severity == AER_FATAL && dev->hdr_type ==
> PCI_HEADER_TYPE_BRIDGE) {
>                 pci_walk_bus(dev->subordinate, report_resume, &result_data);
>                 pci_cleanup_aer_uncorrect_error_status(dev);
>                 dev->error_state = pci_channel_io_normal;
>         }
>         if (result == PCI_ERS_RESULT_RECOVERED)
>                 if (pcie_wait_for_link(udev, true))
>                         pci_rescan_bus(udev->bus);
> 
>         pci_unlock_rescan_remove();
> 
>         return result;
> }
> 
> Regards,
> Oza.
> 
>
Sinan Kaya May 10, 2018, 1:15 p.m. UTC | #7
On 2018-05-10 14:10, Bjorn Helgaas wrote:
> On Thu, May 10, 2018 at 12:31:16PM +0530, poza@codeaurora.org wrote:
>> On 2018-05-10 04:51, Bjorn Helgaas wrote:
>> > On Wed, May 09, 2018 at 06:44:53PM +0530, poza@codeaurora.org wrote:
>> > > On 2018-05-09 18:37, Bjorn Helgaas wrote:
>> > > > On Tue, May 08, 2018 at 06:53:30PM -0500, Bjorn Helgaas wrote:
>> > > > > On Thu, May 03, 2018 at 01:03:52AM -0400, Oza Pawandeep wrote:
>> > > > > > This patch alters the behavior of handling of ERR_FATAL, where removal
>> > > > > > of devices is initiated, followed by reset link, followed by
>> > > > > > re-enumeration.
>> > > > > >
>> > > > > > So the errors are handled in a different way as follows:
>> > > > > > ERR_NONFATAL => call driver recovery entry points
>> > > > > > ERR_FATAL    => remove and re-enumerate
>> > > > > >
>> > > > > > please refer to Documentation/PCI/pci-error-recovery.txt for more details.
>> > > > > >
>> > > > > > Signed-off-by: Oza Pawandeep <poza@codeaurora.org>
>> > > > > >
>> > > > > > diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
>> > > > > > index 779b387..206f590 100644
>> > > > > > --- a/drivers/pci/pcie/aer/aerdrv.c
>> > > > > > +++ b/drivers/pci/pcie/aer/aerdrv.c
>> > > > > > @@ -330,6 +330,13 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>> > > > > >         reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
>> > > > > >         pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
>> > > > > >
>> > > > > > +       /*
>> > > > > > +        * This function is called only on ERR_FATAL now, and since
>> > > > > > +        * the pci_report_resume is called only in ERR_NONFATAL case,
>> > > > > > +        * the clearing part has to be taken care here.
>> > > > > > +        */
>> > > > > > +       aer_error_resume(dev);
>> > > > >
>> > > > > I don't understand this part.  Previously the ERR_FATAL path looked
>> > > > > like
>> > > > > this:
>> > > > >
>> > > > >   do_recovery
>> > > > >     reset_link
>> > > > >       driver->reset_link
>> > > > >         aer_root_reset
>> > > > >           pci_reset_bridge_secondary_bus                # <-- reset
>> > > > >     broadcast_error_message(..., report_resume)
>> > > > >       pci_walk_bus(..., report_resume, ...)
>> > > > >         report_resume
>> > > > >       if (cb == report_resume)
>> > > > >         pci_cleanup_aer_uncorrect_error_status
>> > > > >           pci_write_config_dword(PCI_ERR_UNCOR_STATUS)  # <-- clear
>> > > > > status
>> > > > >
>> > > > > After this patch, it will look like this:
>> > > > >
>> > > > >   do_recovery
>> > > > >     do_fatal_recovery
>> > > > >       pci_cleanup_aer_uncorrect_error_status
>> > > > >         pci_write_config_dword(PCI_ERR_UNCOR_STATUS)    # <-- clear
>> > > > > status
>> > > > >       reset_link
>> > > > >         driver->reset_link
>> > > > >           aer_root_reset
>> > > > >             pci_reset_bridge_secondary_bus              # <-- reset
>> > > > >             aer_error_resume
>> > > > >               pcie_capability_write_word(PCI_EXP_DEVSTA)        #
>> > > > > <-- clear more
>> > > > >               pci_write_config_dword(PCI_ERR_UNCOR_STATUS)      #
>> > > > > <-- clear status
>> > > > >
>> > > > > So if I'm understanding correctly, the new path clears the status too
>> > > > > early, then clears it again (plus clearing DEVSTA, which we didn't do
>> > > > > before) later.
>> > > > >
>> > > > > I would think we would want to leave aer_root_reset() alone, and
>> > > > > just move
>> > > > > the pci_cleanup_aer_uncorrect_error_status() in do_fatal_recovery()
>> > > > > down so
>> > > > > it happens after we call reset_link().  That way the reset/clear
>> > > > > sequence
>> > > > > would be the same as it was before.
>> > > >
>> > > > I've been fiddling with this a bit myself and will post the results to
>> > > > see
>> > > > what you think.
>> > >
>> > >
>> > > ok so you are suggesting to move
>> > > pci_cleanup_aer_uncorrect_error_status down
>> > > which I can do.
>> > >
>> > > And not to call aer_error_resume, because you think its clearing the
>> > > status
>> > > again.
>> > >
>> > > following code: calls aer_error_resume.
>> > > pci_broadcast_error_message()
>> > >  /*
>> > >                  * If the error is reported by an end point, we
>> > > think this
>> > >                  * error is related to the upstream link of the end
>> > > point.
>> > >                  */
>> > >                 if (state == pci_channel_io_normal)
>> > >                         /*
>> > >                          * the error is non fatal so the bus is ok,
>> > > just
>> > > invoke
>> > >                          * the callback for the function that logged
>> > > the
>> > > error.
>> > >                          */
>> > >                         cb(dev, &result_data);
>> > >                 else
>> > >                         pci_walk_bus(dev->bus, cb, &result_data);
>> >
>> > Holy crap, I thought this could not possibly get any more complicated,
>> > but you're right; we do actually call aer_error_resume() today via an
>> > extremely convoluted path:
>> >
>> >   do_recovery(pci_dev)
>> >     broadcast_error_message(..., error_detected, ...)
>> >     if (AER_FATAL)
>> >       reset_link(pci_dev)
>> >         udev = BRIDGE ? pci_dev : pci_dev->bus->self
>> >         driver->reset_link(udev)
>> >           aer_root_reset(udev)
>> >     if (CAN_RECOVER)
>> >       broadcast_error_message(..., mmio_enabled, ...)
>> >     if (NEED_RESET)
>> >       broadcast_error_message(..., slot_reset, ...)
>> >     broadcast_error_message(dev, ..., report_resume, ...)
>> >       if (BRIDGE)
>> >         report_resume
>> >           driver->resume
>> >             pcie_portdrv_err_resume
>> >               device_for_each_child(..., resume_iter)
>> >                 resume_iter
>> >                   driver->error_resume
>> >                     aer_error_resume
>> >         pci_cleanup_aer_uncorrect_error_status(pci_dev)       # only if
>> > BRIDGE
>> >           pci_write_config_dword(PCI_ERR_UNCOR_STATUS)
>> >
>> > aerdriver is the only port service driver that implements
>> > .error_resume(), and aerdriver only binds to root ports.  I can't
>> > really believe all these device_for_each_child()/resume_iter()
>> > gyrations are necessary when this is AER code calling AER code.
>> >
>> > Bjorn
>> 
>> here is the code of do_fatal_recovery, where I have moved the things 
>> down
>> and doing only if it is bridge.
>> let me know how this looks to you, so then I can post v16.
> 
> This looks superficially OK.  It is very difficult for me to verify 
> that
> the behavior is equivalent to the current code, but that's not your 
> fault;
> it's just a consequence of the existing design.
> 
> I have a couple trivial comments elsewhere, and I'll respond to those
> patches individually.
> 
>> static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int 
>> severity)
>> {
>>         struct pci_dev *udev;
>>         struct pci_bus *parent;
>>         struct pci_dev *pdev, *temp;
>>         struct aer_broadcast_data result_data;
>>         pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
>> 
>> 
>>         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
>>                 udev = dev;
>>         else
>>                 udev = dev->bus->self;
>> 
>>         parent = udev->subordinate;
>>         pci_lock_rescan_remove();
>>         list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
>>                                  bus_list) {
>>                 pci_dev_get(pdev);
>>                 pci_dev_set_disconnected(pdev, NULL);
>>                 if (pci_has_subordinate(pdev))
>>                         pci_walk_bus(pdev->subordinate,
>>                                      pci_dev_set_disconnected, NULL);
>>                 pci_stop_and_remove_bus_device(pdev);
>>                 pci_dev_put(pdev);
>>         }
>> 
>>         result = reset_link(udev, severity);
>>         if (severity == AER_FATAL && dev->hdr_type ==
>> PCI_HEADER_TYPE_BRIDGE) {
>>                 pci_walk_bus(dev->subordinate, report_resume, 
>> &result_data);

Why are we calling resume?

>>                 pci_cleanup_aer_uncorrect_error_status(dev);
>>                 dev->error_state = pci_channel_io_normal;
>>         }
>>         if (result == PCI_ERS_RESULT_RECOVERED)
>>                 if (pcie_wait_for_link(udev, true))
>>                         pci_rescan_bus(udev->bus);
>> 
>>         pci_unlock_rescan_remove();
>> 
>>         return result;
>> }
>> 
>> Regards,
>> Oza.
>> 
>>
Bjorn Helgaas May 10, 2018, 1:17 p.m. UTC | #8
On Thu, May 03, 2018 at 01:03:52AM -0400, Oza Pawandeep wrote:
> This patch alters the behavior of handling of ERR_FATAL, where removal
> of devices is initiated, followed by reset link, followed by
> re-enumeration.
> 
> So the errors are handled in a different way as follows:
> ERR_NONFATAL => call driver recovery entry points
> ERR_FATAL    => remove and re-enumerate
> 
> please refer to Documentation/PCI/pci-error-recovery.txt for more details.
> 
> Signed-off-by: Oza Pawandeep <poza@codeaurora.org>
> 
> diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
> index 779b387..206f590 100644
> --- a/drivers/pci/pcie/aer/aerdrv.c
> +++ b/drivers/pci/pcie/aer/aerdrv.c
> @@ -330,6 +330,13 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>  	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
>  	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
>  
> +	/*
> +	 * This function is called only on ERR_FATAL now, and since
> +	 * the pci_report_resume is called only in ERR_NONFATAL case,
> +	 * the clearing part has to be taken care here.
> +	 */
> +	aer_error_resume(dev);
> +
>  	return PCI_ERS_RESULT_RECOVERED;
>  }
>  
> diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
> index 0ea5acc..655d4e8 100644
> --- a/drivers/pci/pcie/aer/aerdrv_core.c
> +++ b/drivers/pci/pcie/aer/aerdrv_core.c
> @@ -20,6 +20,7 @@
>  #include <linux/slab.h>
>  #include <linux/kfifo.h>
>  #include "aerdrv.h"
> +#include "../../pci.h"
>  
>  #define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
>  				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
> @@ -474,6 +475,44 @@ static pci_ers_result_t reset_link(struct pci_dev *dev)
>  	return status;
>  }
>  
> +static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity)

Here's a possiblity for your consideration.  Expose these two interfaces:

  void pcie_do_nonfatal_recovery(struct pci_dev *dev);
  void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service);

(this would be the end result, after the rename and move to err.c) and move
the fatal/nonfatal testing into the callers, e..g, 

  handle_error_source(...)
  {
    ...
    if (info->severity == AER_NONFATAL)
      pcie_do_nonfatal_recovery(dev);
    else if (info->severity == AER_FATAL)
      pcie_do_fatal_recovery(dev, PCIE_PORT_SERVICE_AER);
  }

Then I don't think you would need this code in reset_link():

  reset_link(...)
  {
    ...
    if (severity == DPC_FATAL)
      service = PCIE_PORT_SERVICE_DPC;
    ...

because you would already have the service.

> +{
> +	struct pci_dev *udev;
> +	struct pci_bus *parent;
> +	struct pci_dev *pdev, *temp;
> +	pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
> +
> +	if (severity == AER_FATAL)
> +		pci_cleanup_aer_uncorrect_error_status(dev);
> +
> +	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
> +		udev = dev;
> +	else
> +		udev = dev->bus->self;
> +
> +	parent = udev->subordinate;
> +	pci_lock_rescan_remove();
> +	list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
> +				 bus_list) {
> +		pci_dev_get(pdev);
> +		pci_dev_set_disconnected(pdev, NULL);
> +		if (pci_has_subordinate(pdev))
> +			pci_walk_bus(pdev->subordinate,
> +				     pci_dev_set_disconnected, NULL);
> +		pci_stop_and_remove_bus_device(pdev);
> +		pci_dev_put(pdev);
> +	}
> +
> +	result = reset_link(udev);
> +	if (result == PCI_ERS_RESULT_RECOVERED)
> +		if (pcie_wait_for_link(udev, true))
> +			pci_rescan_bus(udev->bus);
> +
> +	pci_unlock_rescan_remove();
> +
> +	return result;
> +}
> +
>  /**
>   * do_recovery - handle nonfatal/fatal error recovery process
>   * @dev: pointer to a pci_dev data structure of agent detecting an error
> @@ -485,11 +524,15 @@ static pci_ers_result_t reset_link(struct pci_dev *dev)
>   */
>  static void do_recovery(struct pci_dev *dev, int severity)
>  {
> -	pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
> +	pci_ers_result_t status;
>  	enum pci_channel_state state;
>  
> -	if (severity == AER_FATAL)
> -		state = pci_channel_io_frozen;
> +	if (severity == AER_FATAL) {
> +		status = do_fatal_recovery(dev, severity);
> +		if (status != PCI_ERS_RESULT_RECOVERED)
> +			goto failed;
> +		return;
> +	}
>  	else
>  		state = pci_channel_io_normal;
>  
> @@ -498,12 +541,6 @@ static void do_recovery(struct pci_dev *dev, int severity)
>  			"error_detected",
>  			report_error_detected);
>  
> -	if (severity == AER_FATAL) {
> -		result = reset_link(dev);
> -		if (result != PCI_ERS_RESULT_RECOVERED)
> -			goto failed;
> -	}
> -
>  	if (status == PCI_ERS_RESULT_CAN_RECOVER)
>  		status = broadcast_error_message(dev,
>  				state,
> -- 
> 2.7.4
>
Oza Pawandeep May 10, 2018, 2:18 p.m. UTC | #9
On 2018-05-10 18:45, okaya@codeaurora.org wrote:
> On 2018-05-10 14:10, Bjorn Helgaas wrote:
>> On Thu, May 10, 2018 at 12:31:16PM +0530, poza@codeaurora.org wrote:
>>> On 2018-05-10 04:51, Bjorn Helgaas wrote:
>>> > On Wed, May 09, 2018 at 06:44:53PM +0530, poza@codeaurora.org wrote:
>>> > > On 2018-05-09 18:37, Bjorn Helgaas wrote:
>>> > > > On Tue, May 08, 2018 at 06:53:30PM -0500, Bjorn Helgaas wrote:
>>> > > > > On Thu, May 03, 2018 at 01:03:52AM -0400, Oza Pawandeep wrote:
>>> > > > > > This patch alters the behavior of handling of ERR_FATAL, where removal
>>> > > > > > of devices is initiated, followed by reset link, followed by
>>> > > > > > re-enumeration.
>>> > > > > >
>>> > > > > > So the errors are handled in a different way as follows:
>>> > > > > > ERR_NONFATAL => call driver recovery entry points
>>> > > > > > ERR_FATAL    => remove and re-enumerate
>>> > > > > >
>>> > > > > > please refer to Documentation/PCI/pci-error-recovery.txt for more details.
>>> > > > > >
>>> > > > > > Signed-off-by: Oza Pawandeep <poza@codeaurora.org>
>>> > > > > >
>>> > > > > > diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
>>> > > > > > index 779b387..206f590 100644
>>> > > > > > --- a/drivers/pci/pcie/aer/aerdrv.c
>>> > > > > > +++ b/drivers/pci/pcie/aer/aerdrv.c
>>> > > > > > @@ -330,6 +330,13 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
>>> > > > > >         reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
>>> > > > > >         pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
>>> > > > > >
>>> > > > > > +       /*
>>> > > > > > +        * This function is called only on ERR_FATAL now, and since
>>> > > > > > +        * the pci_report_resume is called only in ERR_NONFATAL case,
>>> > > > > > +        * the clearing part has to be taken care here.
>>> > > > > > +        */
>>> > > > > > +       aer_error_resume(dev);
>>> > > > >
>>> > > > > I don't understand this part.  Previously the ERR_FATAL path looked
>>> > > > > like
>>> > > > > this:
>>> > > > >
>>> > > > >   do_recovery
>>> > > > >     reset_link
>>> > > > >       driver->reset_link
>>> > > > >         aer_root_reset
>>> > > > >           pci_reset_bridge_secondary_bus                # <-- reset
>>> > > > >     broadcast_error_message(..., report_resume)
>>> > > > >       pci_walk_bus(..., report_resume, ...)
>>> > > > >         report_resume
>>> > > > >       if (cb == report_resume)
>>> > > > >         pci_cleanup_aer_uncorrect_error_status
>>> > > > >           pci_write_config_dword(PCI_ERR_UNCOR_STATUS)  # <-- clear
>>> > > > > status
>>> > > > >
>>> > > > > After this patch, it will look like this:
>>> > > > >
>>> > > > >   do_recovery
>>> > > > >     do_fatal_recovery
>>> > > > >       pci_cleanup_aer_uncorrect_error_status
>>> > > > >         pci_write_config_dword(PCI_ERR_UNCOR_STATUS)    # <-- clear
>>> > > > > status
>>> > > > >       reset_link
>>> > > > >         driver->reset_link
>>> > > > >           aer_root_reset
>>> > > > >             pci_reset_bridge_secondary_bus              # <-- reset
>>> > > > >             aer_error_resume
>>> > > > >               pcie_capability_write_word(PCI_EXP_DEVSTA)        #
>>> > > > > <-- clear more
>>> > > > >               pci_write_config_dword(PCI_ERR_UNCOR_STATUS)      #
>>> > > > > <-- clear status
>>> > > > >
>>> > > > > So if I'm understanding correctly, the new path clears the status too
>>> > > > > early, then clears it again (plus clearing DEVSTA, which we didn't do
>>> > > > > before) later.
>>> > > > >
>>> > > > > I would think we would want to leave aer_root_reset() alone, and
>>> > > > > just move
>>> > > > > the pci_cleanup_aer_uncorrect_error_status() in do_fatal_recovery()
>>> > > > > down so
>>> > > > > it happens after we call reset_link().  That way the reset/clear
>>> > > > > sequence
>>> > > > > would be the same as it was before.
>>> > > >
>>> > > > I've been fiddling with this a bit myself and will post the results to
>>> > > > see
>>> > > > what you think.
>>> > >
>>> > >
>>> > > ok so you are suggesting to move
>>> > > pci_cleanup_aer_uncorrect_error_status down
>>> > > which I can do.
>>> > >
>>> > > And not to call aer_error_resume, because you think its clearing the
>>> > > status
>>> > > again.
>>> > >
>>> > > following code: calls aer_error_resume.
>>> > > pci_broadcast_error_message()
>>> > >  /*
>>> > >                  * If the error is reported by an end point, we
>>> > > think this
>>> > >                  * error is related to the upstream link of the end
>>> > > point.
>>> > >                  */
>>> > >                 if (state == pci_channel_io_normal)
>>> > >                         /*
>>> > >                          * the error is non fatal so the bus is ok,
>>> > > just
>>> > > invoke
>>> > >                          * the callback for the function that logged
>>> > > the
>>> > > error.
>>> > >                          */
>>> > >                         cb(dev, &result_data);
>>> > >                 else
>>> > >                         pci_walk_bus(dev->bus, cb, &result_data);
>>> >
>>> > Holy crap, I thought this could not possibly get any more complicated,
>>> > but you're right; we do actually call aer_error_resume() today via an
>>> > extremely convoluted path:
>>> >
>>> >   do_recovery(pci_dev)
>>> >     broadcast_error_message(..., error_detected, ...)
>>> >     if (AER_FATAL)
>>> >       reset_link(pci_dev)
>>> >         udev = BRIDGE ? pci_dev : pci_dev->bus->self
>>> >         driver->reset_link(udev)
>>> >           aer_root_reset(udev)
>>> >     if (CAN_RECOVER)
>>> >       broadcast_error_message(..., mmio_enabled, ...)
>>> >     if (NEED_RESET)
>>> >       broadcast_error_message(..., slot_reset, ...)
>>> >     broadcast_error_message(dev, ..., report_resume, ...)
>>> >       if (BRIDGE)
>>> >         report_resume
>>> >           driver->resume
>>> >             pcie_portdrv_err_resume
>>> >               device_for_each_child(..., resume_iter)
>>> >                 resume_iter
>>> >                   driver->error_resume
>>> >                     aer_error_resume
>>> >         pci_cleanup_aer_uncorrect_error_status(pci_dev)       # only if
>>> > BRIDGE
>>> >           pci_write_config_dword(PCI_ERR_UNCOR_STATUS)
>>> >
>>> > aerdriver is the only port service driver that implements
>>> > .error_resume(), and aerdriver only binds to root ports.  I can't
>>> > really believe all these device_for_each_child()/resume_iter()
>>> > gyrations are necessary when this is AER code calling AER code.
>>> >
>>> > Bjorn
>>> 
>>> here is the code of do_fatal_recovery, where I have moved the things 
>>> down
>>> and doing only if it is bridge.
>>> let me know how this looks to you, so then I can post v16.
>> 
>> This looks superficially OK.  It is very difficult for me to verify 
>> that
>> the behavior is equivalent to the current code, but that's not your 
>> fault;
>> it's just a consequence of the existing design.
>> 
>> I have a couple trivial comments elsewhere, and I'll respond to those
>> patches individually.
>> 
>>> static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int 
>>> severity)
>>> {
>>>         struct pci_dev *udev;
>>>         struct pci_bus *parent;
>>>         struct pci_dev *pdev, *temp;
>>>         struct aer_broadcast_data result_data;
>>>         pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
>>> 
>>> 
>>>         if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
>>>                 udev = dev;
>>>         else
>>>                 udev = dev->bus->self;
>>> 
>>>         parent = udev->subordinate;
>>>         pci_lock_rescan_remove();
>>>         list_for_each_entry_safe_reverse(pdev, temp, 
>>> &parent->devices,
>>>                                  bus_list) {
>>>                 pci_dev_get(pdev);
>>>                 pci_dev_set_disconnected(pdev, NULL);
>>>                 if (pci_has_subordinate(pdev))
>>>                         pci_walk_bus(pdev->subordinate,
>>>                                      pci_dev_set_disconnected, NULL);
>>>                 pci_stop_and_remove_bus_device(pdev);
>>>                 pci_dev_put(pdev);
>>>         }
>>> 
>>>         result = reset_link(udev, severity);
>>>         if (severity == AER_FATAL && dev->hdr_type ==
>>> PCI_HEADER_TYPE_BRIDGE) {
>>>                 pci_walk_bus(dev->subordinate, report_resume, 
>>> &result_data);
> 
> Why are we calling resume?

the reason we have to call resume here, because we are not calling 
aer_resume() any more in root_reset.
and we have to call resume only in bridge case.
please have a look at couple of conversation back with Bjorn.
the objective is to align the sequence close to the current code.

> 
>>>                 pci_cleanup_aer_uncorrect_error_status(dev);
>>>                 dev->error_state = pci_channel_io_normal;
>>>         }
>>>         if (result == PCI_ERS_RESULT_RECOVERED)
>>>                 if (pcie_wait_for_link(udev, true))
>>>                         pci_rescan_bus(udev->bus);
>>> 
>>>         pci_unlock_rescan_remove();
>>> 
>>>         return result;
>>> }
>>> 
>>> Regards,
>>> Oza.
>>> 
>>>
diff mbox

Patch

diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c
index 779b387..206f590 100644
--- a/drivers/pci/pcie/aer/aerdrv.c
+++ b/drivers/pci/pcie/aer/aerdrv.c
@@ -330,6 +330,13 @@  static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
 	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
 
+	/*
+	 * This function is called only on ERR_FATAL now, and since
+	 * the pci_report_resume is called only in ERR_NONFATAL case,
+	 * the clearing part has to be taken care here.
+	 */
+	aer_error_resume(dev);
+
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 0ea5acc..655d4e8 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -20,6 +20,7 @@ 
 #include <linux/slab.h>
 #include <linux/kfifo.h>
 #include "aerdrv.h"
+#include "../../pci.h"
 
 #define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
 				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
@@ -474,6 +475,44 @@  static pci_ers_result_t reset_link(struct pci_dev *dev)
 	return status;
 }
 
+static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity)
+{
+	struct pci_dev *udev;
+	struct pci_bus *parent;
+	struct pci_dev *pdev, *temp;
+	pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
+
+	if (severity == AER_FATAL)
+		pci_cleanup_aer_uncorrect_error_status(dev);
+
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		udev = dev;
+	else
+		udev = dev->bus->self;
+
+	parent = udev->subordinate;
+	pci_lock_rescan_remove();
+	list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
+				 bus_list) {
+		pci_dev_get(pdev);
+		pci_dev_set_disconnected(pdev, NULL);
+		if (pci_has_subordinate(pdev))
+			pci_walk_bus(pdev->subordinate,
+				     pci_dev_set_disconnected, NULL);
+		pci_stop_and_remove_bus_device(pdev);
+		pci_dev_put(pdev);
+	}
+
+	result = reset_link(udev);
+	if (result == PCI_ERS_RESULT_RECOVERED)
+		if (pcie_wait_for_link(udev, true))
+			pci_rescan_bus(udev->bus);
+
+	pci_unlock_rescan_remove();
+
+	return result;
+}
+
 /**
  * do_recovery - handle nonfatal/fatal error recovery process
  * @dev: pointer to a pci_dev data structure of agent detecting an error
@@ -485,11 +524,15 @@  static pci_ers_result_t reset_link(struct pci_dev *dev)
  */
 static void do_recovery(struct pci_dev *dev, int severity)
 {
-	pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
+	pci_ers_result_t status;
 	enum pci_channel_state state;
 
-	if (severity == AER_FATAL)
-		state = pci_channel_io_frozen;
+	if (severity == AER_FATAL) {
+		status = do_fatal_recovery(dev, severity);
+		if (status != PCI_ERS_RESULT_RECOVERED)
+			goto failed;
+		return;
+	}
 	else
 		state = pci_channel_io_normal;
 
@@ -498,12 +541,6 @@  static void do_recovery(struct pci_dev *dev, int severity)
 			"error_detected",
 			report_error_detected);
 
-	if (severity == AER_FATAL) {
-		result = reset_link(dev);
-		if (result != PCI_ERS_RESULT_RECOVERED)
-			goto failed;
-	}
-
 	if (status == PCI_ERS_RESULT_CAN_RECOVER)
 		status = broadcast_error_message(dev,
 				state,