diff mbox series

scsi: pm80xx: port reset timeout error handling correction.

Message ID 20211228111753.10802-1-Ajish.Koshy@microchip.com (mailing list archive)
State Accepted
Headers show
Series scsi: pm80xx: port reset timeout error handling correction. | expand

Commit Message

Ajish Koshy Dec. 28, 2021, 11:17 a.m. UTC
Error handling steps were not performed in the sequence specified by the
programmer's manual. Expected sequence:
 -PHY_DOWN (PORT_IN_RESET)
 -PORT_RESET_TIMER_TMO
 -Host aborts pending I/Os
 -Host deregisters the device
 -Host sends HW_EVENT_PHY_DOWN ack

Earlier, we were sending the HW_EVENT_PHY_DOWN ack first and then
deregistering the device.

Signed-off-by: Ajish Koshy <Ajish.Koshy@microchip.com>
Signed-off-by: Viswas G <Viswas.G@microchip.com>
---
 drivers/scsi/pm8001/pm8001_sas.c | 7 ++++++-
 drivers/scsi/pm8001/pm8001_sas.h | 3 +++
 drivers/scsi/pm8001/pm80xx_hwi.c | 7 +++++--
 3 files changed, 14 insertions(+), 3 deletions(-)

Comments

Martin K. Petersen Jan. 5, 2022, 5:53 a.m. UTC | #1
Ajish,

> Error handling steps were not in sequence as per the programmers
> manual. Expected sequence:

Applied to 5.17/scsi-staging, thanks!
Jinpu Wang Jan. 5, 2022, 6:19 a.m. UTC | #2
Hi Ajish,


On Tue, Dec 28, 2021 at 12:15 PM Ajish Koshy <Ajish.Koshy@microchip.com> wrote:
>
> Error handling steps were not in sequence as per the programmers
> manual. Expected sequence:
>  -PHY_DOWN (PORT_IN_RESET)
>  -PORT_RESET_TIMER_TMO
>  -Host aborts pending I/Os
>  -Host deregister the device
>  -Host sends HW_EVENT_PHY_DOWN ack

Just to make sure, does the same sequence work for old pm8001 chip?
>
> Earlier, we were sending HW_EVENT_PHY_DOWN ack first and then
> deregister the device.
>
> Signed-off-by: Ajish Koshy <Ajish.Koshy@microchip.com>
> Signed-off-by: Viswas G <Viswas.G@microchip.com>
> ---
>  drivers/scsi/pm8001/pm8001_sas.c | 7 ++++++-
>  drivers/scsi/pm8001/pm8001_sas.h | 3 +++
>  drivers/scsi/pm8001/pm80xx_hwi.c | 7 +++++--
>  3 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
> index c9a16eef38c1..160ee8b228c9 100644
> --- a/drivers/scsi/pm8001/pm8001_sas.c
> +++ b/drivers/scsi/pm8001/pm8001_sas.c
> @@ -1199,7 +1199,7 @@ int pm8001_abort_task(struct sas_task *task)
>         struct pm8001_device *pm8001_dev;
>         struct pm8001_tmf_task tmf_task;
>         int rc = TMF_RESP_FUNC_FAILED, ret;
> -       u32 phy_id;
> +       u32 phy_id, port_id;
>         struct sas_task_slow slow_task;
>
>         if (unlikely(!task || !task->lldd_task || !task->dev))
> @@ -1246,6 +1246,7 @@ int pm8001_abort_task(struct sas_task *task)
>                         DECLARE_COMPLETION_ONSTACK(completion_reset);
>                         DECLARE_COMPLETION_ONSTACK(completion);
>                         struct pm8001_phy *phy = pm8001_ha->phy + phy_id;
> +                       port_id = phy->port->port_id;
>
>                         /* 1. Set Device state as Recovery */
>                         pm8001_dev->setds_completion = &completion;
> @@ -1297,6 +1298,10 @@ int pm8001_abort_task(struct sas_task *task)
>                                                 PORT_RESET_TMO);
>                                 if (phy->port_reset_status == PORT_RESET_TMO) {
>                                         pm8001_dev_gone_notify(dev);
> +                                       PM8001_CHIP_DISP->hw_event_ack_req(
> +                                               pm8001_ha, 0,
> +                                               0x07, /*HW_EVENT_PHY_DOWN ack*/
> +                                               port_id, phy_id, 0, 0);
>                                         goto out;
>                                 }
>                         }
> diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
> index 83eec16d021d..a17da1cebce1 100644
> --- a/drivers/scsi/pm8001/pm8001_sas.h
> +++ b/drivers/scsi/pm8001/pm8001_sas.h
> @@ -216,6 +216,9 @@ struct pm8001_dispatch {
>                 u32 state);
>         int (*sas_re_init_req)(struct pm8001_hba_info *pm8001_ha);
>         int (*fatal_errors)(struct pm8001_hba_info *pm8001_ha);
> +       void (*hw_event_ack_req)(struct pm8001_hba_info *pm8001_ha,
> +               u32 Qnum, u32 SEA, u32 port_id, u32 phyId, u32 param0,
> +               u32 param1);
>  };
>
>  struct pm8001_chip_info {
> diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
> index 0849ecc913c7..97750d0ebee9 100644
> --- a/drivers/scsi/pm8001/pm80xx_hwi.c
> +++ b/drivers/scsi/pm8001/pm80xx_hwi.c
> @@ -3709,8 +3709,10 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
>                 break;
>         case HW_EVENT_PORT_RESET_TIMER_TMO:
>                 pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PORT_RESET_TIMER_TMO\n");
> -               pm80xx_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN,
> -                       port_id, phy_id, 0, 0);
> +               if (!pm8001_ha->phy[phy_id].reset_completion) {
> +                       pm80xx_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN,
> +                               port_id, phy_id, 0, 0);
> +               }
>                 sas_phy_disconnected(sas_phy);
>                 phy->phy_attached = 0;
>                 sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR,
> @@ -5051,4 +5053,5 @@ const struct pm8001_dispatch pm8001_80xx_dispatch = {
>         .fw_flash_update_req    = pm8001_chip_fw_flash_update_req,
>         .set_dev_state_req      = pm8001_chip_set_dev_state_req,
>         .fatal_errors           = pm80xx_fatal_errors,
> +       .hw_event_ack_req       = pm80xx_hw_event_ack_req,
>  };
> --
> 2.27.0
>
Ajish Koshy Jan. 6, 2022, 1:12 p.m. UTC | #3
Hi Jinpu,

> Hi Ajish,
> 
> 
> On Tue, Dec 28, 2021 at 12:15 PM Ajish Koshy <Ajish.Koshy@microchip.com>
> wrote:
> >
> > Error handling steps were not in sequence as per the programmers
> > manual. Expected sequence:
> >  -PHY_DOWN (PORT_IN_RESET)
> >  -PORT_RESET_TIMER_TMO
> >  -Host aborts pending I/Os
> >  -Host deregister the device
> >  -Host sends HW_EVENT_PHY_DOWN ack
> 
> Just to make sure, does the same sequence work for old pm8001 chip?

Currently, this code has been modified and run on the 8006 controller.
I need to check with team members about this sequence on the old
pm8001 controller chip.

> >
> > Earlier, we were sending HW_EVENT_PHY_DOWN ack first and then
> > deregister the device.
> >
> > Signed-off-by: Ajish Koshy <Ajish.Koshy@microchip.com>
> > Signed-off-by: Viswas G <Viswas.G@microchip.com>
> > ---
> >  drivers/scsi/pm8001/pm8001_sas.c | 7 ++++++-
> > drivers/scsi/pm8001/pm8001_sas.h | 3 +++
> > drivers/scsi/pm8001/pm80xx_hwi.c | 7 +++++--
> >  3 files changed, 14 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/scsi/pm8001/pm8001_sas.c
> > b/drivers/scsi/pm8001/pm8001_sas.c
> > index c9a16eef38c1..160ee8b228c9 100644
> > --- a/drivers/scsi/pm8001/pm8001_sas.c
> > +++ b/drivers/scsi/pm8001/pm8001_sas.c
> > @@ -1199,7 +1199,7 @@ int pm8001_abort_task(struct sas_task *task)
> >         struct pm8001_device *pm8001_dev;
> >         struct pm8001_tmf_task tmf_task;
> >         int rc = TMF_RESP_FUNC_FAILED, ret;
> > -       u32 phy_id;
> > +       u32 phy_id, port_id;
> >         struct sas_task_slow slow_task;
> >
> >         if (unlikely(!task || !task->lldd_task || !task->dev)) @@
> > -1246,6 +1246,7 @@ int pm8001_abort_task(struct sas_task *task)
> >                         DECLARE_COMPLETION_ONSTACK(completion_reset);
> >                         DECLARE_COMPLETION_ONSTACK(completion);
> >                         struct pm8001_phy *phy = pm8001_ha->phy +
> > phy_id;
> > +                       port_id = phy->port->port_id;
> >
> >                         /* 1. Set Device state as Recovery */
> >                         pm8001_dev->setds_completion = &completion; @@
> > -1297,6 +1298,10 @@ int pm8001_abort_task(struct sas_task *task)
> >                                                 PORT_RESET_TMO);
> >                                 if (phy->port_reset_status == PORT_RESET_TMO) {
> >                                         pm8001_dev_gone_notify(dev);
> > +                                       PM8001_CHIP_DISP->hw_event_ack_req(
> > +                                               pm8001_ha, 0,
> > +                                               0x07, /*HW_EVENT_PHY_DOWN ack*/
> > +                                               port_id, phy_id, 0,
> > + 0);
> >                                         goto out;
> >                                 }
> >                         }
> > diff --git a/drivers/scsi/pm8001/pm8001_sas.h
> > b/drivers/scsi/pm8001/pm8001_sas.h
> > index 83eec16d021d..a17da1cebce1 100644
> > --- a/drivers/scsi/pm8001/pm8001_sas.h
> > +++ b/drivers/scsi/pm8001/pm8001_sas.h
> > @@ -216,6 +216,9 @@ struct pm8001_dispatch {
> >                 u32 state);
> >         int (*sas_re_init_req)(struct pm8001_hba_info *pm8001_ha);
> >         int (*fatal_errors)(struct pm8001_hba_info *pm8001_ha);
> > +       void (*hw_event_ack_req)(struct pm8001_hba_info *pm8001_ha,
> > +               u32 Qnum, u32 SEA, u32 port_id, u32 phyId, u32 param0,
> > +               u32 param1);
> >  };
> >
> >  struct pm8001_chip_info {
> > diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c
> > b/drivers/scsi/pm8001/pm80xx_hwi.c
> > index 0849ecc913c7..97750d0ebee9 100644
> > --- a/drivers/scsi/pm8001/pm80xx_hwi.c
> > +++ b/drivers/scsi/pm8001/pm80xx_hwi.c
> > @@ -3709,8 +3709,10 @@ static int mpi_hw_event(struct
> pm8001_hba_info *pm8001_ha, void *piomb)
> >                 break;
> >         case HW_EVENT_PORT_RESET_TIMER_TMO:
> >                 pm8001_dbg(pm8001_ha, MSG,
> "HW_EVENT_PORT_RESET_TIMER_TMO\n");
> > -               pm80xx_hw_event_ack_req(pm8001_ha, 0,
> HW_EVENT_PHY_DOWN,
> > -                       port_id, phy_id, 0, 0);
> > +               if (!pm8001_ha->phy[phy_id].reset_completion) {
> > +                       pm80xx_hw_event_ack_req(pm8001_ha, 0,
> HW_EVENT_PHY_DOWN,
> > +                               port_id, phy_id, 0, 0);
> > +               }
> >                 sas_phy_disconnected(sas_phy);
> >                 phy->phy_attached = 0;
> >                 sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR,
> > @@ -5051,4 +5053,5 @@ const struct pm8001_dispatch
> pm8001_80xx_dispatch = {
> >         .fw_flash_update_req    = pm8001_chip_fw_flash_update_req,
> >         .set_dev_state_req      = pm8001_chip_set_dev_state_req,
> >         .fatal_errors           = pm80xx_fatal_errors,
> > +       .hw_event_ack_req       = pm80xx_hw_event_ack_req,
> >  };
> > --
> > 2.27.0
> >

Thanks,
Ajish
Jinpu Wang Jan. 6, 2022, 2:11 p.m. UTC | #4
On Thu, Jan 6, 2022 at 2:12 PM <Ajish.Koshy@microchip.com> wrote:
>
> Hi Jinpu,
>
> > Hi Ajish,
> >
> >
> > On Tue, Dec 28, 2021 at 12:15 PM Ajish Koshy <Ajish.Koshy@microchip.com>
> > wrote:
> > >
> > > Error handling steps were not in sequence as per the programmers
> > > manual. Expected sequence:
> > >  -PHY_DOWN (PORT_IN_RESET)
> > >  -PORT_RESET_TIMER_TMO
> > >  -Host aborts pending I/Os
> > >  -Host deregister the device
> > >  -Host sends HW_EVENT_PHY_DOWN ack
> >
> > Just to make sure, does the same sequence work for old pm8001 chip?
>
> Currently this code is modified and executed based on 8006 controller.
> I need to check with team members about the this sequence on old
> controller pm8001 chip.
Thanks & Regards
>
> > >
> > > Earlier, we were sending HW_EVENT_PHY_DOWN ack first and then
> > > deregister the device.
> > >
> > > Signed-off-by: Ajish Koshy <Ajish.Koshy@microchip.com>
> > > Signed-off-by: Viswas G <Viswas.G@microchip.com>
> > > ---
> > >  drivers/scsi/pm8001/pm8001_sas.c | 7 ++++++-
> > > drivers/scsi/pm8001/pm8001_sas.h | 3 +++
> > > drivers/scsi/pm8001/pm80xx_hwi.c | 7 +++++--
> > >  3 files changed, 14 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/scsi/pm8001/pm8001_sas.c
> > > b/drivers/scsi/pm8001/pm8001_sas.c
> > > index c9a16eef38c1..160ee8b228c9 100644
> > > --- a/drivers/scsi/pm8001/pm8001_sas.c
> > > +++ b/drivers/scsi/pm8001/pm8001_sas.c
> > > @@ -1199,7 +1199,7 @@ int pm8001_abort_task(struct sas_task *task)
> > >         struct pm8001_device *pm8001_dev;
> > >         struct pm8001_tmf_task tmf_task;
> > >         int rc = TMF_RESP_FUNC_FAILED, ret;
> > > -       u32 phy_id;
> > > +       u32 phy_id, port_id;
> > >         struct sas_task_slow slow_task;
> > >
> > >         if (unlikely(!task || !task->lldd_task || !task->dev)) @@
> > > -1246,6 +1246,7 @@ int pm8001_abort_task(struct sas_task *task)
> > >                         DECLARE_COMPLETION_ONSTACK(completion_reset);
> > >                         DECLARE_COMPLETION_ONSTACK(completion);
> > >                         struct pm8001_phy *phy = pm8001_ha->phy +
> > > phy_id;
> > > +                       port_id = phy->port->port_id;
> > >
> > >                         /* 1. Set Device state as Recovery */
> > >                         pm8001_dev->setds_completion = &completion; @@
> > > -1297,6 +1298,10 @@ int pm8001_abort_task(struct sas_task *task)
> > >                                                 PORT_RESET_TMO);
> > >                                 if (phy->port_reset_status == PORT_RESET_TMO) {
> > >                                         pm8001_dev_gone_notify(dev);
> > > +                                       PM8001_CHIP_DISP->hw_event_ack_req(
> > > +                                               pm8001_ha, 0,
> > > +                                               0x07, /*HW_EVENT_PHY_DOWN ack*/
> > > +                                               port_id, phy_id, 0,
> > > + 0);
> > >                                         goto out;
> > >                                 }
> > >                         }
> > > diff --git a/drivers/scsi/pm8001/pm8001_sas.h
> > > b/drivers/scsi/pm8001/pm8001_sas.h
> > > index 83eec16d021d..a17da1cebce1 100644
> > > --- a/drivers/scsi/pm8001/pm8001_sas.h
> > > +++ b/drivers/scsi/pm8001/pm8001_sas.h
> > > @@ -216,6 +216,9 @@ struct pm8001_dispatch {
> > >                 u32 state);
> > >         int (*sas_re_init_req)(struct pm8001_hba_info *pm8001_ha);
> > >         int (*fatal_errors)(struct pm8001_hba_info *pm8001_ha);
> > > +       void (*hw_event_ack_req)(struct pm8001_hba_info *pm8001_ha,
> > > +               u32 Qnum, u32 SEA, u32 port_id, u32 phyId, u32 param0,
> > > +               u32 param1);
> > >  };
> > >
> > >  struct pm8001_chip_info {
> > > diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c
> > > b/drivers/scsi/pm8001/pm80xx_hwi.c
> > > index 0849ecc913c7..97750d0ebee9 100644
> > > --- a/drivers/scsi/pm8001/pm80xx_hwi.c
> > > +++ b/drivers/scsi/pm8001/pm80xx_hwi.c
> > > @@ -3709,8 +3709,10 @@ static int mpi_hw_event(struct
> > pm8001_hba_info *pm8001_ha, void *piomb)
> > >                 break;
> > >         case HW_EVENT_PORT_RESET_TIMER_TMO:
> > >                 pm8001_dbg(pm8001_ha, MSG,
> > "HW_EVENT_PORT_RESET_TIMER_TMO\n");
> > > -               pm80xx_hw_event_ack_req(pm8001_ha, 0,
> > HW_EVENT_PHY_DOWN,
> > > -                       port_id, phy_id, 0, 0);
> > > +               if (!pm8001_ha->phy[phy_id].reset_completion) {
> > > +                       pm80xx_hw_event_ack_req(pm8001_ha, 0,
> > HW_EVENT_PHY_DOWN,
> > > +                               port_id, phy_id, 0, 0);
> > > +               }
> > >                 sas_phy_disconnected(sas_phy);
> > >                 phy->phy_attached = 0;
> > >                 sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR,
> > > @@ -5051,4 +5053,5 @@ const struct pm8001_dispatch
> > pm8001_80xx_dispatch = {
> > >         .fw_flash_update_req    = pm8001_chip_fw_flash_update_req,
> > >         .set_dev_state_req      = pm8001_chip_set_dev_state_req,
> > >         .fatal_errors           = pm80xx_fatal_errors,
> > > +       .hw_event_ack_req       = pm80xx_hw_event_ack_req,
> > >  };
> > > --
> > > 2.27.0
> > >
>
> Thanks,
> Ajish
Martin K. Petersen Jan. 10, 2022, 10:04 p.m. UTC | #5
On Tue, 28 Dec 2021 16:47:53 +0530, Ajish Koshy wrote:

> Error handling steps were not in sequence as per the programmers
> manual. Expected sequence:
>  -PHY_DOWN (PORT_IN_RESET)
>  -PORT_RESET_TIMER_TMO
>  -Host aborts pending I/Os
>  -Host deregister the device
>  -Host sends HW_EVENT_PHY_DOWN ack
> 
> [...]

Applied to 5.17/scsi-queue, thanks!

[1/1] scsi: pm80xx: port reset timeout error handling correction.
      https://git.kernel.org/mkp/scsi/c/ee05cb71f9f7
diff mbox series

Patch

diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index c9a16eef38c1..160ee8b228c9 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -1199,7 +1199,7 @@  int pm8001_abort_task(struct sas_task *task)
 	struct pm8001_device *pm8001_dev;
 	struct pm8001_tmf_task tmf_task;
 	int rc = TMF_RESP_FUNC_FAILED, ret;
-	u32 phy_id;
+	u32 phy_id, port_id;
 	struct sas_task_slow slow_task;
 
 	if (unlikely(!task || !task->lldd_task || !task->dev))
@@ -1246,6 +1246,7 @@  int pm8001_abort_task(struct sas_task *task)
 			DECLARE_COMPLETION_ONSTACK(completion_reset);
 			DECLARE_COMPLETION_ONSTACK(completion);
 			struct pm8001_phy *phy = pm8001_ha->phy + phy_id;
+			port_id = phy->port->port_id;
 
 			/* 1. Set Device state as Recovery */
 			pm8001_dev->setds_completion = &completion;
@@ -1297,6 +1298,10 @@  int pm8001_abort_task(struct sas_task *task)
 						PORT_RESET_TMO);
 				if (phy->port_reset_status == PORT_RESET_TMO) {
 					pm8001_dev_gone_notify(dev);
+					PM8001_CHIP_DISP->hw_event_ack_req(
+						pm8001_ha, 0,
+						0x07, /*HW_EVENT_PHY_DOWN ack*/
+						port_id, phy_id, 0, 0);
 					goto out;
 				}
 			}
diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
index 83eec16d021d..a17da1cebce1 100644
--- a/drivers/scsi/pm8001/pm8001_sas.h
+++ b/drivers/scsi/pm8001/pm8001_sas.h
@@ -216,6 +216,9 @@  struct pm8001_dispatch {
 		u32 state);
 	int (*sas_re_init_req)(struct pm8001_hba_info *pm8001_ha);
 	int (*fatal_errors)(struct pm8001_hba_info *pm8001_ha);
+	void (*hw_event_ack_req)(struct pm8001_hba_info *pm8001_ha,
+		u32 Qnum, u32 SEA, u32 port_id, u32 phyId, u32 param0,
+		u32 param1);
 };
 
 struct pm8001_chip_info {
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index 0849ecc913c7..97750d0ebee9 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -3709,8 +3709,10 @@  static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 		break;
 	case HW_EVENT_PORT_RESET_TIMER_TMO:
 		pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PORT_RESET_TIMER_TMO\n");
-		pm80xx_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN,
-			port_id, phy_id, 0, 0);
+		if (!pm8001_ha->phy[phy_id].reset_completion) {
+			pm80xx_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN,
+				port_id, phy_id, 0, 0);
+		}
 		sas_phy_disconnected(sas_phy);
 		phy->phy_attached = 0;
 		sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR,
@@ -5051,4 +5053,5 @@  const struct pm8001_dispatch pm8001_80xx_dispatch = {
 	.fw_flash_update_req	= pm8001_chip_fw_flash_update_req,
 	.set_dev_state_req	= pm8001_chip_set_dev_state_req,
 	.fatal_errors		= pm80xx_fatal_errors,
+	.hw_event_ack_req	= pm80xx_hw_event_ack_req,
 };