diff mbox

[v5] pciehp: only wait command complete for really hotplug control

Message ID 1401491219-1496-1-git-send-email-yinghai@kernel.org (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Yinghai Lu May 30, 2014, 11:06 p.m. UTC
On system with 16 PCI express hotplug slots, customer complain every slot
will report "Command not completed in 1000 msec" during initialization.

Intel says that we should only wait command complete only for
           Electromechanical Interlock Control
           Power Controller Control
           Power Indicator Control
           Attention Indicator Control

For detail please check CF118 on
  http://www.intel.com/content/www/us/en/processors/xeon/xeon-e7-v2-spec-update.html

To address the problem:
change to only wait when (EIC, PCC, PIC, AIC) bits get changed.

With that we could save 16 seconds during booting, later with 32 sockets system
with 64 pcie hotplug slots we could save 64 seconds.

-v5: only add checking for intel chipset.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 drivers/pci/hotplug/pciehp.h     |    1 +
 drivers/pci/hotplug/pciehp_hpc.c |   25 ++++++++++++++++++++-----
 2 files changed, 21 insertions(+), 5 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Bjorn Helgaas May 30, 2014, 11:27 p.m. UTC | #1
On Fri, May 30, 2014 at 5:06 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On system with 16 PCI express hotplug slots, customer complain every slot
> will report "Command not completed in 1000 msec" during initialization.
>
> Intel says that we should only wait command complete only for
>            Electromechanical Interlock Control
>            Power Controller Control
>            Power Indicator Control
>            Attention Indicator Control
>
> For detail please check CF118 on
>   http://www.intel.com/content/www/us/en/processors/xeon/xeon-e7-v2-spec-update.html
>
> To address the problem:
> change to only wait when (EIC, PCC, PIC, AIC) bits get changed.
>
> With that we could save 16 seconds during booting, later with 32 sockets system
> with 64 pcie hotplug slots we could save 64 seconds.
>
> -v5: only add checking for intel chipset.
>
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
>
> ---
>  drivers/pci/hotplug/pciehp.h     |    1 +
>  drivers/pci/hotplug/pciehp_hpc.c |   25 ++++++++++++++++++++-----
>  2 files changed, 21 insertions(+), 5 deletions(-)
>
> Index: linux-2.6/drivers/pci/hotplug/pciehp_hpc.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/hotplug/pciehp_hpc.c
> +++ linux-2.6/drivers/pci/hotplug/pciehp_hpc.c
> @@ -143,6 +143,11 @@ static void pcie_wait_cmd(struct control
>                 ctrl_dbg(ctrl, "Command not completed in 1000 msec\n");
>  }
>
> +/* Some vendors only set CC for real hotplug events */
> +#define  PCI_EXP_SLTCTL_REAL_WAIT_MASK (PCI_EXP_SLTCTL_EIC | \
> +                                       PCI_EXP_SLTCTL_PCC | \
> +                                       PCI_EXP_SLTCTL_PIC | \
> +                                       PCI_EXP_SLTCTL_AIC)
>  /**
>   * pcie_write_cmd - Issue controller command
>   * @ctrl: controller to which the command is issued
> @@ -152,8 +157,9 @@ static void pcie_wait_cmd(struct control
>  static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
>  {
>         struct pci_dev *pdev = ctrl_dev(ctrl);
> +       u16 slot_ctrl, old_slot_ctrl;
>         u16 slot_status;
> -       u16 slot_ctrl;
> +       int wait = 1;
>
>         mutex_lock(&ctrl->ctrl_lock);
>
> @@ -181,9 +187,8 @@ static void pcie_write_cmd(struct contro
>                 }
>         }
>
> -       pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
> -       slot_ctrl &= ~mask;
> -       slot_ctrl |= (cmd & mask);
> +       pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &old_slot_ctrl);
> +       slot_ctrl = (old_slot_ctrl & ~mask) | (cmd & mask);
>         ctrl->cmd_busy = 1;
>         smp_mb();
>         pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, slot_ctrl);
> @@ -191,7 +196,13 @@ static void pcie_write_cmd(struct contro
>         /*
>          * Wait for command completion.
>          */
> -       if (!ctrl->no_cmd_complete) {
> +       if (ctrl->no_cmd_complete)
> +               wait = 0;
> +       else if (ctrl->real_cmd_complete &&
> +                ((PCI_EXP_SLTCTL_REAL_WAIT_MASK & old_slot_ctrl) ==
> +                 (PCI_EXP_SLTCTL_REAL_WAIT_MASK & slot_ctrl)))
> +               wait = 0;
> +       if (wait) {
>                 int poll = 0;
>                 /*
>                  * if hotplug interrupt is not enabled or command
> @@ -783,6 +794,10 @@ struct controller *pcie_init(struct pcie
>             !(POWER_CTRL(ctrl) | ATTN_LED(ctrl) | PWR_LED(ctrl) | EMI(ctrl)))
>             ctrl->no_cmd_complete = 1;
>
> +       /* Intel current only support "real" hotplug event with CC */
> +       if (pdev->vendor == PCI_VENDOR_ID_INTEL)
> +               ctrl->real_cmd_complete = 1;

Do you have information that says this Intel erratum will never be
fixed, even in future hardware?  If it ever is fixed, this code will
stop working.

And of course, this only helps Intel hardware, and you said the AMD
and Nvidia have similar issues.  If we're going to make a change here,
we should try to deal with all of this hardware.

I'd like to know what happens if you use the v4 patch but just change
the test to:

  if (!ctrl->no_cmd_complete && !(slot_status & PCI_EXP_SLTSTA_CC) && ...

I think that should fix the boot delay.  I think there will still be a
timeout when the first "real" hotplug operation happens, but that's a
rare event with a human involved, so I doubt it's a big problem.  And
if it is a problem, I suggested a simple way to deal with it.

Bjorn

>          /* Check if Data Link Layer Link Active Reporting is implemented */
>          pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &link_cap);
>          if (link_cap & PCI_EXP_LNKCAP_DLLLARC) {
> Index: linux-2.6/drivers/pci/hotplug/pciehp.h
> ===================================================================
> --- linux-2.6.orig/drivers/pci/hotplug/pciehp.h
> +++ linux-2.6/drivers/pci/hotplug/pciehp.h
> @@ -97,6 +97,7 @@ struct controller {
>         unsigned int no_cmd_complete:1;
>         unsigned int link_active_reporting:1;
>         unsigned int notification_enabled:1;
> +       unsigned int real_cmd_complete:1;
>         unsigned int power_fault_detected;
>  };
>
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu May 31, 2014, 1:35 a.m. UTC | #2
On Fri, May 30, 2014 at 4:27 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> On Fri, May 30, 2014 at 5:06 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>> +       /* Intel current only support "real" hotplug event with CC */
>> +       if (pdev->vendor == PCI_VENDOR_ID_INTEL)
>> +               ctrl->real_cmd_complete = 1;
>
> Do you have information that says this Intel erratum will never be
> fixed, even in future hardware?  If it ever is fixed, this code will
> stop working.

non-real hotplug should be done very fast as it is within chipset.

Do you want to add device id checking?

>
> And of course, this only helps Intel hardware, and you said the AMD
> and Nvidia have similar issues.  If we're going to make a change here,
> we should try to deal with all of this hardware.

Would add them later if needed.

Also remember those systems only have two hotplug slots.

I don't have access those systems anymore.

>
> I'd like to know what happens if you use the v4 patch but just change
> the test to:
>
>   if (!ctrl->no_cmd_complete && !(slot_status & PCI_EXP_SLTSTA_CC) && ...
>
> I think that should fix the boot delay.  I think there will still be a
> timeout when the first "real" hotplug operation happens, but that's a
> rare event with a human involved, so I doubt it's a big problem.  And
> if it is a problem, I suggested a simple way to deal with it.

that will cause wrong delay before any cmd write (including real hotplug) that
is just after non-real command as the non-real command will keep cmd_busy,
and CC is not set.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bjorn Helgaas June 2, 2014, 4:47 p.m. UTC | #3
On Fri, May 30, 2014 at 7:35 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Fri, May 30, 2014 at 4:27 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>> On Fri, May 30, 2014 at 5:06 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>>> +       /* Intel current only support "real" hotplug event with CC */
>>> +       if (pdev->vendor == PCI_VENDOR_ID_INTEL)
>>> +               ctrl->real_cmd_complete = 1;
>>
>> Do you have information that says this Intel erratum will never be
>> fixed, even in future hardware?  If it ever is fixed, this code will
>> stop working.
>
> non-real hotplug should be done very fast as it is within chipset.

I'm not going to merge changes based on assumptions about how fast
things happen because of where they might be implemented in the
hardware.

> Do you want to add device id checking?

I doubt it's feasible to add device ID checking because I think it's
going to be too hard to make a complete list.

>> And of course, this only helps Intel hardware, and you said the AMD
>> and Nvidia have similar issues.  If we're going to make a change here,
>> we should try to deal with all of this hardware.
>
> Would add them later if needed.
>
> Also remember those systems only have two hotplug slots.
>
> I don't have access those systems anymore.
>
>>
>> I'd like to know what happens if you use the v4 patch but just change
>> the test to:
>>
>>   if (!ctrl->no_cmd_complete && !(slot_status & PCI_EXP_SLTSTA_CC) && ...
>>
>> I think that should fix the boot delay.  I think there will still be a
>> timeout when the first "real" hotplug operation happens, but that's a
>> rare event with a human involved, so I doubt it's a big problem.  And
>> if it is a problem, I suggested a simple way to deal with it.
>
> that will cause wrong delay before any cmd write (including real hotplug) that
> is just after non-real command as the non-real command will keep cmd_busy,
> and CC is not set.

I think you repeated what I just said.  Let me repeat it again in more
detail to see if we're really saying the same thing.

At boot-time, we currently do this:
- set cmd_busy = 1
- write DLLSCE, ABPE, PDCE, etc.
- wait for CC to be set
- with Intel/AMD/Nvidia parts, we timeout here after 1 second because
they don't set CC

Later, when hotplugging a card, we do this:
- set cmd_busy = 1
- write EIC, PCC, etc.
- wait for CC to be set
- device sets CC, and we set cmd_busy = 0

All future hotplug events involve EIC, PCC, etc., so there are no
timeouts.  The only timeout occurs at boot-time.

My proposal is this:

At boot-time, we do this:
- wait for CC if (cmd_busy).  But cmd_busy == 0, so we don't wait
- set cmd_busy = 1
- write DLLSCE, ABPE, PDCE, etc.
- we do not wait for CC here, so there's no timeout

Later, on the first hotplug event, we do this:
- now cmd_busy == 1, so we wait for CC, and with Intel/AMD/Nvidia, we
timeout here after 1 second and set cmd_busy = 0
- set cmd_busy = 1
- write EIC, PCC, etc.
- device sets CC, and we set cmd_busy = 0

On all future hotplug events, we do this:
- wait for CC if (cmd_busy).  But cmd_busy == 0, so we don't wait
- set cmd_busy = 1
- write EIC, PCC, etc.
- device sets CC, and we set cmd_busy = 0

So the only timeout here is on the first real hotplug event.  This is
a smaller problem than the boot-time timeouts because:

1) Instead of delaying boot for 64 seconds (waiting for 64 1-second
timeouts in a 64-slot system), there is a 1-second timeout on the
first hotplug event in each slot.
2) A hotplug event involves a human going to the machine,
disconnecting cables, etc., so a 1-second delay here has less impact.
3) The delay only affects the first event in each slot.  There are no
timeouts for future events.

I don't know if it's even worth bothering to fix this delay.  But if
you think it does need to be fixed, here's one possibility:

At boot-time, we do this:
- wait for CC if (cmd_busy).  But cmd_busy == 0, so we don't wait
- set cmd_busy = 1
- write DLLSCE, ABPE, PDCE, etc.
- save a timestamp of when the last command was written
- we do not wait for CC here, so there's no timeout

Later, on hotplug events, we do this:
- if (cmd_busy)
    read clock
    if (clock - timestamp < 1 second)
      wait for CC
  set cmd_busy = 0
- set cmd_busy = 1
- write EIC, PCC, etc.
- save a timestamp of when last command was written
- device sets CC, and we set cmd_busy = 0

This should avoid all the timeouts, unless we have a real hotplug
event less than one second after we initialize the controller at
boot-time.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: linux-2.6/drivers/pci/hotplug/pciehp_hpc.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_hpc.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_hpc.c
@@ -143,6 +143,11 @@  static void pcie_wait_cmd(struct control
 		ctrl_dbg(ctrl, "Command not completed in 1000 msec\n");
 }
 
+/* Some vendors only set CC for real hotplug events */
+#define  PCI_EXP_SLTCTL_REAL_WAIT_MASK (PCI_EXP_SLTCTL_EIC | \
+					PCI_EXP_SLTCTL_PCC | \
+					PCI_EXP_SLTCTL_PIC | \
+					PCI_EXP_SLTCTL_AIC)
 /**
  * pcie_write_cmd - Issue controller command
  * @ctrl: controller to which the command is issued
@@ -152,8 +157,9 @@  static void pcie_wait_cmd(struct control
 static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 {
 	struct pci_dev *pdev = ctrl_dev(ctrl);
+	u16 slot_ctrl, old_slot_ctrl;
 	u16 slot_status;
-	u16 slot_ctrl;
+	int wait = 1;
 
 	mutex_lock(&ctrl->ctrl_lock);
 
@@ -181,9 +187,8 @@  static void pcie_write_cmd(struct contro
 		}
 	}
 
-	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);
-	slot_ctrl &= ~mask;
-	slot_ctrl |= (cmd & mask);
+	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &old_slot_ctrl);
+	slot_ctrl = (old_slot_ctrl & ~mask) | (cmd & mask);
 	ctrl->cmd_busy = 1;
 	smp_mb();
 	pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, slot_ctrl);
@@ -191,7 +196,13 @@  static void pcie_write_cmd(struct contro
 	/*
 	 * Wait for command completion.
 	 */
-	if (!ctrl->no_cmd_complete) {
+	if (ctrl->no_cmd_complete)
+		wait = 0;
+	else if (ctrl->real_cmd_complete &&
+		 ((PCI_EXP_SLTCTL_REAL_WAIT_MASK & old_slot_ctrl) ==
+		  (PCI_EXP_SLTCTL_REAL_WAIT_MASK & slot_ctrl)))
+		wait = 0;
+	if (wait) {
 		int poll = 0;
 		/*
 		 * if hotplug interrupt is not enabled or command
@@ -783,6 +794,10 @@  struct controller *pcie_init(struct pcie
 	    !(POWER_CTRL(ctrl) | ATTN_LED(ctrl) | PWR_LED(ctrl) | EMI(ctrl)))
 	    ctrl->no_cmd_complete = 1;
 
+	/* Intel current only support "real" hotplug event with CC */
+	if (pdev->vendor == PCI_VENDOR_ID_INTEL)
+		ctrl->real_cmd_complete = 1;
+
         /* Check if Data Link Layer Link Active Reporting is implemented */
         pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &link_cap);
         if (link_cap & PCI_EXP_LNKCAP_DLLLARC) {
Index: linux-2.6/drivers/pci/hotplug/pciehp.h
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp.h
+++ linux-2.6/drivers/pci/hotplug/pciehp.h
@@ -97,6 +97,7 @@  struct controller {
 	unsigned int no_cmd_complete:1;
 	unsigned int link_active_reporting:1;
 	unsigned int notification_enabled:1;
+	unsigned int real_cmd_complete:1;
 	unsigned int power_fault_detected;
 };