diff mbox

Issue with Enable LTR while pcie_aspm off

Message ID 20180413211651.GA80087@bhelgaas-glaptop.roam.corp.google.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Bjorn Helgaas April 13, 2018, 9:16 p.m. UTC
On Fri, Apr 13, 2018 at 09:13:42PM +0530, Srinath Mannam wrote:
> Hi Bjorn,
> 
> Thank you very much for quick response.
> 
> I am using samsung NVMe card as EP connected to our platform. As per
> link capabilities device does not support L1.s.
> But LTR is enabled in DEVCAP2.

Interesting that the device advertises LTR support, but not L1.2
support.  As far as I can tell, that's legal per spec, but I don't
know what the LTR info would be used for.  The only use I know about
is comparison with LTR_L1.2_THRESHOLD, as in PCIe r4.0, sec 5.5.1.

Can you attach lspci -vv output for the NVMe device and all the
devices in the path leading to it?

> In my observation after setting LTR without enabling ASPM (common
> clock and link retraining) in software, device stopped responding.

From sec 5.5.1, it seems clear that LTR would have to be enabled
before L1.2 is enabled, because the decision to enter L1.2 depends on
information from LTR messages.

Since the device advertised LTR support, it seems like a hardware
defect if enabling it breaks something.  But I can imagine that if it
doesn't advertise L1.2 support, nobody expected LTR to be used.  Can
you try the patch below?  It should make it so we don't enable LTR
unless we also support L1.2.  Maybe that's enough to avoid the issue.

Bjorn

Comments

Srinath Mannam April 14, 2018, 3:34 a.m. UTC | #1
Hi Bjorn,

Thank you for the patch. I will try on monday and let you know the results.

I am sorry, in previous mail by mistake I have written L1.s support is
not there, Actually I wanted to write L0s support is not there.
L1 and L1ss support is there.

But In our platform we required to disable ASPM.

RC in our platform supports upto Gen3 only.

I dnot have setup access now. I will share lspci output and results of
your patch on monday.

Hi Rajat,

Device stop responding means, completion timeout for config read.


Regards,
Srinath.

On Sat, Apr 14, 2018 at 2:46 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> On Fri, Apr 13, 2018 at 09:13:42PM +0530, Srinath Mannam wrote:
>> Hi Bjorn,
>>
>> Thank you very much for quick response.
>>
>> I am using samsung NVMe card as EP connected to our platform. As per
>> link capabilities device does not support L1.s.
>> But LTR is enabled in DEVCAP2.
>
> Interesting that the device advertises LTR support, but not L1.2
> support.  As far as I can tell, that's legal per spec, but I don't
> know what the LTR info would be used for.  The only use I know about
> is comparison with LTR_L1.2_THRESHOLD, as in PCIe r4.0, sec 5.5.1.
>
> Can you attach lspci -vv output for the NVMe device and all the
> devices in the path leading to it?
>
>> In my observation after setting LTR without enabling ASPM (common
>> clock and link retraining) in software, device stopped responding.
>
> From sec 5.5.1, it seems clear that LTR would have to be enabled
> before L1.2 is enabled, because the decision to enter L1.2 depends on
> information from LTR messages.
>
> Since the device advertised LTR support, it seems like a hardware
> defect if enabling it breaks something.  But I can imagine that if it
> doesn't advertise L1.2 support, nobody expected LTR to be used.  Can
> you try the patch below?  It should make it so we don't enable LTR
> unless we also support L1.2.  Maybe that's enough to avoid the issue.
>
> Bjorn
>
>
> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> index f76eb7704f64..9e2212889e7e 100644
> --- a/drivers/pci/pcie/aspm.c
> +++ b/drivers/pci/pcie/aspm.c
> @@ -400,6 +400,16 @@ static void pcie_get_aspm_reg(struct pci_dev *pdev,
>                 info->l1ss_cap = 0;
>                 return;
>         }
> +
> +       /*
> +        * If we don't have LTR for the entire path from the Root Complex
> +        * to this device, we can't use L1.2.  PCIe r4.0, secs 5.5.4, 6.18.
> +        */
> +       if (!pdev->ltr_path) {
> +               info->l1ss_cap &= ~PCI_L1SS_CTL1_PCIPM_L1_2;
> +               info->l1ss_cap &= ~PCI_L1SS_CTL1_ASPM_L1_2;
> +       }
> +
>         pci_read_config_dword(pdev, info->l1ss_cap_ptr + PCI_L1SS_CTL1,
>                               &info->l1ss_ctl1);
>         pci_read_config_dword(pdev, info->l1ss_cap_ptr + PCI_L1SS_CTL2,
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index ac91b6fd0bcd..9a224612b3f8 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -1954,12 +1954,29 @@ static void pci_configure_relaxed_ordering(struct pci_dev *dev)
>  static void pci_configure_ltr(struct pci_dev *dev)
>  {
>  #ifdef CONFIG_PCIEASPM
> -       u32 cap;
> +       u32 cap, l1ss_cap, l1ss;
>         struct pci_dev *bridge;
>
>         if (!pci_is_pcie(dev))
>                 return;
>
> +       /*
> +        * Per PCIe r4.0, sec 5.5.4, we must program LTR_L1.2_THRESHOLD if
> +        * either L1.2 enable bit is set.  That suggests that LTR must be
> +        * enabled before L1.2.  If the device (and the entire path leading
> +        * to it) advertises L1.2 and LTR support, enable LTR.
> +        */
> +       l1ss_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_L1SS);
> +       if (!l1ss_cap)
> +               return;
> +
> +       pci_read_config_dword(dev, l1ss_cap + PCI_L1SS_CAP, &l1ss);
> +       if (!(l1ss & PCI_L1SS_CAP_L1_PM_SS))
> +               return;
> +
> +       if (!(l1ss & (PCI_L1SS_CAP_PCIPM_L1_2 | PCI_L1SS_CAP_ASPM_L1_2)))
> +               return;
> +
>         pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap);
>         if (!(cap & PCI_EXP_DEVCAP2_LTR))
>                 return;
> @@ -1967,7 +1984,7 @@ static void pci_configure_ltr(struct pci_dev *dev)
>         /*
>          * Software must not enable LTR in an Endpoint unless the Root
>          * Complex and all intermediate Switches indicate support for LTR.
> -        * PCIe r3.1, sec 6.18.
> +        * PCIe r4.0, sec 6.18.
>          */
>         if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
>                 dev->ltr_path = 1;
Bjorn Helgaas April 14, 2018, 4:09 p.m. UTC | #2
On Sat, Apr 14, 2018 at 09:04:05AM +0530, Srinath Mannam wrote:
> I am sorry, in previous mail by mistake I have written L1.s support is
> not there, Actually I wanted to write L0s support is not there.
> L1 and L1ss support is there.

If your endpoint (and everything in the path) advertise both LTR and
L1ss support, that patch probably won't make a difference.

It *might* make a difference if only part of the path supports both,
because my reading of the spec is that L1ss requires LTR and LTR
requires the entire path to support LTR, and we currently don't
enforce that "entire path" part before enabling L1ss.

> But In our platform we required to disable ASPM.

We're trying to figure out exactly *why* you must disable ASPM.  If
it's because of a hardware defect, e.g., the device advertises ASPM
support but it's actually broken, we probably need to add a quirk.
Given the complexity of ASPM, it's surprising we don't have similar
quirks already.

> RC in our platform supports upto Gen3 only.

I don't think there's a connection between ASPM or LTR and the link
speed (Gen2, Gen3, etc), is there?

Bjorn
Srinath Mannam April 16, 2018, 3:33 p.m. UTC | #3
Hi Bjorn,

Please find my comments inline.

On Sat, Apr 14, 2018 at 9:39 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> On Sat, Apr 14, 2018 at 09:04:05AM +0530, Srinath Mannam wrote:
>> I am sorry, in previous mail by mistake I have written L1.s support is
>> not there, Actually I wanted to write L0s support is not there.
>> L1 and L1ss support is there.
>
> If your endpoint (and everything in the path) advertise both LTR and
> L1ss support, that patch probably won't make a difference.
>
> It *might* make a difference if only part of the path supports both,
> because my reading of the spec is that L1ss requires LTR and LTR
> requires the entire path to support LTR, and we currently don't
> enforce that "entire path" part before enabling L1ss.
>
Yes, this patch did not work.
>> But In our platform we required to disable ASPM.
>
> We're trying to figure out exactly *why* you must disable ASPM.  If
> it's because of a hardware defect, e.g., the device advertises ASPM
> support but it's actually broken, we probably need to add a quirk.
> Given the complexity of ASPM, it's surprising we don't have similar
> quirks already.
>
We see issues with ASPM enabled. Some link issues observed so for time being
we are using with aspm disabled until we fix that issue.
with LTR enabled also we observed some problem, that after LTR messages received
from EP, we see completion timeout with config write.
So I thought If LTR configuration function also part of aspm file, as
it was under CONFIG_ASPM.
using pcie_aspm boot arg I can disable both ASPM and LTR.
If this is not possible, then I will go for alternative solution of
quirk implementation as you suggest.
>> RC in our platform supports upto Gen3 only.
>
> I don't think there's a connection between ASPM or LTR and the link
> speed (Gen2, Gen3, etc), is there?
>
Here is the lspci output of RC and EP. EP is directly connected to our
RC, nothing in between.
RC:
0000:00:00.0 PCI bridge: Broadcom Limited Device d714 (prog-if 00
[Normal decode])
        Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx-
        Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
<TAbort- <MAbort- >SERR- <PERR- INTx-
        Latency: 0
        Interrupt: pin A routed to IRQ 116
        Bus: primary=00, secondary=01, subordinate=01, sec-latency=0
        I/O behind bridge: 00000000-00000fff [size=4K]
        Memory behind bridge: 80000000-800fffff [size=1M]
        Prefetchable memory behind bridge:
00000000fff00000-00000000000fffff [empty]
        Secondary status: 66MHz- FastB2B- ParErr- DEVSEL=fast >TAbort-
<TAbort- <MAbort- <SERR- <PERR-
        BridgeCtl: Parity- SERR- NoISA- VGA- MAbort- >Reset- FastB2B-
                PriDiscTmr- SecDiscTmr- DiscTmrStat- DiscTmrSERREn-
        Capabilities: [48] Power Management version 3
                Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA
PME(D0+,D1-,D2-,D3hot+,D3cold+)
                Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=1 PME-
        Capabilities: [ac] Express (v2) Root Port (Slot-), MSI 00
                DevCap: MaxPayload 512 bytes, PhantFunc 0
                        ExtTag+ RBE+
                DevCtl: Report errors: Correctable+ Non-Fatal+ Fatal+
Unsupported+
                        RlxdOrd+ ExtTag+ PhantFunc- AuxPwr+ NoSnoop+
                        MaxPayload 512 bytes, MaxReadReq 512 bytes
                DevSta: CorrErr- UncorrErr- FatalErr- UnsuppReq-
AuxPwr+ TransPend-
                LnkCap: Port #0, Speed 8GT/s, Width x4, ASPM L0s L1,
Exit Latency L0s <1us, L1 <2us
                        ClockPM+ Surprise- LLActRep- BwNot+ ASPMOptComp+
                LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- CommClk+
                        ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
                LnkSta: Speed 8GT/s, Width x4, TrErr- Train- SlotClk+
DLActive- BWMgmt+ ABWMgmt-
                RootCtl: ErrCorrectable- ErrNon-Fatal- ErrFatal-
PMEIntEna+ CRSVisible+
                RootCap: CRSVisible-
                RootSta: PME ReqID 0000, PMEStatus- PMEPending-
                DevCap2: Completion Timeout: Range ABCD, TimeoutDis+,
LTR+, OBFF Via WAKE# ARIFwd+
                         AtomicOpsCap: Routing- 32bit- 64bit- 128bitCAS-
                DevCtl2: Completion Timeout: 50us to 50ms,
TimeoutDis-, LTR+, OBFF Disabled ARIFwd-
                         AtomicOpsCtl: ReqEn- EgressBlck-
                LnkCtl2: Target Link Speed: 8GT/s, EnterCompliance- SpeedDis-
                         Transmit Margin: Normal Operating Range,
EnterModifiedCompliance- ComplianceSOS-
                         Compliance De-emphasis: -6dB
                LnkSta2: Current De-emphasis Level: -3.5dB,
EqualizationComplete+, EqualizationPhase1+
                         EqualizationPhase2+, EqualizationPhase3+,
LinkEqualizationRequest-
        Capabilities: [100 v1] Advanced Error Reporting
                UESta:  DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt-
UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol-
                UEMsk:  DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt-
UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol-
                UESvrt: DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt-
UnxCmplt- RxOF+ MalfTLP+ ECRC- UnsupReq- ACSViol-
                CESta:  RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
                CEMsk:  RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr+
                AERCap: First Error Pointer: 00, ECRCGenCap+
ECRCGenEn- ECRCChkCap+ ECRCChkEn-
                        MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap-
                HeaderLog: 00000000 00000000 00000000 00000000
                RootCmd: CERptEn+ NFERptEn+ FERptEn+
                RootSta: CERcvd- MultCERcvd- UERcvd- MultUERcvd-
                         FirstFatal- NonFatalMsg- FatalMsg- IntMsg 0
                ErrorSrc: ERR_COR: 0000 ERR_FATAL/NONFATAL: 0000
        Capabilities: [180 v1] Vendor Specific Information: ID=0000
Rev=0 Len=028 <?>
        Capabilities: [240 v1] L1 PM Substates
                L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+
ASPM_L1.1+ L1_PM_Substates+
                          PortCommonModeRestoreTime=8us PortTPowerOnTime=10us
                L1SubCtl1: PCI-PM_L1.2- PCI-PM_L1.1- ASPM_L1.2- ASPM_L1.1-
                           T_CommonMode=1us LTR1.2_Threshold=0ns
                L1SubCtl2: T_PwrOn=10us
        Capabilities: [300 v1] #19
        Kernel driver in use: pcieport

EP:
0000:01:00.0 Non-Volatile memory controller: Samsung Electronics Co
Ltd NVMe SSD Controller SM961/PM961 (prog-if 02 [NVM Express])
        Subsystem: Samsung Electronics Co Ltd NVMe SSD Controller SM961/PM961
        Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx-
        Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
<TAbort- <MAbort- >SERR- <PERR- INTx-
        Interrupt: pin A routed to IRQ 255
        Region 0: Memory at 400000000 (64-bit, non-prefetchable)
[disabled] [size=16K]
        Capabilities: [40] Power Management version 3
                Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA
PME(D0-,D1-,D2-,D3hot-,D3cold-)
                Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
        Capabilities: [50] MSI: Enable- Count=1/32 Maskable- 64bit+
                Address: 0000000000000000  Data: 0000
        Capabilities: [70] Express (v2) Endpoint, MSI 00
                DevCap: MaxPayload 256 bytes, PhantFunc 0, Latency L0s
unlimited, L1 unlimited
                        ExtTag- AttnBtn- AttnInd- PwrInd- RBE+
FLReset+ SlotPowerLimit 0.000W
                DevCtl: Report errors: Correctable- Non-Fatal- Fatal-
Unsupported-
                        RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop+ FLReset-
                        MaxPayload 256 bytes, MaxReadReq 256 bytes
                DevSta: CorrErr- UncorrErr- FatalErr- UnsuppReq-
AuxPwr+ TransPend-
                LnkCap: Port #0, Speed 8GT/s, Width x4, ASPM L1, Exit
Latency L1 <64us
                        ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp+
                LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- CommClk+
                        ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
                LnkSta: Speed 8GT/s, Width x4, TrErr- Train- SlotClk+
DLActive- BWMgmt- ABWMgmt-
                DevCap2: Completion Timeout: Range ABCD, TimeoutDis+,
LTR+, OBFF Not Supported
                         AtomicOpsCap: 32bit- 64bit- 128bitCAS-
                DevCtl2: Completion Timeout: 50us to 50ms,
TimeoutDis-, LTR-, OBFF Disabled
                         AtomicOpsCtl: ReqEn-
                LnkCtl2: Target Link Speed: 8GT/s, EnterCompliance- SpeedDis-
                         Transmit Margin: Normal Operating Range,
EnterModifiedCompliance- ComplianceSOS-
                         Compliance De-emphasis: -6dB
                LnkSta2: Current De-emphasis Level: -6dB,
EqualizationComplete+, EqualizationPhase1+
                         EqualizationPhase2+, EqualizationPhase3+,
LinkEqualizationRequest-
        Capabilities: [b0] MSI-X: Enable- Count=8 Masked-
                Vector table: BAR=0 offset=00003000
                PBA: BAR=0 offset=00002000
        Capabilities: [100 v2] Advanced Error Reporting
                UESta:  DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt-
UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol-
                UEMsk:  DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt-
UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol-
                UESvrt: DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt-
UnxCmplt- RxOF+ MalfTLP+ ECRC- UnsupReq- ACSViol-
                CESta:  RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
                CEMsk:  RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr+
                AERCap: First Error Pointer: 00, ECRCGenCap+
ECRCGenEn- ECRCChkCap+ ECRCChkEn-
                        MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap-
                HeaderLog: 00000000 00000000 00000000 00000000
        Capabilities: [148 v1] Device Serial Number 00-00-00-00-00-00-00-00
        Capabilities: [158 v1] Power Budgeting <?>
        Capabilities: [168 v1] #19
        Capabilities: [188 v1] Latency Tolerance Reporting
                Max snoop latency: 0ns
                Max no snoop latency: 0ns
        Capabilities: [190 v1] L1 PM Substates
                L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+
ASPM_L1.1+ L1_PM_Substates+
                          PortCommonModeRestoreTime=10us PortTPowerOnTime=10us
                L1SubCtl1: PCI-PM_L1.2- PCI-PM_L1.1- ASPM_L1.2- ASPM_L1.1-
                           T_CommonMode=0us LTR1.2_Threshold=0ns
                L1SubCtl2: T_PwrOn=10us



> Bjorn
Bjorn Helgaas April 16, 2018, 9:35 p.m. UTC | #4
[+cc Keith, linux-nvme, LKML; another possible ASPM issue with Samsung
NVMe SSD 960 EVO]

On Mon, Apr 16, 2018 at 09:03:33PM +0530, Srinath Mannam wrote:
> On Sat, Apr 14, 2018 at 9:39 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > On Sat, Apr 14, 2018 at 09:04:05AM +0530, Srinath Mannam wrote:
> >> I am sorry, in previous mail by mistake I have written L1.s support is
> >> not there, Actually I wanted to write L0s support is not there.
> >> L1 and L1ss support is there.
> >
> > If your endpoint (and everything in the path) advertise both LTR and
> > L1ss support, that patch probably won't make a difference.
> >
> > It *might* make a difference if only part of the path supports both,
> > because my reading of the spec is that L1ss requires LTR and LTR
> > requires the entire path to support LTR, and we currently don't
> > enforce that "entire path" part before enabling L1ss.
> >
> Yes, this patch did not work.

OK, thanks for checking.  Since there's only one link in the path and
both ends advertise L1SS and LTR support, I wouldn't expect it to make
a difference.

> >> But In our platform we required to disable ASPM.
>
> > We're trying to figure out exactly *why* you must disable ASPM.  If
> > it's because of a hardware defect, e.g., the device advertises ASPM
> > support but it's actually broken, we probably need to add a quirk.
> > Given the complexity of ASPM, it's surprising we don't have similar
> > quirks already.
>
> We see issues with ASPM enabled. Some link issues observed so for
> time being we are using with aspm disabled until we fix that issue.

I see other reports of ASPM issues with that Samsung 960 PRO NVMe SSD.
Maybe they're related?

  https://lkml.kernel.org/r/20171214184701.GA6322@libmpq.org
  https://forums.lenovo.com/t5/ThinkCentre-A-E-M-S-Series/M900-Tiny-UEFI-Bug-M-2-NVMe-SSD-amp-8260-WiFi-ASPM-disabled-Much/td-p/3570469

You might try setting NVME_QUIRK_NO_APST to see if that's related.
There are some quirks that sound similar:

  8427bbc22486 ("nvme-pci: disable APST on Samsung SSD 960 EVO + ASUS PRIME B350M-A")
  467c77d4cbef ("nvme-pci: disable APST for Samsung NVMe SSD 960 EVO + ASUS PRIME Z370-A")

Keith, et al, here's the relevant part of Srinath's lspci.  Both ends
of the link claim to support ASPM including L1SS and LTR, but Srinath
has to disable ASPM to get the SSD to work reliably.  Just FYI.

> 0000:00:00.0 PCI bridge: Broadcom Limited Device d714 (prog-if 00 [Normal decode])
>        Bus: primary=00, secondary=01, subordinate=01, sec-latency=0
>        Capabilities: [ac] Express (v2) Root Port (Slot-), MSI 00
>                LnkCap: Port #0, Speed 8GT/s, Width x4, ASPM L0s L1, Exit Latency L0s <1us, L1 <2us
>                        ClockPM+ Surprise- LLActRep- BwNot+ ASPMOptComp+
>                LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- CommClk+
>                        ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
>                DevCap2: Completion Timeout: Range ABCD, TimeoutDis+, LTR+, OBFF Via WAKE# ARIFwd+
>                         AtomicOpsCap: Routing- 32bit- 64bit- 128bitCAS-
>                DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-, LTR+, OBFF Disabled ARIFwd-
>                         AtomicOpsCtl: ReqEn- EgressBlck-
>        Capabilities: [240 v1] L1 PM Substates
>                L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+ L1_PM_Substates+
>                          PortCommonModeRestoreTime=8us PortTPowerOnTime=10us
>                L1SubCtl1: PCI-PM_L1.2- PCI-PM_L1.1- ASPM_L1.2- ASPM_L1.1-
>                           T_CommonMode=1us LTR1.2_Threshold=0ns

> 0000:01:00.0 Non-Volatile memory controller: Samsung Electronics Co Ltd NVMe SSD Controller SM961/PM961 (prog-if 02 [NVM Express])
>        Capabilities: [70] Express (v2) Endpoint, MSI 00
>                LnkCap: Port #0, Speed 8GT/s, Width x4, ASPM L1, Exit Latency L1 <64us
>                        ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp+
>                LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- CommClk+
>                        ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
>                DevCap2: Completion Timeout: Range ABCD, TimeoutDis+, LTR+, OBFF Not Supported
>                         AtomicOpsCap: 32bit- 64bit- 128bitCAS-
>                DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-, LTR-, OBFF Disabled
>                         AtomicOpsCtl: ReqEn- 
>        Capabilities: [190 v1] L1 PM Substates
>                L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+ L1_PM_Substates+
>                          PortCommonModeRestoreTime=10us PortTPowerOnTime=10us
>                L1SubCtl1: PCI-PM_L1.2- PCI-PM_L1.1- ASPM_L1.2- ASPM_L1.1-
>                           T_CommonMode=0us LTR1.2_Threshold=0ns


> with LTR enabled also we observed some problem, that after LTR
> messages received from EP, we see completion timeout with config
> write.

> So I thought If LTR configuration function also part of aspm file,
> as it was under CONFIG_ASPM.  using pcie_aspm boot arg I can disable
> both ASPM and LTR.

> If this is not possible, then I will go for alternative solution of
> quirk implementation as you suggest.

Is this platform a lab prototype or is it already shipping?  If it's
already shipping, you probably need some sort of upstream solution
like an NVMe or PCIe quirk, but if not, maybe you can just hack your
bringup kernel to disable ASPM and LTR until you fix the root cause.  

Bjorn
Srinath Mannam April 17, 2018, 9:03 a.m. UTC | #5
Hi Bjorn,

Thank you for more insight you have given about the problem.

For us the issue comes before we disable apst feature.
on APST quirk set, NVMe driver disable apst by send a command to NVMe
controller.
We see issue at the time of NVMe initialization only.

So APST quirk did not helped.



On Tue, Apr 17, 2018 at 3:05 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> [+cc Keith, linux-nvme, LKML; another possible ASPM issue with Samsung
> NVMe SSD 960 EVO]
>
> On Mon, Apr 16, 2018 at 09:03:33PM +0530, Srinath Mannam wrote:
>> On Sat, Apr 14, 2018 at 9:39 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>> > On Sat, Apr 14, 2018 at 09:04:05AM +0530, Srinath Mannam wrote:
>> >> I am sorry, in previous mail by mistake I have written L1.s support is
>> >> not there, Actually I wanted to write L0s support is not there.
>> >> L1 and L1ss support is there.
>> >
>> > If your endpoint (and everything in the path) advertise both LTR and
>> > L1ss support, that patch probably won't make a difference.
>> >
>> > It *might* make a difference if only part of the path supports both,
>> > because my reading of the spec is that L1ss requires LTR and LTR
>> > requires the entire path to support LTR, and we currently don't
>> > enforce that "entire path" part before enabling L1ss.
>> >
>> Yes, this patch did not work.
>
> OK, thanks for checking.  Since there's only one link in the path and
> both ends advertise L1SS and LTR support, I wouldn't expect it to make
> a difference.
>
>> >> But In our platform we required to disable ASPM.
>>
>> > We're trying to figure out exactly *why* you must disable ASPM.  If
>> > it's because of a hardware defect, e.g., the device advertises ASPM
>> > support but it's actually broken, we probably need to add a quirk.
>> > Given the complexity of ASPM, it's surprising we don't have similar
>> > quirks already.
>>
>> We see issues with ASPM enabled. Some link issues observed so for
>> time being we are using with aspm disabled until we fix that issue.
>
> I see other reports of ASPM issues with that Samsung 960 PRO NVMe SSD.
> Maybe they're related?
>
>   https://lkml.kernel.org/r/20171214184701.GA6322@libmpq.org
>   https://forums.lenovo.com/t5/ThinkCentre-A-E-M-S-Series/M900-Tiny-UEFI-Bug-M-2-NVMe-SSD-amp-8260-WiFi-ASPM-disabled-Much/td-p/3570469
>
> You might try setting NVME_QUIRK_NO_APST to see if that's related.
> There are some quirks that sound similar:
>
>   8427bbc22486 ("nvme-pci: disable APST on Samsung SSD 960 EVO + ASUS PRIME B350M-A")
>   467c77d4cbef ("nvme-pci: disable APST for Samsung NVMe SSD 960 EVO + ASUS PRIME Z370-A")
>
> Keith, et al, here's the relevant part of Srinath's lspci.  Both ends
> of the link claim to support ASPM including L1SS and LTR, but Srinath
> has to disable ASPM to get the SSD to work reliably.  Just FYI.
>
>> 0000:00:00.0 PCI bridge: Broadcom Limited Device d714 (prog-if 00 [Normal decode])
>>        Bus: primary=00, secondary=01, subordinate=01, sec-latency=0
>>        Capabilities: [ac] Express (v2) Root Port (Slot-), MSI 00
>>                LnkCap: Port #0, Speed 8GT/s, Width x4, ASPM L0s L1, Exit Latency L0s <1us, L1 <2us
>>                        ClockPM+ Surprise- LLActRep- BwNot+ ASPMOptComp+
>>                LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- CommClk+
>>                        ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
>>                DevCap2: Completion Timeout: Range ABCD, TimeoutDis+, LTR+, OBFF Via WAKE# ARIFwd+
>>                         AtomicOpsCap: Routing- 32bit- 64bit- 128bitCAS-
>>                DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-, LTR+, OBFF Disabled ARIFwd-
>>                         AtomicOpsCtl: ReqEn- EgressBlck-
>>        Capabilities: [240 v1] L1 PM Substates
>>                L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+ L1_PM_Substates+
>>                          PortCommonModeRestoreTime=8us PortTPowerOnTime=10us
>>                L1SubCtl1: PCI-PM_L1.2- PCI-PM_L1.1- ASPM_L1.2- ASPM_L1.1-
>>                           T_CommonMode=1us LTR1.2_Threshold=0ns
>
>> 0000:01:00.0 Non-Volatile memory controller: Samsung Electronics Co Ltd NVMe SSD Controller SM961/PM961 (prog-if 02 [NVM Express])
>>        Capabilities: [70] Express (v2) Endpoint, MSI 00
>>                LnkCap: Port #0, Speed 8GT/s, Width x4, ASPM L1, Exit Latency L1 <64us
>>                        ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp+
>>                LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- CommClk+
>>                        ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
>>                DevCap2: Completion Timeout: Range ABCD, TimeoutDis+, LTR+, OBFF Not Supported
>>                         AtomicOpsCap: 32bit- 64bit- 128bitCAS-
>>                DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-, LTR-, OBFF Disabled
>>                         AtomicOpsCtl: ReqEn-
>>        Capabilities: [190 v1] L1 PM Substates
>>                L1SubCap: PCI-PM_L1.2+ PCI-PM_L1.1+ ASPM_L1.2+ ASPM_L1.1+ L1_PM_Substates+
>>                          PortCommonModeRestoreTime=10us PortTPowerOnTime=10us
>>                L1SubCtl1: PCI-PM_L1.2- PCI-PM_L1.1- ASPM_L1.2- ASPM_L1.1-
>>                           T_CommonMode=0us LTR1.2_Threshold=0ns
>
>
>> with LTR enabled also we observed some problem, that after LTR
>> messages received from EP, we see completion timeout with config
>> write.
>
>> So I thought If LTR configuration function also part of aspm file,
>> as it was under CONFIG_ASPM.  using pcie_aspm boot arg I can disable
>> both ASPM and LTR.
>
>> If this is not possible, then I will go for alternative solution of
>> quirk implementation as you suggest.
>
> Is this platform a lab prototype or is it already shipping?  If it's
> already shipping, you probably need some sort of upstream solution
> like an NVMe or PCIe quirk, but if not, maybe you can just hack your
> bringup kernel to disable ASPM and LTR until you fix the root cause.
>
we are at evolution stage so we need to fix this ASAP.
As you said earlier, Can I add sysfs interface to enable LTR same as
we do L1SS or in the part of aspm cap init function.

> Bjorn

Regards,
Srinath.
diff mbox

Patch

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index f76eb7704f64..9e2212889e7e 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -400,6 +400,16 @@  static void pcie_get_aspm_reg(struct pci_dev *pdev,
 		info->l1ss_cap = 0;
 		return;
 	}
+
+	/*
+	 * If we don't have LTR for the entire path from the Root Complex
+	 * to this device, we can't use L1.2.  PCIe r4.0, secs 5.5.4, 6.18.
+	 */
+	if (!pdev->ltr_path) {
+		info->l1ss_cap &= ~PCI_L1SS_CTL1_PCIPM_L1_2;
+		info->l1ss_cap &= ~PCI_L1SS_CTL1_ASPM_L1_2;
+	}
+
 	pci_read_config_dword(pdev, info->l1ss_cap_ptr + PCI_L1SS_CTL1,
 			      &info->l1ss_ctl1);
 	pci_read_config_dword(pdev, info->l1ss_cap_ptr + PCI_L1SS_CTL2,
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ac91b6fd0bcd..9a224612b3f8 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1954,12 +1954,29 @@  static void pci_configure_relaxed_ordering(struct pci_dev *dev)
 static void pci_configure_ltr(struct pci_dev *dev)
 {
 #ifdef CONFIG_PCIEASPM
-	u32 cap;
+	u32 cap, l1ss_cap, l1ss;
 	struct pci_dev *bridge;
 
 	if (!pci_is_pcie(dev))
 		return;
 
+	/*
+	 * Per PCIe r4.0, sec 5.5.4, we must program LTR_L1.2_THRESHOLD if
+	 * either L1.2 enable bit is set.  That suggests that LTR must be
+	 * enabled before L1.2.  If the device (and the entire path leading
+	 * to it) advertises L1.2 and LTR support, enable LTR.
+	 */
+	l1ss_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_L1SS);
+	if (!l1ss_cap)
+		return;
+
+	pci_read_config_dword(dev, l1ss_cap + PCI_L1SS_CAP, &l1ss);
+	if (!(l1ss & PCI_L1SS_CAP_L1_PM_SS))
+		return;
+
+	if (!(l1ss & (PCI_L1SS_CAP_PCIPM_L1_2 | PCI_L1SS_CAP_ASPM_L1_2)))
+		return;
+
 	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap);
 	if (!(cap & PCI_EXP_DEVCAP2_LTR))
 		return;
@@ -1967,7 +1984,7 @@  static void pci_configure_ltr(struct pci_dev *dev)
 	/*
 	 * Software must not enable LTR in an Endpoint unless the Root
 	 * Complex and all intermediate Switches indicate support for LTR.
-	 * PCIe r3.1, sec 6.18.
+	 * PCIe r4.0, sec 6.18.
 	 */
 	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
 		dev->ltr_path = 1;