diff mbox series

[v6,2/5] PCI: qcom: Add retry logic for link to be stable in L1ss

Message ID 1662713084-8106-3-git-send-email-quic_krichai@quicinc.com (mailing list archive)
State Superseded
Delegated to: Lorenzo Pieralisi
Headers show
Series PCI: qcom: Add system suspend & resume support | expand

Commit Message

Krishna chaitanya chundru Sept. 9, 2022, 8:44 a.m. UTC
Some specific devices are taking time to settle the link in L1ss.
So added a retry logic before returning from the suspend op.

Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
---
 drivers/pci/controller/dwc/pcie-qcom.c | 36 +++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

Comments

Bjorn Helgaas Sept. 9, 2022, 7:50 p.m. UTC | #1
On Fri, Sep 09, 2022 at 02:14:41PM +0530, Krishna chaitanya chundru wrote:
> Some specific devices are taking time to settle the link in L1ss.
> So added a retry logic before returning from the suspend op.

"L1ss" is not a state.  If you mean "L1.1" or "L1.2", say that.  Also
in code comments below.

s/So added a/Add/

What are these specific devices?  Is this a qcom controller defect?
An endpoint defect that should be addressed via some kind of generic
quirk?

> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> ---
>  drivers/pci/controller/dwc/pcie-qcom.c | 36 +++++++++++++++++++++++-----------
>  1 file changed, 25 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> index 6e04d0d..15c2067 100644
> --- a/drivers/pci/controller/dwc/pcie-qcom.c
> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> @@ -1809,26 +1809,40 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>  static int __maybe_unused qcom_pcie_pm_suspend(struct qcom_pcie *pcie)
>  {
>  	u32 val;
> +	ktime_t timeout, start;
>  	struct dw_pcie *pci = pcie->pci;
>  	struct device *dev = pci->dev;
>  
>  	if (!pcie->cfg->supports_system_suspend)
>  		return 0;
>  
> -	/* if the link is not active turn off clocks */
> -	if (!dw_pcie_link_up(pci)) {
> -		dev_info(dev, "Link is not active\n");
> -		goto suspend;
> -	}
> +	start = ktime_get();
> +	/* Wait max 200 ms */
> +	timeout = ktime_add_ms(start, 200);
>  
> -	/* if the link is not in l1ss don't turn off clocks */
> -	val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
> -	if (!(val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
> -		dev_warn(dev, "Link is not in L1ss\n");
> -		return 0;
> +	while (1) {
> +
> +		if (!dw_pcie_link_up(pci)) {
> +			dev_warn(dev, "Link is not active\n");
> +			break;
> +		}
> +
> +		/* if the link is not in l1ss don't turn off clocks */
> +		val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
> +		if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
> +			dev_dbg(dev, "Link enters L1ss after %d  ms\n",
> +					ktime_to_ms(ktime_get() - start));
> +			break;
> +		}
> +
> +		if (ktime_after(ktime_get(), timeout)) {
> +			dev_warn(dev, "Link is not in L1ss\n");
> +			return 0;
> +		}
> +
> +		udelay(1000);
>  	}
>  
> -suspend:
>  	if (pcie->cfg->ops->suspend)
>  		pcie->cfg->ops->suspend(pcie);
>  
> -- 
> 2.7.4
>
Krishna chaitanya chundru Sept. 12, 2022, 4:09 p.m. UTC | #2
On 9/10/2022 1:20 AM, Bjorn Helgaas wrote:
> On Fri, Sep 09, 2022 at 02:14:41PM +0530, Krishna chaitanya chundru wrote:
>> Some specific devices are taking time to settle the link in L1ss.
>> So added a retry logic before returning from the suspend op.
> "L1ss" is not a state.  If you mean "L1.1" or "L1.2", say that.  Also
> in code comments below.
Yes, L1ss means L1.1 and L1.2. We will update it in the next patch.
> s/So added a/Add/
>
> What are these specific devices?  Is this a qcom controller defect?
> An endpoint defect that should be addressed via some kind of generic
> quirk?

This depends on the endpoint device and varies from device to device.

We think this is not a defect: if there is some traffic on the link, the
link will not go to L1ss.

>
>> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
>> ---
>>   drivers/pci/controller/dwc/pcie-qcom.c | 36 +++++++++++++++++++++++-----------
>>   1 file changed, 25 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
>> index 6e04d0d..15c2067 100644
>> --- a/drivers/pci/controller/dwc/pcie-qcom.c
>> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
>> @@ -1809,26 +1809,40 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>>   static int __maybe_unused qcom_pcie_pm_suspend(struct qcom_pcie *pcie)
>>   {
>>   	u32 val;
>> +	ktime_t timeout, start;
>>   	struct dw_pcie *pci = pcie->pci;
>>   	struct device *dev = pci->dev;
>>   
>>   	if (!pcie->cfg->supports_system_suspend)
>>   		return 0;
>>   
>> -	/* if the link is not active turn off clocks */
>> -	if (!dw_pcie_link_up(pci)) {
>> -		dev_info(dev, "Link is not active\n");
>> -		goto suspend;
>> -	}
>> +	start = ktime_get();
>> +	/* Wait max 200 ms */
>> +	timeout = ktime_add_ms(start, 200);
>>   
>> -	/* if the link is not in l1ss don't turn off clocks */
>> -	val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
>> -	if (!(val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
>> -		dev_warn(dev, "Link is not in L1ss\n");
>> -		return 0;
>> +	while (1) {
>> +
>> +		if (!dw_pcie_link_up(pci)) {
>> +			dev_warn(dev, "Link is not active\n");
>> +			break;
>> +		}
>> +
>> +		/* if the link is not in l1ss don't turn off clocks */
>> +		val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
>> +		if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
>> +			dev_dbg(dev, "Link enters L1ss after %d  ms\n",
>> +					ktime_to_ms(ktime_get() - start));
>> +			break;
>> +		}
>> +
>> +		if (ktime_after(ktime_get(), timeout)) {
>> +			dev_warn(dev, "Link is not in L1ss\n");
>> +			return 0;
>> +		}
>> +
>> +		udelay(1000);
>>   	}
>>   
>> -suspend:
>>   	if (pcie->cfg->ops->suspend)
>>   		pcie->cfg->ops->suspend(pcie);
>>   
>> -- 
>> 2.7.4
>>
Manivannan Sadhasivam Sept. 12, 2022, 5:33 p.m. UTC | #3
On Mon, Sep 12, 2022 at 09:39:36PM +0530, Krishna Chaitanya Chundru wrote:
> 
> On 9/10/2022 1:20 AM, Bjorn Helgaas wrote:
> > On Fri, Sep 09, 2022 at 02:14:41PM +0530, Krishna chaitanya chundru wrote:
> > > Some specific devices are taking time to settle the link in L1ss.
> > > So added a retry logic before returning from the suspend op.
> > "L1ss" is not a state.  If you mean "L1.1" or "L1.2", say that.  Also
> > in code comments below.
> Yes L1ss means L1.2 and L1.2 We will update it next patch
> > s/So added a/Add/
> > 
> > What are these specific devices?  Is this a qcom controller defect?
> > An endpoint defect that should be addressed via some kind of generic
> > quirk?
> 
> This is depending up on the endpoint devices and it varies to device to
> device.
> 

Can we identify the source of the traffic? Is the NVMe driver not
flushing its queues correctly?

> We are thinking this is not a defect if there is some traffic in the link
> the link will
> 
> not go to L1ss .
> 

Is this hack still required even after switching to syscore ops?

Thanks,
Mani

> > 
> > > Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> > > ---
> > >   drivers/pci/controller/dwc/pcie-qcom.c | 36 +++++++++++++++++++++++-----------
> > >   1 file changed, 25 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> > > index 6e04d0d..15c2067 100644
> > > --- a/drivers/pci/controller/dwc/pcie-qcom.c
> > > +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> > > @@ -1809,26 +1809,40 @@ static int qcom_pcie_probe(struct platform_device *pdev)
> > >   static int __maybe_unused qcom_pcie_pm_suspend(struct qcom_pcie *pcie)
> > >   {
> > >   	u32 val;
> > > +	ktime_t timeout, start;
> > >   	struct dw_pcie *pci = pcie->pci;
> > >   	struct device *dev = pci->dev;
> > >   	if (!pcie->cfg->supports_system_suspend)
> > >   		return 0;
> > > -	/* if the link is not active turn off clocks */
> > > -	if (!dw_pcie_link_up(pci)) {
> > > -		dev_info(dev, "Link is not active\n");
> > > -		goto suspend;
> > > -	}
> > > +	start = ktime_get();
> > > +	/* Wait max 200 ms */
> > > +	timeout = ktime_add_ms(start, 200);
> > > -	/* if the link is not in l1ss don't turn off clocks */
> > > -	val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
> > > -	if (!(val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
> > > -		dev_warn(dev, "Link is not in L1ss\n");
> > > -		return 0;
> > > +	while (1) {
> > > +
> > > +		if (!dw_pcie_link_up(pci)) {
> > > +			dev_warn(dev, "Link is not active\n");
> > > +			break;
> > > +		}
> > > +
> > > +		/* if the link is not in l1ss don't turn off clocks */
> > > +		val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
> > > +		if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
> > > +			dev_dbg(dev, "Link enters L1ss after %d  ms\n",
> > > +					ktime_to_ms(ktime_get() - start));
> > > +			break;
> > > +		}
> > > +
> > > +		if (ktime_after(ktime_get(), timeout)) {
> > > +			dev_warn(dev, "Link is not in L1ss\n");
> > > +			return 0;
> > > +		}
> > > +
> > > +		udelay(1000);
> > >   	}
> > > -suspend:
> > >   	if (pcie->cfg->ops->suspend)
> > >   		pcie->cfg->ops->suspend(pcie);
> > > -- 
> > > 2.7.4
> > >
Krishna chaitanya chundru Sept. 13, 2022, 2:24 p.m. UTC | #4
On 9/12/2022 11:03 PM, Manivannan Sadhasivam wrote:
> On Mon, Sep 12, 2022 at 09:39:36PM +0530, Krishna Chaitanya Chundru wrote:
>> On 9/10/2022 1:20 AM, Bjorn Helgaas wrote:
>>> On Fri, Sep 09, 2022 at 02:14:41PM +0530, Krishna chaitanya chundru wrote:
>>>> Some specific devices are taking time to settle the link in L1ss.
>>>> So added a retry logic before returning from the suspend op.
>>> "L1ss" is not a state.  If you mean "L1.1" or "L1.2", say that.  Also
>>> in code comments below.
>> Yes L1ss means L1.2 and L1.2 We will update it next patch
>>> s/So added a/Add/
>>>
>>> What are these specific devices?  Is this a qcom controller defect?
>>> An endpoint defect that should be addressed via some kind of generic
>>> quirk?
>> This is depending up on the endpoint devices and it varies to device to
>> device.
>>
> Can we identify the source of the traffic? Is the NVMe driver not
> flushing it's queues correctly?

We found that it is not from NVMe data; we are seeing some physical-layer
activity on the protocol analyzer.

>
>> We are thinking this is not a defect if there is some traffic in the link
>> the link will
>>
>> not go to L1ss .
>>
> Is this hack still required even after switching to syscore ops?
>
> Thanks,
> Mani

Yes, Mani, it is still required. Just before the syscore ops run, there
will be a PCI transaction to mask MSI-X interrupts.

>>>> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
>>>> ---
>>>>    drivers/pci/controller/dwc/pcie-qcom.c | 36 +++++++++++++++++++++++-----------
>>>>    1 file changed, 25 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
>>>> index 6e04d0d..15c2067 100644
>>>> --- a/drivers/pci/controller/dwc/pcie-qcom.c
>>>> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
>>>> @@ -1809,26 +1809,40 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>>>>    static int __maybe_unused qcom_pcie_pm_suspend(struct qcom_pcie *pcie)
>>>>    {
>>>>    	u32 val;
>>>> +	ktime_t timeout, start;
>>>>    	struct dw_pcie *pci = pcie->pci;
>>>>    	struct device *dev = pci->dev;
>>>>    	if (!pcie->cfg->supports_system_suspend)
>>>>    		return 0;
>>>> -	/* if the link is not active turn off clocks */
>>>> -	if (!dw_pcie_link_up(pci)) {
>>>> -		dev_info(dev, "Link is not active\n");
>>>> -		goto suspend;
>>>> -	}
>>>> +	start = ktime_get();
>>>> +	/* Wait max 200 ms */
>>>> +	timeout = ktime_add_ms(start, 200);
>>>> -	/* if the link is not in l1ss don't turn off clocks */
>>>> -	val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
>>>> -	if (!(val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
>>>> -		dev_warn(dev, "Link is not in L1ss\n");
>>>> -		return 0;
>>>> +	while (1) {
>>>> +
>>>> +		if (!dw_pcie_link_up(pci)) {
>>>> +			dev_warn(dev, "Link is not active\n");
>>>> +			break;
>>>> +		}
>>>> +
>>>> +		/* if the link is not in l1ss don't turn off clocks */
>>>> +		val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
>>>> +		if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
>>>> +			dev_dbg(dev, "Link enters L1ss after %d  ms\n",
>>>> +					ktime_to_ms(ktime_get() - start));
>>>> +			break;
>>>> +		}
>>>> +
>>>> +		if (ktime_after(ktime_get(), timeout)) {
>>>> +			dev_warn(dev, "Link is not in L1ss\n");
>>>> +			return 0;
>>>> +		}
>>>> +
>>>> +		udelay(1000);
>>>>    	}
>>>> -suspend:
>>>>    	if (pcie->cfg->ops->suspend)
>>>>    		pcie->cfg->ops->suspend(pcie);
>>>> -- 
>>>> 2.7.4
>>>>
Manivannan Sadhasivam Sept. 13, 2022, 4:39 p.m. UTC | #5
On Tue, Sep 13, 2022 at 07:54:22PM +0530, Krishna Chaitanya Chundru wrote:
> 
> On 9/12/2022 11:03 PM, Manivannan Sadhasivam wrote:
> > On Mon, Sep 12, 2022 at 09:39:36PM +0530, Krishna Chaitanya Chundru wrote:
> > > On 9/10/2022 1:20 AM, Bjorn Helgaas wrote:
> > > > On Fri, Sep 09, 2022 at 02:14:41PM +0530, Krishna chaitanya chundru wrote:
> > > > > Some specific devices are taking time to settle the link in L1ss.
> > > > > So added a retry logic before returning from the suspend op.
> > > > "L1ss" is not a state.  If you mean "L1.1" or "L1.2", say that.  Also
> > > > in code comments below.
> > > Yes L1ss means L1.2 and L1.2 We will update it next patch
> > > > s/So added a/Add/
> > > > 
> > > > What are these specific devices?  Is this a qcom controller defect?
> > > > An endpoint defect that should be addressed via some kind of generic
> > > > quirk?
> > > This is depending up on the endpoint devices and it varies to device to
> > > device.
> > > 
> > Can we identify the source of the traffic? Is the NVMe driver not
> > flushing it's queues correctly?
> 
> We found that it is not from nvme data, we are seeing some physical layer
> activity on the
> 
> protocol analyzer.
> 

Okay

> > 
> > > We are thinking this is not a defect if there is some traffic in the link
> > > the link will
> > > 
> > > not go to L1ss .
> > > 
> > Is this hack still required even after switching to syscore ops?
> > 
> > Thanks,
> > Mani
> 
> Yes, mani it is still required. And just before this sycore ops there will
> be a pci transaction to
> 
> mask msix interrupts.
> 

Hmm. I'm getting slightly confused here. What really happens when you do
the resource teardown during suspend and the link has not entered L1SS?

Since PHY is powered by MX domain, I'm wondering why we should wait for
the link to be in L1SS?

Thanks,
Mani

> > > > > Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> > > > > ---
> > > > >    drivers/pci/controller/dwc/pcie-qcom.c | 36 +++++++++++++++++++++++-----------
> > > > >    1 file changed, 25 insertions(+), 11 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> > > > > index 6e04d0d..15c2067 100644
> > > > > --- a/drivers/pci/controller/dwc/pcie-qcom.c
> > > > > +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> > > > > @@ -1809,26 +1809,40 @@ static int qcom_pcie_probe(struct platform_device *pdev)
> > > > >    static int __maybe_unused qcom_pcie_pm_suspend(struct qcom_pcie *pcie)
> > > > >    {
> > > > >    	u32 val;
> > > > > +	ktime_t timeout, start;
> > > > >    	struct dw_pcie *pci = pcie->pci;
> > > > >    	struct device *dev = pci->dev;
> > > > >    	if (!pcie->cfg->supports_system_suspend)
> > > > >    		return 0;
> > > > > -	/* if the link is not active turn off clocks */
> > > > > -	if (!dw_pcie_link_up(pci)) {
> > > > > -		dev_info(dev, "Link is not active\n");
> > > > > -		goto suspend;
> > > > > -	}
> > > > > +	start = ktime_get();
> > > > > +	/* Wait max 200 ms */
> > > > > +	timeout = ktime_add_ms(start, 200);
> > > > > -	/* if the link is not in l1ss don't turn off clocks */
> > > > > -	val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
> > > > > -	if (!(val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
> > > > > -		dev_warn(dev, "Link is not in L1ss\n");
> > > > > -		return 0;
> > > > > +	while (1) {
> > > > > +
> > > > > +		if (!dw_pcie_link_up(pci)) {
> > > > > +			dev_warn(dev, "Link is not active\n");
> > > > > +			break;
> > > > > +		}
> > > > > +
> > > > > +		/* if the link is not in l1ss don't turn off clocks */
> > > > > +		val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
> > > > > +		if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
> > > > > +			dev_dbg(dev, "Link enters L1ss after %d  ms\n",
> > > > > +					ktime_to_ms(ktime_get() - start));
> > > > > +			break;
> > > > > +		}
> > > > > +
> > > > > +		if (ktime_after(ktime_get(), timeout)) {
> > > > > +			dev_warn(dev, "Link is not in L1ss\n");
> > > > > +			return 0;
> > > > > +		}
> > > > > +
> > > > > +		udelay(1000);
> > > > >    	}
> > > > > -suspend:
> > > > >    	if (pcie->cfg->ops->suspend)
> > > > >    		pcie->cfg->ops->suspend(pcie);
> > > > > -- 
> > > > > 2.7.4
> > > > >
Krishna chaitanya chundru Sept. 14, 2022, 1:45 a.m. UTC | #6
On 9/13/2022 10:09 PM, Manivannan Sadhasivam wrote:
> On Tue, Sep 13, 2022 at 07:54:22PM +0530, Krishna Chaitanya Chundru wrote:
>> On 9/12/2022 11:03 PM, Manivannan Sadhasivam wrote:
>>> On Mon, Sep 12, 2022 at 09:39:36PM +0530, Krishna Chaitanya Chundru wrote:
>>>> On 9/10/2022 1:20 AM, Bjorn Helgaas wrote:
>>>>> On Fri, Sep 09, 2022 at 02:14:41PM +0530, Krishna chaitanya chundru wrote:
>>>>>> Some specific devices are taking time to settle the link in L1ss.
>>>>>> So added a retry logic before returning from the suspend op.
>>>>> "L1ss" is not a state.  If you mean "L1.1" or "L1.2", say that.  Also
>>>>> in code comments below.
>>>> Yes L1ss means L1.2 and L1.2 We will update it next patch
>>>>> s/So added a/Add/
>>>>>
>>>>> What are these specific devices?  Is this a qcom controller defect?
>>>>> An endpoint defect that should be addressed via some kind of generic
>>>>> quirk?
>>>> This is depending up on the endpoint devices and it varies to device to
>>>> device.
>>>>
>>> Can we identify the source of the traffic? Is the NVMe driver not
>>> flushing it's queues correctly?
>> We found that it is not from nvme data, we are seeing some physical layer
>> activity on the
>>
>> protocol analyzer.
>>
> Okay
>
>>>> We are thinking this is not a defect if there is some traffic in the link
>>>> the link will
>>>>
>>>> not go to L1ss .
>>>>
>>> Is this hack still required even after switching to syscore ops?
>>>
>>> Thanks,
>>> Mani
>> Yes, mani it is still required. And just before this sycore ops there will
>> be a pci transaction to
>>
>> mask msix interrupts.
>>
> Hmm. I'm getting slightly confused here. What really happens when you do
> the resource teardown during suspend and the link has not entered L1SS?
>
> Since PHY is powered by MX domain, I'm wondering why we should wait for
> the link to be in L1SS?
>
> Thanks,
> Mani

Mani, we need to turn off the link only after the link has entered L1ss.
If we do it before that, some transactions will be disturbed and we see a
link down.

The MX power rail controls the digital logic of the PHY and only tries to
retain the link state. The analog logic is controlled by the CX rail only,
so we turn off the clocks and the PHY only when the link is in L1ss.

>>>>>> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
>>>>>> ---
>>>>>>     drivers/pci/controller/dwc/pcie-qcom.c | 36 +++++++++++++++++++++++-----------
>>>>>>     1 file changed, 25 insertions(+), 11 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
>>>>>> index 6e04d0d..15c2067 100644
>>>>>> --- a/drivers/pci/controller/dwc/pcie-qcom.c
>>>>>> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
>>>>>> @@ -1809,26 +1809,40 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>>>>>>     static int __maybe_unused qcom_pcie_pm_suspend(struct qcom_pcie *pcie)
>>>>>>     {
>>>>>>     	u32 val;
>>>>>> +	ktime_t timeout, start;
>>>>>>     	struct dw_pcie *pci = pcie->pci;
>>>>>>     	struct device *dev = pci->dev;
>>>>>>     	if (!pcie->cfg->supports_system_suspend)
>>>>>>     		return 0;
>>>>>> -	/* if the link is not active turn off clocks */
>>>>>> -	if (!dw_pcie_link_up(pci)) {
>>>>>> -		dev_info(dev, "Link is not active\n");
>>>>>> -		goto suspend;
>>>>>> -	}
>>>>>> +	start = ktime_get();
>>>>>> +	/* Wait max 200 ms */
>>>>>> +	timeout = ktime_add_ms(start, 200);
>>>>>> -	/* if the link is not in l1ss don't turn off clocks */
>>>>>> -	val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
>>>>>> -	if (!(val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
>>>>>> -		dev_warn(dev, "Link is not in L1ss\n");
>>>>>> -		return 0;
>>>>>> +	while (1) {
>>>>>> +
>>>>>> +		if (!dw_pcie_link_up(pci)) {
>>>>>> +			dev_warn(dev, "Link is not active\n");
>>>>>> +			break;
>>>>>> +		}
>>>>>> +
>>>>>> +		/* if the link is not in l1ss don't turn off clocks */
>>>>>> +		val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
>>>>>> +		if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
>>>>>> +			dev_dbg(dev, "Link enters L1ss after %d  ms\n",
>>>>>> +					ktime_to_ms(ktime_get() - start));
>>>>>> +			break;
>>>>>> +		}
>>>>>> +
>>>>>> +		if (ktime_after(ktime_get(), timeout)) {
>>>>>> +			dev_warn(dev, "Link is not in L1ss\n");
>>>>>> +			return 0;
>>>>>> +		}
>>>>>> +
>>>>>> +		udelay(1000);
>>>>>>     	}
>>>>>> -suspend:
>>>>>>     	if (pcie->cfg->ops->suspend)
>>>>>>     		pcie->cfg->ops->suspend(pcie);
>>>>>> -- 
>>>>>> 2.7.4
>>>>>>
Manivannan Sadhasivam Sept. 14, 2022, 5:59 a.m. UTC | #7
On Wed, Sep 14, 2022 at 07:15:35AM +0530, Krishna Chaitanya Chundru wrote:
> 
> On 9/13/2022 10:09 PM, Manivannan Sadhasivam wrote:
> > On Tue, Sep 13, 2022 at 07:54:22PM +0530, Krishna Chaitanya Chundru wrote:
> > > On 9/12/2022 11:03 PM, Manivannan Sadhasivam wrote:
> > > > On Mon, Sep 12, 2022 at 09:39:36PM +0530, Krishna Chaitanya Chundru wrote:
> > > > > On 9/10/2022 1:20 AM, Bjorn Helgaas wrote:
> > > > > > On Fri, Sep 09, 2022 at 02:14:41PM +0530, Krishna chaitanya chundru wrote:
> > > > > > > Some specific devices are taking time to settle the link in L1ss.
> > > > > > > So added a retry logic before returning from the suspend op.
> > > > > > "L1ss" is not a state.  If you mean "L1.1" or "L1.2", say that.  Also
> > > > > > in code comments below.
> > > > > Yes L1ss means L1.2 and L1.2 We will update it next patch
> > > > > > s/So added a/Add/
> > > > > > 
> > > > > > What are these specific devices?  Is this a qcom controller defect?
> > > > > > An endpoint defect that should be addressed via some kind of generic
> > > > > > quirk?
> > > > > This is depending up on the endpoint devices and it varies to device to
> > > > > device.
> > > > > 
> > > > Can we identify the source of the traffic? Is the NVMe driver not
> > > > flushing it's queues correctly?
> > > We found that it is not from nvme data, we are seeing some physical layer
> > > activity on the
> > > 
> > > protocol analyzer.
> > > 
> > Okay
> > 
> > > > > We are thinking this is not a defect if there is some traffic in the link
> > > > > the link will
> > > > > 
> > > > > not go to L1ss .
> > > > > 
> > > > Is this hack still required even after switching to syscore ops?
> > > > 
> > > > Thanks,
> > > > Mani
> > > Yes, mani it is still required. And just before this sycore ops there will
> > > be a pci transaction to
> > > 
> > > mask msix interrupts.
> > > 
> > Hmm. I'm getting slightly confused here. What really happens when you do
> > the resource teardown during suspend and the link has not entered L1SS?
> > 
> > Since PHY is powered by MX domain, I'm wondering why we should wait for
> > the link to be in L1SS?
> > 
> > Thanks,
> > Mani
> 
> Mani, we need to turn off the link only after link entered in to L1ss. If we
> do before that
> 
> some transactions will be disturbed and we see a link down.
> 
> Mx power rail will control digital logic of the PHY and tries to retain the
> link state only,
> 
> The analog logic is controlled by the CX rail only, so when the link is in
> L1ss only we turn off
> 
> clks and phy.
> 

Okay, thanks for the clarification. Please add this info as a comment just above
the change.

Thanks,
Mani

> > > > > > > Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> > > > > > > ---
> > > > > > >     drivers/pci/controller/dwc/pcie-qcom.c | 36 +++++++++++++++++++++++-----------
> > > > > > >     1 file changed, 25 insertions(+), 11 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> > > > > > > index 6e04d0d..15c2067 100644
> > > > > > > --- a/drivers/pci/controller/dwc/pcie-qcom.c
> > > > > > > +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> > > > > > > @@ -1809,26 +1809,40 @@ static int qcom_pcie_probe(struct platform_device *pdev)
> > > > > > >     static int __maybe_unused qcom_pcie_pm_suspend(struct qcom_pcie *pcie)
> > > > > > >     {
> > > > > > >     	u32 val;
> > > > > > > +	ktime_t timeout, start;
> > > > > > >     	struct dw_pcie *pci = pcie->pci;
> > > > > > >     	struct device *dev = pci->dev;
> > > > > > >     	if (!pcie->cfg->supports_system_suspend)
> > > > > > >     		return 0;
> > > > > > > -	/* if the link is not active turn off clocks */
> > > > > > > -	if (!dw_pcie_link_up(pci)) {
> > > > > > > -		dev_info(dev, "Link is not active\n");
> > > > > > > -		goto suspend;
> > > > > > > -	}
> > > > > > > +	start = ktime_get();
> > > > > > > +	/* Wait max 200 ms */
> > > > > > > +	timeout = ktime_add_ms(start, 200);
> > > > > > > -	/* if the link is not in l1ss don't turn off clocks */
> > > > > > > -	val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
> > > > > > > -	if (!(val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
> > > > > > > -		dev_warn(dev, "Link is not in L1ss\n");
> > > > > > > -		return 0;
> > > > > > > +	while (1) {
> > > > > > > +
> > > > > > > +		if (!dw_pcie_link_up(pci)) {
> > > > > > > +			dev_warn(dev, "Link is not active\n");
> > > > > > > +			break;
> > > > > > > +		}
> > > > > > > +
> > > > > > > +		/* if the link is not in l1ss don't turn off clocks */
> > > > > > > +		val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
> > > > > > > +		if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
> > > > > > > +			dev_dbg(dev, "Link enters L1ss after %d  ms\n",
> > > > > > > +					ktime_to_ms(ktime_get() - start));
> > > > > > > +			break;
> > > > > > > +		}
> > > > > > > +
> > > > > > > +		if (ktime_after(ktime_get(), timeout)) {
> > > > > > > +			dev_warn(dev, "Link is not in L1ss\n");
> > > > > > > +			return 0;
> > > > > > > +		}
> > > > > > > +
> > > > > > > +		udelay(1000);
> > > > > > >     	}
> > > > > > > -suspend:
> > > > > > >     	if (pcie->cfg->ops->suspend)
> > > > > > >     		pcie->cfg->ops->suspend(pcie);
> > > > > > > -- 
> > > > > > > 2.7.4
> > > > > > >
kernel test robot Sept. 19, 2022, 4:23 p.m. UTC | #8
Hi Krishna,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on helgaas-pci/next]
[also build test WARNING on next-20220919]
[cannot apply to clk/clk-next linus/master v6.0-rc6]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Krishna-chaitanya-chundru/PCI-qcom-Add-system-suspend-resume-support/20220909-164906
base:   https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git next
config: arm64-randconfig-r002-20220919 (https://download.01.org/0day-ci/archive/20220920/202209200020.ASFgBZac-lkp@intel.com/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 791a7ae1ba3efd6bca96338e10ffde557ba83920)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm64 cross compiling tool for clang build
        # apt-get install binutils-aarch64-linux-gnu
        # https://github.com/intel-lab-lkp/linux/commit/629a2c707a31ccfdf891d6b580cf3e8c62ab9169
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Krishna-chaitanya-chundru/PCI-qcom-Add-system-suspend-resume-support/20220909-164906
        git checkout 629a2c707a31ccfdf891d6b580cf3e8c62ab9169
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 SHELL=/bin/bash drivers/pci/controller/dwc/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/pci/controller/dwc/pcie-qcom.c:1834:6: warning: format specifies type 'int' but the argument has type 's64' (aka 'long long') [-Wformat]
                                           ktime_to_ms(ktime_get() - start));
                                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/dev_printk.h:158:46: note: expanded from macro 'dev_dbg'
           dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__)
                                               ~~~     ^~~~~~~~~~~
   include/linux/dev_printk.h:129:34: note: expanded from macro 'dev_printk'
                   _dev_printk(level, dev, fmt, ##__VA_ARGS__);            \
                                           ~~~    ^~~~~~~~~~~
   1 warning generated.


vim +1834 drivers/pci/controller/dwc/pcie-qcom.c

  1808	
  1809	static int __maybe_unused qcom_pcie_pm_suspend(struct qcom_pcie *pcie)
  1810	{
  1811		u32 val;
  1812		ktime_t timeout, start;
  1813		struct dw_pcie *pci = pcie->pci;
  1814		struct device *dev = pci->dev;
  1815	
  1816		if (!pcie->cfg->supports_system_suspend)
  1817			return 0;
  1818	
  1819		start = ktime_get();
  1820		/* Wait max 200 ms */
  1821		timeout = ktime_add_ms(start, 200);
  1822	
  1823		while (1) {
  1824	
  1825			if (!dw_pcie_link_up(pci)) {
  1826				dev_warn(dev, "Link is not active\n");
  1827				break;
  1828			}
  1829	
  1830			/* if the link is not in l1ss don't turn off clocks */
  1831			val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
  1832			if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
  1833				dev_dbg(dev, "Link enters L1ss after %d  ms\n",
> 1834						ktime_to_ms(ktime_get() - start));
  1835				break;
  1836			}
  1837	
  1838			if (ktime_after(ktime_get(), timeout)) {
  1839				dev_warn(dev, "Link is not in L1ss\n");
  1840				return 0;
  1841			}
  1842	
  1843			udelay(1000);
  1844		}
  1845	
  1846		if (pcie->cfg->ops->suspend)
  1847			pcie->cfg->ops->suspend(pcie);
  1848	
  1849		pcie->is_suspended = true;
  1850	
  1851		return 0;
  1852	}
  1853
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 6e04d0d..15c2067 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -1809,26 +1809,40 @@  static int qcom_pcie_probe(struct platform_device *pdev)
 static int __maybe_unused qcom_pcie_pm_suspend(struct qcom_pcie *pcie)
 {
 	u32 val;
+	ktime_t timeout, start;
 	struct dw_pcie *pci = pcie->pci;
 	struct device *dev = pci->dev;
 
 	if (!pcie->cfg->supports_system_suspend)
 		return 0;
 
-	/* if the link is not active turn off clocks */
-	if (!dw_pcie_link_up(pci)) {
-		dev_info(dev, "Link is not active\n");
-		goto suspend;
-	}
+	start = ktime_get();
+	/* Wait max 200 ms */
+	timeout = ktime_add_ms(start, 200);
 
-	/* if the link is not in l1ss don't turn off clocks */
-	val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
-	if (!(val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
-		dev_warn(dev, "Link is not in L1ss\n");
-		return 0;
+	while (1) {
+
+		if (!dw_pcie_link_up(pci)) {
+			dev_warn(dev, "Link is not active\n");
+			break;
+		}
+
+		/* if the link is not in l1ss don't turn off clocks */
+		val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
+		if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
+			dev_dbg(dev, "Link enters L1ss after %d  ms\n",
+					ktime_to_ms(ktime_get() - start));
+			break;
+		}
+
+		if (ktime_after(ktime_get(), timeout)) {
+			dev_warn(dev, "Link is not in L1ss\n");
+			return 0;
+		}
+
+		udelay(1000);
 	}
 
-suspend:
 	if (pcie->cfg->ops->suspend)
 		pcie->cfg->ops->suspend(pcie);