diff mbox series

[v5,3/3] PCI: qcom: Add retry logic for link to be stable in L1ss

Message ID 1659526134-22978-4-git-send-email-quic_krichai@quicinc.com (mailing list archive)
State Not Applicable
Headers show
Series PCI: Restrict pci transactions after pci suspend | expand

Commit Message

Krishna chaitanya chundru Aug. 3, 2022, 11:28 a.m. UTC
Some specific devices are taking time to settle the link in L1ss.
So added a retry logic before returning from the suspend op.

Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
---
 drivers/pci/controller/dwc/pcie-qcom.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

Comments

kernel test robot Aug. 4, 2022, 10:24 a.m. UTC | #1
Hi Krishna,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on helgaas-pci/next]
[also build test WARNING on next-20220803]
[cannot apply to linus/master v5.19]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Krishna-chaitanya-chundru/PCI-Restrict-pci-transactions-after-pci-suspend/20220803-193033
base:   https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git next
config: mips-allyesconfig (https://download.01.org/0day-ci/archive/20220804/202208041821.cik847re-lkp@intel.com/config)
compiler: mips-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/64b4ad561ad4a28aa8840303f29669bf7af77757
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Krishna-chaitanya-chundru/PCI-Restrict-pci-transactions-after-pci-suspend/20220803-193033
        git checkout 64b4ad561ad4a28aa8840303f29669bf7af77757
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash drivers/pci/controller/dwc/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from include/linux/device.h:15,
                    from include/linux/node.h:18,
                    from include/linux/cpu.h:17,
                    from include/linux/of_device.h:5,
                    from drivers/pci/controller/dwc/pcie-qcom.c:20:
   drivers/pci/controller/dwc/pcie-qcom.c: In function 'qcom_pcie_pm_suspend':
>> drivers/pci/controller/dwc/pcie-qcom.c:1846:39: warning: format '%d' expects argument of type 'int', but argument 3 has type 's64' {aka 'long long int'} [-Wformat=]
    1846 |                         dev_info(dev, "Link enters L1ss after %d ms\n",
         |                                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/dev_printk.h:110:30: note: in definition of macro 'dev_printk_index_wrap'
     110 |                 _p_func(dev, fmt, ##__VA_ARGS__);                       \
         |                              ^~~
   include/linux/dev_printk.h:150:58: note: in expansion of macro 'dev_fmt'
     150 |         dev_printk_index_wrap(_dev_info, KERN_INFO, dev, dev_fmt(fmt), ##__VA_ARGS__)
         |                                                          ^~~~~~~
   drivers/pci/controller/dwc/pcie-qcom.c:1846:25: note: in expansion of macro 'dev_info'
    1846 |                         dev_info(dev, "Link enters L1ss after %d ms\n",
         |                         ^~~~~~~~
   drivers/pci/controller/dwc/pcie-qcom.c:1846:64: note: format string is defined here
    1846 |                         dev_info(dev, "Link enters L1ss after %d ms\n",
         |                                                               ~^
         |                                                                |
         |                                                                int
         |                                                               %lld


vim +1846 drivers/pci/controller/dwc/pcie-qcom.c

  1827	
  1828	static int __maybe_unused qcom_pcie_pm_suspend(struct device *dev)
  1829	{
  1830		struct qcom_pcie *pcie = dev_get_drvdata(dev);
  1831		u32 val;
  1832		ktime_t timeout, start;
  1833	
  1834		if (!pcie->cfg->supports_system_suspend)
  1835			return 0;
  1836	
  1837		start = ktime_get();
  1838		/* Wait max 100 ms */
  1839		timeout = ktime_add_ms(start, 100);
  1840		while (1) {
  1841			bool timedout = ktime_after(ktime_get(), timeout);
  1842	
  1843			/* if the link is not in l1ss don't turn off clocks */
  1844			val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
  1845			if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
> 1846				dev_info(dev, "Link enters L1ss after %d ms\n",
  1847						ktime_to_ms(ktime_get() - start));
  1848				break;
  1849			}
  1850	
  1851			if (timedout) {
  1852				dev_warn(dev, "Link is not in L1ss\n");
  1853				return 0;
  1854			}
  1855			usleep_range(1000, 1200);
  1856		}
  1857	
  1858		if (pcie->cfg->ops->suspend)
  1859			pcie->cfg->ops->suspend(pcie);
  1860	
  1861		pcie->is_suspended = true;
  1862	
  1863		return 0;
  1864	}
  1865
Matthias Kaehlcke Aug. 4, 2022, 9:33 p.m. UTC | #2
On Wed, Aug 03, 2022 at 04:58:54PM +0530, Krishna chaitanya chundru wrote:
> Some specific devices are taking time to settle the link in L1ss.
> So added a retry logic before returning from the suspend op.
> 
> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> ---
>  drivers/pci/controller/dwc/pcie-qcom.c | 25 ++++++++++++++++++++-----
>  1 file changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> index f7dd5dc..f3201bd 100644
> --- a/drivers/pci/controller/dwc/pcie-qcom.c
> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> @@ -1829,15 +1829,30 @@ static int __maybe_unused qcom_pcie_pm_suspend(struct device *dev)
>  {
>  	struct qcom_pcie *pcie = dev_get_drvdata(dev);
>  	u32 val;
> +	ktime_t timeout, start;
>  
>  	if (!pcie->cfg->supports_system_suspend)
>  		return 0;
>  
> -	/* if the link is not in l1ss don't turn off clocks */
> -	val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
> -	if (!(val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
> -		dev_warn(dev, "Link is not in L1ss\n");
> -		return 0;
> +	start = ktime_get();
> +	/* Wait max 100 ms */
> +	timeout = ktime_add_ms(start, 100);

In my tests 100 ms is ample margin for most NVMe models (it's often 0 and
generally < 10), however with one model I saw delays of up to 150 ms, so
this should probably be 200 ms or so (it's a long time, but most of the
time the actual delay is significantly lower

> +	while (1) {
> +		bool timedout = ktime_after(ktime_get(), timeout);

'timedout' looks very similar to the other local variable 'timeout'
in this function. Actually why not just do without the new variable and
put this after reading the register.

   		if (ktime_after(ktime_get(), timeout)) {
			dev_warn(dev, "Link is not in L1ss\n");
 			return 0;
		}

> +
> +		/* if the link is not in l1ss don't turn off clocks */
> +		val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
> +		if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
> +			dev_info(dev, "Link enters L1ss after %d ms\n",
> +					ktime_to_ms(ktime_get() - start));


Probably this should be dev_dbg() to avoid cluttering the kernel log that
isn't relevant most of the time.

> +			break;
> +		}
> +
> +		if (timedout) {
> +			dev_warn(dev, "Link is not in L1ss\n");
> +			return 0;
> +		}
> +		usleep_range(1000, 1200);

You could use fsleep() instead of specifying a range.

Based on my testing I think a slightly higher delay like 5ms wouldn't hurt.
That would result in less 'busy looping' for slower NVMes and would still
be reasonable fast for those that need 10 ms or so.

Actually you could replace the entire loop with something like this:

	if (readl_poll_timeout(pcie->parf + PCIE20_PARF_PM_STTS, val,
	    val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB, 5000, 200000) {
	    dev_warn(dev, "Link is not in L1ss\n");
	    return 0;
	}
kernel test robot Aug. 5, 2022, 3:14 a.m. UTC | #3
Hi Krishna,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on helgaas-pci/next]
[also build test WARNING on next-20220804]
[cannot apply to linus/master v5.19]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Krishna-chaitanya-chundru/PCI-Restrict-pci-transactions-after-pci-suspend/20220803-193033
base:   https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git next
config: arm64-randconfig-r024-20220804 (https://download.01.org/0day-ci/archive/20220805/202208051112.qnLsiuff-lkp@intel.com/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 26dd42705c2af0b8f6e5d6cdb32c9bd5ed9524eb)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm64 cross compiling tool for clang build
        # apt-get install binutils-aarch64-linux-gnu
        # https://github.com/intel-lab-lkp/linux/commit/64b4ad561ad4a28aa8840303f29669bf7af77757
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Krishna-chaitanya-chundru/PCI-Restrict-pci-transactions-after-pci-suspend/20220803-193033
        git checkout 64b4ad561ad4a28aa8840303f29669bf7af77757
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm64 SHELL=/bin/bash drivers/pci/controller/dwc/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/pci/controller/dwc/pcie-qcom.c:1847:6: warning: format specifies type 'int' but the argument has type 's64' (aka 'long long') [-Wformat]
                                           ktime_to_ms(ktime_get() - start));
                                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/dev_printk.h:150:67: note: expanded from macro 'dev_info'
           dev_printk_index_wrap(_dev_info, KERN_INFO, dev, dev_fmt(fmt), ##__VA_ARGS__)
                                                                    ~~~     ^~~~~~~~~~~
   include/linux/dev_printk.h:110:23: note: expanded from macro 'dev_printk_index_wrap'
                   _p_func(dev, fmt, ##__VA_ARGS__);                       \
                                ~~~    ^~~~~~~~~~~
   1 warning generated.


vim +1847 drivers/pci/controller/dwc/pcie-qcom.c

  1827	
  1828	static int __maybe_unused qcom_pcie_pm_suspend(struct device *dev)
  1829	{
  1830		struct qcom_pcie *pcie = dev_get_drvdata(dev);
  1831		u32 val;
  1832		ktime_t timeout, start;
  1833	
  1834		if (!pcie->cfg->supports_system_suspend)
  1835			return 0;
  1836	
  1837		start = ktime_get();
  1838		/* Wait max 100 ms */
  1839		timeout = ktime_add_ms(start, 100);
  1840		while (1) {
  1841			bool timedout = ktime_after(ktime_get(), timeout);
  1842	
  1843			/* if the link is not in l1ss don't turn off clocks */
  1844			val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
  1845			if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
  1846				dev_info(dev, "Link enters L1ss after %d ms\n",
> 1847						ktime_to_ms(ktime_get() - start));
  1848				break;
  1849			}
  1850	
  1851			if (timedout) {
  1852				dev_warn(dev, "Link is not in L1ss\n");
  1853				return 0;
  1854			}
  1855			usleep_range(1000, 1200);
  1856		}
  1857	
  1858		if (pcie->cfg->ops->suspend)
  1859			pcie->cfg->ops->suspend(pcie);
  1860	
  1861		pcie->is_suspended = true;
  1862	
  1863		return 0;
  1864	}
  1865
Krishna chaitanya chundru Aug. 24, 2022, 3:41 a.m. UTC | #4
On 8/5/2022 3:03 AM, Matthias Kaehlcke wrote:
> On Wed, Aug 03, 2022 at 04:58:54PM +0530, Krishna chaitanya chundru wrote:
>> Some specific devices are taking time to settle the link in L1ss.
>> So added a retry logic before returning from the suspend op.
>>
>> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
>> ---
>>   drivers/pci/controller/dwc/pcie-qcom.c | 25 ++++++++++++++++++++-----
>>   1 file changed, 20 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
>> index f7dd5dc..f3201bd 100644
>> --- a/drivers/pci/controller/dwc/pcie-qcom.c
>> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
>> @@ -1829,15 +1829,30 @@ static int __maybe_unused qcom_pcie_pm_suspend(struct device *dev)
>>   {
>>   	struct qcom_pcie *pcie = dev_get_drvdata(dev);
>>   	u32 val;
>> +	ktime_t timeout, start;
>>   
>>   	if (!pcie->cfg->supports_system_suspend)
>>   		return 0;
>>   
>> -	/* if the link is not in l1ss don't turn off clocks */
>> -	val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
>> -	if (!(val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
>> -		dev_warn(dev, "Link is not in L1ss\n");
>> -		return 0;
>> +	start = ktime_get();
>> +	/* Wait max 100 ms */
>> +	timeout = ktime_add_ms(start, 100);
> In my tests 100 ms is ample margin for most NVMe models (it's often 0 and
> generally < 10), however with one model I saw delays of up to 150 ms, so
> this should probably be 200 ms or so (it's a long time, but most of the
> time the actual delay is significantly lower
ok I will increase the time to 200.
>
>> +	while (1) {
>> +		bool timedout = ktime_after(ktime_get(), timeout);
> 'timedout' looks very similar to the other local variable 'timeout'
> in this function. Actually why not just do without the new variable and
> put this after reading the register.
>
>     		if (ktime_after(ktime_get(), timeout)) {
> 			dev_warn(dev, "Link is not in L1ss\n");
>   			return 0;
> 		}
ok sure will update in the next patch.
>> +
>> +		/* if the link is not in l1ss don't turn off clocks */
>> +		val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
>> +		if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
>> +			dev_info(dev, "Link enters L1ss after %d ms\n",
>> +					ktime_to_ms(ktime_get() - start));
>
> Probably this should be dev_dbg() to avoid cluttering the kernel log that
> isn't relevant most of the time.
ok sure will update in next patch.
>
>> +			break;
>> +		}
>> +
>> +		if (timedout) {
>> +			dev_warn(dev, "Link is not in L1ss\n");
>> +			return 0;
>> +		}
>> +		usleep_range(1000, 1200);
> You could use fsleep() instead of specifying a range.
>
> Based on my testing I think a slightly higher delay like 5ms wouldn't hurt.
> That would result in less 'busy looping' for slower NVMes and would still
> be reasonable fast for those that need 10 ms or so.
>
> Actually you could replace the entire loop with something like this:
>
> 	if (readl_poll_timeout(pcie->parf + PCIE20_PARF_PM_STTS, val,
> 	    val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB, 5000, 200000) {
> 	    dev_warn(dev, "Link is not in L1ss\n");
> 	    return 0;
> 	}

Ok we will look in to this option and will update the patch if needed.
Krishna chaitanya chundru Sept. 9, 2022, 8:49 a.m. UTC | #5
On 8/5/2022 3:03 AM, Matthias Kaehlcke wrote:
> On Wed, Aug 03, 2022 at 04:58:54PM +0530, Krishna chaitanya chundru wrote:
>> Some specific devices are taking time to settle the link in L1ss.
>> So added a retry logic before returning from the suspend op.
>>
>> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
>> ---
>>   drivers/pci/controller/dwc/pcie-qcom.c | 25 ++++++++++++++++++++-----
>>   1 file changed, 20 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
>> index f7dd5dc..f3201bd 100644
>> --- a/drivers/pci/controller/dwc/pcie-qcom.c
>> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
>> @@ -1829,15 +1829,30 @@ static int __maybe_unused qcom_pcie_pm_suspend(struct device *dev)
>>   {
>>   	struct qcom_pcie *pcie = dev_get_drvdata(dev);
>>   	u32 val;
>> +	ktime_t timeout, start;
>>   
>>   	if (!pcie->cfg->supports_system_suspend)
>>   		return 0;
>>   
>> -	/* if the link is not in l1ss don't turn off clocks */
>> -	val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
>> -	if (!(val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
>> -		dev_warn(dev, "Link is not in L1ss\n");
>> -		return 0;
>> +	start = ktime_get();
>> +	/* Wait max 100 ms */
>> +	timeout = ktime_add_ms(start, 100);
> In my tests 100 ms is ample margin for most NVMe models (it's often 0 and
> generally < 10), however with one model I saw delays of up to 150 ms, so
> this should probably be 200 ms or so (it's a long time, but most of the
> time the actual delay is significantly lower
>
>> +	while (1) {
>> +		bool timedout = ktime_after(ktime_get(), timeout);
> 'timedout' looks very similar to the other local variable 'timeout'
> in this function. Actually why not just do without the new variable and
> put this after reading the register.
>
>     		if (ktime_after(ktime_get(), timeout)) {
> 			dev_warn(dev, "Link is not in L1ss\n");
>   			return 0;
> 		}
>
>> +
>> +		/* if the link is not in l1ss don't turn off clocks */
>> +		val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
>> +		if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
>> +			dev_info(dev, "Link enters L1ss after %d ms\n",
>> +					ktime_to_ms(ktime_get() - start));
>
> Probably this should be dev_dbg() to avoid cluttering the kernel log that
> isn't relevant most of the time.
>
>> +			break;
>> +		}
>> +
>> +		if (timedout) {
>> +			dev_warn(dev, "Link is not in L1ss\n");
>> +			return 0;
>> +		}
>> +		usleep_range(1000, 1200);
> You could use fsleep() instead of specifying a range.
>
> Based on my testing I think a slightly higher delay like 5ms wouldn't hurt.
> That would result in less 'busy looping' for slower NVMes and would still
> be reasonable fast for those that need 10 ms or so.
>
> Actually you could replace the entire loop with something like this:
>
> 	if (readl_poll_timeout(pcie->parf + PCIE20_PARF_PM_STTS, val,
> 	    val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB, 5000, 200000) {
> 	    dev_warn(dev, "Link is not in L1ss\n");
> 	    return 0;
> 	}

Hi Matthias,

In the v6 patch series we tried to include this logic, but as we are 
going with syscore ops

all the interrupts will be disabled by that time. and this timeout logic 
is enabling interrupts

and this causes the suspend to fail. So going with the previous logic.

Thanks & Regards,

Krishna chaitanya.
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index f7dd5dc..f3201bd 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -1829,15 +1829,30 @@  static int __maybe_unused qcom_pcie_pm_suspend(struct device *dev)
 {
 	struct qcom_pcie *pcie = dev_get_drvdata(dev);
 	u32 val;
+	ktime_t timeout, start;
 
 	if (!pcie->cfg->supports_system_suspend)
 		return 0;
 
-	/* if the link is not in l1ss don't turn off clocks */
-	val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
-	if (!(val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
-		dev_warn(dev, "Link is not in L1ss\n");
-		return 0;
+	start = ktime_get();
+	/* Wait max 100 ms */
+	timeout = ktime_add_ms(start, 100);
+	while (1) {
+		bool timedout = ktime_after(ktime_get(), timeout);
+
+		/* if the link is not in l1ss don't turn off clocks */
+		val = readl(pcie->parf + PCIE20_PARF_PM_STTS);
+		if ((val & PCIE20_PARF_PM_STTS_LINKST_IN_L1SUB)) {
+			dev_info(dev, "Link enters L1ss after %d ms\n",
+					ktime_to_ms(ktime_get() - start));
+			break;
+		}
+
+		if (timedout) {
+			dev_warn(dev, "Link is not in L1ss\n");
+			return 0;
+		}
+		usleep_range(1000, 1200);
 	}
 
 	if (pcie->cfg->ops->suspend)