diff mbox

[v2,2/4] platform/x86: intel_telemetry: Fix suspend stats

Message ID 1511274984-6165-3-git-send-email-souvik.k.chakravarty@intel.com (mailing list archive)
State Changes Requested, archived
Delegated to: Andy Shevchenko
Headers show

Commit Message

Chakravarty, Souvik K Nov. 21, 2017, 2:36 p.m. UTC
Suspend stats are not reported consistently due to a limitation in the PMC
firmware. This limitation causes a delay in updating the s0ix counters and
residencies in the telemetry log upon s0ix exit. As a consequence, reading
these counters from the suspend-exit notifier may result in zero read.

This patch fixes this issue by cross-verifying the s0ix residencies from
the GCR TELEM registers in case the counters are not incremented in the
telemetry log after suspend.

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=197833

We also remove unnecessary 'static' qualifiers from local variables.

Reported-and-tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
Signed-off-by: Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
---
 drivers/platform/x86/intel_telemetry_debugfs.c | 29 ++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

Changes since v1:
 * Use pmc_ipc_gcr_readq API to read 64-bits at a time

Comments

kernel test robot Nov. 22, 2017, 10:09 p.m. UTC | #1
Hi Souvik,

I love your patch! Perhaps something to improve:

[auto build test WARNING on platform-drivers-x86/for-next]
[also build test WARNING on v4.14 next-20171122]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Souvik-Kumar-Chakravarty/platform-x86-intel_telemetry-Fix-logs-and-formatting/20171123-052155
base:   git://git.infradead.org/users/dvhart/linux-platform-drivers-x86.git for-next
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

Note: it may well be a FALSE warning. FWIW you are at least aware of it now.
http://gcc.gnu.org/wiki/Better_Uninitialized_Warnings

All warnings (new ones prefixed by >>):

   drivers/platform/x86/intel_telemetry_debugfs.c: In function 'pm_suspend_exit_cb':
>> drivers/platform/x86/intel_telemetry_debugfs.c:915:25: warning: 'suspend_deep_ctr_exit' may be used uninitialized in this function [-Wmaybe-uninitialized]
       suspend_deep_ctr_exit++;
       ~~~~~~~~~~~~~~~~~~~~~^~
>> drivers/platform/x86/intel_telemetry_debugfs.c:912:25: warning: 'suspend_shlw_ctr_exit' may be used uninitialized in this function [-Wmaybe-uninitialized]
       suspend_shlw_ctr_exit++;
       ~~~~~~~~~~~~~~~~~~~~~^~

vim +/suspend_deep_ctr_exit +915 drivers/platform/x86/intel_telemetry_debugfs.c

   854	
   855	static int pm_suspend_exit_cb(void)
   856	{
   857		struct telemetry_evtlog evtlog[TELEM_MAX_OS_ALLOCATED_EVENTS];
   858		struct telemetry_debugfs_conf *conf = debugfs_conf;
   859		u32 suspend_shlw_ctr_exit, suspend_deep_ctr_exit;
   860		u64 suspend_shlw_res_exit, suspend_deep_res_exit;
   861		int ret, index;
   862	
   863		if (!suspend_prep_ok)
   864			goto out;
   865	
   866		ret = telemetry_raw_read_eventlog(TELEM_IOSS, evtlog,
   867						  TELEM_MAX_OS_ALLOCATED_EVENTS);
   868		if (ret < 0)
   869			goto out;
   870	
   871		for (index = 0; index < ret; index++) {
   872			TELEM_CHECK_AND_PARSE_CTRS(conf->s0ix_shlw_occ_id,
   873						   suspend_shlw_ctr_exit);
   874	
   875			TELEM_CHECK_AND_PARSE_CTRS(conf->s0ix_deep_occ_id,
   876						   suspend_deep_ctr_exit);
   877	
   878			TELEM_CHECK_AND_PARSE_CTRS(conf->s0ix_shlw_res_id,
   879						   suspend_shlw_res_exit);
   880	
   881			TELEM_CHECK_AND_PARSE_CTRS(conf->s0ix_deep_res_id,
   882						   suspend_deep_res_exit);
   883		}
   884	
   885		if ((suspend_shlw_ctr_exit < suspend_shlw_ctr_temp) ||
   886		    (suspend_deep_ctr_exit < suspend_deep_ctr_temp) ||
   887		    (suspend_shlw_res_exit < suspend_shlw_res_temp) ||
   888		    (suspend_deep_res_exit < suspend_deep_res_temp)) {
   889			pr_err("Wrong s0ix counters detected\n");
   890			goto out;
   891		}
   892	
   893		/*
   894		 * Due to some design limitations in the firmware, sometimes the
   895		 * counters do not get updated by the time we reach here. As a
   896		 * workaround, we try to see if this was a genuine case of sleep
   897		 * failure or not by cross-checking from PMC GCR registers directly.
   898		 */
   899		if (suspend_shlw_ctr_exit == suspend_shlw_ctr_temp &&
   900		    suspend_deep_ctr_exit == suspend_deep_ctr_temp) {
   901			ret = intel_pmc_gcr_readq(PMC_GCR_TELEM_SHLW_S0IX_REG,
   902						  &suspend_shlw_res_exit);
   903			if (ret < 0)
   904				goto out;
   905	
   906			ret = intel_pmc_gcr_readq(PMC_GCR_TELEM_DEEP_S0IX_REG,
   907						  &suspend_deep_res_exit);
   908			if (ret < 0)
   909				goto out;
   910	
   911			if (suspend_shlw_res_exit > suspend_shlw_res_temp)
 > 912				suspend_shlw_ctr_exit++;
   913	
   914			if (suspend_deep_res_exit > suspend_deep_res_temp)
 > 915				suspend_deep_ctr_exit++;
   916		}
   917	
   918		suspend_shlw_ctr_exit -= suspend_shlw_ctr_temp;
   919		suspend_deep_ctr_exit -= suspend_deep_ctr_temp;
   920		suspend_shlw_res_exit -= suspend_shlw_res_temp;
   921		suspend_deep_res_exit -= suspend_deep_res_temp;
   922	
   923		if (suspend_shlw_ctr_exit == 1) {
   924			conf->suspend_stats.shlw_ctr +=
   925			suspend_shlw_ctr_exit;
   926	
   927			conf->suspend_stats.shlw_res +=
   928			suspend_shlw_res_exit;
   929		}
   930		/* Shallow Wakes Case */
   931		else if (suspend_shlw_ctr_exit > 1) {
   932			conf->suspend_stats.shlw_swake_ctr +=
   933			suspend_shlw_ctr_exit;
   934	
   935			conf->suspend_stats.shlw_swake_res +=
   936			suspend_shlw_res_exit;
   937		}
   938	
   939		if (suspend_deep_ctr_exit == 1) {
   940			conf->suspend_stats.deep_ctr +=
   941			suspend_deep_ctr_exit;
   942	
   943			conf->suspend_stats.deep_res +=
   944			suspend_deep_res_exit;
   945		}
   946	
   947		/* Shallow Wakes Case */
   948		else if (suspend_deep_ctr_exit > 1) {
   949			conf->suspend_stats.deep_swake_ctr +=
   950			suspend_deep_ctr_exit;
   951	
   952			conf->suspend_stats.deep_swake_res +=
   953			suspend_deep_res_exit;
   954		}
   955	
   956	out:
   957		suspend_prep_ok = 0;
   958		return NOTIFY_OK;
   959	}
   960	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Andy Shevchenko Nov. 23, 2017, 9:25 p.m. UTC | #2
On Tue, Nov 21, 2017 at 4:36 PM, Souvik Kumar Chakravarty
<souvik.k.chakravarty@intel.com> wrote:
> Suspend stats are not reported consistently due to a limitation in the PMC
> firmware. This limitation causes a delay in updating the s0ix counters and
> residencies in the telemetry log upon s0ix exit. As a consequence, reading
> these counters from the suspend-exit notifier may result in zero read.
>
> This patch fixes this issue by cross-verifying the s0ix residencies from
> the GCR TELEM registers in case the counters are not incremented in the
> telemetry log after suspend.
>
> This fixes https://bugzilla.kernel.org/show_bug.cgi?id=197833
>
> We also remove unnecessary 'static' qualifiers from local variables.
>
> Reported-and-tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
> Signed-off-by: Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>

> -       static u32 suspend_shlw_ctr_exit, suspend_deep_ctr_exit;
> -       static u64 suspend_shlw_res_exit, suspend_deep_res_exit;
>         struct telemetry_debugfs_conf *conf = debugfs_conf;
> +       u32 suspend_shlw_ctr_exit, suspend_deep_ctr_exit;
> +       u64 suspend_shlw_res_exit, suspend_deep_res_exit;
>         int ret, index;

> +       if (suspend_shlw_ctr_exit == suspend_shlw_ctr_temp &&
> +           suspend_deep_ctr_exit == suspend_deep_ctr_temp) {

kbuildbot is absolutely right. How this code is supposed to work? It's flaky.

Please, redesign this approach.
Chakravarty, Souvik K Nov. 24, 2017, 2:51 a.m. UTC | #3
On Fri, November 24, 2017 at 2:55 AM, Andy Shevchenko <andy.shevchenko@gmail.com> wrote:
> On Tue, Nov 21, 2017 at 4:36 PM, Souvik Kumar Chakravarty

> <souvik.k.chakravarty@intel.com> wrote:

> > Suspend stats are not reported consistently due to a limitation in the

> > PMC firmware. This limitation causes a delay in updating the s0ix

> > counters and residencies in the telemetry log upon s0ix exit. As a

> > consequence, reading these counters from the suspend-exit notifier may result

> in zero read.

> >

> > This patch fixes this issue by cross-verifying the s0ix residencies

> > from the GCR TELEM registers in case the counters are not incremented

> > in the telemetry log after suspend.

> >

> > This fixes https://bugzilla.kernel.org/show_bug.cgi?id=197833

> >

> > We also remove unnecessary 'static' qualifiers from local variables.

> >

> > Reported-and-tested-by: Rajneesh Bhardwaj

> > <rajneesh.bhardwaj@intel.com>

> > Signed-off-by: Souvik Kumar Chakravarty

> > <souvik.k.chakravarty@intel.com>

> 

> > -       static u32 suspend_shlw_ctr_exit, suspend_deep_ctr_exit;

> > -       static u64 suspend_shlw_res_exit, suspend_deep_res_exit;

> >         struct telemetry_debugfs_conf *conf = debugfs_conf;

> > +       u32 suspend_shlw_ctr_exit, suspend_deep_ctr_exit;

> > +       u64 suspend_shlw_res_exit, suspend_deep_res_exit;

> >         int ret, index;

> 

> > +       if (suspend_shlw_ctr_exit == suspend_shlw_ctr_temp &&

> > +           suspend_deep_ctr_exit == suspend_deep_ctr_temp) {

> 

> kbuildbot is absolutely right. How this code is supposed to work? It's flaky.


suspend_shlw_ctr_exit & suspend_deep_ctr_exit have already been initialized before this comparison (comparing the counters before and after sleep).
I will explicitly initialize them so that kbuildbot does not complain.
> 

> Please, redesign this approach.

> 

> --

> With Best Regards,

> Andy Shevchenko
diff mbox

Patch

diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
index 4249e826..d7e90fd 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel_telemetry_debugfs.c
@@ -855,9 +855,9 @@  static int pm_suspend_prep_cb(void)
 static int pm_suspend_exit_cb(void)
 {
 	struct telemetry_evtlog evtlog[TELEM_MAX_OS_ALLOCATED_EVENTS];
-	static u32 suspend_shlw_ctr_exit, suspend_deep_ctr_exit;
-	static u64 suspend_shlw_res_exit, suspend_deep_res_exit;
 	struct telemetry_debugfs_conf *conf = debugfs_conf;
+	u32 suspend_shlw_ctr_exit, suspend_deep_ctr_exit;
+	u64 suspend_shlw_res_exit, suspend_deep_res_exit;
 	int ret, index;
 
 	if (!suspend_prep_ok)
@@ -890,6 +890,31 @@  static int pm_suspend_exit_cb(void)
 		goto out;
 	}
 
+	/*
+	 * Due to some design limitations in the firmware, sometimes the
+	 * counters do not get updated by the time we reach here. As a
+	 * workaround, we try to see if this was a genuine case of sleep
+	 * failure or not by cross-checking from PMC GCR registers directly.
+	 */
+	if (suspend_shlw_ctr_exit == suspend_shlw_ctr_temp &&
+	    suspend_deep_ctr_exit == suspend_deep_ctr_temp) {
+		ret = intel_pmc_gcr_readq(PMC_GCR_TELEM_SHLW_S0IX_REG,
+					  &suspend_shlw_res_exit);
+		if (ret < 0)
+			goto out;
+
+		ret = intel_pmc_gcr_readq(PMC_GCR_TELEM_DEEP_S0IX_REG,
+					  &suspend_deep_res_exit);
+		if (ret < 0)
+			goto out;
+
+		if (suspend_shlw_res_exit > suspend_shlw_res_temp)
+			suspend_shlw_ctr_exit++;
+
+		if (suspend_deep_res_exit > suspend_deep_res_temp)
+			suspend_deep_ctr_exit++;
+	}
+
 	suspend_shlw_ctr_exit -= suspend_shlw_ctr_temp;
 	suspend_deep_ctr_exit -= suspend_deep_ctr_temp;
 	suspend_shlw_res_exit -= suspend_shlw_res_temp;