diff mbox series

[2/4] selftests/resctrl: SNC support for CMT

Message ID 75849cb145429798b21c23b6be4abd7ece9df57b.1709721159.git.maciej.wieczor-retman@intel.com (mailing list archive)
State New
Headers show
Series SNC support for resctrl selftests | expand

Commit Message

Maciej Wieczor-Retman March 6, 2024, 10:39 a.m. UTC
Cache Monitoring Technology (CMT) works by measuring how much data in L3
cache is occupied by a given process identified by its Resource
Monitoring ID (RMID).

On systems with Sub-Numa Clusters (SNC) enabled, a process can occupy
not only the cache that belongs to its own NUMA node but also pieces of
other NUMA nodes' caches that lie on the same socket.

A simple correction to make the CMT selftest NUMA-aware is to sum values
reported by all nodes on the same socket for a given RMID.

Reported-by: "Shaopeng Tan (Fujitsu)" <tan.shaopeng@fujitsu.com>
Closes: https://lore.kernel.org/all/TYAPR01MB6330B9B17686EF426D2C3F308B25A@TYAPR01MB6330.jpnprd01.prod.outlook.com/
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
---
 tools/testing/selftests/resctrl/cache.c       | 17 +++++++++++------
 tools/testing/selftests/resctrl/resctrl.h     |  4 +++-
 tools/testing/selftests/resctrl/resctrl_val.c |  9 ++++++---
 3 files changed, 20 insertions(+), 10 deletions(-)

Comments

Ilpo Järvinen March 8, 2024, 1:53 p.m. UTC | #1
On Wed, 6 Mar 2024, Maciej Wieczor-Retman wrote:

> Cache Monitoring Technology (CMT) works by measuring how much data in L3
> cache is occupied by a given process identified by its Resource
> Monitoring ID (RMID).
> 
> On systems with Sub-Numa Clusters (SNC) enabled, a process can occupy
> not only the cache that belongs to its own NUMA node but also pieces of
> other NUMA nodes' caches that lie on the same socket.
> 
> A simple correction to make the CMT selftest NUMA-aware is to sum values
> reported by all nodes on the same socket for a given RMID.
> 
> Reported-by: "Shaopeng Tan (Fujitsu)" <tan.shaopeng@fujitsu.com>
> Closes: https://lore.kernel.org/all/TYAPR01MB6330B9B17686EF426D2C3F308B25A@TYAPR01MB6330.jpnprd01.prod.outlook.com/
> Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
> ---
>  tools/testing/selftests/resctrl/cache.c       | 17 +++++++++++------
>  tools/testing/selftests/resctrl/resctrl.h     |  4 +++-
>  tools/testing/selftests/resctrl/resctrl_val.c |  9 ++++++---
>  3 files changed, 20 insertions(+), 10 deletions(-)
> 
> diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
> index 1b339d6bbff1..dab81920033b 100644
> --- a/tools/testing/selftests/resctrl/cache.c
> +++ b/tools/testing/selftests/resctrl/cache.c
> @@ -161,16 +161,21 @@ int perf_event_measure(int pe_fd, struct perf_event_read *pe_read,
>   *
>   * Return: =0 on success. <0 on failure.
>   */
> -int measure_llc_resctrl(const char *filename, int bm_pid)
> +int measure_llc_resctrl(const char *filename, int bm_pid, const char *ctrlgrp,
> +			const char *mongrp, int res_id)
>  {
> -	unsigned long llc_occu_resc = 0;
> +	unsigned long sum = 0, llc_occu_resc = 0;
>  	int ret;
>  
> -	ret = get_llc_occu_resctrl(&llc_occu_resc);
> -	if (ret < 0)
> -		return ret;
> +	for (int i = 0 ; i < snc_ways() ; i++) {

Spaces as per usual coding style:

	for (int i = 0; i < snc_ways(); i++) {

> +		set_cmt_path(ctrlgrp, mongrp, res_id + i);
> +		ret = get_llc_occu_resctrl(&llc_occu_resc);
> +		if (ret < 0)
> +			return ret;
> +		sum += llc_occu_resc;
> +	}
>  
> -	return print_results_cache(filename, bm_pid, llc_occu_resc);
> +	return print_results_cache(filename, bm_pid, sum);
>  }
>  
>  /*

> @@ -828,6 +828,8 @@ int resctrl_val(const struct resctrl_test *test,
>  	sleep(1);
>  
>  	/* Test runs until the callback setup() tells the test to stop. */
> +	get_domain_id("L3", uparams->cpu, &res_id);

Hardcoding L3 here limits the genericness of this function. You don't even 
need to do it, get_domain_id() does "MB" -> "L3" transformation implicitly 
for you so you can just pass test->resource instead.

Also, I don't understand why you now again make the naming inconsistent 
with "res_id".

If you based this on top of the patches I just posted, resctl_val() 
already the domain_id variable.
Ilpo Järvinen March 8, 2024, 1:59 p.m. UTC | #2
On Fri, 8 Mar 2024, Ilpo Järvinen wrote:

> On Wed, 6 Mar 2024, Maciej Wieczor-Retman wrote:
> 
> > Cache Monitoring Technology (CMT) works by measuring how much data in L3
> > cache is occupied by a given process identified by its Resource
> > Monitoring ID (RMID).
> > 
> > On systems with Sub-Numa Clusters (SNC) enabled, a process can occupy
> > not only the cache that belongs to its own NUMA node but also pieces of
> > other NUMA nodes' caches that lie on the same socket.
> > 
> > A simple correction to make the CMT selftest NUMA-aware is to sum values
> > reported by all nodes on the same socket for a given RMID.
> > 
> > Reported-by: "Shaopeng Tan (Fujitsu)" <tan.shaopeng@fujitsu.com>
> > Closes: https://lore.kernel.org/all/TYAPR01MB6330B9B17686EF426D2C3F308B25A@TYAPR01MB6330.jpnprd01.prod.outlook.com/
> > Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
> > ---

> > @@ -828,6 +828,8 @@ int resctrl_val(const struct resctrl_test *test,
> >  	sleep(1);
> >  
> >  	/* Test runs until the callback setup() tells the test to stop. */
> > +	get_domain_id("L3", uparams->cpu, &res_id);
> 
> Hardcoding L3 here limits the genericness of this function. You don't even 
> need to do it, get_domain_id() does "MB" -> "L3" transformation implicitly 
> for you so you can just pass test->resource instead.
> 
> Also, I don't understand why you now again make the naming inconsistent 
> with "res_id".
> 
> If you based this on top of the patches I just posted, resctl_val() 
> already the domain_id variable.

Ah, I retract what I said. I see you actually want it only from L3.

> > +     res_id *= snc_ways();

I don't understand what this is trying to achieve and how.
Maciej Wieczor-Retman March 13, 2024, 10:23 a.m. UTC | #3
On 2024-03-08 at 15:59:02 +0200, Ilpo Järvinen wrote:
>On Fri, 8 Mar 2024, Ilpo Järvinen wrote:
>
>> On Wed, 6 Mar 2024, Maciej Wieczor-Retman wrote:
>> 
>> > Cache Monitoring Technology (CMT) works by measuring how much data in L3
>> > cache is occupied by a given process identified by its Resource
>> > Monitoring ID (RMID).
>> > 
>> > On systems with Sub-Numa Clusters (SNC) enabled, a process can occupy
>> > not only the cache that belongs to its own NUMA node but also pieces of
>> > other NUMA nodes' caches that lie on the same socket.
>> > 
>> > A simple correction to make the CMT selftest NUMA-aware is to sum values
>> > reported by all nodes on the same socket for a given RMID.
>> > 
>> > Reported-by: "Shaopeng Tan (Fujitsu)" <tan.shaopeng@fujitsu.com>
>> > Closes: https://lore.kernel.org/all/TYAPR01MB6330B9B17686EF426D2C3F308B25A@TYAPR01MB6330.jpnprd01.prod.outlook.com/
>> > Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
>> > ---
>
>> > @@ -828,6 +828,8 @@ int resctrl_val(const struct resctrl_test *test,
>> >  	sleep(1);
>> >  
>> >  	/* Test runs until the callback setup() tells the test to stop. */
>> > +	get_domain_id("L3", uparams->cpu, &res_id);
>> 
>> Hardcoding L3 here limits the genericness of this function. You don't even 
>> need to do it, get_domain_id() does "MB" -> "L3" transformation implicitly 
>> for you so you can just pass test->resource instead.
>> 
>> Also, I don't understand why you now again make the naming inconsistent 
>> with "res_id".
>> 
>> If you based this on top of the patches I just posted, resctl_val() 
>> already the domain_id variable.
>
>Ah, I retract what I said. I see you actually want it only from L3.
>
>> > +     res_id *= snc_ways();
>
>I don't understand what this is trying to achieve and how.

We exchanged some private messages on this but I'll post the explanation here
too if anyone else was looking for it.

get_domain_id("L3"...) essentially gives us the number of the socket (on
platforms that have one L3 cache per socket - I still have too look into other
ones). The problem here is that to get an accurate reading with SNC enabled we
need to collect values from all nodes on a single socket that has the CPU our
test is running on. So we need to find the first node on that socket so then we
can loop through all the nodes on that socket. To do that we multiply res_id by
the amount of SNC nodes per socket (res_id *= snc_ways()) and that's it.

I'll add some helper with an explaining comment on what it does in the next
version.

>
>-- 
> i.
diff mbox series

Patch

diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 1b339d6bbff1..dab81920033b 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -161,16 +161,21 @@  int perf_event_measure(int pe_fd, struct perf_event_read *pe_read,
  *
  * Return: =0 on success. <0 on failure.
  */
-int measure_llc_resctrl(const char *filename, int bm_pid)
+int measure_llc_resctrl(const char *filename, int bm_pid, const char *ctrlgrp,
+			const char *mongrp, int res_id)
 {
-	unsigned long llc_occu_resc = 0;
+	unsigned long sum = 0, llc_occu_resc = 0;
 	int ret;
 
-	ret = get_llc_occu_resctrl(&llc_occu_resc);
-	if (ret < 0)
-		return ret;
+	for (int i = 0 ; i < snc_ways() ; i++) {
+		set_cmt_path(ctrlgrp, mongrp, res_id + i);
+		ret = get_llc_occu_resctrl(&llc_occu_resc);
+		if (ret < 0)
+			return ret;
+		sum += llc_occu_resc;
+	}
 
-	return print_results_cache(filename, bm_pid, llc_occu_resc);
+	return print_results_cache(filename, bm_pid, sum);
 }
 
 /*
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 41811e87f81c..178fb2eab13a 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -133,6 +133,7 @@  extern pid_t bm_pid, ppid;
 extern char llc_occup_path[1024];
 
 int snc_ways(void);
+void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num);
 int get_vendor(void);
 bool check_resctrlfs_support(void);
 int filter_dmesg(void);
@@ -182,7 +183,8 @@  int perf_open(struct perf_event_attr *pea, pid_t pid, int cpu_no);
 int perf_event_reset_enable(int pe_fd);
 int perf_event_measure(int pe_fd, struct perf_event_read *pe_read,
 		       const char *filename, int bm_pid);
-int measure_llc_resctrl(const char *filename, int bm_pid);
+int measure_llc_resctrl(const char *filename, int bm_pid, const char *ctrlgrp,
+			const char *mongrp, int res_id);
 void show_cache_info(int no_of_bits, __u64 avg_llc_val, size_t cache_span, bool lines);
 
 /*
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index 5a49f07a6c85..e75e3923ebe2 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -557,7 +557,7 @@  static int print_results_bw(char *filename,  int bm_pid, float bw_imc,
 	return 0;
 }
 
-static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num)
+void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num)
 {
 	if (strlen(ctrlgrp) && strlen(mongrp))
 		sprintf(llc_occup_path,	CON_MON_LCC_OCCUP_PATH,	RESCTRL_PATH,
@@ -698,8 +698,8 @@  int resctrl_val(const struct resctrl_test *test,
 {
 	char *resctrl_val = param->resctrl_val;
 	unsigned long bw_resc_start = 0;
+	int res_id, ret = 0, pipefd[2];
 	struct sigaction sigact;
-	int ret = 0, pipefd[2];
 	char pipe_message = 0;
 	union sigval value;
 
@@ -828,6 +828,8 @@  int resctrl_val(const struct resctrl_test *test,
 	sleep(1);
 
 	/* Test runs until the callback setup() tells the test to stop. */
+	get_domain_id("L3", uparams->cpu, &res_id);
+	res_id *= snc_ways();
 	while (1) {
 		ret = param->setup(test, uparams, param);
 		if (ret == END_OF_TESTS) {
@@ -844,7 +846,8 @@  int resctrl_val(const struct resctrl_test *test,
 				break;
 		} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
 			sleep(1);
-			ret = measure_llc_resctrl(param->filename, bm_pid);
+			ret = measure_llc_resctrl(param->filename, bm_pid, param->ctrlgrp,
+						  param->mongrp, res_id);
 			if (ret)
 				break;
 		}