diff mbox series

[v4,1/2] selftests/resctrl: Adjust effective L3 cache size with SNC enabled

Message ID e5f2fbe4f492d37569c389aebcb91168f98783ba.1720774981.git.maciej.wieczor-retman@intel.com (mailing list archive)
State New
Headers show
Series selftests/resctrl: SNC kernel support discovery | expand

Commit Message

Maciej Wieczor-Retman July 12, 2024, 9:04 a.m. UTC
Sub-NUMA Cluster divides CPUs sharing an L3 cache into separate NUMA
nodes. Systems may support splitting into either two, three or four
nodes.

When SNC mode is enabled the effective amount of L3 cache available
for allocation is divided by the number of nodes per L3.

Detect which SNC mode is active by comparing the number of CPUs
that share a cache with CPU0, with the number of CPUs on node0.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Co-developed-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
---
Changelog v4:
- Make returned value a static local variable so the function only runs
  the logic once. (Reinette)

Changelog v3:
- Add comparison between present and online cpus to test if the
  calculated SNC mode is credible. (Reinette)
- Added comment to cache size modification to better explain why it is
  needed there. (Reinette)
- Fix facts in patch message. (Reinette)
- Change snc_ways() to snc_nodes_per_l3_cache(). (Reinette)

 tools/testing/selftests/resctrl/resctrl.h   |  4 ++
 tools/testing/selftests/resctrl/resctrlfs.c | 73 +++++++++++++++++++++
 2 files changed, 77 insertions(+)

Comments

Reinette Chatre Aug. 12, 2024, 11:40 p.m. UTC | #1
Hi Maciej,

On 7/12/24 2:04 AM, Maciej Wieczor-Retman wrote:
> Sub-NUMA Cluster divides CPUs sharing an L3 cache into separate NUMA
> nodes. Systems may support splitting into either two, three or four
> nodes.
> 
> When SNC mode is enabled the effective amount of L3 cache available
> for allocation is divided by the number of nodes per L3.
> 
> Detect which SNC mode is active by comparing the number of CPUs
> that share a cache with CPU0, with the number of CPUs on node0.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> Co-developed-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
> Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>

Since you are "From:" author there is no need for a "Co-developed-by"
for you. Tony does need one. Please check: "When to use Acked-by:, Cc:,
and Co-developed-by:" in Documentation/process/submitting-patches.rst
(checkpatch.pl also warns about this).

> ---
> Changelog v4:
> - Make returned value a static local variable so the function only runs
>    the logic once. (Reinette)
> 
> Changelog v3:
> - Add comparison between present and online cpus to test if the
>    calculated SNC mode is credible. (Reinette)
> - Added comment to cache size modification to better explain why it is
>    needed there. (Reinette)
> - Fix facts in patch message. (Reinette)
> - Change snc_ways() to snc_nodes_per_l3_cache(). (Reinette)
> 
>   tools/testing/selftests/resctrl/resctrl.h   |  4 ++
>   tools/testing/selftests/resctrl/resctrlfs.c | 73 +++++++++++++++++++++
>   2 files changed, 77 insertions(+)
> 
> diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
> index 2dda56084588..851b37c9c38a 100644
> --- a/tools/testing/selftests/resctrl/resctrl.h
> +++ b/tools/testing/selftests/resctrl/resctrl.h
> @@ -11,6 +11,7 @@
>   #include <signal.h>
>   #include <dirent.h>
>   #include <stdbool.h>
> +#include <ctype.h>
>   #include <sys/stat.h>
>   #include <sys/ioctl.h>
>   #include <sys/mount.h>
> @@ -43,6 +44,8 @@
>   
>   #define DEFAULT_SPAN		(250 * MB)
>   
> +#define MAX_SNC		4
> +
>   /*
>    * user_params:		User supplied parameters
>    * @cpu:		CPU number to which the benchmark will be bound to
> @@ -120,6 +123,7 @@ extern volatile int *value_sink;
>   
>   extern char llc_occup_path[1024];
>   
> +int snc_nodes_per_l3_cache(void);
>   int get_vendor(void);
>   bool check_resctrlfs_support(void);
>   int filter_dmesg(void);
> diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
> index 250c320349a7..803dd415984c 100644
> --- a/tools/testing/selftests/resctrl/resctrlfs.c
> +++ b/tools/testing/selftests/resctrl/resctrlfs.c
> @@ -156,6 +156,68 @@ int get_domain_id(const char *resource, int cpu_no, int *domain_id)
>   	return 0;
>   }
>   
> +/*
> + * Count number of CPUs in a /sys bit map

"bit map" -> "bitmap"

> + */
> +static unsigned int count_sys_bitmap_bits(char *name)
> +{
> +	FILE *fp = fopen(name, "r");
> +	int count = 0, c;
> +
> +	if (!fp)
> +		return 0;
> +
> +	while ((c = fgetc(fp)) != EOF) {
> +		if (!isxdigit(c))
> +			continue;
> +		switch (c) {
> +		case 'f':
> +			count++;
> +		case '7': case 'b': case 'd': case 'e':
> +			count++;
> +		case '3': case '5': case '6': case '9': case 'a': case 'c':
> +			count++;
> +		case '1': case '2': case '4': case '8':
> +			count++;
> +		}
> +	}
> +	fclose(fp);
> +
> +	return count;
> +}
> +
> +/*
> + * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0.
> + * If some CPUs are offline the numbers may not be exact multiples of each
> + * other. Any offline CPUs on node0 will be also gone from shared_cpu_map of
> + * CPU0 but offline CPUs from other nodes will only make the cache_cpus value
> + * lower. Still try to get the ratio right by preventing the second possibility.

This all seems unnecessary since the next patch sets snc_mode to 1 if there
are any offline CPUs.
Seems more appropriate to move the offline CPU handling to this patch.

> + */
> +int snc_nodes_per_l3_cache(void)
> +{
> +	int node_cpus, cache_cpus, i;
> +	static int snc_mode;
> +
> +	if (!snc_mode) {
> +		node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap");
> +		cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map");
> +
> +		if (!node_cpus || !cache_cpus) {
> +			ksft_print_msg("Could not determine Sub-NUMA Cluster mode.\n");
> +			return 1;
> +		}
> +
> +		for (i = 1; i <= MAX_SNC ; i++) {

(nit: unnecessary space)

> +			if (i * node_cpus >= cache_cpus) {
> +				snc_mode = i;
> +				break;
> +			}

This is irrelevant after the subsequent patch but note that there are scenarios
where above loop cannot set snc_mode and the function will thus return 0 since
snc_mode is not initialized. This is a problem in division done by following hunk.

> +		}
> +	}
> +
> +	return snc_mode;
> +}
> +
>   /*
>    * get_cache_size - Get cache size for a specified CPU
>    * @cpu_no:	CPU number
> @@ -211,6 +273,17 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size
>   			break;
>   	}
>   
> +	/*
> +	 * The amount of cache represented by each bit in the masks
> +	 * in the schemata file is reduced by a factor equal to SNC
> +	 * nodes per L3 cache.
> +	 * E.g. on a SNC-2 system with a 100MB L3 cache a test that
> +	 * allocates memory from its local SNC node (default behavior
> +	 * without using libnuma) will only see 50 MB llc_occupancy
> +	 * with a fully populated L3 mask in the schemata file.
> +	 */
> +	if (cache_num == 3)
> +		*cache_size /= snc_nodes_per_l3_cache();
>   	return 0;
>   }
>   

Reinette
Maciej Wieczor-Retman Aug. 27, 2024, 6:45 a.m. UTC | #2
Hi, thanks for the review,

On 2024-08-12 at 16:40:00 -0700, Reinette Chatre wrote:
>Hi Maciej,
>
>On 7/12/24 2:04 AM, Maciej Wieczor-Retman wrote:
>> Sub-NUMA Cluster divides CPUs sharing an L3 cache into separate NUMA
>> nodes. Systems may support splitting into either two, three or four
>> nodes.
>> 
>> When SNC mode is enabled the effective amount of L3 cache available
>> for allocation is divided by the number of nodes per L3.
>> 
>> Detect which SNC mode is active by comparing the number of CPUs
>> that share a cache with CPU0, with the number of CPUs on node0.
>> 
>> Signed-off-by: Tony Luck <tony.luck@intel.com>
>> Co-developed-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
>> Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
>
>Since you are "From:" author there is no need for a "Co-developed-by"
>for you. Tony does need one. Please check: "When to use Acked-by:, Cc:,
>and Co-developed-by:" in Documentation/process/submitting-patches.rst
>(checkpatch.pl also warns about this).

Thanks, I changed patch's author at some point and I think I forgot to change
the tags.

>
>> ---
>> Changelog v4:
>> - Make returned value a static local variable so the function only runs
>>    the logic once. (Reinette)
>> 
>> Changelog v3:
>> - Add comparison between present and online cpus to test if the
>>    calculated SNC mode is credible. (Reinette)
>> - Added comment to cache size modification to better explain why it is
>>    needed there. (Reinette)
>> - Fix facts in patch message. (Reinette)
>> - Change snc_ways() to snc_nodes_per_l3_cache(). (Reinette)
>> 
>>   tools/testing/selftests/resctrl/resctrl.h   |  4 ++
>>   tools/testing/selftests/resctrl/resctrlfs.c | 73 +++++++++++++++++++++
>>   2 files changed, 77 insertions(+)
>> 
>> diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
>> index 2dda56084588..851b37c9c38a 100644
>> --- a/tools/testing/selftests/resctrl/resctrl.h
>> +++ b/tools/testing/selftests/resctrl/resctrl.h
>> @@ -11,6 +11,7 @@
>>   #include <signal.h>
>>   #include <dirent.h>
>>   #include <stdbool.h>
>> +#include <ctype.h>
>>   #include <sys/stat.h>
>>   #include <sys/ioctl.h>
>>   #include <sys/mount.h>
>> @@ -43,6 +44,8 @@
>>   #define DEFAULT_SPAN		(250 * MB)
>> +#define MAX_SNC		4
>> +
>>   /*
>>    * user_params:		User supplied parameters
>>    * @cpu:		CPU number to which the benchmark will be bound to
>> @@ -120,6 +123,7 @@ extern volatile int *value_sink;
>>   extern char llc_occup_path[1024];
>> +int snc_nodes_per_l3_cache(void);
>>   int get_vendor(void);
>>   bool check_resctrlfs_support(void);
>>   int filter_dmesg(void);
>> diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
>> index 250c320349a7..803dd415984c 100644
>> --- a/tools/testing/selftests/resctrl/resctrlfs.c
>> +++ b/tools/testing/selftests/resctrl/resctrlfs.c
>> @@ -156,6 +156,68 @@ int get_domain_id(const char *resource, int cpu_no, int *domain_id)
>>   	return 0;
>>   }
>> +/*
>> + * Count number of CPUs in a /sys bit map
>
>"bit map" -> "bitmap"

Will do.

>
>> + */
>> +static unsigned int count_sys_bitmap_bits(char *name)
>> +{
>> +	FILE *fp = fopen(name, "r");
>> +	int count = 0, c;
>> +
>> +	if (!fp)
>> +		return 0;
>> +
>> +	while ((c = fgetc(fp)) != EOF) {
>> +		if (!isxdigit(c))
>> +			continue;
>> +		switch (c) {
>> +		case 'f':
>> +			count++;
>> +		case '7': case 'b': case 'd': case 'e':
>> +			count++;
>> +		case '3': case '5': case '6': case '9': case 'a': case 'c':
>> +			count++;
>> +		case '1': case '2': case '4': case '8':
>> +			count++;
>> +		}
>> +	}
>> +	fclose(fp);
>> +
>> +	return count;
>> +}
>> +
>> +/*
>> + * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0.
>> + * If some CPUs are offline the numbers may not be exact multiples of each
>> + * other. Any offline CPUs on node0 will be also gone from shared_cpu_map of
>> + * CPU0 but offline CPUs from other nodes will only make the cache_cpus value
>> + * lower. Still try to get the ratio right by preventing the second possibility.
>
>This all seems unnecessary since the next patch sets snc_mode to 1 if there
>are any offline CPUs.
>Seems more appropriate to move the offline CPU handling to this patch.

Okay, I'll move it here.

>
>> + */
>> +int snc_nodes_per_l3_cache(void)
>> +{
>> +	int node_cpus, cache_cpus, i;
>> +	static int snc_mode;
>> +
>> +	if (!snc_mode) {
>> +		node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap");
>> +		cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map");
>> +
>> +		if (!node_cpus || !cache_cpus) {
>> +			ksft_print_msg("Could not determine Sub-NUMA Cluster mode.\n");
>> +			return 1;
>> +		}
>> +
>> +		for (i = 1; i <= MAX_SNC ; i++) {
>
>(nit: unnecessary space)

Will fix.

>
>> +			if (i * node_cpus >= cache_cpus) {
>> +				snc_mode = i;
>> +				break;
>> +			}
>
>This is irrelevant after the subsequent patch but note that there are scenarios
>where above loop cannot set snc_mode and the function will thus return 0 since
>snc_mode is not initialized. This is a problem in division done by following hunk.

Oh right, I'll set initial value to 1.

>
>> +		}
>> +	}
>> +
>> +	return snc_mode;
>> +}
>> +
>>   /*
>>    * get_cache_size - Get cache size for a specified CPU
>>    * @cpu_no:	CPU number
>> @@ -211,6 +273,17 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size
>>   			break;
>>   	}
>> +	/*
>> +	 * The amount of cache represented by each bit in the masks
>> +	 * in the schemata file is reduced by a factor equal to SNC
>> +	 * nodes per L3 cache.
>> +	 * E.g. on a SNC-2 system with a 100MB L3 cache a test that
>> +	 * allocates memory from its local SNC node (default behavior
>> +	 * without using libnuma) will only see 50 MB llc_occupancy
>> +	 * with a fully populated L3 mask in the schemata file.
>> +	 */
>> +	if (cache_num == 3)
>> +		*cache_size /= snc_nodes_per_l3_cache();
>>   	return 0;
>>   }
>
>Reinette
diff mbox series

Patch

diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 2dda56084588..851b37c9c38a 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -11,6 +11,7 @@ 
 #include <signal.h>
 #include <dirent.h>
 #include <stdbool.h>
+#include <ctype.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
 #include <sys/mount.h>
@@ -43,6 +44,8 @@ 
 
 #define DEFAULT_SPAN		(250 * MB)
 
+#define MAX_SNC		4
+
 /*
  * user_params:		User supplied parameters
  * @cpu:		CPU number to which the benchmark will be bound to
@@ -120,6 +123,7 @@  extern volatile int *value_sink;
 
 extern char llc_occup_path[1024];
 
+int snc_nodes_per_l3_cache(void);
 int get_vendor(void);
 bool check_resctrlfs_support(void);
 int filter_dmesg(void);
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 250c320349a7..803dd415984c 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -156,6 +156,68 @@  int get_domain_id(const char *resource, int cpu_no, int *domain_id)
 	return 0;
 }
 
+/*
+ * Count number of CPUs in a /sys bit map
+ */
+static unsigned int count_sys_bitmap_bits(char *name)
+{
+	FILE *fp = fopen(name, "r");
+	int count = 0, c;
+
+	if (!fp)
+		return 0;
+
+	while ((c = fgetc(fp)) != EOF) {
+		if (!isxdigit(c))
+			continue;
+		switch (c) {
+		case 'f':
+			count++;
+		case '7': case 'b': case 'd': case 'e':
+			count++;
+		case '3': case '5': case '6': case '9': case 'a': case 'c':
+			count++;
+		case '1': case '2': case '4': case '8':
+			count++;
+		}
+	}
+	fclose(fp);
+
+	return count;
+}
+
+/*
+ * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0.
+ * If some CPUs are offline the numbers may not be exact multiples of each
+ * other. Any offline CPUs on node0 will be also gone from shared_cpu_map of
+ * CPU0 but offline CPUs from other nodes will only make the cache_cpus value
+ * lower. Still try to get the ratio right by preventing the second possibility.
+ */
+int snc_nodes_per_l3_cache(void)
+{
+	int node_cpus, cache_cpus, i;
+	static int snc_mode;
+
+	if (!snc_mode) {
+		node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap");
+		cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map");
+
+		if (!node_cpus || !cache_cpus) {
+			ksft_print_msg("Could not determine Sub-NUMA Cluster mode.\n");
+			return 1;
+		}
+
+		for (i = 1; i <= MAX_SNC ; i++) {
+			if (i * node_cpus >= cache_cpus) {
+				snc_mode = i;
+				break;
+			}
+		}
+	}
+
+	return snc_mode;
+}
+
 /*
  * get_cache_size - Get cache size for a specified CPU
  * @cpu_no:	CPU number
@@ -211,6 +273,17 @@  int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size
 			break;
 	}
 
+	/*
+	 * The amount of cache represented by each bit in the masks
+	 * in the schemata file is reduced by a factor equal to SNC
+	 * nodes per L3 cache.
+	 * E.g. on a SNC-2 system with a 100MB L3 cache a test that
+	 * allocates memory from its local SNC node (default behavior
+	 * without using libnuma) will only see 50 MB llc_occupancy
+	 * with a fully populated L3 mask in the schemata file.
+	 */
+	if (cache_num == 3)
+		*cache_size /= snc_nodes_per_l3_cache();
 	return 0;
 }