diff mbox series

[v2,02/13] selftests/resctrl: Calculate resctrl FS derived mem bw over sleep(1) only

Message ID 20240311135230.7007-3-ilpo.jarvinen@linux.intel.com (mailing list archive)
State New
Headers show
Series selftests/resctrl: resctrl_val() related cleanups & improvements | expand

Commit Message

Ilpo Järvinen March 11, 2024, 1:52 p.m. UTC
For MBM/MBA tests, measure_vals() calls get_mem_bw_imc() that performs
the measurement over a duration of sleep(1) call. The memory bandwidth
numbers from IMC are derived over this duration. The resctrl FS derived
memory bandwidth, however, is calculated inside measure_vals() and only
takes delta between the previous value and the current one which
besides the actual test, also samples inter-test noise.

Rework the logic in measure_vals() and get_mem_bw_imc() such that the
resctrl FS memory bandwidth section covers much shorter duration
closely matching that of the IMC perf counters to improve measurement
accuracy.

Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 tools/testing/selftests/resctrl/resctrl_val.c | 72 +++++++++++++------
 1 file changed, 50 insertions(+), 22 deletions(-)

Comments

Reinette Chatre March 20, 2024, 4:53 a.m. UTC | #1
Hi Ilpo,

On 3/11/2024 6:52 AM, Ilpo Järvinen wrote:
> For MBM/MBA tests, measure_vals() calls get_mem_bw_imc() that performs
> the measurement over a duration of sleep(1) call. The memory bandwidth
> numbers from IMC are derived over this duration. The resctrl FS derived
> memory bandwidth, however, is calculated inside measure_vals() and only
> takes delta between the previous value and the current one which
> besides the actual test, also samples inter-test noise.
> 
> Rework the logic in measure_vals() and get_mem_bw_imc() such that the
> resctrl FS memory bandwidth section covers much shorter duration
> closely matching that of the IMC perf counters to improve measurement
> accuracy.
> 

Thank you very much for doing this.

> Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> ---
>  tools/testing/selftests/resctrl/resctrl_val.c | 72 +++++++++++++------
>  1 file changed, 50 insertions(+), 22 deletions(-)
> 
> diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
> index 36139cba7be8..4df2cd738f88 100644
> --- a/tools/testing/selftests/resctrl/resctrl_val.c
> +++ b/tools/testing/selftests/resctrl/resctrl_val.c
> @@ -293,28 +293,35 @@ static int initialize_mem_bw_imc(void)
>  }
>  
>  /*
> - * get_mem_bw_imc:	Memory band width as reported by iMC counters
> + * perf_open_imc_mem_bw - Open perf fds for IMCs
>   * @cpu_no:		CPU number that the benchmark PID is binded to
> - * @bw_report:		Bandwidth report type (reads, writes)
> - *
> - * Memory B/W utilized by a process on a socket can be calculated using
> - * iMC counters. Perf events are used to read these counters.
> - *
> - * Return: = 0 on success. < 0 on failure.

This "Return" still seems relevant.

>   */
> -static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
> +static int perf_open_imc_mem_bw(int cpu_no)
>  {
> -	float reads, writes, of_mul_read, of_mul_write;
>  	int imc, j, ret;
>  
> -	/* Start all iMC counters to log values (both read and write) */
> -	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
>  	for (imc = 0; imc < imcs; imc++) {
>  		for (j = 0; j < 2; j++) {
>  			ret = open_perf_event(imc, cpu_no, j);
>  			if (ret)
>  				return -1;
>  		}

I'm feeling more strongly that this inner loop makes the code harder to
understand and unwinding it would make it easier to understand.

> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * do_mem_bw_test - Perform memory bandwidth test
> + *
> + * Runs memory bandwidth test over one second period. Also, handles starting
> + * and stopping of the IMC perf counters around the test.
> + */
> +static void do_imc_mem_bw_test(void)
> +{
> +	int imc, j;
> +
> +	for (imc = 0; imc < imcs; imc++) {
>  		for (j = 0; j < 2; j++)
>  			membw_ioctl_perf_event_ioc_reset_enable(imc, j);

Here also. I find these loops unnecessary. I do not think it optimizes anything
and it makes the code harder to understand.

>  	}
> @@ -326,6 +333,24 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
>  		for (j = 0; j < 2; j++)
>  			membw_ioctl_perf_event_ioc_disable(imc, j);

Same here.

>  	}
> +}
> +
> +/*
> + * get_mem_bw_imc - Memory band width as reported by iMC counters
> + * @bw_report:		Bandwidth report type (reads, writes)
> + *
> + * Memory B/W utilized by a process on a socket can be calculated using
> + * iMC counters. Perf events are used to read these counters.

In the above there are three variations of the same: "band width", "Bandwidth",
and "B/W". Please just use one and use it consistently.

> + *
> + * Return: = 0 on success. < 0 on failure.
> + */
> +static int get_mem_bw_imc(char *bw_report, float *bw_imc)
> +{
> +	float reads, writes, of_mul_read, of_mul_write;
> +	int imc, j;
> +
> +	/* Start all iMC counters to log values (both read and write) */
> +	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
>  
>  	/*
>  	 * Get results which are stored in struct type imc_counter_config
> @@ -593,10 +618,9 @@ static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
>  }
>  
>  static int measure_vals(const struct user_params *uparams,
> -			struct resctrl_val_param *param,
> -			unsigned long *bw_resc_start)
> +			struct resctrl_val_param *param)
>  {
> -	unsigned long bw_resc, bw_resc_end;
> +	unsigned long bw_resc, bw_resc_start, bw_resc_end;
>  	float bw_imc;
>  	int ret;
>  
> @@ -607,22 +631,27 @@ static int measure_vals(const struct user_params *uparams,
>  	 * Compare the two values to validate resctrl value.
>  	 * It takes 1sec to measure the data.
>  	 */
> -	ret = get_mem_bw_imc(uparams->cpu, param->bw_report, &bw_imc);
> +	ret = perf_open_imc_mem_bw(uparams->cpu);
>  	if (ret < 0)
>  		return ret;
>  
> +	ret = get_mem_bw_resctrl(&bw_resc_start);
> +	if (ret < 0)
> +		return ret;
> +
> +	do_imc_mem_bw_test();
> +
>  	ret = get_mem_bw_resctrl(&bw_resc_end);
>  	if (ret < 0)
>  		return ret;
>  
> -	bw_resc = (bw_resc_end - *bw_resc_start) / MB;
> -	ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
> -	if (ret)
> +	ret = get_mem_bw_imc(param->bw_report, &bw_imc);
> +	if (ret < 0)
>  		return ret;
>  
> -	*bw_resc_start = bw_resc_end;
> +	bw_resc = (bw_resc_end - bw_resc_start) / MB;
>  
> -	return 0;
> +	return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
>  }
>  
>  /*
> @@ -696,7 +725,6 @@ int resctrl_val(const struct resctrl_test *test,
>  		struct resctrl_val_param *param)
>  {
>  	char *resctrl_val = param->resctrl_val;
> -	unsigned long bw_resc_start = 0;

In the current implementation the first iteration's starting measurement
is, as seen above, 0 ... which makes the first measurement unreliable
and dropped for both the MBA and MBM tests. In this enhancement, the
first measurement is no longer skewed so much so I wonder if this enhancement
can be expanded to the analysis phase where first measurement no longer
needs to be dropped?

>  	struct sigaction sigact;
>  	int ret = 0, pipefd[2];
>  	char pipe_message = 0;
> @@ -838,7 +866,7 @@ int resctrl_val(const struct resctrl_test *test,
>  
>  		if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
>  		    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
> -			ret = measure_vals(uparams, param, &bw_resc_start);
> +			ret = measure_vals(uparams, param);
>  			if (ret)
>  				break;
>  		} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {

Reinette
Ilpo Järvinen March 22, 2024, 12:11 p.m. UTC | #2
On Tue, 19 Mar 2024, Reinette Chatre wrote:
> On 3/11/2024 6:52 AM, Ilpo Järvinen wrote:
> > For MBM/MBA tests, measure_vals() calls get_mem_bw_imc() that performs
> > the measurement over a duration of sleep(1) call. The memory bandwidth
> > numbers from IMC are derived over this duration. The resctrl FS derived
> > memory bandwidth, however, is calculated inside measure_vals() and only
> > takes delta between the previous value and the current one which
> > besides the actual test, also samples inter-test noise.
> > 
> > Rework the logic in measure_vals() and get_mem_bw_imc() such that the
> > resctrl FS memory bandwidth section covers much shorter duration
> > closely matching that of the IMC perf counters to improve measurement
> > accuracy.
> > 
> 
> Thank you very much for doing this.
> 
> > Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
> > Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> > ---
> >  tools/testing/selftests/resctrl/resctrl_val.c | 72 +++++++++++++------
> >  1 file changed, 50 insertions(+), 22 deletions(-)
> > 
> > diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
> > index 36139cba7be8..4df2cd738f88 100644
> > --- a/tools/testing/selftests/resctrl/resctrl_val.c
> > +++ b/tools/testing/selftests/resctrl/resctrl_val.c

> > -static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
> > +static int perf_open_imc_mem_bw(int cpu_no)
> >  {
> > -	float reads, writes, of_mul_read, of_mul_write;
> >  	int imc, j, ret;
> >  
> > -	/* Start all iMC counters to log values (both read and write) */
> > -	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
> >  	for (imc = 0; imc < imcs; imc++) {
> >  		for (j = 0; j < 2; j++) {
> >  			ret = open_perf_event(imc, cpu_no, j);
> >  			if (ret)
> >  				return -1;
> >  		}
> 
> I'm feeling more strongly that this inner loop makes the code harder to
> understand and unwinding it would make it easier to understand.

Okay, I'll unwind them in the first patch.

> >  	}
> > +}
> > +
> > +/*
> > + * get_mem_bw_imc - Memory band width as reported by iMC counters
> > + * @bw_report:		Bandwidth report type (reads, writes)
> > + *
> > + * Memory B/W utilized by a process on a socket can be calculated using
> > + * iMC counters. Perf events are used to read these counters.
> 
> In the above there are three variations of the same: "band width", "Bandwidth",
> and "B/W". Please just use one and use it consistently.

Okay but I'll do that in a separate patch because these are just the 
"removed" lines above, the diff is more messy than the actual change 
here as is often the case with this kind of split function refactoring 
because the diff algorithm fails to pair the lines optimally from 
human-readed PoV.

> > + * Return: = 0 on success. < 0 on failure.
> > + */
> > +static int get_mem_bw_imc(char *bw_report, float *bw_imc)
> > +{
> > +	float reads, writes, of_mul_read, of_mul_write;
> > +	int imc, j;
> > +
> > +	/* Start all iMC counters to log values (both read and write) */
> > +	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
> >  
> >  	/*
> >  	 * Get results which are stored in struct type imc_counter_config

> > @@ -696,7 +725,6 @@ int resctrl_val(const struct resctrl_test *test,
> >  		struct resctrl_val_param *param)
> >  {
> >  	char *resctrl_val = param->resctrl_val;
> > -	unsigned long bw_resc_start = 0;
> 
> In the current implementation the first iteration's starting measurement
> is, as seen above, 0 ... which makes the first measurement unreliable
> and dropped for both the MBA and MBM tests. In this enhancement, the
> first measurement is no longer skewed so much so I wonder if this enhancement
> can be expanded to the analysis phase where first measurement no longer
> needs to be dropped?

In ideal world, yes, but I'll have to check the raw numbers. My general 
feel is that the numbers tend to converge slowly with more iterations 
being run so the first iteration might still be "off" by quite much (this 
is definitely the case with CAT tests iterations but I'm not entirely sure 
any more how it is with other selftests).

Thanks for the review.
Reinette Chatre March 22, 2024, 5:44 p.m. UTC | #3
Hi Ilpo,

On 3/22/2024 5:11 AM, Ilpo Järvinen wrote:
> On Tue, 19 Mar 2024, Reinette Chatre wrote:
>> On 3/11/2024 6:52 AM, Ilpo Järvinen wrote:


>>> -static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
>>> +static int perf_open_imc_mem_bw(int cpu_no)
>>>  {
>>> -	float reads, writes, of_mul_read, of_mul_write;
>>>  	int imc, j, ret;
>>>  
>>> -	/* Start all iMC counters to log values (both read and write) */
>>> -	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
>>>  	for (imc = 0; imc < imcs; imc++) {
>>>  		for (j = 0; j < 2; j++) {
>>>  			ret = open_perf_event(imc, cpu_no, j);
>>>  			if (ret)
>>>  				return -1;
>>>  		}
>>
>> I'm feeling more strongly that this inner loop makes the code harder to
>> understand and unwinding it would make it easier to understand.
> 
> Okay, I'll unwind them in the first patch.

Thank you very much.

> 
>>>  	}
>>> +}
>>> +
>>> +/*
>>> + * get_mem_bw_imc - Memory band width as reported by iMC counters
>>> + * @bw_report:		Bandwidth report type (reads, writes)
>>> + *
>>> + * Memory B/W utilized by a process on a socket can be calculated using
>>> + * iMC counters. Perf events are used to read these counters.
>>
>> In the above there are three variations of the same: "band width", "Bandwidth",
>> and "B/W". Please just use one and use it consistently.
> 
> Okay but I'll do that in a separate patch because these are just the 
> "removed" lines above, the diff is more messy than the actual change 
> here as is often the case with this kind of split function refactoring 
> because the diff algorithm fails to pair the lines optimally from 
> human-readed PoV.

I have not used these much myself bit I've heard folks having success
getting more readable diffs when using the --patience or --histogram
algorithms.

> 
>>> + * Return: = 0 on success. < 0 on failure.
>>> + */
>>> +static int get_mem_bw_imc(char *bw_report, float *bw_imc)
>>> +{
>>> +	float reads, writes, of_mul_read, of_mul_write;
>>> +	int imc, j;
>>> +
>>> +	/* Start all iMC counters to log values (both read and write) */
>>> +	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
>>>  
>>>  	/*
>>>  	 * Get results which are stored in struct type imc_counter_config
> 
>>> @@ -696,7 +725,6 @@ int resctrl_val(const struct resctrl_test *test,
>>>  		struct resctrl_val_param *param)
>>>  {
>>>  	char *resctrl_val = param->resctrl_val;
>>> -	unsigned long bw_resc_start = 0;
>>
>> In the current implementation the first iteration's starting measurement
>> is, as seen above, 0 ... which makes the first measurement unreliable
>> and dropped for both the MBA and MBM tests. In this enhancement, the
>> first measurement is no longer skewed so much so I wonder if this enhancement
>> can be expanded to the analysis phase where first measurement no longer
>> needs to be dropped?
> 
> In ideal world, yes, but I'll have to check the raw numbers. My general 
> feel is that the numbers tend to converge slowly with more iterations 
> being run so the first iteration might still be "off" by quite much (this 
> is definitely the case with CAT tests iterations but I'm not entirely sure 
> any more how it is with other selftests).
> 

From what I can tell the CAT test is not dropping any results. It looks
to me that any "settling" is and should be handled in the test before
the data collection starts.

Reinette
Ilpo Järvinen March 25, 2024, 1:08 p.m. UTC | #4
On Fri, 22 Mar 2024, Reinette Chatre wrote:
> On 3/22/2024 5:11 AM, Ilpo Järvinen wrote:
> > On Tue, 19 Mar 2024, Reinette Chatre wrote:
> >> On 3/11/2024 6:52 AM, Ilpo Järvinen wrote:

> >>> + * Return: = 0 on success. < 0 on failure.
> >>> + */
> >>> +static int get_mem_bw_imc(char *bw_report, float *bw_imc)
> >>> +{
> >>> +	float reads, writes, of_mul_read, of_mul_write;
> >>> +	int imc, j;
> >>> +
> >>> +	/* Start all iMC counters to log values (both read and write) */
> >>> +	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
> >>>  
> >>>  	/*
> >>>  	 * Get results which are stored in struct type imc_counter_config
> > 
> >>> @@ -696,7 +725,6 @@ int resctrl_val(const struct resctrl_test *test,
> >>>  		struct resctrl_val_param *param)
> >>>  {
> >>>  	char *resctrl_val = param->resctrl_val;
> >>> -	unsigned long bw_resc_start = 0;
> >>
> >> In the current implementation the first iteration's starting measurement
> >> is, as seen above, 0 ... which makes the first measurement unreliable
> >> and dropped for both the MBA and MBM tests. In this enhancement, the
> >> first measurement is no longer skewed so much so I wonder if this enhancement
> >> can be expanded to the analysis phase where first measurement no longer
> >> needs to be dropped?
> > 
> > In ideal world, yes, but I'll have to check the raw numbers. My general 
> > feel is that the numbers tend to converge slowly with more iterations 
> > being run so the first iteration might still be "off" by quite much (this 
> > is definitely the case with CAT tests iterations but I'm not entirely sure 
> > any more how it is with other selftests).
> 
> >From what I can tell the CAT test is not dropping any results. It looks
> to me that any "settling" is and should be handled in the test before
> the data collection starts.

It doesn't, but the "settling" is there in the raw numbers. I've 
considered adding warm-up test(s) before the actual runs to improve the 
situation but there's just so many thing still to do...
diff mbox series

Patch

diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index 36139cba7be8..4df2cd738f88 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -293,28 +293,35 @@  static int initialize_mem_bw_imc(void)
 }
 
 /*
- * get_mem_bw_imc:	Memory band width as reported by iMC counters
+ * perf_open_imc_mem_bw - Open perf fds for IMCs
  * @cpu_no:		CPU number that the benchmark PID is binded to
- * @bw_report:		Bandwidth report type (reads, writes)
- *
- * Memory B/W utilized by a process on a socket can be calculated using
- * iMC counters. Perf events are used to read these counters.
- *
- * Return: = 0 on success. < 0 on failure.
  */
-static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
+static int perf_open_imc_mem_bw(int cpu_no)
 {
-	float reads, writes, of_mul_read, of_mul_write;
 	int imc, j, ret;
 
-	/* Start all iMC counters to log values (both read and write) */
-	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
 	for (imc = 0; imc < imcs; imc++) {
 		for (j = 0; j < 2; j++) {
 			ret = open_perf_event(imc, cpu_no, j);
 			if (ret)
 				return -1;
 		}
+	}
+
+	return 0;
+}
+
+/*
+ * do_mem_bw_test - Perform memory bandwidth test
+ *
+ * Runs memory bandwidth test over one second period. Also, handles starting
+ * and stopping of the IMC perf counters around the test.
+ */
+static void do_imc_mem_bw_test(void)
+{
+	int imc, j;
+
+	for (imc = 0; imc < imcs; imc++) {
 		for (j = 0; j < 2; j++)
 			membw_ioctl_perf_event_ioc_reset_enable(imc, j);
 	}
@@ -326,6 +333,24 @@  static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
 		for (j = 0; j < 2; j++)
 			membw_ioctl_perf_event_ioc_disable(imc, j);
 	}
+}
+
+/*
+ * get_mem_bw_imc - Memory band width as reported by iMC counters
+ * @bw_report:		Bandwidth report type (reads, writes)
+ *
+ * Memory B/W utilized by a process on a socket can be calculated using
+ * iMC counters. Perf events are used to read these counters.
+ *
+ * Return: = 0 on success. < 0 on failure.
+ */
+static int get_mem_bw_imc(char *bw_report, float *bw_imc)
+{
+	float reads, writes, of_mul_read, of_mul_write;
+	int imc, j;
+
+	/* Start all iMC counters to log values (both read and write) */
+	reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
 
 	/*
 	 * Get results which are stored in struct type imc_counter_config
@@ -593,10 +618,9 @@  static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
 }
 
 static int measure_vals(const struct user_params *uparams,
-			struct resctrl_val_param *param,
-			unsigned long *bw_resc_start)
+			struct resctrl_val_param *param)
 {
-	unsigned long bw_resc, bw_resc_end;
+	unsigned long bw_resc, bw_resc_start, bw_resc_end;
 	float bw_imc;
 	int ret;
 
@@ -607,22 +631,27 @@  static int measure_vals(const struct user_params *uparams,
 	 * Compare the two values to validate resctrl value.
 	 * It takes 1sec to measure the data.
 	 */
-	ret = get_mem_bw_imc(uparams->cpu, param->bw_report, &bw_imc);
+	ret = perf_open_imc_mem_bw(uparams->cpu);
 	if (ret < 0)
 		return ret;
 
+	ret = get_mem_bw_resctrl(&bw_resc_start);
+	if (ret < 0)
+		return ret;
+
+	do_imc_mem_bw_test();
+
 	ret = get_mem_bw_resctrl(&bw_resc_end);
 	if (ret < 0)
 		return ret;
 
-	bw_resc = (bw_resc_end - *bw_resc_start) / MB;
-	ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
-	if (ret)
+	ret = get_mem_bw_imc(param->bw_report, &bw_imc);
+	if (ret < 0)
 		return ret;
 
-	*bw_resc_start = bw_resc_end;
+	bw_resc = (bw_resc_end - bw_resc_start) / MB;
 
-	return 0;
+	return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
 }
 
 /*
@@ -696,7 +725,6 @@  int resctrl_val(const struct resctrl_test *test,
 		struct resctrl_val_param *param)
 {
 	char *resctrl_val = param->resctrl_val;
-	unsigned long bw_resc_start = 0;
 	struct sigaction sigact;
 	int ret = 0, pipefd[2];
 	char pipe_message = 0;
@@ -838,7 +866,7 @@  int resctrl_val(const struct resctrl_test *test,
 
 		if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
 		    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
-			ret = measure_vals(uparams, param, &bw_resc_start);
+			ret = measure_vals(uparams, param);
 			if (ret)
 				break;
 		} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {