diff mbox series

[v2,08/26] selftests/resctrl: Split measure_cache_vals()

Message ID 20231120111340.7805-9-ilpo.jarvinen@linux.intel.com (mailing list archive)
State New
Headers show
Series selftests/resctrl: CAT test improvements & generalized test framework | expand

Commit Message

Ilpo Järvinen Nov. 20, 2023, 11:13 a.m. UTC
measure_cache_vals() does a different thing depending on the test case
that called it:
  - For CAT, it measures LLC misses through perf.
  - For CMT, it measures LLC occupancy through resctrl.

Split these two functionalities into own functions the CAT and CMT
tests can call directly. Replace passing the struct resctrl_val_param
parameter with the filename because it's more generic and all those
functions need out of resctrl_val.

Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 tools/testing/selftests/resctrl/cache.c       | 66 ++++++++++++-------
 tools/testing/selftests/resctrl/resctrl.h     |  2 +-
 tools/testing/selftests/resctrl/resctrl_val.c |  2 +-
 3 files changed, 43 insertions(+), 27 deletions(-)

Comments

Reinette Chatre Nov. 28, 2023, 10:13 p.m. UTC | #1
Hi Ilpo,

On 11/20/2023 3:13 AM, Ilpo Järvinen wrote:
> measure_cache_vals() does a different thing depending on the test case
> that called it:
>   - For CAT, it measures LLC misses through perf.
>   - For CMT, it measures LLC occupancy through resctrl.
> 
> Split these two functionalities into own functions the CAT and CMT
> tests can call directly. Replace passing the struct resctrl_val_param
> parameter with the filename because it's more generic and all those
> functions need out of resctrl_val.
> 
> Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
> Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> ---
>  tools/testing/selftests/resctrl/cache.c       | 66 ++++++++++++-------
>  tools/testing/selftests/resctrl/resctrl.h     |  2 +-
>  tools/testing/selftests/resctrl/resctrl_val.c |  2 +-
>  3 files changed, 43 insertions(+), 27 deletions(-)
> 
> diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
> index 8aa6d67db978..129d1c293518 100644
> --- a/tools/testing/selftests/resctrl/cache.c
> +++ b/tools/testing/selftests/resctrl/cache.c
> @@ -147,7 +147,7 @@ static int get_llc_occu_resctrl(unsigned long *llc_occupancy)
>   *
>   * Return:		0 on success. non-zero on failure.
>   */
> -static int print_results_cache(char *filename, int bm_pid,
> +static int print_results_cache(const char *filename, int bm_pid,
>  			       unsigned long llc_value)
>  {
>  	FILE *fp;
> @@ -169,35 +169,51 @@ static int print_results_cache(char *filename, int bm_pid,
>  	return 0;
>  }
>  
> -int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
> +/*
> + * perf_event_measure - Measure perf events
> + * @filename:	Filename for writing the results
> + * @bm_pid:	PID that runs the benchmark
> + *
> + * Measures perf events (e.g., cache misses) and writes the results into
> + * @filename. @bm_pid is written to the results file along with the measured
> + * value.
> + *
> + * Return: =0 on success. <0 on failure.

I do not think this is accurate. It looks like this function returns
the return value of print_results_cache() which returns errno on failure.
If this is the case then I think this proves that returning a
positive integer on failure should be avoided since it just creates
traps.

> + */
> +static int perf_event_measure(const char *filename, int bm_pid)
>  {
> -	unsigned long llc_perf_miss = 0, llc_occu_resc = 0, llc_value = 0;
> +	unsigned long llc_perf_miss = 0;
>  	int ret;
>  
> -	/*
> -	 * Measure cache miss from perf.
> -	 */
> -	if (!strncmp(param->resctrl_val, CAT_STR, sizeof(CAT_STR))) {
> -		ret = get_llc_perf(&llc_perf_miss);
> -		if (ret < 0)
> -			return ret;
> -		llc_value = llc_perf_miss;
> -	}
> +	ret = get_llc_perf(&llc_perf_miss);
> +	if (ret < 0)
> +		return ret;
>  
> -	/*
> -	 * Measure llc occupancy from resctrl.
> -	 */
> -	if (!strncmp(param->resctrl_val, CMT_STR, sizeof(CMT_STR))) {
> -		ret = get_llc_occu_resctrl(&llc_occu_resc);
> -		if (ret < 0)
> -			return ret;
> -		llc_value = llc_occu_resc;
> -	}
> -	ret = print_results_cache(param->filename, bm_pid, llc_value);
> -	if (ret)
> +	ret = print_results_cache(filename, bm_pid, llc_perf_miss);
> +	return ret;
> +}

Perhaps print_results_cache() can be made to return negative error
and this just be "return print_results_cache(...)" and the function
comment be accurate?

> +
> +/*
> + * measure_llc_resctrl - Measure resctrl llc value from resctrl

llc -> LLC

> + * @filename:	Filename for writing the results
> + * @bm_pid:	PID that runs the benchmark
> + *
> + * Measures llc occupancy from resctrl and writes the results into @filename.

llc -> LLC

> + * @bm_pid is written to the results file along with the measured value.
> + *
> + * Return: =0 on success. <0 on failure.

same issue ?


Reinette
Ilpo Järvinen Dec. 7, 2023, 2:32 p.m. UTC | #2
On Tue, 28 Nov 2023, Reinette Chatre wrote:

> Hi Ilpo,
> 
> On 11/20/2023 3:13 AM, Ilpo Järvinen wrote:
> > measure_cache_vals() does a different thing depending on the test case
> > that called it:
> >   - For CAT, it measures LLC misses through perf.
> >   - For CMT, it measures LLC occupancy through resctrl.
> > 
> > Split these two functionalities into own functions the CAT and CMT
> > tests can call directly. Replace passing the struct resctrl_val_param
> > parameter with the filename because it's more generic and all those
> > functions need out of resctrl_val.
> > 
> > Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
> > Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
> > Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> > ---
> >  tools/testing/selftests/resctrl/cache.c       | 66 ++++++++++++-------
> >  tools/testing/selftests/resctrl/resctrl.h     |  2 +-
> >  tools/testing/selftests/resctrl/resctrl_val.c |  2 +-
> >  3 files changed, 43 insertions(+), 27 deletions(-)
> > 
> > diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
> > index 8aa6d67db978..129d1c293518 100644
> > --- a/tools/testing/selftests/resctrl/cache.c
> > +++ b/tools/testing/selftests/resctrl/cache.c
> > @@ -147,7 +147,7 @@ static int get_llc_occu_resctrl(unsigned long *llc_occupancy)
> >   *
> >   * Return:		0 on success. non-zero on failure.
> >   */
> > -static int print_results_cache(char *filename, int bm_pid,
> > +static int print_results_cache(const char *filename, int bm_pid,
> >  			       unsigned long llc_value)
> >  {
> >  	FILE *fp;
> > @@ -169,35 +169,51 @@ static int print_results_cache(char *filename, int bm_pid,
> >  	return 0;
> >  }
> >  
> > -int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
> > +/*
> > + * perf_event_measure - Measure perf events
> > + * @filename:	Filename for writing the results
> > + * @bm_pid:	PID that runs the benchmark
> > + *
> > + * Measures perf events (e.g., cache misses) and writes the results into
> > + * @filename. @bm_pid is written to the results file along with the measured
> > + * value.
> > + *
> > + * Return: =0 on success. <0 on failure.
> 
> I do not think this is accurate. It looks like this function returns
> the return value of print_results_cache() which returns errno on failure.
> If this is the case then I think this proves that returning a
> positive integer on failure should be avoided since it just creates
> traps.
>
> > + */
> > +static int perf_event_measure(const char *filename, int bm_pid)
> >  {
> > -	unsigned long llc_perf_miss = 0, llc_occu_resc = 0, llc_value = 0;
> > +	unsigned long llc_perf_miss = 0;
> >  	int ret;
> >  
> > -	/*
> > -	 * Measure cache miss from perf.
> > -	 */
> > -	if (!strncmp(param->resctrl_val, CAT_STR, sizeof(CAT_STR))) {
> > -		ret = get_llc_perf(&llc_perf_miss);
> > -		if (ret < 0)
> > -			return ret;
> > -		llc_value = llc_perf_miss;
> > -	}
> > +	ret = get_llc_perf(&llc_perf_miss);
> > +	if (ret < 0)
> > +		return ret;
> >  
> > -	/*
> > -	 * Measure llc occupancy from resctrl.
> > -	 */
> > -	if (!strncmp(param->resctrl_val, CMT_STR, sizeof(CMT_STR))) {
> > -		ret = get_llc_occu_resctrl(&llc_occu_resc);
> > -		if (ret < 0)
> > -			return ret;
> > -		llc_value = llc_occu_resc;
> > -	}
> > -	ret = print_results_cache(param->filename, bm_pid, llc_value);
> > -	if (ret)
> > +	ret = print_results_cache(filename, bm_pid, llc_perf_miss);
> > +	return ret;
> > +}
> 
> Perhaps print_results_cache() can be made to return negative error
> and this just be "return print_results_cache(...)" and the function
> comment be accurate?

I think, I'll just change all "return errno;" to "return -1" before this,
however, one open question which impacts whether this is actually Fixes 
class issue:

It seems that perror()'s manpage doesn't answer one important question, 
whether it ifself can alter errno or not. The resctrl selftest code 
assumes it doesn't but some evidence I came across says otherwise so doing 
return errno; after calling perror() might not even be valid at all.

So I'm tempted to create an additional Fixes patch about the return change 
into the front of the series.
Reinette Chatre Dec. 7, 2023, 6:02 p.m. UTC | #3
Hi Ilpo,

On 12/7/2023 6:32 AM, Ilpo Järvinen wrote:
> On Tue, 28 Nov 2023, Reinette Chatre wrote:
>> On 11/20/2023 3:13 AM, Ilpo Järvinen wrote:

...
>>> -	/*
>>> -	 * Measure llc occupancy from resctrl.
>>> -	 */
>>> -	if (!strncmp(param->resctrl_val, CMT_STR, sizeof(CMT_STR))) {
>>> -		ret = get_llc_occu_resctrl(&llc_occu_resc);
>>> -		if (ret < 0)
>>> -			return ret;
>>> -		llc_value = llc_occu_resc;
>>> -	}
>>> -	ret = print_results_cache(param->filename, bm_pid, llc_value);
>>> -	if (ret)
>>> +	ret = print_results_cache(filename, bm_pid, llc_perf_miss);
>>> +	return ret;
>>> +}
>>
>> Perhaps print_results_cache() can be made to return negative error
>> and this just be "return print_results_cache(...)" and the function
>> comment be accurate?
> 
> I think, I'll just change all "return errno;" to "return -1" before this,
> however, one open question which impacts whether this is actually Fixes 
> class issue:
> 
> It seems that perror()'s manpage doesn't answer one important question, 
> whether it ifself can alter errno or not. The resctrl selftest code 
> assumes it doesn't but some evidence I came across says otherwise so doing 
> return errno; after calling perror() might not even be valid at all.
> 
> So I'm tempted to create an additional Fixes patch about the return change 
> into the front of the series.
> 

I would not trust errno to contain code of earlier calls after a call to perror().
If errno is needed I think it should be saved before calling perror(). Looking
at perror() at [1] I do not see an effort to restore errno before it returns,
and looking at the implementation of perror() there appears to be many
opportunities for errno to change.

Reinette

[1] https://sourceware.org/git/?p=glibc.git;a=blob;f=stdio-common/perror.c;h=51e621e332a5e2aa76ecefb3bcf325efb43b345f;hb=HEAD#l47
Ilpo Järvinen Dec. 7, 2023, 6:33 p.m. UTC | #4
On Thu, 7 Dec 2023, Reinette Chatre wrote:
> On 12/7/2023 6:32 AM, Ilpo Järvinen wrote:
> > On Tue, 28 Nov 2023, Reinette Chatre wrote:
> >> On 11/20/2023 3:13 AM, Ilpo Järvinen wrote:
> 
> ...
> >>> -	/*
> >>> -	 * Measure llc occupancy from resctrl.
> >>> -	 */
> >>> -	if (!strncmp(param->resctrl_val, CMT_STR, sizeof(CMT_STR))) {
> >>> -		ret = get_llc_occu_resctrl(&llc_occu_resc);
> >>> -		if (ret < 0)
> >>> -			return ret;
> >>> -		llc_value = llc_occu_resc;
> >>> -	}
> >>> -	ret = print_results_cache(param->filename, bm_pid, llc_value);
> >>> -	if (ret)
> >>> +	ret = print_results_cache(filename, bm_pid, llc_perf_miss);
> >>> +	return ret;
> >>> +}
> >>
> >> Perhaps print_results_cache() can be made to return negative error
> >> and this just be "return print_results_cache(...)" and the function
> >> comment be accurate?
> > 
> > I think, I'll just change all "return errno;" to "return -1" before this,
> > however, one open question which impacts whether this is actually Fixes 
> > class issue:
> > 
> > It seems that perror()'s manpage doesn't answer one important question, 
> > whether it ifself can alter errno or not. The resctrl selftest code 
> > assumes it doesn't but some evidence I came across says otherwise so doing 
> > return errno; after calling perror() might not even be valid at all.
> > 
> > So I'm tempted to create an additional Fixes patch about the return change 
> > into the front of the series.
> > 
> 
> I would not trust errno to contain code of earlier calls after a call to perror().
> If errno is needed I think it should be saved before calling perror(). Looking
> at perror() at [1] I do not see an effort to restore errno before it returns,
> and looking at the implementation of perror() there appears to be many
> opportunities for errno to change.
> 
> Reinette
> 
> [1] https://sourceware.org/git/?p=glibc.git;a=blob;f=stdio-common/perror.c;h=51e621e332a5e2aa76ecefb3bcf325efb43b345f;hb=HEAD#l47

I already spent some moments in converting all return error -> return -1, 
since all such places do perror() calls anyway (which I also converted to 
ksft_perror() or ksft_print_msg() where perror() didn't make any sense) 
there's not much added value in returning the errno which was not 
correctly done in the existing code anyway.
Reinette Chatre Dec. 7, 2023, 6:35 p.m. UTC | #5
On 12/7/2023 10:33 AM, Ilpo Järvinen wrote:
> 
> I already spent some moments in converting all return error -> return -1, 
> since all such places do perror() calls anyway (which I also converted to 
> ksft_perror() or ksft_print_msg() where perror() didn't make any sense) 
> there's not much added value in returning the errno which was not 
> correctly done in the existing code anyway.

Thank you very much Ilpo.

Reinette
diff mbox series

Patch

diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 8aa6d67db978..129d1c293518 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -147,7 +147,7 @@  static int get_llc_occu_resctrl(unsigned long *llc_occupancy)
  *
  * Return:		0 on success. non-zero on failure.
  */
-static int print_results_cache(char *filename, int bm_pid,
+static int print_results_cache(const char *filename, int bm_pid,
 			       unsigned long llc_value)
 {
 	FILE *fp;
@@ -169,35 +169,51 @@  static int print_results_cache(char *filename, int bm_pid,
 	return 0;
 }
 
-int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
+/*
+ * perf_event_measure - Measure perf events
+ * @filename:	Filename for writing the results
+ * @bm_pid:	PID that runs the benchmark
+ *
+ * Measures perf events (e.g., cache misses) and writes the results into
+ * @filename. @bm_pid is written to the results file along with the measured
+ * value.
+ *
+ * Return: =0 on success. <0 on failure.
+ */
+static int perf_event_measure(const char *filename, int bm_pid)
 {
-	unsigned long llc_perf_miss = 0, llc_occu_resc = 0, llc_value = 0;
+	unsigned long llc_perf_miss = 0;
 	int ret;
 
-	/*
-	 * Measure cache miss from perf.
-	 */
-	if (!strncmp(param->resctrl_val, CAT_STR, sizeof(CAT_STR))) {
-		ret = get_llc_perf(&llc_perf_miss);
-		if (ret < 0)
-			return ret;
-		llc_value = llc_perf_miss;
-	}
+	ret = get_llc_perf(&llc_perf_miss);
+	if (ret < 0)
+		return ret;
 
-	/*
-	 * Measure llc occupancy from resctrl.
-	 */
-	if (!strncmp(param->resctrl_val, CMT_STR, sizeof(CMT_STR))) {
-		ret = get_llc_occu_resctrl(&llc_occu_resc);
-		if (ret < 0)
-			return ret;
-		llc_value = llc_occu_resc;
-	}
-	ret = print_results_cache(param->filename, bm_pid, llc_value);
-	if (ret)
+	ret = print_results_cache(filename, bm_pid, llc_perf_miss);
+	return ret;
+}
+
+/*
+ * measure_llc_resctrl - Measure resctrl llc value from resctrl
+ * @filename:	Filename for writing the results
+ * @bm_pid:	PID that runs the benchmark
+ *
+ * Measures llc occupancy from resctrl and writes the results into @filename.
+ * @bm_pid is written to the results file along with the measured value.
+ *
+ * Return: =0 on success. <0 on failure.
+ */
+int measure_llc_resctrl(const char *filename, int bm_pid)
+{
+	unsigned long llc_occu_resc = 0;
+	int ret;
+
+	ret = get_llc_occu_resctrl(&llc_occu_resc);
+	if (ret < 0)
 		return ret;
 
-	return 0;
+	ret = print_results_cache(filename, bm_pid, llc_occu_resc);
+	return ret;
 }
 
 /*
@@ -252,7 +268,7 @@  int cat_val(struct resctrl_val_param *param, size_t span)
 		}
 
 		sleep(1);
-		ret = measure_cache_vals(param, bm_pid);
+		ret = perf_event_measure(param->filename, bm_pid);
 		if (ret)
 			goto pe_close;
 	}
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index a911b08fa595..d35e3ba4dfa2 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -114,7 +114,7 @@  int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd);
 unsigned int count_bits(unsigned long n);
 void cmt_test_cleanup(void);
 int get_core_sibling(int cpu_no);
-int measure_cache_vals(struct resctrl_val_param *param, int bm_pid);
+int measure_llc_resctrl(const char *filename, int bm_pid);
 int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
 		    size_t cache_span, unsigned long max_diff,
 		    unsigned long max_diff_percent, unsigned long num_of_runs,
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index 88789678917b..a07ba336db48 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -830,7 +830,7 @@  int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par
 				break;
 		} else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
 			sleep(1);
-			ret = measure_cache_vals(param, bm_pid);
+			ret = measure_llc_resctrl(param->filename, bm_pid);
 			if (ret)
 				break;
 		}