
[kvm-unit-tests] x86/pmu: Reset the expected count of the fixed counter 0 when i386

Message ID 20220801131814.24364-1-likexu@tencent.com (mailing list archive)
State New, archived
Series [kvm-unit-tests] x86/pmu: Reset the expected count of the fixed counter 0 when i386

Commit Message

Like Xu Aug. 1, 2022, 1:18 p.m. UTC
From: Like Xu <likexu@tencent.com>

The pmu test check_counter_overflow() always fails when built with
"./configure --arch=i386". The cnt.count obtained from the last run of
measure() (the one using fixed counter 0) is not equal to the expected
value (which was derived from gp counter 0); it comes out exactly 2 too high.

The two extra instructions come from the inline wrmsr() and rdmsr()
inside the global_disable() code. Specifically, for each MSR access the
i386 code needs two assembly mov instructions before rdmsr/wrmsr (for
fixed counter 0 the bit being cleared is bit 32, i.e. in the upper half
of the 64-bit value), while only one mov is needed on x86_64, or for
gp counter 0 on i386.
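
For reference, the relevant code looks roughly like this (paraphrased
from the kvm-unit-tests sources, not verbatim):

	static void global_disable(pmu_counter_t *cnt)
	{
		/* The disable itself still executes inside the counted window. */
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) &
				~(1ull << event_to_global_idx(cnt)));
	}

	static inline void wrmsr(u32 index, u64 val)
	{
		/*
		 * The 64-bit value is split across eax (low) and edx (high).
		 * For fixed counter 0 the cleared bit is bit 32, so on i386 the
		 * high half is not a constant and costs an extra mov, unlike the
		 * gp counter 0 case.
		 */
		u32 a = val, d = val >> 32;

		asm volatile ("wrmsr" : : "a"(a), "d"(d), "c"(index) : "memory");
	}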

Fix this by deriving the expected initial cnt.count for the fixed
counter 0 overflow case from a measurement of that same fixed counter 0,
instead of always reusing the gp counter 0 result.

Signed-off-by: Like Xu <likexu@tencent.com>
---
 x86/pmu.c | 4 ++++
 1 file changed, 4 insertions(+)

Comments

Sean Christopherson Aug. 1, 2022, 3:43 p.m. UTC | #1
On Mon, Aug 01, 2022, Like Xu wrote:
> From: Like Xu <likexu@tencent.com>
> 
> The pmu test check_counter_overflow() always fails when built with
> "./configure --arch=i386". The cnt.count obtained from the last run of
> measure() (the one using fixed counter 0) is not equal to the expected
> value (which was derived from gp counter 0); it comes out exactly 2 too high.
> 
> The two extra instructions come from the inline wrmsr() and rdmsr()
> inside the global_disable() code. Specifically, for each MSR access the
> i386 code needs two assembly mov instructions before rdmsr/wrmsr (for
> fixed counter 0 the bit being cleared is bit 32, i.e. in the upper half
> of the 64-bit value), while only one mov is needed on x86_64, or for
> gp counter 0 on i386.
> 
> Fix this by deriving the expected initial cnt.count for the fixed
> counter 0 overflow case from a measurement of that same fixed counter 0,
> instead of always reusing the gp counter 0 result.
> 
> Signed-off-by: Like Xu <likexu@tencent.com>
> ---
>  x86/pmu.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/x86/pmu.c b/x86/pmu.c
> index 01be1e9..4bb24e9 100644
> --- a/x86/pmu.c
> +++ b/x86/pmu.c
> @@ -304,6 +304,10 @@ static void check_counter_overflow(void)
>  
>  		if (i == nr_gp_counters) {
>  			cnt.ctr = fixed_events[0].unit_sel;
> +			cnt.count = 0;
> +			measure(&cnt, 1);

Not directly related to this patch...

Unless I've missed something, every invocation of start_event() and measure() first
sets evt.count=0.  Rather than force every caller to ensure count is zeroed, why not
zero the count during start_event() and then drop all of the manual zeroing?

diff --git a/x86/pmu.c b/x86/pmu.c
index 01be1e90..ef804272 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -141,7 +141,7 @@ static void global_disable(pmu_counter_t *cnt)

 static void start_event(pmu_counter_t *evt)
 {
-    wrmsr(evt->ctr, evt->count);
+    wrmsr(evt->ctr, 0);
     if (is_gp(evt))
            wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
                            evt->config | EVNTSEL_EN);


Accumulating counts can be handled by reading the current count before start_event(),
and doing something like stuffing a high count to test an edge case could be handled
by an inner helper, e.g. by adding __start_event().
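
A minimal sketch of that split (hypothetical; names assumed and the
existing event-select/fixed-ctrl programming elided):

	/* Inner helper for the rare case that wants a specific initial count. */
	static void __start_event(pmu_counter_t *evt, uint64_t count)
	{
		evt->count = count;
		wrmsr(evt->ctr, evt->count);
		/* ... existing EVNTSEL / fixed-ctrl programming unchanged ... */
		global_enable(evt);
	}

	/* Common case: counters always start from zero. */
	static void start_event(pmu_counter_t *evt)
	{
		__start_event(evt, 0);
	}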
Like Xu Aug. 2, 2022, 7:05 a.m. UTC | #2
On 1/8/2022 11:43 pm, Sean Christopherson wrote:
> Not directly related to this patch...
> 
> Unless I've missed something, every invocation of start_event() and measure() first
> sets evt.count=0.  Rather than force every caller to ensure count is zeroed, why not
> zero the count during start_event() and then drop all of the manual zeroing?
> 

No objection to this idea; after all, there is obvious redundancy here.

> diff --git a/x86/pmu.c b/x86/pmu.c
> index 01be1e90..ef804272 100644
> --- a/x86/pmu.c
> +++ b/x86/pmu.c
> @@ -141,7 +141,7 @@ static void global_disable(pmu_counter_t *cnt)
> 
>   static void start_event(pmu_counter_t *evt)
>   {
> -    wrmsr(evt->ctr, evt->count);
> +    wrmsr(evt->ctr, 0);

Now we have to fix the last call to measure() in check_counter_overflow(), since
it will also call start_event() after it has been modified and in that case, the
requested high count has to be passed in from another function parameter.

Also, the naming of start_event() does not imply that the counter will be set to
zero implicitly, it just lets a counter continue to run, not caring about the
current value of the counter, which is more flexible.

I may try to do that on the test-cases of AMD vPMU, to help verify the gain of 
your idea.

>       if (is_gp(evt))
>              wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
>                              evt->config | EVNTSEL_EN);
> 
> 
> Accumulating counts can be handled by reading the current count before start_event(),
> and doing something like stuffing a high count to test an edge case could be handled
> by an inner helper, e.g. by adding __start_event().
Sean Christopherson Aug. 2, 2022, 2:09 p.m. UTC | #3
On Tue, Aug 02, 2022, Like Xu wrote:
> On 1/8/2022 11:43 pm, Sean Christopherson wrote:
> > Not directly related to this patch...
> > 
> > Unless I've missed something, every invocation of start_event() and measure() first
> > sets evt.count=0.  Rather than force every caller to ensure count is zeroed, why not
> > zero the count during start_event() and then drop all of the manual zeroing?
> > 
> 
> No objection to this idea; after all, there is obvious redundancy here.
> 
> > diff --git a/x86/pmu.c b/x86/pmu.c
> > index 01be1e90..ef804272 100644
> > --- a/x86/pmu.c
> > +++ b/x86/pmu.c
> > @@ -141,7 +141,7 @@ static void global_disable(pmu_counter_t *cnt)
> > 
> >   static void start_event(pmu_counter_t *evt)
> >   {
> > -    wrmsr(evt->ctr, evt->count);
> > +    wrmsr(evt->ctr, 0);
> 
> Now we have to fix the last call to measure() in check_counter_overflow(),
> since it will also call start_event() after it has been modified and in that
> case, the requested high count has to be passed in from another function
> parameter.

Drat, I suspected an overflow case would want to use a non-zero count, and I
explicitly looked for that case and _still_ missed it.

Anyways, why not just open code measure() for that one-off case?

		__start_event(&cnt, cnt.count);
		loop();
		stop_event(&cnt);

> Also, the naming of start_event() does not imply that the counter will be set
> to zero implicitly, it just lets a counter continue to run, not caring about
> the current value of the counter, which is more flexible.

Sure, but flexibility isn't the only consideration.  Readability and robustness
also matter.  And IMO, requiring callers to zero out a field in the common case
isn't exactly flexible.

Looking at pmu.c more, measure() is guilty of the same bad behavior.  It forces
the common case to pass in unnecessary information in order to give flexibility to
a single use case.  It's just syntactic sugar, but it really does help readers as
it's not obvious that the "1" specifies the number of events, whereas measure_one()
vs. measure_many() are relatively self-explanatory.

---
 x86/pmu.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/x86/pmu.c b/x86/pmu.c
index 01be1e90..e67f1fc2 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -177,7 +177,7 @@ static void stop_event(pmu_counter_t *evt)
 	evt->count = rdmsr(evt->ctr);
 }

-static void measure(pmu_counter_t *evt, int count)
+static void measure_many(pmu_counter_t *evt, int count)
 {
 	int i;
 	for (i = 0; i < count; i++)
@@ -187,6 +187,11 @@ static void measure(pmu_counter_t *evt, int count)
 		stop_event(&evt[i]);
 }

+static void measure_one(pmu_counter_t *evt)
+{
+	measure_many(evt, 1);
+}
+
 static bool verify_event(uint64_t count, struct pmu_event *e)
 {
 	// printf("%d <= %ld <= %d\n", e->min, count, e->max);
@@ -210,7 +215,7 @@ static void check_gp_counter(struct pmu_event *evt)

 	for (i = 0; i < nr_gp_counters; i++, cnt.ctr++) {
 		cnt.count = 0;
-		measure(&cnt, 1);
+		measure_one(&cnt);
 		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
 	}
 }
@@ -238,7 +243,7 @@ static void check_fixed_counters(void)
 	for (i = 0; i < nr_fixed_counters; i++) {
 		cnt.count = 0;
 		cnt.ctr = fixed_events[i].unit_sel;
-		measure(&cnt, 1);
+		measure_one(&cnt);
 		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
 	}
 }
@@ -267,7 +272,7 @@ static void check_counters_many(void)
 		n++;
 	}

-	measure(cnt, n);
+	measure_many(cnt, n);

 	for (i = 0; i < n; i++)
 		if (!verify_counter(&cnt[i]))
@@ -286,7 +291,7 @@ static void check_counter_overflow(void)
 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
 		.count = 0,
 	};
-	measure(&cnt, 1);
+	measure_one(&cnt);
 	count = cnt.count;

 	/* clear status before test */
@@ -312,7 +317,7 @@ static void check_counter_overflow(void)
 		else
 			cnt.config &= ~EVNTSEL_INT;
 		idx = event_to_global_idx(&cnt);
-		measure(&cnt, 1);
+		measure_one(&cnt);
 		report(cnt.count == 1, "cntr-%d", i);
 		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
 		report(status & (1ull << idx), "status-%d", i);
@@ -333,7 +338,7 @@ static void check_gp_counter_cmask(void)
 		.count = 0,
 	};
 	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
-	measure(&cnt, 1);
+	measure_one(&cnt);
 	report(cnt.count < gp_events[1].min, "cmask");
 }


base-commit: 14b54ed754c8a8cae7a22895e4a0b745a3227a4b
--

Patch

diff --git a/x86/pmu.c b/x86/pmu.c
index 01be1e9..4bb24e9 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -304,6 +304,10 @@  static void check_counter_overflow(void)
 
 		if (i == nr_gp_counters) {
 			cnt.ctr = fixed_events[0].unit_sel;
+			cnt.count = 0;
+			measure(&cnt, 1);
+			count = cnt.count;
+			cnt.count = 1 - count;
 			cnt.count &= (1ull << pmu_fixed_counter_width()) - 1;
 		}
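
For readability, the resulting fixed-counter branch is shown below with
explanatory comments added (the comments are not part of the posted patch):

	if (i == nr_gp_counters) {
		cnt.ctr = fixed_events[0].unit_sel;
		/* Measure on fixed counter 0 itself to learn its own count. */
		cnt.count = 0;
		measure(&cnt, 1);
		count = cnt.count;
		/*
		 * Preload the counter so that re-running the same workload
		 * wraps it around to 1, which the later "cnt.count == 1"
		 * check expects.
		 */
		cnt.count = 1 - count;
		cnt.count &= (1ull << pmu_fixed_counter_width()) - 1;
	}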