
[3/4] KVM: x86/pmu: Reuse find_perf_hw_id() and drop find_fixed_event()

Message ID 20211116122030.4698-4-likexu@tencent.com (mailing list archive)
State New, archived
Series KVM: x86/pmu: An insightful refactoring of vPMU code

Commit Message

Like Xu Nov. 16, 2021, 12:20 p.m. UTC
From: Like Xu <likexu@tencent.com>

Since the same semantic event value is now set for the fixed counter in
pmc->eventsel, returning the perf_hw_id for a fixed counter via
find_fixed_event() can be painlessly replaced by find_perf_hw_id()
with the help of a pmc_is_fixed() check.

Signed-off-by: Like Xu <likexu@tencent.com>
---
 arch/x86/kvm/pmu.c           |  2 +-
 arch/x86/kvm/pmu.h           |  1 -
 arch/x86/kvm/svm/pmu.c       | 11 ++++-------
 arch/x86/kvm/vmx/pmu_intel.c | 29 ++++++++++++++++-------------
 4 files changed, 21 insertions(+), 22 deletions(-)

Comments

Paolo Bonzini Nov. 18, 2021, 2:43 p.m. UTC | #1
On 11/16/21 13:20, Like Xu wrote:
> +	/* return PERF_COUNT_HW_MAX as AMD doesn't have fixed events */
> +	if (pmc_is_fixed(pmc))
> +		return PERF_COUNT_HW_MAX;
> +

This should have a WARN_ON, since reprogram_fixed_counter will never see 
an enabled fixed counter on AMD.

Paolo
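
A minimal sketch of that suggestion, applied to the check added in
amd_find_perf_hw_id() (illustrative only, not a posted patch):

	/*
	 * AMD has no fixed counters, so reprogram_fixed_counter() should never
	 * hand this function a fixed PMC; warn instead of silently bailing out.
	 */
	if (WARN_ON(pmc_is_fixed(pmc)))
		return PERF_COUNT_HW_MAX;
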
Paolo Bonzini Nov. 18, 2021, 3 p.m. UTC | #2
On 11/16/21 13:20, Like Xu wrote:
> +static inline unsigned int intel_find_fixed_event(int idx)
> +{
> +	u32 event;
> +	size_t size = ARRAY_SIZE(fixed_pmc_events);
> +
> +	if (idx >= size)
> +		return PERF_COUNT_HW_MAX;
> +
> +	event = fixed_pmc_events[array_index_nospec(idx, size)];
> +	return intel_arch_events[event].event_type;
> +}
> +
> +
>   static unsigned int intel_find_perf_hw_id(struct kvm_pmc *pmc)
>   {
>   	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
> @@ -75,6 +88,9 @@ static unsigned int intel_find_perf_hw_id(struct kvm_pmc *pmc)
>   	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
>   	int i;
>   
> +	if (pmc_is_fixed(pmc))
> +		return intel_find_fixed_event(pmc->idx - INTEL_PMC_IDX_FIXED);

Is intel_find_fixed_event needed at all?  As you point out in the commit
message, eventsel/unit_mask are valid so you can do

@@ -88,13 +75,11 @@ static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc)
  	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
  	int i;
  
-	if (pmc_is_fixed(pmc))
-		return intel_find_fixed_event(pmc->idx - INTEL_PMC_IDX_FIXED);
-
  	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
  		if (intel_arch_events[i].eventsel == event_select
  		    && intel_arch_events[i].unit_mask == unit_mask
-		    && (pmu->available_event_types & (1 << i)))
+		    && (pmc_is_fixed(pmc) ||
+			pmu->available_event_types & (1 << i)))
  			break;
  
  	if (i == ARRAY_SIZE(intel_arch_events))

What do you think?  It's less efficient but makes fixed/gp more similar.

Can you please resubmit the series based on the review feedback?

Thanks,
Like Xu Nov. 19, 2021, 7:16 a.m. UTC | #3
On 18/11/2021 11:00 pm, Paolo Bonzini wrote:
> On 11/16/21 13:20, Like Xu wrote:
>> +static inline unsigned int intel_find_fixed_event(int idx)
>> +{
>> +    u32 event;
>> +    size_t size = ARRAY_SIZE(fixed_pmc_events);
>> +
>> +    if (idx >= size)
>> +        return PERF_COUNT_HW_MAX;
>> +
>> +    event = fixed_pmc_events[array_index_nospec(idx, size)];
>> +    return intel_arch_events[event].event_type;
>> +}
>> +
>> +
>>   static unsigned int intel_find_perf_hw_id(struct kvm_pmc *pmc)
>>   {
>>       struct kvm_pmu *pmu = pmc_to_pmu(pmc);
>> @@ -75,6 +88,9 @@ static unsigned int intel_find_perf_hw_id(struct kvm_pmc *pmc)
>>       u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
>>       int i;
>> +    if (pmc_is_fixed(pmc))
>> +        return intel_find_fixed_event(pmc->idx - INTEL_PMC_IDX_FIXED);
> 
> Is intel_find_fixed_event needed at all?  As you point out in the commit
> message, eventsel/unit_mask are valid so you can do
> 
> @@ -88,13 +75,11 @@ static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc)
>       u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
>       int i;
> 
> -    if (pmc_is_fixed(pmc))
> -        return intel_find_fixed_event(pmc->idx - INTEL_PMC_IDX_FIXED);
> -
>       for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
>           if (intel_arch_events[i].eventsel == event_select
>               && intel_arch_events[i].unit_mask == unit_mask
> -            && (pmu->available_event_types & (1 << i)))
> +            && (pmc_is_fixed(pmc) ||
> +            pmu->available_event_types & (1 << i)))

It's a good move, but the tricky thing I've found recently is this:

the events tracked in pmu->available_event_types are just the *Intel CPUID* events
(they are neither a subset nor a superset of the *kernel generic* hw events),
some of which can be programmed and enabled on either a generic or a fixed counter.

According to the Intel SDM, when an Intel CPUID event (e.g. "instructions retired")
is marked unavailable in pmu->available_event_types (configured via Intel CPUID.0AH.EBX),
the guest should not use a generic or a fixed counter to count/sample this event.
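
(For reference, a rough sketch of how KVM derives this mask from the guest's
CPUID.0AH leaf in intel_pmu_refresh(); a paraphrase, not a verbatim quote of
the source:)

	/* Bits set in CPUID.0AH.EBX mean "event not available", so invert and trim. */
	pmu->available_event_types = ~entry->ebx &
					((1ull << eax.split.mask_length) - 1);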

This issue is detailed in another patch set [1], which still needs to collect comments.

It's proposed to get [V2] merged and continue to review the fixes from [1]
seamlessly, and then further unify all fixed/gp stuff including
intel_find_fixed_event() as a follow-up.

[1] https://lore.kernel.org/kvm/20211112095139.21775-1-likexu@tencent.com/
[V2] https://lore.kernel.org/kvm/20211119064856.77948-1-likexu@tencent.com/

>               break;
> 
>       if (i == ARRAY_SIZE(intel_arch_events))
> 
> What do you think?  It's less efficient but makes fixed/gp more similar.
> 
> Can you please resubmit the series based on the review feedback?
> 
> Thanks,
> 
>
Paolo Bonzini Nov. 19, 2021, 10:29 a.m. UTC | #4
On 11/19/21 08:16, Like Xu wrote:
> 
> It's proposed to get [V2] merged and continue to review the fixes from [1]
> seamlessly, and then further unify all fixed/gp stuff including
> intel_find_fixed_event() as a follow-up.

I agree and I'll review it soon.  Though, why not add the

+            && (pmc_is_fixed(pmc) ||
+            pmu->available_event_types & (1 << i)))

version in v2 of this patch? :)

Paolo

> [1] https://lore.kernel.org/kvm/20211112095139.21775-1-likexu@tencent.com/
> [V2] https://lore.kernel.org/kvm/20211119064856.77948-1-likexu@tencent.com/
Like Xu Nov. 19, 2021, 11:10 a.m. UTC | #5
On 19/11/2021 6:29 pm, Paolo Bonzini wrote:
> On 11/19/21 08:16, Like Xu wrote:
>>
>> It's proposed to get [V2] merged and continue to review the fixes from [1]
>> seamlessly, and then further unify all fixed/gp stuff including
>> intel_find_fixed_event() as a follow-up.
> 
> I agree and I'll review it soon.  Though, why not add the
> 
> +            && (pmc_is_fixed(pmc) ||
> +            pmu->available_event_types & (1 << i)))
> 

If we have fixed counter 0 counting the "instructions retired" event
but bit 01 of the guest CPUID 0AH.EBX leaf is set (i.e. the event is masked off),

then in that case we get true from "pmc_is_fixed(pmc)"
and false from "pmu->available_event_types & (1 << i)",

so the loop breaks and KVM goes on to program a perf_event for the pmc.

(The SDM says: Bit 01: Instruction retired event not available if 1 or if EAX[31:24] < 2.)

But the right behavior is that KVM should not program a perf_event
for this pmc, since the event is not supposed to be available (whether gp or fixed):
the counter MSR pair can still be accessed but does not actually count.

The proposed final code may look like this:

/* UMask and Event Select Encodings for Intel CPUID Events */
static inline bool is_intel_cpuid_event(u8 event_select, u8 unit_mask)
{
	if ((!unit_mask && event_select == 0x3C) ||
	    (!unit_mask && event_select == 0xC0) ||
	    (unit_mask == 0x01 && event_select == 0x3C) ||
	    (unit_mask == 0x4F && event_select == 0x2E) ||
	    (unit_mask == 0x41 && event_select == 0x2E) ||
	    (!unit_mask && event_select == 0xC4) ||
	    (!unit_mask && event_select == 0xC5))
		return true;

	/* the unimplemented topdown.slots event check is skipped. */
	return false;
}

static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
	int i;

	for (i = 0; i < PERF_COUNT_HW_MAX; i++) {
		if (kernel_generic_events[i].eventsel != event_select ||
		    kernel_generic_events[i].unit_mask != unit_mask)
			continue;

		if (is_intel_cpuid_event(event_select, unit_mask) &&
		    !test_bit(i, pmu->avail_cpuid_events))
			return PERF_COUNT_HW_MAX + 1;

		break;
	}

	return (i == PERF_COUNT_HW_MAX) ? i : kernel_generic_events[i].event_type;
}
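
For illustration only (not part of the posted series), a hypothetical consumer
of the PERF_COUNT_HW_MAX + 1 sentinel in the reprogramming path could look like:

	unsigned int hw_id = intel_pmc_perf_hw_id(pmc);

	/* CPUID event masked off in guest CPUID.0AH.EBX: don't create a perf_event. */
	if (hw_id == PERF_COUNT_HW_MAX + 1)
		return;

(A plain PERF_COUNT_HW_MAX return would keep today's behavior of falling back
to a raw event.)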


> version in v2 of this patch? :)
> 
> Paolo
> 
>> [1] https://lore.kernel.org/kvm/20211112095139.21775-1-likexu@tencent.com/
>> [V2] https://lore.kernel.org/kvm/20211119064856.77948-1-likexu@tencent.com/
>

Patch

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 903dc6a532cc..3c45467b4275 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -262,7 +262,7 @@  void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
 
 	pmc->current_config = (u64)ctrl;
 	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
-			      kvm_x86_ops.pmu_ops->find_fixed_event(idx),
+			      kvm_x86_ops.pmu_ops->find_perf_hw_id(pmc),
 			      !(en_field & 0x2), /* exclude user */
 			      !(en_field & 0x1), /* exclude kernel */
 			      pmi, false, false);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index e7a5d4b6fa94..354339710d0d 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -25,7 +25,6 @@  struct kvm_event_hw_type_mapping {
 
 struct kvm_pmu_ops {
 	unsigned int (*find_perf_hw_id)(struct kvm_pmc *pmc);
-	unsigned (*find_fixed_event)(int idx);
 	bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
 	struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
 	struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 1d31bd5c6803..eeaeb58d501b 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -140,6 +140,10 @@  static unsigned int amd_find_perf_hw_id(struct kvm_pmc *pmc)
 	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
 	int i;
 
+	/* return PERF_COUNT_HW_MAX as AMD doesn't have fixed events */
+	if (pmc_is_fixed(pmc))
+		return PERF_COUNT_HW_MAX;
+
 	for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
 		if (amd_event_mapping[i].eventsel == event_select
 		    && amd_event_mapping[i].unit_mask == unit_mask)
@@ -151,12 +155,6 @@  static unsigned int amd_find_perf_hw_id(struct kvm_pmc *pmc)
 	return amd_event_mapping[i].event_type;
 }
 
-/* return PERF_COUNT_HW_MAX as AMD doesn't have fixed events */
-static unsigned amd_find_fixed_event(int idx)
-{
-	return PERF_COUNT_HW_MAX;
-}
-
 /* check if a PMC is enabled by comparing it against global_ctrl bits. Because
  * AMD CPU doesn't have global_ctrl MSR, all PMCs are enabled (return TRUE).
  */
@@ -321,7 +319,6 @@  static void amd_pmu_reset(struct kvm_vcpu *vcpu)
 
 struct kvm_pmu_ops amd_pmu_ops = {
 	.find_perf_hw_id = amd_find_perf_hw_id,
-	.find_fixed_event = amd_find_fixed_event,
 	.pmc_is_enabled = amd_pmc_is_enabled,
 	.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
 	.rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index f1cc6192ead7..8ba8b4ab1fb7 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -68,6 +68,19 @@  static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
 		reprogram_counter(pmu, bit);
 }
 
+static inline unsigned int intel_find_fixed_event(int idx)
+{
+	u32 event;
+	size_t size = ARRAY_SIZE(fixed_pmc_events);
+
+	if (idx >= size)
+		return PERF_COUNT_HW_MAX;
+
+	event = fixed_pmc_events[array_index_nospec(idx, size)];
+	return intel_arch_events[event].event_type;
+}
+
+
 static unsigned int intel_find_perf_hw_id(struct kvm_pmc *pmc)
 {
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -75,6 +88,9 @@  static unsigned int intel_find_perf_hw_id(struct kvm_pmc *pmc)
 	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
 	int i;
 
+	if (pmc_is_fixed(pmc))
+		return intel_find_fixed_event(pmc->idx - INTEL_PMC_IDX_FIXED);
+
 	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
 		if (intel_arch_events[i].eventsel == event_select
 		    && intel_arch_events[i].unit_mask == unit_mask
@@ -87,18 +103,6 @@  static unsigned int intel_find_perf_hw_id(struct kvm_pmc *pmc)
 	return intel_arch_events[i].event_type;
 }
 
-static unsigned intel_find_fixed_event(int idx)
-{
-	u32 event;
-	size_t size = ARRAY_SIZE(fixed_pmc_events);
-
-	if (idx >= size)
-		return PERF_COUNT_HW_MAX;
-
-	event = fixed_pmc_events[array_index_nospec(idx, size)];
-	return intel_arch_events[event].event_type;
-}
-
 /* check if a PMC is enabled by comparing it with globl_ctrl bits. */
 static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
 {
@@ -722,7 +726,6 @@  static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
 
 struct kvm_pmu_ops intel_pmu_ops = {
 	.find_perf_hw_id = intel_find_perf_hw_id,
-	.find_fixed_event = intel_find_fixed_event,
 	.pmc_is_enabled = intel_pmc_is_enabled,
 	.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
 	.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,