[1/4] KVM: x86/svm/pmu: Limit the maximum number of supported GP counters

Message ID 20220905123946.95223-2-likexu@tencent.com (mailing list archive)
State New, archived
Series KVM: x86/svm/pmu: Add AMD Guest PerfMonV2 support

Commit Message

Like Xu Sept. 5, 2022, 12:39 p.m. UTC
From: Like Xu <likexu@tencent.com>

The AMD PerfMonV2 specification allows for a maximum of 16 GP counters,
which the current KVM code clearly cannot support without additional
effort.

A local macro (named after the pattern of INTEL_PMC_MAX_GENERIC) is
introduced to put this virtualization capability back under KVM's
control, which also makes it easier to statically partition all
available counters between the host and guests.

Signed-off-by: Like Xu <likexu@tencent.com>
---
 arch/x86/kvm/pmu.h     | 2 ++
 arch/x86/kvm/svm/pmu.c | 7 ++++---
 arch/x86/kvm/x86.c     | 2 ++
 3 files changed, 8 insertions(+), 3 deletions(-)

Comments

Jim Mattson Sept. 5, 2022, 5:26 p.m. UTC | #1
On Mon, Sep 5, 2022 at 5:45 AM Like Xu <like.xu.linux@gmail.com> wrote:
>
> From: Like Xu <likexu@tencent.com>
>
> The AMD PerfMonV2 specification allows for a maximum of 16 GP counters,
> which the current KVM code clearly cannot support without additional
> effort.
>
> A local macro (named after the pattern of INTEL_PMC_MAX_GENERIC) is
> introduced to put this virtualization capability back under KVM's
> control, which also makes it easier to statically partition all
> available counters between the host and guests.
>
> Signed-off-by: Like Xu <likexu@tencent.com>
> ---
>  arch/x86/kvm/pmu.h     | 2 ++
>  arch/x86/kvm/svm/pmu.c | 7 ++++---
>  arch/x86/kvm/x86.c     | 2 ++
>  3 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index 847e7112a5d3..e3a3813b6a38 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -18,6 +18,8 @@
>  #define VMWARE_BACKDOOR_PMC_REAL_TIME          0x10001
>  #define VMWARE_BACKDOOR_PMC_APPARENT_TIME      0x10002
>
> +#define KVM_AMD_PMC_MAX_GENERIC        AMD64_NUM_COUNTERS_CORE
> +
>  struct kvm_event_hw_type_mapping {
>         u8 eventsel;
>         u8 unit_mask;
> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
> index 2ec420b85d6a..f99f2c869664 100644
> --- a/arch/x86/kvm/svm/pmu.c
> +++ b/arch/x86/kvm/svm/pmu.c
> @@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
>         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>         int i;
>
> -       BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
> +       BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > KVM_AMD_PMC_MAX_GENERIC);
> +       BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);
>
> -       for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
> +       for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
>                 pmu->gp_counters[i].type = KVM_PMC_GP;
>                 pmu->gp_counters[i].vcpu = vcpu;
>                 pmu->gp_counters[i].idx = i;
> @@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
>         struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>         int i;
>
> -       for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
> +       for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
>                 struct kvm_pmc *pmc = &pmu->gp_counters[i];
>
>                 pmc_stop_counter(pmc);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 43a6a7efc6ec..b9738efd8425 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1444,12 +1444,14 @@ static const u32 msrs_to_save_all[] = {
>         MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,

IIRC, the effective maximum on the Intel side is 18, despite what
INTEL_PMC_MAX_GENERIC says, due to collisions with other existing MSR
indices. That's why the Intel list stops here. Should we introduce a
KVM_INTEL_PMC_MAX_GENERIC as well?
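
For concreteness, a sketch of what that might look like (the name and
value are only a guess at a future version; MSR_ARCH_PERFMON_EVENTSEL0
is 0x186, so index 18 would land on MSR_IA32_PERF_STATUS at 0x198):

	/*
	 * KVM's effective cap, distinct from the architectural
	 * INTEL_PMC_MAX_GENERIC: EVENTSEL 0x186 + 18 == 0x198
	 * (MSR_IA32_PERF_STATUS), so only counters 0..17 have
	 * collision-free MSR indices.
	 */
	#define KVM_INTEL_PMC_MAX_GENERIC	18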

>         MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
>
> +       /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */

Perhaps the comment above should be moved down two lines, since the
next two lines deal with the legacy counters.
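
I.e., something like:

	MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
	MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
	/* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */
	MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
	...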

>         MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
>         MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
>         MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
>         MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
>         MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
>         MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,

At some point, we may want to consider populating the PMU MSR list
dynamically, rather than statically enumerating all of them (for both
AMD and Intel).
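
For example, a rough sketch of the dynamic approach (the helper and the
capability snapshot are illustrative names, not existing KVM functions):

	static void kvm_init_pmu_msrs_to_save(void)
	{
		unsigned int i;

		/* Append only the GP counter MSRs the host PMU reports. */
		for (i = 0; i < kvm_pmu_cap.num_counters_gp; i++) {
			kvm_add_msr_to_save(MSR_ARCH_PERFMON_EVENTSEL0 + i);
			kvm_add_msr_to_save(MSR_ARCH_PERFMON_PERFCTR0 + i);
		}
	}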

Reviewed-by: Jim Mattson <jmattson@google.com>
Like Xu Sept. 6, 2022, 12:38 p.m. UTC | #2
On 6/9/2022 1:26 am, Jim Mattson wrote:
> On Mon, Sep 5, 2022 at 5:45 AM Like Xu <like.xu.linux@gmail.com> wrote:
>>
>> From: Like Xu <likexu@tencent.com>
>>
>> The AMD PerfMonV2 specification allows for a maximum of 16 GP counters,
>> which the current KVM code clearly cannot support without additional
>> effort.
>>
>> A local macro (named after the pattern of INTEL_PMC_MAX_GENERIC) is
>> introduced to put this virtualization capability back under KVM's
>> control, which also makes it easier to statically partition all
>> available counters between the host and guests.
>>
>> Signed-off-by: Like Xu <likexu@tencent.com>
>> ---
>>   arch/x86/kvm/pmu.h     | 2 ++
>>   arch/x86/kvm/svm/pmu.c | 7 ++++---
>>   arch/x86/kvm/x86.c     | 2 ++
>>   3 files changed, 8 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
>> index 847e7112a5d3..e3a3813b6a38 100644
>> --- a/arch/x86/kvm/pmu.h
>> +++ b/arch/x86/kvm/pmu.h
>> @@ -18,6 +18,8 @@
>>   #define VMWARE_BACKDOOR_PMC_REAL_TIME          0x10001
>>   #define VMWARE_BACKDOOR_PMC_APPARENT_TIME      0x10002
>>
>> +#define KVM_AMD_PMC_MAX_GENERIC        AMD64_NUM_COUNTERS_CORE
>> +
>>   struct kvm_event_hw_type_mapping {
>>          u8 eventsel;
>>          u8 unit_mask;
>> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
>> index 2ec420b85d6a..f99f2c869664 100644
>> --- a/arch/x86/kvm/svm/pmu.c
>> +++ b/arch/x86/kvm/svm/pmu.c
>> @@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
>>          struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>>          int i;
>>
>> -       BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
>> +       BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > KVM_AMD_PMC_MAX_GENERIC);
>> +       BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);
>>
>> -       for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
>> +       for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
>>                  pmu->gp_counters[i].type = KVM_PMC_GP;
>>                  pmu->gp_counters[i].vcpu = vcpu;
>>                  pmu->gp_counters[i].idx = i;
>> @@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
>>          struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>>          int i;
>>
>> -       for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
>> +       for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
>>                  struct kvm_pmc *pmc = &pmu->gp_counters[i];
>>
>>                  pmc_stop_counter(pmc);
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 43a6a7efc6ec..b9738efd8425 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -1444,12 +1444,14 @@ static const u32 msrs_to_save_all[] = {
>>          MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
> 
> IIRC, the effective maximum on the Intel side is 18, despite what
> INTEL_PMC_MAX_GENERIC says, due to collisions with other existing MSR

Emm, check https://lore.kernel.org/kvm/20220906081604.24035-1-likexu@tencent.com/

> indices. That's why the Intel list stops here. Should we introduce a
> KVM_INTEL_PMC_MAX_GENERIC as well?

Yes, this suggestion will be applied in the next version.

> 
>>          MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
>>
>> +       /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */
> 
> Perhaps the comment above should be moved down two lines, since the
> next two lines deal with the legacy counters.

Applied, thanks.

> 
>>          MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
>>          MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
>>          MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
>>          MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
>>          MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
>>          MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
> 
> At some point, we may want to consider populating the PMU MSR list
> dynamically, rather than statically enumerating all of them (for both
> AMD and Intel).

Making msrs_to_save_all[] dynamic may cause trouble for legacy user spaces.
I have draft patches that rewrite the PMU MSR accesses for the host-initiated
path as a first step.
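
Roughly (a hypothetical helper sketching the direction, not the draft
itself):

	static bool amd_pmu_msr_allowed(struct kvm_vcpu *vcpu, u32 index,
					bool host_initiated)
	{
		struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

		/* Host save/restore of the full static list keeps working. */
		if (host_initiated)
			return true;

		/*
		 * Guests only see the counters their vCPU model exposes;
		 * PERF_CTLn/PERF_CTRn interleave from the same base MSR.
		 */
		return (index - MSR_F15H_PERF_CTL0) / 2 <
		       pmu->nr_arch_gp_counters;
	}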

> 
> Reviewed-by: Jim Mattson <jmattson@google.com>

Patch

diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 847e7112a5d3..e3a3813b6a38 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -18,6 +18,8 @@ 
 #define VMWARE_BACKDOOR_PMC_REAL_TIME		0x10001
 #define VMWARE_BACKDOOR_PMC_APPARENT_TIME	0x10002
 
+#define KVM_AMD_PMC_MAX_GENERIC	AMD64_NUM_COUNTERS_CORE
+
 struct kvm_event_hw_type_mapping {
 	u8 eventsel;
 	u8 unit_mask;
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 2ec420b85d6a..f99f2c869664 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -192,9 +192,10 @@  static void amd_pmu_init(struct kvm_vcpu *vcpu)
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	int i;
 
-	BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
+	BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > KVM_AMD_PMC_MAX_GENERIC);
+	BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);
 
-	for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
+	for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
 		pmu->gp_counters[i].type = KVM_PMC_GP;
 		pmu->gp_counters[i].vcpu = vcpu;
 		pmu->gp_counters[i].idx = i;
@@ -207,7 +208,7 @@  static void amd_pmu_reset(struct kvm_vcpu *vcpu)
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	int i;
 
-	for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
+	for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
 		struct kvm_pmc *pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 43a6a7efc6ec..b9738efd8425 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1444,12 +1444,14 @@  static const u32 msrs_to_save_all[] = {
 	MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
 	MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
 
+	/* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */
 	MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
 	MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
 	MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
 	MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
 	MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
 	MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
+
 	MSR_IA32_XFD, MSR_IA32_XFD_ERR,
 };