diff mbox series

KVM: x86/pmu: Return KVM_MSR_RET_INVALID for invalid PMU MSR access

Message ID 20240709145500.45547-1-dapeng1.mi@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/pmu: Return KVM_MSR_RET_INVALID for invalid PMU MSR access | expand

Commit Message

Dapeng Mi July 9, 2024, 2:55 p.m. UTC
Return KVM_MSR_RET_INVALID instead of 0 so that a #GP is injected into the
guest for all invalid PMU MSR accesses.

Currently KVM silently drops the access and doesn't inject a #GP for some
invalid PMU MSRs such as MSR_P6_PERFCTR0/MSR_P6_PERFCTR1 and
MSR_P6_EVNTSEL0/MSR_P6_EVNTSEL1, but KVM still injects a #GP for all other
invalid PMU MSRs. As a result, the guest sees inconsistent behavior on
invalid PMU MSR accesses, which may confuse it.

This behavior was introduced by
'commit 5753785fa977 ("KVM: do not #GP on perf MSR writes when vPMU is disabled")'
in 2012. That commit seems to have been intended to remain backward
compatible with the odd behavior of some guests when the vPMU is disabled,
but I strongly doubt that this is still relevant nowadays.

Starting with Perfmon v6, the GP counters may be discontiguous on HW, so
it's possible that HW doesn't support GP counters 0 and 1. Considering
this situation, KVM should inject a #GP for all invalid PMU MSR
accesses.

Cc: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 arch/x86/kvm/x86.c                             | 18 ------------------
 .../selftests/kvm/x86_64/pmu_counters_test.c   |  7 +------
 2 files changed, 1 insertion(+), 24 deletions(-)


base-commit: 771df9ffadb8204e61d3e98f36c5067102aab78f

Comments

Sean Christopherson July 9, 2024, 6:45 p.m. UTC | #1
On Tue, Jul 09, 2024, Dapeng Mi wrote:
> Return KVM_MSR_RET_INVALID instead of 0 to inject #GP to guest for all
> invalid PMU MSRs access
> 
> Currently KVM silently drops the access and doesn't inject #GP for some
> invalid PMU MSRs like MSR_P6_PERFCTR0/MSR_P6_PERFCTR1,
> MSR_P6_EVNTSEL0/MSR_P6_EVNTSEL1, but KVM still injects #GP for all other
> invalid PMU MSRs. This leads to guest see different behavior on invalid
> PMU access and may confuse guest.

This is by design.  I'm not saying it's _good_ design, but it is very much
intended.  More importantly, it's established behavior, i.e. having KVM inject
#GP could break existing setups.

> This behavior is introduced by the
> 'commit 5753785fa977 ("KVM: do not #GP on perf MSR writes when vPMU is disabled")'
> in 2012. This commit seems to want to keep back compatible with weird
> behavior of some guests in vPMU disabled case,

Ya, because at the time, guest kernels hadn't been taught to play nice with
unexpected virtualization setups, i.e. VMs without PMUs.

> but strongly suspect if it's still available nowadays.

I don't follow this comment.

> Since Perfmon v6 starts, the GP counters could become discontinuous on
> HW, It's possible that HW doesn't support GP counters 0 and 1.
> Considering this situation KVM should inject #GP for all invalid PMU MSRs
> access.

IIUC, the behavior you want is to inject a #GP if the vCPU has a PMU and the MSR
is not valid.  We can do that and still maintain backwards compatibility, hopefully
without too much ugliness (maybe even an improvement!).

This? (completely untested)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5aa7581802f7..b5e95e5f1f32 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4063,9 +4063,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
        case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
        case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
-               if (kvm_pmu_is_valid_msr(vcpu, msr))
-                       return kvm_pmu_set_msr(vcpu, msr_info);
-
+               if (vcpu_to_pmu(vcpu)->version)
+                       goto default_handler;
                if (data)
                        kvm_pr_unimpl_wrmsr(vcpu, msr, data);
                break;
@@ -4146,6 +4145,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                break;
 #endif
        default:
+default_handler:
                if (kvm_pmu_is_valid_msr(vcpu, msr))
                        return kvm_pmu_set_msr(vcpu, msr_info);
 
@@ -4251,8 +4251,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
        case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
        case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
-               if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
-                       return kvm_pmu_get_msr(vcpu, msr_info);
+               if (vcpu_to_pmu(vcpu)->version)
+                       goto default_handler;
                msr_info->data = 0;
                break;
        case MSR_IA32_UCODE_REV:
@@ -4505,6 +4505,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                break;
 #endif
        default:
+default_handler:
                if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
                        return kvm_pmu_get_msr(vcpu, msr_info);
 
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
index 96446134c00b..0de606b542ac 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
@@ -344,7 +344,8 @@ static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
 static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
                                 uint8_t nr_counters, uint32_t or_mask)
 {
-       const bool pmu_has_fast_mode = !guest_get_pmu_version();
+       const u8 pmu_version = guest_get_pmu_version();
+       const bool pmu_has_fast_mode = !pmu_version;
        uint8_t i;
 
        for (i = 0; i < nr_possible_counters; i++) {
@@ -363,12 +364,13 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters
                const bool expect_success = i < nr_counters || (or_mask & BIT(i));
 
                /*
-                * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
-                * unsupported, i.e. doesn't #GP and reads back '0'.
+                * KVM drops writes to MSR_P6_PERFCTR[0|1] if the vCPU doesn't
+                * have a PMU, i.e. doesn't #GP and reads back '0'.
                 */
                const uint64_t expected_val = expect_success ? test_val : 0;
-               const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
-                                      msr != MSR_P6_PERFCTR1;
+               const bool expect_gp = !expect_success &&
+                                      (pmu_version ||
+                                       (msr != MSR_P6_PERFCTR0 && msr != MSR_P6_PERFCTR1));
                uint32_t rdpmc_idx;
                uint8_t vector;
                uint64_t val;
Dapeng Mi July 10, 2024, 3:13 a.m. UTC | #2
On 7/10/2024 2:45 AM, Sean Christopherson wrote:
> On Tue, Jul 09, 2024, Dapeng Mi wrote:
>> Return KVM_MSR_RET_INVALID instead of 0 to inject #GP to guest for all
>> invalid PMU MSRs access
>>
>> Currently KVM silently drops the access and doesn't inject #GP for some
>> invalid PMU MSRs like MSR_P6_PERFCTR0/MSR_P6_PERFCTR1,
>> MSR_P6_EVNTSEL0/MSR_P6_EVNTSEL1, but KVM still injects #GP for all other
>> invalid PMU MSRs. This leads to guest see different behavior on invalid
>> PMU access and may confuse guest.
> This is by design.  I'm not saying it's _good_ design, but it is very much
> intended.  More importantly, it's established behavior, i.e. having KVM inject
> #GP could break existing setups.
>
>> This behavior is introduced by the
>> 'commit 5753785fa977 ("KVM: do not #GP on perf MSR writes when vPMU is disabled")'
>> in 2012. This commit seems to want to keep back compatible with weird
>> behavior of some guests in vPMU disabled case,
> Ya, because at the time, guest kernels hadn't been taught to play nice with
> unexpected virtualization setups, i.e. VMs without PMUs.
>
>> but strongly suspect if it's still available nowadays.
> I don't follow this comment.
>
>> Since Perfmon v6 starts, the GP counters could become discontinuous on
>> HW, It's possible that HW doesn't support GP counters 0 and 1.
>> Considering this situation KVM should inject #GP for all invalid PMU MSRs
>> access.
> IIUC, the behavior you want is inject a #GP if the vCPU has a PMU and the MSR is
> not valid.  We can do that and still maintain backwards compatibility, hopefully
> without too much ugliness (maybe even an improvement!).
>
> This? (completely untested)

There seems to be no better method. I will adopt this. Thanks.

>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5aa7581802f7..b5e95e5f1f32 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4063,9 +4063,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>         case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
>         case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
>         case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
> -               if (kvm_pmu_is_valid_msr(vcpu, msr))
> -                       return kvm_pmu_set_msr(vcpu, msr_info);
> -
> +               if (vcpu_to_pmu(vcpu)->version)
> +                       goto default_handler;
>                 if (data)
>                         kvm_pr_unimpl_wrmsr(vcpu, msr, data);
>                 break;
> @@ -4146,6 +4145,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                 break;
>  #endif
>         default:
> +default_handler:
>                 if (kvm_pmu_is_valid_msr(vcpu, msr))
>                         return kvm_pmu_set_msr(vcpu, msr_info);
>  
> @@ -4251,8 +4251,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>         case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
>         case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
>         case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
> -               if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
> -                       return kvm_pmu_get_msr(vcpu, msr_info);
> +               if (vcpu_to_pmu(vcpu)->version)
> +                       goto default_handler;
>                 msr_info->data = 0;
>                 break;
>         case MSR_IA32_UCODE_REV:
> @@ -4505,6 +4505,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                 break;
>  #endif
>         default:
> +default_handler:
>                 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
>                         return kvm_pmu_get_msr(vcpu, msr_info);
>  
> diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> index 96446134c00b..0de606b542ac 100644
> --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
> @@ -344,7 +344,8 @@ static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
>  static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
>                                  uint8_t nr_counters, uint32_t or_mask)
>  {
> -       const bool pmu_has_fast_mode = !guest_get_pmu_version();
> +       const u8 pmu_version = guest_get_pmu_version();
> +       const bool pmu_has_fast_mode = !pmu_version;
>         uint8_t i;
>  
>         for (i = 0; i < nr_possible_counters; i++) {
> @@ -363,12 +364,13 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters
>                 const bool expect_success = i < nr_counters || (or_mask & BIT(i));
>  
>                 /*
> -                * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
> -                * unsupported, i.e. doesn't #GP and reads back '0'.
> +                * KVM drops writes to MSR_P6_PERFCTR[0|1] if the vCPU doesn't
> +                * have a PMU, i.e. doesn't #GP and reads back '0'.
>                  */
>                 const uint64_t expected_val = expect_success ? test_val : 0;
> -               const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
> -                                      msr != MSR_P6_PERFCTR1;
> +               const bool expect_gp = !expect_success &&
> +                                      (pmu_version ||
> +                                       (msr != MSR_P6_PERFCTR0 && msr != MSR_P6_PERFCTR1));
>                 uint32_t rdpmc_idx;
>                 uint8_t vector;
>                 uint64_t val;
>
diff mbox series

Patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 994743266480..d92321d37892 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4051,16 +4051,6 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
 		return set_msr_mce(vcpu, msr_info);
 
-	case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
-	case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
-	case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
-	case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
-		if (kvm_pmu_is_valid_msr(vcpu, msr))
-			return kvm_pmu_set_msr(vcpu, msr_info);
-
-		if (data)
-			kvm_pr_unimpl_wrmsr(vcpu, msr, data);
-		break;
 	case MSR_K7_CLK_CTL:
 		/*
 		 * Ignore all writes to this no longer documented MSR.
@@ -4239,14 +4229,6 @@  int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_DRAM_ENERGY_STATUS:	/* DRAM controller */
 		msr_info->data = 0;
 		break;
-	case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
-	case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
-	case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
-	case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
-		if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
-			return kvm_pmu_get_msr(vcpu, msr_info);
-		msr_info->data = 0;
-		break;
 	case MSR_IA32_UCODE_REV:
 		msr_info->data = vcpu->arch.microcode_version;
 		break;
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
index 698cb36989db..62ed765d2aa7 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
@@ -376,13 +376,8 @@  static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters
 		 */
 		const bool expect_success = i < nr_counters || (or_mask & BIT(i));
 
-		/*
-		 * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
-		 * unsupported, i.e. doesn't #GP and reads back '0'.
-		 */
 		const uint64_t expected_val = expect_success ? test_val : 0;
-		const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
-				       msr != MSR_P6_PERFCTR1;
+		const bool expect_gp = !expect_success;
 		uint32_t rdpmc_idx;
 		uint8_t vector;
 		uint64_t val;