diff mbox series

i386: Disable BTS and PEBS

Message ID 20220718032206.34488-1-zhenzhong.duan@intel.com (mailing list archive)
State New, archived
Headers show
Series i386: Disable BTS and PEBS | expand

Commit Message

Duan, Zhenzhong July 18, 2022, 3:22 a.m. UTC
Since below KVM commit, KVM hided BTS as it's not supported yet.
b9181c8ef356 ("KVM: x86/pmu: Avoid exposing Intel BTS feature")

After below KVM commit, it gave control of MSR_IA32_MISC_ENABLES to userspace.
9fc222967a39 ("KVM: x86: Give host userspace full control of MSR_IA32_MISC_ENABLES")

So qemu takes the responsibility to hide BTS.
Without fix, we get below warning in guest kernel:

[] unchecked MSR access error: WRMSR to 0x1d9 (tried to write 0x00000000000001c0) at rIP: 0xffffffffaa070644 (native_write_msr+0x4/0x20)
[] Call Trace:
[]  <TASK>
[]  intel_pmu_enable_bts+0x5d/0x70
[]  bts_event_add+0x77/0x90
[]  event_sched_in.isra.135+0x99/0x1e0

Tested-by: Xiangfei Ma <xiangfeix.ma@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 target/i386/cpu.h     | 6 ++++--
 target/i386/kvm/kvm.c | 4 ++++
 2 files changed, 8 insertions(+), 2 deletions(-)

Comments

Like Xu July 18, 2022, 3:57 a.m. UTC | #1
On 18/7/2022 11:22 am, Zhenzhong Duan wrote:
> Since below KVM commit, KVM hided BTS as it's not supported yet.
> b9181c8ef356 ("KVM: x86/pmu: Avoid exposing Intel BTS feature")
> 
> After below KVM commit, it gave control of MSR_IA32_MISC_ENABLES to userspace.
> 9fc222967a39 ("KVM: x86: Give host userspace full control of MSR_IA32_MISC_ENABLES")
> 
> So qemu takes the responsibility to hide BTS.
> Without fix, we get below warning in guest kernel:
> 
> [] unchecked MSR access error: WRMSR to 0x1d9 (tried to write 0x00000000000001c0) at rIP: 0xffffffffaa070644 (native_write_msr+0x4/0x20)
> [] Call Trace:
> []  <TASK>
> []  intel_pmu_enable_bts+0x5d/0x70
> []  bts_event_add+0x77/0x90
> []  event_sched_in.isra.135+0x99/0x1e0
> 
> Tested-by: Xiangfei Ma <xiangfeix.ma@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   target/i386/cpu.h     | 6 ++++--
>   target/i386/kvm/kvm.c | 4 ++++
>   2 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 82004b65b944..8a83d0995c66 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -434,8 +434,10 @@ typedef enum X86Seg {
>   
>   #define MSR_IA32_MISC_ENABLE            0x1a0
>   /* Indicates good rep/movs microcode on some processors: */
> -#define MSR_IA32_MISC_ENABLE_DEFAULT    1
> -#define MSR_IA32_MISC_ENABLE_MWAIT      (1ULL << 18)
> +#define MSR_IA32_MISC_ENABLE_DEFAULT      1
> +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL  (1ULL << 11)
> +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12)
> +#define MSR_IA32_MISC_ENABLE_MWAIT        (1ULL << 18)
>   
>   #define MSR_MTRRphysBase(reg)           (0x200 + 2 * (reg))
>   #define MSR_MTRRphysMask(reg)           (0x200 + 2 * (reg) + 1)
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index f148a6d52fa4..002e0520dd76 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -2180,6 +2180,10 @@ void kvm_arch_reset_vcpu(X86CPU *cpu)
>   {
>       CPUX86State *env = &cpu->env;
>   
> +    /* Disable BTS feature which is unsupported on KVM */
> +    env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
> +    env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;

Would it be more readable to group msr_ia32_misc_enable code into this function:

	static void x86_cpu_reset(DeviceState *dev)

and, why disable PEBS (we need it at least for "-cpu host,migratable=no") ?

Also, the behavior of MISC_ENABLE_EMON is also inconsistent with "pmu=off”.

> +
>       env->xcr0 = 1;
>       if (kvm_irqchip_in_kernel()) {
>           env->mp_state = cpu_is_bsp(cpu) ? KVM_MP_STATE_RUNNABLE :
Duan, Zhenzhong July 18, 2022, 7:44 a.m. UTC | #2
>-----Original Message-----
>From: Like Xu <like.xu.linux@gmail.com>
>Sent: Monday, July 18, 2022 11:57 AM
>To: Duan, Zhenzhong <zhenzhong.duan@intel.com>
>Cc: pbonzini@redhat.com; mtosatti@redhat.com; Christopherson,, Sean
><seanjc@google.com>; Ma, XiangfeiX <xiangfeix.ma@intel.com>; qemu-
>devel@nongnu.org
>Subject: Re: [PATCH] i386: Disable BTS and PEBS
>
>On 18/7/2022 11:22 am, Zhenzhong Duan wrote:
>> Since below KVM commit, KVM hided BTS as it's not supported yet.
>> b9181c8ef356 ("KVM: x86/pmu: Avoid exposing Intel BTS feature")
>>
>> After below KVM commit, it gave control of MSR_IA32_MISC_ENABLES to
>userspace.
>> 9fc222967a39 ("KVM: x86: Give host userspace full control of
>> MSR_IA32_MISC_ENABLES")
>>
>> So qemu takes the responsibility to hide BTS.
>> Without fix, we get below warning in guest kernel:
>>
>> [] unchecked MSR access error: WRMSR to 0x1d9 (tried to write
>> 0x00000000000001c0) at rIP: 0xffffffffaa070644
>(native_write_msr+0x4/0x20) [] Call Trace:
>> []  <TASK>
>> []  intel_pmu_enable_bts+0x5d/0x70
>> []  bts_event_add+0x77/0x90
>> []  event_sched_in.isra.135+0x99/0x1e0
>>
>> Tested-by: Xiangfei Ma <xiangfeix.ma@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   target/i386/cpu.h     | 6 ++++--
>>   target/i386/kvm/kvm.c | 4 ++++
>>   2 files changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/target/i386/cpu.h b/target/i386/cpu.h index
>> 82004b65b944..8a83d0995c66 100644
>> --- a/target/i386/cpu.h
>> +++ b/target/i386/cpu.h
>> @@ -434,8 +434,10 @@ typedef enum X86Seg {
>>
>>   #define MSR_IA32_MISC_ENABLE            0x1a0
>>   /* Indicates good rep/movs microcode on some processors: */
>> -#define MSR_IA32_MISC_ENABLE_DEFAULT    1
>> -#define MSR_IA32_MISC_ENABLE_MWAIT      (1ULL << 18)
>> +#define MSR_IA32_MISC_ENABLE_DEFAULT      1
>> +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL  (1ULL << 11) #define
>> +MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12)
>> +#define MSR_IA32_MISC_ENABLE_MWAIT        (1ULL << 18)
>>
>>   #define MSR_MTRRphysBase(reg)           (0x200 + 2 * (reg))
>>   #define MSR_MTRRphysMask(reg)           (0x200 + 2 * (reg) + 1)
>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index
>> f148a6d52fa4..002e0520dd76 100644
>> --- a/target/i386/kvm/kvm.c
>> +++ b/target/i386/kvm/kvm.c
>> @@ -2180,6 +2180,10 @@ void kvm_arch_reset_vcpu(X86CPU *cpu)
>>   {
>>       CPUX86State *env = &cpu->env;
>>
>> +    /* Disable BTS feature which is unsupported on KVM */
>> +    env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
>> +    env->msr_ia32_misc_enable |=
>MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
>
>Would it be more readable to group msr_ia32_misc_enable code into this
>function:
>
>	static void x86_cpu_reset(DeviceState *dev)

OK, will do. Initially I thought BTS/PEBS stuffs will only be implemented in KVM,
so thought putting it in kvm_arch_reset_vcpu() may be better.

>
>and, why disable PEBS (we need it at least for "-cpu host,migratable=no") ?

Will fix. I see in below commit in KVM side, PEBS is initialized as unavailable together with BTS.
9fc222967a39 ("KVM: x86: Give host userspace full control of MSR_IA32_MISC_ENABLES").
Then I thought PEBS is unsupported too.

>
>Also, the behavior of MISC_ENABLE_EMON is also inconsistent with
>"pmu=off”.

Will init EMON bit based on pmu setting. Thanks!

Zhenzhong
Paolo Bonzini July 18, 2022, 4:08 p.m. UTC | #3
On 7/18/22 05:22, Zhenzhong Duan wrote:
> Since below KVM commit, KVM hided BTS as it's not supported yet.
> b9181c8ef356 ("KVM: x86/pmu: Avoid exposing Intel BTS feature")
> 
> After below KVM commit, it gave control of MSR_IA32_MISC_ENABLES to userspace.
> 9fc222967a39 ("KVM: x86: Give host userspace full control of MSR_IA32_MISC_ENABLES")
> 
> So qemu takes the responsibility to hide BTS.
> Without fix, we get below warning in guest kernel:
> 
> [] unchecked MSR access error: WRMSR to 0x1d9 (tried to write 0x00000000000001c0) at rIP: 0xffffffffaa070644 (native_write_msr+0x4/0x20)
> [] Call Trace:
> []  <TASK>
> []  intel_pmu_enable_bts+0x5d/0x70
> []  bts_event_add+0x77/0x90
> []  event_sched_in.isra.135+0x99/0x1e0
> 
> Tested-by: Xiangfei Ma <xiangfeix.ma@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   target/i386/cpu.h     | 6 ++++--
>   target/i386/kvm/kvm.c | 4 ++++
>   2 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 82004b65b944..8a83d0995c66 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -434,8 +434,10 @@ typedef enum X86Seg {
>   
>   #define MSR_IA32_MISC_ENABLE            0x1a0
>   /* Indicates good rep/movs microcode on some processors: */
> -#define MSR_IA32_MISC_ENABLE_DEFAULT    1
> -#define MSR_IA32_MISC_ENABLE_MWAIT      (1ULL << 18)
> +#define MSR_IA32_MISC_ENABLE_DEFAULT      1
> +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL  (1ULL << 11)
> +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12)
> +#define MSR_IA32_MISC_ENABLE_MWAIT        (1ULL << 18)
>   
>   #define MSR_MTRRphysBase(reg)           (0x200 + 2 * (reg))
>   #define MSR_MTRRphysMask(reg)           (0x200 + 2 * (reg) + 1)
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index f148a6d52fa4..002e0520dd76 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -2180,6 +2180,10 @@ void kvm_arch_reset_vcpu(X86CPU *cpu)
>   {
>       CPUX86State *env = &cpu->env;
>   
> +    /* Disable BTS feature which is unsupported on KVM */
> +    env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
> +    env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
> +
>       env->xcr0 = 1;
>       if (kvm_irqchip_in_kernel()) {
>           env->mp_state = cpu_is_bsp(cpu) ? KVM_MP_STATE_RUNNABLE :

This needs to be fixed in the kernel because old QEMU/new KVM is 
supported.  But apart from that, where does Linux check 
MSR_IA32_MISC_ENABLE_BTS_UNAVAIL and MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL?

Paolo
Sean Christopherson July 18, 2022, 8:12 p.m. UTC | #4
On Mon, Jul 18, 2022, Paolo Bonzini wrote:
> This needs to be fixed in the kernel because old QEMU/new KVM is supported.

I can't object to adding a quirk for this since KVM is breaking userspace, but on
the KVM side we really need to stop "sanitizing" userspace inputs unless it puts
the host at risk, because inevitably it leads to needing a quirk.

> But apart from that, where does Linux check MSR_IA32_MISC_ENABLE_BTS_UNAVAIL
> and MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL?

The kernel uses synthetic feature flags that are set by:

  static void init_intel(struct cpuinfo_x86 *c)

	if (boot_cpu_has(X86_FEATURE_DS)) {
		unsigned int l1, l2;

		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
		if (!(l1 & (1<<11)))
			set_cpu_cap(c, X86_FEATURE_BTS);
		if (!(l1 & (1<<12)))
			set_cpu_cap(c, X86_FEATURE_PEBS);
	}

and consumed by:

  void __init intel_ds_init(void)

	/*
	 * No support for 32bit formats
	 */
	if (!boot_cpu_has(X86_FEATURE_DTES64))
		return;

	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
Paolo Bonzini July 19, 2022, 6:18 p.m. UTC | #5
On 7/18/22 22:12, Sean Christopherson wrote:
> On Mon, Jul 18, 2022, Paolo Bonzini wrote:
>> This needs to be fixed in the kernel because old QEMU/new KVM is supported.
> 
> I can't object to adding a quirk for this since KVM is breaking userspace, but on
> the KVM side we really need to stop "sanitizing" userspace inputs unless it puts
> the host at risk, because inevitably it leads to needing a quirk.

The problem is not the sanitizing, it's that userspace literally cannot 
know that this needs to be done because the feature bits are "backwards" 
(1 = unavailable).

The right way to fix it is probably to use feature MSRs and, by default, 
leave the features marked as unavailable.  I'll think it through and 
post a patch tomorrow for both KVM and QEMU (to enable PEBS).

>> But apart from that, where does Linux check MSR_IA32_MISC_ENABLE_BTS_UNAVAIL
>> and MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL?
> 
> The kernel uses synthetic feature flags that are set by:
> 
>    static void init_intel(struct cpuinfo_x86 *c)
> 
> 	if (boot_cpu_has(X86_FEATURE_DS)) {
> 		unsigned int l1, l2;
> 
> 		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
> 		if (!(l1 & (1<<11)))
> 			set_cpu_cap(c, X86_FEATURE_BTS);
> 		if (!(l1 & (1<<12)))
> 			set_cpu_cap(c, X86_FEATURE_PEBS);
> 	}

Gah, shift constants are evil.   I sent 
https://lore.kernel.org/all/20220719174714.2410374-1-pbonzini@redhat.com/ to 
clean this up.

Paolo

> and consumed by:
> 
>    void __init intel_ds_init(void)
> 
> 	/*
> 	 * No support for 32bit formats
> 	 */
> 	if (!boot_cpu_has(X86_FEATURE_DTES64))
> 		return;
> 
> 	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
> 	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
> 	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
>
Sean Christopherson July 19, 2022, 6:53 p.m. UTC | #6
On Tue, Jul 19, 2022, Paolo Bonzini wrote:
> On 7/18/22 22:12, Sean Christopherson wrote:
> > On Mon, Jul 18, 2022, Paolo Bonzini wrote:
> > > This needs to be fixed in the kernel because old QEMU/new KVM is supported.
> > 
> > I can't object to adding a quirk for this since KVM is breaking userspace, but on
> > the KVM side we really need to stop "sanitizing" userspace inputs unless it puts
> > the host at risk, because inevitably it leads to needing a quirk.
> 
> The problem is not the sanitizing, it's that userspace literally cannot know
> that this needs to be done because the feature bits are "backwards" (1 =
> unavailable).

Yes, the bits being inverted contributed to KVM not providing a way for userspace
to enumerate PEBS and BTS support, but lack of enumeration is a seperate issue.

If KVM had simply ignored invalid guest state from the get go, then userspace would
never have gained a dependency on KVM sanitizing guest state.  The fact that KVM
didn't enumerate support in any way is an orthogonal problem.  To play nice with
older userspace, KVM will need to add a quirk to restore the sanizting code, but
that doesn't solve the enumeration issue.  And vice versa, solving the enuemaration
problem doesn't magically fix old userspace.

> The right way to fix it is probably to use feature MSRs and, by default,
> leave the features marked as unavailable.  I'll think it through and post a
> patch tomorrow for both KVM and QEMU (to enable PEBS).

Yeah, lack of CPUID bits is annoying.
Duan, Zhenzhong July 20, 2022, 2:35 a.m. UTC | #7
>-----Original Message-----
>From: Sean Christopherson <seanjc@google.com>
>Sent: Wednesday, July 20, 2022 2:53 AM
>To: Paolo Bonzini <pbonzini@redhat.com>
>Cc: Duan, Zhenzhong <zhenzhong.duan@intel.com>; qemu-
>devel@nongnu.org; mtosatti@redhat.com; likexu@tencent.com; Ma,
>XiangfeiX <xiangfeix.ma@intel.com>
>Subject: Re: [PATCH] i386: Disable BTS and PEBS
>
>On Tue, Jul 19, 2022, Paolo Bonzini wrote:
>> On 7/18/22 22:12, Sean Christopherson wrote:
>> > On Mon, Jul 18, 2022, Paolo Bonzini wrote:
>> > > This needs to be fixed in the kernel because old QEMU/new KVM is
>supported.
>> >
>> > I can't object to adding a quirk for this since KVM is breaking
>> > userspace, but on the KVM side we really need to stop "sanitizing"
>> > userspace inputs unless it puts the host at risk, because inevitably it
>leads to needing a quirk.
>>
>> The problem is not the sanitizing, it's that userspace literally
>> cannot know that this needs to be done because the feature bits are
>> "backwards" (1 = unavailable).
>
>Yes, the bits being inverted contributed to KVM not providing a way for
>userspace to enumerate PEBS and BTS support, but lack of enumeration is a
>seperate issue.
>
>If KVM had simply ignored invalid guest state from the get go, then
>userspace would never have gained a dependency on KVM sanitizing guest
>state.  The fact that KVM didn't enumerate support in any way is an
>orthogonal problem.  To play nice with older userspace, KVM will need to
>add a quirk to restore the sanizting code, but that doesn't solve the
>enumeration issue.  And vice versa, solving the enuemaration problem
>doesn't magically fix old userspace.
Hi,

I didn't clearly understand the boundary of when to use quirk and when to fix it directly, appreciate your guide.
My previous understanding for quirk is about backward compatibility, old behavior vs. new behavior.
But this issue is more like a regression or bug, and the sanitizing code is only in kvm/next branch,
not in kernel upstream yet, why bother to use a quirk?

Thanks
Zhenzhong
Sean Christopherson July 20, 2022, 3:48 p.m. UTC | #8
On Wed, Jul 20, 2022, Duan, Zhenzhong wrote:
> >On Tue, Jul 19, 2022, Paolo Bonzini wrote:
> >> On 7/18/22 22:12, Sean Christopherson wrote:
> >> > On Mon, Jul 18, 2022, Paolo Bonzini wrote:
> >> > > This needs to be fixed in the kernel because old QEMU/new KVM is supported.
> >> >
> >> > I can't object to adding a quirk for this since KVM is breaking userspace,
> >> > but on the KVM side we really need to stop "sanitizing" userspace inputs
> >> > unless it puts the host at risk, because inevitably it leads to needing
> >> > a quirk.
> >>
> >> The problem is not the sanitizing, it's that userspace literally
> >> cannot know that this needs to be done because the feature bits are
> >> "backwards" (1 = unavailable).
> >
> >Yes, the bits being inverted contributed to KVM not providing a way for
> >userspace to enumerate PEBS and BTS support, but lack of enumeration is a
> >seperate issue.
> >
> >If KVM had simply ignored invalid guest state from the get go, then
> >userspace would never have gained a dependency on KVM sanitizing guest
> >state.  The fact that KVM didn't enumerate support in any way is an
> >orthogonal problem.  To play nice with older userspace, KVM will need to
> >add a quirk to restore the sanizting code, but that doesn't solve the
> >enumeration issue.  And vice versa, solving the enuemaration problem
> >doesn't magically fix old userspace.
> Hi,
> 
> I didn't clearly understand the boundary of when to use quirk and when to fix
> it directly, appreciate your guide.  My previous understanding for quirk is
> about backward compatibility, old behavior vs. new behavior.  But this issue
> is more like a regression or bug, and the sanitizing code is only in kvm/next
> branch, not in kernel upstream yet, why bother to use a quirk?

Oh!  Yay!  You're absolutely right.  And now I understand why Paolo is saying the
problem has nothing to do with sanitizing...

I was thinking that KVM had been doing the sanitizing before the PEBS enabling,
but that bad behavior was introduced by bef6ecca46ac ("KVM: x86/pmu: Set
MSR_IA32_MISC_ENABLE_EMON bit when vPMU is enabled").

Sorry for the noise.
Like Xu July 21, 2022, 2:42 a.m. UTC | #9
On 20/7/2022 2:53 am, Sean Christopherson wrote:
> On Tue, Jul 19, 2022, Paolo Bonzini wrote:
>> On 7/18/22 22:12, Sean Christopherson wrote:
>>> On Mon, Jul 18, 2022, Paolo Bonzini wrote:
>>>> This needs to be fixed in the kernel because old QEMU/new KVM is supported.
>>>
>>> I can't object to adding a quirk for this since KVM is breaking userspace, but on
>>> the KVM side we really need to stop "sanitizing" userspace inputs unless it puts
>>> the host at risk, because inevitably it leads to needing a quirk.
>>
>> The problem is not the sanitizing, it's that userspace literally cannot know
>> that this needs to be done because the feature bits are "backwards" (1 =
>> unavailable).
> 
> Yes, the bits being inverted contributed to KVM not providing a way for userspace
> to enumerate PEBS and BTS support, but lack of enumeration is a seperate issue.
> 
> If KVM had simply ignored invalid guest state from the get go, then userspace would
> never have gained a dependency on KVM sanitizing guest state.  The fact that KVM
> didn't enumerate support in any way is an orthogonal problem.  To play nice with
> older userspace, KVM will need to add a quirk to restore the sanizting code, but
> that doesn't solve the enumeration issue.  And vice versa, solving the enuemaration
> problem doesn't magically fix old userspace.
> 
>> The right way to fix it is probably to use feature MSRs and, by default,
>> leave the features marked as unavailable.  I'll think it through and post a
>> patch tomorrow for both KVM and QEMU (to enable PEBS).

Try to help:

KVM already have MSR_IA32_PERF_CAPABILITIES as a feature msr (to enable LBR/PEBS),
and KVM_CAP_PMU_CAPABILITY as vm ioctl extension for model specific crappiness.

> 
> Yeah, lack of CPUID bits is annoying.
> 
>
Duan, Zhenzhong Aug. 19, 2022, 1:38 a.m. UTC | #10
>-----Original Message-----
>From: Paolo Bonzini <paolo.bonzini@gmail.com> On Behalf Of Paolo Bonzini
>Sent: Wednesday, July 20, 2022 2:19 AM
>To: Christopherson,, Sean <seanjc@google.com>
>Cc: Duan, Zhenzhong <zhenzhong.duan@intel.com>; qemu-
>devel@nongnu.org; mtosatti@redhat.com; likexu@tencent.com; Ma,
>XiangfeiX <xiangfeix.ma@intel.com>
>Subject: Re: [PATCH] i386: Disable BTS and PEBS
>
>On 7/18/22 22:12, Sean Christopherson wrote:
>> On Mon, Jul 18, 2022, Paolo Bonzini wrote:
>>> This needs to be fixed in the kernel because old QEMU/new KVM is
>supported.
>>
>> I can't object to adding a quirk for this since KVM is breaking
>> userspace, but on the KVM side we really need to stop "sanitizing"
>> userspace inputs unless it puts the host at risk, because inevitably it leads
>to needing a quirk.
>
>The problem is not the sanitizing, it's that userspace literally cannot know
>that this needs to be done because the feature bits are "backwards"
>(1 = unavailable).
>
>The right way to fix it is probably to use feature MSRs and, by default, leave
>the features marked as unavailable.  I'll think it through and post a patch
>tomorrow for both KVM and QEMU (to enable PEBS).
Hi Paolo,

Can we ask the status of your patch? QA still reproduce with newest upstream code.

Thanks
Zhenzhong
diff mbox series

Patch

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 82004b65b944..8a83d0995c66 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -434,8 +434,10 @@  typedef enum X86Seg {
 
 #define MSR_IA32_MISC_ENABLE            0x1a0
 /* Indicates good rep/movs microcode on some processors: */
-#define MSR_IA32_MISC_ENABLE_DEFAULT    1
-#define MSR_IA32_MISC_ENABLE_MWAIT      (1ULL << 18)
+#define MSR_IA32_MISC_ENABLE_DEFAULT      1
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL  (1ULL << 11)
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12)
+#define MSR_IA32_MISC_ENABLE_MWAIT        (1ULL << 18)
 
 #define MSR_MTRRphysBase(reg)           (0x200 + 2 * (reg))
 #define MSR_MTRRphysMask(reg)           (0x200 + 2 * (reg) + 1)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index f148a6d52fa4..002e0520dd76 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2180,6 +2180,10 @@  void kvm_arch_reset_vcpu(X86CPU *cpu)
 {
     CPUX86State *env = &cpu->env;
 
+    /* Disable BTS feature which is unsupported on KVM */
+    env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
+    env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
+
     env->xcr0 = 1;
     if (kvm_irqchip_in_kernel()) {
         env->mp_state = cpu_is_bsp(cpu) ? KVM_MP_STATE_RUNNABLE :