diff mbox series

[RFC,2/2] KVM: VMX: Stop/resume host PT before/after VM entry when PT_MODE_HOST_GUEST

Message ID 20220825085625.867763-3-xiaoyao.li@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: VMX: Fix VM entry failure on PT_MODE_HOST_GUEST while host is using PT | expand

Commit Message

Xiaoyao Li Aug. 25, 2022, 8:56 a.m. UTC
Current implementation in pt_guest_enter() has two issues when pt mode
is PT_MODE_HOST_GUEST.

1. It relies on VM_ENTRY_LOAD_IA32_RTIT_CTL to disable host's Intel PT
   for the case that host's RTIT_CTL_TRACEEN is 1 while guest's is 0.

   However, it causes VM entry failure due to violating the requirement
   stated in SDM "VM-Execution Control Fields"

   If the logical processor is operating with Intel PT enabled (if
   IA32_RTIT_CTL.TraceEn = 1) at the time of VM entry, the "load
   IA32_RTIT_CTL" VM-entry control must be 0.

2. When both host and guest enable Intel PT, it disables the host's
   Intel PT by manually clearing MSR_IA32_RTIT_CTL in order to
   context switch between the host's and guest's PT configurations.

   However, a PT PMI can still be delivered after that and before VM
   entry. The PT PMI handler will a) update the host PT MSRs, which makes
   what KVM stored in vmx->pt_desc.host stale, and b) re-enable Intel PT,
   which leads to the same VM entry failure as in #1.

To fix the above two issues, call intel_pt_stop(), exposed by the Intel PT
driver, to disable the host's Intel PT unconditionally. This ensures that
MSR_IA32_RTIT_CTL.TraceEn is 0 and that any subsequent PT PMI does nothing.

As its counterpart, call intel_pt_resume() after VM exit.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

Comments

Sean Christopherson Aug. 25, 2022, 3:34 p.m. UTC | #1
On Thu, Aug 25, 2022, Xiaoyao Li wrote:
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index d7f8331d6f7e..3e9ce8f600d2 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -38,6 +38,7 @@
>  #include <asm/fpu/api.h>
>  #include <asm/fpu/xstate.h>
>  #include <asm/idtentry.h>
> +#include <asm/intel_pt.h>
>  #include <asm/io.h>
>  #include <asm/irq_remapping.h>
>  #include <asm/kexec.h>
> @@ -1128,13 +1129,19 @@ static void pt_guest_enter(struct vcpu_vmx *vmx)
>  	if (vmx_pt_mode_is_system())
>  		return;
>  
> +	/*
> +	 * Stop Intel PT on host to avoid vm-entry failure since
> +	 * VM_ENTRY_LOAD_IA32_RTIT_CTL is set
> +	 */
> +	intel_pt_stop();
> +
>  	/*
>  	 * GUEST_IA32_RTIT_CTL is already set in the VMCS.
>  	 * Save host state before VM entry.
>  	 */
>  	rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);

KVM's manual save/restore of MSR_IA32_RTIT_CTL should be dropped.  If PT/RTIT can
trace post-VMXON, then intel_pt_stop() will disable tracing and intel_pt_resume()
will restore the host's desired value.

>  	if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
> -		wrmsrl(MSR_IA32_RTIT_CTL, 0);
> +		/* intel_pt_stop() ensures RTIT_CTL.TraceEn is zero */
>  		pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.num_address_ranges);

Isn't this at risk of the same corruption?  What prevents a PT NMI that arrives
after this point from changing other RTIT MSRs, thus causing KVM to restore the
wrong values?

>  		pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.num_address_ranges);
>  	}
> @@ -1156,6 +1163,8 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
>  	 */
>  	if (vmx->pt_desc.host.ctl)
>  		wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
> +
> +	intel_pt_resume();
>  }
>  
>  void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
> -- 
> 2.27.0
>
Xiaoyao Li Aug. 25, 2022, 3:45 p.m. UTC | #2
On 8/25/2022 11:34 PM, Sean Christopherson wrote:
> On Thu, Aug 25, 2022, Xiaoyao Li wrote:
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index d7f8331d6f7e..3e9ce8f600d2 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -38,6 +38,7 @@
>>   #include <asm/fpu/api.h>
>>   #include <asm/fpu/xstate.h>
>>   #include <asm/idtentry.h>
>> +#include <asm/intel_pt.h>
>>   #include <asm/io.h>
>>   #include <asm/irq_remapping.h>
>>   #include <asm/kexec.h>
>> @@ -1128,13 +1129,19 @@ static void pt_guest_enter(struct vcpu_vmx *vmx)
>>   	if (vmx_pt_mode_is_system())
>>   		return;
>>   
>> +	/*
>> +	 * Stop Intel PT on host to avoid vm-entry failure since
>> +	 * VM_ENTRY_LOAD_IA32_RTIT_CTL is set
>> +	 */
>> +	intel_pt_stop();
>> +
>>   	/*
>>   	 * GUEST_IA32_RTIT_CTL is already set in the VMCS.
>>   	 * Save host state before VM entry.
>>   	 */
>>   	rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
> 
> KVM's manual save/restore of MSR_IA32_RTIT_CTL should be dropped.  

No. It cannot. Please see below.

> If PT/RTIT can
> trace post-VMXON, then intel_pt_stop() will disable tracing and intel_pt_resume()
> will restore the host's desired value.

intel_pt_stop() and intel_pt_resume() touch the host's RTIT_CTL only when
the host enables/uses Intel PT. Otherwise, they're just no-ops. In that case,
we cannot assume the host's RTIT_CTL is zero (only RTIT_CTL.TraceEn is
0). After VM-exit, RTIT_CTL is cleared, so we need to restore it.

>>   	if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
>> -		wrmsrl(MSR_IA32_RTIT_CTL, 0);
>> +		/* intel_pt_stop() ensures RTIT_CTL.TraceEn is zero */
>>   		pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.num_address_ranges);
> 
> Isn't this at risk of the same corruption?  What prevents a PT NMI that arrives
> after this point from changing other RTIT MSRs, thus causing KVM to restore the
> wrong values?

intel_pt_stop() -> pt_event_stop() will do

	WRITE_ONCE(pt->handle_nmi, 0);

which ensures the PT NMI handler is a no-op, thanks to the check at the
beginning of intel_pt_interrupt():

	if (!READ_ONCE(pt->handle_nmi))
		return;

>>   		pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.num_address_ranges);
>>   	}
>> @@ -1156,6 +1163,8 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
>>   	 */
>>   	if (vmx->pt_desc.host.ctl)
>>   		wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
>> +
>> +	intel_pt_resume();
>>   }
>>   
>>   void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
>> -- 
>> 2.27.0
>>
Sean Christopherson Aug. 25, 2022, 3:59 p.m. UTC | #3
On Thu, Aug 25, 2022, Xiaoyao Li wrote:
> On 8/25/2022 11:34 PM, Sean Christopherson wrote:
> > On Thu, Aug 25, 2022, Xiaoyao Li wrote:
> > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > > index d7f8331d6f7e..3e9ce8f600d2 100644
> > > --- a/arch/x86/kvm/vmx/vmx.c
> > > +++ b/arch/x86/kvm/vmx/vmx.c
> > > @@ -38,6 +38,7 @@
> > >   #include <asm/fpu/api.h>
> > >   #include <asm/fpu/xstate.h>
> > >   #include <asm/idtentry.h>
> > > +#include <asm/intel_pt.h>
> > >   #include <asm/io.h>
> > >   #include <asm/irq_remapping.h>
> > >   #include <asm/kexec.h>
> > > @@ -1128,13 +1129,19 @@ static void pt_guest_enter(struct vcpu_vmx *vmx)
> > >   	if (vmx_pt_mode_is_system())
> > >   		return;
> > > +	/*
> > > +	 * Stop Intel PT on host to avoid vm-entry failure since
> > > +	 * VM_ENTRY_LOAD_IA32_RTIT_CTL is set
> > > +	 */
> > > +	intel_pt_stop();
> > > +
> > >   	/*
> > >   	 * GUEST_IA32_RTIT_CTL is already set in the VMCS.
> > >   	 * Save host state before VM entry.
> > >   	 */
> > >   	rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
> > 
> > KVM's manual save/restore of MSR_IA32_RTIT_CTL should be dropped.
> 
> No. It cannot. Please see below.
> 
> > If PT/RTIT can
> > trace post-VMXON, then intel_pt_stop() will disable tracing and intel_pt_resume()
> > will restore the host's desired value.
> 
> intel_pt_stop() and intel_pt_resume() touches host's RTIT_CTL only when host
> enables/uses Intel PT. Otherwise, they're just noop. In this case, we cannot
> assume host's RTIT_CTL is zero (only the RTIT_CTL.TraceEn is 0). After
> VM-exit, RTIT_CTL is cleared, we need to restore it.

But ensuring the RTIT_CTL.TraceEn=0 is all that's needed to make VM-Entry happy,
and if the host isn't using Intel PT, what do we care if other bits that, for all
intents and purposes, are ignored, are lost across VM-Entry/VM-Exit?  I gotta
imagine that perf will fully initialize RTIT_CTL if it starts using PT.

Actually, if the host isn't actively using Intel PT, can KVM avoid saving the
other RTIT MSRs?

Even better, can we hand that off to perf?  I really dislike KVM making assumptions
about perf's internal behavior.  E.g. can this be made to look like

	intel_pt_guest_enter(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);

and

	intel_pt_guest_exit(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);

> > >   	if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
> > > -		wrmsrl(MSR_IA32_RTIT_CTL, 0);
> > > +		/* intel_pt_stop() ensures RTIT_CTL.TraceEn is zero */
> > >   		pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.num_address_ranges);
> > 
> > Isn't this at risk of the same corruption?  What prevents a PT NMI that arrives
> > after this point from changing other RTIT MSRs, thus causing KVM to restore the
> > wrong values?
> 
> intel_pt_stop() -> pt_event_stop() will do
> 
> 	WRITE_ONCE(pt->handle_nmi, 0);
> 
> which ensure PT NMI handler as noop that at the beginning of
> intel_pt_interrupt():
> 
> 	if (!READ_ONCE(pt->handle_nmi))
> 		return;

Ah, right.  

> 
> > >   		pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.num_address_ranges);
> > >   	}
> > > @@ -1156,6 +1163,8 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
> > >   	 */
> > >   	if (vmx->pt_desc.host.ctl)
> > >   		wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
> > > +
> > > +	intel_pt_resume();
> > >   }
> > >   void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
> > > -- 
> > > 2.27.0
> > > 
>
Xiaoyao Li Aug. 26, 2022, 6:32 a.m. UTC | #4
On 8/25/2022 11:59 PM, Sean Christopherson wrote:
> On Thu, Aug 25, 2022, Xiaoyao Li wrote:
>> On 8/25/2022 11:34 PM, Sean Christopherson wrote:
>>> On Thu, Aug 25, 2022, Xiaoyao Li wrote:
>>>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>>>> index d7f8331d6f7e..3e9ce8f600d2 100644
>>>> --- a/arch/x86/kvm/vmx/vmx.c
>>>> +++ b/arch/x86/kvm/vmx/vmx.c
>>>> @@ -38,6 +38,7 @@
>>>>    #include <asm/fpu/api.h>
>>>>    #include <asm/fpu/xstate.h>
>>>>    #include <asm/idtentry.h>
>>>> +#include <asm/intel_pt.h>
>>>>    #include <asm/io.h>
>>>>    #include <asm/irq_remapping.h>
>>>>    #include <asm/kexec.h>
>>>> @@ -1128,13 +1129,19 @@ static void pt_guest_enter(struct vcpu_vmx *vmx)
>>>>    	if (vmx_pt_mode_is_system())
>>>>    		return;
>>>> +	/*
>>>> +	 * Stop Intel PT on host to avoid vm-entry failure since
>>>> +	 * VM_ENTRY_LOAD_IA32_RTIT_CTL is set
>>>> +	 */
>>>> +	intel_pt_stop();
>>>> +
>>>>    	/*
>>>>    	 * GUEST_IA32_RTIT_CTL is already set in the VMCS.
>>>>    	 * Save host state before VM entry.
>>>>    	 */
>>>>    	rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
>>>
>>> KVM's manual save/restore of MSR_IA32_RTIT_CTL should be dropped.
>>
>> No. It cannot. Please see below.
>>
>>> If PT/RTIT can
>>> trace post-VMXON, then intel_pt_stop() will disable tracing and intel_pt_resume()
>>> will restore the host's desired value.
>>
>> intel_pt_stop() and intel_pt_resume() touches host's RTIT_CTL only when host
>> enables/uses Intel PT. Otherwise, they're just noop. In this case, we cannot
>> assume host's RTIT_CTL is zero (only the RTIT_CTL.TraceEn is 0). After
>> VM-exit, RTIT_CTL is cleared, we need to restore it.
> 
> But ensuring the RTIT_CTL.TraceEn=0 is all that's needed to make VM-Entry happy,
> and if the host isn't using Intel PT, what do we care if other bits that, for all
> intents and purposes are ignored, are lost across VM-Entry/VM-Exit?  I gotta
> imaging the perf will fully initialize RTIT_CTL if it starts using PT.

Personally, I agree with it.

But I'm not sure if there is a criterion that the host context needs to be
unchanged after being virtualized.

> Actually, if the host isn't actively using Intel PT, can KVM avoid saving the
> other RTIT MSRs?

I don't think that's a good idea, since it requires that the PT driver never
relies (and never will rely) on the previous values of the PT MSRs. But it's
OK if we hand it over to perf, as in the idea you gave below.

> Even better, can we hand that off to perf?  I really dislike KVM making assumptions
> about perf's internal behavior.  E.g. can this be made to look like

You mean letting the perf subsystem do the context save/restore stuff for the
host, while KVM focuses on the save/restore of the guest context, right?

I would like to see comments from the perf folks on this; we may need their
help on how to implement it.

> 	intel_pt_guest_enter(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
> 
> and
> 
> 	intel_pt_guest_exit(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
>
Sean Christopherson Aug. 26, 2022, 3:08 p.m. UTC | #5
On Fri, Aug 26, 2022, Xiaoyao Li wrote:
> On 8/25/2022 11:59 PM, Sean Christopherson wrote:
> > But ensuring the RTIT_CTL.TraceEn=0 is all that's needed to make VM-Entry happy,
> > and if the host isn't using Intel PT, what do we care if other bits that, for all
> > intents and purposes are ignored, are lost across VM-Entry/VM-Exit?  I gotta
> > imaging the perf will fully initialize RTIT_CTL if it starts using PT.
> 
> Personally, I agree with it.
> 
> But I'm not sure if there is a criteria that host context needs to be
> unchanged after being virtualized.
> 
> > Actually, if the host isn't actively using Intel PT, can KVM avoid saving the
> > other RTIT MSRs?
> 
> I don't think it's a good idea that it requires PT driver never and won't
> rely on the previous value of PT MSRs. But it's OK if handing it over to
> perf as the idea you gave below.

Yep, my thought exactly.

> > Even better, can we hand that off to perf?  I really dislike KVM making assumptions
> > about perf's internal behavior.  E.g. can this be made to look like
> 
> you mean let perf subsystem to do the context save/restore staff of host and
> KVM focuses on save/restore of guest context, right?

Yep!  KVM already more or less does this for "regular" PMU MSRs, though in that
case perf hands back a list of MSRs+data.  But for Intel PT I don't see any point
in having KVM do the actual MSR accesses.  Tracing has to be turned off _before_
VM-Enter, so using the MSR load/save lists doesn't buy us anything. 
 
> I would like to see comment from perf folks on this and maybe need their
> help on how to implement.
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d7f8331d6f7e..3e9ce8f600d2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -38,6 +38,7 @@ 
 #include <asm/fpu/api.h>
 #include <asm/fpu/xstate.h>
 #include <asm/idtentry.h>
+#include <asm/intel_pt.h>
 #include <asm/io.h>
 #include <asm/irq_remapping.h>
 #include <asm/kexec.h>
@@ -1128,13 +1129,19 @@  static void pt_guest_enter(struct vcpu_vmx *vmx)
 	if (vmx_pt_mode_is_system())
 		return;
 
+	/*
+	 * Stop Intel PT on host to avoid vm-entry failure since
+	 * VM_ENTRY_LOAD_IA32_RTIT_CTL is set
+	 */
+	intel_pt_stop();
+
 	/*
 	 * GUEST_IA32_RTIT_CTL is already set in the VMCS.
 	 * Save host state before VM entry.
 	 */
 	rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
 	if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
-		wrmsrl(MSR_IA32_RTIT_CTL, 0);
+		/* intel_pt_stop() ensures RTIT_CTL.TraceEn is zero */
 		pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.num_address_ranges);
 		pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.num_address_ranges);
 	}
@@ -1156,6 +1163,8 @@  static void pt_guest_exit(struct vcpu_vmx *vmx)
 	 */
 	if (vmx->pt_desc.host.ctl)
 		wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
+
+	intel_pt_resume();
 }
 
 void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,