diff mbox

[v2,7/8] KVM: x86: add Intel PT msr RTIT_CTL read/write

Message ID 1509401117-15521-8-git-send-email-luwei.kang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Luwei Kang Oct. 30, 2017, 10:05 p.m. UTC
From: Chao Peng <chao.p.peng@linux.intel.com>

The L1 hypervisor cannot obtain the capability "TraceEn can be
set in VMX operation" (IA32_VMX_MISC[14]=0), and any attempt
to set IA32_RTIT_CTL.TraceEn in VMX operation using WRMSR
will cause a general-protection exception when IA32_VMX_MISC[14]
is 0. So we need to avoid writing this MSR while in L1 VMX
operation.

Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Signed-off-by: Luwei Kang <luwei.kang@intel.com>
---
 arch/x86/kvm/vmx.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

Comments

Paolo Bonzini Nov. 13, 2017, 4:22 p.m. UTC | #1
On 30/10/2017 23:05, Luwei Kang wrote:
> From: Chao Peng <chao.p.peng@linux.intel.com>
> 
> L1 hypervisor can't get the capability of "TraceEn can be
> set in VMX operation"(IA32_VMX_MISC[14]=0) and any attempt
> to set IA32_RTIT_CTL.TraceEn in VMX operation using WRMSR
> will cause a general-protection exception if IA32_VMX_MISC[14]
> is 0. So we need to leave out write this msr in L1 VMX
> operation.
> 
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> Signed-off-by: Luwei Kang <luwei.kang@intel.com>
> ---
>  arch/x86/kvm/vmx.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 091120e..8f61a8d 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -936,6 +936,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
>  static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
>  static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
>  					    u16 error_code);
> +static bool vmx_pt_supported(void);
>  
>  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
>  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
> @@ -3384,6 +3385,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  			return 1;
>  		msr_info->data = vcpu->arch.ia32_xss;
>  		break;
> +	case MSR_IA32_RTIT_CTL:
> +		if (!vmx_pt_supported())
> +			return 1;
> +		msr_info->data = vmcs_read64(GUEST_IA32_RTIT_CTL);
> +		break;
>  	case MSR_TSC_AUX:
>  		if (!msr_info->host_initiated &&
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> @@ -3508,6 +3514,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		else
>  			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
>  		break;
> +	case MSR_IA32_RTIT_CTL:
> +		if (!vmx_pt_supported() || to_vmx(vcpu)->nested.vmxon)
> +			return 1;

VMXON must also clear TraceEn bit (see 23.4 in the SDM).

You also need to support R/W of all the other MSRs, in order to make
them accessible to userspace, and add them in msrs_to_save and
kvm_init_msr_list.

Regarding the save/restore of the state, I am now wondering if you could
also use XSAVES and XRSTORS instead of multiple rdmsr/wrmsr in a loop.
The cost is two MSR writes on vmenter (because the guest must run with
IA32_XSS=0) and two on vmexit.

Thanks,

Paolo

> +		vmcs_write64(GUEST_IA32_RTIT_CTL, data);
> +		break;
>  	case MSR_TSC_AUX:
>  		if (!msr_info->host_initiated &&
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
>
Luwei Kang Nov. 14, 2017, 6:59 a.m. UTC | #2
> >  static DEFINE_PER_CPU(struct vmcs *, vmxarea);  static

> > DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -3384,6 +3385,11 @@

> > static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)

> >  			return 1;

> >  		msr_info->data = vcpu->arch.ia32_xss;

> >  		break;

> > +	case MSR_IA32_RTIT_CTL:

> > +		if (!vmx_pt_supported())

> > +			return 1;

> > +		msr_info->data = vmcs_read64(GUEST_IA32_RTIT_CTL);

> > +		break;

> >  	case MSR_TSC_AUX:

> >  		if (!msr_info->host_initiated &&

> >  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) @@ -3508,6 +3514,11

> > @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)

> >  		else

> >  			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);

> >  		break;

> > +	case MSR_IA32_RTIT_CTL:

> > +		if (!vmx_pt_supported() || to_vmx(vcpu)->nested.vmxon)

> > +			return 1;

> 

> VMXON must also clear TraceEn bit (see 23.4 in the SDM).


Will clear TraceEn bit in handle_vmon() function.

> 

> You also need to support R/W of all the other MSRs, in order to make them accessible to userspace, and add them in msrs_to_save and kvm_init_msr_list.

> 


Will add it in the next version. This is used for live migration, is that right?

> Regarding the save/restore of the state, I am now wondering if you could also use XSAVES and XRSTORS instead of multiple rdmsr/wrmsr in a loop.

> The cost is two MSR writes on vmenter (because the guest must run with IA32_XSS=0) and two on vmexit.

> 


If we use XSAVES and XRSTORS for the context switch:
1. Before kvm_load_guest_fpu(vcpu), we need to save the host RTIT_CTL, disable PT, and restore the value of "vmx->pt_desc.guest.ctl" to GUEST_IA32_RTIT_CTL. Is that right?
2. After VM-exit (stepping out of kvm_x86_ops->run(vcpu)), we need to save the status of GUEST_IA32_RTIT_CTL. TraceEn=0 and the other MSRs are still in guest state. Where should PT be enabled when in host-guest mode? I think we should enable PT after VM-exit, but that may cause a #GP: "If XRSTORS would restore (or initialize) PT state and IA32_RTIT_CTL.TraceEn = 1, the instruction causes a general-protection exception. SDM 13.5.6". If it is enabled after kvm_put_guest_fpu(), I think it is too late.

Thanks,
Luwei Kang
> 

> > +		vmcs_write64(GUEST_IA32_RTIT_CTL, data);

> > +		break;

> >  	case MSR_TSC_AUX:

> >  		if (!msr_info->host_initiated &&

> >  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))

> >
Paolo Bonzini Nov. 14, 2017, 10:34 a.m. UTC | #3
On 14/11/2017 07:59, Kang, Luwei wrote:
> Will add it in next version. This is use for live migration, is that right?

Yes.

> If use XSAVES and XRSTORS for context switch.
> 1. Before  kvm_load_guest_fpu(vcpu), we need to save host RTIT_CTL, disable PT and restore the value of  "vmx->pt_desc.guest.ctl" to GUEST_IA32_RTIT_CTL. Is that right?

The idea is to make the MSR get/set handlers operate on an XSAVES state
separate from the guest FPU state.  RTIT_CTL in the XSAVES state is
special cased to always be zero.

Then you could do

  if (!system mode) {
    if (guest RTIT_CTL.TraceEn != 0) {
      set PT in IA32_XSS
      XSAVES the host PT state
      // RTIT_CTL.TraceEn is now zero, and remains zero after XRSTORS
      XRSTORS the guest PT state
      clear PT in IA32_XSS
    } else {
      save host RTIT_CTL
    }
    // guest RTIT_CTL.TraceEn will be loaded by vmentry
  }

on vmentry, and

  if (!system mode) {
    // RTIT_CTL.TraceEn is zero here
    if (guest RTIT_CTL.TraceEn != 0) {
      set PT in IA32_XSS
      // no need to XSAVES guest state, all MSR writes cause a vmexit
      XRSTORS the host PT state
      clear PT in IA32_XSS
    } else if (host RTIT_CTL.TraceEn != 0) {
      restore host RTIT_CTL
    }
  }

on vmexit.  It should still be cheaper than many rdmsr/wrmsr operations.

Paolo

> 2. After VM-exit (step out from kvm_x86_ops->run(vcpu)), we need to
> save the status of GUEST_IA32_RTIT_CTL . TRACEEN=0 and others MSRs are
> still in guest status. Where to enable PT if in host-guest mode. I think
> we should enable PT after vm-exit but it may cause #GP. " If XRSTORS
> would restore (or initialize) PT state and IA32_RTIT_CTL.TraceEn = 1,
> the instruction causes a general-protection exception. SDM 13.5.6". if
> enable after kvm_put_guest_fpu() I think it too late.)
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 091120e..8f61a8d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -936,6 +936,7 @@  static void vmx_get_segment(struct kvm_vcpu *vcpu,
 static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
 static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
 					    u16 error_code);
+static bool vmx_pt_supported(void);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -3384,6 +3385,11 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		msr_info->data = vcpu->arch.ia32_xss;
 		break;
+	case MSR_IA32_RTIT_CTL:
+		if (!vmx_pt_supported())
+			return 1;
+		msr_info->data = vmcs_read64(GUEST_IA32_RTIT_CTL);
+		break;
 	case MSR_TSC_AUX:
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
@@ -3508,6 +3514,11 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
 		break;
+	case MSR_IA32_RTIT_CTL:
+		if (!vmx_pt_supported() || to_vmx(vcpu)->nested.vmxon)
+			return 1;
+		vmcs_write64(GUEST_IA32_RTIT_CTL, data);
+		break;
 	case MSR_TSC_AUX:
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))