diff mbox

[3/3] KVM: x86: correct mwait and monitor emulation

Message ID 1403101176-23664-4-git-send-email-namit@cs.technion.ac.il (mailing list archive)
State New, archived
Headers show

Commit Message

Nadav Amit June 18, 2014, 2:19 p.m. UTC
mwait and monitor are currently handled as nop. Considering this behavior, they
should still be handled correctly, i.e., check execution conditions and generate
exceptions when required. mwait and monitor may also be executed in real-mode
and are not handled in that case.  This patch performs the emulation of
monitor-mwait according to Intel SDM (other than checking whether interrupt can
be used as a break event).

Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
---
 arch/x86/kvm/emulate.c | 41 +++++++++++++++++++++++++++++++++++++++--
 arch/x86/kvm/svm.c     | 22 ++--------------------
 arch/x86/kvm/vmx.c     | 27 +++++++++++----------------
 3 files changed, 52 insertions(+), 38 deletions(-)

Comments

Paolo Bonzini June 18, 2014, 4:32 p.m. UTC | #1
Il 18/06/2014 16:19, Nadav Amit ha scritto:
> mwait and monitor are currently handled as nop. Considering this behavior, they
> should still be handled correctly, i.e., check execution conditions and generate
> exceptions when required. mwait and monitor may also be executed in real-mode
> and are not handled in that case.  This patch performs the emulation of
> monitor-mwait according to Intel SDM (other than checking whether interrupt can
> be used as a break event).
>
> Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
> ---
>  arch/x86/kvm/emulate.c | 41 +++++++++++++++++++++++++++++++++++++++--
>  arch/x86/kvm/svm.c     | 22 ++--------------------
>  arch/x86/kvm/vmx.c     | 27 +++++++++++----------------
>  3 files changed, 52 insertions(+), 38 deletions(-)
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index ef7a5a0..424b58d 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -3344,6 +3344,43 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
>  	return X86EMUL_CONTINUE;
>  }
>
> +static int em_monitor(struct x86_emulate_ctxt *ctxt)
> +{
> +	int rc;
> +	struct segmented_address addr;
> +	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
> +	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
> +	u8 byte;
> +
> +	if (ctxt->mode != X86EMUL_MODE_PROT64)
> +		rcx = (u32)rcx;
> +	if (rcx != 0)
> +		return emulate_gp(ctxt, 0);
> +
> +	addr.seg = seg_override(ctxt);
> +	addr.ea = ctxt->ad_bytes == 8 ? rax : (u32)rax;
> +	rc = segmented_read(ctxt, addr, &byte, 1);
> +	if (rc != X86EMUL_CONTINUE)
> +		return rc;
> +
> +	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
> +	return X86EMUL_CONTINUE;
> +}
> +
> +static int em_mwait(struct x86_emulate_ctxt *ctxt)
> +{
> +	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
> +
> +	if (ctxt->mode != X86EMUL_MODE_PROT64)
> +		rcx = (u32)rcx;
> +	if ((rcx & ~1UL) != 0)
> +		return emulate_gp(ctxt, 0);
> +
> +	/* Accepting interrupt as break event regardless to cpuid */
> +	printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
> +	return X86EMUL_CONTINUE;
> +}
> +
>  static bool valid_cr(int nr)
>  {
>  	switch (nr) {
> @@ -3557,8 +3594,8 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
>  		F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
>
>  static const struct opcode group7_rm1[] = {
> -	DI(SrcNone | Priv, monitor),
> -	DI(SrcNone | Priv, mwait),
> +	II(SrcNone | Priv | NoBigReal | UDOnPriv, em_monitor, monitor),
> +	II(SrcNone | Priv | NoBigReal | UDOnPriv, em_mwait, mwait),
>  	N, N, N, N, N, N,
>  };
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index ec8366c..a524e04 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3274,24 +3274,6 @@ static int pause_interception(struct vcpu_svm *svm)
>  	return 1;
>  }
>
> -static int nop_interception(struct vcpu_svm *svm)
> -{
> -	skip_emulated_instruction(&(svm->vcpu));
> -	return 1;
> -}
> -
> -static int monitor_interception(struct vcpu_svm *svm)
> -{
> -	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
> -	return nop_interception(svm);
> -}
> -
> -static int mwait_interception(struct vcpu_svm *svm)
> -{
> -	printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
> -	return nop_interception(svm);
> -}
> -
>  static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
>  	[SVM_EXIT_READ_CR0]			= cr_interception,
>  	[SVM_EXIT_READ_CR3]			= cr_interception,
> @@ -3349,8 +3331,8 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
>  	[SVM_EXIT_CLGI]				= clgi_interception,
>  	[SVM_EXIT_SKINIT]			= skinit_interception,
>  	[SVM_EXIT_WBINVD]                       = emulate_on_interception,
> -	[SVM_EXIT_MONITOR]			= monitor_interception,
> -	[SVM_EXIT_MWAIT]			= mwait_interception,
> +	[SVM_EXIT_MONITOR]			= emulate_on_interception,
> +	[SVM_EXIT_MWAIT]			= emulate_on_interception,
>  	[SVM_EXIT_XSETBV]			= xsetbv_interception,
>  	[SVM_EXIT_NPF]				= pf_interception,
>  };
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 801332e..7023e71 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -5672,22 +5672,17 @@ static int handle_pause(struct kvm_vcpu *vcpu)
>  	return 1;
>  }
>
> -static int handle_nop(struct kvm_vcpu *vcpu)
> +static int handle_emulate(struct kvm_vcpu *vcpu)
>  {
> -	skip_emulated_instruction(vcpu);
> -	return 1;
> -}
> +	int err = emulate_instruction(vcpu, 0);
>
> -static int handle_mwait(struct kvm_vcpu *vcpu)
> -{
> -	printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
> -	return handle_nop(vcpu);
> -}
> -
> -static int handle_monitor(struct kvm_vcpu *vcpu)
> -{
> -	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
> -	return handle_nop(vcpu);
> +	if (err != EMULATE_DONE) {
> +		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> +		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
> +		vcpu->run->internal.ndata = 0;
> +		return 0;
> +	}
> +	return 1;
>  }
>
>  /*
> @@ -6651,8 +6646,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
>  	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
>  	[EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
>  	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
> -	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_mwait,
> -	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
> +	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_emulate,
> +	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_emulate,
>  	[EXIT_REASON_INVEPT]                  = handle_invept,
>  };
>
>

In VMX, handle_invd can be reused as handle_emulate.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bandan Das June 18, 2014, 4:43 p.m. UTC | #2
Nadav Amit <namit@cs.technion.ac.il> writes:

> mwait and monitor are currently handled as nop. Considering this behavior, they
> should still be handled correctly, i.e., check execution conditions and generate
> exceptions when required. mwait and monitor may also be executed in real-mode

Is this necessary ? They are NOPs and kvm prints that out (correctly so) to dmesg.
Implementing them correctly is a different thing, but adding extra checks for NOPs 
just seems like adding extra cycles. 


> and are not handled in that case.  This patch performs the emulation of
> monitor-mwait according to Intel SDM (other than checking whether interrupt can
> be used as a break event).
>
> Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
> ---
>  arch/x86/kvm/emulate.c | 41 +++++++++++++++++++++++++++++++++++++++--
>  arch/x86/kvm/svm.c     | 22 ++--------------------
>  arch/x86/kvm/vmx.c     | 27 +++++++++++----------------
>  3 files changed, 52 insertions(+), 38 deletions(-)
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index ef7a5a0..424b58d 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -3344,6 +3344,43 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
>  	return X86EMUL_CONTINUE;
>  }
>  
> +static int em_monitor(struct x86_emulate_ctxt *ctxt)
> +{
> +	int rc;
> +	struct segmented_address addr;
> +	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
> +	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
> +	u8 byte;
> +
> +	if (ctxt->mode != X86EMUL_MODE_PROT64)
> +		rcx = (u32)rcx;
> +	if (rcx != 0)
> +		return emulate_gp(ctxt, 0);
> +
> +	addr.seg = seg_override(ctxt);
> +	addr.ea = ctxt->ad_bytes == 8 ? rax : (u32)rax;
> +	rc = segmented_read(ctxt, addr, &byte, 1);
> +	if (rc != X86EMUL_CONTINUE)
> +		return rc;
> +
> +	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
> +	return X86EMUL_CONTINUE;
> +}
> +
> +static int em_mwait(struct x86_emulate_ctxt *ctxt)
> +{
> +	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
> +
> +	if (ctxt->mode != X86EMUL_MODE_PROT64)
> +		rcx = (u32)rcx;
> +	if ((rcx & ~1UL) != 0)
> +		return emulate_gp(ctxt, 0);
> +
> +	/* Accepting interrupt as break event regardless to cpuid */
> +	printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
> +	return X86EMUL_CONTINUE;
> +}
> +
>  static bool valid_cr(int nr)
>  {
>  	switch (nr) {
> @@ -3557,8 +3594,8 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
>  		F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
>  
>  static const struct opcode group7_rm1[] = {
> -	DI(SrcNone | Priv, monitor),
> -	DI(SrcNone | Priv, mwait),
> +	II(SrcNone | Priv | NoBigReal | UDOnPriv, em_monitor, monitor),
> +	II(SrcNone | Priv | NoBigReal | UDOnPriv, em_mwait, mwait),
>  	N, N, N, N, N, N,
>  };
>  
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index ec8366c..a524e04 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3274,24 +3274,6 @@ static int pause_interception(struct vcpu_svm *svm)
>  	return 1;
>  }
>  
> -static int nop_interception(struct vcpu_svm *svm)
> -{
> -	skip_emulated_instruction(&(svm->vcpu));
> -	return 1;
> -}
> -
> -static int monitor_interception(struct vcpu_svm *svm)
> -{
> -	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
> -	return nop_interception(svm);
> -}
> -
> -static int mwait_interception(struct vcpu_svm *svm)
> -{
> -	printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
> -	return nop_interception(svm);
> -}
> -
>  static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
>  	[SVM_EXIT_READ_CR0]			= cr_interception,
>  	[SVM_EXIT_READ_CR3]			= cr_interception,
> @@ -3349,8 +3331,8 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
>  	[SVM_EXIT_CLGI]				= clgi_interception,
>  	[SVM_EXIT_SKINIT]			= skinit_interception,
>  	[SVM_EXIT_WBINVD]                       = emulate_on_interception,
> -	[SVM_EXIT_MONITOR]			= monitor_interception,
> -	[SVM_EXIT_MWAIT]			= mwait_interception,
> +	[SVM_EXIT_MONITOR]			= emulate_on_interception,
> +	[SVM_EXIT_MWAIT]			= emulate_on_interception,
>  	[SVM_EXIT_XSETBV]			= xsetbv_interception,
>  	[SVM_EXIT_NPF]				= pf_interception,
>  };
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 801332e..7023e71 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -5672,22 +5672,17 @@ static int handle_pause(struct kvm_vcpu *vcpu)
>  	return 1;
>  }
>  
> -static int handle_nop(struct kvm_vcpu *vcpu)
> +static int handle_emulate(struct kvm_vcpu *vcpu)
>  {
> -	skip_emulated_instruction(vcpu);
> -	return 1;
> -}
> +	int err = emulate_instruction(vcpu, 0);
>  
> -static int handle_mwait(struct kvm_vcpu *vcpu)
> -{
> -	printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
> -	return handle_nop(vcpu);
> -}
> -
> -static int handle_monitor(struct kvm_vcpu *vcpu)
> -{
> -	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
> -	return handle_nop(vcpu);
> +	if (err != EMULATE_DONE) {
> +		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> +		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
> +		vcpu->run->internal.ndata = 0;
> +		return 0;
> +	}
> +	return 1;
>  }
>  
>  /*
> @@ -6651,8 +6646,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
>  	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
>  	[EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
>  	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
> -	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_mwait,
> -	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
> +	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_emulate,
> +	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_emulate,
>  	[EXIT_REASON_INVEPT]                  = handle_invept,
>  };
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 18, 2014, 4:44 p.m. UTC | #3
Il 18/06/2014 18:43, Bandan Das ha scritto:
>> > mwait and monitor are currently handled as nop. Considering this behavior, they
>> > should still be handled correctly, i.e., check execution conditions and generate
>> > exceptions when required. mwait and monitor may also be executed in real-mode
> Is this necessary ? They are NOPs and kvm prints that out (correctly so) to dmesg.
> Implementing them correctly is a different thing, but adding extra checks for NOPs
> just seems like adding extra cycles.

Raising the correct exception is a good thing, though.  The guest is 
going to busy wait anyway, it doesn't matter how fast it does that. :)

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bandan Das June 18, 2014, 5:33 p.m. UTC | #4
Paolo Bonzini <pbonzini@redhat.com> writes:

> Il 18/06/2014 18:43, Bandan Das ha scritto:
>>> > mwait and monitor are currently handled as nop. Considering this behavior, they
>>> > should still be handled correctly, i.e., check execution conditions and generate
>>> > exceptions when required. mwait and monitor may also be executed in real-mode
>> Is this necessary ? They are NOPs and kvm prints that out (correctly so) to dmesg.
>> Implementing them correctly is a different thing, but adding extra checks for NOPs
>> just seems like adding extra cycles.
>
> Raising the correct exception is a good thing, though.  The guest is
> going to busy wait anyway, it doesn't matter how fast it does that. :)

Thanks, makes sense. I was also thinking why to advertise it at all, but it's 
probably more common than I think it is.

> Paolo
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Northup June 18, 2014, 5:59 p.m. UTC | #5
On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit <namit@cs.technion.ac.il> wrote:
> mwait and monitor are currently handled as nop. Considering this behavior, they
> should still be handled correctly, i.e., check execution conditions and generate
> exceptions when required. mwait and monitor may also be executed in real-mode
> and are not handled in that case.  This patch performs the emulation of
> monitor-mwait according to Intel SDM (other than checking whether interrupt can
> be used as a break event).
>
> Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
> ---
>  arch/x86/kvm/emulate.c | 41 +++++++++++++++++++++++++++++++++++++++--
>  arch/x86/kvm/svm.c     | 22 ++--------------------
>  arch/x86/kvm/vmx.c     | 27 +++++++++++----------------
>  3 files changed, 52 insertions(+), 38 deletions(-)
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index ef7a5a0..424b58d 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -3344,6 +3344,43 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
>         return X86EMUL_CONTINUE;
>  }
>
> +static int em_monitor(struct x86_emulate_ctxt *ctxt)
> +{
> +       int rc;
> +       struct segmented_address addr;
> +       u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
> +       u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
> +       u8 byte;

I'd request:

u32 ebx, ecx, edx, eax = 1;
ctxt->opt->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
if (!(ecx & FFL(MWAIT)))
        return emulate_ud(ctxt);

and also in em_mwait.

> +
> +       if (ctxt->mode != X86EMUL_MODE_PROT64)
> +               rcx = (u32)rcx;
> +       if (rcx != 0)
> +               return emulate_gp(ctxt, 0);
> +
> +       addr.seg = seg_override(ctxt);
> +       addr.ea = ctxt->ad_bytes == 8 ? rax : (u32)rax;
> +       rc = segmented_read(ctxt, addr, &byte, 1);
> +       if (rc != X86EMUL_CONTINUE)
> +               return rc;
> +
> +       printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
> +       return X86EMUL_CONTINUE;
> +}
> +
> +static int em_mwait(struct x86_emulate_ctxt *ctxt)
> +{
> +       u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
> +
> +       if (ctxt->mode != X86EMUL_MODE_PROT64)
> +               rcx = (u32)rcx;
> +       if ((rcx & ~1UL) != 0)
> +               return emulate_gp(ctxt, 0);
> +
> +       /* Accepting interrupt as break event regardless to cpuid */
> +       printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
> +       return X86EMUL_CONTINUE;
> +}
> +
>  static bool valid_cr(int nr)
>  {
>         switch (nr) {
> @@ -3557,8 +3594,8 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
>                 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
>
>  static const struct opcode group7_rm1[] = {
> -       DI(SrcNone | Priv, monitor),
> -       DI(SrcNone | Priv, mwait),
> +       II(SrcNone | Priv | NoBigReal | UDOnPriv, em_monitor, monitor),
> +       II(SrcNone | Priv | NoBigReal | UDOnPriv, em_mwait, mwait),
>         N, N, N, N, N, N,
>  };
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index ec8366c..a524e04 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3274,24 +3274,6 @@ static int pause_interception(struct vcpu_svm *svm)
>         return 1;
>  }
>
> -static int nop_interception(struct vcpu_svm *svm)
> -{
> -       skip_emulated_instruction(&(svm->vcpu));
> -       return 1;
> -}
> -
> -static int monitor_interception(struct vcpu_svm *svm)
> -{
> -       printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
> -       return nop_interception(svm);
> -}
> -
> -static int mwait_interception(struct vcpu_svm *svm)
> -{
> -       printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
> -       return nop_interception(svm);
> -}
> -
>  static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
>         [SVM_EXIT_READ_CR0]                     = cr_interception,
>         [SVM_EXIT_READ_CR3]                     = cr_interception,
> @@ -3349,8 +3331,8 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
>         [SVM_EXIT_CLGI]                         = clgi_interception,
>         [SVM_EXIT_SKINIT]                       = skinit_interception,
>         [SVM_EXIT_WBINVD]                       = emulate_on_interception,
> -       [SVM_EXIT_MONITOR]                      = monitor_interception,
> -       [SVM_EXIT_MWAIT]                        = mwait_interception,
> +       [SVM_EXIT_MONITOR]                      = emulate_on_interception,
> +       [SVM_EXIT_MWAIT]                        = emulate_on_interception,
>         [SVM_EXIT_XSETBV]                       = xsetbv_interception,
>         [SVM_EXIT_NPF]                          = pf_interception,
>  };
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 801332e..7023e71 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -5672,22 +5672,17 @@ static int handle_pause(struct kvm_vcpu *vcpu)
>         return 1;
>  }
>
> -static int handle_nop(struct kvm_vcpu *vcpu)
> +static int handle_emulate(struct kvm_vcpu *vcpu)
>  {
> -       skip_emulated_instruction(vcpu);
> -       return 1;
> -}
> +       int err = emulate_instruction(vcpu, 0);
>
> -static int handle_mwait(struct kvm_vcpu *vcpu)
> -{
> -       printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
> -       return handle_nop(vcpu);
> -}
> -
> -static int handle_monitor(struct kvm_vcpu *vcpu)
> -{
> -       printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
> -       return handle_nop(vcpu);
> +       if (err != EMULATE_DONE) {
> +               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> +               vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
> +               vcpu->run->internal.ndata = 0;
> +               return 0;
> +       }
> +       return 1;
>  }
>
>  /*
> @@ -6651,8 +6646,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
>         [EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
>         [EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
>         [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
> -       [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_mwait,
> -       [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
> +       [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_emulate,
> +       [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_emulate,
>         [EXIT_REASON_INVEPT]                  = handle_invept,
>  };
>
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nadav Amit June 18, 2014, 6:23 p.m. UTC | #6
On 6/18/14, 8:59 PM, Eric Northup wrote:
> On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit <namit@cs.technion.ac.il> wrote:
>> mwait and monitor are currently handled as nop. Considering this behavior, they
>> should still be handled correctly, i.e., check execution conditions and generate
>> exceptions when required. mwait and monitor may also be executed in real-mode
>> and are not handled in that case.  This patch performs the emulation of
>> monitor-mwait according to Intel SDM (other than checking whether interrupt can
>> be used as a break event).
>>
>> Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
>> ---
>>   arch/x86/kvm/emulate.c | 41 +++++++++++++++++++++++++++++++++++++++--
>>   arch/x86/kvm/svm.c     | 22 ++--------------------
>>   arch/x86/kvm/vmx.c     | 27 +++++++++++----------------
>>   3 files changed, 52 insertions(+), 38 deletions(-)
>>
>> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
>> index ef7a5a0..424b58d 100644
>> --- a/arch/x86/kvm/emulate.c
>> +++ b/arch/x86/kvm/emulate.c
>> @@ -3344,6 +3344,43 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
>>          return X86EMUL_CONTINUE;
>>   }
>>
>> +static int em_monitor(struct x86_emulate_ctxt *ctxt)
>> +{
>> +       int rc;
>> +       struct segmented_address addr;
>> +       u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
>> +       u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
>> +       u8 byte;
>
> I'd request:
>
> u32 ebx, ecx, edx, eax = 1;
> ctxt->opt->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
> if (!(ecx & FFL(MWAIT)))
>          return emulate_ud(ctxt);
>
> and also in em_mwait.
>

I had similar implementation on previous version, which also checked on 
mwait whether "interrupt as break event" matches ECX value. However, I 
was under the impression that it was decided that MWAIT will always be 
emulated as NOP to avoid misbehaving VMs that ignore CPUID (see the 
discussion at http://www.spinics.net/lists/kvm/msg102766.html ).

Nadav
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Northup June 18, 2014, 6:30 p.m. UTC | #7
Quoting Gabriel's post http://www.spinics.net/lists/kvm/msg103792.html :

[...]

> E.g., OS X 10.5 *does* check CPUID, and panics if it doesn't find it.
> It needs the MONITOR cpuid flag to be on, *and* the actual
> instructions to work.




On Wed, Jun 18, 2014 at 11:23 AM, Nadav Amit <nadav.amit@gmail.com> wrote:
> On 6/18/14, 8:59 PM, Eric Northup wrote:
>>
>> On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit <namit@cs.technion.ac.il>
>> wrote:
>>>
>>> mwait and monitor are currently handled as nop. Considering this
>>> behavior, they
>>> should still be handled correctly, i.e., check execution conditions and
>>> generate
>>> exceptions when required. mwait and monitor may also be executed in
>>> real-mode
>>> and are not handled in that case.  This patch performs the emulation of
>>> monitor-mwait according to Intel SDM (other than checking whether
>>> interrupt can
>>> be used as a break event).
>>>
>>> Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
>>> ---
>>>   arch/x86/kvm/emulate.c | 41 +++++++++++++++++++++++++++++++++++++++--
>>>   arch/x86/kvm/svm.c     | 22 ++--------------------
>>>   arch/x86/kvm/vmx.c     | 27 +++++++++++----------------
>>>   3 files changed, 52 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
>>> index ef7a5a0..424b58d 100644
>>> --- a/arch/x86/kvm/emulate.c
>>> +++ b/arch/x86/kvm/emulate.c
>>> @@ -3344,6 +3344,43 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
>>>          return X86EMUL_CONTINUE;
>>>   }
>>>
>>> +static int em_monitor(struct x86_emulate_ctxt *ctxt)
>>> +{
>>> +       int rc;
>>> +       struct segmented_address addr;
>>> +       u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
>>> +       u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
>>> +       u8 byte;
>>
>>
>> I'd request:
>>
>> u32 ebx, ecx, edx, eax = 1;
>> ctxt->opt->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
>> if (!(ecx & FFL(MWAIT)))
>>          return emulate_ud(ctxt);
>>
>> and also in em_mwait.
>>
>
> I had similar implementation on previous version, which also checked on
> mwait whether "interrupt as break event" matches ECX value. However, I was
> under the impression that it was decided that MWAIT will always be emulated
> as NOP to avoid misbehaving VMs that ignore CPUID (see the discussion at
> http://www.spinics.net/lists/kvm/msg102766.html ).
>
> Nadav
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gabriel L. Somlo June 18, 2014, 6:59 p.m. UTC | #8
On Wed, Jun 18, 2014 at 11:30:07AM -0700, Eric Northup wrote:
> Quoting Gabriel's post http://www.spinics.net/lists/kvm/msg103792.html :
> 
> [...]
> 
> > E.g., OS X 10.5 *does* check CPUID, and panics if it doesn't find it.
> > It needs the MONITOR cpuid flag to be on, *and* the actual
> > instructions to work.

That was an argument in favor of finding a mechanism to allow (qemu)
users to enable an otherwise default-off monitor cpuid flag.

We definitely don't want to advertise monitor/mwait availability to
guests which would otherwise sanely fail back to a hlt-based idle loop
when cpuid tells them monitor/mwait are not available :)

However, check my earlier proposal of backing out of monitor/mwait
entirely (as it turns out, there's a kernel command line to tell OS X
not to use monitor/mwait, which is IMHO vastly preferable to creating
"undocumented" KVM hacks :)

Thanks much,
--Gabriel
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 19, 2014, 11:34 a.m. UTC | #9
Il 18/06/2014 19:59, Eric Northup ha scritto:
> On Wed, Jun 18, 2014 at 7:19 AM, Nadav Amit <namit@cs.technion.ac.il> wrote:
>> mwait and monitor are currently handled as nop. Considering this behavior, they
>> should still be handled correctly, i.e., check execution conditions and generate
>> exceptions when required. mwait and monitor may also be executed in real-mode
>> and are not handled in that case.  This patch performs the emulation of
>> monitor-mwait according to Intel SDM (other than checking whether interrupt can
>> be used as a break event).
>>
>> Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
>> ---
>>  arch/x86/kvm/emulate.c | 41 +++++++++++++++++++++++++++++++++++++++--
>>  arch/x86/kvm/svm.c     | 22 ++--------------------
>>  arch/x86/kvm/vmx.c     | 27 +++++++++++----------------
>>  3 files changed, 52 insertions(+), 38 deletions(-)
>>
>> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
>> index ef7a5a0..424b58d 100644
>> --- a/arch/x86/kvm/emulate.c
>> +++ b/arch/x86/kvm/emulate.c
>> @@ -3344,6 +3344,43 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
>>         return X86EMUL_CONTINUE;
>>  }
>>
>> +static int em_monitor(struct x86_emulate_ctxt *ctxt)
>> +{
>> +       int rc;
>> +       struct segmented_address addr;
>> +       u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
>> +       u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
>> +       u8 byte;
>
> I'd request:
>
> u32 ebx, ecx, edx, eax = 1;
> ctxt->opt->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
> if (!(ecx & FFL(MWAIT)))
>         return emulate_ud(ctxt);
>
> and also in em_mwait.

Ignoring the fact that this should never be true 
(KVM_GET_SUPPORTED_CPUID never reports the MWAIT bit), why should 
MONITOR and MWAIT be special?  We do not do this kind of check for SSE 
or AVX instructions.

An alternative is to record the address that was being waited on, and 
invoke PLE (kvm_vcpu_on_spin) if the current address matches the last 
one.  A VMEXIT + emulation takes a couple thousand cycles, which is the 
same order of magnitude as the PLE window.

Even if there is a workaround, I don't think reverting the patch is 
necessary.  The patch was there for a fringe case anyway (recent 
versions of Mac OS X get CPUID right), so I don't think the availability 
of a work around changes the assessment of how ugly/useful MONITOR/MWAIT is.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ef7a5a0..424b58d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3344,6 +3344,43 @@  static int em_bswap(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_monitor(struct x86_emulate_ctxt *ctxt)
+{
+	int rc;
+	struct segmented_address addr;
+	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
+	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
+	u8 byte;
+
+	if (ctxt->mode != X86EMUL_MODE_PROT64)
+		rcx = (u32)rcx;
+	if (rcx != 0)
+		return emulate_gp(ctxt, 0);
+
+	addr.seg = seg_override(ctxt);
+	addr.ea = ctxt->ad_bytes == 8 ? rax : (u32)rax;
+	rc = segmented_read(ctxt, addr, &byte, 1);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
+	return X86EMUL_CONTINUE;
+}
+
+static int em_mwait(struct x86_emulate_ctxt *ctxt)
+{
+	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
+
+	if (ctxt->mode != X86EMUL_MODE_PROT64)
+		rcx = (u32)rcx;
+	if ((rcx & ~1UL) != 0)
+		return emulate_gp(ctxt, 0);
+
+	/* Accepting interrupt as break event regardless to cpuid */
+	printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
+	return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 {
 	switch (nr) {
@@ -3557,8 +3594,8 @@  static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 		F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
 
 static const struct opcode group7_rm1[] = {
-	DI(SrcNone | Priv, monitor),
-	DI(SrcNone | Priv, mwait),
+	II(SrcNone | Priv | NoBigReal | UDOnPriv, em_monitor, monitor),
+	II(SrcNone | Priv | NoBigReal | UDOnPriv, em_mwait, mwait),
 	N, N, N, N, N, N,
 };
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ec8366c..a524e04 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3274,24 +3274,6 @@  static int pause_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
-static int nop_interception(struct vcpu_svm *svm)
-{
-	skip_emulated_instruction(&(svm->vcpu));
-	return 1;
-}
-
-static int monitor_interception(struct vcpu_svm *svm)
-{
-	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
-	return nop_interception(svm);
-}
-
-static int mwait_interception(struct vcpu_svm *svm)
-{
-	printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
-	return nop_interception(svm);
-}
-
 static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR0]			= cr_interception,
 	[SVM_EXIT_READ_CR3]			= cr_interception,
@@ -3349,8 +3331,8 @@  static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_CLGI]				= clgi_interception,
 	[SVM_EXIT_SKINIT]			= skinit_interception,
 	[SVM_EXIT_WBINVD]                       = emulate_on_interception,
-	[SVM_EXIT_MONITOR]			= monitor_interception,
-	[SVM_EXIT_MWAIT]			= mwait_interception,
+	[SVM_EXIT_MONITOR]			= emulate_on_interception,
+	[SVM_EXIT_MWAIT]			= emulate_on_interception,
 	[SVM_EXIT_XSETBV]			= xsetbv_interception,
 	[SVM_EXIT_NPF]				= pf_interception,
 };
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 801332e..7023e71 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5672,22 +5672,17 @@  static int handle_pause(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-static int handle_nop(struct kvm_vcpu *vcpu)
+static int handle_emulate(struct kvm_vcpu *vcpu)
 {
-	skip_emulated_instruction(vcpu);
-	return 1;
-}
+	int err = emulate_instruction(vcpu, 0);
 
-static int handle_mwait(struct kvm_vcpu *vcpu)
-{
-	printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
-	return handle_nop(vcpu);
-}
-
-static int handle_monitor(struct kvm_vcpu *vcpu)
-{
-	printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
-	return handle_nop(vcpu);
+	if (err != EMULATE_DONE) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
+		return 0;
+	}
+	return 1;
 }
 
 /*
@@ -6651,8 +6646,8 @@  static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
 	[EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
 	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
-	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_mwait,
-	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
+	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_emulate,
+	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_emulate,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
 };