
[v5,2/4] KVM: x86: report negative values from wrmsr to userspace

Message ID 20200921131923.120833-3-mlevitsk@redhat.com (mailing list archive)
State New, archived
Series KVM: nSVM: ondemand nested state allocation

Commit Message

Maxim Levitsky Sept. 21, 2020, 1:19 p.m. UTC
This will allow us to make some MSR writes fatal to the guest
(e.g. when an out-of-memory condition occurs)

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/emulate.c | 7 +++++--
 arch/x86/kvm/x86.c     | 5 +++--
 2 files changed, 8 insertions(+), 4 deletions(-)
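
For context, a minimal sketch of the kind of MSR-write handler this
return-value convention enables (hedged: the handler below is
illustrative, not from this series; svm_allocate_nested() and
valid_efer_bits() are stand-in names). A handler may now return a
negative errno, which is reported to userspace, instead of being limited
to 1 (inject #GP) or 0 (success):

	static int example_set_efer(struct kvm_vcpu *vcpu, u64 efer)
	{
		if (efer & EFER_SVME) {
			/* e.g. on-demand allocation of nested state */
			int ret = svm_allocate_nested(to_svm(vcpu));

			if (ret)
				return ret;	/* e.g. -ENOMEM: exit to userspace */
		}

		if (!valid_efer_bits(vcpu, efer))
			return 1;		/* inject #GP into the guest */

		return 0;			/* success */
	}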

Comments

Sean Christopherson Sept. 21, 2020, 4:08 p.m. UTC | #1
On Mon, Sep 21, 2020 at 04:19:21PM +0300, Maxim Levitsky wrote:
> This will allow us to make some MSR writes fatal to the guest
> (e.g. when an out-of-memory condition occurs)
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  arch/x86/kvm/emulate.c | 7 +++++--
>  arch/x86/kvm/x86.c     | 5 +++--
>  2 files changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 1d450d7710d63..d855304f5a509 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -3702,13 +3702,16 @@ static int em_dr_write(struct x86_emulate_ctxt *ctxt)
>  static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
>  {
>  	u64 msr_data;
> +	int ret;
>  
>  	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
>  		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
> -	if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
> +
> +	ret = ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data);
> +	if (ret > 0)
>  		return emulate_gp(ctxt, 0);
>  
> -	return X86EMUL_CONTINUE;
> +	return ret < 0 ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
>  }
>  
>  static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 063d70e736f7f..b6c67ab7c4f34 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1612,15 +1612,16 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
>  {
>  	u32 ecx = kvm_rcx_read(vcpu);
>  	u64 data = kvm_read_edx_eax(vcpu);
> +	int ret = kvm_set_msr(vcpu, ecx, data);
>  
> -	if (kvm_set_msr(vcpu, ecx, data)) {
> +	if (ret > 0) {
>  		trace_kvm_msr_write_ex(ecx, data);
>  		kvm_inject_gp(vcpu, 0);
>  		return 1;
>  	}
>  
>  	trace_kvm_msr_write(ecx, data);

Tracing the access as non-faulting feels wrong.  The WRMSR has not completed,
e.g. if userspace cleanly handles -ENOMEM and restarts the guest, KVM would
trace the WRMSR twice.

What about:

	int ret = kvm_set_msr(vcpu, ecx, data);

	if (ret < 0)
		return ret;

	if (ret) {
		trace_kvm_msr_write_ex(ecx, data);
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	trace_kvm_msr_write(ecx, data);
	return kvm_skip_emulated_instruction(vcpu);

> -	return kvm_skip_emulated_instruction(vcpu);
> +	return ret < 0 ? ret : kvm_skip_emulated_instruction(vcpu);
>  }
>  EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
>  
> -- 
> 2.26.2
>
Maxim Levitsky Sept. 22, 2020, 4:13 p.m. UTC | #2
On Mon, 2020-09-21 at 09:08 -0700, Sean Christopherson wrote:
> On Mon, Sep 21, 2020 at 04:19:21PM +0300, Maxim Levitsky wrote:
> > This will allow us to make some MSR writes fatal to the guest
> > (e.g. when an out-of-memory condition occurs)
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  arch/x86/kvm/emulate.c | 7 +++++--
> >  arch/x86/kvm/x86.c     | 5 +++--
> >  2 files changed, 8 insertions(+), 4 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> > index 1d450d7710d63..d855304f5a509 100644
> > --- a/arch/x86/kvm/emulate.c
> > +++ b/arch/x86/kvm/emulate.c
> > @@ -3702,13 +3702,16 @@ static int em_dr_write(struct x86_emulate_ctxt *ctxt)
> >  static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
> >  {
> >  	u64 msr_data;
> > +	int ret;
> >  
> >  	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
> >  		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
> > -	if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
> > +
> > +	ret = ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data);
> > +	if (ret > 0)
> >  		return emulate_gp(ctxt, 0);
> >  
> > -	return X86EMUL_CONTINUE;
> > +	return ret < 0 ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
> >  }
> >  
> >  static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 063d70e736f7f..b6c67ab7c4f34 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1612,15 +1612,16 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
> >  {
> >  	u32 ecx = kvm_rcx_read(vcpu);
> >  	u64 data = kvm_read_edx_eax(vcpu);
> > +	int ret = kvm_set_msr(vcpu, ecx, data);
> >  
> > -	if (kvm_set_msr(vcpu, ecx, data)) {
> > +	if (ret > 0) {
> >  		trace_kvm_msr_write_ex(ecx, data);
> >  		kvm_inject_gp(vcpu, 0);
> >  		return 1;
> >  	}
> >  
> >  	trace_kvm_msr_write(ecx, data);
> 
> Tracing the access as non-faulting feels wrong.  The WRMSR has not completed,
> e.g. if userspace cleanly handles -ENOMEM and restarts the guest, KVM would
> trace the WRMSR twice.

I guess you are right. In this case we didn't actually execute the
instruction (an exception can also be thought of as execution of the
instruction, since it leads to the exception handler); here we just fail
and let userspace do something, so we can restart from the same point
again.

So I'll go with your suggestion.

Thanks for the review,
	Best regards,
		Maxim Levitsky

> 
> What about:
> 
> 	int ret = kvm_set_msr(vcpu, ecx, data);
> 
> 	if (ret < 0)
> 		return ret;
> 
> 	if (ret) {
> 		trace_kvm_msr_write_ex(ecx, data);
> 		kvm_inject_gp(vcpu, 0);
> 		return 1;
> 	}
> 
> 	trace_kvm_msr_write(ecx, data);
> 	return kvm_skip_emulated_instruction(vcpu);
> 
> > -	return kvm_skip_emulated_instruction(vcpu);
> > +	return ret < 0 ? ret : kvm_skip_emulated_instruction(vcpu);
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
> >  
> > -- 
> > 2.26.2
> >

Patch

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1d450d7710d63..d855304f5a509 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3702,13 +3702,16 @@ static int em_dr_write(struct x86_emulate_ctxt *ctxt)
 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
 {
 	u64 msr_data;
+	int ret;
 
 	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
 		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
-	if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
+
+	ret = ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data);
+	if (ret > 0)
 		return emulate_gp(ctxt, 0);
 
-	return X86EMUL_CONTINUE;
+	return ret < 0 ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
 }
 
 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 063d70e736f7f..b6c67ab7c4f34 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1612,15 +1612,16 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
 {
 	u32 ecx = kvm_rcx_read(vcpu);
 	u64 data = kvm_read_edx_eax(vcpu);
+	int ret = kvm_set_msr(vcpu, ecx, data);
 
-	if (kvm_set_msr(vcpu, ecx, data)) {
+	if (ret > 0) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
 	}
 
 	trace_kvm_msr_write(ecx, data);
-	return kvm_skip_emulated_instruction(vcpu);
+	return ret < 0 ? ret : kvm_skip_emulated_instruction(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
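
A note on the userspace-visible effect (an editorial sketch, not part of
the patch): a negative return from kvm_emulate_wrmsr() propagates out of
the vcpu run loop, so the VMM sees its KVM_RUN ioctl fail with -1 and
errno set. Because the WRMSR was neither traced nor skipped, resuming
the vCPU re-executes the same instruction. The error handling below is
hypothetical:

	#include <errno.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Run the vCPU once; returns 0 on a normal exit, 1 if the caller
	 * should retry (possibly after freeing memory), -1 on fatal error. */
	static int run_vcpu_once(int vcpu_fd)
	{
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
			if (errno == EINTR)
				return 1;	/* interrupted; just retry */
			if (errno == ENOMEM) {
				fprintf(stderr, "KVM_RUN: out of memory, retrying\n");
				return 1;	/* free memory, then resume the vCPU */
			}
			return -1;		/* unrecoverable */
		}
		return 0;	/* inspect struct kvm_run for the exit reason */
	}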