
[v5,07/34] KVM: SVM: Add required changes to support intercepts under SEV-ES

Message ID eb73a31713e8ddc324b61c4d4425f27cbf5eae50.1607620209.git.thomas.lendacky@amd.com (mailing list archive)
State New, archived
Series SEV-ES hypervisor support

Commit Message

Tom Lendacky Dec. 10, 2020, 5:09 p.m. UTC
From: Tom Lendacky <thomas.lendacky@amd.com>

When a guest is running under SEV-ES, the hypervisor cannot access the
guest register state. There are numerous places in the KVM code where
registers that cannot be accessed under SEV-ES (e.g. RIP, CR0, etc.)
are read or written. Add checks to prevent these register accesses and
add intercept update support at various points within the KVM code.

Also, when handling a VMGEXIT, exceptions are passed back through the
GHCB. Since the RDMSR/WRMSR intercepts may inject a #GP on error,
update the SVM intercept handlers to report the #GP through the GHCB
for SEV-ES guests.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/include/asm/svm.h |   3 +-
 arch/x86/kvm/svm/svm.c     | 111 +++++++++++++++++++++++++++++++++----
 arch/x86/kvm/x86.c         |   6 +-
 3 files changed, 107 insertions(+), 13 deletions(-)

Comments

Paolo Bonzini Dec. 14, 2020, 3:33 p.m. UTC | #1
On 10/12/20 18:09, Tom Lendacky wrote:
> @@ -2797,7 +2838,27 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
>   
>   static int wrmsr_interception(struct vcpu_svm *svm)
>   {
> -	return kvm_emulate_wrmsr(&svm->vcpu);
> +	u32 ecx;
> +	u64 data;
> +
> +	if (!sev_es_guest(svm->vcpu.kvm))
> +		return kvm_emulate_wrmsr(&svm->vcpu);
> +
> +	ecx = kvm_rcx_read(&svm->vcpu);
> +	data = kvm_read_edx_eax(&svm->vcpu);
> +	if (kvm_set_msr(&svm->vcpu, ecx, data)) {
> +		trace_kvm_msr_write_ex(ecx, data);
> +		ghcb_set_sw_exit_info_1(svm->ghcb, 1);
> +		ghcb_set_sw_exit_info_2(svm->ghcb,
> +					X86_TRAP_GP |
> +					SVM_EVTINJ_TYPE_EXEPT |
> +					SVM_EVTINJ_VALID);
> +		return 1;
> +	}
> +
> +	trace_kvm_msr_write(ecx, data);
> +
> +	return kvm_skip_emulated_instruction(&svm->vcpu);
>   }
>   
>   static int msr_interception(struct vcpu_svm *svm)

This code duplication is ugly, and does not work with userspace MSR 
filters either.

But we can instead trap the completion of the MSR read/write to use 
ghcb_set_sw_exit_info_1 instead of kvm_inject_gp, with a callback like

static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
{
         struct vcpu_svm *svm = to_svm(vcpu);

         if (!sev_es_guest(vcpu->kvm) || !err)
                 return kvm_complete_insn_gp(vcpu, err);

         ghcb_set_sw_exit_info_1(svm->ghcb, 1);
         ghcb_set_sw_exit_info_2(svm->ghcb,
                                 X86_TRAP_GP |
                                 SVM_EVTINJ_TYPE_EXEPT |
                                 SVM_EVTINJ_VALID);
         return 1;
}


...
	.complete_emulated_msr = svm_complete_emulated_msr,

> @@ -2827,7 +2888,14 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
>   static int pause_interception(struct vcpu_svm *svm)
>   {
>   	struct kvm_vcpu *vcpu = &svm->vcpu;
> -	bool in_kernel = (svm_get_cpl(vcpu) == 0);
> +	bool in_kernel;
> +
> +	/*
> +	 * CPL is not made available for an SEV-ES guest, so just set in_kernel
> +	 * to true.
> +	 */
> +	in_kernel = (sev_es_guest(svm->vcpu.kvm)) ? true
> +						  : (svm_get_cpl(vcpu) == 0);
>   
>   	if (!kvm_pause_in_guest(vcpu->kvm))
>   		grow_ple_window(vcpu);

See below.

> @@ -3273,6 +3351,13 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
>   	struct vcpu_svm *svm = to_svm(vcpu);
>   	struct vmcb *vmcb = svm->vmcb;
>   
> +	/*
> +	 * SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt mask
> +	 * bit to determine the state of the IF flag.
> +	 */
> +	if (sev_es_guest(svm->vcpu.kvm))
> +		return !(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK);

This seems wrong: you have to take into account 
SVM_INTERRUPT_SHADOW_MASK as well.  Also, even though GIF is not really 
used by SEV-ES guests, I think it's nicer to put this check afterwards.

That is:

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4372e45c8f06..2dd9c9698480 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3247,7 +3247,14 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
  	if (!gif_set(svm))
  		return true;

-	if (is_guest_mode(vcpu)) {
+	if (sev_es_guest(svm->vcpu.kvm)) {
+		/*
+		 * SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt mask
+		 * bit to determine the state of the IF flag.
+		 */
+		if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
+			return true;
+	} else if (is_guest_mode(vcpu)) {
  		/* As long as interrupts are being delivered...  */
  		if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
  		    ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)



>   	if (!gif_set(svm))
>   		return true;
>   
> @@ -3458,6 +3543,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
>   		svm->vcpu.arch.nmi_injected = true;
>   		break;
>   	case SVM_EXITINTINFO_TYPE_EXEPT:
> +		/*
> +		 * Never re-inject a #VC exception.
> +		 */
> +		if (vector == X86_TRAP_VC)
> +			break;
> +
>   		/*
>   		 * In case of software exceptions, do not reinject the vector,
>   		 * but re-execute the instruction instead. Rewind RIP first
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a3fdc16cfd6f..b6809a2851d2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4018,7 +4018,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>   {
>   	int idx;
>   
> -	if (vcpu->preempted)
> +	if (vcpu->preempted && !vcpu->arch.guest_state_protected)
>   		vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);

This has to be true, otherwise no directed yield will be done at all:

	if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
	    !kvm_arch_vcpu_in_kernel(vcpu))
		continue;

Or more easily, just use in_kernel == false in pause_interception, like

+	/*
+	 * CPL is not made available for an SEV-ES guest, therefore
+	 * vcpu->arch.preempted_in_kernel can never be true.  Just
+	 * set in_kernel to false as well.
+	 */
+	in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0;

>   
>   	/*
> @@ -8161,7 +8161,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
>   {
>   	struct kvm_run *kvm_run = vcpu->run;
>   
> -	kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
> +	kvm_run->if_flag = (vcpu->arch.guest_state_protected)
> +		? kvm_arch_interrupt_allowed(vcpu)
> +		: (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;

Here indeed you only want the interrupt allowed bit, not the interrupt 
window.  But we can just be bold and always set it to true.

- for userspace irqchip, kvm_run->ready_for_interrupt_injection is set 
just below and it will always be false if kvm_arch_interrupt_allowed is 
false

- for in-kernel APIC, if_flag is documented to be invalid (though it 
actually is valid).  For split irqchip, they can just use 
kvm_run->ready_for_interrupt_injection; for entirely in-kernel interrupt 
handling, userspace does not need if_flag at all.
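
That is, the bold variant would be just (untested, in post_kvm_run_save()):

	if (vcpu->arch.guest_state_protected)
		/* RFLAGS.IF cannot be read; just report it as set. */
		kvm_run->if_flag = true;
	else
		kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;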

Paolo

>   	kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
>   	kvm_run->cr8 = kvm_get_cr8(vcpu);
>   	kvm_run->apic_base = kvm_get_apic_base(vcpu);
>
Tom Lendacky Dec. 14, 2020, 7 p.m. UTC | #2
On 12/14/20 9:33 AM, Paolo Bonzini wrote:
> On 10/12/20 18:09, Tom Lendacky wrote:
>> @@ -2797,7 +2838,27 @@ static int svm_set_msr(struct kvm_vcpu *vcpu,
>> struct msr_data *msr)
>>     static int wrmsr_interception(struct vcpu_svm *svm)
>>   {
>> -    return kvm_emulate_wrmsr(&svm->vcpu);
>> +    u32 ecx;
>> +    u64 data;
>> +
>> +    if (!sev_es_guest(svm->vcpu.kvm))
>> +        return kvm_emulate_wrmsr(&svm->vcpu);
>> +
>> +    ecx = kvm_rcx_read(&svm->vcpu);
>> +    data = kvm_read_edx_eax(&svm->vcpu);
>> +    if (kvm_set_msr(&svm->vcpu, ecx, data)) {
>> +        trace_kvm_msr_write_ex(ecx, data);
>> +        ghcb_set_sw_exit_info_1(svm->ghcb, 1);
>> +        ghcb_set_sw_exit_info_2(svm->ghcb,
>> +                    X86_TRAP_GP |
>> +                    SVM_EVTINJ_TYPE_EXEPT |
>> +                    SVM_EVTINJ_VALID);
>> +        return 1;
>> +    }
>> +
>> +    trace_kvm_msr_write(ecx, data);
>> +
>> +    return kvm_skip_emulated_instruction(&svm->vcpu);
>>   }
>>     static int msr_interception(struct vcpu_svm *svm)
> 
> This code duplication is ugly, and does not work with userspace MSR
> filters either.

Agreed, and I missed that the userspace MSR support went in.

> 
> But we can instead trap the completion of the MSR read/write to use
> ghcb_set_sw_exit_info_1 instead of kvm_inject_gp, with a callback like
> 
> static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
> {
>         struct vcpu_svm *svm = to_svm(vcpu);
>
>         if (!sev_es_guest(vcpu->kvm) || !err)
>                 return kvm_complete_insn_gp(vcpu, err);
> 
>         ghcb_set_sw_exit_info_1(svm->ghcb, 1);
>         ghcb_set_sw_exit_info_2(svm->ghcb,
>                                 X86_TRAP_GP |
>                                 SVM_EVTINJ_TYPE_EXEPT |
>                                 SVM_EVTINJ_VALID);
>         return 1;
> }

If we use kvm_complete_insn_gp() we lose the tracing, and it needs to be
able to deal with read completion setting the registers. It also needs to
work with both kvm_emulate_rdmsr/wrmsr() when not bouncing to userspace.
Let me take a shot at covering all the cases and see what I can come up
with.
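
Maybe something along these lines for the wrmsr side (completely
untested sketch, ignoring the userspace bounce for now; the rdmsr side
would also write RAX/RDX before invoking the callback):

	int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
	{
		u32 ecx = kvm_rcx_read(vcpu);
		u64 data = kvm_read_edx_eax(vcpu);
		int r = kvm_set_msr(vcpu, ecx, data);

		/* Tracing stays in the common emulation path... */
		if (r)
			trace_kvm_msr_write_ex(ecx, data);
		else
			trace_kvm_msr_write(ecx, data);

		/* ...and any #GP is reported via the vendor callback. */
		return kvm_x86_ops.complete_emulated_msr(vcpu, r);
	}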

I noticed that the userspace completion path doesn't have tracing
invocations, trace_kvm_msr_read/write_ex() or trace_kvm_msr_read/write().
Is that by design?

> 
> 
> ...
>     .complete_emulated_msr = svm_complete_emulated_msr,
> 
>> @@ -2827,7 +2888,14 @@ static int interrupt_window_interception(struct
>> vcpu_svm *svm)
>>   static int pause_interception(struct vcpu_svm *svm)
>>   {
>>       struct kvm_vcpu *vcpu = &svm->vcpu;
>> -    bool in_kernel = (svm_get_cpl(vcpu) == 0);
>> +    bool in_kernel;
>> +
>> +    /*
>> +     * CPL is not made available for an SEV-ES guest, so just set
>> in_kernel
>> +     * to true.
>> +     */
>> +    in_kernel = (sev_es_guest(svm->vcpu.kvm)) ? true
>> +                          : (svm_get_cpl(vcpu) == 0);
>>         if (!kvm_pause_in_guest(vcpu->kvm))
>>           grow_ple_window(vcpu);
> 
> See below.
> 
>> @@ -3273,6 +3351,13 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
>>       struct vcpu_svm *svm = to_svm(vcpu);
>>       struct vmcb *vmcb = svm->vmcb;
>>   +    /*
>> +     * SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt mask
>> +     * bit to determine the state of the IF flag.
>> +     */
>> +    if (sev_es_guest(svm->vcpu.kvm))
>> +        return !(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK);
> 
> This seems wrong: you have to take into account SVM_INTERRUPT_SHADOW_MASK
> as well.  Also, even though GIF is not really used by SEV-ES guests, I
> think it's nicer to put this check afterwards.
> 
> That is:
> 
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 4372e45c8f06..2dd9c9698480 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3247,7 +3247,14 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
>      if (!gif_set(svm))
>          return true;
> 
> -    if (is_guest_mode(vcpu)) {
> +    if (sev_es_guest(svm->vcpu.kvm)) {
> +        /*
> +         * SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt mask
> +         * bit to determine the state of the IF flag.
> +         */
> +        if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
> +            return true;
> +    } else if (is_guest_mode(vcpu)) {
>          /* As long as interrupts are being delivered...  */
>          if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
>              ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
> 
> 

Yup, I'll make that change.

> 
>>       if (!gif_set(svm))
>>           return true;
>>   @@ -3458,6 +3543,12 @@ static void svm_complete_interrupts(struct
>> vcpu_svm *svm)
>>           svm->vcpu.arch.nmi_injected = true;
>>           break;
>>       case SVM_EXITINTINFO_TYPE_EXEPT:
>> +        /*
>> +         * Never re-inject a #VC exception.
>> +         */
>> +        if (vector == X86_TRAP_VC)
>> +            break;
>> +
>>           /*
>>            * In case of software exceptions, do not reinject the vector,
>>            * but re-execute the instruction instead. Rewind RIP first
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index a3fdc16cfd6f..b6809a2851d2 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -4018,7 +4018,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>>   {
>>       int idx;
>>   -    if (vcpu->preempted)
>> +    if (vcpu->preempted && !vcpu->arch.guest_state_protected)
>>           vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
> 
> This has to be true, otherwise no directed yield will be done at all:
> 
>     if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
>         !kvm_arch_vcpu_in_kernel(vcpu))
>         continue;
> 
> Or more easily, just use in_kernel == false in pause_interception, like
> 
> +    /*
> +     * CPL is not made available for an SEV-ES guest, therefore
> +     * vcpu->arch.preempted_in_kernel can never be true.  Just
> +     * set in_kernel to false as well.
> +     */
> +    in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0;

Sounds good, I'll make that change.

> 
>>         /*
>> @@ -8161,7 +8161,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
>>   {
>>       struct kvm_run *kvm_run = vcpu->run;
>>   -    kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
>> +    kvm_run->if_flag = (vcpu->arch.guest_state_protected)
>> +        ? kvm_arch_interrupt_allowed(vcpu)
>> +        : (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
> 
> Here indeed you only want the interrupt allowed bit, not the interrupt
> window.  But we can just be bold and always set it to true.
> 
> - for userspace irqchip, kvm_run->ready_for_interrupt_injection is set
> just below and it will always be false if kvm_arch_interrupt_allowed is false
> 
> - for in-kernel APIC, if_flag is documented to be invalid (though it
> actually is valid).  For split irqchip, they can just use
> kvm_run->ready_for_interrupt_injection; for entirely in-kernel interrupt
> handling, userspace does not need if_flag at all.

Ok, I'll make that change.

Thanks,
Tom

> 
> Paolo
> 
>>       kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
>>       kvm_run->cr8 = kvm_get_cr8(vcpu);
>>       kvm_run->apic_base = kvm_get_apic_base(vcpu);
>>
> 
>

Patch

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1edf24f51b53..bce28482d63d 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -178,7 +178,8 @@  struct __attribute__ ((__packed__)) vmcb_control_area {
 #define LBR_CTL_ENABLE_MASK BIT_ULL(0)
 #define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
 
-#define SVM_INTERRUPT_SHADOW_MASK 1
+#define SVM_INTERRUPT_SHADOW_MASK	BIT_ULL(0)
+#define SVM_GUEST_INTERRUPT_MASK	BIT_ULL(1)
 
 #define SVM_IOIO_STR_SHIFT 2
 #define SVM_IOIO_REP_SHIFT 3
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index cd4c9884e5a8..857d0d3f2752 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -36,6 +36,7 @@ 
 #include <asm/mce.h>
 #include <asm/spec-ctrl.h>
 #include <asm/cpu_device_id.h>
+#include <asm/traps.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -340,6 +341,13 @@  static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	/*
+	 * SEV-ES does not expose the next RIP. The RIP update is controlled by
+	 * the type of exit and the #VC handler in the guest.
+	 */
+	if (sev_es_guest(vcpu->kvm))
+		goto done;
+
 	if (nrips && svm->vmcb->control.next_rip != 0) {
 		WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
 		svm->next_rip = svm->vmcb->control.next_rip;
@@ -351,6 +359,8 @@  static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	} else {
 		kvm_rip_write(vcpu, svm->next_rip);
 	}
+
+done:
 	svm_set_interrupt_shadow(vcpu, 0);
 
 	return 1;
@@ -1652,9 +1662,18 @@  static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
-	ulong gcr0 = svm->vcpu.arch.cr0;
-	u64 *hcr0 = &svm->vmcb->save.cr0;
+	ulong gcr0;
+	u64 *hcr0;
+
+	/*
+	 * SEV-ES guests must always keep the CR intercepts cleared. CR
+	 * tracking is done using the CR write traps.
+	 */
+	if (sev_es_guest(svm->vcpu.kvm))
+		return;
 
+	gcr0 = svm->vcpu.arch.cr0;
+	hcr0 = &svm->vmcb->save.cr0;
 	*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
 		| (gcr0 & SVM_CR0_SELECTIVE_MASK);
 
@@ -1674,7 +1693,7 @@  void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-	if (vcpu->arch.efer & EFER_LME) {
+	if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
 			vcpu->arch.efer |= EFER_LMA;
 			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
@@ -2608,7 +2627,29 @@  static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
-	return kvm_emulate_rdmsr(&svm->vcpu);
+	u32 ecx;
+	u64 data;
+
+	if (!sev_es_guest(svm->vcpu.kvm))
+		return kvm_emulate_rdmsr(&svm->vcpu);
+
+	ecx = kvm_rcx_read(&svm->vcpu);
+	if (kvm_get_msr(&svm->vcpu, ecx, &data)) {
+		trace_kvm_msr_read_ex(ecx);
+		ghcb_set_sw_exit_info_1(svm->ghcb, 1);
+		ghcb_set_sw_exit_info_2(svm->ghcb,
+					X86_TRAP_GP |
+					SVM_EVTINJ_TYPE_EXEPT |
+					SVM_EVTINJ_VALID);
+		return 1;
+	}
+
+	trace_kvm_msr_read(ecx, data);
+
+	kvm_rax_write(&svm->vcpu, data & -1u);
+	kvm_rdx_write(&svm->vcpu, (data >> 32) & -1u);
+
+	return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
@@ -2797,7 +2838,27 @@  static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 
 static int wrmsr_interception(struct vcpu_svm *svm)
 {
-	return kvm_emulate_wrmsr(&svm->vcpu);
+	u32 ecx;
+	u64 data;
+
+	if (!sev_es_guest(svm->vcpu.kvm))
+		return kvm_emulate_wrmsr(&svm->vcpu);
+
+	ecx = kvm_rcx_read(&svm->vcpu);
+	data = kvm_read_edx_eax(&svm->vcpu);
+	if (kvm_set_msr(&svm->vcpu, ecx, data)) {
+		trace_kvm_msr_write_ex(ecx, data);
+		ghcb_set_sw_exit_info_1(svm->ghcb, 1);
+		ghcb_set_sw_exit_info_2(svm->ghcb,
+					X86_TRAP_GP |
+					SVM_EVTINJ_TYPE_EXEPT |
+					SVM_EVTINJ_VALID);
+		return 1;
+	}
+
+	trace_kvm_msr_write(ecx, data);
+
+	return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int msr_interception(struct vcpu_svm *svm)
@@ -2827,7 +2888,14 @@  static int interrupt_window_interception(struct vcpu_svm *svm)
 static int pause_interception(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	bool in_kernel = (svm_get_cpl(vcpu) == 0);
+	bool in_kernel;
+
+	/*
+	 * CPL is not made available for an SEV-ES guest, so just set in_kernel
+	 * to true.
+	 */
+	in_kernel = (sev_es_guest(svm->vcpu.kvm)) ? true
+						  : (svm_get_cpl(vcpu) == 0);
 
 	if (!kvm_pause_in_guest(vcpu->kvm))
 		grow_ple_window(vcpu);
@@ -3090,10 +3158,13 @@  static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 
 	trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
 
-	if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
-		vcpu->arch.cr0 = svm->vmcb->save.cr0;
-	if (npt_enabled)
-		vcpu->arch.cr3 = svm->vmcb->save.cr3;
+	/* SEV-ES guests must use the CR write traps to track CR registers. */
+	if (!sev_es_guest(vcpu->kvm)) {
+		if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
+			vcpu->arch.cr0 = svm->vmcb->save.cr0;
+		if (npt_enabled)
+			vcpu->arch.cr3 = svm->vmcb->save.cr3;
+	}
 
 	if (is_guest_mode(vcpu)) {
 		int vmexit;
@@ -3205,6 +3276,13 @@  static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	/*
+	 * SEV-ES guests must always keep the CR intercepts cleared. CR
+	 * tracking is done using the CR write traps.
+	 */
+	if (sev_es_guest(vcpu->kvm))
+		return;
+
 	if (nested_svm_virtualize_tpr(vcpu))
 		return;
 
@@ -3273,6 +3351,13 @@  bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb *vmcb = svm->vmcb;
 
+	/*
+	 * SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt mask
+	 * bit to determine the state of the IF flag.
+	 */
+	if (sev_es_guest(svm->vcpu.kvm))
+		return !(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK);
+
 	if (!gif_set(svm))
 		return true;
 
@@ -3458,6 +3543,12 @@  static void svm_complete_interrupts(struct vcpu_svm *svm)
 		svm->vcpu.arch.nmi_injected = true;
 		break;
 	case SVM_EXITINTINFO_TYPE_EXEPT:
+		/*
+		 * Never re-inject a #VC exception.
+		 */
+		if (vector == X86_TRAP_VC)
+			break;
+
 		/*
 		 * In case of software exceptions, do not reinject the vector,
 		 * but re-execute the instruction instead. Rewind RIP first
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a3fdc16cfd6f..b6809a2851d2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4018,7 +4018,7 @@  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	int idx;
 
-	if (vcpu->preempted)
+	if (vcpu->preempted && !vcpu->arch.guest_state_protected)
 		vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
 
 	/*
@@ -8161,7 +8161,9 @@  static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *kvm_run = vcpu->run;
 
-	kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+	kvm_run->if_flag = (vcpu->arch.guest_state_protected)
+		? kvm_arch_interrupt_allowed(vcpu)
+		: (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
 	kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);