[RFC,22/35] KVM: SVM: Add support for CR0 write traps for an SEV-ES guest

Message ID 68f885b63b18e5c72eae92c9c681296083c0ccd8.1600114548.git.thomas.lendacky@amd.com (mailing list archive)
State New, archived
Series SEV-ES hypervisor support

Commit Message

Tom Lendacky Sept. 14, 2020, 8:15 p.m. UTC
From: Tom Lendacky <thomas.lendacky@amd.com>

For SEV-ES guests, intercepting control register write accesses is not
recommended. A control register intercept occurs before the control
register is modified, and the hypervisor is unable to modify the
control register itself because the register is located in the
encrypted register state.

SEV-ES support introduces new control register write traps. These traps
intercept a control register write after the control register has been
modified. The new control register value is provided in the VMCB
EXITINFO1 field, allowing the hypervisor to track the setting of the
guest control registers.

Add support to track the value of the guest CR0 register using the control
register write trap so that the hypervisor understands the guest operating
mode.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/include/uapi/asm/svm.h | 17 +++++++++++++
 arch/x86/kvm/svm/svm.c          | 20 +++++++++++++++
 arch/x86/kvm/x86.c              | 43 ++++++++++++++++++++++++---------
 4 files changed, 69 insertions(+), 12 deletions(-)
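
For orientation before the review discussion, the core of the change is the
cr_trap() handler added to svm.c (shown in full in the patch below). It is
reproduced here with explanatory comments; the comments are annotations and
are not part of the posted patch. The trap is delivered after the guest has
already written CR0, so the handler only brings KVM's tracked state up to
date.

/*
 * CR write traps fire after the guest has updated the register, so there
 * is nothing to emulate; the handler only updates KVM's tracked copy.
 */
static int cr_trap(struct vcpu_svm *svm)
{
	unsigned int cr;

	/*
	 * The write-trap exit codes are contiguous, so the CR number falls
	 * directly out of the exit code.
	 */
	cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP;

	switch (cr) {
	case 0:
		/* EXITINFO1 carries the value the guest already wrote. */
		kvm_track_cr0(&svm->vcpu, svm->vmcb->control.exit_info_1);
		break;
	default:
		WARN(1, "unhandled CR%d write trap", cr);
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
		return 1;
	}

	return kvm_complete_insn_gp(&svm->vcpu, 0);
}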

Comments

Sean Christopherson Sept. 14, 2020, 10:13 p.m. UTC | #1
On Mon, Sep 14, 2020 at 03:15:36PM -0500, Tom Lendacky wrote:
> From: Tom Lendacky <thomas.lendacky@amd.com>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b65bd0c986d4..6f5988c305e1 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -799,11 +799,29 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
>  }
>  EXPORT_SYMBOL_GPL(pdptrs_changed);
>  
> +static void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0,
> +			     unsigned long cr0)

What about using __kvm_set_cr*() instead of kvm_post_set_cr*()?  That would
show that __kvm_set_cr*() is a subordinate of kvm_set_cr*(), and from the
SVM side would provide the hint that the code is skipping the front end of
kvm_set_cr*().

> +{
> +	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
> +
> +	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
> +		kvm_clear_async_pf_completion_queue(vcpu);
> +		kvm_async_pf_hash_reset(vcpu);
> +	}
> +
> +	if ((cr0 ^ old_cr0) & update_bits)
> +		kvm_mmu_reset_context(vcpu);
> +
> +	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
> +	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
> +	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
> +		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
> +}
> +
>  int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>  {
>  	unsigned long old_cr0 = kvm_read_cr0(vcpu);
>  	unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
> -	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
>  
>  	cr0 |= X86_CR0_ET;
>  
> @@ -842,22 +860,23 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>  
>  	kvm_x86_ops.set_cr0(vcpu, cr0);
>  
> -	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
> -		kvm_clear_async_pf_completion_queue(vcpu);
> -		kvm_async_pf_hash_reset(vcpu);
> -	}
> +	kvm_post_set_cr0(vcpu, old_cr0, cr0);
>  
> -	if ((cr0 ^ old_cr0) & update_bits)
> -		kvm_mmu_reset_context(vcpu);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(kvm_set_cr0);
>  
> -	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
> -	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
> -	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
> -		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
> +int kvm_track_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)

I really dislike the "track" terminology.  For me, using "track" as the verb
in a function implies the function activates tracking.  But it's probably a
moot point, because similar to EFER, I don't see any reason to put the front
end of the emulation into x86.c.  Both getting old_cr0 and setting
vcpu->arch.cr0 can be done in svm.c

> +{
> +	unsigned long old_cr0 = kvm_read_cr0(vcpu);
> +
> +	vcpu->arch.cr0 = cr0;
> +
> +	kvm_post_set_cr0(vcpu, old_cr0, cr0);
>  
>  	return 0;
>  }
> -EXPORT_SYMBOL_GPL(kvm_set_cr0);
> +EXPORT_SYMBOL_GPL(kvm_track_cr0);
>  
>  void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
>  {
> -- 
> 2.28.0
>
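
As a rough sketch of the reshuffle Sean is suggesting (handle the CR0 write
trap entirely in svm.c and share only the post-processing), the case 0 arm of
cr_trap() could look roughly like the fragment below, assuming
kvm_post_set_cr0() were exported from x86.c. This is illustrative only and
not part of the posted patch; the exact shape is what the rest of the thread
discusses.

	/*
	 * Illustrative sketch (inside cr_trap()'s switch), not part of the
	 * posted patch: do the CR0 bookkeeping in svm.c and call an
	 * exported kvm_post_set_cr0() for the shared side effects.
	 */
	case 0: {
		unsigned long old_value = kvm_read_cr0(&svm->vcpu);
		unsigned long new_value = svm->vmcb->control.exit_info_1;

		/* The guest has already written CR0; update KVM's copy ... */
		svm->vcpu.arch.cr0 = new_value;
		/*
		 * ... then apply the shared side effects (async PF queue,
		 * MMU reset, CD/NW zap) via kvm_post_set_cr0().
		 */
		kvm_post_set_cr0(&svm->vcpu, old_value, new_value);
		break;
	}
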
Tom Lendacky Sept. 15, 2020, 3:56 p.m. UTC | #2
On 9/14/20 5:13 PM, Sean Christopherson wrote:
> On Mon, Sep 14, 2020 at 03:15:36PM -0500, Tom Lendacky wrote:
>> From: Tom Lendacky <thomas.lendacky@amd.com>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index b65bd0c986d4..6f5988c305e1 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -799,11 +799,29 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
>>  }
>>  EXPORT_SYMBOL_GPL(pdptrs_changed);
>>  
>> +static void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0,
>> +			     unsigned long cr0)
> 
> What about using __kvm_set_cr*() instead of kvm_post_set_cr*()?  That would
> show that __kvm_set_cr*() is a subordinate of kvm_set_cr*(), and from the
> SVM side would provide the hint that the code is skipping the front end of
> kvm_set_cr*().

Ok, I'll change this (and the others) to __kvm_set_cr* and export them.

> 
>> +{
>> +	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
>> +
>> +	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
>> +		kvm_clear_async_pf_completion_queue(vcpu);
>> +		kvm_async_pf_hash_reset(vcpu);
>> +	}
>> +
>> +	if ((cr0 ^ old_cr0) & update_bits)
>> +		kvm_mmu_reset_context(vcpu);
>> +
>> +	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
>> +	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
>> +	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
>> +		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
>> +}
>> +
>>  int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>>  {
>>  	unsigned long old_cr0 = kvm_read_cr0(vcpu);
>>  	unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
>> -	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
>>  
>>  	cr0 |= X86_CR0_ET;
>>  
>> @@ -842,22 +860,23 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>>  
>>  	kvm_x86_ops.set_cr0(vcpu, cr0);
>>  
>> -	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
>> -		kvm_clear_async_pf_completion_queue(vcpu);
>> -		kvm_async_pf_hash_reset(vcpu);
>> -	}
>> +	kvm_post_set_cr0(vcpu, old_cr0, cr0);
>>  
>> -	if ((cr0 ^ old_cr0) & update_bits)
>> -		kvm_mmu_reset_context(vcpu);
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_set_cr0);
>>  
>> -	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
>> -	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
>> -	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
>> -		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
>> +int kvm_track_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> 
> I really dislike the "track" terminology.  For me, using "track" as the verb
> in a function implies the function activates tracking.  But it's probably a
> moot point, because similar to EFER, I don't see any reason to put the front
> end of the emulation into x86.c.  Both getting old_cr0 and setting
> vcpu->arch.cr0 can be done in svm.c

Yup, I can move that to svm.c.

Thanks,
Tom

> 
>> +{
>> +	unsigned long old_cr0 = kvm_read_cr0(vcpu);
>> +
>> +	vcpu->arch.cr0 = cr0;
>> +
>> +	kvm_post_set_cr0(vcpu, old_cr0, cr0);
>>  
>>  	return 0;
>>  }
>> -EXPORT_SYMBOL_GPL(kvm_set_cr0);
>> +EXPORT_SYMBOL_GPL(kvm_track_cr0);
>>  
>>  void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
>>  {
>> -- 
>> 2.28.0
>>
Paolo Bonzini Nov. 30, 2020, 6:15 p.m. UTC | #3
On 15/09/20 00:13, Sean Christopherson wrote:
>> +static void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0,
>> +			     unsigned long cr0)
> What about using __kvm_set_cr*() instead of kvm_post_set_cr*()?  That would
> show that __kvm_set_cr*() is a subordinate of kvm_set_cr*(), and from the
> SVM side would provide the hint that the code is skipping the front end of
> kvm_set_cr*().

No, kvm_post_set_cr0 is exactly the right name because it doesn't set 
any state.  __kvm_set_cr0 tells me that it is a (rarely used) way to set 
CR0, which this function isn't.

Sorry Tom for not catching this earlier.

Paolo

>> +{
>> +	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
>> +
>> +	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
>> +		kvm_clear_async_pf_completion_queue(vcpu);
>> +		kvm_async_pf_hash_reset(vcpu);
>> +	}
>> +
>> +	if ((cr0 ^ old_cr0) & update_bits)
>> +		kvm_mmu_reset_context(vcpu);
>> +
>> +	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
>> +	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
>> +	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
>> +		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
>> +}
>> +
>>   int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>>   {
>>   	unsigned long old_cr0 = kvm_read_cr0(vcpu);
>>   	unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
>> -	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
>>   
>>   	cr0 |= X86_CR0_ET;
>>   
>> @@ -842,22 +860,23 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>>   
>>   	kvm_x86_ops.set_cr0(vcpu, cr0);
>>   
>> -	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
>> -		kvm_clear_async_pf_completion_queue(vcpu);
>> -		kvm_async_pf_hash_reset(vcpu);
>> -	}
>> +	kvm_post_set_cr0(vcpu, old_cr0, cr0);

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b535b690eb66..9cc9b65bea7e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1432,6 +1432,7 @@  int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
+int kvm_track_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
 int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index ce937a242995..cc45d7996e9c 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -78,6 +78,22 @@ 
 #define SVM_EXIT_XSETBV        0x08d
 #define SVM_EXIT_RDPRU         0x08e
 #define SVM_EXIT_EFER_WRITE_TRAP		0x08f
+#define SVM_EXIT_CR0_WRITE_TRAP			0x090
+#define SVM_EXIT_CR1_WRITE_TRAP			0x091
+#define SVM_EXIT_CR2_WRITE_TRAP			0x092
+#define SVM_EXIT_CR3_WRITE_TRAP			0x093
+#define SVM_EXIT_CR4_WRITE_TRAP			0x094
+#define SVM_EXIT_CR5_WRITE_TRAP			0x095
+#define SVM_EXIT_CR6_WRITE_TRAP			0x096
+#define SVM_EXIT_CR7_WRITE_TRAP			0x097
+#define SVM_EXIT_CR8_WRITE_TRAP			0x098
+#define SVM_EXIT_CR9_WRITE_TRAP			0x099
+#define SVM_EXIT_CR10_WRITE_TRAP		0x09a
+#define SVM_EXIT_CR11_WRITE_TRAP		0x09b
+#define SVM_EXIT_CR12_WRITE_TRAP		0x09c
+#define SVM_EXIT_CR13_WRITE_TRAP		0x09d
+#define SVM_EXIT_CR14_WRITE_TRAP		0x09e
+#define SVM_EXIT_CR15_WRITE_TRAP		0x09f
 #define SVM_EXIT_NPF           0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI		0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS	0x402
@@ -185,6 +201,7 @@ 
 	{ SVM_EXIT_MWAIT,       "mwait" }, \
 	{ SVM_EXIT_XSETBV,      "xsetbv" }, \
 	{ SVM_EXIT_EFER_WRITE_TRAP,	"write_efer_trap" }, \
+	{ SVM_EXIT_CR0_WRITE_TRAP,	"write_cr0_trap" }, \
 	{ SVM_EXIT_NPF,         "npf" }, \
 	{ SVM_EXIT_AVIC_INCOMPLETE_IPI,		"avic_incomplete_ipi" }, \
 	{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }, \
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ac467225a51d..506656988559 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2413,6 +2413,25 @@  static int cr_interception(struct vcpu_svm *svm)
 	return kvm_complete_insn_gp(&svm->vcpu, err);
 }
 
+static int cr_trap(struct vcpu_svm *svm)
+{
+	unsigned int cr;
+
+	cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP;
+
+	switch (cr) {
+	case 0:
+		kvm_track_cr0(&svm->vcpu, svm->vmcb->control.exit_info_1);
+		break;
+	default:
+		WARN(1, "unhandled CR%d write trap", cr);
+		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	return kvm_complete_insn_gp(&svm->vcpu, 0);
+}
+
 static int dr_interception(struct vcpu_svm *svm)
 {
 	int reg, dr;
@@ -2956,6 +2975,7 @@  static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_XSETBV]			= xsetbv_interception,
 	[SVM_EXIT_RDPRU]			= rdpru_interception,
 	[SVM_EXIT_EFER_WRITE_TRAP]		= efer_trap,
+	[SVM_EXIT_CR0_WRITE_TRAP]		= cr_trap,
 	[SVM_EXIT_NPF]				= npf_interception,
 	[SVM_EXIT_RSM]                          = rsm_interception,
 	[SVM_EXIT_AVIC_INCOMPLETE_IPI]		= avic_incomplete_ipi_interception,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b65bd0c986d4..6f5988c305e1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -799,11 +799,29 @@  bool pdptrs_changed(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(pdptrs_changed);
 
+static void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0,
+			     unsigned long cr0)
+{
+	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
+
+	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
+		kvm_clear_async_pf_completion_queue(vcpu);
+		kvm_async_pf_hash_reset(vcpu);
+	}
+
+	if ((cr0 ^ old_cr0) & update_bits)
+		kvm_mmu_reset_context(vcpu);
+
+	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
+	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
+	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
+}
+
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 	unsigned long old_cr0 = kvm_read_cr0(vcpu);
 	unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
-	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
 
 	cr0 |= X86_CR0_ET;
 
@@ -842,22 +860,23 @@  int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 	kvm_x86_ops.set_cr0(vcpu, cr0);
 
-	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
-		kvm_clear_async_pf_completion_queue(vcpu);
-		kvm_async_pf_hash_reset(vcpu);
-	}
+	kvm_post_set_cr0(vcpu, old_cr0, cr0);
 
-	if ((cr0 ^ old_cr0) & update_bits)
-		kvm_mmu_reset_context(vcpu);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_set_cr0);
 
-	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
-	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
-	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
-		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
+int kvm_track_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+	unsigned long old_cr0 = kvm_read_cr0(vcpu);
+
+	vcpu->arch.cr0 = cr0;
+
+	kvm_post_set_cr0(vcpu, old_cr0, cr0);
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(kvm_set_cr0);
+EXPORT_SYMBOL_GPL(kvm_track_cr0);
 
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {