diff mbox series

[2/3] KVM: SEV: only access GHCB fields once

Message ID 20230804173355.51753-3-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM: SEV: only access GHCB fields once | expand

Commit Message

Paolo Bonzini Aug. 4, 2023, 5:33 p.m. UTC
A KVM guest using SEV-ES or SEV-SNP with multiple vCPUs can trigger
a double fetch race condition vulnerability and invoke the VMGEXIT
handler recursively.

sev_handle_vmgexit() maps the GHCB page using kvm_vcpu_map() and then
fetches the exit code using ghcb_get_sw_exit_code().  Soon after,
sev_es_validate_vmgexit() fetches the exit code again. Since the GHCB
page is shared with the guest, the guest is able to quickly swap the
values with another vCPU and hence bypass the validation. One vmexit code
that can be rejected by sev_es_validate_vmgexit() is SVM_EXIT_VMGEXIT;
if sev_handle_vmgexit() observes it in the second fetch, the call
to svm_invoke_exit_handler() will invoke sev_handle_vmgexit() again
recursively.

To avoid the race, always fetch the GHCB data from the places where
sev_es_sync_from_ghcb() stores it, instead of re-reading the shared
GHCB page.

Exploiting recursion in the Linux kernel has been proven feasible
in the past, but the impact is mitigated by stack guard pages
(CONFIG_VMAP_STACK).  Still, if an attacker manages to call the handler
multiple times, they can theoretically trigger a stack overflow and
cause a denial-of-service, or potentially guest-to-host escape in kernel
configurations without stack guard pages.

Note that winning the race reliably in every iteration is very tricky
due to the very tight window of the fetches; depending on the compiler
settings, they are often consecutive because of optimization and inlining.

Tested by booting an SEV-ES RHEL9 guest.

Fixes: CVE-2023-4155
Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT")
Cc: stable@vger.kernel.org
Reported-by: Andy Nguyen <theflow@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

Comments

Tom Lendacky Aug. 15, 2023, 3:51 p.m. UTC | #1
On 8/4/23 12:33, Paolo Bonzini wrote:
> A KVM guest using SEV-ES or SEV-SNP with multiple vCPUs can trigger
> a double fetch race condition vulnerability and invoke the VMGEXIT
> handler recursively.
> 
> sev_handle_vmgexit() maps the GHCB page using kvm_vcpu_map() and then
> fetches the exit code using ghcb_get_sw_exit_code().  Soon after,
> sev_es_validate_vmgexit() fetches the exit code again. Since the GHCB
> page is shared with the guest, the guest is able to quickly swap the
> values with another vCPU and hence bypass the validation. One vmexit code
> that can be rejected by sev_es_validate_vmgexit() is SVM_EXIT_VMGEXIT;
> if sev_handle_vmgexit() observes it in the second fetch, the call
> to svm_invoke_exit_handler() will invoke sev_handle_vmgexit() again
> recursively.
> 
> To avoid the race, always fetch the GHCB data from the places where
> sev_es_sync_from_ghcb stores it.
> 
> Exploiting recursions on linux kernel has been proven feasible
> in the past, but the impact is mitigated by stack guard pages
> (CONFIG_VMAP_STACK).  Still, if an attacker manages to call the handler
> multiple times, they can theoretically trigger a stack overflow and
> cause a denial-of-service, or potentially guest-to-host escape in kernel
> configurations without stack guard pages.
> 
> Note that winning the race reliably in every iteration is very tricky
> due to the very tight window of the fetches; depending on the compiler
> settings, they are often consecutive because of optimization and inlining.
> 
> Tested by booting an SEV-ES RHEL9 guest.
> 
> Fixes: CVE-2023-4155
> Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT")
> Cc: stable@vger.kernel.org
> Reported-by: Andy Nguyen <theflow@google.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Just one very minor comment below, otherwise

Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>

> ---
>   arch/x86/kvm/svm/sev.c | 25 ++++++++++++++-----------
>   1 file changed, 14 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index e898f0b2b0ba..ca4ba5fe9a01 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -2445,9 +2445,15 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
>   	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
>   }
>   
> +static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
> +{

Since ghcb is in the function name it might be nice to have a comment 
indicating that the actual GHCB value was copied to the VMCB fields as 
part of sev_es_sync_from_ghcb() and this is used to avoid reading from the 
GHCB after validation.

Thanks,
Tom

> +	return (((u64)control->exit_code_hi) << 32) | control->exit_code;
> +}
> +
>   static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
diff mbox series

Patch

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index e898f0b2b0ba..ca4ba5fe9a01 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2445,9 +2445,15 @@  static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
 	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
 }
 
+static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
+{
+	return (((u64)control->exit_code_hi) << 32) | control->exit_code;
+}
+
 static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 {
-	struct kvm_vcpu *vcpu;
+	struct vmcb_control_area *control = &svm->vmcb->control;
+	struct kvm_vcpu *vcpu = &svm->vcpu;
 	struct ghcb *ghcb;
 	u64 exit_code;
 	u64 reason;
@@ -2458,7 +2464,7 @@  static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 	 * Retrieve the exit code now even though it may not be marked valid
 	 * as it could help with debugging.
 	 */
-	exit_code = ghcb_get_sw_exit_code(ghcb);
+	exit_code = kvm_ghcb_get_sw_exit_code(control);
 
 	/* Only GHCB Usage code 0 is supported */
 	if (ghcb->ghcb_usage) {
@@ -2473,7 +2479,7 @@  static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 	    !kvm_ghcb_sw_exit_info_2_is_valid(svm))
 		goto vmgexit_err;
 
-	switch (ghcb_get_sw_exit_code(ghcb)) {
+	switch (exit_code) {
 	case SVM_EXIT_READ_DR7:
 		break;
 	case SVM_EXIT_WRITE_DR7:
@@ -2490,18 +2496,18 @@  static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 		if (!kvm_ghcb_rax_is_valid(svm) ||
 		    !kvm_ghcb_rcx_is_valid(svm))
 			goto vmgexit_err;
-		if (ghcb_get_rax(ghcb) == 0xd)
+		if (vcpu->arch.regs[VCPU_REGS_RAX] == 0xd)
 			if (!kvm_ghcb_xcr0_is_valid(svm))
 				goto vmgexit_err;
 		break;
 	case SVM_EXIT_INVD:
 		break;
 	case SVM_EXIT_IOIO:
-		if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
+		if (control->exit_info_1 & SVM_IOIO_STR_MASK) {
 			if (!kvm_ghcb_sw_scratch_is_valid(svm))
 				goto vmgexit_err;
 		} else {
-			if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
+			if (!(control->exit_info_1 & SVM_IOIO_TYPE_MASK))
 				if (!kvm_ghcb_rax_is_valid(svm))
 					goto vmgexit_err;
 		}
@@ -2509,7 +2515,7 @@  static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 	case SVM_EXIT_MSR:
 		if (!kvm_ghcb_rcx_is_valid(svm))
 			goto vmgexit_err;
-		if (ghcb_get_sw_exit_info_1(ghcb)) {
+		if (control->exit_info_1) {
 			if (!kvm_ghcb_rax_is_valid(svm) ||
 			    !kvm_ghcb_rdx_is_valid(svm))
 				goto vmgexit_err;
@@ -2553,8 +2559,6 @@  static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 	return 0;
 
 vmgexit_err:
-	vcpu = &svm->vcpu;
-
 	if (reason == GHCB_ERR_INVALID_USAGE) {
 		vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
 			    ghcb->ghcb_usage);
@@ -2852,8 +2856,6 @@  int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 
 	trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
 
-	exit_code = ghcb_get_sw_exit_code(ghcb);
-
 	sev_es_sync_from_ghcb(svm);
 	ret = sev_es_validate_vmgexit(svm);
 	if (ret)
@@ -2862,6 +2864,7 @@  int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 	ghcb_set_sw_exit_info_1(ghcb, 0);
 	ghcb_set_sw_exit_info_2(ghcb, 0);
 
+	exit_code = kvm_ghcb_get_sw_exit_code(control);
 	switch (exit_code) {
 	case SVM_VMGEXIT_MMIO_READ:
 		ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);