
[v5,27/34] KVM: SVM: Add support for booting APs for an SEV-ES guest

Message ID 47d11ed1c1a48ab71858fc3cde766bf67a4612d1.1607620209.git.thomas.lendacky@amd.com
State New, archived
Series SEV-ES hypervisor support

Commit Message

Tom Lendacky Dec. 10, 2020, 5:10 p.m. UTC
From: Tom Lendacky <thomas.lendacky@amd.com>

Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
where the guest vCPU register state is updated and then the vCPU is VMRUN
to begin execution of the AP. For an SEV-ES guest, this won't work because
the guest register state is encrypted.

Following the GHCB specification, the hypervisor must not alter the guest
register state, so KVM must track an AP/vCPU boot. Should the guest want
to park the AP, it must use the AP Reset Hold exit event in place of, for
example, a HLT loop.

First AP boot (first INIT-SIPI-SIPI sequence):
  Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
  support. It is up to the guest to transfer control of the AP to the
  proper location.

Subsequent AP boot:
  KVM will expect to receive an AP Reset Hold exit event indicating that
  the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
  awaken it. When the AP Reset Hold exit event is received, KVM will place
  the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
  sequence, KVM will make the vCPU runnable. It is again up to the guest
  to then transfer control of the AP to the proper location.

The GHCB specification also requires the hypervisor to save the address of
an AP Jump Table so that, for example, vCPUs that have been parked by UEFI
can be started by the OS. Provide support for the AP Jump Table set/get
exit code.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/svm/sev.c          | 50 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c          |  7 +++++
 arch/x86/kvm/svm/svm.h          |  3 ++
 arch/x86/kvm/x86.c              |  9 ++++++
 5 files changed, 71 insertions(+)
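
For context, the guest side of these two exit codes looks roughly like the
sketch below. This is not part of the patch: the helper names are made up,
the ghcb_set_*() accessors and a VMGEXIT() macro are assumed to be available
to the guest, and GHCB registration via the GHCB MSR protocol, result
checking, and the actual jump through the AP Jump Table are all omitted.

/* Illustrative guest-side sketch only -- not part of this patch. */
#include <asm/svm.h>

/* AP: park via AP Reset Hold instead of sitting in a HLT loop. */
static void ap_reset_hold(struct ghcb *ghcb)
{
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);
	VMGEXIT();
	/*
	 * Resumes here after INIT-SIPI-SIPI; SW_EXITINFO2 is non-zero and
	 * the AP is expected to branch to the AP Jump Table address.
	 */
}

/* BSP/UEFI: tell the hypervisor where the AP Jump Table lives. */
static void set_ap_jump_table(struct ghcb *ghcb, u64 jump_table_pa)
{
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_1(ghcb, 0);	/* exit_info_1: 0 = set, 1 = get */
	ghcb_set_sw_exit_info_2(ghcb, jump_table_pa);
	VMGEXIT();
}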

Comments

Paolo Bonzini Dec. 14, 2020, 4:03 p.m. UTC | #1
On 10/12/20 18:10, Tom Lendacky wrote:
> From: Tom Lendacky <thomas.lendacky@amd.com>
> 
> Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
> where the guest vCPU register state is updated and then the vCPU is VMRUN
> to begin execution of the AP. For an SEV-ES guest, this won't work because
> the guest register state is encrypted.
> 
> Following the GHCB specification, the hypervisor must not alter the guest
> register state, so KVM must track an AP/vCPU boot. Should the guest want
> to park the AP, it must use the AP Reset Hold exit event in place of, for
> example, a HLT loop.
> 
> First AP boot (first INIT-SIPI-SIPI sequence):
>    Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
>    support. It is up to the guest to transfer control of the AP to the
>    proper location.
> 
> Subsequent AP boot:
>    KVM will expect to receive an AP Reset Hold exit event indicating that
>    the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
>    awaken it. When the AP Reset Hold exit event is received, KVM will place
>    the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
>    sequence, KVM will make the vCPU runnable. It is again up to the guest
>    to then transfer control of the AP to the proper location.
> 
> The GHCB specification also requires the hypervisor to save the address of
> an AP Jump Table so that, for example, vCPUs that have been parked by UEFI
> can be started by the OS. Provide support for the AP Jump Table set/get
> exit code.
> 
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> ---
>   arch/x86/include/asm/kvm_host.h |  2 ++
>   arch/x86/kvm/svm/sev.c          | 50 +++++++++++++++++++++++++++++++++
>   arch/x86/kvm/svm/svm.c          |  7 +++++
>   arch/x86/kvm/svm/svm.h          |  3 ++
>   arch/x86/kvm/x86.c              |  9 ++++++
>   5 files changed, 71 insertions(+)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 048b08437c33..60a3b9d33407 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1286,6 +1286,8 @@ struct kvm_x86_ops {
>   
>   	void (*migrate_timers)(struct kvm_vcpu *vcpu);
>   	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
> +
> +	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
>   };
>   
>   struct kvm_x86_nested_ops {
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index a7531de760b5..b47285384b1f 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -17,6 +17,8 @@
>   #include <linux/processor.h>
>   #include <linux/trace_events.h>
>   
> +#include <asm/trapnr.h>
> +
>   #include "x86.h"
>   #include "svm.h"
>   #include "cpuid.h"
> @@ -1449,6 +1451,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
>   		if (!ghcb_sw_scratch_is_valid(ghcb))
>   			goto vmgexit_err;
>   		break;
> +	case SVM_VMGEXIT_AP_HLT_LOOP:
> +	case SVM_VMGEXIT_AP_JUMP_TABLE:
>   	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
>   		break;
>   	default:
> @@ -1770,6 +1774,35 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
>   					    control->exit_info_2,
>   					    svm->ghcb_sa);
>   		break;
> +	case SVM_VMGEXIT_AP_HLT_LOOP:
> +		svm->ap_hlt_loop = true;

This value needs to be communicated to userspace.  Let's get this right 
from the beginning and use a new KVM_MP_STATE_* value instead (perhaps 
reuse KVM_MP_STATE_STOPPED but for x86 #define it as 
KVM_MP_STATE_AP_HOLD_RECEIVED?).

> @@ -68,6 +68,7 @@ struct kvm_sev_info {
>  	int fd;			/* SEV device fd */
>  	unsigned long pages_locked; /* Number of pages locked */
>  	struct list_head regions_list;  /* List of registered regions */
> +	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */

Do you have any plans for migration of this value?  How does the guest 
ensure that the hypervisor does not screw with it?

Paolo

Paolo Bonzini Dec. 14, 2020, 4:05 p.m. UTC | #2
On 10/12/20 18:10, Tom Lendacky wrote:
> @@ -10144,6 +10144,15 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
>   {
>   	struct kvm_segment cs;
>   
> +	/*
> +	 * Guests with protected state can't have their state altered by KVM,
> +	 * call the vcpu_deliver_sipi_vector() x86 op for processing.
> +	 */
> +	if (vcpu->arch.guest_state_protected) {
> +		kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, vector);
> +		return;
> +	}
> +

Also, I don't mind that you just call 
kvm_x86_ops.vcpu_deliver_sipi_vector from lapic.c, and make VMX just do

	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,

(SVM would do it if !guest_state_protected).  This matches more or less 
how I redid the MSR part.

Paolo
Tom Lendacky Dec. 14, 2020, 7:46 p.m. UTC | #3
On 12/14/20 10:03 AM, Paolo Bonzini wrote:
> On 10/12/20 18:10, Tom Lendacky wrote:
>> From: Tom Lendacky <thomas.lendacky@amd.com>
>>
>> +    case SVM_VMGEXIT_AP_HLT_LOOP:
>> +        svm->ap_hlt_loop = true;
> 
> This value needs to be communicated to userspace.  Let's get this right
> from the beginning and use a new KVM_MP_STATE_* value instead (perhaps
> reuse KVM_MP_STATE_STOPPED but for x86 #define it as
> KVM_MP_STATE_AP_HOLD_RECEIVED?).

Ok, let me look into this.

> 
>> @@ -68,6 +68,7 @@ struct kvm_sev_info {
>>      int fd;            /* SEV device fd */
>>      unsigned long pages_locked; /* Number of pages locked */
>>      struct list_head regions_list;  /* List of registered regions */
>> +    u64 ap_jump_table;    /* SEV-ES AP Jump Table address */
> 
> Do you have any plans for migration of this value?  How does the guest
> ensure that the hypervisor does not screw with it?

I'll be sure that this is part of the SEV-ES live migration support.

For SEV-ES, we can't guarantee that the hypervisor doesn't screw with it.
This is something that SEV-SNP will be able to address.

Thanks,
Tom

> 
> Paolo
>
Tom Lendacky Dec. 15, 2020, 8:25 p.m. UTC | #4
On 12/14/20 1:46 PM, Tom Lendacky wrote:
> On 12/14/20 10:03 AM, Paolo Bonzini wrote:
>> On 10/12/20 18:10, Tom Lendacky wrote:
>>> From: Tom Lendacky <thomas.lendacky@amd.com>
>>>
>>> +    case SVM_VMGEXIT_AP_HLT_LOOP:
>>> +        svm->ap_hlt_loop = true;
>>
>> This value needs to be communicated to userspace.  Let's get this right
>> from the beginning and use a new KVM_MP_STATE_* value instead (perhaps
>> reuse KVM_MP_STATE_STOPPED but for x86 #define it as
>> KVM_MP_STATE_AP_HOLD_RECEIVED?).
> 
> Ok, let me look into this.

Paolo, is this something along the lines of what you were thinking, or am
I off base? I created kvm_emulate_ap_reset_hold() to keep the code
consolidated and remove the duplication, but can easily make those changes
local to sev.c. I'd also like to rename SVM_VMGEXIT_AP_HLT_LOOP to
SVM_VMGEXIT_AP_RESET_HOLD to more closely match the GHCB document, but
that can be done later (if possible, since it is already part of the uapi
include file).

Thanks,
Tom

---
KVM: SVM: Add support for booting APs for an SEV-ES guest

From: Tom Lendacky <thomas.lendacky@amd.com>

Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
where the guest vCPU register state is updated and then the vCPU is VMRUN
to begin execution of the AP. For an SEV-ES guest, this won't work because
the guest register state is encrypted.

Following the GHCB specification, the hypervisor must not alter the guest
register state, so KVM must track an AP/vCPU boot. Should the guest want
to park the AP, it must use the AP Reset Hold exit event in place of, for
example, a HLT loop.

First AP boot (first INIT-SIPI-SIPI sequence):
  Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
  support. It is up to the guest to transfer control of the AP to the
  proper location.

Subsequent AP boot:
  KVM will expect to receive an AP Reset Hold exit event indicating that
  the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
  awaken it. When the AP Reset Hold exit event is received, KVM will place
  the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
  sequence, KVM will make the vCPU runnable. It is again up to the guest
  to then transfer control of the AP to the proper location.

  To differentiate between an actual HLT and an AP Reset Hold, a new MP
  state is introduced, KVM_MP_STATE_AP_RESET_HOLD, which the vCPU is
  placed in upon receiving the AP Reset Hold exit event. Additionally, to
  communicate the AP Reset Hold exit event up to userspace (if needed), a
  new exit reason is introduced, KVM_EXIT_AP_RESET_HOLD.

A new x86 ops function is introduced, vcpu_deliver_sipi_vector, in order
to accomplish AP booting. For VMX, vcpu_deliver_sipi_vector is set to the
original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(). SVM adds
a new function that, for non-SEV-ES guests, invokes the original SIPI
delivery function, kvm_vcpu_deliver_sipi_vector(), but for SEV-ES guests,
implements the logic above.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/include/asm/kvm_host.h |    3 +++
 arch/x86/kvm/lapic.c            |    2 +-
 arch/x86/kvm/svm/sev.c          |   22 ++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c          |   10 ++++++++++
 arch/x86/kvm/svm/svm.h          |    2 ++
 arch/x86/kvm/vmx/vmx.c          |    2 ++
 arch/x86/kvm/x86.c              |   20 +++++++++++++++++---
 include/uapi/linux/kvm.h        |    2 ++
 8 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 39707e72b062..23d7b203c060 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1287,6 +1287,8 @@ struct kvm_x86_ops {
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
 	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
 	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
+
+	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
 };
 
 struct kvm_x86_nested_ops {
@@ -1468,6 +1470,7 @@ int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
 int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
+int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6a87623aa578..a2f08ed777d8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2898,7 +2898,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 			/* evaluate pending_events before reading the vector */
 			smp_rmb();
 			sipi_vector = apic->sipi_vector;
-			kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
+			kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, sipi_vector);
 			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		}
 	}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 8b5ef0fe4490..4045de7f8f8b 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1561,6 +1561,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 			goto vmgexit_err;
 		break;
 	case SVM_VMGEXIT_NMI_COMPLETE:
+	case SVM_VMGEXIT_AP_HLT_LOOP:
 	case SVM_VMGEXIT_AP_JUMP_TABLE:
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
 		break;
@@ -1886,6 +1887,9 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
 	case SVM_VMGEXIT_NMI_COMPLETE:
 		ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
 		break;
+	case SVM_VMGEXIT_AP_HLT_LOOP:
+		ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
+		break;
 	case SVM_VMGEXIT_AP_JUMP_TABLE: {
 		struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
 
@@ -2038,3 +2042,21 @@ void sev_es_vcpu_put(struct vcpu_svm *svm)
 		wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
 	}
 }
+
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	/* First SIPI: Use the values as initially set by the VMM */
+	if (!svm->received_first_sipi) {
+		svm->received_first_sipi = true;
+		return;
+	}
+
+	/*
+	 * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
+	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
+	 * non-zero value.
+	 */
+	ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 941e5251e13f..5c37fa68ee56 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4382,6 +4382,14 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
 		   (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT));
 }
 
+static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+	if (!sev_es_guest(vcpu->kvm))
+		return kvm_vcpu_deliver_sipi_vector(vcpu, vector);
+
+	sev_vcpu_deliver_sipi_vector(vcpu, vector);
+}
+
 static void svm_vm_destroy(struct kvm *kvm)
 {
 	avic_vm_destroy(kvm);
@@ -4524,6 +4532,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 
 	.msr_filter_changed = svm_msr_filter_changed,
 	.complete_emulated_msr = svm_complete_emulated_msr,
+
+	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5431e6335e2e..0fe874ae5498 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -185,6 +185,7 @@ struct vcpu_svm {
 	struct vmcb_save_area *vmsa;
 	struct ghcb *ghcb;
 	struct kvm_host_map ghcb_map;
+	bool received_first_sipi;
 
 	/* SEV-ES scratch area support */
 	void *ghcb_sa;
@@ -591,6 +592,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm);
 void sev_es_create_vcpu(struct vcpu_svm *svm);
 void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu);
 void sev_es_vcpu_put(struct vcpu_svm *svm);
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 
 /* vmenter.S */
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 75c9c6a0a3a4..2af05d3b0590 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7707,6 +7707,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.msr_filter_changed = vmx_msr_filter_changed,
 	.complete_emulated_msr = kvm_complete_insn_gp,
 	.cpu_dirty_log_size = vmx_cpu_dirty_log_size,
+
+	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 };
 
 static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 648c677b12e9..622612f88da7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7974,17 +7974,22 @@ void kvm_arch_exit(void)
 	kmem_cache_destroy(x86_fpu_cache);
 }
 
-int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
+int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
 {
 	++vcpu->stat.halt_exits;
 	if (lapic_in_kernel(vcpu)) {
-		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+		vcpu->arch.mp_state = state;
 		return 1;
 	} else {
-		vcpu->run->exit_reason = KVM_EXIT_HLT;
+		vcpu->run->exit_reason = reason;
 		return 0;
 	}
 }
+
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
+{
+	return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
+}
 EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
 
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
@@ -7998,6 +8003,14 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
+int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
+{
+	int ret = kvm_skip_emulated_instruction(vcpu);
+
+	return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret;
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
+
 #ifdef CONFIG_X86_64
 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
 			        unsigned long clock_type)
@@ -10150,6 +10163,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
 	kvm_rip_write(vcpu, 0);
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
 
 int kvm_arch_hardware_enable(void)
 {
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 886802b8ffba..374c67875cdb 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -251,6 +251,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_X86_RDMSR        29
 #define KVM_EXIT_X86_WRMSR        30
 #define KVM_EXIT_DIRTY_RING_FULL  31
+#define KVM_EXIT_AP_RESET_HOLD    32
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -573,6 +574,7 @@ struct kvm_vapic_addr {
 #define KVM_MP_STATE_CHECK_STOP        6
 #define KVM_MP_STATE_OPERATING         7
 #define KVM_MP_STATE_LOAD              8
+#define KVM_MP_STATE_AP_RESET_HOLD     9
 
 struct kvm_mp_state {
 	__u32 mp_state;
Tom Lendacky Jan. 4, 2021, 5:38 p.m. UTC | #5
On 12/15/20 2:25 PM, Tom Lendacky wrote:
> On 12/14/20 1:46 PM, Tom Lendacky wrote:
>> On 12/14/20 10:03 AM, Paolo Bonzini wrote:
>>> On 10/12/20 18:10, Tom Lendacky wrote:
>>>> From: Tom Lendacky <thomas.lendacky@amd.com>
>>>>
>>>> +    case SVM_VMGEXIT_AP_HLT_LOOP:
>>>> +        svm->ap_hlt_loop = true;
>>>
>>> This value needs to be communicated to userspace.  Let's get this right
>>> from the beginning and use a new KVM_MP_STATE_* value instead (perhaps
>>> reuse KVM_MP_STATE_STOPPED but for x86 #define it as
>>> KVM_MP_STATE_AP_HOLD_RECEIVED?).
>>
>> Ok, let me look into this.
> 
> Paolo, is this something along the lines of what you were thinking, or am
> I off base? I created kvm_emulate_ap_reset_hold() to keep the code
> consolidated and remove the duplication, but can easily make those changes
> local to sev.c. I'd also like to rename SVM_VMGEXIT_AP_HLT_LOOP to
> SVM_VMGEXIT_AP_RESET_HOLD to more closely match the GHCB document, but
> that can be done later (if possible, since it is already part of the uapi
> include file).

Paolo, a quick ping after the holidays as to whether this is the approach
you had in mind. I think there are also a couple of places in x86.c to
update (vcpu_block() and kvm_arch_vcpu_ioctl_get_mpstate()).
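
For illustration, those updates might look roughly like the untested sketch
below (treating the new state like KVM_MP_STATE_HALTED); this is an
assumption about the follow-up, not part of the posted patch:

--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ static int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 	switch(vcpu->arch.mp_state) {
 	case KVM_MP_STATE_HALTED:
+	case KVM_MP_STATE_AP_RESET_HOLD:
 		vcpu->arch.pv.pv_unhalted = false;
 		vcpu->arch.mp_state =
 			KVM_MP_STATE_RUNNABLE;
@@ int kvm_arch_vcpu_ioctl_get_mpstate(...)
-	if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
-					vcpu->arch.pv.pv_unhalted)
+	if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED ||
+	     vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) &&
+	    vcpu->arch.pv.pv_unhalted)
 		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;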

Thanks,
Tom

Paolo Bonzini Jan. 4, 2021, 5:50 p.m. UTC | #6
On 04/01/21 18:38, Tom Lendacky wrote:
>>
>> Paolo, is this something along the lines of what you were thinking, or am
>> I off base? I created kvm_emulate_ap_reset_hold() to keep the code
>> consolidated and remove the duplication, but can easily make those 
>> changes
>> local to sev.c. I'd also like to rename SVM_VMGEXIT_AP_HLT_LOOP to
>> SVM_VMGEXIT_AP_RESET_HOLD to more closely match the GHCB document, but
>> that can be done later (if possible, since it is already part of the uapi
>> include file).
> 
> Paolo, a quick ping after the holidays as to whether this is the
> approach you had in mind. I think there are also a couple of places in
> x86.c to update (vcpu_block() and kvm_arch_vcpu_ioctl_get_mpstate()).

Yes, this is the basic idea.

Paolo

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 048b08437c33..60a3b9d33407 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1286,6 +1286,8 @@  struct kvm_x86_ops {
 
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
 	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
+
+	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index a7531de760b5..b47285384b1f 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -17,6 +17,8 @@ 
 #include <linux/processor.h>
 #include <linux/trace_events.h>
 
+#include <asm/trapnr.h>
+
 #include "x86.h"
 #include "svm.h"
 #include "cpuid.h"
@@ -1449,6 +1451,8 @@  static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 		if (!ghcb_sw_scratch_is_valid(ghcb))
 			goto vmgexit_err;
 		break;
+	case SVM_VMGEXIT_AP_HLT_LOOP:
+	case SVM_VMGEXIT_AP_JUMP_TABLE:
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
 		break;
 	default:
@@ -1770,6 +1774,35 @@  int sev_handle_vmgexit(struct vcpu_svm *svm)
 					    control->exit_info_2,
 					    svm->ghcb_sa);
 		break;
+	case SVM_VMGEXIT_AP_HLT_LOOP:
+		svm->ap_hlt_loop = true;
+		ret = kvm_emulate_halt(&svm->vcpu);
+		break;
+	case SVM_VMGEXIT_AP_JUMP_TABLE: {
+		struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+		switch (control->exit_info_1) {
+		case 0:
+			/* Set AP jump table address */
+			sev->ap_jump_table = control->exit_info_2;
+			break;
+		case 1:
+			/* Get AP jump table address */
+			ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+			break;
+		default:
+			pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
+			       control->exit_info_1);
+			ghcb_set_sw_exit_info_1(ghcb, 1);
+			ghcb_set_sw_exit_info_2(ghcb,
+						X86_TRAP_UD |
+						SVM_EVTINJ_TYPE_EXEPT |
+						SVM_EVTINJ_VALID);
+		}
+
+		ret = 1;
+		break;
+	}
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
 		vcpu_unimpl(&svm->vcpu,
 			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
@@ -1790,3 +1823,20 @@  int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
 	return kvm_sev_es_string_io(&svm->vcpu, size, port,
 				    svm->ghcb_sa, svm->ghcb_sa_len, in);
 }
+
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	/* First SIPI: Use the values as initially set by the VMM */
+	if (!svm->ap_hlt_loop)
+		return;
+
+	/*
+	 * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
+	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
+	 * non-zero value.
+	 */
+	ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+	svm->ap_hlt_loop = false;
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 8d22ae25a0f8..2dbc20701ef5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4400,6 +4400,11 @@  static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
 		   (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT));
 }
 
+static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+	sev_vcpu_deliver_sipi_vector(vcpu, vector);
+}
+
 static void svm_vm_destroy(struct kvm *kvm)
 {
 	avic_vm_destroy(kvm);
@@ -4541,6 +4546,8 @@  static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
 
 	.msr_filter_changed = svm_msr_filter_changed,
+
+	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index b3f03dede6ac..5d570d5a6a2c 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -68,6 +68,7 @@  struct kvm_sev_info {
 	int fd;			/* SEV device fd */
 	unsigned long pages_locked; /* Number of pages locked */
 	struct list_head regions_list;  /* List of registered regions */
+	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */
 };
 
 struct kvm_svm {
@@ -174,6 +175,7 @@  struct vcpu_svm {
 	struct vmcb_save_area *vmsa;
 	struct ghcb *ghcb;
 	struct kvm_host_map ghcb_map;
+	bool ap_hlt_loop;
 
 	/* SEV-ES scratch area support */
 	void *ghcb_sa;
@@ -574,5 +576,6 @@  void sev_hardware_teardown(void);
 void sev_free_vcpu(struct kvm_vcpu *vcpu);
 int sev_handle_vmgexit(struct vcpu_svm *svm);
 int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ddd614a76744..4fd216b61a89 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10144,6 +10144,15 @@  void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 {
 	struct kvm_segment cs;
 
+	/*
+	 * Guests with protected state can't have their state altered by KVM,
+	 * call the vcpu_deliver_sipi_vector() x86 op for processing.
+	 */
+	if (vcpu->arch.guest_state_protected) {
+		kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, vector);
+		return;
+	}
+
 	kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
 	cs.selector = vector << 8;
 	cs.base = vector << 12;