diff mbox series

[v19,108/130] KVM: TDX: Handle TDX PV HLT hypercall

Message ID c083430632ba9e80abd09bccd5609fb3cd9d9c63.1708933498.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Headers show
Series [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro | expand

Commit Message

Isaku Yamahata Feb. 26, 2024, 8:26 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

Wire up TDX PV HLT hypercall to the KVM backend function.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
v19:
- move tdvps_state_non_arch_check() to this patch

v18:
- drop buggy_hlt_workaround and use TDH.VP.RD(TD_VCPU_STATE_DETAILS)

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/vmx/tdx.c | 26 +++++++++++++++++++++++++-
 arch/x86/kvm/vmx/tdx.h |  4 ++++
 2 files changed, 29 insertions(+), 1 deletion(-)

Comments

Chao Gao April 3, 2024, 3:25 a.m. UTC | #1
On Mon, Feb 26, 2024 at 12:26:50AM -0800, isaku.yamahata@intel.com wrote:
>From: Isaku Yamahata <isaku.yamahata@intel.com>
>
>Wire up TDX PV HLT hypercall to the KVM backend function.
>
>Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
>---
>v19:
>- move tdvps_state_non_arch_check() to this patch
>
>v18:
>- drop buggy_hlt_workaround and use TDH.VP.RD(TD_VCPU_STATE_DETAILS)
>
>Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
>---
> arch/x86/kvm/vmx/tdx.c | 26 +++++++++++++++++++++++++-
> arch/x86/kvm/vmx/tdx.h |  4 ++++
> 2 files changed, 29 insertions(+), 1 deletion(-)
>
>diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>index eb68d6c148b6..a2caf2ae838c 100644
>--- a/arch/x86/kvm/vmx/tdx.c
>+++ b/arch/x86/kvm/vmx/tdx.c
>@@ -688,7 +688,18 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> 
> bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu)
> {
>-	return pi_has_pending_interrupt(vcpu);
>+	bool ret = pi_has_pending_interrupt(vcpu);

Maybe
	bool has_pending_interrupt = pi_has_pending_interrupt(vcpu);

"ret" isn't a good name. or even call pi_has_pending_interrupt() directly in
the if statement below.

>+	union tdx_vcpu_state_details details;
>+	struct vcpu_tdx *tdx = to_tdx(vcpu);
>+
>+	if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
>+		return true;

Question: why mp_state matters here?

>+
>+	if (tdx->interrupt_disabled_hlt)
>+		return false;

Shouldn't we move this into vt_interrupt_allowed()? VMX calls the function to
check if interrupt is disabled. KVM can clear tdx->interrupt_disabled_hlt on
every TD-enter and set it only on TD-exit due to the guest making a
TDVMCALL(hlt) w/ interrupt disabled.

>+
>+	details.full = td_state_non_arch_read64(tdx, TD_VCPU_STATE_DETAILS_NON_ARCH);
>+	return !!details.vmxip;
> }
> 
> void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
>@@ -1130,6 +1141,17 @@ static int tdx_emulate_cpuid(struct kvm_vcpu *vcpu)
> 	return 1;
> }
> 
>+static int tdx_emulate_hlt(struct kvm_vcpu *vcpu)
>+{
>+	struct vcpu_tdx *tdx = to_tdx(vcpu);
>+
>+	/* See tdx_protected_apic_has_interrupt() to avoid heavy seamcall */
>+	tdx->interrupt_disabled_hlt = tdvmcall_a0_read(vcpu);
>+
>+	tdvmcall_set_return_code(vcpu, TDVMCALL_SUCCESS);
>+	return kvm_emulate_halt_noskip(vcpu);
>+}
>+
> static int handle_tdvmcall(struct kvm_vcpu *vcpu)
> {
> 	if (tdvmcall_exit_type(vcpu))
>@@ -1138,6 +1160,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu)
> 	switch (tdvmcall_leaf(vcpu)) {
> 	case EXIT_REASON_CPUID:
> 		return tdx_emulate_cpuid(vcpu);
>+	case EXIT_REASON_HLT:
>+		return tdx_emulate_hlt(vcpu);
> 	default:
> 		break;
> 	}
>diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
>index 4399d474764f..11c74c34555f 100644
>--- a/arch/x86/kvm/vmx/tdx.h
>+++ b/arch/x86/kvm/vmx/tdx.h
>@@ -104,6 +104,8 @@ struct vcpu_tdx {
> 	bool host_state_need_restore;
> 	u64 msr_host_kernel_gs_base;
> 
>+	bool interrupt_disabled_hlt;
>+
> 	/*
> 	 * Dummy to make pmu_intel not corrupt memory.
> 	 * TODO: Support PMU for TDX.  Future work.
>@@ -166,6 +168,7 @@ static __always_inline void tdvps_vmcs_check(u32 field, u8 bits)
> }
> 
> static __always_inline void tdvps_management_check(u64 field, u8 bits) {}
>+static __always_inline void tdvps_state_non_arch_check(u64 field, u8 bits) {}
> 
> #define TDX_BUILD_TDVPS_ACCESSORS(bits, uclass, lclass)				\
> static __always_inline u##bits td_##lclass##_read##bits(struct vcpu_tdx *tdx,	\
>@@ -226,6 +229,7 @@ TDX_BUILD_TDVPS_ACCESSORS(32, VMCS, vmcs);
> TDX_BUILD_TDVPS_ACCESSORS(64, VMCS, vmcs);
> 
> TDX_BUILD_TDVPS_ACCESSORS(8, MANAGEMENT, management);
>+TDX_BUILD_TDVPS_ACCESSORS(64, STATE_NON_ARCH, state_non_arch);
> 
> static __always_inline u64 td_tdcs_exec_read64(struct kvm_tdx *kvm_tdx, u32 field)
> {
>-- 
>2.25.1
>
>
Sean Christopherson April 3, 2024, 2:49 p.m. UTC | #2
On Wed, Apr 03, 2024, Chao Gao wrote:
> On Mon, Feb 26, 2024 at 12:26:50AM -0800, isaku.yamahata@intel.com wrote:
> >From: Isaku Yamahata <isaku.yamahata@intel.com>
> >
> >Wire up TDX PV HLT hypercall to the KVM backend function.
> >
> >Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> >---
> >v19:
> >- move tdvps_state_non_arch_check() to this patch
> >
> >v18:
> >- drop buggy_hlt_workaround and use TDH.VP.RD(TD_VCPU_STATE_DETAILS)
> >
> >Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> >---
> > arch/x86/kvm/vmx/tdx.c | 26 +++++++++++++++++++++++++-
> > arch/x86/kvm/vmx/tdx.h |  4 ++++
> > 2 files changed, 29 insertions(+), 1 deletion(-)
> >
> >diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> >index eb68d6c148b6..a2caf2ae838c 100644
> >--- a/arch/x86/kvm/vmx/tdx.c
> >+++ b/arch/x86/kvm/vmx/tdx.c
> >@@ -688,7 +688,18 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> > 
> > bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu)
> > {
> >-	return pi_has_pending_interrupt(vcpu);
> >+	bool ret = pi_has_pending_interrupt(vcpu);
> 
> Maybe
> 	bool has_pending_interrupt = pi_has_pending_interrupt(vcpu);
> 
> "ret" isn't a good name. or even call pi_has_pending_interrupt() directly in
> the if statement below.

Ya, or split the if-statement into multiple chucks, with comments explaining
what each non-intuitive chunk is doing.  The pi_has_pending_interrupt(vcpu) check
is self-explanatory, the halted thing, not so much.  They are terminal statements,
there's zero reason to pre-check the PID.

E.g.

	/*
	 * Comment explaining why KVM needs to assume a non-halted vCPU has a
	 * pending interrupt (KVM can't see RFLAGS.IF).
	 */
	if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
		return true;

	if (pi_has_pending_interrupt(vcpu))
		return;

> >+	union tdx_vcpu_state_details details;
> >+	struct vcpu_tdx *tdx = to_tdx(vcpu);
> >+
> >+	if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
> >+		return true;
> 
> Question: why mp_state matters here?
> >+
> >+	if (tdx->interrupt_disabled_hlt)
> >+		return false;
> 
> Shouldn't we move this into vt_interrupt_allowed()? VMX calls the function to
> check if interrupt is disabled. KVM can clear tdx->interrupt_disabled_hlt on
> every TD-enter and set it only on TD-exit due to the guest making a
> TDVMCALL(hlt) w/ interrupt disabled.

I'm pretty sure interrupt_disabled_hlt shouldn't exist, should "a0", a.k.a. r12,
be preserved at this point?

	/* Another comment explaning magic code. */
	if (to_vmx(vcpu)->exit_reason.basic == EXIT_REASON_HLT &&
	    tdvmcall_a0_read(vcpu))
		return false;


Actually, can't this all be:

	if (to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_HLT)
		return true;

	if (!tdvmcall_a0_read(vcpu))
		return false;

	if (pi_has_pending_interrupt(vcpu))
		return true;

	return tdx_has_pending_virtual_interrupt(vcpu);
Isaku Yamahata April 4, 2024, 11:25 p.m. UTC | #3
On Wed, Apr 03, 2024 at 07:49:28AM -0700,
Sean Christopherson <seanjc@google.com> wrote:

> On Wed, Apr 03, 2024, Chao Gao wrote:
> > On Mon, Feb 26, 2024 at 12:26:50AM -0800, isaku.yamahata@intel.com wrote:
> > >From: Isaku Yamahata <isaku.yamahata@intel.com>
> > >
> > >Wire up TDX PV HLT hypercall to the KVM backend function.
> > >
> > >Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > >---
> > >v19:
> > >- move tdvps_state_non_arch_check() to this patch
> > >
> > >v18:
> > >- drop buggy_hlt_workaround and use TDH.VP.RD(TD_VCPU_STATE_DETAILS)
> > >
> > >Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > >---
> > > arch/x86/kvm/vmx/tdx.c | 26 +++++++++++++++++++++++++-
> > > arch/x86/kvm/vmx/tdx.h |  4 ++++
> > > 2 files changed, 29 insertions(+), 1 deletion(-)
> > >
> > >diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > >index eb68d6c148b6..a2caf2ae838c 100644
> > >--- a/arch/x86/kvm/vmx/tdx.c
> > >+++ b/arch/x86/kvm/vmx/tdx.c
> > >@@ -688,7 +688,18 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> > > 
> > > bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu)
> > > {
> > >-	return pi_has_pending_interrupt(vcpu);
> > >+	bool ret = pi_has_pending_interrupt(vcpu);
> > 
> > Maybe
> > 	bool has_pending_interrupt = pi_has_pending_interrupt(vcpu);
> > 
> > "ret" isn't a good name. or even call pi_has_pending_interrupt() directly in
> > the if statement below.
> 
> Ya, or split the if-statement into multiple chucks, with comments explaining
> what each non-intuitive chunk is doing.  The pi_has_pending_interrupt(vcpu) check
> is self-explanatory, the halted thing, not so much.  They are terminal statements,
> there's zero reason to pre-check the PID.
> 
> E.g.
> 
> 	/*
> 	 * Comment explaining why KVM needs to assume a non-halted vCPU has a
> 	 * pending interrupt (KVM can't see RFLAGS.IF).
> 	 */
> 	if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
> 		return true;
> 
> 	if (pi_has_pending_interrupt(vcpu))
> 		return;
> 
> > >+	union tdx_vcpu_state_details details;
> > >+	struct vcpu_tdx *tdx = to_tdx(vcpu);
> > >+
> > >+	if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
> > >+		return true;
> > 
> > Question: why mp_state matters here?
> > >+
> > >+	if (tdx->interrupt_disabled_hlt)
> > >+		return false;
> > 
> > Shouldn't we move this into vt_interrupt_allowed()? VMX calls the function to
> > check if interrupt is disabled.

Chao, are you suggesting to implement tdx_interrupt_allowed() as
"EXIT_REASON_HLT && a0" instead of "return true"?
I don't think it makes sense because it's rare case and we can't avoid spurious
wakeup for TDX case.


> >KVM can clear tdx->interrupt_disabled_hlt on
> > every TD-enter and set it only on TD-exit due to the guest making a
> > TDVMCALL(hlt) w/ interrupt disabled.
> 
> I'm pretty sure interrupt_disabled_hlt shouldn't exist, should "a0", a.k.a. r12,
> be preserved at this point?
> 
> 	/* Another comment explaning magic code. */
> 	if (to_vmx(vcpu)->exit_reason.basic == EXIT_REASON_HLT &&
> 	    tdvmcall_a0_read(vcpu))
> 		return false;
> 
> 
> Actually, can't this all be:
> 
> 	if (to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_HLT)
> 		return true;
> 
> 	if (!tdvmcall_a0_read(vcpu))
> 		return false;
> 
> 	if (pi_has_pending_interrupt(vcpu))
> 		return true;
> 
> 	return tdx_has_pending_virtual_interrupt(vcpu);
> 

Thanks for the suggestion.  This is much cleaner.  Will update the function.
Chao Gao April 7, 2024, 3:50 a.m. UTC | #4
>> > >+	union tdx_vcpu_state_details details;
>> > >+	struct vcpu_tdx *tdx = to_tdx(vcpu);
>> > >+
>> > >+	if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
>> > >+		return true;
>> > 
>> > Question: why mp_state matters here?
>> > >+
>> > >+	if (tdx->interrupt_disabled_hlt)
>> > >+		return false;
>> > 
>> > Shouldn't we move this into vt_interrupt_allowed()? VMX calls the function to
>> > check if interrupt is disabled.
>
>Chao, are you suggesting to implement tdx_interrupt_allowed() as
>"EXIT_REASON_HLT && a0" instead of "return true"?
>I don't think it makes sense because it's rare case and we can't avoid spurious
>wakeup for TDX case.

Yes. KVM differeniates "interrupt allowed" from "has interrupt", e.g.,

static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
...

	if (kvm_arch_interrupt_allowed(vcpu) &&
	    (kvm_cpu_has_interrupt(vcpu) ||
	    kvm_guest_apic_has_interrupt(vcpu)))
		return true;


I think tdx_protected_apic_has_interrupt() mixes them together, which isn't
good.

Probably it is a minor thing; if no one else thinks it is better to move the
"interrupt allowed" check to tdx_interrupt_allowed(), I am also fine with not
doing that.
Isaku Yamahata April 8, 2024, 6:56 p.m. UTC | #5
On Sun, Apr 07, 2024 at 11:50:04AM +0800,
Chao Gao <chao.gao@intel.com> wrote:

> >> > >+	union tdx_vcpu_state_details details;
> >> > >+	struct vcpu_tdx *tdx = to_tdx(vcpu);
> >> > >+
> >> > >+	if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
> >> > >+		return true;
> >> > 
> >> > Question: why mp_state matters here?
> >> > >+
> >> > >+	if (tdx->interrupt_disabled_hlt)
> >> > >+		return false;
> >> > 
> >> > Shouldn't we move this into vt_interrupt_allowed()? VMX calls the function to
> >> > check if interrupt is disabled.
> >
> >Chao, are you suggesting to implement tdx_interrupt_allowed() as
> >"EXIT_REASON_HLT && a0" instead of "return true"?
> >I don't think it makes sense because it's rare case and we can't avoid spurious
> >wakeup for TDX case.
> 
> Yes. KVM differeniates "interrupt allowed" from "has interrupt", e.g.,
> 
> static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
> ...
> 
> 	if (kvm_arch_interrupt_allowed(vcpu) &&
> 	    (kvm_cpu_has_interrupt(vcpu) ||
> 	    kvm_guest_apic_has_interrupt(vcpu)))
> 		return true;
> 
> 
> I think tdx_protected_apic_has_interrupt() mixes them together, which isn't
> good.

Your point is code clarity.  Ok, we can code in that way. I don't expect any
performance difference.
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index eb68d6c148b6..a2caf2ae838c 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -688,7 +688,18 @@  void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
-	return pi_has_pending_interrupt(vcpu);
+	bool ret = pi_has_pending_interrupt(vcpu);
+	union tdx_vcpu_state_details details;
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+	if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
+		return true;
+
+	if (tdx->interrupt_disabled_hlt)
+		return false;
+
+	details.full = td_state_non_arch_read64(tdx, TD_VCPU_STATE_DETAILS_NON_ARCH);
+	return !!details.vmxip;
 }
 
 void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
@@ -1130,6 +1141,17 @@  static int tdx_emulate_cpuid(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int tdx_emulate_hlt(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+	/* See tdx_protected_apic_has_interrupt() to avoid heavy seamcall */
+	tdx->interrupt_disabled_hlt = tdvmcall_a0_read(vcpu);
+
+	tdvmcall_set_return_code(vcpu, TDVMCALL_SUCCESS);
+	return kvm_emulate_halt_noskip(vcpu);
+}
+
 static int handle_tdvmcall(struct kvm_vcpu *vcpu)
 {
 	if (tdvmcall_exit_type(vcpu))
@@ -1138,6 +1160,8 @@  static int handle_tdvmcall(struct kvm_vcpu *vcpu)
 	switch (tdvmcall_leaf(vcpu)) {
 	case EXIT_REASON_CPUID:
 		return tdx_emulate_cpuid(vcpu);
+	case EXIT_REASON_HLT:
+		return tdx_emulate_hlt(vcpu);
 	default:
 		break;
 	}
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 4399d474764f..11c74c34555f 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -104,6 +104,8 @@  struct vcpu_tdx {
 	bool host_state_need_restore;
 	u64 msr_host_kernel_gs_base;
 
+	bool interrupt_disabled_hlt;
+
 	/*
 	 * Dummy to make pmu_intel not corrupt memory.
 	 * TODO: Support PMU for TDX.  Future work.
@@ -166,6 +168,7 @@  static __always_inline void tdvps_vmcs_check(u32 field, u8 bits)
 }
 
 static __always_inline void tdvps_management_check(u64 field, u8 bits) {}
+static __always_inline void tdvps_state_non_arch_check(u64 field, u8 bits) {}
 
 #define TDX_BUILD_TDVPS_ACCESSORS(bits, uclass, lclass)				\
 static __always_inline u##bits td_##lclass##_read##bits(struct vcpu_tdx *tdx,	\
@@ -226,6 +229,7 @@  TDX_BUILD_TDVPS_ACCESSORS(32, VMCS, vmcs);
 TDX_BUILD_TDVPS_ACCESSORS(64, VMCS, vmcs);
 
 TDX_BUILD_TDVPS_ACCESSORS(8, MANAGEMENT, management);
+TDX_BUILD_TDVPS_ACCESSORS(64, STATE_NON_ARCH, state_non_arch);
 
 static __always_inline u64 td_tdcs_exec_read64(struct kvm_tdx *kvm_tdx, u32 field)
 {