diff mbox series

[v3,5/5] KVM: selftests: KVM: SVM: Add Idle HLT intercept test

Message ID 20240528041926.3989-6-manali.shukla@amd.com (mailing list archive)
State New, archived
Headers show
Series Add support for the Idle HLT intercept feature | expand

Commit Message

Manali Shukla May 28, 2024, 4:19 a.m. UTC
From: Manali Shukla <Manali.Shukla@amd.com>

Execution of the HLT instruction results in VMEXIT. Hypervisor observes
pending V_INTR and V_NMI events just after VMEXIT generated by HLT for
the vCPU and causes VM entry to service the pending events.  The Idle
HLT intercept feature avoids the wasteful VMEXIT during halt if there
are pending V_INTR and V_NMI events for the vCPU.

The selftest for Idle HLT intercept instruments above-mentioned scenario.

Signed-off-by: Manali Shukla <Manali.Shukla@amd.com>
---
 tools/testing/selftests/kvm/Makefile          |  1 +
 .../selftests/kvm/include/x86_64/processor.h  |  1 +
 .../selftests/kvm/x86_64/svm_idle_hlt_test.c  | 89 +++++++++++++++++++
 3 files changed, 91 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/svm_idle_hlt_test.c

Comments

Chao Gao May 28, 2024, 7:46 a.m. UTC | #1
>+static void guest_code(void)
>+{
>+	uint32_t icr_val;
>+	int i;
>+
>+	xapic_enable();
>+
>+	icr_val = (APIC_DEST_SELF | APIC_INT_ASSERT | VINTR_VECTOR);
>+
>+	for (i = 0; i < NUM_ITERATIONS; i++) {
>+		cli();
>+		xapic_write_reg(APIC_ICR, icr_val);
>+		safe_halt();
>+		GUEST_ASSERT(READ_ONCE(irq_received));
>+		WRITE_ONCE(irq_received, false);

any reason to use READ/WRITE_ONCE here?

>+	}
>+	GUEST_DONE();
>+}
>+
>+static void guest_vintr_handler(struct ex_regs *regs)
>+{
>+	WRITE_ONCE(irq_received, true);
>+	xapic_write_reg(APIC_EOI, 0x00);
>+}
>+
>+int main(int argc, char *argv[])
>+{
>+	struct kvm_vm *vm;
>+	struct kvm_vcpu *vcpu;
>+	struct ucall uc;
>+	uint64_t  halt_exits, vintr_exits;
>+
>+	/* Check the extension for binary stats */
>+	TEST_REQUIRE(this_cpu_has(X86_FEATURE_IDLE_HLT));

IIUC, this test assumes that the IDLE_HLT feature is enabled for guests if it
is supported by the CPU. But this isn't true in some cases:

1. an old KVM runs on new hardware
2. the feature bit is somehow cleared, e.g., by "clearcpuid" cmdline


>+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_BINARY_STATS_FD));
>+
>+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
>+
>+	vm_install_exception_handler(vm, VINTR_VECTOR, guest_vintr_handler);
>+	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
>+
>+	vcpu_run(vcpu);
>+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
>+
>+	halt_exits = vcpu_get_stat(vcpu, HALT_EXITS);
>+	vintr_exits = vcpu_get_stat(vcpu, IRQ_WINDOW_EXITS);
>+
>+	switch (get_ucall(vcpu, &uc)) {
>+	case UCALL_ABORT:
>+		REPORT_GUEST_ASSERT(uc);
>+		/* NOT REACHED */
>+	case UCALL_DONE:
>+		break;
>+
>+	default:
>+		TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
>+	}
>+
>+	TEST_ASSERT_EQ(halt_exits, 0);
>+	pr_debug("Guest executed VINTR followed by halts: %d times.\n"
>+		 "The guest exited due to halt: %ld times and number\n"
>+		 "of vintr exits: %ld.\n",
>+		 NUM_ITERATIONS, halt_exits, vintr_exits);
>+
>+	kvm_vm_free(vm);
>+	return 0;
>+}
>-- 
>2.34.1
>
>
Manali Shukla May 30, 2024, 1:19 p.m. UTC | #2
Hi Chao,
Thank you for reviewing my patches.

On 5/28/2024 1:16 PM, Chao Gao wrote:
>> +static void guest_code(void)
>> +{
>> +	uint32_t icr_val;
>> +	int i;
>> +
>> +	xapic_enable();
>> +
>> +	icr_val = (APIC_DEST_SELF | APIC_INT_ASSERT | VINTR_VECTOR);
>> +
>> +	for (i = 0; i < NUM_ITERATIONS; i++) {
>> +		cli();
>> +		xapic_write_reg(APIC_ICR, icr_val);
>> +		safe_halt();
>> +		GUEST_ASSERT(READ_ONCE(irq_received));
>> +		WRITE_ONCE(irq_received, false);
> 
> any reason to use READ/WRITE_ONCE here?

This is done to ensure that irq is already received at this point,
as irq_received is set to true in guest_vintr_handler.

> 
>> +	}
>> +	GUEST_DONE();
>> +}
>> +
>> +static void guest_vintr_handler(struct ex_regs *regs)
>> +{
>> +	WRITE_ONCE(irq_received, true);
>> +	xapic_write_reg(APIC_EOI, 0x00);
>> +}
>> +
>> +int main(int argc, char *argv[])
>> +{
>> +	struct kvm_vm *vm;
>> +	struct kvm_vcpu *vcpu;
>> +	struct ucall uc;
>> +	uint64_t  halt_exits, vintr_exits;
>> +
>> +	/* Check the extension for binary stats */
>> +	TEST_REQUIRE(this_cpu_has(X86_FEATURE_IDLE_HLT));
> 
> IIUC, this test assumes that the IDLE_HLT feature is enabled for guests if it
> is supported by the CPU. But this isn't true in some cases:
> 
I understand you are intending to create a capability for IDLE HLT intercept feature, but in my
opinion, the IDLE Halt intercept feature doesn't require user space to do anything for the feature
itself.

> 1. an old KVM runs on new hardware
> 2. the feature bit is somehow cleared, e.g., by "clearcpuid" cmdline
In both the case, the test case will fail.

In General, if the feature bit for the Idle halt feature is cleared somehow, or new KVM runs
on old hardware, the idle halt exits will be replaced with halt exits.

If the old KVM runs on new hardware, the idle halt feature is never enabled via KVM.
So, the presence of a pending V_INTR event during the execution of the halt instruction
won't result into the idle-halt exit; rather, it will result in a halt exit followed
by a vintr exit. 

-Manali

> >> +	TEST_REQUIRE(kvm_has_cap(KVM_CAP_BINARY_STATS_FD));
>> +
>> +	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
>> +
>> +	vm_install_exception_handler(vm, VINTR_VECTOR, guest_vintr_handler);
>> +	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
>> +
>> +	vcpu_run(vcpu);
>> +	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
>> +
>> +	halt_exits = vcpu_get_stat(vcpu, HALT_EXITS);
>> +	vintr_exits = vcpu_get_stat(vcpu, IRQ_WINDOW_EXITS);
>> +
>> +	switch (get_ucall(vcpu, &uc)) {
>> +	case UCALL_ABORT:
>> +		REPORT_GUEST_ASSERT(uc);
>> +		/* NOT REACHED */
>> +	case UCALL_DONE:
>> +		break;
>> +
>> +	default:
>> +		TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
>> +	}
>> +
>> +	TEST_ASSERT_EQ(halt_exits, 0);
>> +	pr_debug("Guest executed VINTR followed by halts: %d times.\n"
>> +		 "The guest exited due to halt: %ld times and number\n"
>> +		 "of vintr exits: %ld.\n",
>> +		 NUM_ITERATIONS, halt_exits, vintr_exits);
>> +
>> +	kvm_vm_free(vm);
>> +	return 0;
>> +}
>> -- 
>> 2.34.1
>>
>>
Chao Gao May 31, 2024, 6:49 a.m. UTC | #3
On Thu, May 30, 2024 at 06:49:56PM +0530, Manali Shukla wrote:
>Hi Chao,
>Thank you for reviewing my patches.
>
>On 5/28/2024 1:16 PM, Chao Gao wrote:
>>> +static void guest_code(void)
>>> +{
>>> +	uint32_t icr_val;
>>> +	int i;
>>> +
>>> +	xapic_enable();
>>> +
>>> +	icr_val = (APIC_DEST_SELF | APIC_INT_ASSERT | VINTR_VECTOR);
>>> +
>>> +	for (i = 0; i < NUM_ITERATIONS; i++) {
>>> +		cli();
>>> +		xapic_write_reg(APIC_ICR, icr_val);
>>> +		safe_halt();
>>> +		GUEST_ASSERT(READ_ONCE(irq_received));
>>> +		WRITE_ONCE(irq_received, false);
>> 
>> any reason to use READ/WRITE_ONCE here?
>
>This is done to ensure that irq is already received at this point,
>as irq_received is set to true in guest_vintr_handler.

OK. so, READ_ONCE() is to ensure that irq_received is always read directly
from memory. Otherwise, the compiler might assume it remains false (in the
2nd and subsequent iterations) and apply some optimizations.

However, I don't understand why WRITE_ONCE() is necessary here. Is it to
prevent the compiler from merging all writes to irq_received across
iterations into a single write (e.g., simply drop writes in the 2nd
and subsequent iterations)? I'm not sure.

I suggest adding one comment here because it isn't obvious to everyone.

>
>> 
>>> +	}
>>> +	GUEST_DONE();
>>> +}
>>> +
>>> +static void guest_vintr_handler(struct ex_regs *regs)
>>> +{
>>> +	WRITE_ONCE(irq_received, true);
>>> +	xapic_write_reg(APIC_EOI, 0x00);
>>> +}
>>> +
>>> +int main(int argc, char *argv[])
>>> +{
>>> +	struct kvm_vm *vm;
>>> +	struct kvm_vcpu *vcpu;
>>> +	struct ucall uc;
>>> +	uint64_t  halt_exits, vintr_exits;
>>> +
>>> +	/* Check the extension for binary stats */
>>> +	TEST_REQUIRE(this_cpu_has(X86_FEATURE_IDLE_HLT));
>> 
>> IIUC, this test assumes that the IDLE_HLT feature is enabled for guests if it
>> is supported by the CPU. But this isn't true in some cases:
>> 
>I understand you are intending to create a capability for IDLE HLT intercept feature, but in my
>opinion, the IDLE Halt intercept feature doesn't require user space to do anything for the feature
>itself.

Yes, I agree. Actually, I was thinking about:

1. make the feature bit visible from /proc/cpuinfo by removing the leading ""
   from the comment following the bit definition in patch 1

2. parse /proc/cpuinfo to determine if this IDLE_HLT feature is supported by the
   kernel

But I am not sure if it's worth it. I'll defer to maintainers.
Manali Shukla June 19, 2024, 5:09 p.m. UTC | #4
Hi Chao,

Thanks for reviewing the patches.

On 5/31/2024 12:19 PM, Chao Gao wrote:
> On Thu, May 30, 2024 at 06:49:56PM +0530, Manali Shukla wrote:
>> Hi Chao,
>> Thank you for reviewing my patches.
>>
>> On 5/28/2024 1:16 PM, Chao Gao wrote:
>>>> +static void guest_code(void)
>>>> +{
>>>> +	uint32_t icr_val;
>>>> +	int i;
>>>> +
>>>> +	xapic_enable();
>>>> +
>>>> +	icr_val = (APIC_DEST_SELF | APIC_INT_ASSERT | VINTR_VECTOR);
>>>> +
>>>> +	for (i = 0; i < NUM_ITERATIONS; i++) {
>>>> +		cli();
>>>> +		xapic_write_reg(APIC_ICR, icr_val);
>>>> +		safe_halt();
>>>> +		GUEST_ASSERT(READ_ONCE(irq_received));
>>>> +		WRITE_ONCE(irq_received, false);
>>>
>>> any reason to use READ/WRITE_ONCE here?
>>
>> This is done to ensure that irq is already received at this point,
>> as irq_received is set to true in guest_vintr_handler.
> 
> OK. so, READ_ONCE() is to ensure that irq_received is always read directly
> from memory. Otherwise, the compiler might assume it remains false (in the
> 2nd and subsequent iterations) and apply some optimizations.
> 
> However, I don't understand why WRITE_ONCE() is necessary here. Is it to
> prevent the compiler from merging all writes to irq_received across
> iterations into a single write (e.g., simply drop writes in the 2nd
> and subsequent iterations)? I'm not sure.
> 

Compiler optimizing this out is one case. If WRITE_ONCE to irq_received is
not called, the test will not be able to figure out that whether 
irq_received has a stale "true" from the previous iteration (maybe the vintr
interrupt handler did not get invoked) or a fresh "true" from the current
iteration. 


> I suggest adding one comment here because it isn't obvious to everyone.
>
Sure I will add the comment in V4.
 
>>
>>>
>>>> +	}
>>>> +	GUEST_DONE();
>>>> +}
>>>> +
>>>> +static void guest_vintr_handler(struct ex_regs *regs)
>>>> +{
>>>> +	WRITE_ONCE(irq_received, true);
>>>> +	xapic_write_reg(APIC_EOI, 0x00);
>>>> +}
>>>> +
>>>> +int main(int argc, char *argv[])
>>>> +{
>>>> +	struct kvm_vm *vm;
>>>> +	struct kvm_vcpu *vcpu;
>>>> +	struct ucall uc;
>>>> +	uint64_t  halt_exits, vintr_exits;
>>>> +
>>>> +	/* Check the extension for binary stats */
>>>> +	TEST_REQUIRE(this_cpu_has(X86_FEATURE_IDLE_HLT));
>>>
>>> IIUC, this test assumes that the IDLE_HLT feature is enabled for guests if it
>>> is supported by the CPU. But this isn't true in some cases:
>>>
>> I understand you are intending to create a capability for IDLE HLT intercept feature, but in my
>> opinion, the IDLE Halt intercept feature doesn't require user space to do anything for the feature
>> itself.
> 
> Yes, I agree. Actually, I was thinking about:
> 
> 1. make the feature bit visible from /proc/cpuinfo by removing the leading ""
>    from the comment following the bit definition in patch 1
> 
> 2. parse /proc/cpuinfo to determine if this IDLE_HLT feature is supported by the
>    kernel
> 
> But I am not sure if it's worth it. I'll defer to maintainers.

Ack.

-Manali
Sean Christopherson Aug. 13, 2024, 3:38 p.m. UTC | #5
On Fri, May 31, 2024, Chao Gao wrote:
> On Thu, May 30, 2024 at 06:49:56PM +0530, Manali Shukla wrote:
> >>> +	/* Check the extension for binary stats */
> >>> +	TEST_REQUIRE(this_cpu_has(X86_FEATURE_IDLE_HLT));
> >> 
> >> IIUC, this test assumes that the IDLE_HLT feature is enabled for guests if it
> >> is supported by the CPU. But this isn't true in some cases:
> >> 
> >I understand you are intending to create a capability for IDLE HLT intercept
> >feature, but in my opinion, the IDLE Halt intercept feature doesn't require
> >user space to do anything for the feature itself.
> 
> Yes, I agree. Actually, I was thinking about:
> 
> 1. make the feature bit visible from /proc/cpuinfo by removing the leading ""
>    from the comment following the bit definition in patch 1
> 
> 2. parse /proc/cpuinfo to determine if this IDLE_HLT feature is supported by the
>    kernel

Neither of these is sufficient/correct.  E.g. they'll get false positives if run
on a kernel that recognizes IDLE_HLT, but that doesn't have KVM support for
enabling the feature.

The canonical way to check for features in KVM selftests is kvm_cpu_has(), which
looks at KVM_GET_SUPPORTED_CPUID (by default, selftests VMs enable all features,
i.e. reflect the result of KVM_GET_SUPPORTED_CPUID into KVM_SET_CPUID2).

> But I am not sure if it's worth it. I'll defer to maintainers.
Sean Christopherson Aug. 13, 2024, 4:03 p.m. UTC | #6
On Tue, Aug 13, 2024, Sean Christopherson wrote:
> On Fri, May 31, 2024, Chao Gao wrote:
> > On Thu, May 30, 2024 at 06:49:56PM +0530, Manali Shukla wrote:
> > >>> +	/* Check the extension for binary stats */
> > >>> +	TEST_REQUIRE(this_cpu_has(X86_FEATURE_IDLE_HLT));
> > >> 
> > >> IIUC, this test assumes that the IDLE_HLT feature is enabled for guests if it
> > >> is supported by the CPU. But this isn't true in some cases:
> > >> 
> > >I understand you are intending to create a capability for IDLE HLT intercept
> > >feature, but in my opinion, the IDLE Halt intercept feature doesn't require
> > >user space to do anything for the feature itself.
> > 
> > Yes, I agree. Actually, I was thinking about:
> > 
> > 1. make the feature bit visible from /proc/cpuinfo by removing the leading ""
> >    from the comment following the bit definition in patch 1
> > 
> > 2. parse /proc/cpuinfo to determine if this IDLE_HLT feature is supported by the
> >    kernel
> 
> Neither of these is sufficient/correct.  E.g. they'll get false positives if run
> on a kernel that recognizes IDLE_HLT, but that doesn't have KVM support for
> enabling the feature.
> 
> The canonical way to check for features in KVM selftests is kvm_cpu_has(), which
> looks at KVM_GET_SUPPORTED_CPUID (by default, selftests VMs enable all features,
> i.e. reflect the result of KVM_GET_SUPPORTED_CPUID into KVM_SET_CPUID2).

Never mind, brain fart.  That only works for features KVM is exposing to the guest,
this is purely a KVM/host feature.

That said, doesn't this test also require that AVIC be enabled?  Oh, or does this
happen to work because KVM uses V_INTR to detect interrupt windows?
diff mbox series

Patch

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 6de9994971c9..bd97586d7c04 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -93,6 +93,7 @@  TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test
 TEST_GEN_PROGS_x86_64 += x86_64/smm_test
 TEST_GEN_PROGS_x86_64 += x86_64/state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_idle_hlt_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index f74f31df96d2..0036937b1be4 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -192,6 +192,7 @@  struct kvm_x86_cpu_feature {
 #define X86_FEATURE_PAUSEFILTER         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
 #define X86_FEATURE_PFTHRESHOLD         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
 #define	X86_FEATURE_VGIF		KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_IDLE_HLT		KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
 #define X86_FEATURE_SEV			KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
 #define X86_FEATURE_SEV_ES		KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
 
diff --git a/tools/testing/selftests/kvm/x86_64/svm_idle_hlt_test.c b/tools/testing/selftests/kvm/x86_64/svm_idle_hlt_test.c
new file mode 100644
index 000000000000..594caac7194b
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_idle_hlt_test.c
@@ -0,0 +1,89 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Copyright (C) 2024 Advanced Micro Devices, Inc.
+ *
+ */
+#include <kvm_util.h>
+#include <processor.h>
+#include <test_util.h>
+#include "svm_util.h"
+#include "apic.h"
+
+#define VINTR_VECTOR     0x30
+#define NUM_ITERATIONS   1000
+
+static bool irq_received;
+
+/*
+ * The guest code instruments the scenario where there is a V_INTR pending
+ * event available while hlt instruction is executed. The HLT VM Exit doesn't
+ * occur in above-mentioned scenario if Idle HLT intercept feature is enabled.
+ */
+
+static void guest_code(void)
+{
+	uint32_t icr_val;
+	int i;
+
+	xapic_enable();
+
+	icr_val = (APIC_DEST_SELF | APIC_INT_ASSERT | VINTR_VECTOR);
+
+	for (i = 0; i < NUM_ITERATIONS; i++) {
+		cli();
+		xapic_write_reg(APIC_ICR, icr_val);
+		safe_halt();
+		GUEST_ASSERT(READ_ONCE(irq_received));
+		WRITE_ONCE(irq_received, false);
+	}
+	GUEST_DONE();
+}
+
+static void guest_vintr_handler(struct ex_regs *regs)
+{
+	WRITE_ONCE(irq_received, true);
+	xapic_write_reg(APIC_EOI, 0x00);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu;
+	struct ucall uc;
+	uint64_t  halt_exits, vintr_exits;
+
+	/* Check the extension for binary stats */
+	TEST_REQUIRE(this_cpu_has(X86_FEATURE_IDLE_HLT));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_BINARY_STATS_FD));
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	vm_install_exception_handler(vm, VINTR_VECTOR, guest_vintr_handler);
+	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+	vcpu_run(vcpu);
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+	halt_exits = vcpu_get_stat(vcpu, HALT_EXITS);
+	vintr_exits = vcpu_get_stat(vcpu, IRQ_WINDOW_EXITS);
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);
+		/* NOT REACHED */
+	case UCALL_DONE:
+		break;
+
+	default:
+		TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+	}
+
+	TEST_ASSERT_EQ(halt_exits, 0);
+	pr_debug("Guest executed VINTR followed by halts: %d times.\n"
+		 "The guest exited due to halt: %ld times and number\n"
+		 "of vintr exits: %ld.\n",
+		 NUM_ITERATIONS, halt_exits, vintr_exits);
+
+	kvm_vm_free(vm);
+	return 0;
+}