diff mbox series

[RFC,07/26] KVM: VMX: Emulate ACPI (CPUID.0x01.edx[bit 22]) feature

Message ID 20240203091214.411862-8-zhao1.liu@linux.intel.com (mailing list archive)
State RFC, archived
Headers show
Series Intel Thread Director Virtualization | expand

Commit Message

Zhao Liu Feb. 3, 2024, 9:11 a.m. UTC
From: Zhuocheng Ding <zhuocheng.ding@intel.com>

The ACPI (Thermal Monitor and Software Controlled Clock Facilities)
feature is a prerequisite for thermal interrupt processing, so it is
required for handling HFI notifications, which are delivered as
thermal interrupts.

To allow a VM to handle thermal interrupts, we need to emulate the ACPI
feature in KVM:

1. Emulate MSR_IA32_THERM_CONTROL (alias: IA32_CLOCK_MODULATION),
MSR_IA32_THERM_INTERRUPT and MSR_IA32_THERM_STATUS with dummy values.

According to SDM [1], the ACPI feature means:

"The ACPI flag (bit 22) of the CPUID feature flags indicates the
presence of the IA32_THERM_STATUS, IA32_THERM_INTERRUPT,
IA32_CLOCK_MODULATION MSRs, and the xAPIC thermal LVT entry."

Dummy values are sufficient for KVM to emulate RDMSR/WRMSR on these
MSRs.

2. Add the thermal interrupt injection interfaces.

This interface is needed for the ACPI emulation to be complete. Although
thermal interrupts are not actually injected into the guest yet, the
subsequent HFI/ITD emulation patches will inject a thermal interrupt
into the guest once the conditions are met.

3. Additionally, expose the CPUID bit of the ACPI feature to the VM,
which allows the VM to enable thermal interrupt handling.

[1]: SDM, vol. 3B, section 15.8.4.1, Detection of Software Controlled
Clock Modulation Extension.

Tested-by: Yanting Jiang <yanting.jiang@intel.com>
Signed-off-by: Zhuocheng Ding <zhuocheng.ding@intel.com>
Co-developed-by: Zhao Liu <zhao1.liu@intel.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
---
 arch/x86/kvm/cpuid.c   |  2 +-
 arch/x86/kvm/irq.h     |  1 +
 arch/x86/kvm/lapic.c   |  9 ++++
 arch/x86/kvm/svm/svm.c |  3 ++
 arch/x86/kvm/vmx/vmx.c | 94 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.h |  3 ++
 arch/x86/kvm/x86.c     |  3 ++
 7 files changed, 114 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index adba49afb5fe..1ad547651022 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -623,7 +623,7 @@  void kvm_set_cpu_caps(void)
 		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
 		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
 		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
-		0 /* Reserved, DS, ACPI */ | F(MMX) |
+		0 /* Reserved, DS */ | F(ACPI) | F(MMX) |
 		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
 		0 /* HTT, TM, Reserved, PBE */
 	);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index c2d7cfe82d00..e11c1fb6e1e6 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -99,6 +99,7 @@  static inline int irqchip_in_kernel(struct kvm *kvm)
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
+void kvm_apic_therm_deliver(struct kvm_vcpu *vcpu);
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3242f3da2457..af8572798976 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2783,6 +2783,15 @@  void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
 		kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
+void kvm_apic_therm_deliver(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (apic)
+		kvm_apic_local_deliver(apic, APIC_LVTTHMR);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_therm_deliver);
+
 static const struct kvm_io_device_ops apic_mmio_ops = {
 	.read     = apic_mmio_read,
 	.write    = apic_mmio_write,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e90b429c84f1..2e22d5e86768 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4288,6 +4288,9 @@  static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
 	switch (index) {
 	case MSR_IA32_MCG_EXT_CTL:
 	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
+	case MSR_IA32_THERM_CONTROL:
+	case MSR_IA32_THERM_INTERRUPT:
+	case MSR_IA32_THERM_STATUS:
 		return false;
 	case MSR_IA32_SMBASE:
 		if (!IS_ENABLED(CONFIG_KVM_SMM))
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8f5981635fe5..aa37b55cf045 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -157,6 +157,32 @@  module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
 	RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
 	RTIT_STATUS_BYTECNT))
 
+/*
+ * TM2 (CPUID.01H:ECX[8]), DTHERM (CPUID.06H:EAX[0]), PLN (CPUID.06H:EAX[4]),
+ * and HWP (CPUID.06H:EAX[7]) are not emulated in kvm.
+ */
+#define MSR_IA32_THERM_STATUS_RO_MASK (THERM_STATUS_PROCHOT | \
+	THERM_STATUS_PROCHOT_FORCEPR_EVENT | THERM_STATUS_CRITICAL_TEMP)
+#define MSR_IA32_THERM_STATUS_RWC0_MASK (THERM_STATUS_PROCHOT_LOG | \
+	THERM_STATUS_PROCHOT_FORCEPR_LOG | THERM_STATUS_CRITICAL_TEMP_LOG)
+/* MSR_IA32_THERM_STATUS unavailable bits mask: unsupported and reserved bits. */
+#define MSR_IA32_THERM_STATUS_UNAVAIL_MASK (~(MSR_IA32_THERM_STATUS_RO_MASK | \
+	MSR_IA32_THERM_STATUS_RWC0_MASK))
+
+/* ECMD (CPUID.06H:EAX[5]) is not emulated in kvm. */
+#define MSR_IA32_THERM_CONTROL_AVAIL_MASK (THERM_ON_DEM_CLO_MOD_ENABLE | \
+	THERM_ON_DEM_CLO_MOD_DUTY_CYC_MASK)
+
+/*
+ * MSR_IA32_THERM_INTERRUPT available bits mask.
+ * PLN (CPUID.06H:EAX[4]) and HFN (CPUID.06H:EAX[24]) are not emulated in kvm.
+ */
+#define MSR_IA32_THERM_INTERRUPT_AVAIL_MASK (THERM_INT_HIGH_ENABLE | \
+	THERM_INT_LOW_ENABLE | THERM_INT_PROCHOT_ENABLE | \
+	THERM_INT_FORCEPR_ENABLE | THERM_INT_CRITICAL_TEM_ENABLE | \
+	THERM_MASK_THRESHOLD0 | THERM_INT_THRESHOLD0_ENABLE | \
+	THERM_MASK_THRESHOLD1 | THERM_INT_THRESHOLD1_ENABLE)
+
 /*
  * List of MSRs that can be directly passed to the guest.
  * In addition to these x2apic and PT MSRs are handled specially.
@@ -1470,6 +1496,19 @@  void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
 	}
 }
 
+static void vmx_inject_therm_interrupt(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * From SDM, the ACPI flag also indicates the presence of the
+	 * xAPIC thermal LVT entry.
+	 */
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+		return;
+
+	if (irqchip_in_kernel(vcpu->kvm))
+		kvm_apic_therm_deliver(vcpu);
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -2109,6 +2148,24 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_DEBUGCTLMSR:
 		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
 		break;
+	case MSR_IA32_THERM_CONTROL:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+			return 1;
+		msr_info->data = vmx->msr_ia32_therm_control;
+		break;
+	case MSR_IA32_THERM_INTERRUPT:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+			return 1;
+		msr_info->data = vmx->msr_ia32_therm_interrupt;
+		break;
+	case MSR_IA32_THERM_STATUS:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+			return 1;
+		msr_info->data = vmx->msr_ia32_therm_status;
+		break;
 	default:
 	find_uret_msr:
 		msr = vmx_find_uret_msr(vmx, msr_info->index);
@@ -2452,6 +2509,40 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		}
 		ret = kvm_set_msr_common(vcpu, msr_info);
 		break;
+	case MSR_IA32_THERM_CONTROL:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+			return 1;
+		if (!msr_info->host_initiated &&
+		    data & ~MSR_IA32_THERM_CONTROL_AVAIL_MASK)
+			return 1;
+		vmx->msr_ia32_therm_control = data;
+		break;
+	case MSR_IA32_THERM_INTERRUPT:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+			return 1;
+		if (!msr_info->host_initiated &&
+		    data & ~MSR_IA32_THERM_INTERRUPT_AVAIL_MASK)
+			return 1;
+		vmx->msr_ia32_therm_interrupt = data;
+		break;
+	case MSR_IA32_THERM_STATUS:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+			return 1;
+		/* Unsupported and reserved bits: generate the exception. */
+		if (!msr_info->host_initiated &&
+		    data & MSR_IA32_THERM_STATUS_UNAVAIL_MASK)
+			return 1;
+		if (!msr_info->host_initiated) {
+			data = vmx_set_msr_rwc0_bits(data, vmx->msr_ia32_therm_status,
+						     MSR_IA32_THERM_STATUS_RWC0_MASK);
+			data = vmx_set_msr_ro_bits(data, vmx->msr_ia32_therm_status,
+						   MSR_IA32_THERM_STATUS_RO_MASK);
+		}
+		vmx->msr_ia32_therm_status = data;
+		break;
 
 	default:
 	find_uret_msr:
@@ -4870,6 +4961,9 @@  static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmx->spec_ctrl = 0;
 
 	vmx->msr_ia32_umwait_control = 0;
+	vmx->msr_ia32_therm_control = 0;
+	vmx->msr_ia32_therm_interrupt = 0;
+	vmx->msr_ia32_therm_status = 0;
 
 	vmx->hv_deadline_tsc = -1;
 	kvm_set_cr8(vcpu, 0);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index e3b0985bb74a..e159dd5b7a66 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -282,6 +282,9 @@  struct vcpu_vmx {
 
 	u64		      spec_ctrl;
 	u32		      msr_ia32_umwait_control;
+	u64		      msr_ia32_therm_control;
+	u64		      msr_ia32_therm_interrupt;
+	u64		      msr_ia32_therm_status;
 
 	/*
 	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cd9a7251c768..50aceb0ce4ee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1545,6 +1545,9 @@  static const u32 emulated_msrs_all[] = {
 	MSR_AMD64_TSC_RATIO,
 	MSR_IA32_POWER_CTL,
 	MSR_IA32_UCODE_REV,
+	MSR_IA32_THERM_CONTROL,
+	MSR_IA32_THERM_INTERRUPT,
+	MSR_IA32_THERM_STATUS,
 
 	/*
 	 * KVM always supports the "true" VMX control MSRs, even if the host