[RFC,2/3] x86/kvm/vmx: optimize tsc deadline timer emulation

Message ID 4f3be35206af83a6e95a98ef6f7448b3899058d2.1532380136.git.isaku.yamahata@gmail.com (mailing list archive)
State New, archived
Series kvm/x86/vmx optimize tsc deadline emulation latency

Commit Message

Isaku Yamahata July 23, 2018, 9:17 p.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

This patch optimizes tsc deadline timer emulation by skipping various
checks when possible. It implements fast exit handlers for
- the preemption timer exit, to emulate the tsc deadline timer
- the wrmsr exit, to emulate writes to the tsc deadline MSR
so that the local timer interrupt can be injected faster.
The improvement can be measured with cyclictest and by ftracing the
local timer interrupt.
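
In outline, the fast path added to vmx_vcpu_run() below works as
follows (a simplified sketch of the dispatch in the patch; the full
version also checks idt_vectoring_info before taking the fast path,
and falls back to the normal exit handling whenever a fast handler
returns non-zero):

	/* right after VM exit, before vmx_complete_interrupts() */
	if (!is_guest_mode(vcpu) &&
	    vmx->exit_reason < kvm_vmx_max_exit_handlers_fast &&
	    kvm_vmx_exit_handlers_fast[vmx->exit_reason] &&
	    kvm_vmx_exit_handlers_fast[vmx->exit_reason](vcpu) == 0)
		/* exit handled in place; re-enter the guest directly */
		goto continue_vmx_vcpu_run;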

Signed-off-by: Isaku Yamahata <isaku.yamahata@gmail.com>
---
 arch/x86/kvm/lapic.c |  17 +----
 arch/x86/kvm/lapic.h |  16 +++++
 arch/x86/kvm/vmx.c   | 148 +++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c   |  10 +--
 4 files changed, 171 insertions(+), 20 deletions(-)

Patch

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2c4d19afc76d..46e45f9288da 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -99,6 +99,7 @@  static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
 }
 
 struct static_key_deferred apic_hw_disabled __read_mostly;
+EXPORT_SYMBOL_GPL(apic_hw_disabled);
 struct static_key_deferred apic_sw_disabled __read_mostly;
 
 static inline int apic_enabled(struct kvm_lapic *apic)
@@ -292,21 +293,6 @@  static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 	return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 }
 
-static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
-{
-	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
-}
-
-static inline int apic_lvtt_period(struct kvm_lapic *apic)
-{
-	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
-}
-
-static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
-{
-	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
-}
-
 static inline int apic_lvt_nmi_mode(u32 lvt_val)
 {
 	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
@@ -2462,6 +2448,7 @@  void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
 	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
 				sizeof(u32));
 }
+EXPORT_SYMBOL_GPL(kvm_lapic_sync_to_vapic);
 
 int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
 {
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index ed0ed39abd36..5ce944a3ed9b 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -105,6 +105,7 @@  void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset);
 void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector);
 
+void kvm_update_cr8_intercept(struct kvm_vcpu *vcpu);
 int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
@@ -234,4 +235,19 @@  static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
 	return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
 }
 
+static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
+{
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
+}
+
+static inline int apic_lvtt_period(struct kvm_lapic *apic)
+{
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
+}
+
+static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
+{
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
+}
+
 #endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e30da9a2430c..08d13febc4b5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9971,10 +9971,137 @@  static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
 	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
 }
 
+#ifdef CONFIG_X86_64
+static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
+static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu);
+#endif
+
+/*
+ * handle tsc deadline setting in fast path
+ */
+static int handle_wrmsr_tscdeadline_fast(struct kvm_vcpu *vcpu, u64 data)
+{
+	int ret = -EOPNOTSUPP;
+#ifdef CONFIG_X86_64
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (!kvm_x86_ops->set_hv_timer ||
+	    !lapic_in_kernel(vcpu) ||
+	    apic_lvtt_oneshot(apic) ||
+	    apic_lvtt_period(apic)) {
+		goto out;
+	}
+	/* Simplified version of kvm_set_lapic_tscdeadline_msr(vcpu, msr.data).
+	 * TODO: should lapic timer advance be handled here too?
+	 */
+	atomic_set(&apic->lapic_timer.pending, 0);
+	apic->lapic_timer.tscdeadline = data;
+	if (data) {
+		apic->lapic_timer.hv_timer_in_use = true;
+		ret = vmx_set_hv_timer(vcpu, data) < 0 ? -1 : 0;
+	}
+out:
+#endif
+	return ret;
+}
+
+static int handle_wrmsr_fast(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * fast path of handle_wrmsr() -> kvm_set_msr() -> vmx_set_msr()
+	 */
+	int ret = -EOPNOTSUPP;
+
+	struct msr_data msr;
+	u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
+	u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
+		| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+
+	msr.data = data;
+	msr.index = ecx;
+	msr.host_initiated = false;
+
+	/* simplified version of kvm_set_msr(vcpu, &msr) */
+	switch (msr.index) {
+	case MSR_IA32_TSCDEADLINE:
+		ret = handle_wrmsr_tscdeadline_fast(vcpu, data);
+		break;
+	/* more MSRs to be optimized? */
+	default:
+		break;
+	}
+
+	if (ret == 0)
+		ret = kvm_skip_emulated_instruction(vcpu) ? 0 : -EOPNOTSUPP;
+	return ret;
+}
+
+static int handle_preemption_timer_fast(struct kvm_vcpu *vcpu)
+{
+	int ret = -EOPNOTSUPP;
+#ifdef CONFIG_X86_64
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	struct kvm_timer *ktimer = &apic->lapic_timer;
+	u32 reg;
+	int vector;
+
+	/* Don't handle complicated cases here; fall back to the slow path. */
+	if (!vmx_interrupt_allowed(vcpu) ||
+	    vmx->rmode.vm86_active ||
+	    !apic_lvtt_tscdeadline(apic) ||
+	    !kvm_lapic_hv_timer_in_use(vcpu) ||
+	    is_smm(vcpu) ||
+	    atomic_read(&apic->lapic_timer.pending) ||
+	    !kvm_apic_hw_enabled(apic))
+		goto out;
+	vmx_cancel_hv_timer(vcpu);
+	ktimer->hv_timer_in_use = false;
+	ktimer->expired_tscdeadline = ktimer->tscdeadline;
+	apic->lapic_timer.tscdeadline = 0;
+
+	reg = kvm_lapic_get_reg(apic, APIC_LVTT);
+	if (reg & APIC_LVT_MASKED)
+		goto out;
+	vector = reg & APIC_VECTOR_MASK;
+	kvm_lapic_set_vector(vector, apic->regs + APIC_TMR);
+	if (vcpu->arch.apicv_active) {
+		/* simplified version of
+		 * vmx_deliver_posted_interrupt(vcpu, vector);
+		 */
+		pi_test_and_set_pir(vector, &vmx->pi_desc);
+		pi_test_and_set_on(&vmx->pi_desc);
+	} else {
+		kvm_lapic_set_irr(vector, apic);
+
+		if (!vcpu->arch.apicv_active)
+			kvm_update_cr8_intercept(vcpu);
+		/* simplified version of vmx_inject_irq() */
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			     vector | INTR_INFO_VALID_MASK |
+			     INTR_TYPE_EXT_INTR);
+		vmx_clear_hlt(vcpu);
+	}
+	ret = 0;
+out:
+#endif
+	return ret;
+}
+
+static int (*const kvm_vmx_exit_handlers_fast[]) (struct kvm_vcpu *vcpu) = {
+	[EXIT_REASON_MSR_WRITE]		= handle_wrmsr_fast,
+	[EXIT_REASON_PREEMPTION_TIMER]	= handle_preemption_timer_fast,
+};
+
+static const int kvm_vmx_max_exit_handlers_fast =
+	ARRAY_SIZE(kvm_vmx_exit_handlers_fast);
+
+
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long cr3, cr4, evmcs_rsp;
+continue_vmx_vcpu_run:
 
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!enable_vnmi &&
@@ -10242,6 +10369,27 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	vmx_complete_atomic_exit(vmx);
 	vmx_recover_nmi_blocking(vmx);
+
+	if (!is_guest_mode(vcpu) &&
+	    vmx->exit_reason < kvm_vmx_max_exit_handlers_fast &&
+	    kvm_vmx_exit_handlers_fast[vmx->exit_reason]) {
+		bool idtv_info_valid =
+			vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
+		int type = vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
+
+		if (!idtv_info_valid ||
+		    !(type == INTR_TYPE_NMI_INTR ||
+		      type == INTR_TYPE_SOFT_EXCEPTION ||
+		      type == INTR_TYPE_SOFT_INTR ||
+		      type == INTR_TYPE_EXT_INTR)) {
+			int ret = kvm_vmx_exit_handlers_fast[vmx->exit_reason](
+				vcpu);
+
+			if (ret == 0)
+				goto continue_vmx_vcpu_run;
+		}
+	}
+
 	vmx_complete_interrupts(vmx);
 }
 STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3b9165dae5c6..befd32afd9d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -98,7 +98,6 @@  static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
                                     KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
 
-static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static void process_nmi(struct kvm_vcpu *vcpu);
 static void enter_smm(struct kvm_vcpu *vcpu);
 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
@@ -3158,7 +3157,7 @@  static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 	r = kvm_apic_set_state(vcpu, s);
 	if (r)
 		return r;
-	update_cr8_intercept(vcpu);
+	kvm_update_cr8_intercept(vcpu);
 
 	return 0;
 }
@@ -6775,7 +6774,7 @@  static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 		kvm_vcpu_ready_for_interrupt_injection(vcpu);
 }
 
-static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+void kvm_update_cr8_intercept(struct kvm_vcpu *vcpu)
 {
 	int max_irr, tpr;
 
@@ -6800,6 +6799,7 @@  static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 
 	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
 }
+EXPORT_SYMBOL_GPL(kvm_update_cr8_intercept);
 
 static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 {
@@ -7372,7 +7372,7 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 
 		if (kvm_lapic_enabled(vcpu)) {
-			update_cr8_intercept(vcpu);
+			kvm_update_cr8_intercept(vcpu);
 			kvm_lapic_sync_to_vapic(vcpu);
 		}
 	}
@@ -8069,7 +8069,7 @@  static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
 	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
 
-	update_cr8_intercept(vcpu);
+	kvm_update_cr8_intercept(vcpu);
 
 	/* Older userspace won't unhalt the vcpu on reset. */
 	if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&