diff mbox series

[06/15] KVM: x86: Register perf callbacks only when actively handling interrupt

Message ID 20210827005718.585190-7-seanjc@google.com (mailing list archive)
State New, archived
Headers show
Series [01/15] KVM: x86: Register perf callbacks after calling vendor's hardware_setup() | expand

Commit Message

Sean Christopherson Aug. 27, 2021, 12:57 a.m. UTC
Register KVM's perf callback only when handling an interrupt that may be
a PMI (sadly this includes IRQs), and unregister the callback immediately
after handling the interrupt (or closing the window).  Registering the
callback on a per-CPU basis (with preemption disabled!), fixes a mostly
theoretical bug where perf could dereference a NULL pointer due to KVM
unloading and unregistering the callbacks in between perf queries of the
callback functions.  The precise registration will also allow for future
cleanups and optimizations, e.g. the existence of the callbacks can serve
as the "in guest" check.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/x86.c         | 27 +++++++++++++++++----------
 arch/x86/kvm/x86.h         | 10 ++++++++++
 include/linux/perf_event.h |  2 ++
 kernel/events/core.c       | 12 ++++++++++++
 4 files changed, 41 insertions(+), 10 deletions(-)

Comments

Peter Zijlstra Aug. 27, 2021, 7:21 a.m. UTC | #1
On Thu, Aug 26, 2021 at 05:57:09PM -0700, Sean Christopherson wrote:
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 9bc1375d6ed9..2f28d9d8dc94 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -6485,6 +6485,18 @@ static void perf_pending_event(struct irq_work *entry)
>  #ifdef CONFIG_HAVE_GUEST_PERF_EVENTS
>  DEFINE_PER_CPU(struct perf_guest_info_callbacks *, perf_guest_cbs);
>  
> +void __perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
> +{
> +	__this_cpu_write(perf_guest_cbs, cbs);
> +}
> +EXPORT_SYMBOL_GPL(__perf_register_guest_info_callbacks);
> +
> +void __perf_unregister_guest_info_callbacks(void)
> +{
> +	__this_cpu_write(perf_guest_cbs, NULL);
> +}
> +EXPORT_SYMBOL_GPL(__perf_unregister_guest_info_callbacks);

This is 100% broken, and a prime example of why I hate modules.

It provides an interface for all modules, and completely fails to
validate even the most basic usage.

By using __this_cpu*() it omits the preemption checks, so you can call
this with preemption enabled, no problem.

By not checking the previous state, multiple modules can call this
interleaved without issue.

Basically assume any EXPORTed function is hostile, binary modules and
out-of-tree modules *are* just that. It's a cesspit out there.
diff mbox series

Patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bae951344e28..bc4ee6ea7752 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8274,28 +8274,31 @@  int kvm_is_in_guest(void)
 
 static int kvm_is_user_mode(void)
 {
-	int user_mode = 3;
+	struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
 
-	if (__this_cpu_read(current_vcpu))
-		user_mode = static_call(kvm_x86_get_cpl)(__this_cpu_read(current_vcpu));
+	if (WARN_ON_ONCE(!vcpu))
+		return 0;
 
-	return user_mode != 0;
+	return static_call(kvm_x86_get_cpl)(vcpu) != 0;
 }
 
 static unsigned long kvm_get_guest_ip(void)
 {
-	unsigned long ip = 0;
+	struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
 
-	if (__this_cpu_read(current_vcpu))
-		ip = kvm_rip_read(__this_cpu_read(current_vcpu));
+	if (WARN_ON_ONCE(!vcpu))
+		return 0;
 
-	return ip;
+	return kvm_rip_read(vcpu);
 }
 
 static void kvm_handle_intel_pt_intr(void)
 {
 	struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
 
+	if (WARN_ON_ONCE(!vcpu))
+		return;
+
 	kvm_make_request(KVM_REQ_PMI, vcpu);
 	__set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
 			(unsigned long *)&vcpu->arch.pmu.global_status);
@@ -8308,6 +8311,12 @@  static struct perf_guest_info_callbacks kvm_guest_cbs = {
 	.handle_intel_pt_intr	= NULL,
 };
 
+void kvm_register_perf_callbacks(void)
+{
+	__perf_register_guest_info_callbacks(&kvm_guest_cbs);
+}
+EXPORT_SYMBOL_GPL(kvm_register_perf_callbacks);
+
 #ifdef CONFIG_X86_64
 static void pvclock_gtod_update_fn(struct work_struct *work)
 {
@@ -11063,7 +11072,6 @@  int kvm_arch_hardware_setup(void *opaque)
 
 	if (ops->intel_pt_intr_in_guest && ops->intel_pt_intr_in_guest())
 		kvm_guest_cbs.handle_intel_pt_intr = kvm_handle_intel_pt_intr;
-	perf_register_guest_info_callbacks(&kvm_guest_cbs);
 
 	if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
 		supported_xss = 0;
@@ -11092,7 +11100,6 @@  int kvm_arch_hardware_setup(void *opaque)
 
 void kvm_arch_hardware_unsetup(void)
 {
-	perf_unregister_guest_info_callbacks();
 	kvm_guest_cbs.handle_intel_pt_intr = NULL;
 
 	static_call(kvm_x86_hardware_unsetup)();
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7d66d63dc55a..5cedc0e8a5d5 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -387,15 +387,25 @@  static inline bool kvm_cstate_in_guest(struct kvm *kvm)
 	return kvm->arch.cstate_in_guest;
 }
 
+void kvm_register_perf_callbacks(void);
+static inline void kvm_unregister_perf_callbacks(void)
+{
+	__perf_unregister_guest_info_callbacks();
+}
+
 DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
 
 static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
 {
 	__this_cpu_write(current_vcpu, vcpu);
+
+	kvm_register_perf_callbacks();
 }
 
 static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
 {
+	kvm_unregister_perf_callbacks();
+
 	__this_cpu_write(current_vcpu, NULL);
 }
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c98253dae037..7a367bf1b78d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1238,6 +1238,8 @@  extern void perf_event_bpf_event(struct bpf_prog *prog,
 
 #ifdef CONFIG_HAVE_GUEST_PERF_EVENTS
 DECLARE_PER_CPU(struct perf_guest_info_callbacks *, perf_guest_cbs);
+extern void __perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
+extern void __perf_unregister_guest_info_callbacks(void);
 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern void perf_unregister_guest_info_callbacks(void);
 #endif /* CONFIG_HAVE_GUEST_PERF_EVENTS */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9bc1375d6ed9..2f28d9d8dc94 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6485,6 +6485,18 @@  static void perf_pending_event(struct irq_work *entry)
 #ifdef CONFIG_HAVE_GUEST_PERF_EVENTS
 DEFINE_PER_CPU(struct perf_guest_info_callbacks *, perf_guest_cbs);
 
+void __perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+	__this_cpu_write(perf_guest_cbs, cbs);
+}
+EXPORT_SYMBOL_GPL(__perf_register_guest_info_callbacks);
+
+void __perf_unregister_guest_info_callbacks(void)
+{
+	__this_cpu_write(perf_guest_cbs, NULL);
+}
+EXPORT_SYMBOL_GPL(__perf_unregister_guest_info_callbacks);
+
 void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 {
 	int cpu;