diff mbox

perf/x86/intel/pt: Don't die on VMXON

Message ID 871t6izv9z.fsf@ashishki-desk.ger.corp.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Alexander Shishkin April 6, 2016, 2:06 p.m. UTC
Peter Zijlstra <peterz@infradead.org> writes:

> Looks good, however:
>
>> +	if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) {
>
> Borislav tells me this ought to be boot_cpu_has(X86_FEATURE_VMX)

Right. I copied it from another place in the driver, which I'll then
patch as well.

3x a charm:

From 1ac911d5096af88217b3a26b26d594b02f0db614 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 29 Mar 2016 17:43:10 +0300
Subject: [PATCH] perf/x86/intel/pt: Don't die on VMXON

Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely,

$ perf record -e intel_pt// kvm -nographic

on such a machine will kill it.

To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 arch/x86/events/intel/pt.c        | 75 +++++++++++++++++++++++++++++++++------
 arch/x86/events/intel/pt.h        |  2 ++
 arch/x86/include/asm/perf_event.h |  4 +++
 arch/x86/kvm/vmx.c                |  4 +++
 4 files changed, 74 insertions(+), 11 deletions(-)

Comments

Ingo Molnar April 13, 2016, 9:49 a.m. UTC | #1
* Alexander Shishkin <alexander.shishkin@linux.intel.com> wrote:

> Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
> ---
>  arch/x86/events/intel/pt.c        | 75 +++++++++++++++++++++++++++++++++------
>  arch/x86/events/intel/pt.h        |  2 ++
>  arch/x86/include/asm/perf_event.h |  4 +++
>  arch/x86/kvm/vmx.c                |  4 +++
>  4 files changed, 74 insertions(+), 11 deletions(-)

> +void intel_pt_vmx(int on)
> +{
> +	struct pt *pt = this_cpu_ptr(&pt_ctx);
> +	struct perf_event *event;
> +	unsigned long flags;
> +
> +	/* PT plays nice with VMX, do nothing */
> +	if (pt_pmu.vmx)
> +		return;
> +
> +	/*
> +	 * VMXON will clear RTIT_CTL.TraceEn; we need to make
> +	 * sure to not try to set it while VMX is on. Disable
> +	 * interrupts to avoid racing with pmu callbacks;
> +	 * concurrent PMI should be handled fine.
> +	 */
> +	local_irq_save(flags);
> +	WRITE_ONCE(pt->vmx_on, on);
> +
> +	if (on) {
> +		/* prevent pt_config_stop() from writing RTIT_CTL */
> +		event = pt->handle.event;
> +		if (event)
> +			event->hw.config = 0;
> +	}
> +	local_irq_restore(flags);
> +}
> +EXPORT_SYMBOL_GPL(intel_pt_vmx);

> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)
>  
>  static void kvm_cpu_vmxon(u64 addr)
>  {
> +	intel_pt_vmx(1);
> +
>  	asm volatile (ASM_VMX_VMXON_RAX
>  			: : "a"(&addr), "m"(addr)
>  			: "memory", "cc");
> @@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
>  static void kvm_cpu_vmxoff(void)
>  {
>  	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
> +
> +	intel_pt_vmx(0);
>  }

Yeah so the name intel_pt_vmx() is pretty information-free because it has no verb, 
only nouns - please name new functions descriptively to after what they do!

Something like intel_pt_set_vmx_state() or so?

Thanks,

	Ingo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Shishkin April 13, 2016, 10:32 a.m. UTC | #2
Ingo Molnar <mingo@kernel.org> writes:

>> @@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
>>  static void kvm_cpu_vmxoff(void)
>>  {
>>  	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
>> +
>> +	intel_pt_vmx(0);
>>  }
>
> Yeah so the name intel_pt_vmx() is pretty information-free because it has no verb, 
> only nouns - please name new functions descriptively to after what they do!

I do agree that it can use a better name (and this is a second attempt
already).

> Something like intel_pt_set_vmx_state() or so?

Hmm how about intel_pt_handle_vmx()? Ideally, akin to the VMXON/VMXOFF
insns, this could be two functions (intel_pt_handle_vmx{on,off}()) if
the global namespace can take it.

Regards,
--
Alex
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ingo Molnar April 13, 2016, 10:53 a.m. UTC | #3
* Alexander Shishkin <alexander.shishkin@linux.intel.com> wrote:

> Ingo Molnar <mingo@kernel.org> writes:
> 
> >> @@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
> >>  static void kvm_cpu_vmxoff(void)
> >>  {
> >>  	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
> >> +
> >> +	intel_pt_vmx(0);
> >>  }
> >
> > Yeah so the name intel_pt_vmx() is pretty information-free because it has no verb, 
> > only nouns - please name new functions descriptively to after what they do!
> 
> I do agree that it can use a better name (and this is a second attempt
> already).
> 
> > Something like intel_pt_set_vmx_state() or so?
> 
> Hmm how about intel_pt_handle_vmx()? Ideally, akin to the VMXON/VMXOFF insns, 
> this could be two functions (intel_pt_handle_vmx{on,off}()) if the global 
> namespace can take it.

Sure, intel_pt_handle_vmx(0/1) sounds good too. I wouldn't split it into two 
functions ...

Thanks,

	Ingo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 127f58c179..32b613e863 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,21 @@  static int __init pt_pmu_hw_init(void)
 	struct dev_ext_attribute *de_attrs;
 	struct attribute **attrs;
 	size_t size;
+	u64 reg;
 	int ret;
 	long i;
 
+	if (boot_cpu_has(X86_FEATURE_VMX)) {
+		/*
+		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
+		 * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
+		 * post-VMXON.
+		 */
+		rdmsrl(MSR_IA32_VMX_MISC, reg);
+		if (reg & BIT(14))
+			pt_pmu.vmx = true;
+	}
+
 	attrs = NULL;
 
 	for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +281,23 @@  static void pt_config(struct perf_event *event)
 
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
+	event->hw.config = reg;
 	wrmsrl(MSR_IA32_RTIT_CTL, reg);
 }
 
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
 {
-	u64 ctl;
+	u64 ctl = READ_ONCE(event->hw.config);
+
+	/* may be already stopped by a PMI */
+	if (!(ctl & RTIT_CTL_TRACEEN))
+		return;
 
-	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-	if (start)
-		ctl |= RTIT_CTL_TRACEEN;
-	else
-		ctl &= ~RTIT_CTL_TRACEEN;
+	ctl &= ~RTIT_CTL_TRACEEN;
 	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
 
+	WRITE_ONCE(event->hw.config, ctl);
+
 	/*
 	 * A wrmsr that disables trace generation serializes other PT
 	 * registers and causes all data packets to be written to memory,
@@ -291,8 +306,7 @@  static void pt_config_start(bool start)
 	 * The below WMB, separating data store and aux_head store matches
 	 * the consumer's RMB that separates aux_head load and data load.
 	 */
-	if (!start)
-		wmb();
+	wmb();
 }
 
 static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -922,11 +936,17 @@  void intel_pt_interrupt(void)
 	if (!ACCESS_ONCE(pt->handle_nmi))
 		return;
 
-	pt_config_start(false);
+	/*
+	 * If VMX is on and PT does not support it, don't touch anything.
+	 */
+	if (ACCESS_ONCE(pt->vmx_on))
+		return;
 
 	if (!event)
 		return;
 
+	pt_config_stop(event);
+
 	buf = perf_get_aux(&pt->handle);
 	if (!buf)
 		return;
@@ -963,6 +983,35 @@  void intel_pt_interrupt(void)
 	}
 }
 
+void intel_pt_vmx(int on)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct perf_event *event;
+	unsigned long flags;
+
+	/* PT plays nice with VMX, do nothing */
+	if (pt_pmu.vmx)
+		return;
+
+	/*
+	 * VMXON will clear RTIT_CTL.TraceEn; we need to make
+	 * sure to not try to set it while VMX is on. Disable
+	 * interrupts to avoid racing with pmu callbacks;
+	 * concurrent PMI should be handled fine.
+	 */
+	local_irq_save(flags);
+	WRITE_ONCE(pt->vmx_on, on);
+
+	if (on) {
+		/* prevent pt_config_stop() from writing RTIT_CTL */
+		event = pt->handle.event;
+		if (event)
+			event->hw.config = 0;
+	}
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_vmx);
+
 /*
  * PMU callbacks
  */
@@ -973,6 +1022,9 @@  static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf;
 
+	if (ACCESS_ONCE(pt->vmx_on))
+		return;
+
 	buf = perf_aux_output_begin(&pt->handle, event);
 	if (!buf)
 		goto fail_stop;
@@ -1007,7 +1059,8 @@  static void pt_event_stop(struct perf_event *event, int mode)
 	 * see comment in intel_pt_interrupt().
 	 */
 	ACCESS_ONCE(pt->handle_nmi) = 0;
-	pt_config_start(false);
+
+	pt_config_stop(event);
 
 	if (event->hw.state == PERF_HES_STOPPED)
 		return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d2..b0731630cd 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@  enum pt_capabilities {
 struct pt_pmu {
 	struct pmu		pmu;
 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+	bool			vmx;
 };
 
 /**
@@ -111,6 +112,7 @@  struct pt_buffer {
 struct pt {
 	struct perf_output_handle handle;
 	int			handle_nmi;
+	int			vmx_on;
 };
 
 #endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f..fcc5956f3f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@  static inline void perf_events_lapic_init(void)	{ }
 static inline void perf_check_microcode(void) { }
 #endif
 
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_vmx(int on);
+#endif
+
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
  extern void amd_pmu_enable_virt(void);
  extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1735ae9d68..fa8c98f5fa 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3075,6 +3075,8 @@  static __init int vmx_disabled_by_bios(void)
 
 static void kvm_cpu_vmxon(u64 addr)
 {
+	intel_pt_vmx(1);
+
 	asm volatile (ASM_VMX_VMXON_RAX
 			: : "a"(&addr), "m"(addr)
 			: "memory", "cc");
@@ -3144,6 +3146,8 @@  static void vmclear_local_loaded_vmcss(void)
 static void kvm_cpu_vmxoff(void)
 {
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+	intel_pt_vmx(0);
 }
 
 static void hardware_disable(void)