diff mbox

perf/x86/intel/pt: Don't die on VMXON

Message ID 1459527854-5899-1-git-send-email-alexander.shishkin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Alexander Shishkin April 1, 2016, 4:24 p.m. UTC
Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely,

$ perf record -e intel_pt// kvm -nographic

on such a machine will kill it.

To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 arch/x86/events/intel/pt.c        | 71 +++++++++++++++++++++++++++++++++------
 arch/x86/events/intel/pt.h        |  2 ++
 arch/x86/include/asm/perf_event.h |  4 +++
 arch/x86/kvm/vmx.c                |  4 +++
 4 files changed, 70 insertions(+), 11 deletions(-)

Comments

Peter Zijlstra April 6, 2016, 8:52 a.m. UTC | #1
On Fri, Apr 01, 2016 at 07:24:14PM +0300, Alexander Shishkin wrote:
> +static void pt_config_stop(struct perf_event *event)
>  {
> +	u64 ctl = READ_ONCE(event->hw.config);
>  
> +	/* may be already stopped by a PMI*/
> +	if (!(ctl & RTIT_CTL_TRACEEN))
> +		return;
> +
> +	ctl ^= RTIT_CTL_TRACEEN;

Would that not be much less confusing when written like |= ?

>  	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
>  
> +	WRITE_ONCE(event->hw.config, ctl);
> +
>  	/*
>  	 * A wrmsr that disables trace generation serializes other PT
>  	 * registers and causes all data packets to be written to memory,


> +void intel_pt_vmxon(int entry)
> +{
> +	struct pt *pt = this_cpu_ptr(&pt_ctx);
> +	struct perf_event *event;
> +	unsigned long flags;
> +
> +	/* PT plays nice with VMX, do nothing */
> +	if (pt_pmu.vmx)
> +		return;
> +
> +	/*
> +	 * VMX entry will clear RTIT_CTL.TraceEn; we need to make
> +	 * sure to not try to set it while VMX is on. Disable
> +	 * interrupts to avoid racing with pmu callbacks;
> +	 * concurrent PMI should be handled fine.
> +	 */
> +	local_irq_save(flags);
> +	WRITE_ONCE(pt->vmx_on, entry);

So you mix: "VMX is on" and "VMX entry", please pick one.

Since the function is called vmxon, I find .entry a very confusing
argument name.

> +
> +	if (entry) {
> +		/* prevent pt_config_stop() from writing RTIT_CTL */
> +		event = pt->handle.event;
> +		if (event)
> +			event->hw.config = 0;
> +	}
> +	local_irq_restore(flags);
> +}
> +EXPORT_SYMBOL_GPL(intel_pt_vmxon);
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 127f58c179..a16f2ebc12 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,17 @@  static int __init pt_pmu_hw_init(void)
 	struct dev_ext_attribute *de_attrs;
 	struct attribute **attrs;
 	size_t size;
+	u64 reg;
 	int ret;
 	long i;
 
+	if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) {
+		/* Intel SDM, 36.5 "Tracing post-VMXON" */
+		rdmsrl(MSR_IA32_VMX_MISC, reg);
+		if (reg & BIT(14))
+			pt_pmu.vmx = true;
+	}
+
 	attrs = NULL;
 
 	for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +277,23 @@  static void pt_config(struct perf_event *event)
 
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
+	event->hw.config = reg;
 	wrmsrl(MSR_IA32_RTIT_CTL, reg);
 }
 
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
 {
-	u64 ctl;
+	u64 ctl = READ_ONCE(event->hw.config);
 
-	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-	if (start)
-		ctl |= RTIT_CTL_TRACEEN;
-	else
-		ctl &= ~RTIT_CTL_TRACEEN;
+	/* may be already stopped by a PMI*/
+	if (!(ctl & RTIT_CTL_TRACEEN))
+		return;
+
+	ctl ^= RTIT_CTL_TRACEEN;
 	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
 
+	WRITE_ONCE(event->hw.config, ctl);
+
 	/*
 	 * A wrmsr that disables trace generation serializes other PT
 	 * registers and causes all data packets to be written to memory,
@@ -291,8 +302,7 @@  static void pt_config_start(bool start)
 	 * The below WMB, separating data store and aux_head store matches
 	 * the consumer's RMB that separates aux_head load and data load.
 	 */
-	if (!start)
-		wmb();
+	wmb();
 }
 
 static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -922,11 +932,17 @@  void intel_pt_interrupt(void)
 	if (!ACCESS_ONCE(pt->handle_nmi))
 		return;
 
-	pt_config_start(false);
+	/*
+	 * If VMX is on and PT does not support it, don't touch anything.
+	 */
+	if (ACCESS_ONCE(pt->vmx_on))
+		return;
 
 	if (!event)
 		return;
 
+	pt_config_stop(event);
+
 	buf = perf_get_aux(&pt->handle);
 	if (!buf)
 		return;
@@ -963,6 +979,35 @@  void intel_pt_interrupt(void)
 	}
 }
 
+void intel_pt_vmxon(int entry)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct perf_event *event;
+	unsigned long flags;
+
+	/* PT plays nice with VMX, do nothing */
+	if (pt_pmu.vmx)
+		return;
+
+	/*
+	 * VMX entry will clear RTIT_CTL.TraceEn; we need to make
+	 * sure to not try to set it while VMX is on. Disable
+	 * interrupts to avoid racing with pmu callbacks;
+	 * concurrent PMI should be handled fine.
+	 */
+	local_irq_save(flags);
+	WRITE_ONCE(pt->vmx_on, entry);
+
+	if (entry) {
+		/* prevent pt_config_stop() from writing RTIT_CTL */
+		event = pt->handle.event;
+		if (event)
+			event->hw.config = 0;
+	}
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_vmxon);
+
 /*
  * PMU callbacks
  */
@@ -973,6 +1018,9 @@  static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf;
 
+	if (ACCESS_ONCE(pt->vmx_on))
+		return;
+
 	buf = perf_aux_output_begin(&pt->handle, event);
 	if (!buf)
 		goto fail_stop;
@@ -1007,7 +1055,8 @@  static void pt_event_stop(struct perf_event *event, int mode)
 	 * see comment in intel_pt_interrupt().
 	 */
 	ACCESS_ONCE(pt->handle_nmi) = 0;
-	pt_config_start(false);
+
+	pt_config_stop(event);
 
 	if (event->hw.state == PERF_HES_STOPPED)
 		return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d2..b0731630cd 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@  enum pt_capabilities {
 struct pt_pmu {
 	struct pmu		pmu;
 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+	bool			vmx;
 };
 
 /**
@@ -111,6 +112,7 @@  struct pt_buffer {
 struct pt {
 	struct perf_output_handle handle;
 	int			handle_nmi;
+	int			vmx_on;
 };
 
 #endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f..8d34c39982 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@  static inline void perf_events_lapic_init(void)	{ }
 static inline void perf_check_microcode(void) { }
 #endif
 
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_vmxon(int entry);
+#endif
+
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
  extern void amd_pmu_enable_virt(void);
  extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1735ae9d68..744ea43b79 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3075,6 +3075,8 @@  static __init int vmx_disabled_by_bios(void)
 
 static void kvm_cpu_vmxon(u64 addr)
 {
+	intel_pt_vmxon(1);
+
 	asm volatile (ASM_VMX_VMXON_RAX
 			: : "a"(&addr), "m"(addr)
 			: "memory", "cc");
@@ -3144,6 +3146,8 @@  static void vmclear_local_loaded_vmcss(void)
 static void kvm_cpu_vmxoff(void)
 {
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+	intel_pt_vmxon(0);
 }
 
 static void hardware_disable(void)