From patchwork Fri Apr 1 16:24:14 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alexander Shishkin X-Patchwork-Id: 8726591 Return-Path: X-Original-To: patchwork-kvm@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.136]) by patchwork1.web.kernel.org (Postfix) with ESMTP id 4E70A9FC82 for ; Fri, 1 Apr 2016 16:28:01 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 482832013D for ; Fri, 1 Apr 2016 16:28:00 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 2A22920398 for ; Fri, 1 Apr 2016 16:27:59 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1750940AbcDAQ1q (ORCPT ); Fri, 1 Apr 2016 12:27:46 -0400 Received: from mga03.intel.com ([134.134.136.65]:2217 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750812AbcDAQ1p (ORCPT ); Fri, 1 Apr 2016 12:27:45 -0400 Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga103.jf.intel.com with ESMTP; 01 Apr 2016 09:27:11 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.24,427,1455004800"; d="scan'208";a="923517025" Received: from um.fi.intel.com (HELO localhost) ([10.237.72.212]) by orsmga001.jf.intel.com with ESMTP; 01 Apr 2016 09:27:08 -0700 From: Alexander Shishkin To: Peter Zijlstra Cc: Gleb Natapov , Paolo Bonzini , x86@kernel.org, kvm@vger.kernel.org, Ingo Molnar , linux-kernel@vger.kernel.org, tglx@linutronix.de, hpa@zytor.com, Arnaldo Carvalho de Melo , Alexander Shishkin Subject: [PATCH] perf/x86/intel/pt: Don't die on VMXON Date: Fri, 1 Apr 2016 19:24:14 +0300 Message-Id: <1459527854-5899-1-git-send-email-alexander.shishkin@linux.intel.com> X-Mailer: git-send-email 2.8.0.rc3 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: 
kvm@vger.kernel.org X-Spam-Status: No, score=-7.9 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP Some versions of Intel PT do not support tracing across VMXON; more specifically, VMXON will clear the TraceEn control bit, and any attempt to set it before VMXOFF will throw a #GP, which in the current state of things will crash the kernel. Namely, $ perf record -e intel_pt// kvm -nographic on such a machine will kill it. To avoid this, notify the intel_pt driver before VMXON and after VMXOFF so that it knows when not to enable itself. Signed-off-by: Alexander Shishkin --- arch/x86/events/intel/pt.c | 71 +++++++++++++++++++++++++++++++++------ arch/x86/events/intel/pt.h | 2 ++ arch/x86/include/asm/perf_event.h | 4 +++ arch/x86/kvm/vmx.c | 4 +++ 4 files changed, 70 insertions(+), 11 deletions(-) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 127f58c179..a16f2ebc12 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -136,9 +136,17 @@ static int __init pt_pmu_hw_init(void) struct dev_ext_attribute *de_attrs; struct attribute **attrs; size_t size; + u64 reg; int ret; long i; + if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) { + /* Intel SDM, 36.5 "Tracing post-VMXON" */ + rdmsrl(MSR_IA32_VMX_MISC, reg); + if (reg & BIT(14)) + pt_pmu.vmx = true; + } + attrs = NULL; for (i = 0; i < PT_CPUID_LEAVES; i++) { @@ -269,20 +277,23 @@ static void pt_config(struct perf_event *event) reg |= (event->attr.config & PT_CONFIG_MASK); + event->hw.config = reg; wrmsrl(MSR_IA32_RTIT_CTL, reg); } -static void pt_config_start(bool start) +static void pt_config_stop(struct perf_event *event) { - u64 ctl; + u64 ctl = READ_ONCE(event->hw.config); - rdmsrl(MSR_IA32_RTIT_CTL, ctl); - if (start) - ctl |= RTIT_CTL_TRACEEN; - else - ctl &= ~RTIT_CTL_TRACEEN; + /* may be 
already stopped by a PMI */ + if (!(ctl & RTIT_CTL_TRACEEN)) + return; + + ctl ^= RTIT_CTL_TRACEEN; wrmsrl(MSR_IA32_RTIT_CTL, ctl); + WRITE_ONCE(event->hw.config, ctl); + /* * A wrmsr that disables trace generation serializes other PT * registers and causes all data packets to be written to memory, @@ -291,8 +302,7 @@ static void pt_config_start(bool start) * The below WMB, separating data store and aux_head store matches * the consumer's RMB that separates aux_head load and data load. */ - if (!start) - wmb(); + wmb(); } static void pt_config_buffer(void *buf, unsigned int topa_idx, @@ -922,11 +932,17 @@ void intel_pt_interrupt(void) if (!ACCESS_ONCE(pt->handle_nmi)) return; - pt_config_start(false); + /* + * If VMX is on and PT does not support it, don't touch anything. + */ + if (ACCESS_ONCE(pt->vmx_on)) + return; if (!event) return; + pt_config_stop(event); + buf = perf_get_aux(&pt->handle); if (!buf) return; @@ -963,6 +979,35 @@ void intel_pt_interrupt(void) } } +void intel_pt_vmxon(int entry) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct perf_event *event; + unsigned long flags; + + /* PT plays nice with VMX, do nothing */ + if (pt_pmu.vmx) + return; + + /* + * VMXON will clear RTIT_CTL.TraceEn; we need to make + * sure to not try to set it while VMX is on. Disable + * interrupts to avoid racing with pmu callbacks; + * concurrent PMI should be handled fine. 
+ */ + local_irq_save(flags); + WRITE_ONCE(pt->vmx_on, entry); + + if (entry) { + /* prevent pt_config_stop() from writing RTIT_CTL */ + event = pt->handle.event; + if (event) + event->hw.config = 0; + } + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(intel_pt_vmxon); + /* * PMU callbacks */ @@ -973,6 +1018,9 @@ static void pt_event_start(struct perf_event *event, int mode) struct pt *pt = this_cpu_ptr(&pt_ctx); struct pt_buffer *buf; + if (ACCESS_ONCE(pt->vmx_on)) + return; + buf = perf_aux_output_begin(&pt->handle, event); if (!buf) goto fail_stop; @@ -1007,7 +1055,8 @@ static void pt_event_stop(struct perf_event *event, int mode) * see comment in intel_pt_interrupt(). */ ACCESS_ONCE(pt->handle_nmi) = 0; - pt_config_start(false); + + pt_config_stop(event); if (event->hw.state == PERF_HES_STOPPED) return; diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index 336878a5d2..b0731630cd 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -65,6 +65,7 @@ enum pt_capabilities { struct pt_pmu { struct pmu pmu; u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES]; + bool vmx; }; /** @@ -111,6 +112,7 @@ struct pt_buffer { struct pt { struct perf_output_handle handle; int handle_nmi; + int vmx_on; }; #endif /* __INTEL_PT_H__ */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 5a2ed3ed2f..8d34c39982 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { } static inline void perf_check_microcode(void) { } #endif +#ifdef CONFIG_CPU_SUP_INTEL + extern void intel_pt_vmxon(int entry); +#endif + #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) extern void amd_pmu_enable_virt(void); extern void amd_pmu_disable_virt(void); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1735ae9d68..744ea43b79 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3075,6 +3075,8 @@ static 
__init int vmx_disabled_by_bios(void) static void kvm_cpu_vmxon(u64 addr) { + intel_pt_vmxon(1); + asm volatile (ASM_VMX_VMXON_RAX : : "a"(&addr), "m"(addr) : "memory", "cc"); @@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void) static void kvm_cpu_vmxoff(void) { asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); + + intel_pt_vmxon(0); } static void hardware_disable(void)