Message ID | 1404740066-4374-1-git-send-email-kan.liang@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Jul 07, 2014 at 06:34:25AM -0700, kan.liang@intel.com wrote: > + /* > + * Access LBR MSR may cause #GP under certain circumstances. > + * E.g. KVM doesn't support LBR MSR > + * Check all LBT MSR here. > + * Disable LBR access if any LBR MSRs can not be accessed. > + */ > + if (x86_pmu.lbr_nr) { > + x86_pmu.lbr_msr_access = check_msr(x86_pmu.lbr_tos); > + for (i = 0; i < x86_pmu.lbr_nr; i++) { > + x86_pmu.lbr_msr_access &= > + check_msr(x86_pmu.lbr_from + i); > + x86_pmu.lbr_msr_access &= > + check_msr(x86_pmu.lbr_to + i); > + } > + } So I was going to refer you to an email I send earlier telling you that this was wrong, but then found it got stuck in a mailqueue on my laptop :-( In any case its wrong; just clear lbr_nr and kill lbr_msr_access. We already check lbr_nr in all the right places and you added lbr_msr_access to far too few of them (also the fact that every place you check lbr_msr_access already had an lbr_nr test should've been a clue).
On Mon, Jul 07, 2014 at 06:34:25AM -0700, kan.liang@intel.com wrote: > @@ -555,7 +577,11 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, > { > u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); > > - if (hwc->extra_reg.reg) > + if (hwc->extra_reg.reg && > + ((hwc->extra_reg.idx == EXTRA_REG_RSP_0) ? > + x86_pmu.extra_msr_access[0] : true) && > + ((hwc->extra_reg.idx == EXTRA_REG_RSP_1) ? > + x86_pmu.extra_msr_access[1] : true)) > wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); > wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); > } > diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c > index adb02aa..3d18765 100644 > --- a/arch/x86/kernel/cpu/perf_event_intel.c > +++ b/arch/x86/kernel/cpu/perf_event_intel.c > + /* > + * Access extra MSR may cause #GP under certain circumstances. > + * E.g. KVM doesn't support offcore event > + * Check all extra_regs here. > + */ > + if (x86_pmu.extra_regs) { > + x86_pmu.extra_msr_access[0] = > + check_msr(x86_pmu.extra_regs[EXTRA_REG_RSP_0].msr); > + > + /* Not all platforms have EXTRA_REG_RSP_1 */ > + if (x86_pmu.extra_regs[EXTRA_REG_RSP_1].idx == EXTRA_REG_RSP_1) > + x86_pmu.extra_msr_access[1] = > + check_msr(x86_pmu.extra_regs[EXTRA_REG_RSP_1].msr); > + /* > + * If there is no EXTRA_REG_RSP_1 support, > + * just set the flag to be true. > + * So it is ignored at the runtime check. > + */ > + else > + x86_pmu.extra_msr_access[1] = true; > + } This too is wrong in many ways; there's more than 2 extra_msrs on many systems. And the place you check is abysmal, if we know at init time that we don't have those MSRs, WTF do you allow event creation that would use them, only to then misbehave?
> > On Mon, Jul 07, 2014 at 06:34:25AM -0700, kan.liang@intel.com wrote: > > + /* > > + * Access LBR MSR may cause #GP under certain circumstances. > > + * E.g. KVM doesn't support LBR MSR > > + * Check all LBT MSR here. > > + * Disable LBR access if any LBR MSRs can not be accessed. > > + */ > > + if (x86_pmu.lbr_nr) { > > + x86_pmu.lbr_msr_access = check_msr(x86_pmu.lbr_tos); > > + for (i = 0; i < x86_pmu.lbr_nr; i++) { > > + x86_pmu.lbr_msr_access &= > > + check_msr(x86_pmu.lbr_from + i); > > + x86_pmu.lbr_msr_access &= > > + check_msr(x86_pmu.lbr_to + i); > > + } > > + } > > So I was going to refer you to an email I send earlier telling you that this was > wrong, but then found it got stuck in a mailqueue on my laptop :-( > > In any case its wrong; just clear lbr_nr and kill lbr_msr_access. We already > check lbr_nr in all the right places and you added lbr_msr_access to far too > few of them (also the fact that every place you check lbr_msr_access already > had an lbr_nr test should've been a clue). OK. If lbr_nr is well checked at runtime, I will simply set lbr_nr to 0 here (once check_msr failed). Thanks, Kan -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Peter Zijlstra [mailto:peterz@infradead.org] > Sent: Tuesday, July 08, 2014 5:29 AM > To: Liang, Kan > Cc: andi@firstfloor.org; linux-kernel@vger.kernel.org; kvm@vger.kernel.org > Subject: Re: [PATCH V3 1/2] perf ignore LBR and offcore_rsp. > > On Mon, Jul 07, 2014 at 06:34:25AM -0700, kan.liang@intel.com wrote: > > @@ -555,7 +577,11 @@ static inline void __x86_pmu_enable_event(struct > > hw_perf_event *hwc, { > > u64 disable_mask = > > __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); > > > > - if (hwc->extra_reg.reg) > > + if (hwc->extra_reg.reg && > > + ((hwc->extra_reg.idx == EXTRA_REG_RSP_0) ? > > + x86_pmu.extra_msr_access[0] : true) && > > + ((hwc->extra_reg.idx == EXTRA_REG_RSP_1) ? > > + x86_pmu.extra_msr_access[1] : true)) > > wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); > > wrmsrl(hwc->config_base, (hwc->config | enable_mask) & > > ~disable_mask); } > > > diff --git a/arch/x86/kernel/cpu/perf_event_intel.c > > b/arch/x86/kernel/cpu/perf_event_intel.c > > index adb02aa..3d18765 100644 > > --- a/arch/x86/kernel/cpu/perf_event_intel.c > > +++ b/arch/x86/kernel/cpu/perf_event_intel.c > > > + /* > > + * Access extra MSR may cause #GP under certain circumstances. > > + * E.g. KVM doesn't support offcore event > > + * Check all extra_regs here. > > + */ > > + if (x86_pmu.extra_regs) { > > + x86_pmu.extra_msr_access[0] = > > + > check_msr(x86_pmu.extra_regs[EXTRA_REG_RSP_0].msr); > > + > > + /* Not all platforms have EXTRA_REG_RSP_1 */ > > + if (x86_pmu.extra_regs[EXTRA_REG_RSP_1].idx == > EXTRA_REG_RSP_1) > > + x86_pmu.extra_msr_access[1] = > > + > check_msr(x86_pmu.extra_regs[EXTRA_REG_RSP_1].msr); > > + /* > > + * If there is no EXTRA_REG_RSP_1 support, > > + * just set the flag to be true. > > + * So it is ignored at the runtime check. > > + */ > > + else > > + x86_pmu.extra_msr_access[1] = true; > > + } > > This too is wrong in many ways; there's more than 2 extra_msrs on many > systems. > Right, there are four extra reg types on Intel systems. Since my previous test only triggers the crash with RSP_0 and RSP_1, so I only handle these two msrs. I will handle all the extra msrs then. > And the place you check is abysmal, if we know at init time that we don't > have those MSRs, WTF do you allow event creation that would use them, > only to then misbehave? Right, we can check it at all the possible creation places. I think the most common place to check should be x86_pmu_extra_regs. For RSP_0 and RSP_1, I also plan to do the check in intel_fixup_er, so extra_reg will not be updated. For LBR select, lbr_sel_map will be cleared at runtime once check_msr failed. Besides the three places, is there any place I missed? Thanks, Kan -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jul 08, 2014 at 02:22:25PM +0000, Liang, Kan wrote: > > This too is wrong in many ways; there's more than 2 extra_msrs on many > > systems. > > > Right, there are four extra reg types on Intel systems. Since my > previous test only triggers the crash with RSP_0 and RSP_1, so I only > handle these two msrs. I will handle all the extra msrs then. Yeah, so to other two are PEBS related, and I think we disable PEBS early on so you'll never hit them or something; didn't look too closely. > > And the place you check is abysmal, if we know at init time that we don't > > have those MSRs, WTF do you allow event creation that would use them, > > only to then misbehave? > > Right, we can check it at all the possible creation places. I think > the most common place to check should be x86_pmu_extra_regs. For > RSP_0 and RSP_1, I also plan to do the check in intel_fixup_er, so > extra_reg will not be updated. For LBR select, lbr_sel_map will be > cleared at runtime once check_msr failed. Besides the three places, > is there any place I missed? I think that's sufficient; but didn't stare too hard.
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 3b2f9bd..7e286d7 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -458,12 +458,19 @@ struct x86_pmu { u64 lbr_sel_mask; /* LBR_SELECT valid bits */ const int *lbr_sel_map; /* lbr_select mappings */ bool lbr_double_abort; /* duplicated lbr aborts */ - + bool lbr_msr_access; /* LBR MSR can be accessed */ /* * Extra registers for events */ struct extra_reg *extra_regs; unsigned int er_flags; + /* + * EXTRA REG MSR can be accessed + * The extra offcore registers are completely unrelated to each other. + * So it needs a flag for each extra register. + * The extra offcore MSRs are MSR_OFFCORE_RSP_0 and MSR_OFFCORE_RSP_1. + */ + bool extra_msr_access[2]; /* * Intel host/guest support (KVM) @@ -525,6 +532,21 @@ extern u64 __read_mostly hw_cache_extra_regs [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; +/* + * Under certain circumstances, access certain MSR may cause #GP. + * The function tests if the input MSR can be safely accessed. + */ +static inline bool check_msr(unsigned long msr) +{ + u64 value; + + if (rdmsrl_safe(msr, &value) < 0) + return false; + if (wrmsrl_safe(msr, value) < 0) + return false; + return true; +} + u64 x86_perf_event_update(struct perf_event *event); static inline unsigned int x86_pmu_config_addr(int index) @@ -555,7 +577,11 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, { u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); - if (hwc->extra_reg.reg) + if (hwc->extra_reg.reg && + ((hwc->extra_reg.idx == EXTRA_REG_RSP_0) ? + x86_pmu.extra_msr_access[0] : true) && + ((hwc->extra_reg.idx == EXTRA_REG_RSP_1) ? + x86_pmu.extra_msr_access[1] : true)) wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index adb02aa..3d18765 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2262,7 +2262,7 @@ __init int intel_pmu_init(void) union cpuid10_ebx ebx; struct event_constraint *c; unsigned int unused; - int version; + int version, i; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { @@ -2565,6 +2565,43 @@ __init int intel_pmu_init(void) } } + /* + * Access LBR MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support LBR MSR + * Check all LBT MSR here. + * Disable LBR access if any LBR MSRs can not be accessed. + */ + if (x86_pmu.lbr_nr) { + x86_pmu.lbr_msr_access = check_msr(x86_pmu.lbr_tos); + for (i = 0; i < x86_pmu.lbr_nr; i++) { + x86_pmu.lbr_msr_access &= + check_msr(x86_pmu.lbr_from + i); + x86_pmu.lbr_msr_access &= + check_msr(x86_pmu.lbr_to + i); + } + } + /* + * Access extra MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support offcore event + * Check all extra_regs here. + */ + if (x86_pmu.extra_regs) { + x86_pmu.extra_msr_access[0] = + check_msr(x86_pmu.extra_regs[EXTRA_REG_RSP_0].msr); + + /* Not all platforms have EXTRA_REG_RSP_1 */ + if (x86_pmu.extra_regs[EXTRA_REG_RSP_1].idx == EXTRA_REG_RSP_1) + x86_pmu.extra_msr_access[1] = + check_msr(x86_pmu.extra_regs[EXTRA_REG_RSP_1].msr); + /* + * If there is no EXTRA_REG_RSP_1 support, + * just set the flag to be true. + * So it is ignored at the runtime check. + */ + else + x86_pmu.extra_msr_access[1] = true; + } + /* Support full width counters using alternative MSR range */ if (x86_pmu.intel_cap.full_width_write) { x86_pmu.max_period = x86_pmu.cntval_mask; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 9dd2459..9508d1e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -172,7 +172,7 @@ static void intel_pmu_lbr_reset_64(void) void intel_pmu_lbr_reset(void) { - if (!x86_pmu.lbr_nr) + if (!x86_pmu.lbr_nr || !x86_pmu.lbr_msr_access) return; if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) @@ -334,7 +334,7 @@ void intel_pmu_lbr_read(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - if (!cpuc->lbr_users) + if (!cpuc->lbr_users || !x86_pmu.lbr_msr_access) return; if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)