Message ID | 20200313021616.112322-4-like.xu@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Guest Last Branch Recording Enabling | expand |
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c > index 3bb738f5a472..e919187a0751 100644 > --- a/arch/x86/events/core.c > +++ b/arch/x86/events/core.c > @@ -74,7 +74,8 @@ u64 x86_perf_event_update(struct perf_event *event) > int idx = hwc->idx; > u64 delta; > > - if (idx == INTEL_PMC_IDX_FIXED_BTS) > + if ((idx == INTEL_PMC_IDX_FIXED_BTS) || > + (idx == INTEL_PMC_IDX_FIXED_VLBR)) > return 0; > > /* > @@ -1102,7 +1103,8 @@ static inline void x86_assign_hw_event(struct perf_event *event, > hwc->last_cpu = smp_processor_id(); > hwc->last_tag = ++cpuc->tags[i]; > > - if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) { > + if ((hwc->idx == INTEL_PMC_IDX_FIXED_BTS) || > + (hwc->idx == INTEL_PMC_IDX_FIXED_VLBR)) { > hwc->config_base = 0; > hwc->event_base = 0; > } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) { > @@ -1233,7 +1235,8 @@ int x86_perf_event_set_period(struct perf_event *event) > s64 period = hwc->sample_period; > int ret = 0, idx = hwc->idx; > > - if (idx == INTEL_PMC_IDX_FIXED_BTS) > + if ((idx == INTEL_PMC_IDX_FIXED_BTS) || > + (idx == INTEL_PMC_IDX_FIXED_VLBR)) > return 0; > > /* That seems unfortunate; can that be >= INTEL_PMC_IDX_FIXED_BTS ? If so, that probably wants a comment with the definitions. Or otherwise check for !hwc->event_base. That should be 0 for both these things. > diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c > index 3be51aa06e67..901c82032f4a 100644 > --- a/arch/x86/events/intel/core.c > +++ b/arch/x86/events/intel/core.c > @@ -2157,6 +2157,9 @@ static void intel_pmu_disable_event(struct perf_event *event) > return; > } > > + if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_VLBR)) > + return; > + Please check code-gen to see if you can cut down on brancher here; there's 4 cases: - vlbr - bts - fixed - gp perhaps you can write it like so: (also see https://lkml.kernel.org/r/20190828090217.GN2386@hirez.programming.kicks-ass.net ) static void intel_pmu_enable_event(struct perf_event *event) { ... int idx = hwx->idx; if (idx < INTEL_PMC_IDX_FIXED) { intel_set_masks(event, idx); __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } else if (idx < INTEL_PMC_IDX_FIXED_BTS) { intel_set_masks(event, idx); intel_pmu_enable_fixed(event); } else if (idx == INTEL_PMC_IDX_FIXED_BTS) { intel_pmu_enable_bts(hwc->config); } /* nothing for INTEL_PMC_IDX_FIXED_VLBR */ } That should sort the branches in order of: gp,fixed,bts,vlbr > cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); > cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); > cpuc->intel_cp_status &= ~(1ull << hwc->idx); > @@ -2241,6 +2244,9 @@ static void intel_pmu_enable_event(struct perf_event *event) > return; > } > > + if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_VLBR)) > + return; > + > if (event->attr.exclude_host) > cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); > if (event->attr.exclude_guest) idem. > @@ -2595,6 +2601,15 @@ intel_bts_constraints(struct perf_event *event) > return NULL; > } > > +static struct event_constraint * > +intel_guest_event_constraints(struct perf_event *event) > +{ > + if (unlikely(is_guest_lbr_event(event))) > + return &guest_lbr_constraint; > + > + return NULL; > +} This is a mis-nomer, it isn't just any guest_event > + > static int intel_alt_er(int idx, u64 config) > { > int alt_idx = idx; > @@ -2785,6 +2800,10 @@ __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, > { > struct event_constraint *c; > > + c = intel_guest_event_constraints(event); > + if (c) > + return c; > + > c = intel_bts_constraints(event); > if (c) > return c; > diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h > index 1025bc6eb04f..9a62264a3068 100644 > --- a/arch/x86/events/perf_event.h > +++ b/arch/x86/events/perf_event.h > @@ -969,6 +969,20 @@ static inline bool intel_pmu_has_bts(struct perf_event *event) > return intel_pmu_has_bts_period(event, hwc->sample_period); > } > > +static inline bool is_guest_event(struct perf_event *event) > +{ > + if (event->attr.exclude_host && is_kernel_event(event)) > + return true; > + return false; > +} I don't like this one, what if another in-kernel users generates an event with exclude_host set ? > @@ -989,6 +1003,7 @@ void release_ds_buffers(void); > void reserve_ds_buffers(void); > > extern struct event_constraint bts_constraint; > +extern struct event_constraint guest_lbr_constraint; > > void intel_pmu_enable_bts(u64 config); > > diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h > index e018a1cf604c..674130aca75a 100644 > --- a/arch/x86/include/asm/perf_event.h > +++ b/arch/x86/include/asm/perf_event.h > @@ -181,9 +181,19 @@ struct x86_pmu_capability { > #define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) > #define GLOBAL_STATUS_ASIF BIT_ULL(60) > #define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) > -#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) > +#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58 > +#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT) > #define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) > > +/* > + * We model guest LBR event tracing as another fixed-mode PMC like BTS. > + * > + * We choose bit 58 (LBRS_FROZEN_BIT) which is used to indicate that the LBR > + * stack is frozen on a hardware PMI request in the PERF_GLOBAL_STATUS msr, > + * and the 59th PMC counter (if any) is not supposed to use it as well. Is this saying that STATUS.58 should never be set? I don't really understand the language. > + */ > +#define INTEL_PMC_IDX_FIXED_VLBR GLOBAL_STATUS_LBRS_FROZEN_BIT > + > /* > * Adaptive PEBS v4 > */
Hi Peter, First of all, thanks for your comments! On 2020/4/10 0:37, Peter Zijlstra wrote: >> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c >> index 3bb738f5a472..e919187a0751 100644 >> --- a/arch/x86/events/core.c >> +++ b/arch/x86/events/core.c >> @@ -74,7 +74,8 @@ u64 x86_perf_event_update(struct perf_event *event) >> int idx = hwc->idx; >> u64 delta; >> >> - if (idx == INTEL_PMC_IDX_FIXED_BTS) >> + if ((idx == INTEL_PMC_IDX_FIXED_BTS) || >> + (idx == INTEL_PMC_IDX_FIXED_VLBR)) >> return 0; >> >> /* >> @@ -1102,7 +1103,8 @@ static inline void x86_assign_hw_event(struct perf_event *event, >> hwc->last_cpu = smp_processor_id(); >> hwc->last_tag = ++cpuc->tags[i]; >> >> - if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) { >> + if ((hwc->idx == INTEL_PMC_IDX_FIXED_BTS) || >> + (hwc->idx == INTEL_PMC_IDX_FIXED_VLBR)) { >> hwc->config_base = 0; >> hwc->event_base = 0; >> } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) { >> @@ -1233,7 +1235,8 @@ int x86_perf_event_set_period(struct perf_event *event) >> s64 period = hwc->sample_period; >> int ret = 0, idx = hwc->idx; >> >> - if (idx == INTEL_PMC_IDX_FIXED_BTS) >> + if ((idx == INTEL_PMC_IDX_FIXED_BTS) || >> + (idx == INTEL_PMC_IDX_FIXED_VLBR)) >> return 0; >> >> /* > That seems unfortunate; can that be >= INTEL_PMC_IDX_FIXED_BTS ? If so, > that probably wants a comment with the definitions. > > Or otherwise check for !hwc->event_base. That should be 0 for both these > things. Yes, the !hwc->event_base looks good to me. > >> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c >> index 3be51aa06e67..901c82032f4a 100644 >> --- a/arch/x86/events/intel/core.c >> +++ b/arch/x86/events/intel/core.c >> @@ -2157,6 +2157,9 @@ static void intel_pmu_disable_event(struct perf_event *event) >> return; >> } >> >> + if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_VLBR)) >> + return; >> + > Please check code-gen to see if you can cut down on brancher here; > there's 4 cases: > > - vlbr > - bts > - fixed > - gp > > perhaps you can write it like so: > > (also see https://lkml.kernel.org/r/20190828090217.GN2386@hirez.programming.kicks-ass.net ) > > static void intel_pmu_enable_event(struct perf_event *event) > { > ... > int idx = hwx->idx; > > if (idx < INTEL_PMC_IDX_FIXED) { > intel_set_masks(event, idx); > __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); > } else if (idx < INTEL_PMC_IDX_FIXED_BTS) { > intel_set_masks(event, idx); > intel_pmu_enable_fixed(event); > } else if (idx == INTEL_PMC_IDX_FIXED_BTS) { > intel_pmu_enable_bts(hwc->config); > } > > /* nothing for INTEL_PMC_IDX_FIXED_VLBR */ > } > > That should sort the branches in order of: gp,fixed,bts,vlbr Note the current order is: bts, pebs, fixed, gp. Sure, let me try to refactor it in this way. > >> cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); >> cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); >> cpuc->intel_cp_status &= ~(1ull << hwc->idx); >> @@ -2241,6 +2244,9 @@ static void intel_pmu_enable_event(struct perf_event *event) >> return; >> } >> >> + if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_VLBR)) >> + return; >> + >> if (event->attr.exclude_host) >> cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); >> if (event->attr.exclude_guest) > idem. idem. > >> @@ -2595,6 +2601,15 @@ intel_bts_constraints(struct perf_event *event) >> return NULL; >> } >> >> +static struct event_constraint * >> +intel_guest_event_constraints(struct perf_event *event) >> +{ >> + if (unlikely(is_guest_lbr_event(event))) >> + return &guest_lbr_constraint; >> + >> + return NULL; >> +} > This is a mis-nomer, it isn't just any guest_event Sure, I'll rename it to intel_guest_lbr_event_constraints() instead of using it as a unified interface to get all of guest event constraints. > >> + >> static int intel_alt_er(int idx, u64 config) >> { >> int alt_idx = idx; >> @@ -2785,6 +2800,10 @@ __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, >> { >> struct event_constraint *c; >> >> + c = intel_guest_event_constraints(event); >> + if (c) >> + return c; >> + >> c = intel_bts_constraints(event); >> if (c) >> return c; >> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h >> index 1025bc6eb04f..9a62264a3068 100644 >> --- a/arch/x86/events/perf_event.h >> +++ b/arch/x86/events/perf_event.h >> @@ -969,6 +969,20 @@ static inline bool intel_pmu_has_bts(struct perf_event *event) >> return intel_pmu_has_bts_period(event, hwc->sample_period); >> } >> >> +static inline bool is_guest_event(struct perf_event *event) >> +{ >> + if (event->attr.exclude_host && is_kernel_event(event)) >> + return true; >> + return false; >> +} > I don't like this one, what if another in-kernel users generates an > event with exclude_host set ? Thanks for the clear attitude. How about: - remove the is_guest_event() to avoid potential misuse; - move all checks into is_guest_lbr_event() and make it dedicated: static inline bool is_guest_lbr_event(struct perf_event *event) { if (is_kernel_event(event) && event->attr.exclude_host && needs_branch_stack(event)) return true; return false; } In this case, it's safe to generate an event with exclude_host set and also use LBR to count guest or nothing for other in-kernel users because the intel_guest_lbr_event_constraints() makes LBR exclusive. For this generic usage, I may rename: - is_guest_lbr_event() to is_lbr_no_counter_event(); - intel_guest_lbr_event_constraints() to intel_lbr_no_counter_event_constraints(); Is this acceptable to you? If there is anything needs to be improved, please let me know. >> @@ -989,6 +1003,7 @@ void release_ds_buffers(void); >> void reserve_ds_buffers(void); >> >> extern struct event_constraint bts_constraint; >> +extern struct event_constraint guest_lbr_constraint; >> >> void intel_pmu_enable_bts(u64 config); >> >> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h >> index e018a1cf604c..674130aca75a 100644 >> --- a/arch/x86/include/asm/perf_event.h >> +++ b/arch/x86/include/asm/perf_event.h >> @@ -181,9 +181,19 @@ struct x86_pmu_capability { >> #define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) >> #define GLOBAL_STATUS_ASIF BIT_ULL(60) >> #define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) >> -#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) >> +#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58 >> +#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT) >> #define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) >> >> +/* >> + * We model guest LBR event tracing as another fixed-mode PMC like BTS. >> + * >> + * We choose bit 58 (LBRS_FROZEN_BIT) which is used to indicate that the LBR >> + * stack is frozen on a hardware PMI request in the PERF_GLOBAL_STATUS msr, >> + * and the 59th PMC counter (if any) is not supposed to use it as well. > Is this saying that STATUS.58 should never be set? I don't really > understand the language. My fault, and let me make it more clearly: We choose bit 58 because it's used to indicate LBR stack frozen state not like other overflow conditions in the PERF_GLOBAL_STATUS msr, and it will not be used for any actual fixed events. > >> + */ >> +#define INTEL_PMC_IDX_FIXED_VLBR GLOBAL_STATUS_LBRS_FROZEN_BIT >> + >> /* >> * Adaptive PEBS v4 >> */
Hi Peter, On 2020/4/10 11:03, Xu, Like wrote: > Hi Peter, > > First of all, thanks for your comments! > > On 2020/4/10 0:37, Peter Zijlstra wrote: >>> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c >>> index 3bb738f5a472..e919187a0751 100644 >>> --- a/arch/x86/events/core.c >>> +++ b/arch/x86/events/core.c >>> @@ -74,7 +74,8 @@ u64 x86_perf_event_update(struct perf_event *event) >>> int idx = hwc->idx; >>> u64 delta; >>> - if (idx == INTEL_PMC_IDX_FIXED_BTS) >>> + if ((idx == INTEL_PMC_IDX_FIXED_BTS) || >>> + (idx == INTEL_PMC_IDX_FIXED_VLBR)) >>> return 0; >>> /* >>> @@ -1102,7 +1103,8 @@ static inline void x86_assign_hw_event(struct >>> perf_event *event, >>> hwc->last_cpu = smp_processor_id(); >>> hwc->last_tag = ++cpuc->tags[i]; >>> - if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) { >>> + if ((hwc->idx == INTEL_PMC_IDX_FIXED_BTS) || >>> + (hwc->idx == INTEL_PMC_IDX_FIXED_VLBR)) { >>> hwc->config_base = 0; >>> hwc->event_base = 0; >>> } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) { >>> @@ -1233,7 +1235,8 @@ int x86_perf_event_set_period(struct perf_event >>> *event) >>> s64 period = hwc->sample_period; >>> int ret = 0, idx = hwc->idx; >>> - if (idx == INTEL_PMC_IDX_FIXED_BTS) >>> + if ((idx == INTEL_PMC_IDX_FIXED_BTS) || >>> + (idx == INTEL_PMC_IDX_FIXED_VLBR)) >>> return 0; >>> /* >> That seems unfortunate; can that be >= INTEL_PMC_IDX_FIXED_BTS ? If so, >> that probably wants a comment with the definitions. >> >> Or otherwise check for !hwc->event_base. That should be 0 for both these >> things. > Yes, the !hwc->event_base looks good to me. >> >>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c >>> index 3be51aa06e67..901c82032f4a 100644 >>> --- a/arch/x86/events/intel/core.c >>> +++ b/arch/x86/events/intel/core.c >>> @@ -2157,6 +2157,9 @@ static void intel_pmu_disable_event(struct >>> perf_event *event) >>> return; >>> } >>> + if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_VLBR)) >>> + return; >>> + >> Please check code-gen to see if you can cut down on brancher here; >> there's 4 cases: >> >> - vlbr >> - bts >> - fixed >> - gp >> >> perhaps you can write it like so: >> >> (also see >> https://lkml.kernel.org/r/20190828090217.GN2386@hirez.programming.kicks-ass.net >> ) >> >> static void intel_pmu_enable_event(struct perf_event *event) >> { >> ... >> int idx = hwx->idx; >> >> if (idx < INTEL_PMC_IDX_FIXED) { >> intel_set_masks(event, idx); >> __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); >> } else if (idx < INTEL_PMC_IDX_FIXED_BTS) { >> intel_set_masks(event, idx); >> intel_pmu_enable_fixed(event); >> } else if (idx == INTEL_PMC_IDX_FIXED_BTS) { >> intel_pmu_enable_bts(hwc->config); >> } >> >> /* nothing for INTEL_PMC_IDX_FIXED_VLBR */ >> } >> >> That should sort the branches in order of: gp,fixed,bts,vlbr > > Note the current order is: bts, pebs, fixed, gp. > > Sure, let me try to refactor it in this way. >> >>> cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); >>> cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); >>> cpuc->intel_cp_status &= ~(1ull << hwc->idx); >>> @@ -2241,6 +2244,9 @@ static void intel_pmu_enable_event(struct >>> perf_event *event) >>> return; >>> } >>> + if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_VLBR)) >>> + return; >>> + >>> if (event->attr.exclude_host) >>> cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); >>> if (event->attr.exclude_guest) >> idem. > idem. >> >>> @@ -2595,6 +2601,15 @@ intel_bts_constraints(struct perf_event *event) >>> return NULL; >>> } >>> +static struct event_constraint * >>> +intel_guest_event_constraints(struct perf_event *event) >>> +{ >>> + if (unlikely(is_guest_lbr_event(event))) >>> + return &guest_lbr_constraint; >>> + >>> + return NULL; >>> +} >> This is a mis-nomer, it isn't just any guest_event > > Sure, I'll rename it to intel_guest_lbr_event_constraints() > instead of using it as a unified interface to get all of guest event > constraints. > >> >>> + >>> static int intel_alt_er(int idx, u64 config) >>> { >>> int alt_idx = idx; >>> @@ -2785,6 +2800,10 @@ __intel_get_event_constraints(struct >>> cpu_hw_events *cpuc, int idx, >>> { >>> struct event_constraint *c; >>> + c = intel_guest_event_constraints(event); >>> + if (c) >>> + return c; >>> + >>> c = intel_bts_constraints(event); >>> if (c) >>> return c; >>> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h >>> index 1025bc6eb04f..9a62264a3068 100644 >>> --- a/arch/x86/events/perf_event.h >>> +++ b/arch/x86/events/perf_event.h >>> @@ -969,6 +969,20 @@ static inline bool intel_pmu_has_bts(struct >>> perf_event *event) >>> return intel_pmu_has_bts_period(event, hwc->sample_period); >>> } >>> +static inline bool is_guest_event(struct perf_event *event) >>> +{ >>> + if (event->attr.exclude_host && is_kernel_event(event)) >>> + return true; >>> + return false; >>> +} >> I don't like this one, what if another in-kernel users generates an >> event with exclude_host set ? > Thanks for the clear attitude. > > How about: > - remove the is_guest_event() to avoid potential misuse; > - move all checks into is_guest_lbr_event() and make it dedicated: > > static inline bool is_guest_lbr_event(struct perf_event *event) > { > if (is_kernel_event(event) && > event->attr.exclude_host && needs_branch_stack(event)) > return true; > return false; > } > > In this case, it's safe to generate an event with exclude_host set > and also use LBR to count guest or nothing for other in-kernel users > because the intel_guest_lbr_event_constraints() makes LBR exclusive. > > For this generic usage, I may rename: > - is_guest_lbr_event() to is_lbr_no_counter_event(); > - intel_guest_lbr_event_constraints() to > intel_lbr_no_counter_event_constraints(); > > Is this acceptable to you? > If there is anything needs to be improved, please let me know. Do you have any preference for this ? If you have more comments for the general idea or code details, please let me know. For example, you may take a look at the interface named intel_pmu_create_lbr_event() in the "[PATCH v9 07/10] KVM: x86/pmu: Add LBR feature emulation via guest LBR event". If not, I'll spin the next version based on your current feedback. Thanks, Like Xu > >>> @@ -989,6 +1003,7 @@ void release_ds_buffers(void); >>> void reserve_ds_buffers(void); >>> extern struct event_constraint bts_constraint; >>> +extern struct event_constraint guest_lbr_constraint; >>> void intel_pmu_enable_bts(u64 config); >>> diff --git a/arch/x86/include/asm/perf_event.h >>> b/arch/x86/include/asm/perf_event.h >>> index e018a1cf604c..674130aca75a 100644 >>> --- a/arch/x86/include/asm/perf_event.h >>> +++ b/arch/x86/include/asm/perf_event.h >>> @@ -181,9 +181,19 @@ struct x86_pmu_capability { >>> #define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) >>> #define GLOBAL_STATUS_ASIF BIT_ULL(60) >>> #define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) >>> -#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) >>> +#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58 >>> +#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT) >>> #define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) >>> +/* >>> + * We model guest LBR event tracing as another fixed-mode PMC like BTS. >>> + * >>> + * We choose bit 58 (LBRS_FROZEN_BIT) which is used to indicate that >>> the LBR >>> + * stack is frozen on a hardware PMI request in the PERF_GLOBAL_STATUS >>> msr, >>> + * and the 59th PMC counter (if any) is not supposed to use it as well. >> Is this saying that STATUS.58 should never be set? I don't really >> understand the language. > My fault, and let me make it more clearly: > > We choose bit 58 because it's used to indicate LBR stack frozen state > not like other overflow conditions in the PERF_GLOBAL_STATUS msr, > and it will not be used for any actual fixed events. > >> >>> + */ >>> +#define INTEL_PMC_IDX_FIXED_VLBR GLOBAL_STATUS_LBRS_FROZEN_BIT >>> + >>> /* >>> * Adaptive PEBS v4 >>> */ >
On Fri, Apr 10, 2020 at 11:03:33AM +0800, Xu, Like wrote: > On 2020/4/10 0:37, Peter Zijlstra wrote: > > That should sort the branches in order of: gp,fixed,bts,vlbr > > Note the current order is: bts, pebs, fixed, gp. Yeah, and that means that gp (which is I think the most common case) is the most expensive. > Sure, let me try to refactor it in this way. Thanks! > > > +static inline bool is_guest_event(struct perf_event *event) > > > +{ > > > + if (event->attr.exclude_host && is_kernel_event(event)) > > > + return true; > > > + return false; > > > +} > > I don't like this one, what if another in-kernel users generates an > > event with exclude_host set ? > Thanks for the clear attitude. > > How about: > - remove the is_guest_event() to avoid potential misuse; > - move all checks into is_guest_lbr_event() and make it dedicated: > > static inline bool is_guest_lbr_event(struct perf_event *event) > { > if (is_kernel_event(event) && > event->attr.exclude_host && needs_branch_stack(event)) > return true; > return false; > } > > In this case, it's safe to generate an event with exclude_host set > and also use LBR to count guest or nothing for other in-kernel users > because the intel_guest_lbr_event_constraints() makes LBR exclusive. > > For this generic usage, I may rename: > - is_guest_lbr_event() to is_lbr_no_counter_event(); > - intel_guest_lbr_event_constraints() to > intel_lbr_no_counter_event_constraints(); > > Is this acceptable to you? I suppose so, please put a comment on top of that function though, so we don't forget. > > > @@ -181,9 +181,19 @@ struct x86_pmu_capability { > > > #define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) > > > #define GLOBAL_STATUS_ASIF BIT_ULL(60) > > > #define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) > > > -#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) > > > +#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58 > > > +#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT) > > > #define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) > > > +/* > > > + * We model guest LBR event tracing as another fixed-mode PMC like BTS. > > > + * > > > + * We choose bit 58 (LBRS_FROZEN_BIT) which is used to indicate that the LBR > > > + * stack is frozen on a hardware PMI request in the PERF_GLOBAL_STATUS msr, > > > + * and the 59th PMC counter (if any) is not supposed to use it as well. > > Is this saying that STATUS.58 should never be set? I don't really > > understand the language. > My fault, and let me make it more clearly: > > We choose bit 58 because it's used to indicate LBR stack frozen state > not like other overflow conditions in the PERF_GLOBAL_STATUS msr, > and it will not be used for any actual fixed events. That's only with v4, also we unconditionally mask that bit in handle_pmi_common(), so it'll never be set in the overflow handling. That's all fine, I suppose, all you want is means of programming the LBR registers, we don't actually do anything with then in the host context. Please write a more elaborate comment here.
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 3bb738f5a472..e919187a0751 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -74,7 +74,8 @@ u64 x86_perf_event_update(struct perf_event *event) int idx = hwc->idx; u64 delta; - if (idx == INTEL_PMC_IDX_FIXED_BTS) + if ((idx == INTEL_PMC_IDX_FIXED_BTS) || + (idx == INTEL_PMC_IDX_FIXED_VLBR)) return 0; /* @@ -1102,7 +1103,8 @@ static inline void x86_assign_hw_event(struct perf_event *event, hwc->last_cpu = smp_processor_id(); hwc->last_tag = ++cpuc->tags[i]; - if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) { + if ((hwc->idx == INTEL_PMC_IDX_FIXED_BTS) || + (hwc->idx == INTEL_PMC_IDX_FIXED_VLBR)) { hwc->config_base = 0; hwc->event_base = 0; } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) { @@ -1233,7 +1235,8 @@ int x86_perf_event_set_period(struct perf_event *event) s64 period = hwc->sample_period; int ret = 0, idx = hwc->idx; - if (idx == INTEL_PMC_IDX_FIXED_BTS) + if ((idx == INTEL_PMC_IDX_FIXED_BTS) || + (idx == INTEL_PMC_IDX_FIXED_VLBR)) return 0; /* diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3be51aa06e67..901c82032f4a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2157,6 +2157,9 @@ static void intel_pmu_disable_event(struct perf_event *event) return; } + if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_VLBR)) + return; + cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); cpuc->intel_cp_status &= ~(1ull << hwc->idx); @@ -2241,6 +2244,9 @@ static void intel_pmu_enable_event(struct perf_event *event) return; } + if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_VLBR)) + return; + if (event->attr.exclude_host) cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); if (event->attr.exclude_guest) @@ -2595,6 +2601,15 @@ intel_bts_constraints(struct perf_event *event) return NULL; } +static struct event_constraint * +intel_guest_event_constraints(struct perf_event *event) +{ + if (unlikely(is_guest_lbr_event(event))) + return &guest_lbr_constraint; + + return NULL; +} + static int intel_alt_er(int idx, u64 config) { int alt_idx = idx; @@ -2785,6 +2800,10 @@ __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct event_constraint *c; + c = intel_guest_event_constraints(event); + if (c) + return c; + c = intel_bts_constraints(event); if (c) return c; diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 5ed88e578eaa..ff1f35b4f420 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1353,3 +1353,6 @@ int x86_perf_get_lbr(struct x86_pmu_lbr *stack) return 0; } EXPORT_SYMBOL_GPL(x86_perf_get_lbr); + +struct event_constraint guest_lbr_constraint = + EVENT_CONSTRAINT(0, 1ULL << GLOBAL_STATUS_LBRS_FROZEN_BIT, 0); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 1025bc6eb04f..9a62264a3068 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -969,6 +969,20 @@ static inline bool intel_pmu_has_bts(struct perf_event *event) return intel_pmu_has_bts_period(event, hwc->sample_period); } +static inline bool is_guest_event(struct perf_event *event) +{ + if (event->attr.exclude_host && is_kernel_event(event)) + return true; + return false; +} + +static inline bool is_guest_lbr_event(struct perf_event *event) +{ + if (is_guest_event(event) && needs_branch_stack(event)) + return true; + return false; +} + int intel_pmu_save_and_restart(struct perf_event *event); struct event_constraint * @@ -989,6 +1003,7 @@ void release_ds_buffers(void); void reserve_ds_buffers(void); extern struct event_constraint bts_constraint; +extern struct event_constraint guest_lbr_constraint; void intel_pmu_enable_bts(u64 config); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index e018a1cf604c..674130aca75a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -181,9 +181,19 @@ struct x86_pmu_capability { #define GLOBAL_STATUS_UNC_OVF BIT_ULL(61) #define GLOBAL_STATUS_ASIF BIT_ULL(60) #define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59) -#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58) +#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58 +#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT) #define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55) +/* + * We model guest LBR event tracing as another fixed-mode PMC like BTS. + * + * We choose bit 58 (LBRS_FROZEN_BIT) which is used to indicate that the LBR + * stack is frozen on a hardware PMI request in the PERF_GLOBAL_STATUS msr, + * and the 59th PMC counter (if any) is not supposed to use it as well. + */ +#define INTEL_PMC_IDX_FIXED_VLBR GLOBAL_STATUS_LBRS_FROZEN_BIT + /* * Adaptive PEBS v4 */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 547773f5894e..b94b695f2d7e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1082,6 +1082,13 @@ static inline bool is_sampling_event(struct perf_event *event) return event->attr.sample_period != 0; } +#define TASK_TOMBSTONE ((void *)-1L) + +static inline bool is_kernel_event(struct perf_event *event) +{ + return READ_ONCE(event->owner) == TASK_TOMBSTONE; +} + /* * Return 1 for a software event, 0 for a hardware event */ diff --git a/kernel/events/core.c b/kernel/events/core.c index e453589da97c..75d47fcb9a0d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -164,13 +164,6 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, raw_spin_unlock(&cpuctx->ctx.lock); } -#define TASK_TOMBSTONE ((void *)-1L) - -static bool is_kernel_event(struct perf_event *event) -{ - return READ_ONCE(event->owner) == TASK_TOMBSTONE; -} - /* * On task ctx scheduling... *
The hypervisor may request the perf subsystem to schedule a time window to directly access the LBR stack msrs for its own use. Normally, it would create a guest LBR event with callstack mode enabled, which is scheduled along with other LBR events on the host but in an exclusive way. To avoid wasting a counter for the guest LBR event, the perf tracks it via is_guest_lbr_event() and assigns it with a fake INTEL_PMC_IDX_FIXED_VLBR counter with the help of new guest_lbr_constraint. Inspired by BTS event, there is actually no hardware counter allocated for guest LBR events. Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ingo Molnar <mingo@kernel.org> Signed-off-by: Like Xu <like.xu@linux.intel.com> --- arch/x86/events/core.c | 9 ++++++--- arch/x86/events/intel/core.c | 19 +++++++++++++++++++ arch/x86/events/intel/lbr.c | 3 +++ arch/x86/events/perf_event.h | 15 +++++++++++++++ arch/x86/include/asm/perf_event.h | 12 +++++++++++- include/linux/perf_event.h | 7 +++++++ kernel/events/core.c | 7 ------- 7 files changed, 61 insertions(+), 11 deletions(-)