Message ID: 20240628065111.59718-3-adrian.hunter@intel.com (mailing list archive)
State:      New, archived
Series:     [V8,01/12] perf/core: Add aux_pause, aux_resume, aux_start_paused
On Fri, Jun 28, 2024 at 09:51:01AM +0300, Adrian Hunter wrote:

> @@ -534,7 +537,20 @@ static void pt_config(struct perf_event *event)
>         reg |= (event->attr.config & PT_CONFIG_MASK);
>
>         event->hw.config = reg;
> +
> +       /*
> +        * Allow resume before starting so as not to overwrite a value set by a
> +        * PMI.
> +        */
> +       WRITE_ONCE(pt->resume_allowed, 1);

	barrier();

>         pt_config_start(event);

	barrier();

> +       /*
> +        * Allow pause after starting so its pt_config_stop() doesn't race with
> +        * pt_config_start().
> +        */
> +       WRITE_ONCE(pt->pause_allowed, 1);

IIRC you need those barrier()s, because if the compiler 'helpfully'
inlines the static pt_config_start(), you lose your sequence point and
things can get re-ordered. WRITE_ONCE() only ensures the store is whole
and ordered against other volatile ops, but not against much else.

> }
>
> static void pt_config_stop(struct perf_event *event)
> @@ -1511,6 +1527,7 @@ void intel_pt_interrupt(void)
>         buf = perf_aux_output_begin(&pt->handle, event);
>         if (!buf) {
>                 event->hw.state = PERF_HES_STOPPED;
> +               pt->resume_allowed = 0;
>                 return;
>         }
>
> @@ -1519,6 +1536,7 @@ void intel_pt_interrupt(void)
>         ret = pt_buffer_reset_markers(buf, &pt->handle);
>         if (ret) {
>                 perf_aux_output_end(&pt->handle, 0);
> +               pt->resume_allowed = 0;
>                 return;
>         }
>

Above you WRITE_ONCE() on ->resume_allowed, here you do not. Some *SAN
thing or other is bound to get upset about things like that.

> @@ -1573,6 +1591,26 @@ static void pt_event_start(struct perf_event *event, int mode)
>         struct pt *pt = this_cpu_ptr(&pt_ctx);
>         struct pt_buffer *buf;
>
> +       if (mode & PERF_EF_RESUME) {
> +               if (READ_ONCE(pt->resume_allowed)) {

At this point I seem to have lost the plot: how do ->resume_allowed and
->aux_paused interact?

> +                       u64 status;
> +
> +                       /*
> +                        * Only if the trace is not active and the error and
> +                        * stopped bits are clear, is it safe to start, but a
> +                        * PMI might have just cleared these, so resume_allowed
> +                        * must be checked again also.
> +                        */
> +                       rdmsrl(MSR_IA32_RTIT_STATUS, status);
> +                       if (!(status & (RTIT_STATUS_TRIGGEREN |
> +                                       RTIT_STATUS_ERROR |
> +                                       RTIT_STATUS_STOPPED)) &&
> +                           READ_ONCE(pt->resume_allowed))
> +                               pt_config_start(event);
> +               }
> +               return;
> +       }
> +
>         buf = perf_aux_output_begin(&pt->handle, event);
>         if (!buf)
>                 goto fail_stop;
> @@ -1601,6 +1639,16 @@ static void pt_event_stop(struct perf_event *event, int mode)
> {
>         struct pt *pt = this_cpu_ptr(&pt_ctx);
>
> +       if (mode & PERF_EF_PAUSE) {
> +               if (READ_ONCE(pt->pause_allowed))
> +                       pt_config_stop(event);
> +               return;
> +       }
> +
> +       /* Protect against racing */

No F1 cars allowed? Surely the comment can tell the reader which actual
race one is concerned about, no?

> +       WRITE_ONCE(pt->pause_allowed, 0);
> +       WRITE_ONCE(pt->resume_allowed, 0);
> +
>         /*
>          * Protect against the PMI racing with disabling wrmsr,
>          * see comment in intel_pt_interrupt().
> @@ -1659,8 +1707,12 @@ static long pt_event_snapshot_aux(struct perf_event *event,
>         /*
>          * Here, handle_nmi tells us if the tracing is on
>          */
> -       if (READ_ONCE(pt->handle_nmi))
> +       if (READ_ONCE(pt->handle_nmi)) {
> +               /* Protect against racing */
> +               WRITE_ONCE(pt->pause_allowed, 0);
> +               WRITE_ONCE(pt->resume_allowed, 0);

barrier()?

>                 pt_config_stop(event);
> +       }
>
>         pt_read_offset(buf);
>         pt_update_head(pt);
> @@ -1677,8 +1729,11 @@ static long pt_event_snapshot_aux(struct perf_event *event,
>          * Compiler barrier not needed as we couldn't have been
>          * preempted by anything that touches pt->handle_nmi.
>          */
> -       if (pt->handle_nmi)
> +       if (pt->handle_nmi) {
> +               WRITE_ONCE(pt->resume_allowed, 1);
>                 pt_config_start(event);
> +               WRITE_ONCE(pt->pause_allowed, 1);

barrier() went missing again?

> +       }
>
>         return ret;
> }
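Concretely, the change being asked for is to bracket pt_config_start() with
compiler barriers. A minimal sketch of what pt_config() would look like with
the barrier()s folded in (this follows the reviewer's proposal; it is not the
code as posted in V8):

        event->hw.config = reg;

        /*
         * Allow resume before starting so as not to overwrite a value set by a
         * PMI.
         */
        WRITE_ONCE(pt->resume_allowed, 1);
        /* Keep the store above from sinking below an inlined pt_config_start() */
        barrier();
        pt_config_start(event);
        /* Keep the store below from hoisting above pt_config_start() */
        barrier();
        /*
         * Allow pause after starting so its pt_config_stop() doesn't race with
         * pt_config_start().
         */
        WRITE_ONCE(pt->pause_allowed, 1);

barrier() is only a compiler barrier, which appears to be sufficient here:
the race is with a PMI on the same CPU, so ordering against other CPUs
(smp_mb() and friends) is not what is at stake, only the order the compiler
emits the stores relative to the inlined call.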
On 1/07/24 13:46, Peter Zijlstra wrote:
> On Fri, Jun 28, 2024 at 09:51:01AM +0300, Adrian Hunter wrote:
>
>> @@ -534,7 +537,20 @@ static void pt_config(struct perf_event *event)
>>         reg |= (event->attr.config & PT_CONFIG_MASK);
>>
>>         event->hw.config = reg;
>> +
>> +       /*
>> +        * Allow resume before starting so as not to overwrite a value set by a
>> +        * PMI.
>> +        */
>> +       WRITE_ONCE(pt->resume_allowed, 1);
>
>	barrier();
>
>>         pt_config_start(event);
>
>	barrier();
>
>> +       /*
>> +        * Allow pause after starting so its pt_config_stop() doesn't race with
>> +        * pt_config_start().
>> +        */
>> +       WRITE_ONCE(pt->pause_allowed, 1);
>
> IIRC you need those barrier()s, because if the compiler 'helpfully'
> inlines the static pt_config_start(), you lose your sequence point and
> things can get re-ordered. WRITE_ONCE() only ensures the store is whole
> and ordered against other volatile ops, but not against much else.

Yes, barriers are needed. gcc even says:

	...an implementation is free to reorder and combine volatile
	accesses that occur between sequence points... Accesses to
	non-volatile objects are not ordered with respect to volatile
	accesses.

https://gcc.gnu.org/onlinedocs/gcc/Volatiles.html

>
>> }
>>
>> static void pt_config_stop(struct perf_event *event)
>> @@ -1511,6 +1527,7 @@ void intel_pt_interrupt(void)
>>         buf = perf_aux_output_begin(&pt->handle, event);
>>         if (!buf) {
>>                 event->hw.state = PERF_HES_STOPPED;
>> +               pt->resume_allowed = 0;
>>                 return;
>>         }
>>
>> @@ -1519,6 +1536,7 @@ void intel_pt_interrupt(void)
>>         ret = pt_buffer_reset_markers(buf, &pt->handle);
>>         if (ret) {
>>                 perf_aux_output_end(&pt->handle, 0);
>> +               pt->resume_allowed = 0;
>>                 return;
>>         }
>>
>
> Above you WRITE_ONCE() on ->resume_allowed, here you do not. Some *SAN
> thing or other is bound to get upset about things like that.
>
>> @@ -1573,6 +1591,26 @@ static void pt_event_start(struct perf_event *event, int mode)
>>         struct pt *pt = this_cpu_ptr(&pt_ctx);
>>         struct pt_buffer *buf;
>>
>> +       if (mode & PERF_EF_RESUME) {
>> +               if (READ_ONCE(pt->resume_allowed)) {
>
> At this point I seem to have lost the plot: how do ->resume_allowed and
> ->aux_paused interact?

This is assuming there is no guaranteed order in which events in the
group are started / stopped, so there is a need to protect against pt
being resumed when it is not yet configured, or when it is in the
process of being stopped. Updates to event->hw.state don't happen at
quite the right places to be used instead, and the needs of resume are
slightly different from those of pause.

resume_allowed means pt is configured, so ->start(PERF_EF_RESUME) can
actually safely start pt. aux_paused is separate and takes effect when
there is a regular start.

>
>> +                       u64 status;
>> +
>> +                       /*
>> +                        * Only if the trace is not active and the error and
>> +                        * stopped bits are clear, is it safe to start, but a
>> +                        * PMI might have just cleared these, so resume_allowed
>> +                        * must be checked again also.
>> +                        */
>> +                       rdmsrl(MSR_IA32_RTIT_STATUS, status);
>> +                       if (!(status & (RTIT_STATUS_TRIGGEREN |
>> +                                       RTIT_STATUS_ERROR |
>> +                                       RTIT_STATUS_STOPPED)) &&
>> +                           READ_ONCE(pt->resume_allowed))
>> +                               pt_config_start(event);
>> +               }
>> +               return;
>> +       }
>> +
>>         buf = perf_aux_output_begin(&pt->handle, event);
>>         if (!buf)
>>                 goto fail_stop;
>> @@ -1601,6 +1639,16 @@ static void pt_event_stop(struct perf_event *event, int mode)
>> {
>>         struct pt *pt = this_cpu_ptr(&pt_ctx);
>>
>> +       if (mode & PERF_EF_PAUSE) {
>> +               if (READ_ONCE(pt->pause_allowed))
>> +                       pt_config_stop(event);
>> +               return;
>> +       }
>> +
>> +       /* Protect against racing */
>
> No F1 cars allowed? Surely the comment can tell the reader which actual
> race one is concerned about, no?

Yeah, this needs more comments.

>
>> +       WRITE_ONCE(pt->pause_allowed, 0);
>> +       WRITE_ONCE(pt->resume_allowed, 0);
>> +
>>         /*
>>          * Protect against the PMI racing with disabling wrmsr,
>>          * see comment in intel_pt_interrupt().
>> @@ -1659,8 +1707,12 @@ static long pt_event_snapshot_aux(struct perf_event *event,
>>         /*
>>          * Here, handle_nmi tells us if the tracing is on
>>          */
>> -       if (READ_ONCE(pt->handle_nmi))
>> +       if (READ_ONCE(pt->handle_nmi)) {
>> +               /* Protect against racing */
>> +               WRITE_ONCE(pt->pause_allowed, 0);
>> +               WRITE_ONCE(pt->resume_allowed, 0);
>
> barrier()?
>
>>                 pt_config_stop(event);
>> +       }
>>
>>         pt_read_offset(buf);
>>         pt_update_head(pt);
>> @@ -1677,8 +1729,11 @@ static long pt_event_snapshot_aux(struct perf_event *event,
>>          * Compiler barrier not needed as we couldn't have been
>>          * preempted by anything that touches pt->handle_nmi.
>>          */
>> -       if (pt->handle_nmi)
>> +       if (pt->handle_nmi) {
>> +               WRITE_ONCE(pt->resume_allowed, 1);
>>                 pt_config_start(event);
>> +               WRITE_ONCE(pt->pause_allowed, 1);
>
> barrier() went missing again?
>
>> +       }
>>
>>         return ret;
>> }
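Condensing the hunks quoted above into one place, the two flags gate
different entry points. The following is a paraphrase of the patch, not the
patch itself, with the surrounding code elided:

        /* Regular start path: consulted on every attempt to enable TraceEn. */
        static void pt_config_start(struct perf_event *event)
        {
                if (READ_ONCE(event->aux_paused))  /* user-requested pause state */
                        return;
                /* ... set RTIT_CTL_TRACEEN ... */
        }

        /* PERF_EF_RESUME path: only acts once pt_config() has set up PT. */
        static void pt_event_start(struct perf_event *event, int mode)
        {
                if (mode & PERF_EF_RESUME) {
                        if (READ_ONCE(pt->resume_allowed))  /* PT configured? */
                                pt_config_start(event);     /* still obeys aux_paused */
                        return;
                }
                /* ... regular ->start() ... */
        }

So aux_paused is the user-visible pause state, checked on every start, while
resume_allowed / pause_allowed are internal guards against PERF_EF_RESUME or
PERF_EF_PAUSE arriving while PT is not yet configured or is mid-stop.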
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 14db6d9d318b..fab54d7f7f7f 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -418,6 +418,9 @@ static void pt_config_start(struct perf_event *event)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	u64 ctl = event->hw.config;
 
+	if (READ_ONCE(event->aux_paused))
+		return;
+
 	ctl |= RTIT_CTL_TRACEEN;
 	if (READ_ONCE(pt->vmx_on))
 		perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
@@ -534,7 +537,20 @@ static void pt_config(struct perf_event *event)
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
 	event->hw.config = reg;
+
+	/*
+	 * Allow resume before starting so as not to overwrite a value set by a
+	 * PMI.
+	 */
+	WRITE_ONCE(pt->resume_allowed, 1);
+
 	pt_config_start(event);
+
+	/*
+	 * Allow pause after starting so its pt_config_stop() doesn't race with
+	 * pt_config_start().
+	 */
+	WRITE_ONCE(pt->pause_allowed, 1);
 }
 
 static void pt_config_stop(struct perf_event *event)
@@ -1511,6 +1527,7 @@ void intel_pt_interrupt(void)
 	buf = perf_aux_output_begin(&pt->handle, event);
 	if (!buf) {
 		event->hw.state = PERF_HES_STOPPED;
+		pt->resume_allowed = 0;
 		return;
 	}
 
@@ -1519,6 +1536,7 @@ void intel_pt_interrupt(void)
 	ret = pt_buffer_reset_markers(buf, &pt->handle);
 	if (ret) {
 		perf_aux_output_end(&pt->handle, 0);
+		pt->resume_allowed = 0;
 		return;
 	}
 
@@ -1573,6 +1591,26 @@ static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf;
 
+	if (mode & PERF_EF_RESUME) {
+		if (READ_ONCE(pt->resume_allowed)) {
+			u64 status;
+
+			/*
+			 * Only if the trace is not active and the error and
+			 * stopped bits are clear, is it safe to start, but a
+			 * PMI might have just cleared these, so resume_allowed
+			 * must be checked again also.
+			 */
+			rdmsrl(MSR_IA32_RTIT_STATUS, status);
+			if (!(status & (RTIT_STATUS_TRIGGEREN |
+					RTIT_STATUS_ERROR |
+					RTIT_STATUS_STOPPED)) &&
+			    READ_ONCE(pt->resume_allowed))
+				pt_config_start(event);
+		}
+		return;
+	}
+
 	buf = perf_aux_output_begin(&pt->handle, event);
 	if (!buf)
 		goto fail_stop;
@@ -1601,6 +1639,16 @@ static void pt_event_stop(struct perf_event *event, int mode)
 {
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 
+	if (mode & PERF_EF_PAUSE) {
+		if (READ_ONCE(pt->pause_allowed))
+			pt_config_stop(event);
+		return;
+	}
+
+	/* Protect against racing */
+	WRITE_ONCE(pt->pause_allowed, 0);
+	WRITE_ONCE(pt->resume_allowed, 0);
+
 	/*
 	 * Protect against the PMI racing with disabling wrmsr,
 	 * see comment in intel_pt_interrupt().
@@ -1659,8 +1707,12 @@ static long pt_event_snapshot_aux(struct perf_event *event,
 	/*
 	 * Here, handle_nmi tells us if the tracing is on
 	 */
-	if (READ_ONCE(pt->handle_nmi))
+	if (READ_ONCE(pt->handle_nmi)) {
+		/* Protect against racing */
+		WRITE_ONCE(pt->pause_allowed, 0);
+		WRITE_ONCE(pt->resume_allowed, 0);
 		pt_config_stop(event);
+	}
 
 	pt_read_offset(buf);
 	pt_update_head(pt);
@@ -1677,8 +1729,11 @@ static long pt_event_snapshot_aux(struct perf_event *event,
 	 * Compiler barrier not needed as we couldn't have been
 	 * preempted by anything that touches pt->handle_nmi.
 	 */
-	if (pt->handle_nmi)
+	if (pt->handle_nmi) {
+		WRITE_ONCE(pt->resume_allowed, 1);
 		pt_config_start(event);
+		WRITE_ONCE(pt->pause_allowed, 1);
+	}
 
 	return ret;
 }
@@ -1794,7 +1849,9 @@ static __init int pt_init(void)
 	if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
 		pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
 
-	pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
+	pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE |
+				   PERF_PMU_CAP_ITRACE |
+				   PERF_PMU_CAP_AUX_PAUSE;
 	pt_pmu.pmu.attr_groups = pt_attr_groups;
 	pt_pmu.pmu.task_ctx_nr = perf_sw_context;
 	pt_pmu.pmu.event_init = pt_event_init;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 96906a62aacd..b9527205e028 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -117,6 +117,8 @@ struct pt_filters {
  * @filters:		last configured filters
  * @handle_nmi:		do handle PT PMI on this cpu, there's an active event
  * @vmx_on:		1 if VMX is ON on this cpu
+ * @pause_allowed:	PERF_EF_PAUSE is allowed to stop tracing
+ * @resume_allowed:	PERF_EF_RESUME is allowed to start tracing
  * @output_base:	cached RTIT_OUTPUT_BASE MSR value
  * @output_mask:	cached RTIT_OUTPUT_MASK MSR value
  */
@@ -125,6 +127,8 @@ struct pt {
 	struct pt_filters	filters;
 	int			handle_nmi;
 	int			vmx_on;
+	int			pause_allowed;
+	int			resume_allowed;
 	u64			output_base;
 	u64			output_mask;
 };
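For reference, a hedged sketch of how userspace might exercise this once the
series lands. The aux_resume and aux_start_paused attribute bits come from
patch 1 of this series; the arrangement (PT event as group leader, the
resuming event as a group member) and the error handling are assumptions, and
it needs uapi headers from a kernel with the series applied. The PMU type is
dynamic, so it is read from the standard sysfs file:

        #include <linux/perf_event.h>
        #include <sys/syscall.h>
        #include <unistd.h>
        #include <stdio.h>
        #include <string.h>

        static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                                   int cpu, int group_fd, unsigned long flags)
        {
                return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
        }

        int main(void)
        {
                struct perf_event_attr pt_attr, ev_attr;
                unsigned int pt_type = 0;
                int pt_fd, ev_fd;

                /* The intel_pt PMU type is dynamic; read it from sysfs. */
                FILE *f = fopen("/sys/bus/event_source/devices/intel_pt/type", "r");
                if (!f || fscanf(f, "%u", &pt_type) != 1) {
                        perror("intel_pt type");
                        return 1;
                }
                fclose(f);

                /* AUX event: Intel PT tracing this task, starting paused. */
                memset(&pt_attr, 0, sizeof(pt_attr));
                pt_attr.size = sizeof(pt_attr);
                pt_attr.type = pt_type;
                pt_attr.disabled = 1;
                pt_attr.aux_start_paused = 1;   /* bit added by patch 1 */

                pt_fd = perf_event_open(&pt_attr, 0, -1, -1, 0);
                if (pt_fd < 0) {
                        perror("perf_event_open(intel_pt)");
                        return 1;
                }

                /* Sampling event in the same group: each overflow resumes the trace. */
                memset(&ev_attr, 0, sizeof(ev_attr));
                ev_attr.size = sizeof(ev_attr);
                ev_attr.type = PERF_TYPE_HARDWARE;
                ev_attr.config = PERF_COUNT_HW_INSTRUCTIONS;
                ev_attr.sample_period = 100000;
                ev_attr.aux_resume = 1;         /* bit added by patch 1 */

                ev_fd = perf_event_open(&ev_attr, 0, -1, pt_fd, 0);
                if (ev_fd < 0) {
                        perror("perf_event_open(resume event)");
                        return 1;
                }

                /* ... mmap the perf and AUX ring buffers, enable, consume data ... */
                return 0;
        }

On the kernel side, the overflow handler then calls ->stop(PERF_EF_PAUSE) or
->start(PERF_EF_RESUME) on the AUX event, which is exactly where the
pause_allowed / resume_allowed guards in this patch come into play.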