diff mbox series

[V8,02/12] perf/x86/intel/pt: Add support for pause / resume

Message ID 20240628065111.59718-3-adrian.hunter@intel.com (mailing list archive)
State New, archived
Headers show
Series [V8,01/12] perf/core: Add aux_pause, aux_resume, aux_start_paused | expand

Commit Message

Adrian Hunter June 28, 2024, 6:51 a.m. UTC
Prevent tracing to start if aux_paused.

Implement support for PERF_EF_PAUSE / PERF_EF_RESUME. When aux_paused, stop
tracing. When not aux_paused, only start tracing if it isn't currently
meant to be stopped.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/events/intel/pt.c | 63 ++++++++++++++++++++++++++++++++++++--
 arch/x86/events/intel/pt.h |  4 +++
 2 files changed, 64 insertions(+), 3 deletions(-)

Comments

Peter Zijlstra July 1, 2024, 10:46 a.m. UTC | #1
On Fri, Jun 28, 2024 at 09:51:01AM +0300, Adrian Hunter wrote:

> @@ -534,7 +537,20 @@ static void pt_config(struct perf_event *event)
>  	reg |= (event->attr.config & PT_CONFIG_MASK);
>  
>  	event->hw.config = reg;
> +
> +	/*
> +	 * Allow resume before starting so as not to overwrite a value set by a
> +	 * PMI.
> +	 */
> +	WRITE_ONCE(pt->resume_allowed, 1);
	barrier();
>  	pt_config_start(event);
	barrier();
> +	/*
> +	 * Allow pause after starting so its pt_config_stop() doesn't race with
> +	 * pt_config_start().
> +	 */
> +	WRITE_ONCE(pt->pause_allowed, 1);

IIRC you need those barrier()s, because if the compiler 'helpfully'
inlines the static pt_config_start(), you loose your sequence point and
things can get re-ordered. WRITE_ONCE() only ensures the store is whole
and ordered against other volatile ops, but not against much else.

>  }
>  
>  static void pt_config_stop(struct perf_event *event)
> @@ -1511,6 +1527,7 @@ void intel_pt_interrupt(void)
>  		buf = perf_aux_output_begin(&pt->handle, event);
>  		if (!buf) {
>  			event->hw.state = PERF_HES_STOPPED;
> +			pt->resume_allowed = 0;
>  			return;
>  		}
>  
> @@ -1519,6 +1536,7 @@ void intel_pt_interrupt(void)
>  		ret = pt_buffer_reset_markers(buf, &pt->handle);
>  		if (ret) {
>  			perf_aux_output_end(&pt->handle, 0);
> +			pt->resume_allowed = 0;
>  			return;
>  		}
>  

Above you WRITE_ONCE() on ->resume_allowed, here you do not. Some *SAN
thing or other is bound to get upset about things like that.

> @@ -1573,6 +1591,26 @@ static void pt_event_start(struct perf_event *event, int mode)
>  	struct pt *pt = this_cpu_ptr(&pt_ctx);
>  	struct pt_buffer *buf;
>  
> +	if (mode & PERF_EF_RESUME) {
> +		if (READ_ONCE(pt->resume_allowed)) {

At this point I seem to have lost the plot, how do ->resume_allowed and
->aux_paused interact?

> +			u64 status;
> +
> +			/*
> +			 * Only if the trace is not active and the error and
> +			 * stopped bits are clear, is it safe to start, but a
> +			 * PMI might have just cleared these, so resume_allowed
> +			 * must be checked again also.
> +			 */
> +			rdmsrl(MSR_IA32_RTIT_STATUS, status);
> +			if (!(status & (RTIT_STATUS_TRIGGEREN |
> +					RTIT_STATUS_ERROR |
> +					RTIT_STATUS_STOPPED)) &&
> +			   READ_ONCE(pt->resume_allowed))
> +				pt_config_start(event);
> +		}
> +		return;
> +	}
> +
>  	buf = perf_aux_output_begin(&pt->handle, event);
>  	if (!buf)
>  		goto fail_stop;
> @@ -1601,6 +1639,16 @@ static void pt_event_stop(struct perf_event *event, int mode)
>  {
>  	struct pt *pt = this_cpu_ptr(&pt_ctx);
>  
> +	if (mode & PERF_EF_PAUSE) {
> +		if (READ_ONCE(pt->pause_allowed))
> +			pt_config_stop(event);
> +		return;
> +	}
> +
> +	/* Protect against racing */

No F1 cars allowed? Sure the comment can elucidate the reader as to what
actual race one is concerned about, no?

> +	WRITE_ONCE(pt->pause_allowed, 0);
> +	WRITE_ONCE(pt->resume_allowed, 0);
> +
>  	/*
>  	 * Protect against the PMI racing with disabling wrmsr,
>  	 * see comment in intel_pt_interrupt().
> @@ -1659,8 +1707,12 @@ static long pt_event_snapshot_aux(struct perf_event *event,
>  	/*
>  	 * Here, handle_nmi tells us if the tracing is on
>  	 */
> -	if (READ_ONCE(pt->handle_nmi))
> +	if (READ_ONCE(pt->handle_nmi)) {
> +		/* Protect against racing */
> +		WRITE_ONCE(pt->pause_allowed, 0);
> +		WRITE_ONCE(pt->resume_allowed, 0);

barrier()?

>  		pt_config_stop(event);
> +	}
>  
>  	pt_read_offset(buf);
>  	pt_update_head(pt);
> @@ -1677,8 +1729,11 @@ static long pt_event_snapshot_aux(struct perf_event *event,
>  	 * Compiler barrier not needed as we couldn't have been
>  	 * preempted by anything that touches pt->handle_nmi.
>  	 */
> -	if (pt->handle_nmi)
> +	if (pt->handle_nmi) {
> +		WRITE_ONCE(pt->resume_allowed, 1);
>  		pt_config_start(event);
> +		WRITE_ONCE(pt->pause_allowed, 1);

barrier() went missing again?

> +	}
>  
>  	return ret;
>  }
Adrian Hunter July 11, 2024, 1:35 p.m. UTC | #2
On 1/07/24 13:46, Peter Zijlstra wrote:
> On Fri, Jun 28, 2024 at 09:51:01AM +0300, Adrian Hunter wrote:
> 
>> @@ -534,7 +537,20 @@ static void pt_config(struct perf_event *event)
>>  	reg |= (event->attr.config & PT_CONFIG_MASK);
>>  
>>  	event->hw.config = reg;
>> +
>> +	/*
>> +	 * Allow resume before starting so as not to overwrite a value set by a
>> +	 * PMI.
>> +	 */
>> +	WRITE_ONCE(pt->resume_allowed, 1);
> 	barrier();
>>  	pt_config_start(event);
> 	barrier();
>> +	/*
>> +	 * Allow pause after starting so its pt_config_stop() doesn't race with
>> +	 * pt_config_start().
>> +	 */
>> +	WRITE_ONCE(pt->pause_allowed, 1);
> 
> IIRC you need those barrier()s, because if the compiler 'helpfully'
> inlines the static pt_config_start(), you loose your sequence point and
> things can get re-ordered. WRITE_ONCE() only ensures the store is whole
> and ordered against other volatile ops, but not against much else.

Yes barriers are needed.  gcc even says:

	...an implementation is free to reorder and combine volatile
	accesses that occur between sequence points...

	Accesses to non-volatile objects are not ordered with respect
	to volatile accesses.

	https://gcc.gnu.org/onlinedocs/gcc/Volatiles.html

> 
>>  }
>>  
>>  static void pt_config_stop(struct perf_event *event)
>> @@ -1511,6 +1527,7 @@ void intel_pt_interrupt(void)
>>  		buf = perf_aux_output_begin(&pt->handle, event);
>>  		if (!buf) {
>>  			event->hw.state = PERF_HES_STOPPED;
>> +			pt->resume_allowed = 0;
>>  			return;
>>  		}
>>  
>> @@ -1519,6 +1536,7 @@ void intel_pt_interrupt(void)
>>  		ret = pt_buffer_reset_markers(buf, &pt->handle);
>>  		if (ret) {
>>  			perf_aux_output_end(&pt->handle, 0);
>> +			pt->resume_allowed = 0;
>>  			return;
>>  		}
>>  
> 
> Above you WRITE_ONCE() on ->resume_allowed, here you do not. Some *SAN
> thing or other is bound to get upset about things like that.
> 
>> @@ -1573,6 +1591,26 @@ static void pt_event_start(struct perf_event *event, int mode)
>>  	struct pt *pt = this_cpu_ptr(&pt_ctx);
>>  	struct pt_buffer *buf;
>>  
>> +	if (mode & PERF_EF_RESUME) {
>> +		if (READ_ONCE(pt->resume_allowed)) {
> 
> At this point I seem to have lost the plot, how do ->resume_allowed and
> ->aux_paused interact?

This is assuming there is no guaranteed order that events
in the group are started / stopped, so there is a need to
protect against pt being resumed when it is not yet configured,
or when it is in the process of being stopped.

Updates to event->hw.state don't happen at quite the right
places to be used instead, and the needs of resume are slightly
different to pause.

resume_allowed means pt is configured and so ->start(PERF_EF_RESUME)
can actually safely start pt. aux_paused is separate and takes
effect when there is a regular start.

> 
>> +			u64 status;
>> +
>> +			/*
>> +			 * Only if the trace is not active and the error and
>> +			 * stopped bits are clear, is it safe to start, but a
>> +			 * PMI might have just cleared these, so resume_allowed
>> +			 * must be checked again also.
>> +			 */
>> +			rdmsrl(MSR_IA32_RTIT_STATUS, status);
>> +			if (!(status & (RTIT_STATUS_TRIGGEREN |
>> +					RTIT_STATUS_ERROR |
>> +					RTIT_STATUS_STOPPED)) &&
>> +			   READ_ONCE(pt->resume_allowed))
>> +				pt_config_start(event);
>> +		}
>> +		return;
>> +	}
>> +
>>  	buf = perf_aux_output_begin(&pt->handle, event);
>>  	if (!buf)
>>  		goto fail_stop;
>> @@ -1601,6 +1639,16 @@ static void pt_event_stop(struct perf_event *event, int mode)
>>  {
>>  	struct pt *pt = this_cpu_ptr(&pt_ctx);
>>  
>> +	if (mode & PERF_EF_PAUSE) {
>> +		if (READ_ONCE(pt->pause_allowed))
>> +			pt_config_stop(event);
>> +		return;
>> +	}
>> +
>> +	/* Protect against racing */
> 
> No F1 cars allowed? Sure the comment can elucidate the reader as to what
> actual race one is concerned about, no?

Yeah this needs more comments.

> 
>> +	WRITE_ONCE(pt->pause_allowed, 0);
>> +	WRITE_ONCE(pt->resume_allowed, 0);
>> +
>>  	/*
>>  	 * Protect against the PMI racing with disabling wrmsr,
>>  	 * see comment in intel_pt_interrupt().
>> @@ -1659,8 +1707,12 @@ static long pt_event_snapshot_aux(struct perf_event *event,
>>  	/*
>>  	 * Here, handle_nmi tells us if the tracing is on
>>  	 */
>> -	if (READ_ONCE(pt->handle_nmi))
>> +	if (READ_ONCE(pt->handle_nmi)) {
>> +		/* Protect against racing */
>> +		WRITE_ONCE(pt->pause_allowed, 0);
>> +		WRITE_ONCE(pt->resume_allowed, 0);
> 
> barrier()?
> 
>>  		pt_config_stop(event);
>> +	}
>>  
>>  	pt_read_offset(buf);
>>  	pt_update_head(pt);
>> @@ -1677,8 +1729,11 @@ static long pt_event_snapshot_aux(struct perf_event *event,
>>  	 * Compiler barrier not needed as we couldn't have been
>>  	 * preempted by anything that touches pt->handle_nmi.
>>  	 */
>> -	if (pt->handle_nmi)
>> +	if (pt->handle_nmi) {
>> +		WRITE_ONCE(pt->resume_allowed, 1);
>>  		pt_config_start(event);
>> +		WRITE_ONCE(pt->pause_allowed, 1);
> 
> barrier() went missing again?
> 
>> +	}
>>  
>>  	return ret;
>>  }
diff mbox series

Patch

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 14db6d9d318b..fab54d7f7f7f 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -418,6 +418,9 @@  static void pt_config_start(struct perf_event *event)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	u64 ctl = event->hw.config;
 
+	if (READ_ONCE(event->aux_paused))
+		return;
+
 	ctl |= RTIT_CTL_TRACEEN;
 	if (READ_ONCE(pt->vmx_on))
 		perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
@@ -534,7 +537,20 @@  static void pt_config(struct perf_event *event)
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
 	event->hw.config = reg;
+
+	/*
+	 * Allow resume before starting so as not to overwrite a value set by a
+	 * PMI.
+	 */
+	WRITE_ONCE(pt->resume_allowed, 1);
+
 	pt_config_start(event);
+
+	/*
+	 * Allow pause after starting so its pt_config_stop() doesn't race with
+	 * pt_config_start().
+	 */
+	WRITE_ONCE(pt->pause_allowed, 1);
 }
 
 static void pt_config_stop(struct perf_event *event)
@@ -1511,6 +1527,7 @@  void intel_pt_interrupt(void)
 		buf = perf_aux_output_begin(&pt->handle, event);
 		if (!buf) {
 			event->hw.state = PERF_HES_STOPPED;
+			pt->resume_allowed = 0;
 			return;
 		}
 
@@ -1519,6 +1536,7 @@  void intel_pt_interrupt(void)
 		ret = pt_buffer_reset_markers(buf, &pt->handle);
 		if (ret) {
 			perf_aux_output_end(&pt->handle, 0);
+			pt->resume_allowed = 0;
 			return;
 		}
 
@@ -1573,6 +1591,26 @@  static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf;
 
+	if (mode & PERF_EF_RESUME) {
+		if (READ_ONCE(pt->resume_allowed)) {
+			u64 status;
+
+			/*
+			 * Only if the trace is not active and the error and
+			 * stopped bits are clear, is it safe to start, but a
+			 * PMI might have just cleared these, so resume_allowed
+			 * must be checked again also.
+			 */
+			rdmsrl(MSR_IA32_RTIT_STATUS, status);
+			if (!(status & (RTIT_STATUS_TRIGGEREN |
+					RTIT_STATUS_ERROR |
+					RTIT_STATUS_STOPPED)) &&
+			   READ_ONCE(pt->resume_allowed))
+				pt_config_start(event);
+		}
+		return;
+	}
+
 	buf = perf_aux_output_begin(&pt->handle, event);
 	if (!buf)
 		goto fail_stop;
@@ -1601,6 +1639,16 @@  static void pt_event_stop(struct perf_event *event, int mode)
 {
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 
+	if (mode & PERF_EF_PAUSE) {
+		if (READ_ONCE(pt->pause_allowed))
+			pt_config_stop(event);
+		return;
+	}
+
+	/* Protect against racing */
+	WRITE_ONCE(pt->pause_allowed, 0);
+	WRITE_ONCE(pt->resume_allowed, 0);
+
 	/*
 	 * Protect against the PMI racing with disabling wrmsr,
 	 * see comment in intel_pt_interrupt().
@@ -1659,8 +1707,12 @@  static long pt_event_snapshot_aux(struct perf_event *event,
 	/*
 	 * Here, handle_nmi tells us if the tracing is on
 	 */
-	if (READ_ONCE(pt->handle_nmi))
+	if (READ_ONCE(pt->handle_nmi)) {
+		/* Protect against racing */
+		WRITE_ONCE(pt->pause_allowed, 0);
+		WRITE_ONCE(pt->resume_allowed, 0);
 		pt_config_stop(event);
+	}
 
 	pt_read_offset(buf);
 	pt_update_head(pt);
@@ -1677,8 +1729,11 @@  static long pt_event_snapshot_aux(struct perf_event *event,
 	 * Compiler barrier not needed as we couldn't have been
 	 * preempted by anything that touches pt->handle_nmi.
 	 */
-	if (pt->handle_nmi)
+	if (pt->handle_nmi) {
+		WRITE_ONCE(pt->resume_allowed, 1);
 		pt_config_start(event);
+		WRITE_ONCE(pt->pause_allowed, 1);
+	}
 
 	return ret;
 }
@@ -1794,7 +1849,9 @@  static __init int pt_init(void)
 	if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
 		pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
 
-	pt_pmu.pmu.capabilities	|= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
+	pt_pmu.pmu.capabilities		|= PERF_PMU_CAP_EXCLUSIVE |
+					   PERF_PMU_CAP_ITRACE |
+					   PERF_PMU_CAP_AUX_PAUSE;
 	pt_pmu.pmu.attr_groups		 = pt_attr_groups;
 	pt_pmu.pmu.task_ctx_nr		 = perf_sw_context;
 	pt_pmu.pmu.event_init		 = pt_event_init;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 96906a62aacd..b9527205e028 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -117,6 +117,8 @@  struct pt_filters {
  * @filters:		last configured filters
  * @handle_nmi:		do handle PT PMI on this cpu, there's an active event
  * @vmx_on:		1 if VMX is ON on this cpu
+ * @pause_allowed:	PERF_EF_PAUSE is allowed to stop tracing
+ * @resume_allowed:	PERF_EF_RESUME is allowed to start tracing
  * @output_base:	cached RTIT_OUTPUT_BASE MSR value
  * @output_mask:	cached RTIT_OUTPUT_MASK MSR value
  */
@@ -125,6 +127,8 @@  struct pt {
 	struct pt_filters	filters;
 	int			handle_nmi;
 	int			vmx_on;
+	int			pause_allowed;
+	int			resume_allowed;
 	u64			output_base;
 	u64			output_mask;
 };