diff mbox series

[v3,06/10] coresight: trbe: Fix handling of spurious interrupts

Message ID 20210914102641.1852544-7-suzuki.poulose@arm.com (mailing list archive)
State New, archived
Headers show
Series coresight: TRBE and Self-Hosted trace fixes | expand

Commit Message

Suzuki K Poulose Sept. 14, 2021, 10:26 a.m. UTC
On a spurious IRQ, right now we disable the TRBE and then re-enable
it, resetting the "buffer" pointers (i.e. BASE, LIMIT and, more
importantly, WRITE) to the original pointers from the AUX handle.
This implies that we overwrite any trace that was written so far
(by overwriting TRBPTR), while we should have ignored the IRQ.

This patch cleans up the behavior by only stopping the TRBE if the
IRQ was indeed raised, as we can read the TRBSR without stopping
the TRBE (only writes to the TRBSR require the TRBE to be disabled).
Also, on detecting a spurious IRQ after examining the TRBSR,
we simply re-enable the TRBE without touching the other parameters.

Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
---
 drivers/hwtracing/coresight/coresight-trbe.c | 30 ++++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

Comments

Anshuman Khandual Sept. 15, 2021, 5:44 a.m. UTC | #1
On 9/14/21 3:56 PM, Suzuki K Poulose wrote:
> On a spurious IRQ, right now we disable the TRBE and then re-enable
> it back, resetting the "buffer" pointers(i.e BASE, LIMIT and more
> importantly WRITE) to the original pointers from the AUX handle.
> This implies that we overwrite any trace that was written so far,
> (by overwriting TRBPTR) while we should have ignored the IRQ.
> 
> This patch cleans the behavior, by only stopping the TRBE if the
> IRQ was indeed raised, as we can read the TRBSR without stopping
> the TRBE (Only writes to the TRBSR requires the TRBE disabled).
> And also, on detecting a spurious IRQ after examining the TRBSR,
> we simply re-enable the TRBE without touching the other parameters.
> 
> Cc: Anshuman Khandual <anshuman.khandual@arm.com>
> Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
> Cc: Mike Leach <mike.leach@linaro.org>
> Cc: Leo Yan <leo.yan@linaro.org>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>

Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>

> ---
>  drivers/hwtracing/coresight/coresight-trbe.c | 30 ++++++++++----------
>  1 file changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
> index 5297b11f26b7..de99dd0aecd3 100644
> --- a/drivers/hwtracing/coresight/coresight-trbe.c
> +++ b/drivers/hwtracing/coresight/coresight-trbe.c
> @@ -677,16 +677,16 @@ static int arm_trbe_disable(struct coresight_device *csdev)
>  
>  static void trbe_handle_spurious(struct perf_output_handle *handle)
>  {
> -	struct trbe_buf *buf = etm_perf_sink_config(handle);
> +	u64 limitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
>  
> -	buf->trbe_limit = compute_trbe_buffer_limit(handle);
> -	buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
> -	if (buf->trbe_limit == buf->trbe_base) {
> -		trbe_drain_and_disable_local();
> -		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
> -		return;
> -	}
> -	trbe_enable_hw(buf);
> +	/*
> +	 * If the IRQ was spurious, simply re-enable the TRBE
> +	 * back without modifying the buffer parameters to
> +	 * retain the trace collected so far.
> +	 */
> +	limitr |= TRBLIMITR_ENABLE;
> +	write_sysreg_s(limitr, SYS_TRBLIMITR_EL1);
> +	isb();
>  }
>  
>  static void trbe_handle_overflow(struct perf_output_handle *handle)
> @@ -759,12 +759,7 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
>  	enum trbe_fault_action act;
>  	u64 status;
>  
> -	/*
> -	 * Ensure the trace is visible to the CPUs and
> -	 * any external aborts have been resolved.
> -	 */
> -	trbe_drain_and_disable_local();
> -
> +	/* Reads to TRBSR_EL1 is fine when TRBE is active */
>  	status = read_sysreg_s(SYS_TRBSR_EL1);
>  	/*
>  	 * If the pending IRQ was handled by update_buffer callback
> @@ -773,6 +768,11 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
>  	if (!is_trbe_irq(status))
>  		return IRQ_NONE;
>  
> +	/*
> +	 * Ensure the trace is visible to the CPUs and
> +	 * any external aborts have been resolved.
> +	 */
> +	trbe_drain_and_disable_local();
>  	clr_trbe_irq();
>  	isb();
>  
>
Mathieu Poirier Sept. 21, 2021, 5:24 p.m. UTC | #2
On Tue, Sep 14, 2021 at 11:26:37AM +0100, Suzuki K Poulose wrote:
> On a spurious IRQ, right now we disable the TRBE and then re-enable
> it back, resetting the "buffer" pointers(i.e BASE, LIMIT and more
> importantly WRITE) to the original pointers from the AUX handle.
> This implies that we overwrite any trace that was written so far,
> (by overwriting TRBPTR) while we should have ignored the IRQ.
> 
> This patch cleans the behavior, by only stopping the TRBE if the
> IRQ was indeed raised, as we can read the TRBSR without stopping
> the TRBE (Only writes to the TRBSR requires the TRBE disabled).
> And also, on detecting a spurious IRQ after examining the TRBSR,
> we simply re-enable the TRBE without touching the other parameters.
> 
> Cc: Anshuman Khandual <anshuman.khandual@arm.com>
> Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
> Cc: Mike Leach <mike.leach@linaro.org>
> Cc: Leo Yan <leo.yan@linaro.org>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> ---
>  drivers/hwtracing/coresight/coresight-trbe.c | 30 ++++++++++----------
>  1 file changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
> index 5297b11f26b7..de99dd0aecd3 100644
> --- a/drivers/hwtracing/coresight/coresight-trbe.c
> +++ b/drivers/hwtracing/coresight/coresight-trbe.c
> @@ -677,16 +677,16 @@ static int arm_trbe_disable(struct coresight_device *csdev)
>  
>  static void trbe_handle_spurious(struct perf_output_handle *handle)
>  {
> -	struct trbe_buf *buf = etm_perf_sink_config(handle);
> +	u64 limitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
>  
> -	buf->trbe_limit = compute_trbe_buffer_limit(handle);
> -	buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
> -	if (buf->trbe_limit == buf->trbe_base) {
> -		trbe_drain_and_disable_local();
> -		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
> -		return;
> -	}
> -	trbe_enable_hw(buf);
> +	/*
> +	 * If the IRQ was spurious, simply re-enable the TRBE
> +	 * back without modifying the buffer parameters to
> +	 * retain the trace collected so far.
> +	 */
> +	limitr |= TRBLIMITR_ENABLE;
> +	write_sysreg_s(limitr, SYS_TRBLIMITR_EL1);
> +	isb();

I understand (and agree with) this part of the patch...

>  }
>  
>  static void trbe_handle_overflow(struct perf_output_handle *handle)
> @@ -759,12 +759,7 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
>  	enum trbe_fault_action act;
>  	u64 status;
>  
> -	/*
> -	 * Ensure the trace is visible to the CPUs and
> -	 * any external aborts have been resolved.
> -	 */
> -	trbe_drain_and_disable_local();
> -
> +	/* Reads to TRBSR_EL1 is fine when TRBE is active */
>  	status = read_sysreg_s(SYS_TRBSR_EL1);
>  	/*
>  	 * If the pending IRQ was handled by update_buffer callback
> @@ -773,6 +768,11 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
>  	if (!is_trbe_irq(status))
>  		return IRQ_NONE;
>  
> +	/*
> +	 * Ensure the trace is visible to the CPUs and
> +	 * any external aborts have been resolved.
> +	 */
> +	trbe_drain_and_disable_local();

But not this part...  I can see why you'd want to move this after the check for
is_trbe_irq(), but not how it relates to spurious interrupts.  To me it seems
like it is addressing another issue.  If those code snippets are related then a
good dose of comments is missing.

Thanks,
Mathieu

>  	clr_trbe_irq();
>  	isb();
>  
> -- 
> 2.24.1
>
Suzuki K Poulose Sept. 21, 2021, 9:29 p.m. UTC | #3
On 21/09/2021 18:24, Mathieu Poirier wrote:
> On Tue, Sep 14, 2021 at 11:26:37AM +0100, Suzuki K Poulose wrote:
>> On a spurious IRQ, right now we disable the TRBE and then re-enable
>> it back, resetting the "buffer" pointers(i.e BASE, LIMIT and more
>> importantly WRITE) to the original pointers from the AUX handle.
>> This implies that we overwrite any trace that was written so far,
>> (by overwriting TRBPTR) while we should have ignored the IRQ.
>>
>> This patch cleans the behavior, by only stopping the TRBE if the
>> IRQ was indeed raised, as we can read the TRBSR without stopping
>> the TRBE (Only writes to the TRBSR requires the TRBE disabled).
>> And also, on detecting a spurious IRQ after examining the TRBSR,
>> we simply re-enable the TRBE without touching the other parameters.
>>
>> Cc: Anshuman Khandual <anshuman.khandual@arm.com>
>> Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
>> Cc: Mike Leach <mike.leach@linaro.org>
>> Cc: Leo Yan <leo.yan@linaro.org>
>> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
>> ---
>>   drivers/hwtracing/coresight/coresight-trbe.c | 30 ++++++++++----------
>>   1 file changed, 15 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
>> index 5297b11f26b7..de99dd0aecd3 100644
>> --- a/drivers/hwtracing/coresight/coresight-trbe.c
>> +++ b/drivers/hwtracing/coresight/coresight-trbe.c
>> @@ -677,16 +677,16 @@ static int arm_trbe_disable(struct coresight_device *csdev)
>>   
>>   static void trbe_handle_spurious(struct perf_output_handle *handle)
>>   {
>> -	struct trbe_buf *buf = etm_perf_sink_config(handle);
>> +	u64 limitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
>>   
>> -	buf->trbe_limit = compute_trbe_buffer_limit(handle);
>> -	buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
>> -	if (buf->trbe_limit == buf->trbe_base) {
>> -		trbe_drain_and_disable_local();
>> -		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
>> -		return;
>> -	}
>> -	trbe_enable_hw(buf);
>> +	/*
>> +	 * If the IRQ was spurious, simply re-enable the TRBE
>> +	 * back without modifying the buffer parameters to
>> +	 * retain the trace collected so far.
>> +	 */
>> +	limitr |= TRBLIMITR_ENABLE;
>> +	write_sysreg_s(limitr, SYS_TRBLIMITR_EL1);
>> +	isb();
> 
> I understand (and agree with) this part of the patch...
> 
>>   }
>>   
>>   static void trbe_handle_overflow(struct perf_output_handle *handle)
>> @@ -759,12 +759,7 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
>>   	enum trbe_fault_action act;
>>   	u64 status;
>>   
>> -	/*
>> -	 * Ensure the trace is visible to the CPUs and
>> -	 * any external aborts have been resolved.
>> -	 */
>> -	trbe_drain_and_disable_local();
>> -
>> +	/* Reads to TRBSR_EL1 is fine when TRBE is active */
>>   	status = read_sysreg_s(SYS_TRBSR_EL1);
>>   	/*
>>   	 * If the pending IRQ was handled by update_buffer callback

[0] See below

>> @@ -773,6 +768,11 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
>>   	if (!is_trbe_irq(status))
>>   		return IRQ_NONE;
>>   
>> +	/*
>> +	 * Ensure the trace is visible to the CPUs and
>> +	 * any external aborts have been resolved.
>> +	 */
>> +	trbe_drain_and_disable_local();
> 
> But not this part...  I can see why you'd want to move this after the check for
> is_trbe_irq(), but not how it relates to spurious interrupts.  To me it seems
> like it is addressing another issue.  If those code snippets are related then a
> good dose of comments is missing.

This step is to make sure that we stop the TRBE only when there is
really something to process (i.e., TRBSR indicates an IRQ was raised).
Also, there is a comment [0] above, for handling a case where the TRBE
event was consumed by "update_buffer()" due to a race with the IRQ
handler. Thus we stop the TRBE only when we need to analyse the cause
and take an action. I agree there is a bit of a disconnect.

I can think of the following options:

  - Split the patch into 2, with:
    1. Don't stop the TRBE if there is no IRQ (the bit explained above)
    2. Don't reset the TRBE ptrs on spurious IRQ

    OR

  - Add the above comment to the section.

The commit description has a hint, "This patch cleans the behavior, by 
only stopping the TRBE if the IRQ was indeed raised", but I agree that
the code could be documented too.

Let me know what you think.

Thanks for the review.

Suzuki
Mathieu Poirier Sept. 22, 2021, 5:13 p.m. UTC | #4
On Tue, Sep 21, 2021 at 10:29:20PM +0100, Suzuki K Poulose wrote:
> On 21/09/2021 18:24, Mathieu Poirier wrote:
> > On Tue, Sep 14, 2021 at 11:26:37AM +0100, Suzuki K Poulose wrote:
> > > On a spurious IRQ, right now we disable the TRBE and then re-enable
> > > it back, resetting the "buffer" pointers(i.e BASE, LIMIT and more
> > > importantly WRITE) to the original pointers from the AUX handle.
> > > This implies that we overwrite any trace that was written so far,
> > > (by overwriting TRBPTR) while we should have ignored the IRQ.
> > > 
> > > This patch cleans the behavior, by only stopping the TRBE if the
> > > IRQ was indeed raised, as we can read the TRBSR without stopping
> > > the TRBE (Only writes to the TRBSR requires the TRBE disabled).
> > > And also, on detecting a spurious IRQ after examining the TRBSR,
> > > we simply re-enable the TRBE without touching the other parameters.
> > > 
> > > Cc: Anshuman Khandual <anshuman.khandual@arm.com>
> > > Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
> > > Cc: Mike Leach <mike.leach@linaro.org>
> > > Cc: Leo Yan <leo.yan@linaro.org>
> > > Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> > > ---
> > >   drivers/hwtracing/coresight/coresight-trbe.c | 30 ++++++++++----------
> > >   1 file changed, 15 insertions(+), 15 deletions(-)
> > > 
> > > diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
> > > index 5297b11f26b7..de99dd0aecd3 100644
> > > --- a/drivers/hwtracing/coresight/coresight-trbe.c
> > > +++ b/drivers/hwtracing/coresight/coresight-trbe.c
> > > @@ -677,16 +677,16 @@ static int arm_trbe_disable(struct coresight_device *csdev)
> > >   static void trbe_handle_spurious(struct perf_output_handle *handle)
> > >   {
> > > -	struct trbe_buf *buf = etm_perf_sink_config(handle);
> > > +	u64 limitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
> > > -	buf->trbe_limit = compute_trbe_buffer_limit(handle);
> > > -	buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
> > > -	if (buf->trbe_limit == buf->trbe_base) {
> > > -		trbe_drain_and_disable_local();
> > > -		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
> > > -		return;
> > > -	}
> > > -	trbe_enable_hw(buf);
> > > +	/*
> > > +	 * If the IRQ was spurious, simply re-enable the TRBE
> > > +	 * back without modifying the buffer parameters to
> > > +	 * retain the trace collected so far.
> > > +	 */
> > > +	limitr |= TRBLIMITR_ENABLE;
> > > +	write_sysreg_s(limitr, SYS_TRBLIMITR_EL1);
> > > +	isb();
> > 
> > I understand (and agree with) this part of the patch...
> > 
> > >   }
> > >   static void trbe_handle_overflow(struct perf_output_handle *handle)
> > > @@ -759,12 +759,7 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
> > >   	enum trbe_fault_action act;
> > >   	u64 status;
> > > -	/*
> > > -	 * Ensure the trace is visible to the CPUs and
> > > -	 * any external aborts have been resolved.
> > > -	 */
> > > -	trbe_drain_and_disable_local();
> > > -
> > > +	/* Reads to TRBSR_EL1 is fine when TRBE is active */
> > >   	status = read_sysreg_s(SYS_TRBSR_EL1);
> > >   	/*
> > >   	 * If the pending IRQ was handled by update_buffer callback
> 
> [0] See below
> 
> > > @@ -773,6 +768,11 @@ static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
> > >   	if (!is_trbe_irq(status))
> > >   		return IRQ_NONE;
> > > +	/*
> > > +	 * Ensure the trace is visible to the CPUs and
> > > +	 * any external aborts have been resolved.
> > > +	 */
> > > +	trbe_drain_and_disable_local();
> > 
> > But not this part...  I can see why you'd want to move this after the check for
> > is_trbe_irq(), but not how it relates to spurious interrupts.  To me it seems
> > like it is addressing another issue.  If those code snippets are related then a
> > good dose of comments is missing.
> 
> This step is to make sure that we stop the TRBE only when there was
> really something to process. (i.e, TRBSR indicates an IRQ was raised).
> Also, there is a comment [0] above, for handling a case where the TRBE
> event was consumed by the "update_buffer()" due to a race with IRQ
> handler. Thus we stop the TRBE only when we need to analyse the cause
> and take an action. I agree there is a bit of disconnect.
> 
> I can think of the following options:
> 
>  - Split the patch to 2. with
>    1. Don't stop the trbe if there is no IRQ (the bit explained above)
>    2. Don't reset the TRBE ptrs on spurious IRQ

Please do two patches.

Other than this patch I commented on 07 and picked up 08.  Patches 09 and 10
won't apply if 06 and 07 aren't present so please address comments for 06 and 07
and resend all 4 patches (06, 07, 09, 10).

Thanks,
Mathieu

> 
>    OR
> 
>  - Add the above comment to the section.
> 
> The commit description has a hint, "This patch cleans the behavior, by only
> stopping the TRBE if the IRQ was indeed raised", but I agree that
> the code could be documented too.
> 
> Let me know what you think.
> 
> Thanks for the review.
> 
> Suzuki
diff mbox series

Patch

diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
index 5297b11f26b7..de99dd0aecd3 100644
--- a/drivers/hwtracing/coresight/coresight-trbe.c
+++ b/drivers/hwtracing/coresight/coresight-trbe.c
@@ -677,16 +677,16 @@  static int arm_trbe_disable(struct coresight_device *csdev)
 
 static void trbe_handle_spurious(struct perf_output_handle *handle)
 {
-	struct trbe_buf *buf = etm_perf_sink_config(handle);
+	u64 limitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
 
-	buf->trbe_limit = compute_trbe_buffer_limit(handle);
-	buf->trbe_write = buf->trbe_base + PERF_IDX2OFF(handle->head, buf);
-	if (buf->trbe_limit == buf->trbe_base) {
-		trbe_drain_and_disable_local();
-		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
-		return;
-	}
-	trbe_enable_hw(buf);
+	/*
+	 * If the IRQ was spurious, simply re-enable the TRBE
+	 * back without modifying the buffer parameters to
+	 * retain the trace collected so far.
+	 */
+	limitr |= TRBLIMITR_ENABLE;
+	write_sysreg_s(limitr, SYS_TRBLIMITR_EL1);
+	isb();
 }
 
 static void trbe_handle_overflow(struct perf_output_handle *handle)
@@ -759,12 +759,7 @@  static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
 	enum trbe_fault_action act;
 	u64 status;
 
-	/*
-	 * Ensure the trace is visible to the CPUs and
-	 * any external aborts have been resolved.
-	 */
-	trbe_drain_and_disable_local();
-
+	/* Reads to TRBSR_EL1 is fine when TRBE is active */
 	status = read_sysreg_s(SYS_TRBSR_EL1);
 	/*
 	 * If the pending IRQ was handled by update_buffer callback
@@ -773,6 +768,11 @@  static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
 	if (!is_trbe_irq(status))
 		return IRQ_NONE;
 
+	/*
+	 * Ensure the trace is visible to the CPUs and
+	 * any external aborts have been resolved.
+	 */
+	trbe_drain_and_disable_local();
 	clr_trbe_irq();
 	isb();