diff mbox series

[V3,01/14] coresight: etm-perf: Allow an event to use different sinks

Message ID 1611737738-1493-2-git-send-email-anshuman.khandual@arm.com (mailing list archive)
State New
Headers show
Series arm64: coresight: Enable ETE and TRBE | expand

Commit Message

Anshuman Khandual Jan. 27, 2021, 8:55 a.m. UTC
From: Suzuki K Poulose <suzuki.poulose@arm.com>

When there are multiple sinks on the system, in the absence
of a specified sink, it is quite possible that a default sink
for an ETM could be different from that of another ETM. However
we do not support having multiple sinks for an event yet. This
patch allows the event to use the default sinks on the ETMs
where they are scheduled as long as the sinks are of the same
type.

e.g., if we have 1x1 topology with per-CPU ETRs, the event can
use the per-CPU ETR for the session. However, if the sinks
are of different types, e.g. TMC-ETR on one and a custom sink
on another, the event will only trace on the first detected
sink.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Tested-by: Linu Cherian <lcherian@marvell.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
 drivers/hwtracing/coresight/coresight-etm-perf.c | 48 +++++++++++++++++++-----
 1 file changed, 38 insertions(+), 10 deletions(-)

Comments

Mathieu Poirier Feb. 1, 2021, 11:17 p.m. UTC | #1
Hi Anshuman,

I have started reviewing this set.  As it is quite voluminous, comments will
come over several days.  I will let you know when I am done.

On Wed, Jan 27, 2021 at 02:25:25PM +0530, Anshuman Khandual wrote:
> From: Suzuki K Poulose <suzuki.poulose@arm.com>
> 
> When there are multiple sinks on the system, in the absence
> of a specified sink, it is quite possible that a default sink
> for an ETM could be different from that of another ETM. However
> we do not support having multiple sinks for an event yet. This
> patch allows the event to use the default sinks on the ETMs
> where they are scheduled as long as the sinks are of the same
> type.
> 
> e.g, if we have 1x1 topology with per-CPU ETRs, the event can
> use the per-CPU ETR for the session. However, if the sinks
> are of different type, e.g TMC-ETR on one and a custom sink
> on another, the event will only trace on the first detected
> sink.
>

I found the above changelog very confusing - I read it several times and still
couldn't get all of it.  In the end this patch prevents sinks of different types
from being used for a session, and this is what the text should reflect.
 
> Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
> Cc: Mike Leach <mike.leach@linaro.org>
> Tested-by: Linu Cherian <lcherian@marvell.com>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
>  drivers/hwtracing/coresight/coresight-etm-perf.c | 48 +++++++++++++++++++-----
>  1 file changed, 38 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
> index bdc34ca..eb9e7e9 100644
> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
> @@ -204,6 +204,13 @@ static void etm_free_aux(void *data)
>  	schedule_work(&event_data->work);
>  }
>  
> +static bool sinks_match(struct coresight_device *a, struct coresight_device *b)
> +{
> +	if (!a || !b)
> +		return false;
> +	return (sink_ops(a) == sink_ops(b));

Yes

> +}
> +
>  static void *etm_setup_aux(struct perf_event *event, void **pages,
>  			   int nr_pages, bool overwrite)
>  {
> @@ -212,6 +219,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
>  	cpumask_t *mask;
>  	struct coresight_device *sink = NULL;

        struct coresight_device *user_sink = NULL;

>  	struct etm_event_data *event_data = NULL;
> +	bool sink_forced = false;
>  
>  	event_data = alloc_event_data(cpu);
>  	if (!event_data)
> @@ -222,6 +230,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
>  	if (event->attr.config2) {
>  		id = (u32)event->attr.config2;
>  		sink = coresight_get_sink_by_id(id);

                user_sink = coresight_get_sink_by_id(id);

> +		sink_forced = true;
>  	}
>  
>  	mask = &event_data->mask;
> @@ -235,7 +244,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
>  	 */
>  	for_each_cpu(cpu, mask) {
>  		struct list_head *path;
> -		struct coresight_device *csdev;

                struct coresight_device *last_sink = NULL;

> +		struct coresight_device *csdev, *new_sink;
>  
>  		csdev = per_cpu(csdev_src, cpu);
>  		/*
> @@ -249,21 +258,35 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
>  		}
>  
>  		/*
> -		 * No sink provided - look for a default sink for one of the
> -		 * devices. At present we only support topology where all CPUs
> -		 * use the same sink [N:1], so only need to find one sink. The
> -		 * coresight_build_path later will remove any CPU that does not
> -		 * attach to the sink, or if we have not found a sink.
> +		 * No sink provided - look for a default sink for all the devices.
> +		 * We only support multiple sinks, only if all the default sinks
> +		 * are of the same type, so that the sink buffer can be shared
> +		 * as the event moves around. We don't trace on a CPU if it can't

s/can't/can't./

> +		 *

Extra line

>  		 */
> -		if (!sink)
> -			sink = coresight_find_default_sink(csdev);
> +		if (!sink_forced) {
> +			new_sink = coresight_find_default_sink(csdev);
> +			if (!new_sink) {
> +				cpumask_clear_cpu(cpu, mask);
> +				continue;
> +			}
> +			/* Skip checks for the first sink */
> +			if (!sink) {
> +			       sink = new_sink;
> +			} else if (!sinks_match(new_sink, sink)) {
> +				cpumask_clear_cpu(cpu, mask);
> +				continue;
> +			}
> +		} else {
> +			new_sink = sink;
> +		}

                if (!user_sink) {
                        /* find default sink for this CPU */
                        sink = coresight_find_default_sink(csdev);
                        if (!sink) {
                                cpumask_clear_cpu(cpu, mask);
                                continue;
                        }

                        /* Check new sink against last sink */
                        if (last_sink && !sink_match(last_sink, sink)) {
                                cpumask_clear_cpu(cpu, mask);
                                continue;
                        }

                        last_sink = sink;
                } else {
                        sink = user_sink;
                }

>  
>  		/*
>  		 * Building a path doesn't enable it, it simply builds a
>  		 * list of devices from source to sink that can be
>  		 * referenced later when the path is actually needed.
>  		 */
> -		path = coresight_build_path(csdev, sink);
> +		path = coresight_build_path(csdev, new_sink);
>  		if (IS_ERR(path)) {
>  			cpumask_clear_cpu(cpu, mask);
>  			continue;
> @@ -284,7 +307,12 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
>  	if (!sink_ops(sink)->alloc_buffer || !sink_ops(sink)->free_buffer)
>  		goto err;
>  
> -	/* Allocate the sink buffer for this session */
> +	/*
> +	 * Allocate the sink buffer for this session. All the sinks
> +	 * where this event can be scheduled are ensured to be of the
> +	 * same type. Thus the same sink configuration is used by the
> +	 * sinks.
> +	 */
>  	event_data->snk_config =
>  			sink_ops(sink)->alloc_buffer(sink, event, pages,
>  						     nr_pages, overwrite);
> -- 
> 2.7.4
>
Suzuki K Poulose Feb. 2, 2021, 9:42 a.m. UTC | #2
On 2/1/21 11:17 PM, Mathieu Poirier wrote:
> Hi Anshuman,
> 
> I have started reviewing this set.  As it is quite voluminous comments will
> come over serveral days.  I will let you know when I am done.
> 
> On Wed, Jan 27, 2021 at 02:25:25PM +0530, Anshuman Khandual wrote:
>> From: Suzuki K Poulose <suzuki.poulose@arm.com>
>>
>> When there are multiple sinks on the system, in the absence
>> of a specified sink, it is quite possible that a default sink
>> for an ETM could be different from that of another ETM. However
>> we do not support having multiple sinks for an event yet. This
>> patch allows the event to use the default sinks on the ETMs
>> where they are scheduled as long as the sinks are of the same
>> type.
>>
>> e.g, if we have 1x1 topology with per-CPU ETRs, the event can
>> use the per-CPU ETR for the session. However, if the sinks
>> are of different type, e.g TMC-ETR on one and a custom sink
>> on another, the event will only trace on the first detected
>> sink.
>>
> 
> I found the above changelog very confusing - I read it several times and still
> couldn't get all of it.  In the end this patch prevents sinks of different types
> from being used for session, and this is what the text should reflect.

Sorry about that. Your inference is correct, but it is only a side effect
of the primary motive. How about the following :

"When a sink is not specified by the user, the etm perf driver
finds a suitable sink automatically based on the first ETM, where
this event could be scheduled. Then we allocate the sink buffer based
on the selected sink. This is fine for a CPU bound event as the "sink"
is always guaranteed to be reachable from the ETM (as this is the only
ETM where the event is going to be scheduled). However, if we have a task
bound event, the event could be scheduled on any of the ETMs on the
system. In this case, currently we automatically select a sink and exclude
any ETMs that are not reachable from the selected sink. This is
problematic for 1x1 configurations as we end up tracing the event
only on the "first" ETM, as the default sink is local to the first
ETM and unreachable from the rest.
However, we could allow the other ETMs to trace if they all have a
sink that is compatible with the "selected" sink and can use the
sink buffer. This can be easily done by verifying that they are
all driven by the same driver and match the same subtype."


>   
>> Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
>> Cc: Mike Leach <mike.leach@linaro.org>
>> Tested-by: Linu Cherian <lcherian@marvell.com>
>> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>> ---
>>   drivers/hwtracing/coresight/coresight-etm-perf.c | 48 +++++++++++++++++++-----
>>   1 file changed, 38 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
>> index bdc34ca..eb9e7e9 100644
>> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
>> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
>> @@ -204,6 +204,13 @@ static void etm_free_aux(void *data)
>>   	schedule_work(&event_data->work);
>>   }
>>   
>> +static bool sinks_match(struct coresight_device *a, struct coresight_device *b)
>> +{
>> +	if (!a || !b)
>> +		return false;
>> +	return (sink_ops(a) == sink_ops(b));
> 
> Yes

I think we can tighten this by verifying the dev->sub_type matches too.

> 
>> +}
>> +
>>   static void *etm_setup_aux(struct perf_event *event, void **pages,
>>   			   int nr_pages, bool overwrite)
>>   {
>> @@ -212,6 +219,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
>>   	cpumask_t *mask;
>>   	struct coresight_device *sink = NULL;
> 
>          struct coresight_device *user_sink = NULL;
> 
>>   	struct etm_event_data *event_data = NULL;
>> +	bool sink_forced = false;
>>   
>>   	event_data = alloc_event_data(cpu);
>>   	if (!event_data)
>> @@ -222,6 +230,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
>>   	if (event->attr.config2) {
>>   		id = (u32)event->attr.config2;
>>   		sink = coresight_get_sink_by_id(id);
> 
>                  user_sink = coresight_get_sink_by_id(id);
> 
>> +		sink_forced = true;
>>   	}
>>   
>>   	mask = &event_data->mask;
>> @@ -235,7 +244,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
>>   	 */
>>   	for_each_cpu(cpu, mask) {
>>   		struct list_head *path;
>> -		struct coresight_device *csdev;
> 
>                  struct coresight_device *last_sink = NULL;
> 
>> +		struct coresight_device *csdev, *new_sink;
>>   
>>   		csdev = per_cpu(csdev_src, cpu);
>>   		/*
>> @@ -249,21 +258,35 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
>>   		}
>>   
>>   		/*
>> -		 * No sink provided - look for a default sink for one of the
>> -		 * devices. At present we only support topology where all CPUs
>> -		 * use the same sink [N:1], so only need to find one sink. The
>> -		 * coresight_build_path later will remove any CPU that does not
>> -		 * attach to the sink, or if we have not found a sink.
>> +		 * No sink provided - look for a default sink for all the devices.
>> +		 * We only support multiple sinks, only if all the default sinks
>> +		 * are of the same type, so that the sink buffer can be shared
>> +		 * as the event moves around. We don't trace on a CPU if it can't
> 
> s/can't/can't./
> 
>> +		 *
> 
> Extra line
> 

OK

>>   		 */
>> -		if (!sink)
>> -			sink = coresight_find_default_sink(csdev);
>> +		if (!sink_forced) {
>> +			new_sink = coresight_find_default_sink(csdev);
>> +			if (!new_sink) {
>> +				cpumask_clear_cpu(cpu, mask);
>> +				continue;
>> +			}
>> +			/* Skip checks for the first sink */
>> +			if (!sink) {
>> +			       sink = new_sink;
>> +			} else if (!sinks_match(new_sink, sink)) {
>> +				cpumask_clear_cpu(cpu, mask);
>> +				continue;
>> +			}
>> +		} else {
>> +			new_sink = sink;
>> +		}
> 
>                  if (!user_sink) {
>                          /* find default sink for this CPU */
>                          sink = coresight_find_default_sink(csdev);
>                          if (!sink) {
>                                  cpumask_clear_cpu(cpu, mask);
>                                  continue;
>                          }
> 
>                          /* Chech new sink with last sink */
>                          if (last_sink && !sink_match(last_sink, sink)) {
>                                  cpumask_clear_cpu(cpu, mask);
>                                  continue;
>                          }
> 
>                          last_sink = sink;
>                  } else {
>                          sink = user_sink;
>                  }
> 

Agreed, it is much more readable.

Suzuki
Mike Leach Feb. 2, 2021, 4:33 p.m. UTC | #3
Hi,

On Tue, 2 Feb 2021 at 09:42, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
>
> On 2/1/21 11:17 PM, Mathieu Poirier wrote:
> > Hi Anshuman,
> >
> > I have started reviewing this set.  As it is quite voluminous comments will
> > come over serveral days.  I will let you know when I am done.
> >
> > On Wed, Jan 27, 2021 at 02:25:25PM +0530, Anshuman Khandual wrote:
> >> From: Suzuki K Poulose <suzuki.poulose@arm.com>
> >>
> >> When there are multiple sinks on the system, in the absence
> >> of a specified sink, it is quite possible that a default sink
> >> for an ETM could be different from that of another ETM. However
> >> we do not support having multiple sinks for an event yet. This
> >> patch allows the event to use the default sinks on the ETMs
> >> where they are scheduled as long as the sinks are of the same
> >> type.
> >>
> >> e.g, if we have 1x1 topology with per-CPU ETRs, the event can
> >> use the per-CPU ETR for the session. However, if the sinks
> >> are of different type, e.g TMC-ETR on one and a custom sink
> >> on another, the event will only trace on the first detected
> >> sink.
> >>
> >
> > I found the above changelog very confusing - I read it several times and still
> > couldn't get all of it.  In the end this patch prevents sinks of different types
> > from being used for session, and this is what the text should reflect.
>
> Sorry about that. Your inference is correct, but it is only a side effect
> of the primary motive. How about the following :
>
> "When a sink is not specified by the user, the etm perf driver
> finds a suitable sink automatically based on the first ETM, where
> this event could be scheduled. Then we allocate the sink buffer based
> on the selected sink. This is fine for a CPU bound event as the "sink"
> is always guaranteed to be reachable from the ETM (as this is the only
> ETM where the event is going to be scheduled). However, if we have a task
> bound event, the event could be scheduled on any of the ETMs on the
> system. In this case, currently we automatically select a sink and exclude
> any ETMs that are not reachable from the selected sink. This is
> problematic for 1x1 configurations as we end up in tracing the event
> only on the "first" ETM, as the default sink is local to the first
> ETM and unreachable from the rest.
> However, we could allow the other ETMs to trace if they all have a
> sink that is compatible with the "selected" sink and can use the
> sink buffer. This can be easily done by verifying that they are
> all driven by the same driver and matches the same subtype."
>


Not sure that the logic here makes total sense - I can't see _why_
multiple sinks need to be of the same type.

1) This patch is designed to allow multiple sinks to be used in a 1:1
topology system - but there is no specific restriction here - and N:M
should work on the same basis
2) This implies that multiple sinks will work within the coresight
infrastructure.
3)  The sink interface -> struct coresight_ops_sink allows sinks to be
abstracted - therefore whichever sink is chosen the coresight
infrastructure calls the operations for the given sink.
4) Each individual sink, will have its own hardware buffer - copied
into the perf buffers at some appropriate point.

Thus if the user specifies a selected sink - we need to eliminate any
source that cannot reach it.
If not we need to find the relevant default sink for the source, which
might be a shared ETR, or per CPU TRBE / ETR, and the abstraction
logic ought to handle getting the captured data to the correct place.
If it doesn't then we are on shaky ground with any multiple sink
solution.

On the face of it - type is irrelevant. If I am missing something -
this patch needs a better explanation.


>
> >
> >> Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
> >> Cc: Mike Leach <mike.leach@linaro.org>
> >> Tested-by: Linu Cherian <lcherian@marvell.com>
> >> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> >> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> >> ---
> >>   drivers/hwtracing/coresight/coresight-etm-perf.c | 48 +++++++++++++++++++-----
> >>   1 file changed, 38 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
> >> index bdc34ca..eb9e7e9 100644
> >> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
> >> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
> >> @@ -204,6 +204,13 @@ static void etm_free_aux(void *data)
> >>      schedule_work(&event_data->work);
> >>   }
> >>
> >> +static bool sinks_match(struct coresight_device *a, struct coresight_device *b)
> >> +{
> >> +    if (!a || !b)
> >> +            return false;
> >> +    return (sink_ops(a) == sink_ops(b));
> >
> > Yes
>
> I think we can tighten this by verifying the dev->sub_type matches too.
>
> >
> >> +}
> >> +
> >>   static void *etm_setup_aux(struct perf_event *event, void **pages,
> >>                         int nr_pages, bool overwrite)
> >>   {
> >> @@ -212,6 +219,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> >>      cpumask_t *mask;
> >>      struct coresight_device *sink = NULL;
> >
> >          struct coresight_device *user_sink = NULL;
> >
> >>      struct etm_event_data *event_data = NULL;
> >> +    bool sink_forced = false;
> >>
> >>      event_data = alloc_event_data(cpu);
> >>      if (!event_data)
> >> @@ -222,6 +230,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> >>      if (event->attr.config2) {
> >>              id = (u32)event->attr.config2;
> >>              sink = coresight_get_sink_by_id(id);
> >
> >                  user_sink = coresight_get_sink_by_id(id);
> >
> >> +            sink_forced = true;

The comment for this block uses the term "selected sink", and the
functions use _default_sink . This may read better if we rename the
bool to "selected_sink" rather than "sink_forced"

Regards

Mike
> >>      }
> >>
> >>      mask = &event_data->mask;
> >> @@ -235,7 +244,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> >>       */
> >>      for_each_cpu(cpu, mask) {
> >>              struct list_head *path;
> >> -            struct coresight_device *csdev;
> >
> >                  struct coresight_device *last_sink = NULL;
> >
> >> +            struct coresight_device *csdev, *new_sink;
> >>
> >>              csdev = per_cpu(csdev_src, cpu);
> >>              /*
> >> @@ -249,21 +258,35 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> >>              }
> >>
> >>              /*
> >> -             * No sink provided - look for a default sink for one of the
> >> -             * devices. At present we only support topology where all CPUs
> >> -             * use the same sink [N:1], so only need to find one sink. The
> >> -             * coresight_build_path later will remove any CPU that does not
> >> -             * attach to the sink, or if we have not found a sink.
> >> +             * No sink provided - look for a default sink for all the devices.
> >> +             * We only support multiple sinks, only if all the default sinks
> >> +             * are of the same type, so that the sink buffer can be shared
> >> +             * as the event moves around. We don't trace on a CPU if it can't
> >
> > s/can't/can't./
> >
> >> +             *
> >
> > Extra line
> >
>
> OK
>
> >>               */
> >> -            if (!sink)
> >> -                    sink = coresight_find_default_sink(csdev);
> >> +            if (!sink_forced) {
> >> +                    new_sink = coresight_find_default_sink(csdev);
> >> +                    if (!new_sink) {
> >> +                            cpumask_clear_cpu(cpu, mask);
> >> +                            continue;
> >> +                    }
> >> +                    /* Skip checks for the first sink */
> >> +                    if (!sink) {
> >> +                           sink = new_sink;
> >> +                    } else if (!sinks_match(new_sink, sink)) {
> >> +                            cpumask_clear_cpu(cpu, mask);
> >> +                            continue;
> >> +                    }
> >> +            } else {
> >> +                    new_sink = sink;
> >> +            }
> >
> >                  if (!user_sink) {
> >                          /* find default sink for this CPU */
> >                          sink = coresight_find_default_sink(csdev);
> >                          if (!sink) {
> >                                  cpumask_clear_cpu(cpu, mask);
> >                                  continue;
> >                          }
> >
> >                          /* Chech new sink with last sink */
> >                          if (last_sink && !sink_match(last_sink, sink)) {
> >                                  cpumask_clear_cpu(cpu, mask);
> >                                  continue;
> >                          }
> >
> >                          last_sink = sink;
> >                  } else {
> >                          sink = user_sink;
> >                  }
> >
>
> Agreed, it is much better readable.
>
> Suzuki



--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
Mathieu Poirier Feb. 2, 2021, 4:37 p.m. UTC | #4
On Tue, Feb 02, 2021 at 09:42:34AM +0000, Suzuki K Poulose wrote:
> On 2/1/21 11:17 PM, Mathieu Poirier wrote:
> > Hi Anshuman,
> > 
> > I have started reviewing this set.  As it is quite voluminous comments will
> > come over serveral days.  I will let you know when I am done.
> > 
> > On Wed, Jan 27, 2021 at 02:25:25PM +0530, Anshuman Khandual wrote:
> > > From: Suzuki K Poulose <suzuki.poulose@arm.com>
> > > 
> > > When there are multiple sinks on the system, in the absence
> > > of a specified sink, it is quite possible that a default sink
> > > for an ETM could be different from that of another ETM. However
> > > we do not support having multiple sinks for an event yet. This
> > > patch allows the event to use the default sinks on the ETMs
> > > where they are scheduled as long as the sinks are of the same
> > > type.
> > > 
> > > e.g, if we have 1x1 topology with per-CPU ETRs, the event can
> > > use the per-CPU ETR for the session. However, if the sinks
> > > are of different type, e.g TMC-ETR on one and a custom sink
> > > on another, the event will only trace on the first detected
> > > sink.
> > > 
> > 
> > I found the above changelog very confusing - I read it several times and still
> > couldn't get all of it.  In the end this patch prevents sinks of different types
> > from being used for session, and this is what the text should reflect.
> 
> Sorry about that. Your inference is correct, but it is only a side effect
> of the primary motive. How about the following :
> 
> "When a sink is not specified by the user, the etm perf driver
> finds a suitable sink automatically based on the first ETM, where
> this event could be scheduled. Then we allocate the sink buffer based
> on the selected sink. This is fine for a CPU bound event as the "sink"
> is always guaranteed to be reachable from the ETM (as this is the only
> ETM where the event is going to be scheduled). However, if we have a task
> bound event, the event could be scheduled on any of the ETMs on the
> system. In this case, currently we automatically select a sink and exclude
> any ETMs that are not reachable from the selected sink. This is
> problematic for 1x1 configurations as we end up in tracing the event
> only on the "first" ETM, as the default sink is local to the first
> ETM and unreachable from the rest.
> However, we could allow the other ETMs to trace if they all have a
> sink that is compatible with the "selected" sink and can use the
> sink buffer. This can be easily done by verifying that they are
> all driven by the same driver and matches the same subtype."
>

Much better, thanks for the rework.
 
> 
> > > Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
> > > Cc: Mike Leach <mike.leach@linaro.org>
> > > Tested-by: Linu Cherian <lcherian@marvell.com>
> > > Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> > > Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> > > ---
> > >   drivers/hwtracing/coresight/coresight-etm-perf.c | 48 +++++++++++++++++++-----
> > >   1 file changed, 38 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
> > > index bdc34ca..eb9e7e9 100644
> > > --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
> > > +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
> > > @@ -204,6 +204,13 @@ static void etm_free_aux(void *data)
> > >   	schedule_work(&event_data->work);
> > >   }
> > > +static bool sinks_match(struct coresight_device *a, struct coresight_device *b)
> > > +{
> > > +	if (!a || !b)
> > > +		return false;
> > > +	return (sink_ops(a) == sink_ops(b));
> > 
> > Yes
> 
> I think we can tighten this by verifying the dev->sub_type matches too.
> 

We could do that but I'm not sure we need to.  I remember spending a few minutes
yesterday thinking about ways to make the test more stringent but in the end I
thought what you had was sufficient, at least for now.  I'll leave that one to
you - proceed as you see fit. 

> > 
> > > +}
> > > +
> > >   static void *etm_setup_aux(struct perf_event *event, void **pages,
> > >   			   int nr_pages, bool overwrite)
> > >   {
> > > @@ -212,6 +219,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> > >   	cpumask_t *mask;
> > >   	struct coresight_device *sink = NULL;
> > 
> >          struct coresight_device *user_sink = NULL;
> > 
> > >   	struct etm_event_data *event_data = NULL;
> > > +	bool sink_forced = false;
> > >   	event_data = alloc_event_data(cpu);
> > >   	if (!event_data)
> > > @@ -222,6 +230,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> > >   	if (event->attr.config2) {
> > >   		id = (u32)event->attr.config2;
> > >   		sink = coresight_get_sink_by_id(id);
> > 
> >                  user_sink = coresight_get_sink_by_id(id);
> > 
> > > +		sink_forced = true;
> > >   	}
> > >   	mask = &event_data->mask;
> > > @@ -235,7 +244,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> > >   	 */
> > >   	for_each_cpu(cpu, mask) {
> > >   		struct list_head *path;
> > > -		struct coresight_device *csdev;
> > 
> >                  struct coresight_device *last_sink = NULL;
> > 
> > > +		struct coresight_device *csdev, *new_sink;
> > >   		csdev = per_cpu(csdev_src, cpu);
> > >   		/*
> > > @@ -249,21 +258,35 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> > >   		}
> > >   		/*
> > > -		 * No sink provided - look for a default sink for one of the
> > > -		 * devices. At present we only support topology where all CPUs
> > > -		 * use the same sink [N:1], so only need to find one sink. The
> > > -		 * coresight_build_path later will remove any CPU that does not
> > > -		 * attach to the sink, or if we have not found a sink.
> > > +		 * No sink provided - look for a default sink for all the devices.
> > > +		 * We only support multiple sinks, only if all the default sinks
> > > +		 * are of the same type, so that the sink buffer can be shared
> > > +		 * as the event moves around. We don't trace on a CPU if it can't
> > 
> > s/can't/can't./
> > 
> > > +		 *
> > 
> > Extra line
> > 
> 
> OK
> 
> > >   		 */
> > > -		if (!sink)
> > > -			sink = coresight_find_default_sink(csdev);
> > > +		if (!sink_forced) {
> > > +			new_sink = coresight_find_default_sink(csdev);
> > > +			if (!new_sink) {
> > > +				cpumask_clear_cpu(cpu, mask);
> > > +				continue;
> > > +			}
> > > +			/* Skip checks for the first sink */
> > > +			if (!sink) {
> > > +			       sink = new_sink;
> > > +			} else if (!sinks_match(new_sink, sink)) {
> > > +				cpumask_clear_cpu(cpu, mask);
> > > +				continue;
> > > +			}
> > > +		} else {
> > > +			new_sink = sink;
> > > +		}
> > 
> >                  if (!user_sink) {
> >                          /* find default sink for this CPU */
> >                          sink = coresight_find_default_sink(csdev);
> >                          if (!sink) {
> >                                  cpumask_clear_cpu(cpu, mask);
> >                                  continue;
> >                          }
> > 
> >                          /* Chech new sink with last sink */
> >                          if (last_sink && !sink_match(last_sink, sink)) {
> >                                  cpumask_clear_cpu(cpu, mask);
> >                                  continue;
> >                          }
> > 
> >                          last_sink = sink;
> >                  } else {
> >                          sink = user_sink;
> >                  }
> > 
> 
> Agreed, it is much better readable.
> 
> Suzuki
Suzuki K Poulose Feb. 2, 2021, 10:41 p.m. UTC | #5
On 2/2/21 4:33 PM, Mike Leach wrote:
> Hi,
> 
> On Tue, 2 Feb 2021 at 09:42, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
>>
>> On 2/1/21 11:17 PM, Mathieu Poirier wrote:
>>> Hi Anshuman,
>>>
>>> I have started reviewing this set.  As it is quite voluminous comments will
>>> come over serveral days.  I will let you know when I am done.
>>>
>>> On Wed, Jan 27, 2021 at 02:25:25PM +0530, Anshuman Khandual wrote:
>>>> From: Suzuki K Poulose <suzuki.poulose@arm.com>
>>>>
>>>> When there are multiple sinks on the system, in the absence
>>>> of a specified sink, it is quite possible that a default sink
>>>> for an ETM could be different from that of another ETM. However
>>>> we do not support having multiple sinks for an event yet. This
>>>> patch allows the event to use the default sinks on the ETMs
>>>> where they are scheduled as long as the sinks are of the same
>>>> type.
>>>>
>>>> e.g, if we have 1x1 topology with per-CPU ETRs, the event can
>>>> use the per-CPU ETR for the session. However, if the sinks
>>>> are of different type, e.g TMC-ETR on one and a custom sink
>>>> on another, the event will only trace on the first detected
>>>> sink.
>>>>
>>>
>>> I found the above changelog very confusing - I read it several times and still
>>> couldn't get all of it.  In the end this patch prevents sinks of different types
>>> from being used for session, and this is what the text should reflect.
>>
>> Sorry about that. Your inference is correct, but it is only a side effect
>> of the primary motive. How about the following :
>>
>> "When a sink is not specified by the user, the etm perf driver
>> finds a suitable sink automatically based on the first ETM, where
>> this event could be scheduled. Then we allocate the sink buffer based
>> on the selected sink. This is fine for a CPU bound event as the "sink"
>> is always guaranteed to be reachable from the ETM (as this is the only
>> ETM where the event is going to be scheduled). However, if we have a task
>> bound event, the event could be scheduled on any of the ETMs on the
>> system. In this case, currently we automatically select a sink and exclude
>> any ETMs that are not reachable from the selected sink. This is
>> problematic for 1x1 configurations as we end up in tracing the event
>> only on the "first" ETM, as the default sink is local to the first
>> ETM and unreachable from the rest.
>> However, we could allow the other ETMs to trace if they all have a
>> sink that is compatible with the "selected" sink and can use the
>> sink buffer. This can be easily done by verifying that they are
>> all driven by the same driver and matches the same subtype."
>>
> 
> 
> Not sure that the logic here makes total sense - I can't see _why_
> multiple sinks need to be of the same type.

Because we have a single "sink_config" (read, single sink specific
buffer) for an event. i.e, we do the sink_ops->alloc_buffer() only once
and rightly so. This allocates any buffers that are used by a given sink.
E.g., for ETR it allocates an etr_perf_buffer. Now if we wanted the same
event to run on an ETM with TRBE, the TRBE doesn't have any buffer set up to
collect the trace and can't make any sense of etr_perf_buffer.
However, if there is another ETM with a different ETR, the second
ETR can make sense of the sink_config (etr_perf_buffer) and trace the event.
Please remember that this only applies to task bound events where
the event can be scheduled on different ETMs.

> 
> 1) This patch is designed to allow multiple sinks to be used in a 1:1
> topology system - but there is no specific restriction here - and N:M
> should work on the same basis

Yes, this should work in any topology.

> 2) This implies that multiple sinks will work within the coresight
> infrastructure.

I am afraid I don't understand the context here.

> 3)  The sink interface -> struct coresight_ops_sink allows sinks to be
> abstracted - therefore whichever sink is chosen the coresight
> infrastructure calls the operations for the given sink.

Correct. The patch is trying to ensure that a private data
setup by one driver is not interpreted by another driver as
its own private data. (the private data being sink_config)

> 4) Each individual sink, will have its own hardware buffer - copied
> into the perf buffers at some appropriate point.

Correct. Supporting multiple types of sinks for a single event
is complex and not worth the benefit of the extra complexity.
Moreover we don't expect sane systems to have such a
configuration.

> 
> Thus if the users specifies a selected sink - we need to eliminate any
> source that cannot reach it.

Yes, we do that now.

> If not we need to find the relevant default sink for the source, which
> might be a shared ETR, or per CPU TRBE / ETR, and the abstraction
> logic ought to handle getting the captured data to the correct place.

The abstraction logic works fine, but the per-event private data is
something that makes this complex.

> If it doesn't then we are on shaky ground with any multiple sink
> solution.
> 
> On the face of it - type is irrelevant. If I am missing something -
> this patch needs a better explanation.

I hope the explanation above makes it clear. Please let me know
otherwise.

>>>> +}
>>>> +
>>>>    static void *etm_setup_aux(struct perf_event *event, void **pages,
>>>>                          int nr_pages, bool overwrite)
>>>>    {
>>>> @@ -212,6 +219,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
>>>>       cpumask_t *mask;
>>>>       struct coresight_device *sink = NULL;
>>>
>>>           struct coresight_device *user_sink = NULL;
>>>
>>>>       struct etm_event_data *event_data = NULL;
>>>> +    bool sink_forced = false;
>>>>
>>>>       event_data = alloc_event_data(cpu);
>>>>       if (!event_data)
>>>> @@ -222,6 +230,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
>>>>       if (event->attr.config2) {
>>>>               id = (u32)event->attr.config2;
>>>>               sink = coresight_get_sink_by_id(id);
>>>
>>>                   user_sink = coresight_get_sink_by_id(id);
>>>
>>>> +            sink_forced = true;
> 
> The comment for this block uses the term "selected sink", and the
> functions use _default_sink . This may read better if we rename the
> bool to "selected_sink" rather than "sink_forced"

I have removed the bool and replaced with the user_sink as Mathieu
suggested.

Thanks for the review
Suzuki
Mike Leach Feb. 4, 2021, 12:27 p.m. UTC | #6
Hi Suzuki,

On Tue, 2 Feb 2021 at 22:42, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
>
> On 2/2/21 4:33 PM, Mike Leach wrote:
> > Hi,
> >
> > On Tue, 2 Feb 2021 at 09:42, Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
> >>
> >> On 2/1/21 11:17 PM, Mathieu Poirier wrote:
> >>> Hi Anshuman,
> >>>
> >>> I have started reviewing this set.  As it is quite voluminous, comments will
> >>> come over several days.  I will let you know when I am done.
> >>>
> >>> On Wed, Jan 27, 2021 at 02:25:25PM +0530, Anshuman Khandual wrote:
> >>>> From: Suzuki K Poulose <suzuki.poulose@arm.com>
> >>>>
> >>>> When there are multiple sinks on the system, in the absence
> >>>> of a specified sink, it is quite possible that a default sink
> >>>> for an ETM could be different from that of another ETM. However
> >>>> we do not support having multiple sinks for an event yet. This
> >>>> patch allows the event to use the default sinks on the ETMs
> >>>> where they are scheduled as long as the sinks are of the same
> >>>> type.
> >>>>
> >>>> e.g, if we have 1x1 topology with per-CPU ETRs, the event can
> >>>> use the per-CPU ETR for the session. However, if the sinks
> >>>> are of different type, e.g TMC-ETR on one and a custom sink
> >>>> on another, the event will only trace on the first detected
> >>>> sink.
> >>>>
> >>>
> >>> I found the above changelog very confusing - I read it several times and still
> >>> couldn't get all of it.  In the end this patch prevents sinks of different types
> >>> from being used for session, and this is what the text should reflect.
> >>
> >> Sorry about that. Your inference is correct, but it is only a side effect
> >> of the primary motive. How about the following :
> >>
> >> "When a sink is not specified by the user, the etm perf driver
> >> finds a suitable sink automatically based on the first ETM, where
> >> this event could be scheduled. Then we allocate the sink buffer based
> >> on the selected sink. This is fine for a CPU bound event as the "sink"
> >> is always guaranteed to be reachable from the ETM (as this is the only
> >> ETM where the event is going to be scheduled). However, if we have a task
> >> bound event, the event could be scheduled on any of the ETMs on the
> >> system. In this case, currently we automatically select a sink and exclude
> >> any ETMs that are not reachable from the selected sink. This is
> >> problematic for 1x1 configurations as we end up in tracing the event
> >> only on the "first" ETM, as the default sink is local to the first
> >> ETM and unreachable from the rest.
> >> However, we could allow the other ETMs to trace if they all have a
> >> sink that is compatible with the "selected" sink and can use the
> >> sink buffer. This can be easily done by verifying that they are
> >> all driven by the same driver and matches the same subtype."
> >>
> >
> >
> > Not sure that the logic here makes total sense - I can't see _why_
> > multiple sinks need to be of the same type.
>
> Because we have a single "sink_config" (read, single sink specific
> buffer) for an event. i.e, we do the sink_ops->alloc_buffer() only once
> and rightly so. This allocates any buffers that are used by a given sink.
> E.g., for ETR it allocates an etr_perf_buffer. Now if we wanted the same
> event to run on an ETM with TRBE, the TRBE doesn't have any buffer set up to
> collect the trace and can't make any sense of etr_perf_buffer.
> However, if there is another ETM with a different ETR, the second
> ETR can make sense of the sink_config (etr_perf_buffer) and trace the event.
> Please remember that this only applies to task bound events where
> the event can be scheduled on different ETMs.
>

I have had a look around the ETR driver and can see the issue -
something very sink specific is being bound to the more generic event,
which does undermine the abstraction a little bit!
The ETR drivers refer to per-thread and cpu-wide - not cpu-bound /
task-bound - so we could do with consistent terminology here. The cpu-wide
version is designed to be called multiple times - but obviously only
allocates a single buffer.

This implies that for the cpu-wide case, we could tolerate sinks of
differing types, as setup_aux will be called once per CPU. Whether we
want to is another question.

Given we don't yet have any systems that sport differing sink types,
it's probably not worth allowing.
A short comment explaining that the type matching is needed due to
the underlying private data structure incompatibility would be useful
though.

Regards

Mike



> >
> > 1) This patch is designed to allow multiple sinks to be used in a 1:1
> > topology system - but there is no specific restriction here - and N:M
> > should work on the same basis
>
> Yes, this should work in any topology.
>
> > 2) This implies that multiple sinks will work within the coresight
> > infrastructure.
>
> I am afraid I don't understand the context here.
>
> > 3)  The sink interface -> struct coresight_ops_sink allows sinks to be
> > abstracted - therefore whichever sink is chosen the coresight
> > infrastructure calls the operations for the given sink.
>
> Correct. The patch is trying to ensure that a private data
> setup by one driver is not interpreted by another driver as
> its own private data. (the private data being sink_config)
>
> > 4) Each individual sink, will have its own hardware buffer - copied
> > into the perf buffers at some appropriate point.
>
> Correct. Supporting multiple types of sinks for a single event
> is complex and not worth the benefit of the extra complexity.
> Moreover we don't expect sane systems to have such a
> configuration.
>
> >
> > Thus if the users specifies a selected sink - we need to eliminate any
> > source that cannot reach it.
>
> Yes, we do that now.
>
> > If not we need to find the relevant default sink for the source, which
> > might be a shared ETR, or per CPU TRBE / ETR, and the abstraction
> > logic ought to handle getting the captured data to the correct place.
>
> The abstraction logic works fine, but the per-event private data is
> something that makes this complex.
>
> > If it doesn't then we are on shaky ground with any multiple sink
> > solution.
> >
> > On the face of it - type is irrelevant. If I am missing something -
> > this patch needs a better explanation.
>
> I hope the explanation above makes it clear. Please let me know
> otherwise.
>
> >>>> +}
> >>>> +
> >>>>    static void *etm_setup_aux(struct perf_event *event, void **pages,
> >>>>                          int nr_pages, bool overwrite)
> >>>>    {
> >>>> @@ -212,6 +219,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> >>>>       cpumask_t *mask;
> >>>>       struct coresight_device *sink = NULL;
> >>>
> >>>           struct coresight_device *user_sink = NULL;
> >>>
> >>>>       struct etm_event_data *event_data = NULL;
> >>>> +    bool sink_forced = false;
> >>>>
> >>>>       event_data = alloc_event_data(cpu);
> >>>>       if (!event_data)
> >>>> @@ -222,6 +230,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> >>>>       if (event->attr.config2) {
> >>>>               id = (u32)event->attr.config2;
> >>>>               sink = coresight_get_sink_by_id(id);
> >>>
> >>>                   user_sink = coresight_get_sink_by_id(id);
> >>>
> >>>> +            sink_forced = true;
> >
> > The comment for this block uses the term "selected sink", and the
> > functions use _default_sink . This may read better if we rename the
> > bool to "selected_sink" rather than "sink_forced"
>
> I have removed the bool and replaced with the user_sink as Mathieu
> suggested.
>
> Thanks for the review
> Suzuki



--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
diff mbox series

Patch

diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index bdc34ca..eb9e7e9 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -204,6 +204,13 @@  static void etm_free_aux(void *data)
 	schedule_work(&event_data->work);
 }
 
+static bool sinks_match(struct coresight_device *a, struct coresight_device *b)
+{
+	if (!a || !b)
+		return false;
+	return (sink_ops(a) == sink_ops(b));
+}
+
 static void *etm_setup_aux(struct perf_event *event, void **pages,
 			   int nr_pages, bool overwrite)
 {
@@ -212,6 +219,7 @@  static void *etm_setup_aux(struct perf_event *event, void **pages,
 	cpumask_t *mask;
 	struct coresight_device *sink = NULL;
 	struct etm_event_data *event_data = NULL;
+	bool sink_forced = false;
 
 	event_data = alloc_event_data(cpu);
 	if (!event_data)
@@ -222,6 +230,7 @@  static void *etm_setup_aux(struct perf_event *event, void **pages,
 	if (event->attr.config2) {
 		id = (u32)event->attr.config2;
 		sink = coresight_get_sink_by_id(id);
+		sink_forced = true;
 	}
 
 	mask = &event_data->mask;
@@ -235,7 +244,7 @@  static void *etm_setup_aux(struct perf_event *event, void **pages,
 	 */
 	for_each_cpu(cpu, mask) {
 		struct list_head *path;
-		struct coresight_device *csdev;
+		struct coresight_device *csdev, *new_sink;
 
 		csdev = per_cpu(csdev_src, cpu);
 		/*
@@ -249,21 +258,35 @@  static void *etm_setup_aux(struct perf_event *event, void **pages,
 		}
 
 		/*
-		 * No sink provided - look for a default sink for one of the
-		 * devices. At present we only support topology where all CPUs
-		 * use the same sink [N:1], so only need to find one sink. The
-		 * coresight_build_path later will remove any CPU that does not
-		 * attach to the sink, or if we have not found a sink.
+		 * No sink provided - look for a default sink for all the devices.
+		 * We support multiple sinks only if all the default sinks
+		 * are of the same type, so that the sink buffer can be shared
+		 * as the event moves around. We don't trace on a CPU if it can't
+		 * use a sink compatible with the first one found.
 		 */
-		if (!sink)
-			sink = coresight_find_default_sink(csdev);
+		if (!sink_forced) {
+			new_sink = coresight_find_default_sink(csdev);
+			if (!new_sink) {
+				cpumask_clear_cpu(cpu, mask);
+				continue;
+			}
+			/* Skip checks for the first sink */
+			if (!sink) {
+			       sink = new_sink;
+			} else if (!sinks_match(new_sink, sink)) {
+				cpumask_clear_cpu(cpu, mask);
+				continue;
+			}
+		} else {
+			new_sink = sink;
+		}
 
 		/*
 		 * Building a path doesn't enable it, it simply builds a
 		 * list of devices from source to sink that can be
 		 * referenced later when the path is actually needed.
 		 */
-		path = coresight_build_path(csdev, sink);
+		path = coresight_build_path(csdev, new_sink);
 		if (IS_ERR(path)) {
 			cpumask_clear_cpu(cpu, mask);
 			continue;
@@ -284,7 +307,12 @@  static void *etm_setup_aux(struct perf_event *event, void **pages,
 	if (!sink_ops(sink)->alloc_buffer || !sink_ops(sink)->free_buffer)
 		goto err;
 
-	/* Allocate the sink buffer for this session */
+	/*
+	 * Allocate the sink buffer for this session. All the sinks
+	 * where this event can be scheduled are ensured to be of the
+	 * same type. Thus the same sink configuration is used by the
+	 * sinks.
+	 */
 	event_data->snk_config =
 			sink_ops(sink)->alloc_buffer(sink, event, pages,
 						     nr_pages, overwrite);