diff mbox series

perf: arm_pmuv3: Use BR_RETIRED for HW branch event if enabled

Message ID 20240905204732.20546-1-ilkka@os.amperecomputing.com (mailing list archive)
State New, archived
Headers show
Series perf: arm_pmuv3: Use BR_RETIRED for HW branch event if enabled | expand

Commit Message

Ilkka Koskinen Sept. 5, 2024, 8:47 p.m. UTC
The PMU driver attempts to use PC_WRITE_RETIRED for the HW branch event,
if enabled. However, PC_WRITE_RETIRED counts only taken branches,
whereas BR_RETIRED also counts non-taken ones.

Furthermore, perf uses HW branch event to calculate branch misses ratio,
implying BR_RETIRED is the correct event to count.

Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
---
 drivers/perf/arm_pmuv3.c | 27 ++++-----------------------
 1 file changed, 4 insertions(+), 23 deletions(-)

Comments

Anshuman Khandual Sept. 6, 2024, 4:58 a.m. UTC | #1
On 9/6/24 02:17, Ilkka Koskinen wrote:
> The PMU driver attempts to use PC_WRITE_RETIRED for the HW branch event,
> if enabled. However, PC_WRITE_RETIRED counts only taken branches,
> whereas BR_RETIRED counts also non-taken ones.
> 
> Furthermore, perf uses HW branch event to calculate branch misses ratio,
> implying BR_RETIRED is the correct event to count.
But is the BR_RETIRED event always guaranteed to be available? Shouldn't
armpmu->pmceid_bitmap be checked first?

> 
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
> ---
>  drivers/perf/arm_pmuv3.c | 27 ++++-----------------------
>  1 file changed, 4 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
> index d246840797b6..a8ed08df1411 100644
> --- a/drivers/perf/arm_pmuv3.c
> +++ b/drivers/perf/arm_pmuv3.c
> @@ -46,6 +46,7 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
>  	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
>  	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
>  	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
> +	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_BR_RETIRED,
>  	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
>  	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
>  	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
> @@ -1083,28 +1084,6 @@ static void armv8pmu_reset(void *info)
>  	armv8pmu_pmcr_write(pmcr);
>  }
>  
> -static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu,
> -				      struct perf_event *event)
> -{
> -	if (event->attr.type == PERF_TYPE_HARDWARE &&
> -	    event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) {
> -
> -		if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
> -			     armpmu->pmceid_bitmap))
> -			return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED;
> -
> -		if (test_bit(ARMV8_PMUV3_PERFCTR_BR_RETIRED,
> -			     armpmu->pmceid_bitmap))
> -			return ARMV8_PMUV3_PERFCTR_BR_RETIRED;

If the BR_RETIRED event is absent on the platform, PC_WRITE_RETIRED still remains
a good alternative to fall back on. Hence I am wondering if the above order could
just be changed to use BR_RETIRED first when available.

> -
> -		return HW_OP_UNSUPPORTED;
> -	}
> -
> -	return armpmu_map_event(event, &armv8_pmuv3_perf_map,
> -				&armv8_pmuv3_perf_cache_map,
> -				ARMV8_PMU_EVTYPE_EVENT);
> -}
> -
>  static int __armv8_pmuv3_map_event(struct perf_event *event,
>  				   const unsigned (*extra_event_map)
>  						  [PERF_COUNT_HW_MAX],
> @@ -1116,7 +1095,9 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,
>  	int hw_event_id;
>  	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
>  
> -	hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event);
> +	hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map,
> +				       &armv8_pmuv3_perf_cache_map,
> +				       ARMV8_PMU_EVTYPE_EVENT);
>  
>  	/*
>  	 * CHAIN events only work when paired with an adjacent counter, and it
Ilkka Koskinen Sept. 6, 2024, 6:28 p.m. UTC | #2
On Fri, 6 Sep 2024, Anshuman Khandual wrote:
> On 9/6/24 02:17, Ilkka Koskinen wrote:
>> The PMU driver attempts to use PC_WRITE_RETIRED for the HW branch event,
>> if enabled. However, PC_WRITE_RETIRED counts only taken branches,
>> whereas BR_RETIRED counts also non-taken ones.
>>
>> Furthermore, perf uses HW branch event to calculate branch misses ratio,
>> implying BR_RETIRED is the correct event to count.
> But is the event BR_RETIRED always guaranteed to be available. Should not
> armpmu->pmceid_bitmap be checked first ?
>
>>
>> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
>> ---
>>  drivers/perf/arm_pmuv3.c | 27 ++++-----------------------
>>  1 file changed, 4 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
>> index d246840797b6..a8ed08df1411 100644
>> --- a/drivers/perf/arm_pmuv3.c
>> +++ b/drivers/perf/arm_pmuv3.c
>> @@ -46,6 +46,7 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
>>  	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
>>  	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
>>  	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
>> +	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_BR_RETIRED,
>>  	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
>>  	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
>>  	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
>> @@ -1083,28 +1084,6 @@ static void armv8pmu_reset(void *info)
>>  	armv8pmu_pmcr_write(pmcr);
>>  }
>>
>> -static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu,
>> -				      struct perf_event *event)
>> -{
>> -	if (event->attr.type == PERF_TYPE_HARDWARE &&
>> -	    event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) {
>> -
>> -		if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
>> -			     armpmu->pmceid_bitmap))
>> -			return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED;
>> -
>> -		if (test_bit(ARMV8_PMUV3_PERFCTR_BR_RETIRED,
>> -			     armpmu->pmceid_bitmap))
>> -			return ARMV8_PMUV3_PERFCTR_BR_RETIRED;
>
> If BR_RETIRED event is absent on the platform, PC_WRITE_RETIRED still remains
> a good alternative to fallback on. Hence wondering if the above order could
> just be changed to use BR_RETIRED first when available.

If PC_WRITE_RETIRED is a good alternative, then I guess it's better to just
change the order of those two. I'll send a new version doing just that.

Cheers, Ilkka
diff mbox series

Patch

diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index d246840797b6..a8ed08df1411 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -46,6 +46,7 @@  static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
 	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
 	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
 	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_BR_RETIRED,
 	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
 	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
@@ -1083,28 +1084,6 @@  static void armv8pmu_reset(void *info)
 	armv8pmu_pmcr_write(pmcr);
 }
 
-static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu,
-				      struct perf_event *event)
-{
-	if (event->attr.type == PERF_TYPE_HARDWARE &&
-	    event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) {
-
-		if (test_bit(ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
-			     armpmu->pmceid_bitmap))
-			return ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED;
-
-		if (test_bit(ARMV8_PMUV3_PERFCTR_BR_RETIRED,
-			     armpmu->pmceid_bitmap))
-			return ARMV8_PMUV3_PERFCTR_BR_RETIRED;
-
-		return HW_OP_UNSUPPORTED;
-	}
-
-	return armpmu_map_event(event, &armv8_pmuv3_perf_map,
-				&armv8_pmuv3_perf_cache_map,
-				ARMV8_PMU_EVTYPE_EVENT);
-}
-
 static int __armv8_pmuv3_map_event(struct perf_event *event,
 				   const unsigned (*extra_event_map)
 						  [PERF_COUNT_HW_MAX],
@@ -1116,7 +1095,9 @@  static int __armv8_pmuv3_map_event(struct perf_event *event,
 	int hw_event_id;
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 
-	hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event);
+	hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map,
+				       &armv8_pmuv3_perf_cache_map,
+				       ARMV8_PMU_EVTYPE_EVENT);
 
 	/*
 	 * CHAIN events only work when paired with an adjacent counter, and it