diff mbox series

[3/3] perf/arm-cmn: Enable per-DTC counter allocation

Message ID 849f65566582cb102c6d0843d0f26e231180f8ac.1697824215.git.robin.murphy@arm.com (mailing list archive)
State New, archived
Headers show
Series perf/arm-cmn: Multi-DTC improvements | expand

Commit Message

Robin Murphy Oct. 20, 2023, 5:51 p.m. UTC
Finally enable independent per-DTC-domain counter allocation, except on
CMN-600 where we still need to cope with not knowing the domain topology
and thus keep counter indices synchronised across domains. This allows
users to simultaneously count up to 8 targeted events per domain, rather
than 8 globally, for up to 4x wider coverage on maximum configurations.

Even though this now looks deceptively simple, I stand by my previous
assertion that it was a flippin' nightmare to implement; all the real
head-scratchers are hidden in the foundations in the previous patch...

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/perf/arm-cmn.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

Comments

Ilkka Koskinen Oct. 20, 2023, 11 p.m. UTC | #1
Hi Robin,

It seems that I somehow managed to reply to patch 2/3 a second time with the
comments that were supposed to be here... :(

On Fri, 20 Oct 2023, Robin Murphy wrote:
> Finally enable independent per-DTC-domain counter allocation, except on
> CMN-600 where we still need to cope with not knowing the domain topology
> and thus keep counter indices synchronised across domains. This allows
> users to simultaneously count up to 8 targeted events per domain, rather
> than 8 globally, for up to 4x wider coverage on maximum configurations.
>
> Even though this now looks deceptively simple, I stand by my previous
> assertion that it was a flippin' nightmare to implement; all the real
> head-scratchers are hidden in the foundations in the previous patch...
>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>

Thanks! I had that on my task list but never had time to start working on
it.

Reviewed-by: Ilkka Koskinen <ilkka at os.amperecomputing.com>

Cheers, Ilkka


> ---
> drivers/perf/arm-cmn.c | 18 ++++++++++--------
> 1 file changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index 675f1638013e..9479e919c063 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -1570,7 +1570,7 @@ struct arm_cmn_val {
> 	u8 dtm_count[CMN_MAX_DTMS];
> 	u8 occupid[CMN_MAX_DTMS][SEL_MAX];
> 	u8 wp[CMN_MAX_DTMS][4];
> -	int dtc_count;
> +	int dtc_count[CMN_MAX_DTCS];
> 	bool cycles;
> };
>
> @@ -1591,7 +1591,8 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
> 		return;
> 	}
>
> -	val->dtc_count++;
> +	for_each_hw_dtc_idx(hw, dtc, idx)
> +		val->dtc_count[dtc]++;
>
> 	for_each_hw_dn(hw, dn, i) {
> 		int wp_idx, dtm = dn->dtm, sel = hw->filter_sel;
> @@ -1638,8 +1639,9 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
> 		goto done;
> 	}
>
> -	if (val->dtc_count == CMN_DT_NUM_COUNTERS)
> -		goto done;
> +	for (i = 0; i < CMN_MAX_DTCS; i++)
> +		if (val->dtc_count[i] == CMN_DT_NUM_COUNTERS)
> +			goto done;
>
> 	for_each_hw_dn(hw, dn, i) {
> 		int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel;
> @@ -1806,9 +1808,9 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
> 		return 0;
> 	}
>
> -	/* Grab a free global counter first... */
> +	/* Grab the global counters first... */
> 	for_each_hw_dtc_idx(hw, j, idx) {
> -		if (j > 0) {
> +		if (cmn->part == PART_CMN600 && j > 0) {
> 			idx = hw->dtc_idx[0];
> 		} else {
> 			idx = 0;
> @@ -1819,10 +1821,10 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
> 		hw->dtc_idx[j] = idx;
> 	}
>
> -	/* ...then the local counters to feed it. */
> +	/* ...then the local counters to feed them */
> 	for_each_hw_dn(hw, dn, i) {
> 		struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
> -		unsigned int dtm_idx, shift, d = 0;
> +		unsigned int dtm_idx, shift, d = max_t(int, dn->dtc, 0);
> 		u64 reg;
>
> 		dtm_idx = 0;
> -- 
> 2.39.2.101.g768bb238c484.dirty
>
>
diff mbox series

Patch

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 675f1638013e..9479e919c063 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -1570,7 +1570,7 @@  struct arm_cmn_val {
 	u8 dtm_count[CMN_MAX_DTMS];
 	u8 occupid[CMN_MAX_DTMS][SEL_MAX];
 	u8 wp[CMN_MAX_DTMS][4];
-	int dtc_count;
+	int dtc_count[CMN_MAX_DTCS];
 	bool cycles;
 };
 
@@ -1591,7 +1591,8 @@  static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
 		return;
 	}
 
-	val->dtc_count++;
+	for_each_hw_dtc_idx(hw, dtc, idx)
+		val->dtc_count[dtc]++;
 
 	for_each_hw_dn(hw, dn, i) {
 		int wp_idx, dtm = dn->dtm, sel = hw->filter_sel;
@@ -1638,8 +1639,9 @@  static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
 		goto done;
 	}
 
-	if (val->dtc_count == CMN_DT_NUM_COUNTERS)
-		goto done;
+	for (i = 0; i < CMN_MAX_DTCS; i++)
+		if (val->dtc_count[i] == CMN_DT_NUM_COUNTERS)
+			goto done;
 
 	for_each_hw_dn(hw, dn, i) {
 		int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel;
@@ -1806,9 +1808,9 @@  static int arm_cmn_event_add(struct perf_event *event, int flags)
 		return 0;
 	}
 
-	/* Grab a free global counter first... */
+	/* Grab the global counters first... */
 	for_each_hw_dtc_idx(hw, j, idx) {
-		if (j > 0) {
+		if (cmn->part == PART_CMN600 && j > 0) {
 			idx = hw->dtc_idx[0];
 		} else {
 			idx = 0;
@@ -1819,10 +1821,10 @@  static int arm_cmn_event_add(struct perf_event *event, int flags)
 		hw->dtc_idx[j] = idx;
 	}
 
-	/* ...then the local counters to feed it. */
+	/* ...then the local counters to feed them */
 	for_each_hw_dn(hw, dn, i) {
 		struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
-		unsigned int dtm_idx, shift, d = 0;
+		unsigned int dtm_idx, shift, d = max_t(int, dn->dtc, 0);
 		u64 reg;
 
 		dtm_idx = 0;