diff mbox series

[v2,22/22] interconnect: qcom: icc-rpm: Fix bandwidth calculations

Message ID 20230526-topic-smd_icc-v2-22-e5934b07d813@linaro.org (mailing list archive)
State Superseded
Headers show
Series Restructure RPM SMD ICC | expand

Commit Message

Konrad Dybcio June 9, 2023, 8:19 p.m. UTC
Up until now, we've been aggregating the bandwidth values and only
dividing them by the bus width of the source node. This was completely
wrong, as different nodes on a given path may (and usually do) have
varying bus widths.  That in turn, resulted in the calculated clock rates
being completely bogus - usually they ended up being much higher, as
NoC_A<->NoC_B links are very wide.

Since we're not using the aggregate bandwidth value for anything other
than clock rate calculations, remodel qcom_icc_bus_aggregate() to
calculate the per-context clock rate for a given provider, taking into
account the bus width of every individual node.

Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver")
Reported-by: Stephan Gerhold <stephan@gerhold.net>
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
---
 drivers/interconnect/qcom/icc-rpm.c | 59 ++++++++++++-------------------------
 1 file changed, 19 insertions(+), 40 deletions(-)

Comments

Stephan Gerhold June 10, 2023, 7:06 p.m. UTC | #1
On Fri, Jun 09, 2023 at 10:19:27PM +0200, Konrad Dybcio wrote:
> Up until now, we've been aggregating the bandwidth values and only
> dividing them by the bus width of the source node. This was completely
> wrong, as different nodes on a given path may (and usually do) have
> varying bus widths.  That in turn, resulted in the calculated clock rates
> being completely bogus - usually they ended up being much higher, as
> NoC_A<->NoC_B links are very wide.
> 
> Since we're not using the aggregate bandwidth value for anything other
> than clock rate calculations, remodel qcom_icc_bus_aggregate() to
> calculate the per-context clock rate for a given provider, taking into
> account the bus width of every individual node.
> 
> Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver")
> Reported-by: Stephan Gerhold <stephan@gerhold.net>
> Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
> ---
>  drivers/interconnect/qcom/icc-rpm.c | 59 ++++++++++++-------------------------
>  1 file changed, 19 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
> index 1508233632f6..d177a76abe2a 100644
> --- a/drivers/interconnect/qcom/icc-rpm.c
> +++ b/drivers/interconnect/qcom/icc-rpm.c
> @@ -293,58 +293,44 @@ static int qcom_icc_bw_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
>  }
>  
>  /**
> - * qcom_icc_bus_aggregate - aggregate bandwidth by traversing all nodes
> + * qcom_icc_bus_aggregate - calculate bus clock rates by traversing all nodes
>   * @provider: generic interconnect provider
> - * @agg_avg: an array for aggregated average bandwidth of buckets
> - * @agg_peak: an array for aggregated peak bandwidth of buckets
> - * @max_agg_avg: pointer to max value of aggregated average bandwidth
> + * @agg_clk_rate: array containing the aggregated clock rates in kHz
>   */
> -static void qcom_icc_bus_aggregate(struct icc_provider *provider,
> -				   u64 *agg_avg, u64 *agg_peak,
> -				   u64 *max_agg_avg)
> +static void qcom_icc_bus_aggregate(struct icc_provider *provider, u64 *agg_clk_rate)
>  {
> -	struct icc_node *node;
> +	u64 agg_avg_rate, agg_rate;
>  	struct qcom_icc_node *qn;
> -	u64 sum_avg[QCOM_SMD_RPM_STATE_NUM];
> +	struct icc_node *node;
>  	int i;
>  
> -	/* Initialise aggregate values */
> -	for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
> -		agg_avg[i] = 0;
> -		agg_peak[i] = 0;
> -	}
> -
> -	*max_agg_avg = 0;
> -
>  	/*
> -	 * Iterate nodes on the interconnect and aggregate bandwidth
> -	 * requests for every bucket.
> +	 * Iterate nodes on the provider, aggregate bandwidth requests for
> +	 * every bucket and convert them into bus clock rates.
>  	 */
>  	list_for_each_entry(node, &provider->nodes, node_list) {
>  		qn = node->data;
>  		for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
>  			if (qn->channels)
> -				sum_avg[i] = div_u64(qn->sum_avg[i], qn->channels);
> +				agg_avg_rate = div_u64(qn->sum_avg[i], qn->channels);
>  			else
> -				sum_avg[i] = qn->sum_avg[i];
> -			agg_avg[i] += sum_avg[i];
> -			agg_peak[i] = max_t(u64, agg_peak[i], qn->max_peak[i]);
> +				agg_avg_rate = qn->sum_avg[i];
> +
> +			agg_rate = max_t(u64, agg_avg_rate, qn->max_peak[i]);
> +			do_div(agg_rate, qn->buswidth);
> +
> +			agg_clk_rate[i] = max_t(u64, agg_clk_rate[i], agg_rate);
>  		}
>  	}
> -
> -	/* Find maximum values across all buckets */
> -	for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++)
> -		*max_agg_avg = max_t(u64, *max_agg_avg, agg_avg[i]);
>  }
>  
>  static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>  {
> -	struct qcom_icc_provider *qp;
>  	struct qcom_icc_node *src_qn = NULL, *dst_qn = NULL;
> +	u64 agg_clk_rate[QCOM_SMD_RPM_STATE_NUM] = { 0 };
>  	struct icc_provider *provider;
> +	struct qcom_icc_provider *qp;
>  	u64 active_rate, sleep_rate;
> -	u64 agg_avg[QCOM_SMD_RPM_STATE_NUM], agg_peak[QCOM_SMD_RPM_STATE_NUM];
> -	u64 max_agg_avg;
>  	int ret;
>  
>  	src_qn = src->data;
> @@ -353,7 +339,9 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>  	provider = src->provider;
>  	qp = to_qcom_provider(provider);
>  
> -	qcom_icc_bus_aggregate(provider, agg_avg, agg_peak, &max_agg_avg);
> +	qcom_icc_bus_aggregate(provider, agg_clk_rate);
> +	active_rate = agg_clk_rate[QCOM_SMD_RPM_ACTIVE_STATE];
> +	sleep_rate = agg_clk_rate[QCOM_SMD_RPM_SLEEP_STATE];
>  
>  	ret = qcom_icc_rpm_set(src_qn, src_qn->sum_avg);
>  	if (ret)
> @@ -369,15 +357,6 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>  	if (!qp->bus_clk_desc && !qp->bus_clk)
>  		return 0;
>  
> -	/* Intentionally keep the rates in kHz as that's what RPM accepts */

I kind of liked this comment because otherwise it's not obvious why
you're not converting from "ICC units" anywhere.

Anyway:

Reviewed-by: Stephan Gerhold <stephan@gerhold.net>

Thanks for going through the giant maze to fix this!

Stephan
Konrad Dybcio June 10, 2023, 7:09 p.m. UTC | #2
On 10.06.2023 21:06, Stephan Gerhold wrote:
> On Fri, Jun 09, 2023 at 10:19:27PM +0200, Konrad Dybcio wrote:
>> Up until now, we've been aggregating the bandwidth values and only
>> dividing them by the bus width of the source node. This was completely
>> wrong, as different nodes on a given path may (and usually do) have
>> varying bus widths.  That in turn, resulted in the calculated clock rates
>> being completely bogus - usually they ended up being much higher, as
>> NoC_A<->NoC_B links are very wide.
>>
>> Since we're not using the aggregate bandwidth value for anything other
>> than clock rate calculations, remodel qcom_icc_bus_aggregate() to
>> calculate the per-context clock rate for a given provider, taking into
>> account the bus width of every individual node.
>>
>> Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver")
>> Reported-by: Stephan Gerhold <stephan@gerhold.net>
>> Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
>> ---
>>  drivers/interconnect/qcom/icc-rpm.c | 59 ++++++++++++-------------------------
>>  1 file changed, 19 insertions(+), 40 deletions(-)
>>
>> diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
>> index 1508233632f6..d177a76abe2a 100644
>> --- a/drivers/interconnect/qcom/icc-rpm.c
>> +++ b/drivers/interconnect/qcom/icc-rpm.c
>> @@ -293,58 +293,44 @@ static int qcom_icc_bw_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
>>  }
>>  
>>  /**
>> - * qcom_icc_bus_aggregate - aggregate bandwidth by traversing all nodes
>> + * qcom_icc_bus_aggregate - calculate bus clock rates by traversing all nodes
>>   * @provider: generic interconnect provider
>> - * @agg_avg: an array for aggregated average bandwidth of buckets
>> - * @agg_peak: an array for aggregated peak bandwidth of buckets
>> - * @max_agg_avg: pointer to max value of aggregated average bandwidth
>> + * @agg_clk_rate: array containing the aggregated clock rates in kHz
>>   */
>> -static void qcom_icc_bus_aggregate(struct icc_provider *provider,
>> -				   u64 *agg_avg, u64 *agg_peak,
>> -				   u64 *max_agg_avg)
>> +static void qcom_icc_bus_aggregate(struct icc_provider *provider, u64 *agg_clk_rate)
>>  {
>> -	struct icc_node *node;
>> +	u64 agg_avg_rate, agg_rate;
>>  	struct qcom_icc_node *qn;
>> -	u64 sum_avg[QCOM_SMD_RPM_STATE_NUM];
>> +	struct icc_node *node;
>>  	int i;
>>  
>> -	/* Initialise aggregate values */
>> -	for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
>> -		agg_avg[i] = 0;
>> -		agg_peak[i] = 0;
>> -	}
>> -
>> -	*max_agg_avg = 0;
>> -
>>  	/*
>> -	 * Iterate nodes on the interconnect and aggregate bandwidth
>> -	 * requests for every bucket.
>> +	 * Iterate nodes on the provider, aggregate bandwidth requests for
>> +	 * every bucket and convert them into bus clock rates.
>>  	 */
>>  	list_for_each_entry(node, &provider->nodes, node_list) {
>>  		qn = node->data;
>>  		for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
>>  			if (qn->channels)
>> -				sum_avg[i] = div_u64(qn->sum_avg[i], qn->channels);
>> +				agg_avg_rate = div_u64(qn->sum_avg[i], qn->channels);
>>  			else
>> -				sum_avg[i] = qn->sum_avg[i];
>> -			agg_avg[i] += sum_avg[i];
>> -			agg_peak[i] = max_t(u64, agg_peak[i], qn->max_peak[i]);
>> +				agg_avg_rate = qn->sum_avg[i];
>> +
>> +			agg_rate = max_t(u64, agg_avg_rate, qn->max_peak[i]);
>> +			do_div(agg_rate, qn->buswidth);
>> +
>> +			agg_clk_rate[i] = max_t(u64, agg_clk_rate[i], agg_rate);
>>  		}
>>  	}
>> -
>> -	/* Find maximum values across all buckets */
>> -	for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++)
>> -		*max_agg_avg = max_t(u64, *max_agg_avg, agg_avg[i]);
>>  }
>>  
>>  static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>>  {
>> -	struct qcom_icc_provider *qp;
>>  	struct qcom_icc_node *src_qn = NULL, *dst_qn = NULL;
>> +	u64 agg_clk_rate[QCOM_SMD_RPM_STATE_NUM] = { 0 };
>>  	struct icc_provider *provider;
>> +	struct qcom_icc_provider *qp;
>>  	u64 active_rate, sleep_rate;
>> -	u64 agg_avg[QCOM_SMD_RPM_STATE_NUM], agg_peak[QCOM_SMD_RPM_STATE_NUM];
>> -	u64 max_agg_avg;
>>  	int ret;
>>  
>>  	src_qn = src->data;
>> @@ -353,7 +339,9 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>>  	provider = src->provider;
>>  	qp = to_qcom_provider(provider);
>>  
>> -	qcom_icc_bus_aggregate(provider, agg_avg, agg_peak, &max_agg_avg);
>> +	qcom_icc_bus_aggregate(provider, agg_clk_rate);
>> +	active_rate = agg_clk_rate[QCOM_SMD_RPM_ACTIVE_STATE];
>> +	sleep_rate = agg_clk_rate[QCOM_SMD_RPM_SLEEP_STATE];
>>  
>>  	ret = qcom_icc_rpm_set(src_qn, src_qn->sum_avg);
>>  	if (ret)
>> @@ -369,15 +357,6 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>>  	if (!qp->bus_clk_desc && !qp->bus_clk)
>>  		return 0;
>>  
>> -	/* Intentionally keep the rates in kHz as that's what RPM accepts */
> 
> I kind of liked this comment because otherwise it's not obvious why
> you're not converting from "ICC units" anywhere.
I figured the kerneldoc change would be enough:

@agg_clk_rate: array containing the aggregated clock rates in kHz

> 
> Anyway:
> 
> Reviewed-by: Stephan Gerhold <stephan@gerhold.net>
> 
> Thanks for going through the giant maze to fix this!
Thanks for being there along the way.. This spaghetti is far too much
for a single human..

Konrad

> 
> Stephan
diff mbox series

Patch

diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
index 1508233632f6..d177a76abe2a 100644
--- a/drivers/interconnect/qcom/icc-rpm.c
+++ b/drivers/interconnect/qcom/icc-rpm.c
@@ -293,58 +293,44 @@  static int qcom_icc_bw_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
 }
 
 /**
- * qcom_icc_bus_aggregate - aggregate bandwidth by traversing all nodes
+ * qcom_icc_bus_aggregate - calculate bus clock rates by traversing all nodes
  * @provider: generic interconnect provider
- * @agg_avg: an array for aggregated average bandwidth of buckets
- * @agg_peak: an array for aggregated peak bandwidth of buckets
- * @max_agg_avg: pointer to max value of aggregated average bandwidth
+ * @agg_clk_rate: array containing the aggregated clock rates in kHz
  */
-static void qcom_icc_bus_aggregate(struct icc_provider *provider,
-				   u64 *agg_avg, u64 *agg_peak,
-				   u64 *max_agg_avg)
+static void qcom_icc_bus_aggregate(struct icc_provider *provider, u64 *agg_clk_rate)
 {
-	struct icc_node *node;
+	u64 agg_avg_rate, agg_rate;
 	struct qcom_icc_node *qn;
-	u64 sum_avg[QCOM_SMD_RPM_STATE_NUM];
+	struct icc_node *node;
 	int i;
 
-	/* Initialise aggregate values */
-	for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
-		agg_avg[i] = 0;
-		agg_peak[i] = 0;
-	}
-
-	*max_agg_avg = 0;
-
 	/*
-	 * Iterate nodes on the interconnect and aggregate bandwidth
-	 * requests for every bucket.
+	 * Iterate nodes on the provider, aggregate bandwidth requests for
+	 * every bucket and convert them into bus clock rates.
 	 */
 	list_for_each_entry(node, &provider->nodes, node_list) {
 		qn = node->data;
 		for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
 			if (qn->channels)
-				sum_avg[i] = div_u64(qn->sum_avg[i], qn->channels);
+				agg_avg_rate = div_u64(qn->sum_avg[i], qn->channels);
 			else
-				sum_avg[i] = qn->sum_avg[i];
-			agg_avg[i] += sum_avg[i];
-			agg_peak[i] = max_t(u64, agg_peak[i], qn->max_peak[i]);
+				agg_avg_rate = qn->sum_avg[i];
+
+			agg_rate = max_t(u64, agg_avg_rate, qn->max_peak[i]);
+			do_div(agg_rate, qn->buswidth);
+
+			agg_clk_rate[i] = max_t(u64, agg_clk_rate[i], agg_rate);
 		}
 	}
-
-	/* Find maximum values across all buckets */
-	for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++)
-		*max_agg_avg = max_t(u64, *max_agg_avg, agg_avg[i]);
 }
 
 static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
 {
-	struct qcom_icc_provider *qp;
 	struct qcom_icc_node *src_qn = NULL, *dst_qn = NULL;
+	u64 agg_clk_rate[QCOM_SMD_RPM_STATE_NUM] = { 0 };
 	struct icc_provider *provider;
+	struct qcom_icc_provider *qp;
 	u64 active_rate, sleep_rate;
-	u64 agg_avg[QCOM_SMD_RPM_STATE_NUM], agg_peak[QCOM_SMD_RPM_STATE_NUM];
-	u64 max_agg_avg;
 	int ret;
 
 	src_qn = src->data;
@@ -353,7 +339,9 @@  static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
 	provider = src->provider;
 	qp = to_qcom_provider(provider);
 
-	qcom_icc_bus_aggregate(provider, agg_avg, agg_peak, &max_agg_avg);
+	qcom_icc_bus_aggregate(provider, agg_clk_rate);
+	active_rate = agg_clk_rate[QCOM_SMD_RPM_ACTIVE_STATE];
+	sleep_rate = agg_clk_rate[QCOM_SMD_RPM_SLEEP_STATE];
 
 	ret = qcom_icc_rpm_set(src_qn, src_qn->sum_avg);
 	if (ret)
@@ -369,15 +357,6 @@  static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
 	if (!qp->bus_clk_desc && !qp->bus_clk)
 		return 0;
 
-	/* Intentionally keep the rates in kHz as that's what RPM accepts */
-	active_rate = max(agg_avg[QCOM_SMD_RPM_ACTIVE_STATE],
-			  agg_peak[QCOM_SMD_RPM_ACTIVE_STATE]);
-	do_div(active_rate, src_qn->buswidth);
-
-	sleep_rate = max(agg_avg[QCOM_SMD_RPM_SLEEP_STATE],
-			 agg_peak[QCOM_SMD_RPM_SLEEP_STATE]);
-	do_div(sleep_rate, src_qn->buswidth);
-
 	/*
 	 * Downstream checks whether the requested rate is zero, but it makes little sense
 	 * to vote for a value that's below the lower threshold, so let's not do so.