[5/5] cpuidle-pseries: Block Extended CEDE(1) which adds no additional value.
diff mbox series

Message ID 1594120299-31389-6-git-send-email-ego@linux.vnet.ibm.com
State Not Applicable, archived
Headers show
Series
  • cpuidle-pseries: Parse extended CEDE information for idle.
Related show

Commit Message

Gautham R. Shenoy July 7, 2020, 11:11 a.m. UTC
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>

The Extended CEDE state with latency-hint = 1 is only different from
normal CEDE (with latency-hint = 0) in that a CPU in Extended CEDE(1)
does not wake up on timer events. Both CEDE and Extended CEDE(1) map to
the same hardware idle state. Since we already get SMT folding from
the normal CEDE, the Extended CEDE(1) doesn't provide any additional
value. This patch blocks Extended CEDE(1).

Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
---
 drivers/cpuidle/cpuidle-pseries.c | 57 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 3 deletions(-)

Comments

Vaidyanathan Srinivasan July 20, 2020, 6:34 a.m. UTC | #1
* Gautham R Shenoy <ego@linux.vnet.ibm.com> [2020-07-07 16:41:39]:

> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> 
> The Extended CEDE state with latency-hint = 1 is only different from
> normal CEDE (with latency-hint = 0) in that a CPU in Extended CEDE(1)
> does not wakeup on timer events. Both CEDE and Extended CEDE(1) map to
> the same hardware idle state. Since we already get SMT folding from
> the normal CEDE, the Extended CEDE(1) doesn't provide any additional
> value. This patch blocks Extended CEDE(1).
> 
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>

Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.ibm.com>

> ---
>  drivers/cpuidle/cpuidle-pseries.c | 57 ++++++++++++++++++++++++++++++++++++---
>  1 file changed, 54 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
> index 6f893cd..be0b8b2 100644
> --- a/drivers/cpuidle/cpuidle-pseries.c
> +++ b/drivers/cpuidle/cpuidle-pseries.c
> @@ -350,6 +350,43 @@ static int pseries_cpuidle_driver_init(void)
>  	return 0;
>  }
> 
> +#define XCEDE1_HINT	1
> +#define ERR_NO_VALUE_ADD	(-1)
> +#define ERR_NO_EE_WAKEUP	(-2)
> +
> +/*
> + * Returns 0 if the Extende CEDE state with @hint is not blocked in
> + * cpuidle framework.
> + *
> + * Returns ERR_NO_EE_WAKEUP if the Extended CEDE state is blocked due
> + * to not being responsive to external interrupts.
> + *
> + * Returns ERR_NO_VALUE_ADD if the Extended CEDE state does not provide
> + * added value addition over the normal CEDE.
> + */
> +static int cpuidle_xcede_blocked(u8 hint, u64 latency_us, u8 responsive_to_irqs)
> +{
> +
> +	/*
> +	 * We will only allow extended CEDE states that are responsive
> +	 * to irqs do not require an H_PROD to be woken up.
> +	 */
> +	if (!responsive_to_irqs)
> +		return ERR_NO_EE_WAKEUP;
> +
> +	/*
> +	 * We already obtain SMT folding benefits from CEDE (which is
> +	 * CEDE with hint 0). Furthermore, CEDE is also responsive to
> +	 * timer-events, while XCEDE1 requires an external
> +	 * interrupt/H_PROD to be woken up. Hence, block XCEDE1 since
> +	 * it adds no further value.
> +	 */
> +	if (hint == XCEDE1_HINT)
> +		return ERR_NO_VALUE_ADD;
> +
> +	return 0;
> +}
> +
>  static int add_pseries_idle_states(void)
>  {
>  	int nr_states = 2; /* By default we have snooze, CEDE */
> @@ -365,15 +402,29 @@ static int add_pseries_idle_states(void)
>  		char name[CPUIDLE_NAME_LEN];
>  		unsigned int latency_hint = xcede_records[i].latency_hint;
>  		u64 residency_us;
> +		int rc;
> +
> +		if (latency_us < min_latency_us)
> +			min_latency_us = latency_us;
> +
> +		rc = cpuidle_xcede_blocked(latency_hint, latency_us,
> +					   xcede_records[i].responsive_to_irqs);
> 
> -		if (!xcede_records[i].responsive_to_irqs) {
> +		if (rc) {
> +			switch (rc) {
> +			case ERR_NO_VALUE_ADD:
> +				pr_info("cpuidle : Skipping XCEDE%d. No additional value-add\n",
> +					latency_hint);
> +				break;
> +			case ERR_NO_EE_WAKEUP:
>  			pr_info("cpuidle : Skipping XCEDE%d. Not responsive to IRQs\n",
>  				latency_hint);
> +			break;
> +			}
> +
>  			continue;
>  		}
> 
> -		if (latency_us < min_latency_us)
> -			min_latency_us = latency_us;
>  		snprintf(name, CPUIDLE_NAME_LEN, "XCEDE%d", latency_hint);
> 
>  		/*


We need these heuristics to select/reject idle states exposed by
platform firmware to Linux primarily because not all states are really
useful to Linux on a given setup.

--Vaidy

Patch
diff mbox series

diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 6f893cd..be0b8b2 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -350,6 +350,43 @@  static int pseries_cpuidle_driver_init(void)
 	return 0;
 }
 
+#define XCEDE1_HINT	1
+#define ERR_NO_VALUE_ADD	(-1)
+#define ERR_NO_EE_WAKEUP	(-2)
+
+/*
+ * Returns 0 if the Extended CEDE state with @hint is not blocked in
+ * cpuidle framework.
+ *
+ * Returns ERR_NO_EE_WAKEUP if the Extended CEDE state is blocked due
+ * to not being responsive to external interrupts.
+ *
+ * Returns ERR_NO_VALUE_ADD if the Extended CEDE state does not provide
+ * any added value over the normal CEDE.
+ */
+static int cpuidle_xcede_blocked(u8 hint, u64 latency_us, u8 responsive_to_irqs)
+{
+
+	/*
+	 * We will only allow extended CEDE states that are responsive
+	 * to irqs and do not require an H_PROD to be woken up.
+	 */
+	if (!responsive_to_irqs)
+		return ERR_NO_EE_WAKEUP;
+
+	/*
+	 * We already obtain SMT folding benefits from CEDE (which is
+	 * CEDE with hint 0). Furthermore, CEDE is also responsive to
+	 * timer-events, while XCEDE1 requires an external
+	 * interrupt/H_PROD to be woken up. Hence, block XCEDE1 since
+	 * it adds no further value.
+	 */
+	if (hint == XCEDE1_HINT)
+		return ERR_NO_VALUE_ADD;
+
+	return 0;
+}
+
 static int add_pseries_idle_states(void)
 {
 	int nr_states = 2; /* By default we have snooze, CEDE */
@@ -365,15 +402,29 @@  static int add_pseries_idle_states(void)
 		char name[CPUIDLE_NAME_LEN];
 		unsigned int latency_hint = xcede_records[i].latency_hint;
 		u64 residency_us;
+		int rc;
+
+		if (latency_us < min_latency_us)
+			min_latency_us = latency_us;
+
+		rc = cpuidle_xcede_blocked(latency_hint, latency_us,
+					   xcede_records[i].responsive_to_irqs);
 
-		if (!xcede_records[i].responsive_to_irqs) {
+		if (rc) {
+			switch (rc) {
+			case ERR_NO_VALUE_ADD:
+				pr_info("cpuidle : Skipping XCEDE%d. No additional value-add\n",
+					latency_hint);
+				break;
+			case ERR_NO_EE_WAKEUP:
 			pr_info("cpuidle : Skipping XCEDE%d. Not responsive to IRQs\n",
 				latency_hint);
+			break;
+			}
+
 			continue;
 		}
 
-		if (latency_us < min_latency_us)
-			min_latency_us = latency_us;
 		snprintf(name, CPUIDLE_NAME_LEN, "XCEDE%d", latency_hint);
 
 		/*