diff mbox series

[v3,5/7] drivers: firmware: psci: Add hierarchical domain idle states converter

Message ID 1580736940-6985-6-git-send-email-mkshah@codeaurora.org (mailing list archive)
State Not Applicable, archived
Headers show
Series Add RSC power domain support | expand

Commit Message

Maulik Shah Feb. 3, 2020, 1:35 p.m. UTC
From: Ulf Hansson <ulf.hansson@linaro.org>

If the hierarchical CPU topology is used, but the OS initiated mode isn't
supported, we need to rely solely on the regular cpuidle framework to
manage the idle state selection, rather than using genpd and its
governor.

For this reason, introduce a new PSCI DT helper function,
psci_dt_pm_domains_parse_states(), which parses and converts the
hierarchically described domain idle states from DT into regular, flattened
cpuidle states. The converted states are added to the existing cpuidle
driver's array of idle states, which makes them available for cpuidle.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
[applied to new path, resolved conflicts]
Signed-off-by: Maulik Shah <mkshah@codeaurora.org>
---
 drivers/cpuidle/cpuidle-psci-domain.c | 137 +++++++++++++++++++++++++++++-----
 drivers/cpuidle/cpuidle-psci.c        |  41 +++++-----
 drivers/cpuidle/cpuidle-psci.h        |  11 +++
 3 files changed, 153 insertions(+), 36 deletions(-)
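
To make the flattening concrete, here is a minimal standalone C sketch of the
conversion step. The types and numbers below are simplified stand-ins invented
for illustration; the actual patch operates on struct genpd_power_state and
struct cpuidle_state, as the diff quoted in the comments below shows:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel structures the patch uses. */
struct pd_state {			/* one domain idle state from DT */
	uint64_t residency_ns;
	uint64_t power_on_latency_ns;
	uint64_t power_off_latency_ns;
	const char *name;
};

struct flat_state {			/* one flattened cpuidle state */
	unsigned int target_residency_us;
	unsigned int exit_latency_us;
	const char *name;
};

/* Core of the conversion: exit latency is the sum of the power-off and
 * power-on latencies, and all nanosecond values become microseconds. */
static void convert(const struct pd_state *pd, struct flat_state *out)
{
	out->target_residency_us = pd->residency_ns / 1000;
	out->exit_latency_us =
		(pd->power_off_latency_ns + pd->power_on_latency_ns) / 1000;
	out->name = pd->name;
}

int main(void)
{
	/* Invented example values for a cluster idle state. */
	struct pd_state cluster = {
		.residency_ns = 9500000,
		.power_on_latency_ns = 800000,
		.power_off_latency_ns = 1500000,
		.name = "cluster-sleep-0",
	};
	struct flat_state flat;

	convert(&cluster, &flat);
	printf("%s: residency %u us, exit latency %u us\n",
	       flat.name, flat.target_residency_us, flat.exit_latency_us);
	return 0;
}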

Comments

Sudeep Holla Feb. 3, 2020, 5:08 p.m. UTC | #1
On Mon, Feb 03, 2020 at 07:05:38PM +0530, Maulik Shah wrote:
> From: Ulf Hansson <ulf.hansson@linaro.org>
>
> If the hierarchical CPU topology is used, but the OS initiated mode isn't
> supported, we need to rely solely on the regular cpuidle framework to
> manage the idle state selection, rather than using genpd and its
> governor.
>
> For this reason, introduce a new PSCI DT helper function,
> psci_dt_pm_domains_parse_states(), which parses and converts the
> hierarchically described domain idle states from DT into regular, flattened
> cpuidle states. The converted states are added to the existing cpuidle
> driver's array of idle states, which makes them available for cpuidle.
>

And what's the main motivation for this if OSI is not supported in the
firmware?

> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
> [applied to new path, resolved conflicts]
> Signed-off-by: Maulik Shah <mkshah@codeaurora.org>
> ---
>  drivers/cpuidle/cpuidle-psci-domain.c | 137 +++++++++++++++++++++++++++++-----
>  drivers/cpuidle/cpuidle-psci.c        |  41 +++++-----
>  drivers/cpuidle/cpuidle-psci.h        |  11 +++
>  3 files changed, 153 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
> index 423f03b..3c417f7 100644
> --- a/drivers/cpuidle/cpuidle-psci-domain.c
> +++ b/drivers/cpuidle/cpuidle-psci-domain.c
> @@ -26,13 +26,17 @@ struct psci_pd_provider {
>  };
>
>  static LIST_HEAD(psci_pd_providers);
> -static bool osi_mode_enabled __initdata;
> +static bool osi_mode_enabled;
>
>  static int psci_pd_power_off(struct generic_pm_domain *pd)
>  {
>  	struct genpd_power_state *state = &pd->states[pd->state_idx];
>  	u32 *pd_state;
>
> +	/* If we have failed to enable OSI mode, then abort power off. */
> +	if ((psci_has_osi_support()) && !osi_mode_enabled)
> +		return -EBUSY;
> +

Why is this needed? IIUC we don't create genpd domains if OSI is not
enabled.

>  	if (!state->data)
>  		return 0;
>
> @@ -101,6 +105,105 @@ static void psci_pd_free_states(struct genpd_power_state *states,
>  	kfree(states);
>  }
>
> +static void psci_pd_convert_states(struct cpuidle_state *idle_state,
> +			u32 *psci_state, struct genpd_power_state *state)
> +{
> +	u32 *state_data = state->data;
> +	u64 target_residency_us = state->residency_ns;
> +	u64 exit_latency_us = state->power_on_latency_ns +
> +			state->power_off_latency_ns;
> +
> +	*psci_state = *state_data;
> +	do_div(target_residency_us, 1000);
> +	idle_state->target_residency = target_residency_us;
> +	do_div(exit_latency_us, 1000);
> +	idle_state->exit_latency = exit_latency_us;
> +	idle_state->enter = &psci_enter_domain_idle_state;
> +	idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
> +
> +	strncpy(idle_state->name, to_of_node(state->fwnode)->name,
> +		CPUIDLE_NAME_LEN - 1);
> +	strncpy(idle_state->desc, to_of_node(state->fwnode)->name,
> +		CPUIDLE_NAME_LEN - 1);
> +}
> +
> +static bool psci_pd_is_provider(struct device_node *np)
> +{
> +	struct psci_pd_provider *pd_prov, *it;
> +
> +	list_for_each_entry_safe(pd_prov, it, &psci_pd_providers, link) {
> +		if (pd_prov->node == np)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
> +int __init psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
> +			struct device_node *cpu_node, u32 *psci_states)
> +{
> +	struct genpd_power_state *pd_states;
> +	struct of_phandle_args args;
> +	int ret, pd_state_count, i, state_idx, psci_idx;
> +	u32 cpu_psci_state = psci_states[drv->state_count - 1];
> +	struct device_node *np = of_node_get(cpu_node);
> +
> +	/* Walk the CPU topology to find compatible domain idle states. */
> +	while (np) {
> +		ret = of_parse_phandle_with_args(np, "power-domains",
> +					"#power-domain-cells", 0, &args);
> +		of_node_put(np);
> +		if (ret)
> +			return 0;
> +
> +		np = args.np;
> +
> +		/* Verify that the node represents a psci pd provider. */
> +		if (!psci_pd_is_provider(np)) {
> +			of_node_put(np);
> +			return 0;
> +		}
> +
> +		/* Parse for compatible domain idle states. */
> +		ret = psci_pd_parse_states(np, &pd_states, &pd_state_count);
> +		if (ret) {
> +			of_node_put(np);
> +			return ret;
> +		}
> +
> +		i = 0;
> +		while (i < pd_state_count) {
> +
> +			state_idx = drv->state_count;
> +			if (state_idx >= CPUIDLE_STATE_MAX) {
> +				pr_warn("exceeding max cpuidle states\n");
> +				of_node_put(np);
> +				return 0;
> +			}
> +
> +			psci_idx = state_idx + i;
> +			psci_pd_convert_states(&drv->states[state_idx + i],
> +					&psci_states[psci_idx], &pd_states[i]);
> +
> +			/*
> +			 * In the hierarchical CPU topology the master PM domain
> +			 * idle state's DT property, "arm,psci-suspend-param",
> +			 * doesn't contain the bits for the idle state of the CPU,
> +			 * let's add those here.
> +			 */
> +			psci_states[psci_idx] |= cpu_psci_state;

No, we can't do that. Refer to previous discussions around that.

> +			pr_debug("psci-power-state %#x index %d\n",
> +				psci_states[psci_idx], psci_idx);
> +
> +			drv->state_count++;
> +			i++;
> +		}
> +		psci_pd_free_states(pd_states, pd_state_count);
> +	}
> +
> +	return 0;
> +}
> +
>  static int __init psci_pd_init(struct device_node *np)
>  {
>  	struct generic_pm_domain *pd;
> @@ -125,11 +228,14 @@ static int __init psci_pd_init(struct device_node *np)
>  	 * Parse the domain idle states and let genpd manage the state selection
>  	 * for those being compatible with "domain-idle-state".
>  	 */
> -	ret = psci_pd_parse_states(np, &states, &state_count);
> -	if (ret)
> -		goto free_name;
>
> -	pd->free_states = psci_pd_free_states;
> +	if (psci_has_osi_support()) {
> +		ret = psci_pd_parse_states(np, &states, &state_count);
> +		if (ret)
> +			goto free_name;
> +		pd->free_states = psci_pd_free_states;
> +	}
> +
>  	pd->name = kbasename(pd->name);
>  	pd->power_off = psci_pd_power_off;
>  	pd->states = states;
> @@ -236,10 +342,6 @@ static int __init psci_idle_init_domains(void)
>  	if (!np)
>  		return -ENODEV;
>
> -	/* Currently limit the hierarchical topology to be used in OSI mode. */
> -	if (!psci_has_osi_support())
> -		goto out;
> -
>  	/*
>  	 * Parse child nodes for the "#power-domain-cells" property and
>  	 * initialize a genpd/genpd-of-provider pair when it's found.
> @@ -265,14 +367,16 @@ static int __init psci_idle_init_domains(void)
>  		goto remove_pd;
>
>  	/* Try to enable OSI mode. */
> -	ret = psci_set_osi_mode();
> -	if (ret) {
> -		pr_warn("failed to enable OSI mode: %d\n", ret);
> -		psci_pd_remove_topology(np);
> -		goto remove_pd;
> +	if (psci_has_osi_support()) {
> +		ret = psci_set_osi_mode();
> +		if (ret) {
> +			pr_warn("failed to enable OSI mode: %d\n", ret);
> +			psci_pd_remove_topology(np);
> +			goto remove_pd;
> +		} else
> +			osi_mode_enabled = true;
>  	}
>
> -	osi_mode_enabled = true;
>  	of_node_put(np);
>  	pr_info("Initialized CPU PM domain topology\n");
>  	return pd_count;
> @@ -293,9 +397,6 @@ struct device __init *psci_dt_attach_cpu(int cpu)
>  {
>  	struct device *dev;
>
> -	if (!osi_mode_enabled)
> -		return NULL;
> -
>  	dev = dev_pm_domain_attach_by_name(get_cpu_device(cpu), "psci");
>  	if (IS_ERR_OR_NULL(dev))
>  		return dev;
> diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
> index edd7a54..3fa2aee 100644
> --- a/drivers/cpuidle/cpuidle-psci.c
> +++ b/drivers/cpuidle/cpuidle-psci.c
> @@ -49,7 +49,7 @@ static inline int psci_enter_state(int idx, u32 state)
>  	return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, idx, state);
>  }
>
> -static int psci_enter_domain_idle_state(struct cpuidle_device *dev,
> +int psci_enter_domain_idle_state(struct cpuidle_device *dev,
>  					struct cpuidle_driver *drv, int idx)
>  {
>  	struct psci_cpuidle_data *data = this_cpu_ptr(&psci_cpuidle_data);
> @@ -193,24 +193,29 @@ static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv,
>  		goto free_mem;
>  	}
>
> -	/* Currently limit the hierarchical topology to be used in OSI mode. */
> -	if (psci_has_osi_support()) {
> -		data->dev = psci_dt_attach_cpu(cpu);
> -		if (IS_ERR(data->dev)) {
> -			ret = PTR_ERR(data->dev);
> +	if (!psci_has_osi_support()) {
> +		ret = psci_dt_pm_domains_parse_states(drv, cpu_node,
> +					      psci_states);
> +		if (ret)
>  			goto free_mem;
> -		}
> -
> -		/*
> -		 * Using the deepest state for the CPU to trigger a potential
> -		 * selection of a shared state for the domain, assumes the
> -		 * domain states are all deeper states.
> -		 */
> -		if (data->dev) {
> -			drv->states[state_count - 1].enter =
> -				psci_enter_domain_idle_state;
> -			psci_cpuidle_use_cpuhp = true;
> -		}
> +	}
> +
> +	data->dev = psci_dt_attach_cpu(cpu);
> +	if (IS_ERR(data->dev)) {
> +		ret = PTR_ERR(data->dev);
> +		goto free_mem;
> +	}
> +
> +	/*
> +	 * Using the deepest state for the CPU to trigger a potential
> +	 * selection of a shared state for the domain, assumes the
> +	 * domain states are all deeper states.
> +	 */
> +
> +	if (data->dev) {
> +		drv->states[state_count - 1].enter =
> +			psci_enter_domain_idle_state;
> +		psci_cpuidle_use_cpuhp = true;
>  	}
>
>  	/* Idle states parsed correctly, store them in the per-cpu struct. */

--
Regards,
Sudeep
Maulik Shah Feb. 4, 2020, 4:52 a.m. UTC | #2
On 2/3/2020 10:38 PM, Sudeep Holla wrote:
> On Mon, Feb 03, 2020 at 07:05:38PM +0530, Maulik Shah wrote:
>> From: Ulf Hansson <ulf.hansson@linaro.org>
>>
>> If the hierarchical CPU topology is used, but the OS initiated mode isn't
>> supported, we need to rely solely on the regular cpuidle framework to
>> manage the idle state selection, rather than using genpd and its
>> governor.
>>
>> For this reason, introduce a new PSCI DT helper function,
>> psci_dt_pm_domains_parse_states(), which parses and converts the
>> hierarchically described domain idle states from DT into regular, flattened
>> cpuidle states. The converted states are added to the existing cpuidle
>> driver's array of idle states, which makes them available for cpuidle.
>>
> And what's the main motivation for this if OSI is not supported in the
> firmware?

Hi Sudeep,

Main motivation is to do last-man activities before the CPU cluster can 
enter a deep idle state.

>> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
>> [applied to new path, resolved conflicts]
>> Signed-off-by: Maulik Shah <mkshah@codeaurora.org>
>> ---
>>   drivers/cpuidle/cpuidle-psci-domain.c | 137 +++++++++++++++++++++++++++++-----
>>   drivers/cpuidle/cpuidle-psci.c        |  41 +++++-----
>>   drivers/cpuidle/cpuidle-psci.h        |  11 +++
>>   3 files changed, 153 insertions(+), 36 deletions(-)
>>
>> diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
>> index 423f03b..3c417f7 100644
>> --- a/drivers/cpuidle/cpuidle-psci-domain.c
>> +++ b/drivers/cpuidle/cpuidle-psci-domain.c
>> @@ -26,13 +26,17 @@ struct psci_pd_provider {
>>   };
>>
>>   static LIST_HEAD(psci_pd_providers);
>> -static bool osi_mode_enabled __initdata;
>> +static bool osi_mode_enabled;
>>
>>   static int psci_pd_power_off(struct generic_pm_domain *pd)
>>   {
>>   	struct genpd_power_state *state = &pd->states[pd->state_idx];
>>   	u32 *pd_state;
>>
>> +	/* If we have failed to enable OSI mode, then abort power off. */
>> +	if ((psci_has_osi_support()) && !osi_mode_enabled)
>> +		return -EBUSY;
>> +
> Why is this needed? IIUC we don't create genpd domains if OSI is not
> enabled.

We do create genpd domains for the CPU domains; we just abort power off
here, since the idle states are converted into regular flattened mode.

However, genpd power off will still be used by the parent domain (the RSC
in this case), which is kept in hierarchy in the DTSI with the cluster
domain, to do last-man activities.

>>   	if (!state->data)
>>   		return 0;
>>
>> @@ -101,6 +105,105 @@ static void psci_pd_free_states(struct genpd_power_state *states,
>>   	kfree(states);
>>   }
>>
>> +static void psci_pd_convert_states(struct cpuidle_state *idle_state,
>> +			u32 *psci_state, struct genpd_power_state *state)
>> +{
>> +	u32 *state_data = state->data;
>> +	u64 target_residency_us = state->residency_ns;
>> +	u64 exit_latency_us = state->power_on_latency_ns +
>> +			state->power_off_latency_ns;
>> +
>> +	*psci_state = *state_data;
>> +	do_div(target_residency_us, 1000);
>> +	idle_state->target_residency = target_residency_us;
>> +	do_div(exit_latency_us, 1000);
>> +	idle_state->exit_latency = exit_latency_us;
>> +	idle_state->enter = &psci_enter_domain_idle_state;
>> +	idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
>> +
>> +	strncpy(idle_state->name, to_of_node(state->fwnode)->name,
>> +		CPUIDLE_NAME_LEN - 1);
>> +	strncpy(idle_state->desc, to_of_node(state->fwnode)->name,
>> +		CPUIDLE_NAME_LEN - 1);
>> +}
>> +
>> +static bool psci_pd_is_provider(struct device_node *np)
>> +{
>> +	struct psci_pd_provider *pd_prov, *it;
>> +
>> +	list_for_each_entry_safe(pd_prov, it, &psci_pd_providers, link) {
>> +		if (pd_prov->node == np)
>> +			return true;
>> +	}
>> +
>> +	return false;
>> +}
>> +
>> +int __init psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
>> +			struct device_node *cpu_node, u32 *psci_states)
>> +{
>> +	struct genpd_power_state *pd_states;
>> +	struct of_phandle_args args;
>> +	int ret, pd_state_count, i, state_idx, psci_idx;
>> +	u32 cpu_psci_state = psci_states[drv->state_count - 1];
>> +	struct device_node *np = of_node_get(cpu_node);
>> +
>> +	/* Walk the CPU topology to find compatible domain idle states. */
>> +	while (np) {
>> +		ret = of_parse_phandle_with_args(np, "power-domains",
>> +					"#power-domain-cells", 0, &args);
>> +		of_node_put(np);
>> +		if (ret)
>> +			return 0;
>> +
>> +		np = args.np;
>> +
>> +		/* Verify that the node represents a psci pd provider. */
>> +		if (!psci_pd_is_provider(np)) {
>> +			of_node_put(np);
>> +			return 0;
>> +		}
>> +
>> +		/* Parse for compatible domain idle states. */
>> +		ret = psci_pd_parse_states(np, &pd_states, &pd_state_count);
>> +		if (ret) {
>> +			of_node_put(np);
>> +			return ret;
>> +		}
>> +
>> +		i = 0;
>> +		while (i < pd_state_count) {
>> +
>> +			state_idx = drv->state_count;
>> +			if (state_idx >= CPUIDLE_STATE_MAX) {
>> +				pr_warn("exceeding max cpuidle states\n");
>> +				of_node_put(np);
>> +				return 0;
>> +			}
>> +
>> +			psci_idx = state_idx + i;
>> +			psci_pd_convert_states(&drv->states[state_idx + i],
>> +					&psci_states[psci_idx], &pd_states[i]);
>> +
>> +			/*
>> +			 * In the hierarchical CPU topology the master PM domain
>> +			 * idle state's DT property, "arm,psci-suspend-param",
>> +			 * doesn't contain the bits for the idle state of the CPU,
>> +			 * let's add those here.
>> +			 */
>> +			psci_states[psci_idx] |= cpu_psci_state;
> No, we can't do that. Refer to previous discussions around that.

Thanks for pointing this out.

I will remove this in the next version; we already have the CPU idle state
bits present in the cluster modes.

> [...]
Sudeep Holla Feb. 4, 2020, 3:21 p.m. UTC | #3
On Tue, Feb 04, 2020 at 10:22:42AM +0530, Maulik Shah wrote:
>
> On 2/3/2020 10:38 PM, Sudeep Holla wrote:
> > On Mon, Feb 03, 2020 at 07:05:38PM +0530, Maulik Shah wrote:
> > > [...]
> > And what's the main motivation for this if OSI is not supported in the
> > firmware?
>
> Hi Sudeep,
>
> Main motivation is to do last-man activities before the CPU cluster can
> enter a deep idle state.
>

Details on those last-man activities will help the discussion. Basically
I am wondering what they are and why they need to be done in OSPM?

> > > Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
> > > [applied to new path, resolved conflicts]
> > > Signed-off-by: Maulik Shah <mkshah@codeaurora.org>
> > > ---
> > >   drivers/cpuidle/cpuidle-psci-domain.c | 137 +++++++++++++++++++++++++++++-----
> > >   drivers/cpuidle/cpuidle-psci.c        |  41 +++++-----
> > >   drivers/cpuidle/cpuidle-psci.h        |  11 +++
> > >   3 files changed, 153 insertions(+), 36 deletions(-)
> > >
> > > diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
> > > index 423f03b..3c417f7 100644
> > > --- a/drivers/cpuidle/cpuidle-psci-domain.c
> > > +++ b/drivers/cpuidle/cpuidle-psci-domain.c
> > > @@ -26,13 +26,17 @@ struct psci_pd_provider {
> > >   };
> > >
> > >   static LIST_HEAD(psci_pd_providers);
> > > -static bool osi_mode_enabled __initdata;
> > > +static bool osi_mode_enabled;
> > >
> > >   static int psci_pd_power_off(struct generic_pm_domain *pd)
> > >   {
> > >   	struct genpd_power_state *state = &pd->states[pd->state_idx];
> > >   	u32 *pd_state;
> > >
> > > +	/* If we have failed to enable OSI mode, then abort power off. */
> > > +	if ((psci_has_osi_support()) && !osi_mode_enabled)
> > > +		return -EBUSY;
> > > +
> > Why is this needed? IIUC we don't create genpd domains if OSI is not
> > enabled.
>
> We do create genpd domains for the CPU domains; we just abort power off
> here, since the idle states are converted into regular flattened mode.
>

OK, IIRC the OSI patches from Ulf didn't add the genpds, or rather removed
them, in case of any failure to enable OSI. Has that been changed? If so,
why?

> However, genpd power off will still be used by the parent domain (the RSC
> in this case), which is kept in hierarchy in the DTSI with the cluster
> domain, to do last-man activities.
>

I am a bit confused here. Either we do OSI or PC, and what you are describing
sounds like a mix-n-match to me, and I am totally against it.

--
Regards,
Sudeep
Maulik Shah Feb. 5, 2020, 12:23 p.m. UTC | #4
On 2/4/2020 8:51 PM, Sudeep Holla wrote:
> On Tue, Feb 04, 2020 at 10:22:42AM +0530, Maulik Shah wrote:
>> On 2/3/2020 10:38 PM, Sudeep Holla wrote:
>>> On Mon, Feb 03, 2020 at 07:05:38PM +0530, Maulik Shah wrote:
>>>> [...]
>>> And what's the main motivation for this if OSI is not supported in the
>>> firmware?
>> Hi Sudeep,
>>
>> Main motivation is to do last-man activities before the CPU cluster can
>> enter a deep idle state.
>>
> Details on those last-man activities will help the discussion. Basically
> I am wondering what they are and why they need to be done in OSPM?

Hi Sudeep,

there are cases like,

Last cpu going to deepest idle mode needs to lower various resource
requirements (for eg DDR freq).

This is done by calling rpmh_flush, which sends SLEEP values for various
shared resources.
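
For illustration, a rough kernel-style sketch of such a last-man hook. The
rsc_pd layout and the rsc_flush_sleep_votes() wrapper are assumptions made up
for this sketch, not code from the series; they stand in for the rpmh_flush()
call mentioned above:

#include <linux/kernel.h>
#include <linux/pm_domain.h>

struct rsc_drv;					/* RPMh controller handle, assumed */
int rsc_flush_sleep_votes(struct rsc_drv *drv);	/* assumed wrapper around rpmh_flush() */

struct rsc_pd {
	struct generic_pm_domain genpd;
	struct rsc_drv *drv;
};

/* genpd calls the parent domain's power_off only once every CPU below
 * it has gone idle, which makes it a natural last-man hook. */
static int rsc_pd_power_off(struct generic_pm_domain *domain)
{
	struct rsc_pd *rsc = container_of(domain, struct rsc_pd, genpd);

	/* Flush the cached SLEEP votes (e.g. a lowered DDR frequency
	 * request) so the firmware applies them when the subsystem
	 * actually powers down. */
	return rsc_flush_sleep_votes(rsc->drv);
}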

>>>> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
>>>> [applied to new path, resolved conflicts]
>>>> Signed-off-by: Maulik Shah <mkshah@codeaurora.org>
>>>> ---
>>>>    drivers/cpuidle/cpuidle-psci-domain.c | 137 +++++++++++++++++++++++++++++-----
>>>>    drivers/cpuidle/cpuidle-psci.c        |  41 +++++-----
>>>>    drivers/cpuidle/cpuidle-psci.h        |  11 +++
>>>>    3 files changed, 153 insertions(+), 36 deletions(-)
>>>>
>>>> diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
>>>> index 423f03b..3c417f7 100644
>>>> --- a/drivers/cpuidle/cpuidle-psci-domain.c
>>>> +++ b/drivers/cpuidle/cpuidle-psci-domain.c
>>>> @@ -26,13 +26,17 @@ struct psci_pd_provider {
>>>>    };
>>>>
>>>>    static LIST_HEAD(psci_pd_providers);
>>>> -static bool osi_mode_enabled __initdata;
>>>> +static bool osi_mode_enabled;
>>>>
>>>>    static int psci_pd_power_off(struct generic_pm_domain *pd)
>>>>    {
>>>>    	struct genpd_power_state *state = &pd->states[pd->state_idx];
>>>>    	u32 *pd_state;
>>>>
>>>> +	/* If we have failed to enable OSI mode, then abort power off. */
>>>> +	if ((psci_has_osi_support()) && !osi_mode_enabled)
>>>> +		return -EBUSY;
>>>> +
>>> Why is this needed? IIUC we don't create genpd domains if OSI is not
>>> enabled.
>> We do create genpd domains for the CPU domains; we just abort power off here,
>> since the idle states are converted into regular flattened mode.
>>
> OK, IIRC the OSI patches from Ulf didn't add the genpds, or rather removed
> them, in case of any failure to enable OSI. Has that been changed? If so,
> why?
>
>> However, genpd power off will still be used by the parent domain (the RSC
>> in this case), which is kept in hierarchy in the DTSI with the cluster
>> domain, to do last-man activities.
>>
> I am a bit confused here. Either we do OSI or PC, and what you are describing
> sounds like a mix-n-match to me, and I am totally against it.

We still do PC on sc7180; there is no OSI.

Can you please check the v4 series? I have cleaned up this change by removing
the converter part.

Thanks,

Maulik

>
> --
> Regards,
> Sudeep
Sudeep Holla Feb. 5, 2020, 2:06 p.m. UTC | #5
On Wed, Feb 05, 2020 at 05:53:00PM +0530, Maulik Shah wrote:
>
> On 2/4/2020 8:51 PM, Sudeep Holla wrote:
> > On Tue, Feb 04, 2020 at 10:22:42AM +0530, Maulik Shah wrote:
> > > On 2/3/2020 10:38 PM, Sudeep Holla wrote:
> > > > On Mon, Feb 03, 2020 at 07:05:38PM +0530, Maulik Shah wrote:
> > > > > [...]
> > > > And what's the main motivation for this if OSI is not supported in the
> > > > firmware?
> > > Hi Sudeep,
> > >
> > > Main motivation is to do last-man activities before the CPU cluster can
> > > enter a deep idle state.
> > >
> > Details on those last-man activities will help the discussion. Basically
> > I am wondering what they are and why they need to be done in OSPM?
>
> Hi Sudeep,
>
> there are cases like,
>
> Last cpu going to deepest idle mode needs to lower various resource
> requirements (for eg DDR freq).
>

In PC mode, only PSCI implementation knows the last man and there shouldn't
be any notion of it in OS. If you need it, you may need OSI. You are still
mixing up the things. NACK for any such approach, sorry.

--
Regards,
Sudeep
Ulf Hansson Feb. 5, 2020, 3:55 p.m. UTC | #6
On Wed, 5 Feb 2020 at 15:06, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> On Wed, Feb 05, 2020 at 05:53:00PM +0530, Maulik Shah wrote:
> >
> > On 2/4/2020 8:51 PM, Sudeep Holla wrote:
> > > On Tue, Feb 04, 2020 at 10:22:42AM +0530, Maulik Shah wrote:
> > > > On 2/3/2020 10:38 PM, Sudeep Holla wrote:
> > > > > On Mon, Feb 03, 2020 at 07:05:38PM +0530, Maulik Shah wrote:
> > > > > > [...]
> > > > > And what's the main motivation for this if OSI is not supported in the
> > > > > firmware?
> > > > Hi Sudeep,
> > > >
> > > > Main motivation is to do last-man activities before the CPU cluster can
> > > > enter a deep idle state.
> > > >
> > > Details on those last-man activities will help the discussion. Basically
> > > I am wondering what they are and why they need to be done in OSPM?
> >
> > Hi Sudeep,
> >
> > there are cases like,
> >
> > Last cpu going to deepest idle mode needs to lower various resource
> > requirements (for eg DDR freq).
> >
>
> In PC mode, only PSCI implementation knows the last man and there shouldn't
> be any notion of it in OS. If you need it, you may need OSI. You are still
> mixing up the things. NACK for any such approach, sorry.

Sudeep, I don't quite agree with your NACK to this. At least not yet. :-)

I do agree that the best suited solution seems to be OSI, so as to
support this kind of SoC requirement.

However, if for some reason the PC mode is being used, we could still
allow Linux to control "last-man activities" as it knows what each CPU
has voted for when going idle. Yes, the PSCI FW decides in the end,
but that doesn't really matter. Or is there another technical reason
to why you object?

As a matter of fact, if we allow support for PC mode with
"last-man-activities", it would allow us to make a fair
performance/energy comparison between the two PSCI CPU suspend modes,
for the same SoC. I would be thrilled about looking into doing such
tests, I bet you are as well!?

Kind regards
Uffe
Sudeep Holla Feb. 5, 2020, 4:18 p.m. UTC | #7
On Wed, Feb 05, 2020 at 04:55:17PM +0100, Ulf Hansson wrote:
> On Wed, 5 Feb 2020 at 15:06, Sudeep Holla <sudeep.holla@arm.com> wrote:
> >
> > On Wed, Feb 05, 2020 at 05:53:00PM +0530, Maulik Shah wrote:
> > >
> > > On 2/4/2020 8:51 PM, Sudeep Holla wrote:
> > > > On Tue, Feb 04, 2020 at 10:22:42AM +0530, Maulik Shah wrote:
> > > > > On 2/3/2020 10:38 PM, Sudeep Holla wrote:
> > > > > > On Mon, Feb 03, 2020 at 07:05:38PM +0530, Maulik Shah wrote:
> > > [...]
> >
> > In PC mode, only PSCI implementation knows the last man and there shouldn't
> > be any notion of it in OS. If you need it, you may need OSI. You are still
> > mixing up the things. NACK for any such approach, sorry.
>
> Sudeep, I don't quite agree with your NACK to this. At least not yet. :-)
>

OK, I am not surprised :-)

> > I do agree that the best suited solution seems to be OSI, so as to
> > support this kind of SoC requirement.
>

That's the main point. We need to draw some line as to what we want to do
with PC and OSI mode. If we plan to take up all last-man responsibility
in the kernel, what's the point in not supporting OSI in the firmware
then? I can't buy it yet.

> However, if for some reason the PC mode is being used, we could still
> allow Linux to control "last-man activities" as it knows what each CPU
> has voted for when going idle. Yes, the PSCI FW decides in the end,
> but that doesn't really matter. Or is there another technical reason
> to why you object?
>

Precisely, the FW decides, so let it. Just because we can do it in the kernel
doesn't mean we must do it. It's clear in the spec, and doing it in the
kernel will be sub-optimal if the PSCI f/w aborted entering the deeper
state that required some action in the first place.

> As a matter of fact, if we allow support for PC mode with
> "last-man-activities", it would allow us to make a fair
> performance/energy comparison between the two PSCI CPU suspend modes,
> for the same SoC. I would be thrilled about looking into doing such
> tests, I bet you are as well!?
>

I was, but not anymore, especially if we want such changes in the kernel
to do so.

Just use OSI as that was the point of adding all these after years of
discussion claiming it's more optimal compared to PC. Now telling that
you need more changes to compare it with PC just doesn't make any sense
at all to me.

--
Regards,
Sudeep
Ulf Hansson Feb. 6, 2020, 8:45 a.m. UTC | #8
On Wed, 5 Feb 2020 at 17:18, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> On Wed, Feb 05, 2020 at 04:55:17PM +0100, Ulf Hansson wrote:
> > On Wed, 5 Feb 2020 at 15:06, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > >
> > > On Wed, Feb 05, 2020 at 05:53:00PM +0530, Maulik Shah wrote:
> > > >
> > > > On 2/4/2020 8:51 PM, Sudeep Holla wrote:
> > > > > On Tue, Feb 04, 2020 at 10:22:42AM +0530, Maulik Shah wrote:
> > > > > > On 2/3/2020 10:38 PM, Sudeep Holla wrote:
> > > > > > > On Mon, Feb 03, 2020 at 07:05:38PM +0530, Maulik Shah wrote:
> > > > [...]
> > >
> > > In PC mode, only PSCI implementation knows the last man and there shouldn't
> > > be any notion of it in OS. If you need it, you may need OSI. You are still
> > > mixing up the things. NACK for any such approach, sorry.
> >
> > Sudeep, I don't quite agree with your NACK to this. At least not yet. :-)
> >
>
> OK, I am not surprised :-)

Apologies for troubling you again. :-)

>
> > I do agree that the best suited solution seems to be OSI, so as to
> > support this kind of SoC requirement.
> >
>
> That's the main point. We need to draw some line as to what we want to do
> with PC and OSI mode. If we plan to take up all last-man responsibility
> in the kernel, what's the point in not supporting OSI in the firmware
> then? I can't buy it yet.
>
> > However, if for some reason the PC mode is being used, we could still
> > allow Linux to control "last-man activities" as it knows what each CPU
> > has voted for when going idle. Yes, the PSCI FW decides in the end,
> > but that doesn't really matter. Or is there another technical reason
> > to why you object?
> >
>
> Precisely, the FW decides, so let it. Just because we can do it in the kernel
> doesn't mean we must do it. It's clear in the spec, and doing it in the
> kernel will be sub-optimal if the PSCI f/w aborted entering the deeper
> state that required some action in the first place.

Yes, it may be suboptimal for PC-mode.

On the other hand, we already fire CPU PM notifiers while entering/exiting
idle states (except for WFI). Those may also be suboptimal for similar
kinds of reasons.

Maybe it's not the best argument, but it sounds like allowing us to
control cluster power on/off notifications for last-man activities
would just conform to the similar behaviour we already have. No?

>
> > As a matter of fact, if we allow support for PC mode with
> > "last-man-activities", it would allow us to make a fair
> > performance/energy comparison between the two PSCI CPU suspend modes,
> > for the same SoC. I would be thrilled about looking into doing such
> > tests, I bet you are as well!?
> >
>
> I was, but not anymore, especially if we want such changes in the kernel
> to do so.
>
> Just use OSI as that was the point of adding all these after years of
> discussion claiming it's more optimal compared to PC. Now telling that
> you need more changes to compare it with PC just doesn't make any sense
> at all to me.

Fair enough.

I was just pondering over whether there are other reasons why we may want this.

One other thing that could be problematic to support is when other
resources, I/O controllers for example, share the same power rail as a
cluster. When such a controller is in use, idle states of the cluster
must be prevented. Without using genpd to model the CPU topology, it may
be difficult to deal with this.

Of course, using PC mode when trying to deal with this
platform/board-requirement would also be suboptimal. In other words,
your argument about when to use OSI vs PC mode still stands.
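
As a sketch of that modeling from the I/O driver's side, assuming the
controller's DT node carried power-domains = <&cluster_pd> so genpd attaches
it to the cluster domain at probe (the driver and names are hypothetical):

#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

static int io_ctrl_probe(struct platform_device *pdev)
{
	/* With the assumed power-domains wiring, runtime PM references
	 * taken on this device propagate to the cluster genpd. */
	pm_runtime_enable(&pdev->dev);

	/* Hold the shared rail on for as long as the controller is in
	 * use; this blocks the cluster idle states via genpd. */
	pm_runtime_get_sync(&pdev->dev);

	/* ... program the controller ... */

	pm_runtime_put(&pdev->dev);	/* allow cluster idle again */
	return 0;
}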

Kind regards
Uffe
Lina Iyer Feb. 6, 2020, 8:45 p.m. UTC | #9
On Thu, Feb 06 2020 at 01:46 -0700, Ulf Hansson wrote:
>On Wed, 5 Feb 2020 at 17:18, Sudeep Holla <sudeep.holla@arm.com> wrote:
>> [...]
>> I was, but not anymore, especially if we want such changes in the kernel
>> to do so.
>>
>> Just use OSI as that was the point of adding all these after years of
>> discussion claiming it's more optimal compared to PC. Now telling that
>> you need more changes to compare it with PC just doesn't make any sense
>> at all to me.
>
>Fair enough.
>
>I was just pondering over whether there are other reasons why we may want this.
>
>One other thing that could be problematic to support is when other
>resources, I/O controllers for example, share the same power rail as a
>cluster. When such a controller is in use, idle states of the cluster
>must be prevented. Without using genpd to model the CPU topology, it may
>be difficult to deal with this.
>
>Of course, using PC mode when trying to deal with this
>platform/board-requirement would also be suboptimal. In other words,
>your argument about when to use OSI vs PC mode still stands.
>
I understand the arguments for using PC vs OSI and agree with it. But
what in PSCI is against Linux knowing when the last core is powering
down when the PSCI is configured to do only Platform Coordinated?
There should not be any objection to drivers knowing when all the cores
are powered down, be it reference counting CPU PM notifications or using
a cleaner approach like this where the genpd framework does everything
cleanly and gives a nice callback. The ARM architecture allows for different
aspects of CPU access to be handled at different levels. I see this as an
extension of that approach.
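
A minimal sketch of the reference-counting variant, using the existing CPU PM
notifier API; this only approximates the last man, since in PC mode the
firmware still takes the final decision and a concurrent wakeup can make the
count stale:

#include <linux/atomic.h>
#include <linux/cpu_pm.h>
#include <linux/cpumask.h>
#include <linux/notifier.h>
#include <linux/printk.h>

static atomic_t cpus_in_idle = ATOMIC_INIT(0);

static int last_man_cpu_pm_notify(struct notifier_block *nb,
				  unsigned long action, void *data)
{
	switch (action) {
	case CPU_PM_ENTER:
		/* The CPU that raises the count to num_online_cpus() is
		 * probably the last man: do last-man work here. */
		if (atomic_inc_return(&cpus_in_idle) == num_online_cpus())
			pr_debug("last CPU entering idle\n");
		break;
	case CPU_PM_ENTER_FAILED:
	case CPU_PM_EXIT:
		atomic_dec(&cpus_in_idle);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block last_man_nb = {
	.notifier_call = last_man_cpu_pm_notify,
};

/* Registered once during init: cpu_pm_register_notifier(&last_man_nb); */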

-- Lina
Bjorn Andersson Feb. 6, 2020, 9:11 p.m. UTC | #10
On Wed 05 Feb 06:06 PST 2020, Sudeep Holla wrote:

> On Wed, Feb 05, 2020 at 05:53:00PM +0530, Maulik Shah wrote:
> >
> > On 2/4/2020 8:51 PM, Sudeep Holla wrote:
> > > On Tue, Feb 04, 2020 at 10:22:42AM +0530, Maulik Shah wrote:
> > > > On 2/3/2020 10:38 PM, Sudeep Holla wrote:
> > > > > On Mon, Feb 03, 2020 at 07:05:38PM +0530, Maulik Shah wrote:
> > > > > > [...]
> > > > > And what's the main motivation for this if OSI is not supported in the
> > > > > firmware?
> > > > Hi Sudeep,
> > > >
> > > > Main motivation is to do last-man activities before the CPU cluster can
> > > > enter a deep idle state.
> > > >
> > > Details on those last-man activities will help the discussion. Basically
> > > I am wondering what they are and why they need to be done in OSPM?
> >
> > Hi Sudeep,
> >
> > there are cases like,
> >
> > Last cpu going to deepest idle mode needs to lower various resource
> > requirements (for eg DDR freq).
> >
> 
> In PC mode, only PSCI implementation knows the last man and there shouldn't
> be any notion of it in OS. If you need it, you may need OSI. You are still
> mixing up the things. NACK for any such approach, sorry.
> 

Forgive me if I'm misunderstanding PSCI's role here, but doesn't it deal
with the power management of the "processor subsystem" in the SoC?


In the Qualcomm platforms most resources (voltage rails, clocks, etc.)
are controlled through a power controller that provides controls for a
state when the CPU subsystem is running and one for when it's asleep. This
allows non-CPU-related devices to control whether resources that are shared
with the CPU subsystem should be kept on when the last CPU/cluster goes
down.

An example of this would be the display controller voting to keep a
voltage rail on after the CPU subsystem collapses, because the display
is still on.

But as long as the CPU subsystem is running it will keep these resources
available and there's no need to change these votes (e.g. if the display
is turned on and off while the CPU is active, the sleep-requests cancel
out), so they are simply cached/batched up in the RPMh driver, and what
Maulik's series is attempting to do is to flush the cached values when
Linux believes that the firmware might decide to enter a lower power
state.
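
For reference, a hedged sketch of that active/sleep split using the mainline
rpmh_write() API; the register address and data values are invented:

#include <linux/device.h>
#include <soc/qcom/rpmh.h>
#include <soc/qcom/tcs.h>

static int display_vote_rail(const struct device *rpmh_dev, bool display_on)
{
	struct tcs_cmd cmd = {
		.addr = 0x30010,		/* invented rail address */
		.data = display_on ? 1 : 0,
	};
	int ret;

	/* Active-set vote: applies while the CPU subsystem is running. */
	ret = rpmh_write(rpmh_dev, RPMH_ACTIVE_ONLY_STATE, &cmd, 1);
	if (ret)
		return ret;

	/* Sleep-set vote: cached/batched in the RPMh driver, and only
	 * flushed to hardware when the subsystem may really power down. */
	return rpmh_write(rpmh_dev, RPMH_SLEEP_STATE, &cmd, 1);
}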

Regards,
Bjorn
Sudeep Holla Feb. 7, 2020, 11:20 a.m. UTC | #11
On Thu, Feb 06, 2020 at 01:45:14PM -0700, Lina Iyer wrote:
> On Thu, Feb 06 2020 at 01:46 -0700, Ulf Hansson wrote:
> > On Wed, 5 Feb 2020 at 17:18, Sudeep Holla <sudeep.holla@arm.com> wrote:
> > > [...]
> > > I was, but not anymore, especially if we want such changes in the kernel
> > > to do so.
> > > 
> > > Just use OSI as that was the point of adding all these after years of
> > > discussion claiming it's more optimal compared to PC. Now telling that
> > > you need more changes to compare it with PC just doesn't make any sense
> > > at all to me.
> > 
> > Fair enough.
> > 
> > I was just pondering over whether there are other reasons why we may want this.
> >
> > One other thing that could be problematic to support is when other
> > resources, I/O controllers for example, share the same power rail as a
> > cluster. When such a controller is in use, idle states of the cluster
> > must be prevented. Without using genpd to model the CPU topology, it may
> > be difficult to deal with this.
> >
> > Of course, using PC mode when trying to deal with this
> > platform/board-requirement would also be suboptimal. In other words,
> > your argument about when to use OSI vs PC mode still stands.
> > 
> I understand the arguments for using PC vs OSI and agree with it. But
> what in PSCI is against Linux knowing when the last core is powering
> down when the PSCI is configured to do only Platform Coordinated?

Nothing :D. But knowing the evolution and reasons for adding OSI in the
PSCI specification and having argued about benefits of OSI over PC for
years and finally when we have it in mainline, this argument of using
PC for exact reasons why OSI evolved is something I can't understand
and I am confused.

> There should not be any objection to drivers knowing when all the cores
> are powered down, be it reference counting CPU PM notifications or using
> a cleaner approach like this where the genpd framework does everything
> cleanly and gives a nice callback. The ARM architecture allows for different
> aspects of CPU access to be handled at different levels. I see this as an
> extension of that approach.
>

One thing that was repeatedly pointed out during OSI patch review was no
extra overhead for PC mode where firmware can make decisions. So, just
use OSI now and let us be done with this discussion of OSI vs PC. If PC
is what you think you need for future, we can revert all OSI changes and
start discussing again :-)

--
Regards,
Sudeep
Sudeep Holla Feb. 7, 2020, 11:25 a.m. UTC | #12
On Thu, Feb 06, 2020 at 01:11:33PM -0800, Bjorn Andersson wrote:
> On Wed 05 Feb 06:06 PST 2020, Sudeep Holla wrote:
>
> > On Wed, Feb 05, 2020 at 05:53:00PM +0530, Maulik Shah wrote:
> > >
> > > On 2/4/2020 8:51 PM, Sudeep Holla wrote:
> > > > On Tue, Feb 04, 2020 at 10:22:42AM +0530, Maulik Shah wrote:
> > > > > On 2/3/2020 10:38 PM, Sudeep Holla wrote:
> > > > > > On Mon, Feb 03, 2020 at 07:05:38PM +0530, Maulik Shah wrote:
> > > > > > > [...]
> > > > > > firmware ?
> > > > > Hi Sudeep,
> > > > >
> > > > > Main motivation is to do last-man activities before the CPU cluster can
> > > > > enter a deep idle state.
> > > > >
> > > > Details on those last-man activities will help the discussion. Basically
> > > > I am wondering what they are and why they need to done in OSPM ?
> > >
> > > Hi Sudeep,
> > >
> > > there are cases like,
> > >
> > > Last cpu going to deepest idle mode need to lower various resoruce
> > > requirements (for eg DDR freq).
> > >
> >
> > In PC mode, only PSCI implementation knows the last man and there shouldn't
> > be any notion of it in OS. If you need it, you may need OSI. You are still
> > mixing up the things. NACK for any such approach, sorry.
> >
>
> Forgive me if I'm misunderstanding PSCI's role here, but doesn't it deal
> with the power management of the "processor subsystem" in the SoC?
>

Yes.

> In the Qualcomm platforms most resources (voltage rails, clocks, etc.)
> are controlled through a power controller that provides controls for a
> state when the CPU subsystem is running and one for when it's asleep. This
> allows non-CPU-related devices to control whether resources that are shared
> with the CPU subsystem should be kept on when the last CPU/cluster goes
> down.
>

I understand that.

> An example of this would be the display controller voting to keep a
> voltage rail on after the CPU subsystem collapses, because the display
> is still on.
>

OK

> But as long as the CPU subsystem is running it will keep these resources
> available and there's no need to change these votes (e.g. if the display
> is turned on and off while the CPU is active, the sleep-requests cancel
> out), so they are simply cached/batched up in the RPMh driver, and what
> Maulik's series is attempting to do is to flush the cached values when
> Linux believes that the firmware might decide to enter a lower power
> state.
>

I understand all this. What I am arguing is that in PC mode, the PSCI
firmware is the one who needs to vote, not OSPM, because it is
responsible for pulling the plugs off the CPU/cluster. So let us not
bring that into OSPM. OSI was invented to do all such crazy things in OSPM,
please feel free to play with that ;-)

--
Regards,
Sudeep
Ulf Hansson Feb. 7, 2020, 12:32 p.m. UTC | #13
[...]

> > I understand the arguments for using PC vs OSI and agree with it. But
> > what in PSCI is against Linux knowing when the last core is powering
> > down when the PSCI is configured to do only Platform Coordinated?
>
> Nothing :D. But knowing the evolution and reasons for adding OSI in the
> PSCI specification and having argued about benefits of OSI over PC for
> years and finally when we have it in mainline, this argument of using
> PC for exact reasons why OSI evolved is something I can't understand
> and I am confused.
>
> > There should not be any objection to drivers knowing when all the cores
> > are powered down, be it reference counting CPU PM notifications or using
> > a cleaner approach like this where the genpd framework does everything
> > cleanly and gives a nice callback. The ARM architecture allows for different
> > aspects of CPU access to be handled at different levels. I see this as an
> > extension of that approach.
> >
>
> One thing that was repeatedly pointed out during OSI patch review was no
> extra overhead for PC mode where firmware can make decisions. So, just
> use OSI now and let us be done with this discussion of OSI vs PC. If PC
> is what you think you need for future, we can revert all OSI changes and
> start discussing again :-)

Just to make it clear, I fully agree with you in regard to the overhead
for PC-mode. This is especially critical for ARM SoCs with lots of
cores, I assume.

However, the overhead you refer to is *only* going to be present in
the case when the DTS has the hierarchical CPU topology description with
"power-domains". Because that is *optional* to use, I am expecting
only those SoCs/platforms that need to manage last-man activities to
use this layout; the others will remain unaffected.

That said, does that address your concern?

Kind regards
Uffe
Lorenzo Pieralisi Feb. 7, 2020, 2:48 p.m. UTC | #14
On Fri, Feb 07, 2020 at 01:32:28PM +0100, Ulf Hansson wrote:
> [...]
> 
> > > I understand the arguments for using PC vs OSI and agree with it. But
> > > what in PSCI is against Linux knowing when the last core is powering
> > > down when the PSCI is configured to do only Platform Coordinated?
> >
> > Nothing :D. But knowing the evolution and reasons for adding OSI in the
> > PSCI specification and having argued about benefits of OSI over PC for
> > years and finally when we have it in mainline, this argument of using
> > PC for exact reasons why OSI evolved is something I can't understand
> > and I am confused.
> >
> > > There should not be any objection to drivers knowing when all the cores
> > > are powered down, be it reference counting CPU PM notifications or using
> > > a cleaner approach like this where the genpd framework does everything
> > > cleanly and gives a nice callback. The ARM architecture allows for different
> > > aspects of CPU access to be handled at different levels. I see this as an
> > > extension of that approach.
> > >
> >
> > One thing that was repeatedly pointed out during OSI patch review was no
> > extra overhead for PC mode where firmware can make decisions. So, just
> > use OSI now and let us be done with this discussion of OSI vs PC. If PC
> > is what you think you need for future, we can revert all OSI changes and
> > start discussing again :-)
> 
> Just to make it clear, I fully agree with you in regard to the overhead
> for PC-mode. This is especially critical for ARM SoCs with lots of
> cores, I assume.
>
> However, the overhead you refer to is *only* going to be present in
> the case when the DTS has the hierarchical CPU topology description with
> "power-domains". Because that is *optional* to use, I am expecting
> only those SoCs/platforms that need to manage last-man activities to
> use this layout; the others will remain unaffected.

In PC mode not only is there no need, but it is wrong to manage
any last-man activity in the kernel. I wonder why we are still
talking about this, to be honest.

Code to handle PSCI platform coordinated mode has been/is in
the kernel today, and that's all that is needed according to the PSCI
specifications.

Thanks,
Lorenzo
Ulf Hansson Feb. 7, 2020, 3:52 p.m. UTC | #15
On Fri, 7 Feb 2020 at 15:48, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
>
> On Fri, Feb 07, 2020 at 01:32:28PM +0100, Ulf Hansson wrote:
> > [...]
>
> In PC mode, not only is there no need to manage any last-man activity
> in the kernel, it is wrong to do so. I wonder why we are still talking
> about this, to be honest.

I guess the discussion is here because there is a use case to consider now.

For sure, we agree on what is the best solution. But this is rather
about what we can do to improve the current situation, if we should do
anything.

>
> Code to handle PSCI platform coordinated mode has been, and is, in
> the kernel today, and that is all that is needed according to the
> PSCI specification.

PSCI specifies CPU power management, not SoC power management. If
these things were completely decoupled, I would agree with you, but
that's not the case. Maybe SCMI, etc., will help with this in the future.

Anyway, my fear is that not many ARM vendors implement OSI support,
yet they still have "last-man-activities" to deal with. This is not
only QCOM.

I guess an option would be to add OSI support to the public ARM
Trusted Firmware, then we could more easily point to that - rather
than trying to mitigate the problem on the kernel side.

Kind regards
Uffe
Sudeep Holla Feb. 7, 2020, 4:05 p.m. UTC | #16
On Fri, Feb 07, 2020 at 01:32:28PM +0100, Ulf Hansson wrote:
> [...]
>
> > [...]
>
> Just to make it clear, I fully agree with you with regard to the
> overhead for PC-mode. This is especially critical for ARM SoCs with
> lots of cores, I assume.
>
> However, the overhead you refer to is *only* going to be present when
> the DTS has the hierarchical CPU topology description with
> "power-domains". Because that is *optional* to use, I expect only
> those SoCs/platforms that need to manage last-man activities to use
> this layout; the others will remain unaffected.
>
> That said, does that address your concern?
>

I have already expressed my view and concerns in response to Lina and
Bjorn's emails.

--
Regards,
Sudeep
Sudeep Holla Feb. 7, 2020, 4:15 p.m. UTC | #17
On Fri, Feb 07, 2020 at 04:52:52PM +0100, Ulf Hansson wrote:
> On Fri, 7 Feb 2020 at 15:48, Lorenzo Pieralisi
> <lorenzo.pieralisi@arm.com> wrote:
> >
> > On Fri, Feb 07, 2020 at 01:32:28PM +0100, Ulf Hansson wrote:
> > > [...]
> > >
> > > > > I understand the arguments for using PC vs OSI and agree with it. But
> > > > > what in PSCI is against Linux knowing when the last core is powering
> > > > > down when the PSCI is configured to do only Platform Cordinated.
> > > >
> > > > Nothing :D. But knowing the evolution and reasons for adding OSI in the
> > > > PSCI specification and having argued about benefits of OSI over PC for
> > > > years and finally when we have it in mainline, this argument of using
> > > > PC for exact reasons why OSI evolved is something I can't understand
> > > > and I am confused.
> > > >
> > > > > There should not be any objection to drivers knowing when all the cores
> > > > > are powered down, be it reference counting CPU PM notifications or using
> > > > > a cleaner approach like this where GendPD framwork does everything
> > > > > cleanly and gives a nice callback. ARM architecture allows for different
> > > > > aspects of CPU access be handled at different levels. I see this as an
> > > > > extension of that approach.
> > > > >
> > > >
> > > > One thing that was repeatedly pointed out during OSI patch review was no
> > > > extra overhead for PC mode where firmware can make decisions. So, just
> > > > use OSI now and let us be done with this discussion of OSI vs PC. If PC
> > > > is what you think you need for future, we can revert all OSI changes and
> > > > start discussing again :-)
> > >
> > > Just to make it clear, I fully agree with you in regards to overhead
> > > for PC-mode. This is especially critical for ARM SoCs with lots of
> > > cores, I assume.
> > >
> > > However, the overhead you refer to, is *only* going to be present in
> > > case when the DTS has the hierarchical CPU topology description with
> > > "power-domains". Because, that is *optional* to use, I am expecting
> > > only those SoC/platforms that needs to manage last-man activities to
> > > use this layout, the others will remain unaffected.
> >
> > In PC mode, not only is there no need to manage any last-man activity
> > in the kernel, it is wrong to do so. I wonder why we are still talking
> > about this, to be honest.
>
> I guess the discussion is here because there is a use case to consider now.
>

If this is what Bjorn presented in his email, I have responded to that.
If it's any different, please let us know the complete details.

> > For sure, we agree on what is the best solution. But this is rather
> > about what we can do to improve the current situation, if we should do
> > anything.
>

Sure, and I haven't found a reason to do that in OSPM yet (as part of
the discussion in this thread).

> >
> > > Code to handle PSCI platform coordinated mode has been, and is, in
> > > the kernel today, and that is all that is needed according to the
> > > PSCI specification.
> >
> > PSCI specifies CPU power management, not SoC power management. If
> > these things were completely decoupled, I would agree with you, but
> > that's not the case. Maybe SCMI, etc., will help with this in the future.
>

Why does that not work even if they are not decoupled? The IO/devices
that are shared with the CPU get their votes from OSPM, and the
CPU/cluster gets its votes from PSCI in PC mode. There is no argument
there, but why it needs to be done in OSPM is the objection here.
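As a rough illustration of what I mean (the vote register below is
hypothetical, the real resource interface is platform specific), in PC
mode a device can simply publish its vote the moment it changes, e.g.
from its runtime PM callbacks, and the firmware aggregates everything
itself when it picks a CPU/cluster state:

#include <linux/io.h>
#include <linux/pm_runtime.h>

/* Hypothetical MMIO register through which the FW sees device votes. */
static void __iomem *vote_reg;

/* Each vote change is visible to the FW immediately; the FW combines
 * all votes on its own, so OSPM never needs last-man knowledge. */
static int mydev_runtime_suspend(struct device *dev)
{
        writel(0, vote_reg);    /* drop our vote right away */
        return 0;
}

static int mydev_runtime_resume(struct device *dev)
{
        writel(1, vote_reg);    /* raise our vote right away */
        return 0;
}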

> Anyway, my fear is that not many ARM vendors implement OSI support,
> yet they still have "last-man-activities" to deal with. This is not
> only QCOM.
>

I am interested to hear from them. And the same question applies to
them too, as above.

> I guess an option would be to add OSI support to the public ARM
> Trusted Firmware, then we could more easily point to that - rather
> than trying to mitigate the problem on the kernel side.
>

I would say go for it. But don't mix the responsibilities of OSPM in
PC vs OSI. We have discussed this for years and I hope this discussion
ends ASAP. I don't see any point in dragging this any further.

--
Regards,
Sudeep
Ulf Hansson Feb. 8, 2020, 10:25 a.m. UTC | #18
On Fri, 7 Feb 2020 at 17:15, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> On Fri, Feb 07, 2020 at 04:52:52PM +0100, Ulf Hansson wrote:
> > On Fri, 7 Feb 2020 at 15:48, Lorenzo Pieralisi
> > <lorenzo.pieralisi@arm.com> wrote:
> > >
> > > On Fri, Feb 07, 2020 at 01:32:28PM +0100, Ulf Hansson wrote:
> > > > [...]
> > > >
> > > > > > I understand the arguments for using PC vs OSI and agree with it. But
> > > > > > what in PSCI is against Linux knowing when the last core is powering
> > > > > > down when the PSCI is configured to do only Platform Cordinated.
> > > > >
> > > > > Nothing :D. But knowing the evolution and reasons for adding OSI in the
> > > > > PSCI specification and having argued about benefits of OSI over PC for
> > > > > years and finally when we have it in mainline, this argument of using
> > > > > PC for exact reasons why OSI evolved is something I can't understand
> > > > > and I am confused.
> > > > >
> > > > > > There should not be any objection to drivers knowing when all the cores
> > > > > > are powered down, be it reference counting CPU PM notifications or using
> > > > > > a cleaner approach like this where GendPD framwork does everything
> > > > > > cleanly and gives a nice callback. ARM architecture allows for different
> > > > > > aspects of CPU access be handled at different levels. I see this as an
> > > > > > extension of that approach.
> > > > > >
> > > > >
> > > > > One thing that was repeatedly pointed out during OSI patch review was no
> > > > > extra overhead for PC mode where firmware can make decisions. So, just
> > > > > use OSI now and let us be done with this discussion of OSI vs PC. If PC
> > > > > is what you think you need for future, we can revert all OSI changes and
> > > > > start discussing again :-)
> > > >
> > > > Just to make it clear, I fully agree with you in regards to overhead
> > > > for PC-mode. This is especially critical for ARM SoCs with lots of
> > > > cores, I assume.
> > > >
> > > > However, the overhead you refer to, is *only* going to be present in
> > > > case when the DTS has the hierarchical CPU topology description with
> > > > "power-domains". Because, that is *optional* to use, I am expecting
> > > > only those SoC/platforms that needs to manage last-man activities to
> > > > use this layout, the others will remain unaffected.
> > >
> > > In PC mode, not only is there no need to manage any last-man activity
> > > in the kernel, it is wrong to do so. I wonder why we are still talking
> > > about this, to be honest.
> >
> > I guess the discussion is here because there is a use case to consider now.
> >
>
> If this is what Bjorn presented in his email, I have responded to that.
> If it's any different, please let us know the complete details.
>
> > For sure, we agree on what is the best solution. But this is rather
> > about what we can do to improve the current situation, if we should do
> > anything.
> >
>
> Sure, and I haven't found a reason to do that in OSPM yet (as part of
> the discussion in this thread).
>
> > >
> > > Code to handle PSCI platform coordinated mode has been, and is, in
> > > the kernel today, and that is all that is needed according to the
> > > PSCI specification.
> >
> > PSCI specifies CPU power management, not SoC power management. If
> > these things were completely decoupled, I would agree with you, but
> > that's not the case. Maybe SCMI, etc., will help with this in the future.
> >
>
> Why does that not work even if they are not decoupled? The IO/devices
> that are shared with the CPU get their votes from OSPM, and the
> CPU/cluster gets its votes from PSCI in PC mode. There is no argument
> there, but why it needs to be done in OSPM is the objection here.

That implies the votes from I/O devices need to reach the FW
immediately when the vote is done. No caching or other optimizations
can be done at OSPM.

In principle, the FW needs to have an always up-to-date view of the
votes, etc. That sounds highly inefficient, from both an energy and a
latency point of view, at least in my opinion.
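To show the shape of the optimization I have in mind, a rough sketch
(the fw_send_votes() helper and the vote words are hypothetical, and
the locking is simplified), where OSPM only aggregates votes and
flushes them once, from the genpd ->power_off() path of the CPU PM
domain, which is exactly the last-man hook this series provides:

#include <linux/pm_domain.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(vote_lock);
static u32 cached_votes;
static bool votes_dirty;

void fw_send_votes(u32 votes);  /* hypothetical firmware call */

/* Device votes only update the cache; no firmware traffic here. */
static void cache_vote(u32 mask, bool set)
{
        spin_lock(&vote_lock);
        cached_votes = set ? cached_votes | mask : cached_votes & ~mask;
        votes_dirty = true;
        spin_unlock(&vote_lock);
}

/* Runs when the last CPU in the domain enters idle; only at this
 * point does the FW need the up-to-date view of the votes. */
static int cluster_pd_power_off(struct generic_pm_domain *pd)
{
        spin_lock(&vote_lock);
        if (votes_dirty) {
                fw_send_votes(cached_votes);
                votes_dirty = false;
        }
        spin_unlock(&vote_lock);
        return 0;
}

In PC mode, by contrast, every cache_vote() would have to be a
fw_send_votes() of its own.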

>
> > Anyway, my fear is that not many ARM vendors implement OSI support,
> > yet they still have "last-man-activities" to deal with. This is not
> > only QCOM.
> >
>
> I am interested to hear from them. And the same question applies to
> them too, as above.

I have been talking to some of them. But, yes, we need to hear more from them.

>
> > I guess an option would be to add OSI support to the public ARM
> > Trusted Firmware, then we could more easily point to that - rather
> > than trying to mitigate the problem on the kernel side.
> >
>
> I would say go for it. But don't mix the responsibilities of OSPM in
> PC vs OSI. We have discussed this for years and I hope this discussion
> ends ASAP. I don't see any point in dragging this any further.

Okay.

Kind regards
Uffe
Sudeep Holla Feb. 10, 2020, 10:31 a.m. UTC | #19
On Sat, Feb 08, 2020 at 11:25:18AM +0100, Ulf Hansson wrote:
> On Fri, 7 Feb 2020 at 17:15, Sudeep Holla <sudeep.holla@arm.com> wrote:
> >
> > On Fri, Feb 07, 2020 at 04:52:52PM +0100, Ulf Hansson wrote:
> > > On Fri, 7 Feb 2020 at 15:48, Lorenzo Pieralisi
> > > <lorenzo.pieralisi@arm.com> wrote:
> > > >
> > > > On Fri, Feb 07, 2020 at 01:32:28PM +0100, Ulf Hansson wrote:
> > > > > [...]
> > > >
> > > > In PC mode, not only is there no need to manage any last-man activity
> > > > in the kernel, it is wrong to do so. I wonder why we are still talking
> > > > about this, to be honest.
> > >
> > > I guess the discussion is here because there is a use case to consider now.
> > >
> >
> > If this is what Bjorn presented in his email, I have responded to that.
> > If it's any different, please let us know the complete details.
> >
> > > For sure, we agree on what is the best solution. But this is rather
> > > about what we can do to improve the current situation, if we should do
> > > anything.
> > >
> >
> > Sure, and I haven't found a reason to do that in OSPM yet (as part of
> > the discussion in this thread).
> >
> > > >
> > > > Code to handle PSCI platform coordinated mode has been, and is, in
> > > > the kernel today, and that is all that is needed according to the
> > > > PSCI specification.
> > >
> > > PSCI specifies CPU power management, not SoC power management. If
> > > these things were completely decoupled, I would agree with you, but
> > > that's not the case. Maybe SCMI, etc., will help with this in the future.
> > >
> >
> > Why does that not work even if they are not decoupled? The IO/devices
> > that are shared with the CPU get their votes from OSPM, and the
> > CPU/cluster gets its votes from PSCI in PC mode. There is no argument
> > there, but why it needs to be done in OSPM is the objection here.
>
> That implies the votes from I/O devices need to reach the FW
> immediately when the vote is done. No caching or other optimizations
> can be done at OSPM.
>
> In principle, the FW needs to have an always up-to-date view of the
> votes, etc. That sounds highly inefficient, from both an energy and a
> latency point of view, at least in my opinion.
>

Sorry, but I need to reiterate: use OSI if you need all those fancy
caching and other optimizations.

> >
> > > Anyway, my fear is that not many ARM vendors implement OSI support,
> > > yet they still have "last-man-activities" to deal with. This is not
> > > only QCOM.
> > >
> >
> > I am interested to hear from them. And the same question applies to
> > them too, as above.
>
> I have been talking to some of them. But, yes, we need to hear more from them.
>
> >
> > > I guess an option would be to add OSI support to the public ARM
> > > Trusted Firmware, then we could more easily point to that - rather
> > > than trying to mitigate the problem on the kernel side.
> > >
> >
> > I would say go for it. But don't mix the responsibilities of OSPM in
> > PC vs OSI. We have discussed this for years and I hope this discussion
> > ends ASAP. I don't see any point in dragging this any further.
>
> Okay.
>

I keep saying that but still responding to the discussions. I must stop ;-)

--
Regards,
Sudeep
diff mbox series

Patch

diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index 423f03b..3c417f7 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -26,13 +26,17 @@  struct psci_pd_provider {
 };
 
 static LIST_HEAD(psci_pd_providers);
-static bool osi_mode_enabled __initdata;
+static bool osi_mode_enabled;
 
 static int psci_pd_power_off(struct generic_pm_domain *pd)
 {
 	struct genpd_power_state *state = &pd->states[pd->state_idx];
 	u32 *pd_state;
 
+	/* If we have failed to enable OSI mode, then abort power off. */
+	if ((psci_has_osi_support()) && !osi_mode_enabled)
+		return -EBUSY;
+
 	if (!state->data)
 		return 0;
 
@@ -101,6 +105,105 @@  static void psci_pd_free_states(struct genpd_power_state *states,
 	kfree(states);
 }
 
+static void psci_pd_convert_states(struct cpuidle_state *idle_state,
+			u32 *psci_state, struct genpd_power_state *state)
+{
+	u32 *state_data = state->data;
+	u64 target_residency_us = state->residency_ns;
+	u64 exit_latency_us = state->power_on_latency_ns +
+			state->power_off_latency_ns;
+
+	*psci_state = *state_data;
+	do_div(target_residency_us, 1000);
+	idle_state->target_residency = target_residency_us;
+	do_div(exit_latency_us, 1000);
+	idle_state->exit_latency = exit_latency_us;
+	idle_state->enter = &psci_enter_domain_idle_state;
+	idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
+
+	strncpy(idle_state->name, to_of_node(state->fwnode)->name,
+		CPUIDLE_NAME_LEN - 1);
+	strncpy(idle_state->desc, to_of_node(state->fwnode)->name,
+		CPUIDLE_NAME_LEN - 1);
+}
+
+static bool psci_pd_is_provider(struct device_node *np)
+{
+	struct psci_pd_provider *pd_prov, *it;
+
+	list_for_each_entry_safe(pd_prov, it, &psci_pd_providers, link) {
+		if (pd_prov->node == np)
+			return true;
+	}
+
+	return false;
+}
+
+int __init psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
+			struct device_node *cpu_node, u32 *psci_states)
+{
+	struct genpd_power_state *pd_states;
+	struct of_phandle_args args;
+	int ret, pd_state_count, i, state_idx, psci_idx;
+	u32 cpu_psci_state = psci_states[drv->state_count - 1];
+	struct device_node *np = of_node_get(cpu_node);
+
+	/* Walk the CPU topology to find compatible domain idle states. */
+	while (np) {
+		ret = of_parse_phandle_with_args(np, "power-domains",
+					"#power-domain-cells", 0, &args);
+		of_node_put(np);
+		if (ret)
+			return 0;
+
+		np = args.np;
+
+		/* Verify that the node represents a psci pd provider. */
+		if (!psci_pd_is_provider(np)) {
+			of_node_put(np);
+			return 0;
+		}
+
+		/* Parse for compatible domain idle states. */
+		ret = psci_pd_parse_states(np, &pd_states, &pd_state_count);
+		if (ret) {
+			of_node_put(np);
+			return ret;
+		}
+
+		i = 0;
+		while (i < pd_state_count) {
+
+			state_idx = drv->state_count;
+			if (state_idx >= CPUIDLE_STATE_MAX) {
+				pr_warn("exceeding max cpuidle states\n");
+				of_node_put(np);
+				return 0;
+			}
+
+			psci_idx = state_idx + i;
+			psci_pd_convert_states(&drv->states[state_idx + i],
+					&psci_states[psci_idx], &pd_states[i]);
+
+			/*
+			 * In the hierarchical CPU topology the master PM domain
+			 * idle state's DT property, "arm,psci-suspend-param",
+			 * don't contain the bits for the idle state of the CPU,
+			 * let's add those here.
+			 */
+			psci_states[psci_idx] |= cpu_psci_state;
+			pr_debug("psci-power-state %#x index %d\n",
+				psci_states[psci_idx], psci_idx);
+
+			drv->state_count++;
+			i++;
+		}
+		psci_pd_free_states(pd_states, pd_state_count);
+	}
+
+	return 0;
+}
+
 static int __init psci_pd_init(struct device_node *np)
 {
 	struct generic_pm_domain *pd;
@@ -125,11 +228,14 @@  static int __init psci_pd_init(struct device_node *np)
 	 * Parse the domain idle states and let genpd manage the state selection
 	 * for those being compatible with "domain-idle-state".
 	 */
-	ret = psci_pd_parse_states(np, &states, &state_count);
-	if (ret)
-		goto free_name;
 
-	pd->free_states = psci_pd_free_states;
+	if (psci_has_osi_support()) {
+		ret = psci_pd_parse_states(np, &states, &state_count);
+		if (ret)
+			goto free_name;
+		pd->free_states = psci_pd_free_states;
+	}
+
 	pd->name = kbasename(pd->name);
 	pd->power_off = psci_pd_power_off;
 	pd->states = states;
@@ -236,10 +342,6 @@  static int __init psci_idle_init_domains(void)
 	if (!np)
 		return -ENODEV;
 
-	/* Currently limit the hierarchical topology to be used in OSI mode. */
-	if (!psci_has_osi_support())
-		goto out;
-
 	/*
 	 * Parse child nodes for the "#power-domain-cells" property and
 	 * initialize a genpd/genpd-of-provider pair when it's found.
@@ -265,14 +367,16 @@  static int __init psci_idle_init_domains(void)
 		goto remove_pd;
 
 	/* Try to enable OSI mode. */
-	ret = psci_set_osi_mode();
-	if (ret) {
-		pr_warn("failed to enable OSI mode: %d\n", ret);
-		psci_pd_remove_topology(np);
-		goto remove_pd;
+	if (psci_has_osi_support()) {
+		ret = psci_set_osi_mode();
+		if (ret) {
+			pr_warn("failed to enable OSI mode: %d\n", ret);
+			psci_pd_remove_topology(np);
+			goto remove_pd;
+		} else
+			osi_mode_enabled = true;
 	}
 
-	osi_mode_enabled = true;
 	of_node_put(np);
 	pr_info("Initialized CPU PM domain topology\n");
 	return pd_count;
@@ -293,9 +397,6 @@  struct device __init *psci_dt_attach_cpu(int cpu)
 {
 	struct device *dev;
 
-	if (!osi_mode_enabled)
-		return NULL;
-
 	dev = dev_pm_domain_attach_by_name(get_cpu_device(cpu), "psci");
 	if (IS_ERR_OR_NULL(dev))
 		return dev;
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index edd7a54..3fa2aee 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -49,7 +49,7 @@  static inline int psci_enter_state(int idx, u32 state)
 	return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, idx, state);
 }
 
-static int psci_enter_domain_idle_state(struct cpuidle_device *dev,
+int psci_enter_domain_idle_state(struct cpuidle_device *dev,
 					struct cpuidle_driver *drv, int idx)
 {
 	struct psci_cpuidle_data *data = this_cpu_ptr(&psci_cpuidle_data);
@@ -193,24 +193,29 @@  static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv,
 		goto free_mem;
 	}
 
-	/* Currently limit the hierarchical topology to be used in OSI mode. */
-	if (psci_has_osi_support()) {
-		data->dev = psci_dt_attach_cpu(cpu);
-		if (IS_ERR(data->dev)) {
-			ret = PTR_ERR(data->dev);
+	if (!psci_has_osi_support()) {
+		ret = psci_dt_pm_domains_parse_states(drv, cpu_node,
+					      psci_states);
+		if (ret)
 			goto free_mem;
-		}
-
-		/*
-		 * Using the deepest state for the CPU to trigger a potential
-		 * selection of a shared state for the domain, assumes the
-		 * domain states are all deeper states.
-		 */
-		if (data->dev) {
-			drv->states[state_count - 1].enter =
-				psci_enter_domain_idle_state;
-			psci_cpuidle_use_cpuhp = true;
-		}
+	}
+
+	data->dev = psci_dt_attach_cpu(cpu);
+	if (IS_ERR(data->dev)) {
+		ret = PTR_ERR(data->dev);
+		goto free_mem;
+	}
+
+	/*
+	 * Using the deepest state for the CPU to trigger a potential
+	 * selection of a shared state for the domain, assumes the
+	 * domain states are all deeper states.
+	 */
+
+	if (data->dev) {
+		drv->states[state_count - 1].enter =
+			psci_enter_domain_idle_state;
+		psci_cpuidle_use_cpuhp = true;
 	}
 
 	/* Idle states parsed correctly, store them in the per-cpu struct. */
diff --git a/drivers/cpuidle/cpuidle-psci.h b/drivers/cpuidle/cpuidle-psci.h
index 7299a04..18c93d7 100644
--- a/drivers/cpuidle/cpuidle-psci.h
+++ b/drivers/cpuidle/cpuidle-psci.h
@@ -3,15 +3,26 @@ 
 #ifndef __CPUIDLE_PSCI_H
 #define __CPUIDLE_PSCI_H
 
+#include <linux/cpuidle.h>
+
 struct device_node;
 
 void psci_set_domain_state(u32 state);
 int __init psci_dt_parse_state_node(struct device_node *np, u32 *state);
+int psci_enter_domain_idle_state(struct cpuidle_device *dev,
+			  struct cpuidle_driver *drv, int idx);
 
 #ifdef CONFIG_PM_GENERIC_DOMAINS_OF
 struct device __init *psci_dt_attach_cpu(int cpu);
+int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
+				    struct device_node *cpu_node,
+				    u32 *psci_states);
 #else
 static inline struct device __init *psci_dt_attach_cpu(int cpu) { return NULL; }
+static inline int psci_dt_pm_domains_parse_states(
+					struct cpuidle_driver *drv,
+					struct device_node *cpu_node,
+					u32 *psci_states) { return 0; }
 #endif
 
 #endif /* __CPUIDLE_PSCI_H */