diff mbox series

[v2,2/2] cpufreq: Specify default governor on command line

Message ID 20200623142138.209513-3-qperret@google.com (mailing list archive)
State Changes Requested, archived
Headers show
Series cpufreq: Specify the default governor on command line | expand

Commit Message

Quentin Perret June 23, 2020, 2:21 p.m. UTC
Currently, the only way to specify the default CPUfreq governor is via
Kconfig options, which suits users who can build the kernel themselves
perfectly.

However, for those who use a distro-like kernel (such as Android, with
the Generic Kernel Image project), the only way to use a different
default is to boot to userspace, and to then switch using the sysfs
interface. Being able to specify the default governor on the command
line, like is the case for cpuidle, would enable those users to specify
their governor of choice earlier on, and to simplify slighlty the
userspace boot procedure.

To support this use-case, add a kernel command line parameter enabling
to specify a default governor for CPUfreq, which takes precedence over
the builtin default.

This implementation has one notable limitation: the default governor
must be registered before the driver. This is solved for builtin
governors and drivers using appropriate *_initcall() functions. And in
the modular case, this must be reflected as a constraint on the module
loading order.

Signed-off-by: Quentin Perret <qperret@google.com>
---
 .../admin-guide/kernel-parameters.txt         |  5 ++++
 Documentation/admin-guide/pm/cpufreq.rst      |  6 ++---
 drivers/cpufreq/cpufreq.c                     | 23 +++++++++++++++----
 3 files changed, 26 insertions(+), 8 deletions(-)

Comments

Viresh Kumar June 24, 2020, 5:50 a.m. UTC | #1
On 23-06-20, 15:21, Quentin Perret wrote:
> Currently, the only way to specify the default CPUfreq governor is via
> Kconfig options, which suits users who can build the kernel themselves
> perfectly.
> 
> However, for those who use a distro-like kernel (such as Android, with
> the Generic Kernel Image project), the only way to use a different
> default is to boot to userspace, and to then switch using the sysfs
> interface. Being able to specify the default governor on the command
> line, like is the case for cpuidle, would enable those users to specify
> their governor of choice earlier on, and to simplify slighlty the
> userspace boot procedure.
> 
> To support this use-case, add a kernel command line parameter enabling
> to specify a default governor for CPUfreq, which takes precedence over
> the builtin default.
> 
> This implementation has one notable limitation: the default governor
> must be registered before the driver. This is solved for builtin
> governors and drivers using appropriate *_initcall() functions. And in
> the modular case, this must be reflected as a constraint on the module
> loading order.
> 
> Signed-off-by: Quentin Perret <qperret@google.com>
> ---
>  .../admin-guide/kernel-parameters.txt         |  5 ++++
>  Documentation/admin-guide/pm/cpufreq.rst      |  6 ++---
>  drivers/cpufreq/cpufreq.c                     | 23 +++++++++++++++----
>  3 files changed, 26 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index fb95fad81c79..5fd3c9f187eb 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -703,6 +703,11 @@
>  	cpufreq.off=1	[CPU_FREQ]
>  			disable the cpufreq sub-system
>  
> +	cpufreq.default_governor=
> +			[CPU_FREQ] Name of the default cpufreq governor to use.
> +			This governor must be registered in the kernel before
> +			the cpufreq driver probes.
> +
>  	cpu_init_udelay=N
>  			[X86] Delay for N microsec between assert and de-assert
>  			of APIC INIT to start processors.  This delay occurs
> diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
> index 0c74a7784964..368e612145d2 100644
> --- a/Documentation/admin-guide/pm/cpufreq.rst
> +++ b/Documentation/admin-guide/pm/cpufreq.rst
> @@ -147,9 +147,9 @@ CPUs in it.
>  
>  The next major initialization step for a new policy object is to attach a
>  scaling governor to it (to begin with, that is the default scaling governor
> -determined by the kernel configuration, but it may be changed later
> -via ``sysfs``).  First, a pointer to the new policy object is passed to the
> -governor's ``->init()`` callback which is expected to initialize all of the
> +determined by the kernel command line or configuration, but it may be changed
> +later via ``sysfs``).  First, a pointer to the new policy object is passed to
> +the governor's ``->init()`` callback which is expected to initialize all of the
>  data structures necessary to handle the given policy and, possibly, to add
>  a governor ``sysfs`` interface to it.  Next, the governor is started by
>  invoking its ``->start()`` callback.
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 0128de3603df..4b1a5c0173cf 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -50,6 +50,9 @@ static LIST_HEAD(cpufreq_governor_list);
>  #define for_each_governor(__governor)				\
>  	list_for_each_entry(__governor, &cpufreq_governor_list, governor_list)
>  
> +static char cpufreq_param_governor[CPUFREQ_NAME_LEN];
> +static struct cpufreq_governor *default_governor;
> +
>  /**
>   * The "cpufreq driver" - the arch- or hardware-dependent low
>   * level driver of CPUFreq support, and its spinlock. This lock
> @@ -1055,7 +1058,6 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void)
>  
>  static int cpufreq_init_policy(struct cpufreq_policy *policy)
>  {
> -	struct cpufreq_governor *def_gov = cpufreq_default_governor();
>  	struct cpufreq_governor *gov = NULL;
>  	unsigned int pol = CPUFREQ_POLICY_UNKNOWN;
>  
> @@ -1065,8 +1067,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
>  		if (gov) {
>  			pr_debug("Restoring governor %s for cpu %d\n",
>  				 policy->governor->name, policy->cpu);
> -		} else if (def_gov) {
> -			gov = def_gov;
> +		} else if (default_governor) {
> +			gov = default_governor;
>  		} else {
>  			return -ENODATA;
>  		}
> @@ -1074,8 +1076,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
>  		/* Use the default policy if there is no last_policy. */
>  		if (policy->last_policy) {
>  			pol = policy->last_policy;
> -		} else if (def_gov) {
> -			pol = cpufreq_parse_policy(def_gov->name);
> +		} else if (default_governor) {
> +			pol = cpufreq_parse_policy(default_governor->name);
>  			/*
>  			 * In case the default governor is neiter "performance"
>  			 * nor "powersave", fall back to the initial policy
> @@ -2320,6 +2322,9 @@ int cpufreq_register_governor(struct cpufreq_governor *governor)
>  		list_add(&governor->governor_list, &cpufreq_governor_list);
>  	}
>  
> +	if (!strncasecmp(cpufreq_param_governor, governor->name, CPUFREQ_NAME_LEN))
> +		default_governor = governor;
> +
>  	mutex_unlock(&cpufreq_governor_mutex);
>  	return err;
>  }
> @@ -2348,6 +2353,8 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor)
>  
>  	mutex_lock(&cpufreq_governor_mutex);
>  	list_del(&governor->governor_list);
> +	if (governor == default_governor)
> +		default_governor = cpufreq_default_governor();
>  	mutex_unlock(&cpufreq_governor_mutex);
>  }
>  EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
> @@ -2789,7 +2796,13 @@ static int __init cpufreq_core_init(void)
>  	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
>  	BUG_ON(!cpufreq_global_kobject);
>  
> +	mutex_lock(&cpufreq_governor_mutex);
> +	if (!default_governor)
> +		default_governor = cpufreq_default_governor();
> +	mutex_unlock(&cpufreq_governor_mutex);

I don't think locking is required here at core-initcall level. Apart
from that:

Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

> +
>  	return 0;
>  }
>  module_param(off, int, 0444);
> +module_param_string(default_governor, cpufreq_param_governor, CPUFREQ_NAME_LEN, 0444);
>  core_initcall(cpufreq_core_init);
> -- 
> 2.27.0.111.gc72c7da667-goog
Rafael J. Wysocki June 24, 2020, 12:51 p.m. UTC | #2
On Wed, Jun 24, 2020 at 7:50 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> On 23-06-20, 15:21, Quentin Perret wrote:
> > Currently, the only way to specify the default CPUfreq governor is via
> > Kconfig options, which suits users who can build the kernel themselves
> > perfectly.
> >
> > However, for those who use a distro-like kernel (such as Android, with
> > the Generic Kernel Image project), the only way to use a different
> > default is to boot to userspace, and to then switch using the sysfs
> > interface. Being able to specify the default governor on the command
> > line, like is the case for cpuidle, would enable those users to specify
> > their governor of choice earlier on, and to simplify slighlty the
> > userspace boot procedure.
> >
> > To support this use-case, add a kernel command line parameter enabling
> > to specify a default governor for CPUfreq, which takes precedence over
> > the builtin default.
> >
> > This implementation has one notable limitation: the default governor
> > must be registered before the driver. This is solved for builtin
> > governors and drivers using appropriate *_initcall() functions. And in
> > the modular case, this must be reflected as a constraint on the module
> > loading order.
> >
> > Signed-off-by: Quentin Perret <qperret@google.com>
> > ---
> >  .../admin-guide/kernel-parameters.txt         |  5 ++++
> >  Documentation/admin-guide/pm/cpufreq.rst      |  6 ++---
> >  drivers/cpufreq/cpufreq.c                     | 23 +++++++++++++++----
> >  3 files changed, 26 insertions(+), 8 deletions(-)
> >
> > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> > index fb95fad81c79..5fd3c9f187eb 100644
> > --- a/Documentation/admin-guide/kernel-parameters.txt
> > +++ b/Documentation/admin-guide/kernel-parameters.txt
> > @@ -703,6 +703,11 @@
> >       cpufreq.off=1   [CPU_FREQ]
> >                       disable the cpufreq sub-system
> >
> > +     cpufreq.default_governor=
> > +                     [CPU_FREQ] Name of the default cpufreq governor to use.
> > +                     This governor must be registered in the kernel before
> > +                     the cpufreq driver probes.
> > +
> >       cpu_init_udelay=N
> >                       [X86] Delay for N microsec between assert and de-assert
> >                       of APIC INIT to start processors.  This delay occurs
> > diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
> > index 0c74a7784964..368e612145d2 100644
> > --- a/Documentation/admin-guide/pm/cpufreq.rst
> > +++ b/Documentation/admin-guide/pm/cpufreq.rst
> > @@ -147,9 +147,9 @@ CPUs in it.
> >
> >  The next major initialization step for a new policy object is to attach a
> >  scaling governor to it (to begin with, that is the default scaling governor
> > -determined by the kernel configuration, but it may be changed later
> > -via ``sysfs``).  First, a pointer to the new policy object is passed to the
> > -governor's ``->init()`` callback which is expected to initialize all of the
> > +determined by the kernel command line or configuration, but it may be changed
> > +later via ``sysfs``).  First, a pointer to the new policy object is passed to
> > +the governor's ``->init()`` callback which is expected to initialize all of the
> >  data structures necessary to handle the given policy and, possibly, to add
> >  a governor ``sysfs`` interface to it.  Next, the governor is started by
> >  invoking its ``->start()`` callback.
> > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > index 0128de3603df..4b1a5c0173cf 100644
> > --- a/drivers/cpufreq/cpufreq.c
> > +++ b/drivers/cpufreq/cpufreq.c
> > @@ -50,6 +50,9 @@ static LIST_HEAD(cpufreq_governor_list);
> >  #define for_each_governor(__governor)                                \
> >       list_for_each_entry(__governor, &cpufreq_governor_list, governor_list)
> >
> > +static char cpufreq_param_governor[CPUFREQ_NAME_LEN];
> > +static struct cpufreq_governor *default_governor;
> > +
> >  /**
> >   * The "cpufreq driver" - the arch- or hardware-dependent low
> >   * level driver of CPUFreq support, and its spinlock. This lock
> > @@ -1055,7 +1058,6 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void)
> >
> >  static int cpufreq_init_policy(struct cpufreq_policy *policy)
> >  {
> > -     struct cpufreq_governor *def_gov = cpufreq_default_governor();
> >       struct cpufreq_governor *gov = NULL;
> >       unsigned int pol = CPUFREQ_POLICY_UNKNOWN;
> >
> > @@ -1065,8 +1067,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
> >               if (gov) {
> >                       pr_debug("Restoring governor %s for cpu %d\n",
> >                                policy->governor->name, policy->cpu);
> > -             } else if (def_gov) {
> > -                     gov = def_gov;
> > +             } else if (default_governor) {
> > +                     gov = default_governor;
> >               } else {
> >                       return -ENODATA;
> >               }
> > @@ -1074,8 +1076,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
> >               /* Use the default policy if there is no last_policy. */
> >               if (policy->last_policy) {
> >                       pol = policy->last_policy;
> > -             } else if (def_gov) {
> > -                     pol = cpufreq_parse_policy(def_gov->name);
> > +             } else if (default_governor) {
> > +                     pol = cpufreq_parse_policy(default_governor->name);
> >                       /*
> >                        * In case the default governor is neiter "performance"
> >                        * nor "powersave", fall back to the initial policy
> > @@ -2320,6 +2322,9 @@ int cpufreq_register_governor(struct cpufreq_governor *governor)
> >               list_add(&governor->governor_list, &cpufreq_governor_list);
> >       }
> >
> > +     if (!strncasecmp(cpufreq_param_governor, governor->name, CPUFREQ_NAME_LEN))
> > +             default_governor = governor;
> > +
> >       mutex_unlock(&cpufreq_governor_mutex);
> >       return err;
> >  }
> > @@ -2348,6 +2353,8 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor)
> >
> >       mutex_lock(&cpufreq_governor_mutex);
> >       list_del(&governor->governor_list);
> > +     if (governor == default_governor)
> > +             default_governor = cpufreq_default_governor();
> >       mutex_unlock(&cpufreq_governor_mutex);
> >  }
> >  EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
> > @@ -2789,7 +2796,13 @@ static int __init cpufreq_core_init(void)
> >       cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
> >       BUG_ON(!cpufreq_global_kobject);
> >
> > +     mutex_lock(&cpufreq_governor_mutex);
> > +     if (!default_governor)
> > +             default_governor = cpufreq_default_governor();
> > +     mutex_unlock(&cpufreq_governor_mutex);
>
> I don't think locking is required here at core-initcall level.

It isn't necessary AFAICS, but it may as well be regarded as
annotation (kind of instead of having a comment explaining why it need
not be used).
Quentin Perret June 24, 2020, 3:32 p.m. UTC | #3
On Wednesday 24 Jun 2020 at 14:51:04 (+0200), Rafael J. Wysocki wrote:
> On Wed, Jun 24, 2020 at 7:50 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > > @@ -2789,7 +2796,13 @@ static int __init cpufreq_core_init(void)
> > >       cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
> > >       BUG_ON(!cpufreq_global_kobject);
> > >
> > > +     mutex_lock(&cpufreq_governor_mutex);
> > > +     if (!default_governor)
> > > +             default_governor = cpufreq_default_governor();
> > > +     mutex_unlock(&cpufreq_governor_mutex);
> >
> > I don't think locking is required here at core-initcall level.
> 
> It isn't necessary AFAICS, but it may as well be regarded as
> annotation (kind of instead of having a comment explaining why it need
> not be used).

Right, but I must admit that, looking at this more, I'm getting a bit
confused with the overall locking for governors :/

When in cpufreq_init_policy() we find a governor using
find_governor(policy->last_governor), what guarantees this governor is
not concurrently unregistered? That is, what guarantees this governor
doesn't go away between that find_governor() call, and the subsequent
call to try_module_get() in cpufreq_set_policy() down the line?

Can we somewhat assume that whatever governor is referred to by
policy->last_governor will have a non-null refcount? Or are the
cpufreq_online() and cpufreq_unregister_governor() path mutually
exclusive? Or is there something else?

Thanks,
Quentin
Viresh Kumar June 25, 2020, 8:50 a.m. UTC | #4
On 24-06-20, 16:32, Quentin Perret wrote:
> Right, but I must admit that, looking at this more, I'm getting a bit
> confused with the overall locking for governors :/
> 
> When in cpufreq_init_policy() we find a governor using
> find_governor(policy->last_governor), what guarantees this governor is
> not concurrently unregistered? That is, what guarantees this governor
> doesn't go away between that find_governor() call, and the subsequent
> call to try_module_get() in cpufreq_set_policy() down the line?
> 
> Can we somewhat assume that whatever governor is referred to by
> policy->last_governor will have a non-null refcount? Or are the
> cpufreq_online() and cpufreq_unregister_governor() path mutually
> exclusive? Or is there something else?

This should be sufficient to fix pending issues I believe. Based over your
patches.
Rafael J. Wysocki June 25, 2020, 10:52 a.m. UTC | #5
On Thu, Jun 25, 2020 at 10:50 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> On 24-06-20, 16:32, Quentin Perret wrote:
> > Right, but I must admit that, looking at this more, I'm getting a bit
> > confused with the overall locking for governors :/
> >
> > When in cpufreq_init_policy() we find a governor using
> > find_governor(policy->last_governor), what guarantees this governor is
> > not concurrently unregistered? That is, what guarantees this governor
> > doesn't go away between that find_governor() call, and the subsequent
> > call to try_module_get() in cpufreq_set_policy() down the line?
> >
> > Can we somewhat assume that whatever governor is referred to by
> > policy->last_governor will have a non-null refcount? Or are the
> > cpufreq_online() and cpufreq_unregister_governor() path mutually
> > exclusive? Or is there something else?
>
> This should be sufficient to fix pending issues I believe. Based over your
> patches.

LGTM, but can you post it in a new thread to let Patchwork pick it up?

> -------------------------8<-------------------------
> From: Viresh Kumar <viresh.kumar@linaro.org>
> Date: Thu, 25 Jun 2020 13:15:23 +0530
> Subject: [PATCH] cpufreq: Fix locking issues with governors
>
> The locking around governors handling isn't adequate currently. The list
> of governors should never be traversed without locking in place. Also we
> must make sure the governor isn't removed while it is still referenced
> by code.
>
> Reported-by: Quentin Perret <qperret@google.com>
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
>  drivers/cpufreq/cpufreq.c | 59 ++++++++++++++++++++++++---------------
>  1 file changed, 36 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 4b1a5c0173cf..dad6b85f4c89 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -624,6 +624,24 @@ static struct cpufreq_governor *find_governor(const char *str_governor)
>         return NULL;
>  }
>
> +static struct cpufreq_governor *get_governor(const char *str_governor)
> +{
> +       struct cpufreq_governor *t;
> +
> +       mutex_lock(&cpufreq_governor_mutex);
> +       t = find_governor(str_governor);
> +       if (!t)
> +               goto unlock;
> +
> +       if (!try_module_get(t->owner))
> +               t = NULL;
> +
> +unlock:
> +       mutex_unlock(&cpufreq_governor_mutex);
> +
> +       return t;
> +}
> +
>  static unsigned int cpufreq_parse_policy(char *str_governor)
>  {
>         if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN))
> @@ -643,28 +661,14 @@ static struct cpufreq_governor *cpufreq_parse_governor(char *str_governor)
>  {
>         struct cpufreq_governor *t;
>
> -       mutex_lock(&cpufreq_governor_mutex);
> -
> -       t = find_governor(str_governor);
> -       if (!t) {
> -               int ret;
> -
> -               mutex_unlock(&cpufreq_governor_mutex);
> -
> -               ret = request_module("cpufreq_%s", str_governor);
> -               if (ret)
> -                       return NULL;
> -
> -               mutex_lock(&cpufreq_governor_mutex);
> +       t = get_governor(str_governor);
> +       if (t)
> +               return t;
>
> -               t = find_governor(str_governor);
> -       }
> -       if (t && !try_module_get(t->owner))
> -               t = NULL;
> -
> -       mutex_unlock(&cpufreq_governor_mutex);
> +       if (request_module("cpufreq_%s", str_governor))
> +               return NULL;
>
> -       return t;
> +       return get_governor(str_governor);
>  }
>
>  /**
> @@ -818,12 +822,14 @@ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
>                 goto out;
>         }
>
> +       mutex_lock(&cpufreq_governor_mutex);
>         for_each_governor(t) {
>                 if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
>                     - (CPUFREQ_NAME_LEN + 2)))
> -                       goto out;
> +                       break;
>                 i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name);
>         }
> +       mutex_unlock(&cpufreq_governor_mutex);
>  out:
>         i += sprintf(&buf[i], "\n");
>         return i;
> @@ -1060,11 +1066,14 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
>  {
>         struct cpufreq_governor *gov = NULL;
>         unsigned int pol = CPUFREQ_POLICY_UNKNOWN;
> +       bool put_governor = false;
> +       int ret;
>
>         if (has_target()) {
>                 /* Update policy governor to the one used before hotplug. */
> -               gov = find_governor(policy->last_governor);
> +               gov = get_governor(policy->last_governor);
>                 if (gov) {
> +                       put_governor = true;
>                         pr_debug("Restoring governor %s for cpu %d\n",
>                                  policy->governor->name, policy->cpu);
>                 } else if (default_governor) {
> @@ -1091,7 +1100,11 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
>                         return -ENODATA;
>         }
>
> -       return cpufreq_set_policy(policy, gov, pol);
> +       ret = cpufreq_set_policy(policy, gov, pol);
> +       if (put_governor)
> +               module_put(gov->owner);
> +
> +       return ret;
>  }
>
>  static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
Viresh Kumar June 25, 2020, 11:36 a.m. UTC | #6
After your last email (reply to my patch), I noticed a change which
isn't required. :)

On 23-06-20, 15:21, Quentin Perret wrote:
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 0128de3603df..4b1a5c0173cf 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -50,6 +50,9 @@ static LIST_HEAD(cpufreq_governor_list);
>  #define for_each_governor(__governor)				\
>  	list_for_each_entry(__governor, &cpufreq_governor_list, governor_list)
>  
> +static char cpufreq_param_governor[CPUFREQ_NAME_LEN];
> +static struct cpufreq_governor *default_governor;
> +
>  /**
>   * The "cpufreq driver" - the arch- or hardware-dependent low
>   * level driver of CPUFreq support, and its spinlock. This lock
> @@ -1055,7 +1058,6 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void)
>  
>  static int cpufreq_init_policy(struct cpufreq_policy *policy)
>  {
> -	struct cpufreq_governor *def_gov = cpufreq_default_governor();
>  	struct cpufreq_governor *gov = NULL;
>  	unsigned int pol = CPUFREQ_POLICY_UNKNOWN;
>  
> @@ -1065,8 +1067,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
>  		if (gov) {
>  			pr_debug("Restoring governor %s for cpu %d\n",
>  				 policy->governor->name, policy->cpu);
> -		} else if (def_gov) {
> -			gov = def_gov;
> +		} else if (default_governor) {
> +			gov = default_governor;
>  		} else {
>  			return -ENODATA;
>  		}


> @@ -1074,8 +1076,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
>  		/* Use the default policy if there is no last_policy. */
>  		if (policy->last_policy) {
>  			pol = policy->last_policy;
> -		} else if (def_gov) {
> -			pol = cpufreq_parse_policy(def_gov->name);
> +		} else if (default_governor) {
> +			pol = cpufreq_parse_policy(default_governor->name);

This change is not right IMO. This part handles the set-policy case,
where there are no governors. Right now this code, for some reasons
unknown to me, forcefully uses the default governor set to indicate
the policy, which is not a great idea in my opinion TBH. This doesn't
and shouldn't care about governor modules and should only be looking
at strings instead of governor pointer.

Rafael, I even think we should remove this code completely and just
rely on what the driver has sent to us. Using the selected governor
for set policy drivers is very confusing and also we shouldn't be
forced to compiling any governor for the set-policy case.
Rafael J. Wysocki June 25, 2020, 11:44 a.m. UTC | #7
On Thu, Jun 25, 2020 at 1:36 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> After your last email (reply to my patch), I noticed a change which
> isn't required. :)
>
> On 23-06-20, 15:21, Quentin Perret wrote:
> > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > index 0128de3603df..4b1a5c0173cf 100644
> > --- a/drivers/cpufreq/cpufreq.c
> > +++ b/drivers/cpufreq/cpufreq.c
> > @@ -50,6 +50,9 @@ static LIST_HEAD(cpufreq_governor_list);
> >  #define for_each_governor(__governor)                                \
> >       list_for_each_entry(__governor, &cpufreq_governor_list, governor_list)
> >
> > +static char cpufreq_param_governor[CPUFREQ_NAME_LEN];
> > +static struct cpufreq_governor *default_governor;
> > +
> >  /**
> >   * The "cpufreq driver" - the arch- or hardware-dependent low
> >   * level driver of CPUFreq support, and its spinlock. This lock
> > @@ -1055,7 +1058,6 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void)
> >
> >  static int cpufreq_init_policy(struct cpufreq_policy *policy)
> >  {
> > -     struct cpufreq_governor *def_gov = cpufreq_default_governor();
> >       struct cpufreq_governor *gov = NULL;
> >       unsigned int pol = CPUFREQ_POLICY_UNKNOWN;
> >
> > @@ -1065,8 +1067,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
> >               if (gov) {
> >                       pr_debug("Restoring governor %s for cpu %d\n",
> >                                policy->governor->name, policy->cpu);
> > -             } else if (def_gov) {
> > -                     gov = def_gov;
> > +             } else if (default_governor) {
> > +                     gov = default_governor;
> >               } else {
> >                       return -ENODATA;
> >               }
>
>
> > @@ -1074,8 +1076,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
> >               /* Use the default policy if there is no last_policy. */
> >               if (policy->last_policy) {
> >                       pol = policy->last_policy;
> > -             } else if (def_gov) {
> > -                     pol = cpufreq_parse_policy(def_gov->name);
> > +             } else if (default_governor) {
> > +                     pol = cpufreq_parse_policy(default_governor->name);
>
> This change is not right IMO. This part handles the set-policy case,
> where there are no governors. Right now this code, for some reasons
> unknown to me, forcefully uses the default governor set to indicate
> the policy, which is not a great idea in my opinion TBH. This doesn't
> and shouldn't care about governor modules and should only be looking
> at strings instead of governor pointer.

Sounds right.

> Rafael, I even think we should remove this code completely and just
> rely on what the driver has sent to us. Using the selected governor
> for set policy drivers is very confusing and also we shouldn't be
> forced to compiling any governor for the set-policy case.

Well, AFAICS the idea was to use the default governor as a kind of
default policy proxy, but I agree that strings should be sufficient
for that.

I'll have a look at what to do with that code.
Quentin Perret June 25, 2020, 11:53 a.m. UTC | #8
On Thursday 25 Jun 2020 at 13:44:34 (+0200), Rafael J. Wysocki wrote:
> On Thu, Jun 25, 2020 at 1:36 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > This change is not right IMO. This part handles the set-policy case,
> > where there are no governors. Right now this code, for some reasons
> > unknown to me, forcefully uses the default governor set to indicate
> > the policy, which is not a great idea in my opinion TBH. This doesn't
> > and shouldn't care about governor modules and should only be looking
> > at strings instead of governor pointer.
> 
> Sounds right.
> 
> > Rafael, I even think we should remove this code completely and just
> > rely on what the driver has sent to us. Using the selected governor
> > for set policy drivers is very confusing and also we shouldn't be
> > forced to compiling any governor for the set-policy case.
> 
> Well, AFAICS the idea was to use the default governor as a kind of
> default policy proxy, but I agree that strings should be sufficient
> for that.

I agree with all the above. I'd much rather not rely on the default
governor name to populate the default policy, too, so +1 from me.

Thanks,
Quentin
Rafael J. Wysocki June 25, 2020, 1:28 p.m. UTC | #9
On Thu, Jun 25, 2020 at 1:53 PM Quentin Perret <qperret@google.com> wrote:
>
> On Thursday 25 Jun 2020 at 13:44:34 (+0200), Rafael J. Wysocki wrote:
> > On Thu, Jun 25, 2020 at 1:36 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > > This change is not right IMO. This part handles the set-policy case,
> > > where there are no governors. Right now this code, for some reasons
> > > unknown to me, forcefully uses the default governor set to indicate
> > > the policy, which is not a great idea in my opinion TBH. This doesn't
> > > and shouldn't care about governor modules and should only be looking
> > > at strings instead of governor pointer.
> >
> > Sounds right.
> >
> > > Rafael, I even think we should remove this code completely and just
> > > rely on what the driver has sent to us. Using the selected governor
> > > for set policy drivers is very confusing and also we shouldn't be
> > > forced to compiling any governor for the set-policy case.
> >
> > Well, AFAICS the idea was to use the default governor as a kind of
> > default policy proxy, but I agree that strings should be sufficient
> > for that.
>
> I agree with all the above. I'd much rather not rely on the default
> governor name to populate the default policy, too, so +1 from me.

So before this series the default governor was selected at the kernel
configuration time (pre-build) and was always built-in.  Because it
could not go away, its name could be used to indicate the default
policy for the "setpolicy" drivers.

After this series, however, it cannot be used this way reliably, but
you can still pass cpufreq_param_governor to cpufreq_parse_policy()
instead of def_gov->name in cpufreq_init_policy(), can't you?
Quentin Perret June 25, 2020, 1:49 p.m. UTC | #10
On Thursday 25 Jun 2020 at 15:28:43 (+0200), Rafael J. Wysocki wrote:
> On Thu, Jun 25, 2020 at 1:53 PM Quentin Perret <qperret@google.com> wrote:
> >
> > On Thursday 25 Jun 2020 at 13:44:34 (+0200), Rafael J. Wysocki wrote:
> > > On Thu, Jun 25, 2020 at 1:36 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > > > This change is not right IMO. This part handles the set-policy case,
> > > > where there are no governors. Right now this code, for some reasons
> > > > unknown to me, forcefully uses the default governor set to indicate
> > > > the policy, which is not a great idea in my opinion TBH. This doesn't
> > > > and shouldn't care about governor modules and should only be looking
> > > > at strings instead of governor pointer.
> > >
> > > Sounds right.
> > >
> > > > Rafael, I even think we should remove this code completely and just
> > > > rely on what the driver has sent to us. Using the selected governor
> > > > for set policy drivers is very confusing and also we shouldn't be
> > > > forced to compiling any governor for the set-policy case.
> > >
> > > Well, AFAICS the idea was to use the default governor as a kind of
> > > default policy proxy, but I agree that strings should be sufficient
> > > for that.
> >
> > I agree with all the above. I'd much rather not rely on the default
> > governor name to populate the default policy, too, so +1 from me.
> 
> So before this series the default governor was selected at the kernel
> configuration time (pre-build) and was always built-in.  Because it
> could not go away, its name could be used to indicate the default
> policy for the "setpolicy" drivers.
> 
> After this series, however, it cannot be used this way reliably, but
> you can still pass cpufreq_param_governor to cpufreq_parse_policy()
> instead of def_gov->name in cpufreq_init_policy(), can't you?

Good point. I also need to fallback to the default builtin governor if
the command line parameter isn't valid (or non-existent), so perhaps
something like so?

iff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index dad6b85f4c89..20a2020abf88 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -653,6 +653,23 @@ static unsigned int cpufreq_parse_policy(char *str_governor)
        return CPUFREQ_POLICY_UNKNOWN;
 }
 
+static unsigned int cpufreq_default_policy(void)
+{
+       unsigned int pol;
+
+       pol = cpufreq_parse_policy(cpufreq_param_governor);
+       if (pol != CPUFREQ_POLICY_UNKNOWN)
+               return pol;
+
+       if (IS_BUILTIN(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
+               return CPUFREQ_POLICY_PERFORMANCE;
+
+       if (IS_BUILTIN(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE))
+               return CPUFREQ_POLICY_POWERSAVE;
+
+       return CPUFREQ_POLICY_UNKNOWN;
+}
+
 /**
  * cpufreq_parse_governor - parse a governor string only for has_target()
  * @str_governor: Governor name.
@@ -1085,8 +1102,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
                /* Use the default policy if there is no last_policy. */
                if (policy->last_policy) {
                        pol = policy->last_policy;
-               } else if (default_governor) {
-                       pol = cpufreq_parse_policy(default_governor->name);
+               } else {
+                       pol = cpufreq_default_policy();
                        /*
                         * In case the default governor is neiter "performance"
                         * nor "powersave", fall back to the initial policy
Rafael J. Wysocki June 25, 2020, 2:08 p.m. UTC | #11
On Thu, Jun 25, 2020 at 3:50 PM Quentin Perret <qperret@google.com> wrote:
>
> On Thursday 25 Jun 2020 at 15:28:43 (+0200), Rafael J. Wysocki wrote:
> > On Thu, Jun 25, 2020 at 1:53 PM Quentin Perret <qperret@google.com> wrote:
> > >
> > > On Thursday 25 Jun 2020 at 13:44:34 (+0200), Rafael J. Wysocki wrote:
> > > > On Thu, Jun 25, 2020 at 1:36 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > > > > This change is not right IMO. This part handles the set-policy case,
> > > > > where there are no governors. Right now this code, for some reasons
> > > > > unknown to me, forcefully uses the default governor set to indicate
> > > > > the policy, which is not a great idea in my opinion TBH. This doesn't
> > > > > and shouldn't care about governor modules and should only be looking
> > > > > at strings instead of governor pointer.
> > > >
> > > > Sounds right.
> > > >
> > > > > Rafael, I even think we should remove this code completely and just
> > > > > rely on what the driver has sent to us. Using the selected governor
> > > > > for set policy drivers is very confusing and also we shouldn't be
> > > > > forced to compiling any governor for the set-policy case.
> > > >
> > > > Well, AFAICS the idea was to use the default governor as a kind of
> > > > default policy proxy, but I agree that strings should be sufficient
> > > > for that.
> > >
> > > I agree with all the above. I'd much rather not rely on the default
> > > governor name to populate the default policy, too, so +1 from me.
> >
> > So before this series the default governor was selected at the kernel
> > configuration time (pre-build) and was always built-in.  Because it
> > could not go away, its name could be used to indicate the default
> > policy for the "setpolicy" drivers.
> >
> > After this series, however, it cannot be used this way reliably, but
> > you can still pass cpufreq_param_governor to cpufreq_parse_policy()
> > instead of def_gov->name in cpufreq_init_policy(), can't you?
>
> Good point. I also need to fallback to the default builtin governor if
> the command line parameter isn't valid (or non-existent), so perhaps
> something like so?

Yes, that should work if I haven't missed anything.

> iff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index dad6b85f4c89..20a2020abf88 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -653,6 +653,23 @@ static unsigned int cpufreq_parse_policy(char *str_governor)
>         return CPUFREQ_POLICY_UNKNOWN;
>  }
>
> +static unsigned int cpufreq_default_policy(void)
> +{
> +       unsigned int pol;
> +
> +       pol = cpufreq_parse_policy(cpufreq_param_governor);
> +       if (pol != CPUFREQ_POLICY_UNKNOWN)
> +               return pol;
> +
> +       if (IS_BUILTIN(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE))
> +               return CPUFREQ_POLICY_PERFORMANCE;
> +
> +       if (IS_BUILTIN(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE))
> +               return CPUFREQ_POLICY_POWERSAVE;
> +
> +       return CPUFREQ_POLICY_UNKNOWN;
> +}
> +
>  /**
>   * cpufreq_parse_governor - parse a governor string only for has_target()
>   * @str_governor: Governor name.
> @@ -1085,8 +1102,8 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
>                 /* Use the default policy if there is no last_policy. */
>                 if (policy->last_policy) {
>                         pol = policy->last_policy;
> -               } else if (default_governor) {
> -                       pol = cpufreq_parse_policy(default_governor->name);
> +               } else {
> +                       pol = cpufreq_default_policy();
>                         /*
>                          * In case the default governor is neiter "performance"
>                          * nor "powersave", fall back to the initial policy
Viresh Kumar June 26, 2020, 2:53 a.m. UTC | #12
On 23-06-20, 15:21, Quentin Perret wrote:
> @@ -2789,7 +2796,13 @@ static int __init cpufreq_core_init(void)
>  	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
>  	BUG_ON(!cpufreq_global_kobject);
>  
> +	mutex_lock(&cpufreq_governor_mutex);
> +	if (!default_governor)

Also is this check really required ? The pointer will always be NULL
at this point, isn't it ?

> +		default_governor = cpufreq_default_governor();
> +	mutex_unlock(&cpufreq_governor_mutex);
> +
>  	return 0;
>  }
>  module_param(off, int, 0444);
> +module_param_string(default_governor, cpufreq_param_governor, CPUFREQ_NAME_LEN, 0444);
>  core_initcall(cpufreq_core_init);
> -- 
> 2.27.0.111.gc72c7da667-goog
Quentin Perret June 26, 2020, 8:09 a.m. UTC | #13
On Friday 26 Jun 2020 at 08:23:46 (+0530), Viresh Kumar wrote:
> On 23-06-20, 15:21, Quentin Perret wrote:
> > @@ -2789,7 +2796,13 @@ static int __init cpufreq_core_init(void)
> >  	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
> >  	BUG_ON(!cpufreq_global_kobject);
> >  
> > +	mutex_lock(&cpufreq_governor_mutex);
> > +	if (!default_governor)
> 
> Also is this check really required ? The pointer will always be NULL
> at this point, isn't it ?

Not necessarily in this implementation -- the governors are registered
at core_initcall time too, so I don't think we can assume any ordering
there.

But it looks like your new version has fixed that by design, so I'll go
look at it some more, and try it out.

Thanks for the help!
Quentin
> 
> > +		default_governor = cpufreq_default_governor();
> > +	mutex_unlock(&cpufreq_governor_mutex);
> > +
> >  	return 0;
> >  }
> >  module_param(off, int, 0444);
> > +module_param_string(default_governor, cpufreq_param_governor, CPUFREQ_NAME_LEN, 0444);
> >  core_initcall(cpufreq_core_init);
> > -- 
> > 2.27.0.111.gc72c7da667-goog
> 
> -- 
> viresh
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fb95fad81c79..5fd3c9f187eb 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -703,6 +703,11 @@ 
 	cpufreq.off=1	[CPU_FREQ]
 			disable the cpufreq sub-system
 
+	cpufreq.default_governor=
+			[CPU_FREQ] Name of the default cpufreq governor to use.
+			This governor must be registered in the kernel before
+			the cpufreq driver probes.
+
 	cpu_init_udelay=N
 			[X86] Delay for N microsec between assert and de-assert
 			of APIC INIT to start processors.  This delay occurs
diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index 0c74a7784964..368e612145d2 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -147,9 +147,9 @@  CPUs in it.
 
 The next major initialization step for a new policy object is to attach a
 scaling governor to it (to begin with, that is the default scaling governor
-determined by the kernel configuration, but it may be changed later
-via ``sysfs``).  First, a pointer to the new policy object is passed to the
-governor's ``->init()`` callback which is expected to initialize all of the
+determined by the kernel command line or configuration, but it may be changed
+later via ``sysfs``).  First, a pointer to the new policy object is passed to
+the governor's ``->init()`` callback which is expected to initialize all of the
 data structures necessary to handle the given policy and, possibly, to add
 a governor ``sysfs`` interface to it.  Next, the governor is started by
 invoking its ``->start()`` callback.
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 0128de3603df..4b1a5c0173cf 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -50,6 +50,9 @@  static LIST_HEAD(cpufreq_governor_list);
 #define for_each_governor(__governor)				\
 	list_for_each_entry(__governor, &cpufreq_governor_list, governor_list)
 
+static char cpufreq_param_governor[CPUFREQ_NAME_LEN];
+static struct cpufreq_governor *default_governor;
+
 /**
  * The "cpufreq driver" - the arch- or hardware-dependent low
  * level driver of CPUFreq support, and its spinlock. This lock
@@ -1055,7 +1058,6 @@  __weak struct cpufreq_governor *cpufreq_default_governor(void)
 
 static int cpufreq_init_policy(struct cpufreq_policy *policy)
 {
-	struct cpufreq_governor *def_gov = cpufreq_default_governor();
 	struct cpufreq_governor *gov = NULL;
 	unsigned int pol = CPUFREQ_POLICY_UNKNOWN;
 
@@ -1065,8 +1067,8 @@  static int cpufreq_init_policy(struct cpufreq_policy *policy)
 		if (gov) {
 			pr_debug("Restoring governor %s for cpu %d\n",
 				 policy->governor->name, policy->cpu);
-		} else if (def_gov) {
-			gov = def_gov;
+		} else if (default_governor) {
+			gov = default_governor;
 		} else {
 			return -ENODATA;
 		}
@@ -1074,8 +1076,8 @@  static int cpufreq_init_policy(struct cpufreq_policy *policy)
 		/* Use the default policy if there is no last_policy. */
 		if (policy->last_policy) {
 			pol = policy->last_policy;
-		} else if (def_gov) {
-			pol = cpufreq_parse_policy(def_gov->name);
+		} else if (default_governor) {
+			pol = cpufreq_parse_policy(default_governor->name);
 			/*
 			 * In case the default governor is neiter "performance"
 			 * nor "powersave", fall back to the initial policy
@@ -2320,6 +2322,9 @@  int cpufreq_register_governor(struct cpufreq_governor *governor)
 		list_add(&governor->governor_list, &cpufreq_governor_list);
 	}
 
+	if (!strncasecmp(cpufreq_param_governor, governor->name, CPUFREQ_NAME_LEN))
+		default_governor = governor;
+
 	mutex_unlock(&cpufreq_governor_mutex);
 	return err;
 }
@@ -2348,6 +2353,8 @@  void cpufreq_unregister_governor(struct cpufreq_governor *governor)
 
 	mutex_lock(&cpufreq_governor_mutex);
 	list_del(&governor->governor_list);
+	if (governor == default_governor)
+		default_governor = cpufreq_default_governor();
 	mutex_unlock(&cpufreq_governor_mutex);
 }
 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
@@ -2789,7 +2796,13 @@  static int __init cpufreq_core_init(void)
 	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
 	BUG_ON(!cpufreq_global_kobject);
 
+	mutex_lock(&cpufreq_governor_mutex);
+	if (!default_governor)
+		default_governor = cpufreq_default_governor();
+	mutex_unlock(&cpufreq_governor_mutex);
+
 	return 0;
 }
 module_param(off, int, 0444);
+module_param_string(default_governor, cpufreq_param_governor, CPUFREQ_NAME_LEN, 0444);
 core_initcall(cpufreq_core_init);