diff mbox series

[v3] cpufreq: fix race on cpufreq online

Message ID 20220510154236.88753-1-schspa@gmail.com (mailing list archive)
State Changes Requested, archived
Headers show
Series [v3] cpufreq: fix race on cpufreq online | expand

Commit Message

Schspa Shi May 10, 2022, 3:42 p.m. UTC
When cpufreq online failed, policy->cpus are not empty while
cpufreq sysfs file available, we may access some data freed.

Take policy->clk as an example:

static int cpufreq_online(unsigned int cpu)
{
  ...
  // policy->cpus != 0 at this time
  down_write(&policy->rwsem);
  ret = cpufreq_add_dev_interface(policy);
  up_write(&policy->rwsem);

  down_write(&policy->rwsem);
  ...
  /* cpufreq nitialization fails in some cases */
  if (cpufreq_driver->get && has_target()) {
    policy->cur = cpufreq_driver->get(policy->cpu);
    if (!policy->cur) {
      ret = -EIO;
      pr_err("%s: ->get() failed\n", __func__);
      goto out_destroy_policy;
    }
  }
  ...
  up_write(&policy->rwsem);
  ...

  return 0;

out_destroy_policy:
	for_each_cpu(j, policy->real_cpus)
		remove_cpu_dev_symlink(policy, get_cpu_device(j));
    up_write(&policy->rwsem);
...
out_exit_policy:
  if (cpufreq_driver->exit)
    cpufreq_driver->exit(policy);
      clk_put(policy->clk);
      // policy->clk is a wild pointer
...
                                    ^
                                    |
                            Another process access
                            __cpufreq_get
                              cpufreq_verify_current_freq
                                cpufreq_generic_get
                                  // acces wild pointer of policy->clk;
                                    |
                                    |
out_offline_policy:                 |
  cpufreq_policy_free(policy);      |
    // deleted here, and will wait for no body reference
    cpufreq_policy_put_kobj(policy);
}

We can fix it by clear the policy->cpus mask.
Both show_scaling_cur_freq and show_cpuinfo_cur_freq will return an
error by checking this mask, thus avoiding UAF.

Signed-off-by: Schspa Shi <schspa@gmail.com>

---

Changelog:
v1 -> v2:
        - Fix bad critical region enlarge which causes uninitialized
          unlock.
v2 -> v3:
        - Remove the missed down_write() before
          cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);

Signed-off-by: Schspa Shi <schspa@gmail.com>
---
 drivers/cpufreq/cpufreq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

Comments

Viresh Kumar May 11, 2022, 4:35 a.m. UTC | #1
Don't use in-reply-to for single patches. It is mostly used when you are
updating a single patch in a patchset and it makes sense to continue the
discussion in the same thread. In this case, we have a fresh patchset and it
makes the same thread messy.

On 10-05-22, 23:42, Schspa Shi wrote:
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 80f535cc8a75..d93958dbdab8 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -1337,12 +1337,12 @@ static int cpufreq_online(unsigned int cpu)
>  		down_write(&policy->rwsem);
>  		policy->cpu = cpu;
>  		policy->governor = NULL;
> -		up_write(&policy->rwsem);
>  	} else {
>  		new_policy = true;
>  		policy = cpufreq_policy_alloc(cpu);
>  		if (!policy)
>  			return -ENOMEM;
> +		down_write(&policy->rwsem);
>  	}

I am not sure, but maybe there were issues in calling init() with rwsem held, as
it may want to call some API from there.

>  	if (!new_policy && cpufreq_driver->online) {
> @@ -1382,7 +1382,6 @@ static int cpufreq_online(unsigned int cpu)
>  		cpumask_copy(policy->related_cpus, policy->cpus);
>  	}
>  
> -	down_write(&policy->rwsem);
>  	/*
>  	 * affected cpus must always be the one, which are online. We aren't
>  	 * managing offline cpus here.
> @@ -1533,7 +1532,7 @@ static int cpufreq_online(unsigned int cpu)
>  	for_each_cpu(j, policy->real_cpus)
>  		remove_cpu_dev_symlink(policy, get_cpu_device(j));
>  
> -	up_write(&policy->rwsem);
> +	cpumask_clear(policy->cpus);

I don't think you can do that safely. offline() or exit() may depend on
policy->cpus being set to all CPUs.

>  
>  out_offline_policy:
>  	if (cpufreq_driver->offline)
> @@ -1542,6 +1541,7 @@ static int cpufreq_online(unsigned int cpu)
>  out_exit_policy:
>  	if (cpufreq_driver->exit)
>  		cpufreq_driver->exit(policy);
> +	up_write(&policy->rwsem);
>  
>  out_free_policy:
>  	cpufreq_policy_free(policy);

Apart from the issues highlighted about, I think we are trying to fix an issue
locally which can happen at other places as well. Does something like this fix
your problem at hand ?

Untested.
Schspa Shi May 11, 2022, 8:10 a.m. UTC | #2
Viresh Kumar <viresh.kumar@linaro.org> writes:

> Don't use in-reply-to for single patches. It is mostly used when you are
> updating a single patch in a patchset and it makes sense to continue the
> discussion in the same thread. In this case, we have a fresh patchset and it
> makes the same thread messy.

Thanks for reminding me. will send a new thread for the next time.

>
> On 10-05-22, 23:42, Schspa Shi wrote:
>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>> index 80f535cc8a75..d93958dbdab8 100644
>> --- a/drivers/cpufreq/cpufreq.c
>> +++ b/drivers/cpufreq/cpufreq.c
>> @@ -1337,12 +1337,12 @@ static int cpufreq_online(unsigned int cpu)
>>              down_write(&policy->rwsem);
>>              policy->cpu = cpu;
>>              policy->governor = NULL;
>> -            up_write(&policy->rwsem);
>>      } else {
>>              new_policy = true;
>>              policy = cpufreq_policy_alloc(cpu);
>>              if (!policy)
>>                      return -ENOMEM;
>> +            down_write(&policy->rwsem);
>>      }
>
> I am not sure, but maybe there were issues in calling init() with rwsem held, as
> it may want to call some API from there.
>

I have checked all the init() implement of the fellowing files, It should be OK.
Function find command:
  ag "init[\s]+=" drivers/cpufreq

All the init() implement only initialize policy object without holding this lock
and won't call cpufreq APIs need to hold this lock.

drivers/cpufreq/pxa3xx-cpufreq.c
205:    .init           = pxa3xx_cpufreq_init,

drivers/cpufreq/powernow-k7.c
668:    .init           = powernow_cpu_init,

drivers/cpufreq/sparc-us2e-cpufreq.c
334:            driver->init = us2e_freq_cpu_init;

drivers/cpufreq/s5pv210-cpufreq.c
581:    .init           = s5pv210_cpu_init,

drivers/cpufreq/amd-pstate.c
637:    .init           = amd_pstate_cpu_init,

drivers/cpufreq/sc520_freq.c
93:     .init   = sc520_freq_cpu_init,

drivers/cpufreq/tegra186-cpufreq.c
125:    .init = tegra186_cpufreq_init,

drivers/cpufreq/davinci-cpufreq.c
102:    .init           = davinci_cpu_init,

drivers/cpufreq/spear-cpufreq.c
167:    .init           = spear_cpufreq_init,

drivers/cpufreq/acpi-cpufreq.c
950:    .init           = acpi_cpufreq_cpu_init,

drivers/cpufreq/mediatek-cpufreq-hw.c
286:    .init           = mtk_cpufreq_hw_cpu_init,

drivers/cpufreq/cpufreq_conservative.c
323:    .init = cs_init,

drivers/cpufreq/sa1100-cpufreq.c
194:    .init           = sa1100_cpu_init,

drivers/cpufreq/tegra194-cpufreq.c
279:    .init = tegra194_cpufreq_init,

drivers/cpufreq/longrun.c
279:    .init           = longrun_cpu_init,

drivers/cpufreq/cpufreq_userspace.c
119:    .init           = cpufreq_userspace_policy_init,

drivers/cpufreq/brcmstb-avs-cpufreq.c
730:    .init           = brcm_avs_cpufreq_init,

drivers/cpufreq/ia64-acpi-cpufreq.c
328:    .init           = acpi_cpufreq_cpu_init,

drivers/cpufreq/loongson2_cpufreq.c
95:     .init = loongson2_cpufreq_cpu_init,

drivers/cpufreq/omap-cpufreq.c
150:    .init           = omap_cpu_init,

drivers/cpufreq/e_powersaver.c
376:    .init           = eps_cpu_init,

drivers/cpufreq/cpufreq_ondemand.c
409:    .init = od_init,

drivers/cpufreq/s3c24xx-cpufreq.c
426:    .init           = s3c_cpufreq_init,

drivers/cpufreq/scmi-cpufreq.c
280:    .init   = scmi_cpufreq_init,

drivers/cpufreq/scpi-cpufreq.c
198:    .init   = scpi_cpufreq_init,

drivers/cpufreq/gx-suspmod.c
440:    .init           = cpufreq_gx_cpu_init,

drivers/cpufreq/speedstep-centrino.c
509:    .init           = centrino_cpu_init,

drivers/cpufreq/intel_pstate.c
2788:   .init           = intel_pstate_cpu_init,
3110:   .init           = intel_cpufreq_cpu_init,

drivers/cpufreq/kirkwood-cpufreq.c
97:     .init   = kirkwood_cpufreq_cpu_init,

drivers/cpufreq/pasemi-cpufreq.c
247:    .init           = pas_cpufreq_cpu_init,

drivers/cpufreq/elanfreq.c
195:    .init           = elanfreq_cpu_init,

drivers/cpufreq/speedstep-ich.c
316:    .init   = speedstep_cpu_init,

drivers/cpufreq/ppc_cbe_cpufreq.c
138:    .init           = cbe_cpufreq_cpu_init,

drivers/cpufreq/sa1110-cpufreq.c
318:    .init           = sa1110_cpu_init,

drivers/cpufreq/sparc-us3-cpufreq.c
182:            driver->init = us3_freq_cpu_init;

drivers/cpufreq/s3c64xx-cpufreq.c
200:    .init           = s3c64xx_cpufreq_driver_init,

drivers/cpufreq/cppc_cpufreq.c
684:    .init = cppc_cpufreq_cpu_init,

drivers/cpufreq/cpufreq_governor.h
159:            .init = cpufreq_dbs_governor_init,                      \

drivers/cpufreq/qoriq-cpufreq.c
254:    .init           = qoriq_cpufreq_cpu_init,

drivers/cpufreq/vexpress-spc-cpufreq.c
473:    .init                   = ve_spc_cpufreq_init,

drivers/cpufreq/pmac64-cpufreq.c
331:    .init           = g5_cpufreq_cpu_init,

drivers/cpufreq/pmac32-cpufreq.c
439:    .init           = pmac_cpufreq_cpu_init,

drivers/cpufreq/longhaul.c
906:    .init   = longhaul_cpu_init,

drivers/cpufreq/pxa2xx-cpufreq.c
296:    .init   = pxa_cpufreq_init,

drivers/cpufreq/pcc-cpufreq.c
574:    .init = pcc_cpufreq_cpu_init,

drivers/cpufreq/loongson1-cpufreq.c
123:    .init           = ls1x_cpufreq_init,

drivers/cpufreq/s3c2416-cpufreq.c
483:    .init           = s3c2416_cpufreq_driver_init,

drivers/cpufreq/powernow-k6.c
253:    .init           = powernow_k6_cpu_init,

drivers/cpufreq/p4-clockmod.c
227:    .init           = cpufreq_p4_cpu_init,

drivers/cpufreq/powernv-cpufreq.c
1036:   .init           = powernv_cpufreq_cpu_init,

drivers/cpufreq/imx6q-cpufreq.c
205:    .init = imx6q_cpufreq_init,

drivers/cpufreq/sh-cpufreq.c
154:    .init           = sh_cpufreq_cpu_init,

drivers/cpufreq/powernow-k8.c
1143:   .init           = powernowk8_cpu_init,

drivers/cpufreq/maple-cpufreq.c
150:    .init           = maple_cpufreq_cpu_init,

drivers/cpufreq/cpufreq-dt.c
181:    .init = cpufreq_init,

drivers/cpufreq/speedstep-smi.c
295:    .init           = speedstep_cpu_init,

drivers/cpufreq/qcom-cpufreq-hw.c
626:    .init           = qcom_cpufreq_hw_cpu_init,

drivers/cpufreq/mediatek-cpufreq.c
470:    .init = mtk_cpufreq_init,

drivers/cpufreq/bmips-cpufreq.c
153:    .init           = bmips_cpufreq_init,

drivers/cpufreq/cpufreq-nforce2.c
373:    .init = nforce2_cpu_init,

>>      if (!new_policy && cpufreq_driver->online) {
>> @@ -1382,7 +1382,6 @@ static int cpufreq_online(unsigned int cpu)
>>              cpumask_copy(policy->related_cpus, policy->cpus);
>>      }
>>
>> -    down_write(&policy->rwsem);
>>      /*
>>       * affected cpus must always be the one, which are online. We aren't
>>       * managing offline cpus here.
>> @@ -1533,7 +1532,7 @@ static int cpufreq_online(unsigned int cpu)
>>      for_each_cpu(j, policy->real_cpus)
>>              remove_cpu_dev_symlink(policy, get_cpu_device(j));
>>
>> -    up_write(&policy->rwsem);
>> +    cpumask_clear(policy->cpus);
>
> I don't think you can do that safely. offline() or exit() may depend on
> policy->cpus being set to all CPUs.
>

OK, I will move this after exit(). and there will be no effect with those
two APIs. But policy->cpus must be clear before release policy->rwsem.

>>
>>  out_offline_policy:
>>      if (cpufreq_driver->offline)
>> @@ -1542,6 +1541,7 @@ static int cpufreq_online(unsigned int cpu)
>>  out_exit_policy:
>>      if (cpufreq_driver->exit)
>>              cpufreq_driver->exit(policy);
>> +    up_write(&policy->rwsem);
>>
>>  out_free_policy:
>>      cpufreq_policy_free(policy);
>
> Apart from the issues highlighted about, I think we are trying to fix an issue
> locally which can happen at other places as well. Does something like this fix
> your problem at hand ?
>
> Untested.
>
> --
> viresh
>
> -------------------------8<-------------------------
>
> From e379921d3efa58a40d9565a4506a2580318a437d Mon Sep 17 00:00:00 2001
> Message-Id: <e379921d3efa58a40d9565a4506a2580318a437d.1652243573.git.viresh.kumar@linaro.org>
> From: Viresh Kumar <viresh.kumar@linaro.org>
> Date: Wed, 11 May 2022 09:13:26 +0530
> Subject: [PATCH] cpufreq: Allow sysfs access only for active policies
>
> It is currently possible, in a corner case, to access the sysfs files
> and reach show_cpuinfo_cur_freq(), etc, for a partly initialized policy.
>
> This can happen for example if cpufreq_online() fails after adding the
> sysfs files, which are immediately accessed by another process. There
> can easily be other such cases, which aren't identified yet.
>
> Process A:                                    Process B
>
> cpufreq_online()
>   down_write(&policy->rwsem);
>   if (new_policy) {
>     ret = cpufreq_add_dev_interface(policy);
>     /* This fails after adding few files */
>     if (ret)
>       goto out_destroy_policy;
>
>     ...
>   }
>
>   ...
>
> out_destroy_policy:
>   ...
>   up_write(&policy->rwsem);
>                                               /*
>                                                * This will end up accessing the policy
>                                                * which isn't fully initialized.
>                                                */
>                                               show_cpuinfo_cur_freq()
>
> if (cpufreq_driver->offline)
>     cpufreq_driver->offline(policy);
>
>   if (cpufreq_driver->exit)
>     cpufreq_driver->exit(policy);
>
>   cpufreq_policy_free(policy);
>
> Fix these by checking in show/store if the policy is active or not and
> update policy_is_inactive() to also check if the policy is added to the
> global list or not.
>
> Reported-by: Schspa Shi <schspa@gmail.com>
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
>  drivers/cpufreq/cpufreq.c | 10 ++++++----
>  include/linux/cpufreq.h   |  3 ++-
>  2 files changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index fbaa8e6c7d23..036314d05ded 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -948,13 +948,14 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
>  {
>       struct cpufreq_policy *policy = to_policy(kobj);
>       struct freq_attr *fattr = to_attr(attr);
> -     ssize_t ret;
> +     ssize_t ret = -EBUSY;
>
>       if (!fattr->show)
>               return -EIO;
>
>       down_read(&policy->rwsem);
> -     ret = fattr->show(policy, buf);
> +     if (!policy_is_inactive(policy))
> +             ret = fattr->show(policy, buf);
>       up_read(&policy->rwsem);
>
>       return ret;
> @@ -965,7 +966,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
>  {
>       struct cpufreq_policy *policy = to_policy(kobj);
>       struct freq_attr *fattr = to_attr(attr);
> -     ssize_t ret = -EINVAL;
> +     ssize_t ret = -EBUSY;
>
>       if (!fattr->store)
>               return -EIO;
> @@ -979,7 +980,8 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
>
>       if (cpu_online(policy->cpu)) {
>               down_write(&policy->rwsem);
> -             ret = fattr->store(policy, buf, count);
> +             if (!policy_is_inactive(policy))
> +                     ret = fattr->store(policy, buf, count);
>               up_write(&policy->rwsem);
>       }
>
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index 35c7d6db4139..03e5e114c996 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -209,7 +209,8 @@ static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { }
>
>  static inline bool policy_is_inactive(struct cpufreq_policy *policy)
>  {
> -     return cpumask_empty(policy->cpus);
> +     return unlikely(cpumask_empty(policy->cpus) ||
> +                     list_empty(&policy->policy_list));
>  }
>

I don't think this fully solves my problem.
1. There is some case which cpufreq_online failed after the policy is added to
   cpufreq_policy_list.
2. policy->policy_list is not protected by &policy->rwsem, and we
can't relay on this to
   indict the policy is fine.

From this point of view, we can fix this problem through the state of
this linked list.
But the above two problems need to be solved first.

1. Remove policy from cpufreq_policy_list when cpufreq_init_policy failed.
>  static inline bool policy_is_shared(struct cpufreq_policy *policy)

static int cpufreq_online(unsigned int cpu)
{
        ......
        if (new_policy) {
                ret = cpufreq_add_dev_interface(policy);
                if (ret)
                        goto out_destroy_policy;

                cpufreq_stats_create_table(policy);

                write_lock_irqsave(&cpufreq_driver_lock, flags);
                list_add(&policy->policy_list, &cpufreq_policy_list);
                write_unlock_irqrestore(&cpufreq_driver_lock, flags);
        }
        ret = cpufreq_init_policy(policy);
        if (ret) {
                pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n",
                       __func__, cpu, ret);
                goto out_destroy_policy;
        }

        up_write(&policy->rwsem);
        ......
        return 0;

out_destroy_policy:
        for_each_cpu(j, policy->real_cpus)
                remove_cpu_dev_symlink(policy, get_cpu_device(j));

        if (new_policy) {
                write_lock_irqsave(&cpufreq_driver_lock, flags);
                list_del(&policy->policy_list);
                write_unlock_irqrestore(&cpufreq_driver_lock, flags);
    }

        up_write(&policy->rwsem);

2. we need to add lock to cpufreq_policy_free.
static void cpufreq_policy_free(struct cpufreq_policy *policy)
{
        unsigned long flags;
        int cpu;

        /* add write lock to make &policy->policy_list stable. */
        down_write(&policy->rwsem);
        /* Remove policy from list */
        write_lock_irqsave(&cpufreq_driver_lock, flags);
        list_del(&policy->policy_list);
        ......
        kfree(policy->min_freq_req);

        up_write(&policy->rwsem);
        cpufreq_policy_put_kobj(policy);
        free_cpumask_var(policy->real_cpus);
        free_cpumask_var(policy->related_cpus);
        free_cpumask_var(policy->cpus);
        kfree(policy);
        ......
}


--
Zhaohui Shi
BRs
Viresh Kumar May 11, 2022, 12:21 p.m. UTC | #3
On 11-05-22, 16:10, Schspa Shi wrote:
> Viresh Kumar <viresh.kumar@linaro.org> writes:
> > I am not sure, but maybe there were issues in calling init() with rwsem held, as
> > it may want to call some API from there.
> >
> 
> I have checked all the init() implement of the fellowing files, It should be OK.
> Function find command:
>   ag "init[\s]+=" drivers/cpufreq
> 
> All the init() implement only initialize policy object without holding this lock
> and won't call cpufreq APIs need to hold this lock.

Okay, we can see if someone complains later then :)

> > I don't think you can do that safely. offline() or exit() may depend on
> > policy->cpus being set to all CPUs.
> OK, I will move this after exit(). and there will be no effect with those
> two APIs. But policy->cpus must be clear before release policy->rwsem.

Hmm, I don't think depending on the values of policy->cpus is a good idea to be
honest. This design is inviting bugs to come in at another place. We need a
clear flag for this, a new flag or something like policy_list.

Also I see the same bug happening while the policy is removed. The kobject is
put after the rwsem is dropped.

> >  static inline bool policy_is_inactive(struct cpufreq_policy *policy)
> >  {
> > -     return cpumask_empty(policy->cpus);
> > +     return unlikely(cpumask_empty(policy->cpus) ||
> > +                     list_empty(&policy->policy_list));
> >  }
> >
> 
> I don't think this fully solves my problem.
> 1. There is some case which cpufreq_online failed after the policy is added to
>    cpufreq_policy_list.

And I missed that :(

> 2. policy->policy_list is not protected by &policy->rwsem, and we
> can't relay on this to
>    indict the policy is fine.

Ahh..

> >From this point of view, we can fix this problem through the state of
> this linked list.
> But the above two problems need to be solved first.

I feel overriding policy_list for this is going to make it complex/messy.

Maybe something like this then:

-------------------------8<-------------------------

From dacc8d09d4d7b3d9a8bca8d78fc72199c16dc4a5 Mon Sep 17 00:00:00 2001
Message-Id: <dacc8d09d4d7b3d9a8bca8d78fc72199c16dc4a5.1652271581.git.viresh.kumar@linaro.org>
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 11 May 2022 09:13:26 +0530
Subject: [PATCH] cpufreq: Allow sysfs access only for active policies

It is currently possible, in a corner case, to access the sysfs files
and reach show_cpuinfo_cur_freq(), etc, for a partly initialized policy.

This can happen for example if cpufreq_online() fails after adding the
sysfs files, which are immediately accessed by another process. There
can easily be other such cases, which aren't identified yet, like while
the policy is getting freed.

Process A:					Process B

cpufreq_online()
  down_write(&policy->rwsem);
  if (new_policy) {
    ret = cpufreq_add_dev_interface(policy);
    /* This fails after adding few files */
    if (ret)
      goto out_destroy_policy;

    ...
  }

  ...

out_destroy_policy:
  ...
  up_write(&policy->rwsem);
						/*
						 * This will end up accessing the policy
						 * which isn't fully initialized.
						 */
						show_cpuinfo_cur_freq()

if (cpufreq_driver->offline)
    cpufreq_driver->offline(policy);

  if (cpufreq_driver->exit)
    cpufreq_driver->exit(policy);

  cpufreq_policy_free(policy);

Fix these by checking in show/store if the policy is sysfs ready or not.

Reported-by: Schspa Shi <schspa@gmail.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 18 ++++++++++++++----
 include/linux/cpufreq.h   |  3 +++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index c8bf6c68597c..65c2bbcf555d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -948,13 +948,14 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
 	struct cpufreq_policy *policy = to_policy(kobj);
 	struct freq_attr *fattr = to_attr(attr);
-	ssize_t ret;
+	ssize_t ret = -EBUSY;
 
 	if (!fattr->show)
 		return -EIO;
 
 	down_read(&policy->rwsem);
-	ret = fattr->show(policy, buf);
+	if (policy->sysfs_ready)
+		ret = fattr->show(policy, buf);
 	up_read(&policy->rwsem);
 
 	return ret;
@@ -965,7 +966,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 {
 	struct cpufreq_policy *policy = to_policy(kobj);
 	struct freq_attr *fattr = to_attr(attr);
-	ssize_t ret = -EINVAL;
+	ssize_t ret = -EBUSY;
 
 	if (!fattr->store)
 		return -EIO;
@@ -979,7 +980,8 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 
 	if (cpu_online(policy->cpu)) {
 		down_write(&policy->rwsem);
-		ret = fattr->store(policy, buf, count);
+		if (policy->sysfs_ready)
+			ret = fattr->store(policy, buf, count);
 		up_write(&policy->rwsem);
 	}
 
@@ -1280,6 +1282,11 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
 	unsigned long flags;
 	int cpu;
 
+	/* Disallow sysfs interactions now */
+	down_write(&policy->rwsem);
+	policy->sysfs_ready = false;
+	up_write(&policy->rwsem);
+
 	/* Remove policy from list */
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 	list_del(&policy->policy_list);
@@ -1516,6 +1523,9 @@ static int cpufreq_online(unsigned int cpu)
 		goto out_destroy_policy;
 	}
 
+	/* We can allow sysfs interactions now */
+	policy->sysfs_ready = true;
+
 	up_write(&policy->rwsem);
 
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 35c7d6db4139..7e4384e535fd 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -101,6 +101,9 @@ struct cpufreq_policy {
 	 */
 	struct rw_semaphore	rwsem;
 
+	/* Policy is ready for sysfs interactions */
+	bool			sysfs_ready;
+
 	/*
 	 * Fast switch flags:
 	 * - fast_switch_possible should be set by the driver if it can
Rafael J. Wysocki May 11, 2022, 12:59 p.m. UTC | #4
On Wed, May 11, 2022 at 2:21 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> On 11-05-22, 16:10, Schspa Shi wrote:
> > Viresh Kumar <viresh.kumar@linaro.org> writes:
> > > I am not sure, but maybe there were issues in calling init() with rwsem held, as
> > > it may want to call some API from there.
> > >
> >
> > I have checked all the init() implement of the fellowing files, It should be OK.
> > Function find command:
> >   ag "init[\s]+=" drivers/cpufreq
> >
> > All the init() implement only initialize policy object without holding this lock
> > and won't call cpufreq APIs need to hold this lock.
>
> Okay, we can see if someone complains later then :)
>
> > > I don't think you can do that safely. offline() or exit() may depend on
> > > policy->cpus being set to all CPUs.
> > OK, I will move this after exit(). and there will be no effect with those
> > two APIs. But policy->cpus must be clear before release policy->rwsem.
>
> Hmm, I don't think depending on the values of policy->cpus is a good idea to be
> honest. This design is inviting bugs to come in at another place. We need a
> clear flag for this, a new flag or something like policy_list.
>
> Also I see the same bug happening while the policy is removed. The kobject is
> put after the rwsem is dropped.
>
> > >  static inline bool policy_is_inactive(struct cpufreq_policy *policy)
> > >  {
> > > -     return cpumask_empty(policy->cpus);
> > > +     return unlikely(cpumask_empty(policy->cpus) ||
> > > +                     list_empty(&policy->policy_list));
> > >  }
> > >
> >
> > I don't think this fully solves my problem.
> > 1. There is some case which cpufreq_online failed after the policy is added to
> >    cpufreq_policy_list.
>
> And I missed that :(
>
> > 2. policy->policy_list is not protected by &policy->rwsem, and we
> > can't relay on this to
> >    indict the policy is fine.
>
> Ahh..
>
> > >From this point of view, we can fix this problem through the state of
> > this linked list.
> > But the above two problems need to be solved first.
>
> I feel overriding policy_list for this is going to make it complex/messy.
>
> Maybe something like this then:

There are two things.

One is the possible race with respect to the sysfs access occurring
during failing initialization and the other is that ->offline() or
->exit() can be called with or without holding the policy rwsem
depending on the code path.

Namely, cpufreq_offline() calls them under the policy rwsem, but
cpufreq_remove_dev() calls ->exit() outside the rwsem.  Also they are
called outside the rwsem in cpufreq_online().

Moreover, ->offline() and ->exit() cannot expect policy->cpus to be
populated, because they are called when it is empty from
cpufreq_offline().

So the $subject patch is correct AFAICS even though it doesn't address
all of the above.

>
> -------------------------8<-------------------------
>
> From dacc8d09d4d7b3d9a8bca8d78fc72199c16dc4a5 Mon Sep 17 00:00:00 2001
> Message-Id: <dacc8d09d4d7b3d9a8bca8d78fc72199c16dc4a5.1652271581.git.viresh.kumar@linaro.org>
> From: Viresh Kumar <viresh.kumar@linaro.org>
> Date: Wed, 11 May 2022 09:13:26 +0530
> Subject: [PATCH] cpufreq: Allow sysfs access only for active policies
>
> It is currently possible, in a corner case, to access the sysfs files
> and reach show_cpuinfo_cur_freq(), etc, for a partly initialized policy.
>
> This can happen for example if cpufreq_online() fails after adding the
> sysfs files, which are immediately accessed by another process. There
> can easily be other such cases, which aren't identified yet, like while
> the policy is getting freed.
>
> Process A:                                      Process B
>
> cpufreq_online()
>   down_write(&policy->rwsem);
>   if (new_policy) {
>     ret = cpufreq_add_dev_interface(policy);
>     /* This fails after adding few files */
>     if (ret)
>       goto out_destroy_policy;
>
>     ...
>   }
>
>   ...
>
> out_destroy_policy:
>   ...
>   up_write(&policy->rwsem);
>                                                 /*
>                                                  * This will end up accessing the policy
>                                                  * which isn't fully initialized.
>                                                  */
>                                                 show_cpuinfo_cur_freq()
>
> if (cpufreq_driver->offline)
>     cpufreq_driver->offline(policy);
>
>   if (cpufreq_driver->exit)
>     cpufreq_driver->exit(policy);
>
>   cpufreq_policy_free(policy);
>
> Fix these by checking in show/store if the policy is sysfs ready or not.
>
> Reported-by: Schspa Shi <schspa@gmail.com>
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
>  drivers/cpufreq/cpufreq.c | 18 ++++++++++++++----
>  include/linux/cpufreq.h   |  3 +++
>  2 files changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index c8bf6c68597c..65c2bbcf555d 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -948,13 +948,14 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
>  {
>         struct cpufreq_policy *policy = to_policy(kobj);
>         struct freq_attr *fattr = to_attr(attr);
> -       ssize_t ret;
> +       ssize_t ret = -EBUSY;
>
>         if (!fattr->show)
>                 return -EIO;
>
>         down_read(&policy->rwsem);
> -       ret = fattr->show(policy, buf);
> +       if (policy->sysfs_ready)
> +               ret = fattr->show(policy, buf);
>         up_read(&policy->rwsem);
>
>         return ret;
> @@ -965,7 +966,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
>  {
>         struct cpufreq_policy *policy = to_policy(kobj);
>         struct freq_attr *fattr = to_attr(attr);
> -       ssize_t ret = -EINVAL;
> +       ssize_t ret = -EBUSY;
>
>         if (!fattr->store)
>                 return -EIO;
> @@ -979,7 +980,8 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
>
>         if (cpu_online(policy->cpu)) {
>                 down_write(&policy->rwsem);
> -               ret = fattr->store(policy, buf, count);
> +               if (policy->sysfs_ready)
> +                       ret = fattr->store(policy, buf, count);
>                 up_write(&policy->rwsem);
>         }
>
> @@ -1280,6 +1282,11 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
>         unsigned long flags;
>         int cpu;
>
> +       /* Disallow sysfs interactions now */
> +       down_write(&policy->rwsem);
> +       policy->sysfs_ready = false;
> +       up_write(&policy->rwsem);
> +
>         /* Remove policy from list */
>         write_lock_irqsave(&cpufreq_driver_lock, flags);
>         list_del(&policy->policy_list);
> @@ -1516,6 +1523,9 @@ static int cpufreq_online(unsigned int cpu)
>                 goto out_destroy_policy;
>         }
>
> +       /* We can allow sysfs interactions now */
> +       policy->sysfs_ready = true;
> +
>         up_write(&policy->rwsem);
>
>         kobject_uevent(&policy->kobj, KOBJ_ADD);
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index 35c7d6db4139..7e4384e535fd 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -101,6 +101,9 @@ struct cpufreq_policy {
>          */
>         struct rw_semaphore     rwsem;
>
> +       /* Policy is ready for sysfs interactions */
> +       bool                    sysfs_ready;
> +
>         /*
>          * Fast switch flags:
>          * - fast_switch_possible should be set by the driver if it can
> --
> 2.31.1.272.g89b43f80a514
>
Schspa Shi May 11, 2022, 1:12 p.m. UTC | #5
Viresh Kumar <viresh.kumar@linaro.org> writes:

> On 11-05-22, 16:10, Schspa Shi wrote:
>> Viresh Kumar <viresh.kumar@linaro.org> writes:
>> > I am not sure, but maybe there were issues in calling init() with rwsem held, as
>> > it may want to call some API from there.
>> >
>>
>> I have checked all the init() implement of the fellowing files, It should be OK.
>> Function find command:
>>   ag "init[\s]+=" drivers/cpufreq
>>
>> All the init() implement only initialize policy object without holding this lock
>> and won't call cpufreq APIs need to hold this lock.
>
> Okay, we can see if someone complains later then :)
>
>> > I don't think you can do that safely. offline() or exit() may depend on
>> > policy->cpus being set to all CPUs.
>> OK, I will move this after exit(). and there will be no effect with those
>> two APIs. But policy->cpus must be clear before release policy->rwsem.
>
> Hmm, I don't think depending on the values of policy->cpus is a good idea to be
> honest. This design is inviting bugs to come in at another place. We need a
> clear flag for this, a new flag or something like policy_list.
>
> Also I see the same bug happening while the policy is removed. The kobject is
> put after the rwsem is dropped.
>
>> >  static inline bool policy_is_inactive(struct cpufreq_policy *policy)
>> >  {
>> > -     return cpumask_empty(policy->cpus);
>> > +     return unlikely(cpumask_empty(policy->cpus) ||
>> > +                     list_empty(&policy->policy_list));
>> >  }
>> >
>>
>> I don't think this fully solves my problem.
>> 1. There is some case which cpufreq_online failed after the policy is added to
>>    cpufreq_policy_list.
>
> And I missed that :(
>
>> 2. policy->policy_list is not protected by &policy->rwsem, and we
>> can't relay on this to
>>    indict the policy is fine.
>
> Ahh..
>
>> >From this point of view, we can fix this problem through the state of
>> this linked list.
>> But the above two problems need to be solved first.
>
> I feel overriding policy_list for this is going to make it complex/messy.
>

Yes, I agree with it.

> Maybe something like this then:
>
> -------------------------8<-------------------------
>
> From dacc8d09d4d7b3d9a8bca8d78fc72199c16dc4a5 Mon Sep 17 00:00:00 2001
> Message-Id: <dacc8d09d4d7b3d9a8bca8d78fc72199c16dc4a5.1652271581.git.viresh.kumar@linaro.org>
> From: Viresh Kumar <viresh.kumar@linaro.org>
> Date: Wed, 11 May 2022 09:13:26 +0530
> Subject: [PATCH] cpufreq: Allow sysfs access only for active policies
>
> It is currently possible, in a corner case, to access the sysfs files
> and reach show_cpuinfo_cur_freq(), etc, for a partly initialized policy.
>
> This can happen for example if cpufreq_online() fails after adding the
> sysfs files, which are immediately accessed by another process. There
> can easily be other such cases, which aren't identified yet, like while
> the policy is getting freed.
>
> Process A:                                    Process B
>
> cpufreq_online()
>   down_write(&policy->rwsem);
>   if (new_policy) {
>     ret = cpufreq_add_dev_interface(policy);
>     /* This fails after adding few files */
>     if (ret)
>       goto out_destroy_policy;
>
>     ...
>   }
>
>   ...
>
> out_destroy_policy:
>   ...
>   up_write(&policy->rwsem);
>                                               /*
>                                                * This will end up accessing the policy
>                                                * which isn't fully initialized.
>                                                */
>                                               show_cpuinfo_cur_freq()
>
> if (cpufreq_driver->offline)
>     cpufreq_driver->offline(policy);
>
>   if (cpufreq_driver->exit)
>     cpufreq_driver->exit(policy);
>
>   cpufreq_policy_free(policy);
>
> Fix these by checking in show/store if the policy is sysfs ready or not.
>
> Reported-by: Schspa Shi <schspa@gmail.com>
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
>  drivers/cpufreq/cpufreq.c | 18 ++++++++++++++----
>  include/linux/cpufreq.h   |  3 +++
>  2 files changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index c8bf6c68597c..65c2bbcf555d 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -948,13 +948,14 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
>  {
>       struct cpufreq_policy *policy = to_policy(kobj);
>       struct freq_attr *fattr = to_attr(attr);
> -     ssize_t ret;
> +     ssize_t ret = -EBUSY;
>
>       if (!fattr->show)
>               return -EIO;
>
>       down_read(&policy->rwsem);
> -     ret = fattr->show(policy, buf);
> +     if (policy->sysfs_ready)
> +             ret = fattr->show(policy, buf);
>       up_read(&policy->rwsem);
>
>       return ret;
> @@ -965,7 +966,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
>  {
>       struct cpufreq_policy *policy = to_policy(kobj);
>       struct freq_attr *fattr = to_attr(attr);
> -     ssize_t ret = -EINVAL;
> +     ssize_t ret = -EBUSY;
>
>       if (!fattr->store)
>               return -EIO;
> @@ -979,7 +980,8 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
>
>       if (cpu_online(policy->cpu)) {
>               down_write(&policy->rwsem);
> -             ret = fattr->store(policy, buf, count);
> +             if (policy->sysfs_ready)
> +                     ret = fattr->store(policy, buf, count);
>               up_write(&policy->rwsem);
>       }
>
> @@ -1280,6 +1282,11 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
>       unsigned long flags;
>       int cpu;
>
> +     /* Disallow sysfs interactions now */
> +     down_write(&policy->rwsem);
> +     policy->sysfs_ready = false;
> +     up_write(&policy->rwsem);
> +
>       /* Remove policy from list */
>       write_lock_irqsave(&cpufreq_driver_lock, flags);
>       list_del(&policy->policy_list);
> @@ -1516,6 +1523,9 @@ static int cpufreq_online(unsigned int cpu)
>               goto out_destroy_policy;
>       }
>
> +     /* We can allow sysfs interactions now */
> +     policy->sysfs_ready = true;
> +
>       up_write(&policy->rwsem);
>
>       kobject_uevent(&policy->kobj, KOBJ_ADD);
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index 35c7d6db4139..7e4384e535fd 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -101,6 +101,9 @@ struct cpufreq_policy {
>        */
>       struct rw_semaphore     rwsem;
>
> +     /* Policy is ready for sysfs interactions */
> +     bool                    sysfs_ready;
> +

Do we need to add this flag to some APIs like
  unsigned int cpufreq_get(unsigned int cpu);
  void refresh_frequency_limits(struct cpufreq_policy *policy);
too ?

But if we made this change it seems to have the same meaning as
policy_is_inactive.

>       /*
>        * Fast switch flags:
>        * - fast_switch_possible should be set by the driver if it can

---
BRs

Schspa Shi
Rafael J. Wysocki May 11, 2022, 1:19 p.m. UTC | #6
On Wed, May 11, 2022 at 2:59 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Wed, May 11, 2022 at 2:21 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> >
> > On 11-05-22, 16:10, Schspa Shi wrote:
> > > Viresh Kumar <viresh.kumar@linaro.org> writes:
> > > > I am not sure, but maybe there were issues in calling init() with rwsem held, as
> > > > it may want to call some API from there.
> > > >
> > >
> > > I have checked all the init() implement of the fellowing files, It should be OK.
> > > Function find command:
> > >   ag "init[\s]+=" drivers/cpufreq
> > >
> > > All the init() implement only initialize policy object without holding this lock
> > > and won't call cpufreq APIs need to hold this lock.
> >
> > Okay, we can see if someone complains later then :)
> >
> > > > I don't think you can do that safely. offline() or exit() may depend on
> > > > policy->cpus being set to all CPUs.
> > > OK, I will move this after exit(). and there will be no effect with those
> > > two APIs. But policy->cpus must be clear before release policy->rwsem.
> >
> > Hmm, I don't think depending on the values of policy->cpus is a good idea to be
> > honest. This design is inviting bugs to come in at another place. We need a
> > clear flag for this, a new flag or something like policy_list.

Why?

> > Also I see the same bug happening while the policy is removed. The kobject is
> > put after the rwsem is dropped.

This shouldn't be a problem because of the wait_for_completion() in
cpufreq_policy_put_kobj().  It is known that cpufreq_sysfs_release()
has run when cpufreq_policy_put_kobj() returns, so it is safe to free
the policy then.

> > > >  static inline bool policy_is_inactive(struct cpufreq_policy *policy)
> > > >  {
> > > > -     return cpumask_empty(policy->cpus);
> > > > +     return unlikely(cpumask_empty(policy->cpus) ||
> > > > +                     list_empty(&policy->policy_list));
> > > >  }
> > > >
> > >
> > > I don't think this fully solves my problem.
> > > 1. There is some case which cpufreq_online failed after the policy is added to
> > >    cpufreq_policy_list.
> >
> > And I missed that :(
> >
> > > 2. policy->policy_list is not protected by &policy->rwsem, and we
> > > can't relay on this to
> > >    indict the policy is fine.
> >
> > Ahh..
> >
> > > >From this point of view, we can fix this problem through the state of
> > > this linked list.
> > > But the above two problems need to be solved first.
> >
> > I feel overriding policy_list for this is going to make it complex/messy.
> >
> > Maybe something like this then:
>
> There are two things.
>
> One is the possible race with respect to the sysfs access occurring
> during failing initialization and the other is that ->offline() or
> ->exit() can be called with or without holding the policy rwsem
> depending on the code path.
>
> Namely, cpufreq_offline() calls them under the policy rwsem, but
> cpufreq_remove_dev() calls ->exit() outside the rwsem.  Also they are
> called outside the rwsem in cpufreq_online().
>
> Moreover, ->offline() and ->exit() cannot expect policy->cpus to be
> populated, because they are called when it is empty from
> cpufreq_offline().
>
> So the $subject patch is correct AFAICS even though it doesn't address
> all of the above.

TBH, I'm not sure why show() doesn't check policy_is_inactive() under the rwsem.

Moreover, I'm not sure why the locking dance in store() is necessary.
Rafael J. Wysocki May 11, 2022, 1:42 p.m. UTC | #7
On Wed, May 11, 2022 at 3:19 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Wed, May 11, 2022 at 2:59 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
> >
> > On Wed, May 11, 2022 at 2:21 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > >
> > > On 11-05-22, 16:10, Schspa Shi wrote:
> > > > Viresh Kumar <viresh.kumar@linaro.org> writes:
> > > > > I am not sure, but maybe there were issues in calling init() with rwsem held, as
> > > > > it may want to call some API from there.
> > > > >
> > > >
> > > > I have checked all the init() implement of the fellowing files, It should be OK.
> > > > Function find command:
> > > >   ag "init[\s]+=" drivers/cpufreq
> > > >
> > > > All the init() implement only initialize policy object without holding this lock
> > > > and won't call cpufreq APIs need to hold this lock.
> > >
> > > Okay, we can see if someone complains later then :)
> > >
> > > > > I don't think you can do that safely. offline() or exit() may depend on
> > > > > policy->cpus being set to all CPUs.
> > > > OK, I will move this after exit(). and there will be no effect with those
> > > > two APIs. But policy->cpus must be clear before release policy->rwsem.
> > >
> > > Hmm, I don't think depending on the values of policy->cpus is a good idea to be
> > > honest. This design is inviting bugs to come in at another place. We need a
> > > clear flag for this, a new flag or something like policy_list.
>
> Why?
>
> > > Also I see the same bug happening while the policy is removed. The kobject is
> > > put after the rwsem is dropped.
>
> This shouldn't be a problem because of the wait_for_completion() in
> cpufreq_policy_put_kobj().  It is known that cpufreq_sysfs_release()
> has run when cpufreq_policy_put_kobj() returns, so it is safe to free
> the policy then.
>
> > > > >  static inline bool policy_is_inactive(struct cpufreq_policy *policy)
> > > > >  {
> > > > > -     return cpumask_empty(policy->cpus);
> > > > > +     return unlikely(cpumask_empty(policy->cpus) ||
> > > > > +                     list_empty(&policy->policy_list));
> > > > >  }
> > > > >
> > > >
> > > > I don't think this fully solves my problem.
> > > > 1. There is some case which cpufreq_online failed after the policy is added to
> > > >    cpufreq_policy_list.
> > >
> > > And I missed that :(
> > >
> > > > 2. policy->policy_list is not protected by &policy->rwsem, and we
> > > > can't relay on this to
> > > >    indict the policy is fine.
> > >
> > > Ahh..
> > >
> > > > >From this point of view, we can fix this problem through the state of
> > > > this linked list.
> > > > But the above two problems need to be solved first.
> > >
> > > I feel overriding policy_list for this is going to make it complex/messy.
> > >
> > > Maybe something like this then:
> >
> > There are two things.
> >
> > One is the possible race with respect to the sysfs access occurring
> > during failing initialization and the other is that ->offline() or
> > ->exit() can be called with or without holding the policy rwsem
> > depending on the code path.
> >
> > Namely, cpufreq_offline() calls them under the policy rwsem, but
> > cpufreq_remove_dev() calls ->exit() outside the rwsem.  Also they are
> > called outside the rwsem in cpufreq_online().
> >
> > Moreover, ->offline() and ->exit() cannot expect policy->cpus to be
> > populated, because they are called when it is empty from
> > cpufreq_offline().
> >
> > So the $subject patch is correct AFAICS even though it doesn't address
> > all of the above.

Actually, I meant the last version of it, that is:

https://patchwork.kernel.org/project/linux-pm/patch/20220510154236.88753-1-schspa@gmail.com/

> TBH, I'm not sure why show() doesn't check policy_is_inactive() under the rwsem.
>
> Moreover, I'm not sure why the locking dance in store() is necessary.
Schspa Shi May 11, 2022, 1:42 p.m. UTC | #8
"Rafael J. Wysocki" <rafael@kernel.org> writes:

> On Wed, May 11, 2022 at 2:59 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>>
>> On Wed, May 11, 2022 at 2:21 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>> >
>> > On 11-05-22, 16:10, Schspa Shi wrote:
>> > > Viresh Kumar <viresh.kumar@linaro.org> writes:
>> > > > I am not sure, but maybe there were issues in calling init() with rwsem held, as
>> > > > it may want to call some API from there.
>> > > >
>> > >
>> > > I have checked all the init() implement of the fellowing files, It should be OK.
>> > > Function find command:
>> > >   ag "init[\s]+=" drivers/cpufreq
>> > >
>> > > All the init() implement only initialize policy object without holding this lock
>> > > and won't call cpufreq APIs need to hold this lock.
>> >
>> > Okay, we can see if someone complains later then :)
>> >
>> > > > I don't think you can do that safely. offline() or exit() may depend on
>> > > > policy->cpus being set to all CPUs.
>> > > OK, I will move this after exit(). and there will be no effect with those
>> > > two APIs. But policy->cpus must be clear before release policy->rwsem.
>> >
>> > Hmm, I don't think depending on the values of policy->cpus is a good idea to be
>> > honest. This design is inviting bugs to come in at another place. We need a
>> > clear flag for this, a new flag or something like policy_list.
>
> Why?
>
>> > Also I see the same bug happening while the policy is removed. The kobject is
>> > put after the rwsem is dropped.
>
> This shouldn't be a problem because of the wait_for_completion() in
> cpufreq_policy_put_kobj().  It is known that cpufreq_sysfs_release()
> has run when cpufreq_policy_put_kobj() returns, so it is safe to free
> the policy then.
>
>> > > >  static inline bool policy_is_inactive(struct cpufreq_policy *policy)
>> > > >  {
>> > > > -     return cpumask_empty(policy->cpus);
>> > > > +     return unlikely(cpumask_empty(policy->cpus) ||
>> > > > +                     list_empty(&policy->policy_list));
>> > > >  }
>> > > >
>> > >
>> > > I don't think this fully solves my problem.
>> > > 1. There is some case which cpufreq_online failed after the policy is added to
>> > >    cpufreq_policy_list.
>> >
>> > And I missed that :(
>> >
>> > > 2. policy->policy_list is not protected by &policy->rwsem, and we
>> > > can't relay on this to
>> > >    indict the policy is fine.
>> >
>> > Ahh..
>> >
>> > > >From this point of view, we can fix this problem through the state of
>> > > this linked list.
>> > > But the above two problems need to be solved first.
>> >
>> > I feel overriding policy_list for this is going to make it complex/messy.
>> >
>> > Maybe something like this then:
>>
>> There are two things.
>>
>> One is the possible race with respect to the sysfs access occurring
>> during failing initialization and the other is that ->offline() or
>> ->exit() can be called with or without holding the policy rwsem
>> depending on the code path.
>>
>> Namely, cpufreq_offline() calls them under the policy rwsem, but
>> cpufreq_remove_dev() calls ->exit() outside the rwsem.  Also they are
>> called outside the rwsem in cpufreq_online().
>>
>> Moreover, ->offline() and ->exit() cannot expect policy->cpus to be
>> populated, because they are called when it is empty from
>> cpufreq_offline().
>>
>> So the $subject patch is correct AFAICS even though it doesn't address
>> all of the above.
>
> TBH, I'm not sure why show() doesn't check policy_is_inactive() under the rwsem.
>

There is a exist bugs, and somebody try to fixed, please see commit
Fixes: 2f66196208c9 ("cpufreq: check if policy is inactive early in
__cpufreq_get()")

> Moreover, I'm not sure why the locking dance in store() is necessary.

The store interface hold cpu_hotplug_lock via
    cpus_read_trylock();
, cannot run in parallel with cpufreq_online() & cpufreq_offline().

---
BRs

Schspa Shi
Rafael J. Wysocki May 11, 2022, 1:50 p.m. UTC | #9
On Wed, May 11, 2022 at 3:42 PM Schspa Shi <schspa@gmail.com> wrote:
>
> "Rafael J. Wysocki" <rafael@kernel.org> writes:
>
> > On Wed, May 11, 2022 at 2:59 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
> >>
> >> On Wed, May 11, 2022 at 2:21 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> >> >
> >> > On 11-05-22, 16:10, Schspa Shi wrote:
> >> > > Viresh Kumar <viresh.kumar@linaro.org> writes:
> >> > > > I am not sure, but maybe there were issues in calling init() with rwsem held, as
> >> > > > it may want to call some API from there.
> >> > > >
> >> > >
> >> > > I have checked all the init() implement of the fellowing files, It should be OK.
> >> > > Function find command:
> >> > >   ag "init[\s]+=" drivers/cpufreq
> >> > >
> >> > > All the init() implement only initialize policy object without holding this lock
> >> > > and won't call cpufreq APIs need to hold this lock.
> >> >
> >> > Okay, we can see if someone complains later then :)
> >> >
> >> > > > I don't think you can do that safely. offline() or exit() may depend on
> >> > > > policy->cpus being set to all CPUs.
> >> > > OK, I will move this after exit(). and there will be no effect with those
> >> > > two APIs. But policy->cpus must be clear before release policy->rwsem.
> >> >
> >> > Hmm, I don't think depending on the values of policy->cpus is a good idea to be
> >> > honest. This design is inviting bugs to come in at another place. We need a
> >> > clear flag for this, a new flag or something like policy_list.
> >
> > Why?
> >
> >> > Also I see the same bug happening while the policy is removed. The kobject is
> >> > put after the rwsem is dropped.
> >
> > This shouldn't be a problem because of the wait_for_completion() in
> > cpufreq_policy_put_kobj().  It is known that cpufreq_sysfs_release()
> > has run when cpufreq_policy_put_kobj() returns, so it is safe to free
> > the policy then.
> >
> >> > > >  static inline bool policy_is_inactive(struct cpufreq_policy *policy)
> >> > > >  {
> >> > > > -     return cpumask_empty(policy->cpus);
> >> > > > +     return unlikely(cpumask_empty(policy->cpus) ||
> >> > > > +                     list_empty(&policy->policy_list));
> >> > > >  }
> >> > > >
> >> > >
> >> > > I don't think this fully solves my problem.
> >> > > 1. There is some case which cpufreq_online failed after the policy is added to
> >> > >    cpufreq_policy_list.
> >> >
> >> > And I missed that :(
> >> >
> >> > > 2. policy->policy_list is not protected by &policy->rwsem, and we
> >> > > can't relay on this to
> >> > >    indict the policy is fine.
> >> >
> >> > Ahh..
> >> >
> >> > > >From this point of view, we can fix this problem through the state of
> >> > > this linked list.
> >> > > But the above two problems need to be solved first.
> >> >
> >> > I feel overriding policy_list for this is going to make it complex/messy.
> >> >
> >> > Maybe something like this then:
> >>
> >> There are two things.
> >>
> >> One is the possible race with respect to the sysfs access occurring
> >> during failing initialization and the other is that ->offline() or
> >> ->exit() can be called with or without holding the policy rwsem
> >> depending on the code path.
> >>
> >> Namely, cpufreq_offline() calls them under the policy rwsem, but
> >> cpufreq_remove_dev() calls ->exit() outside the rwsem.  Also they are
> >> called outside the rwsem in cpufreq_online().
> >>
> >> Moreover, ->offline() and ->exit() cannot expect policy->cpus to be
> >> populated, because they are called when it is empty from
> >> cpufreq_offline().
> >>
> >> So the $subject patch is correct AFAICS even though it doesn't address
> >> all of the above.
> >
> > TBH, I'm not sure why show() doesn't check policy_is_inactive() under the rwsem.
> >
>
> There is a exist bugs, and somebody try to fixed, please see commit
> Fixes: 2f66196208c9 ("cpufreq: check if policy is inactive early in
> __cpufreq_get()")

Well, exactly.

This only addressed one bug out of a category.

> > Moreover, I'm not sure why the locking dance in store() is necessary.
>
> The store interface hold cpu_hotplug_lock via
>     cpus_read_trylock();
> , cannot run in parallel with cpufreq_online() & cpufreq_offline().

So the reason why is to prevent store() from running in parallel with
the two functions above.  Which generally  is because the policy
configuration is in-flight then.  However, I'm wondering about what
exactly would break then.
Rafael J. Wysocki May 11, 2022, 2:15 p.m. UTC | #10
On Tue, May 10, 2022 at 5:42 PM Schspa Shi <schspa@gmail.com> wrote:
>
> When cpufreq online failed, policy->cpus are not empty while
> cpufreq sysfs file available, we may access some data freed.
>
> Take policy->clk as an example:
>
> static int cpufreq_online(unsigned int cpu)
> {
>   ...
>   // policy->cpus != 0 at this time
>   down_write(&policy->rwsem);
>   ret = cpufreq_add_dev_interface(policy);
>   up_write(&policy->rwsem);
>
>   down_write(&policy->rwsem);
>   ...
>   /* cpufreq nitialization fails in some cases */
>   if (cpufreq_driver->get && has_target()) {
>     policy->cur = cpufreq_driver->get(policy->cpu);
>     if (!policy->cur) {
>       ret = -EIO;
>       pr_err("%s: ->get() failed\n", __func__);
>       goto out_destroy_policy;
>     }
>   }
>   ...
>   up_write(&policy->rwsem);
>   ...
>
>   return 0;
>
> out_destroy_policy:
>         for_each_cpu(j, policy->real_cpus)
>                 remove_cpu_dev_symlink(policy, get_cpu_device(j));
>     up_write(&policy->rwsem);
> ...
> out_exit_policy:
>   if (cpufreq_driver->exit)
>     cpufreq_driver->exit(policy);
>       clk_put(policy->clk);
>       // policy->clk is a wild pointer
> ...
>                                     ^
>                                     |
>                             Another process access
>                             __cpufreq_get
>                               cpufreq_verify_current_freq
>                                 cpufreq_generic_get
>                                   // acces wild pointer of policy->clk;
>                                     |
>                                     |
> out_offline_policy:                 |
>   cpufreq_policy_free(policy);      |
>     // deleted here, and will wait for no body reference
>     cpufreq_policy_put_kobj(policy);
> }
>
> We can fix it by clear the policy->cpus mask.
> Both show_scaling_cur_freq and show_cpuinfo_cur_freq will return an
> error by checking this mask, thus avoiding UAF.

So the UAF only happens if something is freed by ->offline() or
->exit() and I'm not sure where the mask is checked in the
scaling_cur_freq() path.

Overall, the patch is really two changes in one IMO.

> Signed-off-by: Schspa Shi <schspa@gmail.com>
>
> ---
>
> Changelog:
> v1 -> v2:
>         - Fix bad critical region enlarge which causes uninitialized
>           unlock.
> v2 -> v3:
>         - Remove the missed down_write() before
>           cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
>
> Signed-off-by: Schspa Shi <schspa@gmail.com>
> ---
>  drivers/cpufreq/cpufreq.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 80f535cc8a75..d93958dbdab8 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -1337,12 +1337,12 @@ static int cpufreq_online(unsigned int cpu)
>                 down_write(&policy->rwsem);
>                 policy->cpu = cpu;
>                 policy->governor = NULL;
> -               up_write(&policy->rwsem);
>         } else {
>                 new_policy = true;
>                 policy = cpufreq_policy_alloc(cpu);
>                 if (!policy)
>                         return -ENOMEM;
> +               down_write(&policy->rwsem);
>         }
>
>         if (!new_policy && cpufreq_driver->online) {
> @@ -1382,7 +1382,6 @@ static int cpufreq_online(unsigned int cpu)
>                 cpumask_copy(policy->related_cpus, policy->cpus);
>         }
>
> -       down_write(&policy->rwsem);
>         /*
>          * affected cpus must always be the one, which are online. We aren't
>          * managing offline cpus here.

The first change, which could and probably should be a separate patch,
ends here.

You prevent the rwsem from being dropped in the existing policy case
and acquire it right after creating a new policy.

This way ->online() always runs under the rwsem, which definitely
sounds like a good idea, and policy->cpus is manipulated under the
rwsem which IMV is required.

As a side-effect, ->init() is also run under the rwsem, but that
shouldn't be a problem as per your discussion with Viresh.

So the above would be patch 1 in a series.

The change below is a separate one and it addresses the particular
race you've discovered, as long as patch 1 above is present.  It would
be patch 2 in the series.

> @@ -1533,7 +1532,7 @@ static int cpufreq_online(unsigned int cpu)
>         for_each_cpu(j, policy->real_cpus)
>                 remove_cpu_dev_symlink(policy, get_cpu_device(j));
>
> -       up_write(&policy->rwsem);
> +       cpumask_clear(policy->cpus);

It is OK to clear policy->cpus here, because ->offline() and ->exit()
are called with policy->cpus clear from cpufreq_offline() and
cpufreq_remove_dev(), so they cannot assume policy->cpus to be
populated when they are invoked.  However, this needs to be stated in
the changelog of patch 2.

>  out_offline_policy:
>         if (cpufreq_driver->offline)
> @@ -1542,6 +1541,7 @@ static int cpufreq_online(unsigned int cpu)
>  out_exit_policy:
>         if (cpufreq_driver->exit)
>                 cpufreq_driver->exit(policy);
> +       up_write(&policy->rwsem);

It is consistent to run ->offline() and ->exit() under the rwsem, so
this change is OK too.

>  out_free_policy:
>         cpufreq_policy_free(policy);
> --

That said, there still are races that are not addressed by the above,
so I would add patch 3 changing show() to check policy_is_inactive()
under the rwsem.

Thanks!
Schspa Shi May 12, 2022, 5:51 a.m. UTC | #11
"Rafael J. Wysocki" <rafael@kernel.org> writes:

> On Tue, May 10, 2022 at 5:42 PM Schspa Shi <schspa@gmail.com> 
> wrote:
>>
>> When cpufreq online failed, policy->cpus are not empty while
>> cpufreq sysfs file available, we may access some data freed.
>>
>> Take policy->clk as an example:
>>
>> static int cpufreq_online(unsigned int cpu)
>> {
>>   ...
>>   // policy->cpus != 0 at this time
>>   down_write(&policy->rwsem);
>>   ret = cpufreq_add_dev_interface(policy);
>>   up_write(&policy->rwsem);
>>
>>   down_write(&policy->rwsem);
>>   ...
>>   /* cpufreq nitialization fails in some cases */
>>   if (cpufreq_driver->get && has_target()) {
>>     policy->cur = cpufreq_driver->get(policy->cpu);
>>     if (!policy->cur) {
>>       ret = -EIO;
>>       pr_err("%s: ->get() failed\n", __func__);
>>       goto out_destroy_policy;
>>     }
>>   }
>>   ...
>>   up_write(&policy->rwsem);
>>   ...
>>
>>   return 0;
>>
>> out_destroy_policy:
>>         for_each_cpu(j, policy->real_cpus)
>>                 remove_cpu_dev_symlink(policy, 
>>                 get_cpu_device(j));
>>     up_write(&policy->rwsem);
>> ...
>> out_exit_policy:
>>   if (cpufreq_driver->exit)
>>     cpufreq_driver->exit(policy);
>>       clk_put(policy->clk);
>>       // policy->clk is a wild pointer
>> ...
>>                                     ^
>>                                     |
>>                             Another process access
>>                             __cpufreq_get
>>                               cpufreq_verify_current_freq
>>                                 cpufreq_generic_get
>>                                   // acces wild pointer of 
>>                                   policy->clk;
>>                                     |
>>                                     |
>> out_offline_policy:                 |
>>   cpufreq_policy_free(policy);      |
>>     // deleted here, and will wait for no body reference
>>     cpufreq_policy_put_kobj(policy);
>> }
>>
>> We can fix it by clear the policy->cpus mask.
>> Both show_scaling_cur_freq and show_cpuinfo_cur_freq will 
>> return an
>> error by checking this mask, thus avoiding UAF.
>
> So the UAF only happens if something is freed by ->offline() or
> ->exit() and I'm not sure where the mask is checked in the
> scaling_cur_freq() path.
>

In the current code, it is checked in the following path:
show();
  down_read(&policy->rwsem);
  ret = fattr->show(policy, buf);
    show_cpuinfo_cur_freq
      __cpufreq_get
        if (unlikely(policy_is_inactive(policy)))
          return 0;
  up_read(&policy->rwsem);

> Overall, the patch is really two changes in one IMO.
>
>> Signed-off-by: Schspa Shi <schspa@gmail.com>
>>
>> ---
>>
>> Changelog:
>> v1 -> v2:
>>         - Fix bad critical region enlarge which causes 
>>         uninitialized
>>           unlock.
>> v2 -> v3:
>>         - Remove the missed down_write() before
>>           cpumask_and(policy->cpus, policy->cpus, 
>>           cpu_online_mask);
>>
>> Signed-off-by: Schspa Shi <schspa@gmail.com>
>> ---
>>  drivers/cpufreq/cpufreq.c | 6 +++---
>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/cpufreq/cpufreq.c 
>> b/drivers/cpufreq/cpufreq.c
>> index 80f535cc8a75..d93958dbdab8 100644
>> --- a/drivers/cpufreq/cpufreq.c
>> +++ b/drivers/cpufreq/cpufreq.c
>> @@ -1337,12 +1337,12 @@ static int cpufreq_online(unsigned int 
>> cpu)
>>                 down_write(&policy->rwsem);
>>                 policy->cpu = cpu;
>>                 policy->governor = NULL;
>> -               up_write(&policy->rwsem);
>>         } else {
>>                 new_policy = true;
>>                 policy = cpufreq_policy_alloc(cpu);
>>                 if (!policy)
>>                         return -ENOMEM;
>> +               down_write(&policy->rwsem);
>>         }
>>
>>         if (!new_policy && cpufreq_driver->online) {
>> @@ -1382,7 +1382,6 @@ static int cpufreq_online(unsigned int 
>> cpu)
>>                 cpumask_copy(policy->related_cpus, 
>>                 policy->cpus);
>>         }
>>
>> -       down_write(&policy->rwsem);
>>         /*
>>          * affected cpus must always be the one, which are 
>>          online. We aren't
>>          * managing offline cpus here.
>
> The first change, which could and probably should be a separate 
> patch,
> ends here.
>
> You prevent the rwsem from being dropped in the existing policy 
> case
> and acquire it right after creating a new policy.
>
> This way ->online() always runs under the rwsem, which 
> definitely
> sounds like a good idea, and policy->cpus is manipulated under 
> the
> rwsem which IMV is required.
>
> As a side-effect, ->init() is also run under the rwsem, but that
> shouldn't be a problem as per your discussion with Viresh.
>
> So the above would be patch 1 in a series.
>
> The change below is a separate one and it addresses the 
> particular
> race you've discovered, as long as patch 1 above is present.  It 
> would
> be patch 2 in the series.
>
>> @@ -1533,7 +1532,7 @@ static int cpufreq_online(unsigned int 
>> cpu)
>>         for_each_cpu(j, policy->real_cpus)
>>                 remove_cpu_dev_symlink(policy, 
>>                 get_cpu_device(j));
>>
>> -       up_write(&policy->rwsem);
>> +       cpumask_clear(policy->cpus);
>
> It is OK to clear policy->cpus here, because ->offline() and 
> ->exit()
> are called with policy->cpus clear from cpufreq_offline() and
> cpufreq_remove_dev(), so they cannot assume policy->cpus to be
> populated when they are invoked.  However, this needs to be 
> stated in
> the changelog of patch 2.
>

OK, I will separate it into two patch.

>>  out_offline_policy:
>>         if (cpufreq_driver->offline)
>> @@ -1542,6 +1541,7 @@ static int cpufreq_online(unsigned int 
>> cpu)
>>  out_exit_policy:
>>         if (cpufreq_driver->exit)
>>                 cpufreq_driver->exit(policy);
>> +       up_write(&policy->rwsem);
>
> It is consistent to run ->offline() and ->exit() under the 
> rwsem, so
> this change is OK too.
>
>>  out_free_policy:
>>         cpufreq_policy_free(policy);
>> --
>
> That said, there still are races that are not addressed by the 
> above,
> so I would add patch 3 changing show() to check 
> policy_is_inactive()
> under the rwsem.
>

Yes, let me upload a new patch for this change.

> Thanks!

---
BRs
Schspa Shi
Viresh Kumar May 12, 2022, 5:56 a.m. UTC | #12
On 11-05-22, 14:59, Rafael J. Wysocki wrote:
> There are two things.
> 
> One is the possible race with respect to the sysfs access occurring
> during failing initialization and the other is that ->offline() or
> ->exit() can be called with or without holding the policy rwsem
> depending on the code path.
> 
> Namely, cpufreq_offline() calls them under the policy rwsem, but
> cpufreq_remove_dev() calls ->exit() outside the rwsem.  Also they are
> called outside the rwsem in cpufreq_online().

Right.

> Moreover, ->offline() and ->exit() cannot expect policy->cpus to be
> populated, because they are called when it is empty from
> cpufreq_offline().

Correct.

> So the $subject patch is correct AFAICS even though it doesn't address
> all of the above.
Viresh Kumar May 12, 2022, 6:56 a.m. UTC | #13
On 11-05-22, 15:19, Rafael J. Wysocki wrote:
> On Wed, May 11, 2022 at 2:59 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
> > > Hmm, I don't think depending on the values of policy->cpus is a good idea to be
> > > honest. This design is inviting bugs to come in at another place. We need a
> > > clear flag for this, a new flag or something like policy_list.
> 
> Why?

Because it doesn't mean anything unless we have code elsewhere which checks this
specifically. It should be fine though after using policy_is_inactive() in
show/store as you suggested, which I too tried to do in a patch :)

> > > Also I see the same bug happening while the policy is removed. The kobject is
> > > put after the rwsem is dropped.
> 
> This shouldn't be a problem because of the wait_for_completion() in
> cpufreq_policy_put_kobj().  It is known that cpufreq_sysfs_release()
> has run when cpufreq_policy_put_kobj() returns, so it is safe to free
> the policy then.

I agree to that, but the destruction of stuff happens right in
cpufreq_policy_free() where it starts removing the policy from the list and
clears cpufreq_cpu_data. I don't know if it will break anything or not, but we
should disallow any further sysfs operations once we have reached
cpufreq_policy_free().

> TBH, I'm not sure why show() doesn't check policy_is_inactive() under the rwsem.

I agree, both show/store should have it.

> Moreover, I'm not sure why the locking dance in store() is necessary.

commit fdd320da84c6 ("cpufreq: Lock CPU online/offline in cpufreq_register_driver()")
Rafael J. Wysocki May 12, 2022, 10:37 a.m. UTC | #14
On Thu, May 12, 2022 at 7:52 AM Schspa Shi <schspa@gmail.com> wrote:
>
>
> "Rafael J. Wysocki" <rafael@kernel.org> writes:
>
> > On Tue, May 10, 2022 at 5:42 PM Schspa Shi <schspa@gmail.com>
> > wrote:
> >>
> >> When cpufreq online failed, policy->cpus are not empty while
> >> cpufreq sysfs file available, we may access some data freed.
> >>
> >> Take policy->clk as an example:
> >>
> >> static int cpufreq_online(unsigned int cpu)
> >> {
> >>   ...
> >>   // policy->cpus != 0 at this time
> >>   down_write(&policy->rwsem);
> >>   ret = cpufreq_add_dev_interface(policy);
> >>   up_write(&policy->rwsem);
> >>
> >>   down_write(&policy->rwsem);
> >>   ...
> >>   /* cpufreq nitialization fails in some cases */
> >>   if (cpufreq_driver->get && has_target()) {
> >>     policy->cur = cpufreq_driver->get(policy->cpu);
> >>     if (!policy->cur) {
> >>       ret = -EIO;
> >>       pr_err("%s: ->get() failed\n", __func__);
> >>       goto out_destroy_policy;
> >>     }
> >>   }
> >>   ...
> >>   up_write(&policy->rwsem);
> >>   ...
> >>
> >>   return 0;
> >>
> >> out_destroy_policy:
> >>         for_each_cpu(j, policy->real_cpus)
> >>                 remove_cpu_dev_symlink(policy,
> >>                 get_cpu_device(j));
> >>     up_write(&policy->rwsem);
> >> ...
> >> out_exit_policy:
> >>   if (cpufreq_driver->exit)
> >>     cpufreq_driver->exit(policy);
> >>       clk_put(policy->clk);
> >>       // policy->clk is a wild pointer
> >> ...
> >>                                     ^
> >>                                     |
> >>                             Another process access
> >>                             __cpufreq_get
> >>                               cpufreq_verify_current_freq
> >>                                 cpufreq_generic_get
> >>                                   // acces wild pointer of
> >>                                   policy->clk;
> >>                                     |
> >>                                     |
> >> out_offline_policy:                 |
> >>   cpufreq_policy_free(policy);      |
> >>     // deleted here, and will wait for no body reference
> >>     cpufreq_policy_put_kobj(policy);
> >> }
> >>
> >> We can fix it by clear the policy->cpus mask.
> >> Both show_scaling_cur_freq and show_cpuinfo_cur_freq will
> >> return an
> >> error by checking this mask, thus avoiding UAF.
> >
> > So the UAF only happens if something is freed by ->offline() or
> > ->exit() and I'm not sure where the mask is checked in the
> > scaling_cur_freq() path.
> >
>
> In the current code, it is checked in the following path:
> show();
>   down_read(&policy->rwsem);
>   ret = fattr->show(policy, buf);
>     show_cpuinfo_cur_freq
>       __cpufreq_get
>         if (unlikely(policy_is_inactive(policy)))
>           return 0;
>   up_read(&policy->rwsem);

This is cpuinfo_cur_freq and I was talking about scaling_cur_freq.

> > Overall, the patch is really two changes in one IMO.
> >
> >> Signed-off-by: Schspa Shi <schspa@gmail.com>
> >>
> >> ---
> >>
> >> Changelog:
> >> v1 -> v2:
> >>         - Fix bad critical region enlarge which causes
> >>         uninitialized
> >>           unlock.
> >> v2 -> v3:
> >>         - Remove the missed down_write() before
> >>           cpumask_and(policy->cpus, policy->cpus,
> >>           cpu_online_mask);
> >>
> >> Signed-off-by: Schspa Shi <schspa@gmail.com>
> >> ---
> >>  drivers/cpufreq/cpufreq.c | 6 +++---
> >>  1 file changed, 3 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/cpufreq/cpufreq.c
> >> b/drivers/cpufreq/cpufreq.c
> >> index 80f535cc8a75..d93958dbdab8 100644
> >> --- a/drivers/cpufreq/cpufreq.c
> >> +++ b/drivers/cpufreq/cpufreq.c
> >> @@ -1337,12 +1337,12 @@ static int cpufreq_online(unsigned int
> >> cpu)
> >>                 down_write(&policy->rwsem);
> >>                 policy->cpu = cpu;
> >>                 policy->governor = NULL;
> >> -               up_write(&policy->rwsem);
> >>         } else {
> >>                 new_policy = true;
> >>                 policy = cpufreq_policy_alloc(cpu);
> >>                 if (!policy)
> >>                         return -ENOMEM;
> >> +               down_write(&policy->rwsem);
> >>         }
> >>
> >>         if (!new_policy && cpufreq_driver->online) {
> >> @@ -1382,7 +1382,6 @@ static int cpufreq_online(unsigned int
> >> cpu)
> >>                 cpumask_copy(policy->related_cpus,
> >>                 policy->cpus);
> >>         }
> >>
> >> -       down_write(&policy->rwsem);
> >>         /*
> >>          * affected cpus must always be the one, which are
> >>          online. We aren't
> >>          * managing offline cpus here.
> >
> > The first change, which could and probably should be a separate
> > patch,
> > ends here.
> >
> > You prevent the rwsem from being dropped in the existing policy
> > case
> > and acquire it right after creating a new policy.
> >
> > This way ->online() always runs under the rwsem, which
> > definitely
> > sounds like a good idea, and policy->cpus is manipulated under
> > the
> > rwsem which IMV is required.
> >
> > As a side-effect, ->init() is also run under the rwsem, but that
> > shouldn't be a problem as per your discussion with Viresh.
> >
> > So the above would be patch 1 in a series.
> >
> > The change below is a separate one and it addresses the
> > particular
> > race you've discovered, as long as patch 1 above is present.  It
> > would
> > be patch 2 in the series.
> >
> >> @@ -1533,7 +1532,7 @@ static int cpufreq_online(unsigned int
> >> cpu)
> >>         for_each_cpu(j, policy->real_cpus)
> >>                 remove_cpu_dev_symlink(policy,
> >>                 get_cpu_device(j));
> >>
> >> -       up_write(&policy->rwsem);
> >> +       cpumask_clear(policy->cpus);
> >
> > It is OK to clear policy->cpus here, because ->offline() and
> > ->exit()
> > are called with policy->cpus clear from cpufreq_offline() and
> > cpufreq_remove_dev(), so they cannot assume policy->cpus to be
> > populated when they are invoked.  However, this needs to be
> > stated in
> > the changelog of patch 2.
> >
>
> OK, I will separate it into two patch.
>
> >>  out_offline_policy:
> >>         if (cpufreq_driver->offline)
> >> @@ -1542,6 +1541,7 @@ static int cpufreq_online(unsigned int
> >> cpu)
> >>  out_exit_policy:
> >>         if (cpufreq_driver->exit)
> >>                 cpufreq_driver->exit(policy);
> >> +       up_write(&policy->rwsem);
> >
> > It is consistent to run ->offline() and ->exit() under the
> > rwsem, so
> > this change is OK too.
> >
> >>  out_free_policy:
> >>         cpufreq_policy_free(policy);
> >> --
> >
> > That said, there still are races that are not addressed by the
> > above,
> > so I would add patch 3 changing show() to check
> > policy_is_inactive()
> > under the rwsem.
> >
>
> Yes, let me upload a new patch for this change.

Cool, thanks!
Rafael J. Wysocki May 12, 2022, 10:49 a.m. UTC | #15
On Thu, May 12, 2022 at 8:56 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> On 11-05-22, 15:19, Rafael J. Wysocki wrote:
> > On Wed, May 11, 2022 at 2:59 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
> > > > Hmm, I don't think depending on the values of policy->cpus is a good idea to be
> > > > honest. This design is inviting bugs to come in at another place. We need a
> > > > clear flag for this, a new flag or something like policy_list.
> >
> > Why?
>
> Because it doesn't mean anything unless we have code elsewhere which checks this
> specifically. It should be fine though after using policy_is_inactive() in
> show/store as you suggested, which I too tried to do in a patch :)
>
> > > > Also I see the same bug happening while the policy is removed. The kobject is
> > > > put after the rwsem is dropped.
> >
> > This shouldn't be a problem because of the wait_for_completion() in
> > cpufreq_policy_put_kobj().  It is known that cpufreq_sysfs_release()
> > has run when cpufreq_policy_put_kobj() returns, so it is safe to free
> > the policy then.
>
> I agree to that, but the destruction of stuff happens right in
> cpufreq_policy_free() where it starts removing the policy from the list and
> clears cpufreq_cpu_data. I don't know if it will break anything or not, but we
> should disallow any further sysfs operations once we have reached
> cpufreq_policy_free().

Well, would there be a problem with moving the
cpufreq_policy_put_kobj() call to the front of cpufreq_policy_free()?
If we did that, we'd know that everything could be torn down safely,
because nobody would be holding references to the policy any more.

> > TBH, I'm not sure why show() doesn't check policy_is_inactive() under the rwsem.
>
> I agree, both show/store should have it.
>
> > Moreover, I'm not sure why the locking dance in store() is necessary.
>
> commit fdd320da84c6 ("cpufreq: Lock CPU online/offline in cpufreq_register_driver()")

I get that, but I'm wondering if locking CPU hotplug from store() is
needed at all.  I mean, if we are in store(), we are holding an active
reference to the policy kobject, so the policy cannot go away until we
are done anyway.  Thus it should be sufficient to use the policy rwsem
for synchronization.
Viresh Kumar May 13, 2022, 4:27 a.m. UTC | #16
On 12-05-22, 12:49, Rafael J. Wysocki wrote:
> Well, would there be a problem with moving the
> cpufreq_policy_put_kobj() call to the front of cpufreq_policy_free()?

Emptying cpufreq_cpu_data first is required, else someone else will
end up doing kobject_get() again.

> If we did that, we'd know that everything could be torn down safely,
> because nobody would be holding references to the policy any more.

With the way we are progressing now, we will always have policy->cpus
empty while we reach cpufreq_policy_free(). With that I think we will
be safe with the current code here. I would also add a BUG_ON() here
for non empty policy->cpus to be safe.

> > > TBH, I'm not sure why show() doesn't check policy_is_inactive() under the rwsem.
> >
> > I agree, both show/store should have it.
> >
> > > Moreover, I'm not sure why the locking dance in store() is necessary.
> >
> > commit fdd320da84c6 ("cpufreq: Lock CPU online/offline in cpufreq_register_driver()")
> 
> I get that, but I'm wondering if locking CPU hotplug from store() is
> needed at all.  I mean, if we are in store(), we are holding an active
> reference to the policy kobject, so the policy cannot go away until we
> are done anyway.  Thus it should be sufficient to use the policy rwsem
> for synchronization.

I think after the current patchset is applied and we have the inactive
policy check in store(), we won't required the dance after all.
Viresh Kumar May 24, 2022, 11:14 a.m. UTC | #17
On 13-05-22, 09:57, Viresh Kumar wrote:
> On 12-05-22, 12:49, Rafael J. Wysocki wrote:
> > > > Moreover, I'm not sure why the locking dance in store() is necessary.
> > >
> > > commit fdd320da84c6 ("cpufreq: Lock CPU online/offline in cpufreq_register_driver()")
> > 
> > I get that, but I'm wondering if locking CPU hotplug from store() is
> > needed at all.  I mean, if we are in store(), we are holding an active
> > reference to the policy kobject, so the policy cannot go away until we
> > are done anyway.  Thus it should be sufficient to use the policy rwsem
> > for synchronization.
> 
> I think after the current patchset is applied and we have the inactive
> policy check in store(), we won't required the dance after all.

I was writing a patch for this and then thought maybe look at mails
around this time, when you sent the patch, and found the reason why we
need the locking dance :)

https://lore.kernel.org/lkml/20150729091136.GN7557@n2100.arm.linux.org.uk/

I will add a comment for this now.
Rafael J. Wysocki May 24, 2022, 11:22 a.m. UTC | #18
On Tue, May 24, 2022 at 1:15 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> On 13-05-22, 09:57, Viresh Kumar wrote:
> > On 12-05-22, 12:49, Rafael J. Wysocki wrote:
> > > > > Moreover, I'm not sure why the locking dance in store() is necessary.
> > > >
> > > > commit fdd320da84c6 ("cpufreq: Lock CPU online/offline in cpufreq_register_driver()")
> > >
> > > I get that, but I'm wondering if locking CPU hotplug from store() is
> > > needed at all.  I mean, if we are in store(), we are holding an active
> > > reference to the policy kobject, so the policy cannot go away until we
> > > are done anyway.  Thus it should be sufficient to use the policy rwsem
> > > for synchronization.
> >
> > I think after the current patchset is applied and we have the inactive
> > policy check in store(), we won't required the dance after all.
>
> I was writing a patch for this and then thought maybe look at mails
> around this time, when you sent the patch, and found the reason why we
> need the locking dance :)
>
> https://lore.kernel.org/lkml/20150729091136.GN7557@n2100.arm.linux.org.uk/
>
> I will add a comment for this now.

Well, again, if we are in store(), we are holding a reference to the
policy kobject, so this is not initialization time.
Viresh Kumar May 24, 2022, 11:29 a.m. UTC | #19
On 24-05-22, 13:22, Rafael J. Wysocki wrote:
> On Tue, May 24, 2022 at 1:15 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> >
> > On 13-05-22, 09:57, Viresh Kumar wrote:
> > > On 12-05-22, 12:49, Rafael J. Wysocki wrote:
> > > > > > Moreover, I'm not sure why the locking dance in store() is necessary.
> > > > >
> > > > > commit fdd320da84c6 ("cpufreq: Lock CPU online/offline in cpufreq_register_driver()")
> > > >
> > > > I get that, but I'm wondering if locking CPU hotplug from store() is
> > > > needed at all.  I mean, if we are in store(), we are holding an active
> > > > reference to the policy kobject, so the policy cannot go away until we
> > > > are done anyway.  Thus it should be sufficient to use the policy rwsem
> > > > for synchronization.
> > >
> > > I think after the current patchset is applied and we have the inactive
> > > policy check in store(), we won't required the dance after all.
> >
> > I was writing a patch for this and then thought maybe look at mails
> > around this time, when you sent the patch, and found the reason why we
> > need the locking dance :)
> >
> > https://lore.kernel.org/lkml/20150729091136.GN7557@n2100.arm.linux.org.uk/

Actually no, this is for the lock in cpufreq_driver_register().

> Well, again, if we are in store(), we are holding a reference to the
> policy kobject, so this is not initialization time.

This is the commit which made the change.

commit 4f750c930822 ("cpufreq: Synchronize the cpufreq store_*() routines with CPU hotplug")
Rafael J. Wysocki May 24, 2022, 11:48 a.m. UTC | #20
On Tue, May 24, 2022 at 1:29 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> On 24-05-22, 13:22, Rafael J. Wysocki wrote:
> > On Tue, May 24, 2022 at 1:15 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > >
> > > On 13-05-22, 09:57, Viresh Kumar wrote:
> > > > On 12-05-22, 12:49, Rafael J. Wysocki wrote:
> > > > > > > Moreover, I'm not sure why the locking dance in store() is necessary.
> > > > > >
> > > > > > commit fdd320da84c6 ("cpufreq: Lock CPU online/offline in cpufreq_register_driver()")
> > > > >
> > > > > I get that, but I'm wondering if locking CPU hotplug from store() is
> > > > > needed at all.  I mean, if we are in store(), we are holding an active
> > > > > reference to the policy kobject, so the policy cannot go away until we
> > > > > are done anyway.  Thus it should be sufficient to use the policy rwsem
> > > > > for synchronization.
> > > >
> > > > I think after the current patchset is applied and we have the inactive
> > > > policy check in store(), we won't required the dance after all.
> > >
> > > I was writing a patch for this and then thought maybe look at mails
> > > around this time, when you sent the patch, and found the reason why we
> > > need the locking dance :)
> > >
> > > https://lore.kernel.org/lkml/20150729091136.GN7557@n2100.arm.linux.org.uk/
>
> Actually no, this is for the lock in cpufreq_driver_register().
>
> > Well, again, if we are in store(), we are holding a reference to the
> > policy kobject, so this is not initialization time.
>
> This is the commit which made the change.
>
> commit 4f750c930822 ("cpufreq: Synchronize the cpufreq store_*() routines with CPU hotplug")

So this was done before the entire CPU hotplug rework and it was
useful at that time.

The current code always runs cpufreq_set_policy() under policy->rwsem
and governors are stopped under policy->rwsem, so this particular race
cannot happen AFAICS.

Locking CPU hotplug prevents CPUs from going away while store() is
running, but in order to run store(), the caller must hold an active
reference to the policy kobject.  That prevents the policy from being
freed and so policy->rwsem can be acquired.  After policy->rwsem has
been acquired, policy->cpus can be checked to determine whether or not
there are any online CPUs for the given policy (there may be none),
because policy->cpus is only manipulated under policy->rwsem.

If a CPU that belongs to the given policy is going away,
cpufreq_offline() has to remove it from policy->cpus under
policy->rwsem, so either it has to wait for store() to release
policy->rwsem, or store() will acquire policy->rwsem after it and will
find that policy->cpus is empty.
Rafael J. Wysocki May 24, 2022, 11:53 a.m. UTC | #21
On Tue, May 24, 2022 at 1:48 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Tue, May 24, 2022 at 1:29 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> >
> > On 24-05-22, 13:22, Rafael J. Wysocki wrote:
> > > On Tue, May 24, 2022 at 1:15 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > > >
> > > > On 13-05-22, 09:57, Viresh Kumar wrote:
> > > > > On 12-05-22, 12:49, Rafael J. Wysocki wrote:
> > > > > > > > Moreover, I'm not sure why the locking dance in store() is necessary.
> > > > > > >
> > > > > > > commit fdd320da84c6 ("cpufreq: Lock CPU online/offline in cpufreq_register_driver()")
> > > > > >
> > > > > > I get that, but I'm wondering if locking CPU hotplug from store() is
> > > > > > needed at all.  I mean, if we are in store(), we are holding an active
> > > > > > reference to the policy kobject, so the policy cannot go away until we
> > > > > > are done anyway.  Thus it should be sufficient to use the policy rwsem
> > > > > > for synchronization.
> > > > >
> > > > > I think after the current patchset is applied and we have the inactive
> > > > > policy check in store(), we won't required the dance after all.
> > > >
> > > > I was writing a patch for this and then thought maybe look at mails
> > > > around this time, when you sent the patch, and found the reason why we
> > > > need the locking dance :)
> > > >
> > > > https://lore.kernel.org/lkml/20150729091136.GN7557@n2100.arm.linux.org.uk/
> >
> > Actually no, this is for the lock in cpufreq_driver_register().
> >
> > > Well, again, if we are in store(), we are holding a reference to the
> > > policy kobject, so this is not initialization time.
> >
> > This is the commit which made the change.
> >
> > commit 4f750c930822 ("cpufreq: Synchronize the cpufreq store_*() routines with CPU hotplug")
>
> So this was done before the entire CPU hotplug rework and it was
> useful at that time.
>
> The current code always runs cpufreq_set_policy() under policy->rwsem
> and governors are stopped under policy->rwsem, so this particular race
> cannot happen AFAICS.
>
> Locking CPU hotplug prevents CPUs from going away while store() is
> running, but in order to run store(), the caller must hold an active
> reference to the policy kobject.  That prevents the policy from being
> freed and so policy->rwsem can be acquired.  After policy->rwsem has
> been acquired, policy->cpus can be checked to determine whether or not
> there are any online CPUs for the given policy (there may be none),
> because policy->cpus is only manipulated under policy->rwsem.
>
> If a CPU that belongs to the given policy is going away,
> cpufreq_offline() has to remove it from policy->cpus under
> policy->rwsem, so either it has to wait for store() to release
> policy->rwsem, or store() will acquire policy->rwsem after it and will
> find that policy->cpus is empty.

Moreover, locking CPU hotplug doesn't actually prevent
cpufreq_remove_dev() from running which can happen when the cpufreq
driver is unregistered, for example.
Viresh Kumar May 25, 2022, 5:32 a.m. UTC | #22
On 24-05-22, 13:53, Rafael J. Wysocki wrote:
> On Tue, May 24, 2022 at 1:48 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
> > So this was done before the entire CPU hotplug rework and it was
> > useful at that time.
> >
> > The current code always runs cpufreq_set_policy() under policy->rwsem
> > and governors are stopped under policy->rwsem, so this particular race
> > cannot happen AFAICS.
> >
> > Locking CPU hotplug prevents CPUs from going away while store() is
> > running, but in order to run store(), the caller must hold an active
> > reference to the policy kobject.  That prevents the policy from being
> > freed and so policy->rwsem can be acquired.  After policy->rwsem has
> > been acquired, policy->cpus can be checked to determine whether or not
> > there are any online CPUs for the given policy (there may be none),
> > because policy->cpus is only manipulated under policy->rwsem.
> >
> > If a CPU that belongs to the given policy is going away,
> > cpufreq_offline() has to remove it from policy->cpus under
> > policy->rwsem, so either it has to wait for store() to release
> > policy->rwsem, or store() will acquire policy->rwsem after it and will
> > find that policy->cpus is empty.
> 
> Moreover, locking CPU hotplug doesn't actually prevent
> cpufreq_remove_dev() from running which can happen when the cpufreq
> driver is unregistered, for example.

Right, we can get rid of this now I believe.
diff mbox series

Patch

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 80f535cc8a75..d93958dbdab8 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1337,12 +1337,12 @@  static int cpufreq_online(unsigned int cpu)
 		down_write(&policy->rwsem);
 		policy->cpu = cpu;
 		policy->governor = NULL;
-		up_write(&policy->rwsem);
 	} else {
 		new_policy = true;
 		policy = cpufreq_policy_alloc(cpu);
 		if (!policy)
 			return -ENOMEM;
+		down_write(&policy->rwsem);
 	}
 
 	if (!new_policy && cpufreq_driver->online) {
@@ -1382,7 +1382,6 @@  static int cpufreq_online(unsigned int cpu)
 		cpumask_copy(policy->related_cpus, policy->cpus);
 	}
 
-	down_write(&policy->rwsem);
 	/*
 	 * affected cpus must always be the one, which are online. We aren't
 	 * managing offline cpus here.
@@ -1533,7 +1532,7 @@  static int cpufreq_online(unsigned int cpu)
 	for_each_cpu(j, policy->real_cpus)
 		remove_cpu_dev_symlink(policy, get_cpu_device(j));
 
-	up_write(&policy->rwsem);
+	cpumask_clear(policy->cpus);
 
 out_offline_policy:
 	if (cpufreq_driver->offline)
@@ -1542,6 +1541,7 @@  static int cpufreq_online(unsigned int cpu)
 out_exit_policy:
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);
+	up_write(&policy->rwsem);
 
 out_free_policy:
 	cpufreq_policy_free(policy);