diff mbox series

[V2,2/3] arch_topology: Avoid use-after-free for scale_freq_data

Message ID 9dba462b4d09a1a8a9fbb75740b74bf91a09a3e1.1623825725.git.viresh.kumar@linaro.org (mailing list archive)
State Superseded, archived
Headers show
Series cpufreq: cppc: Add support for frequency invariance | expand

Commit Message

Viresh Kumar June 16, 2021, 6:48 a.m. UTC
Currently topology_scale_freq_tick() may end up using a pointer to
struct scale_freq_data, which was previously cleared by
topology_clear_scale_freq_source(), as there is no protection in place
here. The users of topology_clear_scale_freq_source() though needs a
guarantee that the previous scale_freq_data isn't used anymore.

Since topology_scale_freq_tick() is called from scheduler tick, we don't
want to add locking in there. Use the RCU update mechanism instead
(which is already used by the scheduler's utilization update path) to
guarantee race free updates here.

Cc: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/base/arch_topology.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

Comments

Greg KH June 16, 2021, 7:57 a.m. UTC | #1
On Wed, Jun 16, 2021 at 12:18:08PM +0530, Viresh Kumar wrote:
> Currently topology_scale_freq_tick() may end up using a pointer to
> struct scale_freq_data, which was previously cleared by
> topology_clear_scale_freq_source(), as there is no protection in place
> here. The users of topology_clear_scale_freq_source() though needs a
> guarantee that the previous scale_freq_data isn't used anymore.
> 
> Since topology_scale_freq_tick() is called from scheduler tick, we don't
> want to add locking in there. Use the RCU update mechanism instead
> (which is already used by the scheduler's utilization update path) to
> guarantee race free updates here.
> 
> Cc: Paul E. McKenney <paulmck@kernel.org>
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

So this is a bugfix for problems in the current codebase?  What commit
does this fix?  Should it go to the stable kernels?

> ---
>  drivers/base/arch_topology.c | 27 +++++++++++++++++++++------
>  1 file changed, 21 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index c1179edc0f3b..921312a8d957 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -18,10 +18,11 @@
>  #include <linux/cpumask.h>
>  #include <linux/init.h>
>  #include <linux/percpu.h>
> +#include <linux/rcupdate.h>
>  #include <linux/sched.h>
>  #include <linux/smp.h>
>  
> -static DEFINE_PER_CPU(struct scale_freq_data *, sft_data);
> +static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
>  static struct cpumask scale_freq_counters_mask;
>  static bool scale_freq_invariant;
>  
> @@ -66,16 +67,20 @@ void topology_set_scale_freq_source(struct scale_freq_data *data,
>  	if (cpumask_empty(&scale_freq_counters_mask))
>  		scale_freq_invariant = topology_scale_freq_invariant();
>  
> +	rcu_read_lock();
> +
>  	for_each_cpu(cpu, cpus) {
> -		sfd = per_cpu(sft_data, cpu);
> +		sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu));
>  
>  		/* Use ARCH provided counters whenever possible */
>  		if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
> -			per_cpu(sft_data, cpu) = data;
> +			rcu_assign_pointer(per_cpu(sft_data, cpu), data);
>  			cpumask_set_cpu(cpu, &scale_freq_counters_mask);
>  		}
>  	}
>  
> +	rcu_read_unlock();
> +
>  	update_scale_freq_invariant(true);
>  }
>  EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);
> @@ -86,22 +91,32 @@ void topology_clear_scale_freq_source(enum scale_freq_source source,
>  	struct scale_freq_data *sfd;
>  	int cpu;
>  
> +	rcu_read_lock();
> +
>  	for_each_cpu(cpu, cpus) {
> -		sfd = per_cpu(sft_data, cpu);
> +		sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu));
>  
>  		if (sfd && sfd->source == source) {
> -			per_cpu(sft_data, cpu) = NULL;
> +			rcu_assign_pointer(per_cpu(sft_data, cpu), NULL);
>  			cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
>  		}
>  	}
>  
> +	rcu_read_unlock();
> +
> +	/*
> +	 * Make sure all references to previous sft_data are dropped to avoid
> +	 * use-after-free races.
> +	 */
> +	synchronize_rcu();

What race is happening?  How could the current code race?  Only when a
cpu is removed?

thanks,

greg k-h
Viresh Kumar June 16, 2021, 8:18 a.m. UTC | #2
On 16-06-21, 09:57, Greg Kroah-Hartman wrote:
> On Wed, Jun 16, 2021 at 12:18:08PM +0530, Viresh Kumar wrote:
> > Currently topology_scale_freq_tick() may end up using a pointer to
> > struct scale_freq_data, which was previously cleared by
> > topology_clear_scale_freq_source(), as there is no protection in place
> > here. The users of topology_clear_scale_freq_source() though needs a
> > guarantee that the previous scale_freq_data isn't used anymore.
> > 
> > Since topology_scale_freq_tick() is called from scheduler tick, we don't
> > want to add locking in there. Use the RCU update mechanism instead
> > (which is already used by the scheduler's utilization update path) to
> > guarantee race free updates here.
> > 
> > Cc: Paul E. McKenney <paulmck@kernel.org>
> > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> 
> So this is a bugfix for problems in the current codebase?  What commit
> does this fix?  Should it go to the stable kernels?

There is only one user of topology_clear_scale_freq_source()
(cppc-cpufreq driver, which is already reverted in pm/linux-next). So
in the upcoming 5.13 kernel release, there will be no one using this
API and so no one will break.

And so I skipped the fixes tag, I can add it though.

> > ---
> >  drivers/base/arch_topology.c | 27 +++++++++++++++++++++------
> >  1 file changed, 21 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> > index c1179edc0f3b..921312a8d957 100644
> > --- a/drivers/base/arch_topology.c
> > +++ b/drivers/base/arch_topology.c
> > @@ -18,10 +18,11 @@
> >  #include <linux/cpumask.h>
> >  #include <linux/init.h>
> >  #include <linux/percpu.h>
> > +#include <linux/rcupdate.h>
> >  #include <linux/sched.h>
> >  #include <linux/smp.h>
> >  
> > -static DEFINE_PER_CPU(struct scale_freq_data *, sft_data);
> > +static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
> >  static struct cpumask scale_freq_counters_mask;
> >  static bool scale_freq_invariant;
> >  
> > @@ -66,16 +67,20 @@ void topology_set_scale_freq_source(struct scale_freq_data *data,
> >  	if (cpumask_empty(&scale_freq_counters_mask))
> >  		scale_freq_invariant = topology_scale_freq_invariant();
> >  
> > +	rcu_read_lock();
> > +
> >  	for_each_cpu(cpu, cpus) {
> > -		sfd = per_cpu(sft_data, cpu);
> > +		sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu));
> >  
> >  		/* Use ARCH provided counters whenever possible */
> >  		if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
> > -			per_cpu(sft_data, cpu) = data;
> > +			rcu_assign_pointer(per_cpu(sft_data, cpu), data);
> >  			cpumask_set_cpu(cpu, &scale_freq_counters_mask);
> >  		}
> >  	}
> >  
> > +	rcu_read_unlock();
> > +
> >  	update_scale_freq_invariant(true);
> >  }
> >  EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);
> > @@ -86,22 +91,32 @@ void topology_clear_scale_freq_source(enum scale_freq_source source,
> >  	struct scale_freq_data *sfd;
> >  	int cpu;
> >  
> > +	rcu_read_lock();
> > +
> >  	for_each_cpu(cpu, cpus) {
> > -		sfd = per_cpu(sft_data, cpu);
> > +		sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu));
> >  
> >  		if (sfd && sfd->source == source) {
> > -			per_cpu(sft_data, cpu) = NULL;
> > +			rcu_assign_pointer(per_cpu(sft_data, cpu), NULL);
> >  			cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
> >  		}
> >  	}
> >  
> > +	rcu_read_unlock();
> > +
> > +	/*
> > +	 * Make sure all references to previous sft_data are dropped to avoid
> > +	 * use-after-free races.
> > +	 */
> > +	synchronize_rcu();
> 
> What race is happening?  How could the current code race?  Only when a
> cpu is removed?

topology_scale_freq_tick() is called by the scheduler for each CPU
from scheduler_tick().

It is possible that topology_scale_freq_tick() ends up using an older
copy of sft_data pointer, while it is being removed by
topology_clear_scale_freq_source() because a CPU went away or a
cpufreq driver went away, or during normal suspend/resume (where CPUs
are hot-unplugged).

synchronize_rcu() makes sure that all RCU critical sections that
started before it is called, will finish before it returns. And so the
callers of topology_clear_scale_freq_source() don't need to worry
about their callback getting called anymore.
Greg KH June 16, 2021, 8:31 a.m. UTC | #3
On Wed, Jun 16, 2021 at 01:48:59PM +0530, Viresh Kumar wrote:
> On 16-06-21, 09:57, Greg Kroah-Hartman wrote:
> > On Wed, Jun 16, 2021 at 12:18:08PM +0530, Viresh Kumar wrote:
> > > Currently topology_scale_freq_tick() may end up using a pointer to
> > > struct scale_freq_data, which was previously cleared by
> > > topology_clear_scale_freq_source(), as there is no protection in place
> > > here. The users of topology_clear_scale_freq_source() though needs a
> > > guarantee that the previous scale_freq_data isn't used anymore.
> > > 
> > > Since topology_scale_freq_tick() is called from scheduler tick, we don't
> > > want to add locking in there. Use the RCU update mechanism instead
> > > (which is already used by the scheduler's utilization update path) to
> > > guarantee race free updates here.
> > > 
> > > Cc: Paul E. McKenney <paulmck@kernel.org>
> > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> > 
> > So this is a bugfix for problems in the current codebase?  What commit
> > does this fix?  Should it go to the stable kernels?
> 
> There is only one user of topology_clear_scale_freq_source()
> (cppc-cpufreq driver, which is already reverted in pm/linux-next). So
> in the upcoming 5.13 kernel release, there will be no one using this
> API and so no one will break.
> 
> And so I skipped the fixes tag, I can add it though.

It would be nice to have to answer this type of question, otherwise you
will have automated scripts trying to backport this to kernels where it
does not belong :)

thanks,

greg k-h
Viresh Kumar June 16, 2021, 9:10 a.m. UTC | #4
On 16-06-21, 10:31, Greg Kroah-Hartman wrote:
> On Wed, Jun 16, 2021 at 01:48:59PM +0530, Viresh Kumar wrote:
> > On 16-06-21, 09:57, Greg Kroah-Hartman wrote:
> > > On Wed, Jun 16, 2021 at 12:18:08PM +0530, Viresh Kumar wrote:
> > > > Currently topology_scale_freq_tick() may end up using a pointer to
> > > > struct scale_freq_data, which was previously cleared by
> > > > topology_clear_scale_freq_source(), as there is no protection in place
> > > > here. The users of topology_clear_scale_freq_source() though needs a
> > > > guarantee that the previous scale_freq_data isn't used anymore.
> > > > 
> > > > Since topology_scale_freq_tick() is called from scheduler tick, we don't
> > > > want to add locking in there. Use the RCU update mechanism instead
> > > > (which is already used by the scheduler's utilization update path) to
> > > > guarantee race free updates here.
> > > > 
> > > > Cc: Paul E. McKenney <paulmck@kernel.org>
> > > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> > > 
> > > So this is a bugfix for problems in the current codebase?  What commit
> > > does this fix?  Should it go to the stable kernels?
> > 
> > There is only one user of topology_clear_scale_freq_source()
> > (cppc-cpufreq driver, which is already reverted in pm/linux-next). So
> > in the upcoming 5.13 kernel release, there will be no one using this
> > API and so no one will break.
> > 
> > And so I skipped the fixes tag, I can add it though.
> 
> It would be nice to have to answer this type of question, otherwise you
> will have automated scripts trying to backport this to kernels where it
> does not belong :)

Sure, I will add these to the next version.
Ionela Voinescu June 16, 2021, 11:25 a.m. UTC | #5
Hey,

On Wednesday 16 Jun 2021 at 12:18:08 (+0530), Viresh Kumar wrote:
> Currently topology_scale_freq_tick() may end up using a pointer to
> struct scale_freq_data, which was previously cleared by
> topology_clear_scale_freq_source(), as there is no protection in place
> here. The users of topology_clear_scale_freq_source() though needs a
> guarantee that the previous scale_freq_data isn't used anymore.
> 

Please correct me if I'm wrong, but my understanding is that this is
only a problem for the cppc cpufreq invariance functionality. Let's
consider a scenario where CPUs are either hotplugged out or the cpufreq
CPPC driver module is removed; topology_clear_scale_freq_source() would
get called and the sfd_data will be set to NULL. But if at the same
time topology_scale_freq_tick() got an old reference of sfd_data,
set_freq_scale() will be called. This is only a problem for CPPC cpufreq
as cppc_scale_freq_tick() will end up using driver internal data that
might have been freed during the hotplug callbacks or the exit path.

If this is the case, wouldn't the synchronisation issue be better
resolved in the CPPC cpufreq driver, rather than here?

Thank you,
Ionela.

> Since topology_scale_freq_tick() is called from scheduler tick, we don't
> want to add locking in there. Use the RCU update mechanism instead
> (which is already used by the scheduler's utilization update path) to
> guarantee race free updates here.
> 
> Cc: Paul E. McKenney <paulmck@kernel.org>
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
>  drivers/base/arch_topology.c | 27 +++++++++++++++++++++------
>  1 file changed, 21 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index c1179edc0f3b..921312a8d957 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -18,10 +18,11 @@
>  #include <linux/cpumask.h>
>  #include <linux/init.h>
>  #include <linux/percpu.h>
> +#include <linux/rcupdate.h>
>  #include <linux/sched.h>
>  #include <linux/smp.h>
>  
> -static DEFINE_PER_CPU(struct scale_freq_data *, sft_data);
> +static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
>  static struct cpumask scale_freq_counters_mask;
>  static bool scale_freq_invariant;
>  
> @@ -66,16 +67,20 @@ void topology_set_scale_freq_source(struct scale_freq_data *data,
>  	if (cpumask_empty(&scale_freq_counters_mask))
>  		scale_freq_invariant = topology_scale_freq_invariant();
>  
> +	rcu_read_lock();
> +
>  	for_each_cpu(cpu, cpus) {
> -		sfd = per_cpu(sft_data, cpu);
> +		sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu));
>  
>  		/* Use ARCH provided counters whenever possible */
>  		if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
> -			per_cpu(sft_data, cpu) = data;
> +			rcu_assign_pointer(per_cpu(sft_data, cpu), data);
>  			cpumask_set_cpu(cpu, &scale_freq_counters_mask);
>  		}
>  	}
>  
> +	rcu_read_unlock();
> +
>  	update_scale_freq_invariant(true);
>  }
>  EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);
> @@ -86,22 +91,32 @@ void topology_clear_scale_freq_source(enum scale_freq_source source,
>  	struct scale_freq_data *sfd;
>  	int cpu;
>  
> +	rcu_read_lock();
> +
>  	for_each_cpu(cpu, cpus) {
> -		sfd = per_cpu(sft_data, cpu);
> +		sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu));
>  
>  		if (sfd && sfd->source == source) {
> -			per_cpu(sft_data, cpu) = NULL;
> +			rcu_assign_pointer(per_cpu(sft_data, cpu), NULL);
>  			cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
>  		}
>  	}
>  
> +	rcu_read_unlock();
> +
> +	/*
> +	 * Make sure all references to previous sft_data are dropped to avoid
> +	 * use-after-free races.
> +	 */
> +	synchronize_rcu();
> +
>  	update_scale_freq_invariant(false);
>  }
>  EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source);
>  
>  void topology_scale_freq_tick(void)
>  {
> -	struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data);
> +	struct scale_freq_data *sfd = rcu_dereference_sched(*this_cpu_ptr(&sft_data));
>  
>  	if (sfd)
>  		sfd->set_freq_scale();
> -- 
> 2.31.1.272.g89b43f80a514
>
Viresh Kumar June 16, 2021, 11:36 a.m. UTC | #6
Hi Ionela,

On 16-06-21, 12:25, Ionela Voinescu wrote:
> Please correct me if I'm wrong, but my understanding is that this is
> only a problem for the cppc cpufreq invariance functionality. Let's
> consider a scenario where CPUs are either hotplugged out or the cpufreq
> CPPC driver module is removed; topology_clear_scale_freq_source() would
> get called and the sfd_data will be set to NULL. But if at the same
> time topology_scale_freq_tick() got an old reference of sfd_data,
> set_freq_scale() will be called. This is only a problem for CPPC cpufreq
> as cppc_scale_freq_tick() will end up using driver internal data that
> might have been freed during the hotplug callbacks or the exit path.

For now, yes, CPPC is the only one affected.

> If this is the case, wouldn't the synchronisation issue be better
> resolved in the CPPC cpufreq driver, rather than here?

Hmm, the way I see it is that topology_clear_scale_freq_source() is an API
provided by topology core and the topology core needs to guarantee that it
doesn't use the data any longer after topology_clear_scale_freq_source() is
called.

The same is true for other APIs, like:

irq_work_sync();
kthread_cancel_work_sync();

It isn't the user which needs to take this into account, but the API provider.

There may be more users of this in the future, lets say another cpufreq driver,
and so keeping this synchronization at the API provider is the right thing to do
IMHO.

And from the user's perspective, like cppc, it doesn't have any control over who
is using its callback and how and when. It is very very difficult to provide
something like this at the users, redundant anyway. For example cppc won't ever
know when topology_scale_freq_tick() has stopped calling its callback.

For example this is what cppc driver needs to do now:

+static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy,
+                                 unsigned int cpu)
+{
+       struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, cpu);
+
+       topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, cpumask_of(cpu));
+       irq_work_sync(&cppc_fi->irq_work);
+       kthread_cancel_work_sync(&cppc_fi->work);
+}

The driver uses APIs provided by 3 layers, topology, irq-work, kthread and all
must provide these guarantees.

A very similar thing is implemented in kernel/sched/cpufreq.c for example.
Ionela Voinescu June 16, 2021, noon UTC | #7
On Wednesday 16 Jun 2021 at 17:06:04 (+0530), Viresh Kumar wrote:
> Hi Ionela,
> 
> On 16-06-21, 12:25, Ionela Voinescu wrote:
> > Please correct me if I'm wrong, but my understanding is that this is
> > only a problem for the cppc cpufreq invariance functionality. Let's
> > consider a scenario where CPUs are either hotplugged out or the cpufreq
> > CPPC driver module is removed; topology_clear_scale_freq_source() would
> > get called and the sfd_data will be set to NULL. But if at the same
> > time topology_scale_freq_tick() got an old reference of sfd_data,
> > set_freq_scale() will be called. This is only a problem for CPPC cpufreq
> > as cppc_scale_freq_tick() will end up using driver internal data that
> > might have been freed during the hotplug callbacks or the exit path.
> 
> For now, yes, CPPC is the only one affected.
> 
> > If this is the case, wouldn't the synchronisation issue be better
> > resolved in the CPPC cpufreq driver, rather than here?
> 
> Hmm, the way I see it is that topology_clear_scale_freq_source() is an API
> provided by topology core and the topology core needs to guarantee that it
> doesn't use the data any longer after topology_clear_scale_freq_source() is
> called.
> 
> The same is true for other APIs, like:
> 
> irq_work_sync();
> kthread_cancel_work_sync();
> 
> It isn't the user which needs to take this into account, but the API provider.
> 

I would agree if it wasn't for the fact that the driver provides the
set_freq_scale() implementation that ends up using driver internal data
which could have been freed by the driver's own .exit()/stop_cpu()
callbacks. The API and the generic implementation has the responsibility
of making sure of sane access to its own structures.

Even if we would want to keep drivers from shooting themselves in the
foot, I would prefer we postpone it until we have more users for this,
before we add any synchronisation mechanisms to functionality called
on the tick.

Let's see if there's a less invasive solution to fix CPPC for now, what
do you think?

Thanks,
Ionela.

> There may be more users of this in the future, lets say another cpufreq driver,
> and so keeping this synchronization at the API provider is the right thing to do
> IMHO.
> 
> And from the user's perspective, like cppc, it doesn't have any control over who
> is using its callback and how and when. It is very very difficult to provide

> something like this at the users, redundant anyway. For example cppc won't ever
> know when topology_scale_freq_tick() has stopped calling its callback.
> 
> For example this is what cppc driver needs to do now:
> 
> +static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy,
> +                                 unsigned int cpu)
> +{
> +       struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, cpu);
> +
> +       topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, cpumask_of(cpu));
> +       irq_work_sync(&cppc_fi->irq_work);
> +       kthread_cancel_work_sync(&cppc_fi->work);
> +}
> 
> The driver uses APIs provided by 3 layers, topology, irq-work, kthread and all
> must provide these guarantees.
> 
> A very similar thing is implemented in kernel/sched/cpufreq.c for example.
> 
> -- 
> viresh
Viresh Kumar June 17, 2021, 3:06 a.m. UTC | #8
On 16-06-21, 13:00, Ionela Voinescu wrote:
> I would agree if it wasn't for the fact that the driver provides the
> set_freq_scale() implementation that ends up using driver internal data
> which could have been freed by the driver's own .exit()/stop_cpu()
> callbacks. The API and the generic implementation has the responsibility
> of making sure of sane access to its own structures.

How do you see timer core or workqueue core then ? Why do they make
sure they don't end up calling user's function once we ask them not to
?

And also scheduler's own util update mechanism, the exact thing
happens there. Users (cpufreq governors) call
cpufreq_add_update_util_hook() and cpufreq_remove_update_util_hook()
to pass their own data structure "struct update_util_data", which has
ia callback within. This is what happens from scheduler's context in
those cases.

static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
{
	struct update_util_data *data;

	data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
						  cpu_of(rq)));
	if (data)
		data->func(data, rq_clock(rq), flags);
}

Also note that some kind of synchronisation mechanism is indeed
required between topology_set_scale_freq_source() and
topology_clear_scale_freq_source(), there is no race there today,
sure, but this is an API which is made generic.

> Even if we would want to keep drivers from shooting themselves in the
> foot, I would prefer we postpone it until we have more users for this,
> before we add any synchronisation mechanisms to functionality called
> on the tick.

The rcu mechanism is very much used in the scheduler itself because it
is lightweight. Honestly I don't even see any other way (w.r.t.
locking) users can fix it at their end. They don't know which was the
last tick that used their callback.

> Let's see if there's a less invasive solution to fix CPPC for now, what
> do you think?

For me, this change is required in the API despite how CPPC ends up
using it. Else we are failing at defining the API itself IMHO.
diff mbox series

Patch

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index c1179edc0f3b..921312a8d957 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -18,10 +18,11 @@ 
 #include <linux/cpumask.h>
 #include <linux/init.h>
 #include <linux/percpu.h>
+#include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
 
-static DEFINE_PER_CPU(struct scale_freq_data *, sft_data);
+static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
 static struct cpumask scale_freq_counters_mask;
 static bool scale_freq_invariant;
 
@@ -66,16 +67,20 @@  void topology_set_scale_freq_source(struct scale_freq_data *data,
 	if (cpumask_empty(&scale_freq_counters_mask))
 		scale_freq_invariant = topology_scale_freq_invariant();
 
+	rcu_read_lock();
+
 	for_each_cpu(cpu, cpus) {
-		sfd = per_cpu(sft_data, cpu);
+		sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu));
 
 		/* Use ARCH provided counters whenever possible */
 		if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
-			per_cpu(sft_data, cpu) = data;
+			rcu_assign_pointer(per_cpu(sft_data, cpu), data);
 			cpumask_set_cpu(cpu, &scale_freq_counters_mask);
 		}
 	}
 
+	rcu_read_unlock();
+
 	update_scale_freq_invariant(true);
 }
 EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);
@@ -86,22 +91,32 @@  void topology_clear_scale_freq_source(enum scale_freq_source source,
 	struct scale_freq_data *sfd;
 	int cpu;
 
+	rcu_read_lock();
+
 	for_each_cpu(cpu, cpus) {
-		sfd = per_cpu(sft_data, cpu);
+		sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu));
 
 		if (sfd && sfd->source == source) {
-			per_cpu(sft_data, cpu) = NULL;
+			rcu_assign_pointer(per_cpu(sft_data, cpu), NULL);
 			cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
 		}
 	}
 
+	rcu_read_unlock();
+
+	/*
+	 * Make sure all references to previous sft_data are dropped to avoid
+	 * use-after-free races.
+	 */
+	synchronize_rcu();
+
 	update_scale_freq_invariant(false);
 }
 EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source);
 
 void topology_scale_freq_tick(void)
 {
-	struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data);
+	struct scale_freq_data *sfd = rcu_dereference_sched(*this_cpu_ptr(&sft_data));
 
 	if (sfd)
 		sfd->set_freq_scale();