
[v2,2/3] xen/sched: carve out memory allocation and freeing from schedule_cpu_rm()

Message ID 20220815110410.19872-3-jgross@suse.com (mailing list archive)
State Superseded
Series xen/sched: fix cpu hotplug

Commit Message

Jürgen Groß Aug. 15, 2022, 11:04 a.m. UTC
In order to prepare for not allocating or freeing memory from
schedule_cpu_rm(), move this functionality to dedicated functions.

For now call those functions from schedule_cpu_rm().

No change of behavior expected.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- add const (Jan Beulich)
- use "unsigned int" for loop index (Jan Beulich)
- use xmalloc_flex_struct() (Jan Beulich)
- use XFREE() (Jan Beulich)
- hold rcu lock longer (Jan Beulich)
- add ASSERT() (Jan Beulich)

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/common/sched/core.c    | 133 +++++++++++++++++++++----------------
 xen/common/sched/private.h |   9 +++
 2 files changed, 86 insertions(+), 56 deletions(-)
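
For orientation, a minimal sketch of the call flow in schedule_cpu_rm() after
this change (all names are taken from the patch below; error handling inside
the removal and the per-cpu resource switching loop are elided):

int schedule_cpu_rm(unsigned int cpu)
{
    struct cpu_rm_data *data;

    /* All memory needed for removing the cpu is allocated up front. */
    data = schedule_cpu_rm_alloc(cpu);
    if ( !data )
        return -ENOMEM;

    /* ... actual removal work, consuming only the pre-allocated data ... */

    /* Old scheduler private data and the helper structure are freed again. */
    schedule_cpu_rm_free(data, cpu);

    return 0;
}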

Comments

Jan Beulich Aug. 15, 2022, 11:52 a.m. UTC | #1
On 15.08.2022 13:04, Juergen Gross wrote:
> --- a/xen/common/sched/core.c
> +++ b/xen/common/sched/core.c
> @@ -3237,6 +3237,65 @@ out:
>      return ret;
>  }
>  
> +static struct cpu_rm_data *schedule_cpu_rm_alloc(unsigned int cpu)
> +{
> +    struct cpu_rm_data *data;
> +    const struct sched_resource *sr;
> +    unsigned int idx;
> +
> +    rcu_read_lock(&sched_res_rculock);
> +
> +    sr = get_sched_res(cpu);
> +    data = xmalloc_flex_struct(struct cpu_rm_data, sr, sr->granularity - 1);
> +    if ( !data )
> +        goto out;
> +
> +    data->old_ops = sr->scheduler;
> +    data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
> +    data->ppriv_old = sr->sched_priv;

Repeating a v1 comment:

"At least from an abstract perspective, doesn't reading fields from
 sr require the RCU lock to be held continuously (i.e. not dropping
 it at the end of this function and re-acquiring it in the caller)?"

Initially I thought you did respond to this in some way, but when
looking for a matching reply I couldn't find one.

Jan
Jürgen Groß Aug. 15, 2022, 11:55 a.m. UTC | #2
On 15.08.22 13:52, Jan Beulich wrote:
> On 15.08.2022 13:04, Juergen Gross wrote:
>> --- a/xen/common/sched/core.c
>> +++ b/xen/common/sched/core.c
>> @@ -3237,6 +3237,65 @@ out:
>>       return ret;
>>   }
>>   
>> +static struct cpu_rm_data *schedule_cpu_rm_alloc(unsigned int cpu)
>> +{
>> +    struct cpu_rm_data *data;
>> +    const struct sched_resource *sr;
>> +    unsigned int idx;
>> +
>> +    rcu_read_lock(&sched_res_rculock);
>> +
>> +    sr = get_sched_res(cpu);
>> +    data = xmalloc_flex_struct(struct cpu_rm_data, sr, sr->granularity - 1);
>> +    if ( !data )
>> +        goto out;
>> +
>> +    data->old_ops = sr->scheduler;
>> +    data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
>> +    data->ppriv_old = sr->sched_priv;
> 
> Repeating a v1 comment:
> 
> "At least from an abstract perspective, doesn't reading fields from
>   sr require the RCU lock to be held continuously (i.e. not dropping
>   it at the end of this function and re-acquiring it in the caller)?"
> 
> Initially I thought you did respond to this in some way, but when
> looking for a matching reply I couldn't find one.

Oh, sorry.

The RCU lock is protecting only the sr, not any data the pointers in the
sr are referencing. So it is fine to drop the RCU lock after reading some
of the fields from the sr and storing them in the cpu_rm_data memory.
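
As a minimal sketch of that pattern (names as in the patch, comments purely
illustrative):

rcu_read_lock(&sched_res_rculock);

sr = get_sched_res(cpu);              /* the sr object itself is RCU protected */

data->old_ops   = sr->scheduler;      /* plain copies of values/pointers are   */
data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;  /* taken while it is held */
data->ppriv_old = sr->sched_priv;

rcu_read_unlock(&sched_res_rculock);  /* afterwards only the copies are used   */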


Juergen
Jan Beulich Aug. 15, 2022, noon UTC | #3
On 15.08.2022 13:55, Juergen Gross wrote:
> On 15.08.22 13:52, Jan Beulich wrote:
>> On 15.08.2022 13:04, Juergen Gross wrote:
>>> --- a/xen/common/sched/core.c
>>> +++ b/xen/common/sched/core.c
>>> @@ -3237,6 +3237,65 @@ out:
>>>       return ret;
>>>   }
>>>   
>>> +static struct cpu_rm_data *schedule_cpu_rm_alloc(unsigned int cpu)
>>> +{
>>> +    struct cpu_rm_data *data;
>>> +    const struct sched_resource *sr;
>>> +    unsigned int idx;
>>> +
>>> +    rcu_read_lock(&sched_res_rculock);
>>> +
>>> +    sr = get_sched_res(cpu);
>>> +    data = xmalloc_flex_struct(struct cpu_rm_data, sr, sr->granularity - 1);
>>> +    if ( !data )
>>> +        goto out;
>>> +
>>> +    data->old_ops = sr->scheduler;
>>> +    data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
>>> +    data->ppriv_old = sr->sched_priv;
>>
>> Repeating a v1 comment:
>>
>> "At least from an abstract perspective, doesn't reading fields from
>>   sr require the RCU lock to be held continuously (i.e. not dropping
>>   it at the end of this function and re-acquiring it in the caller)?"
>>
>> Initially I thought you did respond to this in some way, but when
>> looking for a matching reply I couldn't find one.
> 
> Oh, sorry.
> 
> The RCU lock is protecting only the sr, not any data the pointers in the
> sr are referencing. So it is fine to drop the RCU lock after reading some
> of the fields from the sr and storing them in the cpu_rm_data memory.

Hmm, interesting. "Protecting only the sr" then means what exactly?
Just its allocation, but not its contents?

Plus it's not just the pointers - sr->granularity also would better not
increase in the meantime ... Quite likely there's a reason why that also
cannot happen, yet even then I think a brief code comment might be
helpful here.

Jan
Jürgen Groß Aug. 15, 2022, 12:16 p.m. UTC | #4
On 15.08.22 14:00, Jan Beulich wrote:
> On 15.08.2022 13:55, Juergen Gross wrote:
>> On 15.08.22 13:52, Jan Beulich wrote:
>>> On 15.08.2022 13:04, Juergen Gross wrote:
>>>> --- a/xen/common/sched/core.c
>>>> +++ b/xen/common/sched/core.c
>>>> @@ -3237,6 +3237,65 @@ out:
>>>>        return ret;
>>>>    }
>>>>    
>>>> +static struct cpu_rm_data *schedule_cpu_rm_alloc(unsigned int cpu)
>>>> +{
>>>> +    struct cpu_rm_data *data;
>>>> +    const struct sched_resource *sr;
>>>> +    unsigned int idx;
>>>> +
>>>> +    rcu_read_lock(&sched_res_rculock);
>>>> +
>>>> +    sr = get_sched_res(cpu);
>>>> +    data = xmalloc_flex_struct(struct cpu_rm_data, sr, sr->granularity - 1);
>>>> +    if ( !data )
>>>> +        goto out;
>>>> +
>>>> +    data->old_ops = sr->scheduler;
>>>> +    data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
>>>> +    data->ppriv_old = sr->sched_priv;
>>>
>>> Repeating a v1 comment:
>>>
>>> "At least from an abstract perspective, doesn't reading fields from
>>>    sr require the RCU lock to be held continuously (i.e. not dropping
>>>    it at the end of this function and re-acquiring it in the caller)?"
>>>
>>> Initially I thought you did respond to this in some way, but when
>>> looking for a matching reply I couldn't find one.
>>
>> Oh, sorry.
>>
>> The RCU lock is protecting only the sr, not any data the pointers in the
>> sr are referencing. So it is fine to drop the RCU lock after reading some
>> of the fields from the sr and storing them in the cpu_rm_data memory.
> 
> Hmm, interesting. "Protecting only the sr" then means what exactly?
> Just its allocation, but not its contents?

Correct.

> Plus it's not just the pointers - sr->granularity also would better not
> increase in the meantime ... Quite likely there's a reason why that also
> cannot happen, yet even then I think a brief code comment might be
> helpful here.

Okay, will add something like:

"Between schedule_cpu_rm_alloc() and the real cpu removal action the relevant
  contents of struct sched_resource can't change, as the cpu in question is
  locked against any other movement to or from cpupools, and the data copied
  by schedule_cpu_rm_alloc() is cpupool specific."

Is that okay?


Juergen
Jan Beulich Aug. 15, 2022, 12:34 p.m. UTC | #5
On 15.08.2022 14:16, Juergen Gross wrote:
> On 15.08.22 14:00, Jan Beulich wrote:
>> On 15.08.2022 13:55, Juergen Gross wrote:
>>> On 15.08.22 13:52, Jan Beulich wrote:
>>>> On 15.08.2022 13:04, Juergen Gross wrote:
>>>>> --- a/xen/common/sched/core.c
>>>>> +++ b/xen/common/sched/core.c
>>>>> @@ -3237,6 +3237,65 @@ out:
>>>>>        return ret;
>>>>>    }
>>>>>    
>>>>> +static struct cpu_rm_data *schedule_cpu_rm_alloc(unsigned int cpu)
>>>>> +{
>>>>> +    struct cpu_rm_data *data;
>>>>> +    const struct sched_resource *sr;
>>>>> +    unsigned int idx;
>>>>> +
>>>>> +    rcu_read_lock(&sched_res_rculock);
>>>>> +
>>>>> +    sr = get_sched_res(cpu);
>>>>> +    data = xmalloc_flex_struct(struct cpu_rm_data, sr, sr->granularity - 1);
>>>>> +    if ( !data )
>>>>> +        goto out;
>>>>> +
>>>>> +    data->old_ops = sr->scheduler;
>>>>> +    data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
>>>>> +    data->ppriv_old = sr->sched_priv;
>>>>
>>>> Repeating a v1 comment:
>>>>
>>>> "At least from an abstract perspective, doesn't reading fields from
>>>>    sr require the RCU lock to be held continuously (i.e. not dropping
>>>>    it at the end of this function and re-acquiring it in the caller)?"
>>>>
>>>> Initially I thought you did respond to this in some way, but when
>>>> looking for a matching reply I couldn't find one.
>>>
>>> Oh, sorry.
>>>
>>> The RCU lock is protecting only the sr, not any data the pointers in the
>>> sr are referencing. So it is fine to drop the RCU lock after reading some
>>> of the fields from the sr and storing them in the cpu_rm_data memory.
>>
>> Hmm, interesting. "Protecting only the sr" then means what exactly?
>> Just its allocation, but not its contents?
> 
> Correct.
> 
>> Plus it's not just the pointers - sr->granularity also would better not
>> increase in the meantime ... Quite likely there's a reason why that also
>> cannot happen, yet even then I think a brief code comment might be
>> helpful here.
> 
> Okay, will add something like:
> 
> "Between schedule_cpu_rm_alloc() and the real cpu removal action the relevant
>   contents of struct sched_resource can't change, as the cpu in question is
>   locked against any other movement to or from cpupools, and the data copied
>   by schedule_cpu_rm_alloc() is cpupool specific."
> 
> Is that okay?

Well, I guess I need to leave this to the scheduler maintainers then. I
have to admit that it's not clear to me why all of sr->granularity,
sr->scheduler, or sr->sched_priv would be "cpupool specific". I may be
able to agree for sr->granularity, but the other two I thought was
scheduler data, not cpupool data. For sr->granularity in turn (but
perhaps also the other two fields) it's not obvious to me that pool
properties can't change in a racing manner.

Jan
Jürgen Groß Aug. 15, 2022, 12:47 p.m. UTC | #6
On 15.08.22 14:34, Jan Beulich wrote:
> On 15.08.2022 14:16, Juergen Gross wrote:
>> On 15.08.22 14:00, Jan Beulich wrote:
>>> On 15.08.2022 13:55, Juergen Gross wrote:
>>>> On 15.08.22 13:52, Jan Beulich wrote:
>>>>> On 15.08.2022 13:04, Juergen Gross wrote:
>>>>>> --- a/xen/common/sched/core.c
>>>>>> +++ b/xen/common/sched/core.c
>>>>>> @@ -3237,6 +3237,65 @@ out:
>>>>>>         return ret;
>>>>>>     }
>>>>>>     
>>>>>> +static struct cpu_rm_data *schedule_cpu_rm_alloc(unsigned int cpu)
>>>>>> +{
>>>>>> +    struct cpu_rm_data *data;
>>>>>> +    const struct sched_resource *sr;
>>>>>> +    unsigned int idx;
>>>>>> +
>>>>>> +    rcu_read_lock(&sched_res_rculock);
>>>>>> +
>>>>>> +    sr = get_sched_res(cpu);
>>>>>> +    data = xmalloc_flex_struct(struct cpu_rm_data, sr, sr->granularity - 1);
>>>>>> +    if ( !data )
>>>>>> +        goto out;
>>>>>> +
>>>>>> +    data->old_ops = sr->scheduler;
>>>>>> +    data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
>>>>>> +    data->ppriv_old = sr->sched_priv;
>>>>>
>>>>> Repeating a v1 comment:
>>>>>
>>>>> "At least from an abstract perspective, doesn't reading fields from
>>>>>     sr require the RCU lock to be held continuously (i.e. not dropping
>>>>>     it at the end of this function and re-acquiring it in the caller)?"
>>>>>
>>>>> Initially I thought you did respond to this in some way, but when
>>>>> looking for a matching reply I couldn't find one.
>>>>
>>>> Oh, sorry.
>>>>
>>>> The RCU lock is protecting only the sr, not any data the pointers in the
>>>> sr are referencing. So it is fine to drop the RCU lock after reading some
>>>> of the fields from the sr and storing them in the cpu_rm_data memory.
>>>
>>> Hmm, interesting. "Protecting only the sr" then means what exactly?
>>> Just its allocation, but not its contents?
>>
>> Correct.
>>
>>> Plus it's not just the pointers - sr->granularity also would better not
>>> increase in the meantime ... Quite likely there's a reason why that also
>>> cannot happen, yet even then I think a brief code comment might be
>>> helpful here.
>>
>> Okay, will add something like:
>>
>> "Between schedule_cpu_rm_alloc() and the real cpu removal action the relevant
>>    contents of struct sched_resource can't change, as the cpu in question is
>>    locked against any other movement to or from cpupools, and the data copied
>>    by schedule_cpu_rm_alloc() is cpupool specific."
>>
>> Is that okay?
> 
> Well, I guess I need to leave this to the scheduler maintainers then. I
> have to admit that it's not clear to me why all of sr->granularity,
> sr->scheduler, or sr->sched_priv would be "cpupool specific". I may be

sr->scheduler is the pointer to the scheduler ops array which is set when
a cpu is added to a cpupool (the scheduler is a cpupool property). The same
applies to sr->granularity: this value is per-cpupool, too. sr->sched_priv
is only changed when a cpu is added to or removed from a cpupool, as this
is the per-cpu data of a scheduler, which needs to stay when scheduling is
happening on the cpu, thus it is allowed to be removed only in case the
cpu is removed from or added to the cpupool.
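
An abridged, purely illustrative excerpt of struct sched_resource (not the
full definition; field types are inferred from their use in the patch),
annotating the fields in question:

struct sched_resource {
    struct scheduler *scheduler;     /* cpupool property, set when the cpu is
                                        added to a cpupool */
    void             *sched_priv;    /* per-cpu scheduler data, changed only
                                        when the cpu enters/leaves a cpupool */
    unsigned int      granularity;   /* per-cpupool scheduling granularity */
    spinlock_t       *schedule_lock; /* deliberately kept stable when the
                                        resource is replaced */
    struct cpupool   *cpupool;       /* pool the cpu currently belongs to */
    /* ... further fields omitted ... */
};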

> able to agree for sr->granularity, but the other two I thought were
> scheduler data, not cpupool data. For sr->granularity in turn (but
> perhaps also the other two fields) it's not obvious to me that pool
> properties can't change in a racing manner.

They can't. Otherwise the scheduler would explode.


Juergen
Jürgen Groß Aug. 15, 2022, 2:28 p.m. UTC | #7
On 15.08.22 14:47, Juergen Gross wrote:
> On 15.08.22 14:34, Jan Beulich wrote:
>> On 15.08.2022 14:16, Juergen Gross wrote:
>>> On 15.08.22 14:00, Jan Beulich wrote:
>>>> On 15.08.2022 13:55, Juergen Gross wrote:
>>>>> On 15.08.22 13:52, Jan Beulich wrote:
>>>>>> On 15.08.2022 13:04, Juergen Gross wrote:
>>>>>>> --- a/xen/common/sched/core.c
>>>>>>> +++ b/xen/common/sched/core.c
>>>>>>> @@ -3237,6 +3237,65 @@ out:
>>>>>>>         return ret;
>>>>>>>     }
>>>>>>> +static struct cpu_rm_data *schedule_cpu_rm_alloc(unsigned int cpu)
>>>>>>> +{
>>>>>>> +    struct cpu_rm_data *data;
>>>>>>> +    const struct sched_resource *sr;
>>>>>>> +    unsigned int idx;
>>>>>>> +
>>>>>>> +    rcu_read_lock(&sched_res_rculock);
>>>>>>> +
>>>>>>> +    sr = get_sched_res(cpu);
>>>>>>> +    data = xmalloc_flex_struct(struct cpu_rm_data, sr, sr->granularity - 1);
>>>>>>> +    if ( !data )
>>>>>>> +        goto out;
>>>>>>> +
>>>>>>> +    data->old_ops = sr->scheduler;
>>>>>>> +    data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
>>>>>>> +    data->ppriv_old = sr->sched_priv;
>>>>>>
>>>>>> Repeating a v1 comment:
>>>>>>
>>>>>> "At least from an abstract perspective, doesn't reading fields from
>>>>>>     sr require the RCU lock to be held continuously (i.e. not dropping
>>>>>>     it at the end of this function and re-acquiring it in the caller)?"
>>>>>>
>>>>>> Initially I thought you did respond to this in some way, but when
>>>>>> looking for a matching reply I couldn't find one.
>>>>>
>>>>> Oh, sorry.
>>>>>
>>>>> The RCU lock is protecting only the sr, not any data the pointers in the
>>>>> sr are referencing. So it is fine to drop the RCU lock after reading some
>>>>> of the fields from the sr and storing them in the cpu_rm_data memory.
>>>>
>>>> Hmm, interesting. "Protecting only the sr" then means what exactly?
>>>> Just its allocation, but not its contents?
>>>
>>> Correct.
>>>
>>>> Plus it's not just the pointers - sr->granularity also would better not
>>>> increase in the meantime ... Quite likely there's a reason why that also
>>>> cannot happen, yet even then I think a brief code comment might be
>>>> helpful here.
>>>
>>> Okay, will add something like:
>>>
>>> "Between schedule_cpu_rm_alloc() and the real cpu removal action the relevant
>>>    contents of struct sched_resource can't change, as the cpu in question is
>>>    locked against any other movement to or from cpupools, and the data copied
>>>    by schedule_cpu_rm_alloc() is cpupool specific."
>>>
>>> Is that okay?
>>
>> Well, I guess I need to leave this to the scheduler maintainers then. I
>> have to admit that it's not clear to me why all of sr->granularity,
>> sr->scheduler, or sr->sched_priv would be "cpupool specific". I may be
> 
> sr->scheduler is the pointer to the scheduler ops array which is set when
> a cpu is added to a cpupool (the scheduler is a cpupool property). The same
> applies to sr->granularity: this value is per-cpupool, too. sr->sched_priv
> is only changed when a cpu is added to or removed from a cpupool, as this
> is the per-cpu data of a scheduler, which needs to stay when scheduling is
> happening on the cpu, thus it is allowed to be removed only in case the
> cpu is removed from or added to the cpupool.
> 
>> able to agree for sr->granularity, but the other two I thought were
>> scheduler data, not cpupool data. For sr->granularity in turn (but
>> perhaps also the other two fields) it's not obvious to me that pool
>> properties can't change in a racing manner.
> 
> They can't. Otherwise the scheduler would explode.

BTW, I'll rework the comment a little bit to:

"Between schedule_cpu_rm_alloc() and the real cpu removal action the relevant
  contents of struct sched_resource can't change, as the cpu in question is
  locked against any other movement to or from cpupools, and the data copied
  by schedule_cpu_rm_alloc() is modified only in case the cpu in question is
  being moved from or to a cpupool."
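
In code form (placement above schedule_cpu_rm_alloc() assumed) this would be:

/*
 * Between schedule_cpu_rm_alloc() and the real cpu removal action the
 * relevant contents of struct sched_resource can't change, as the cpu in
 * question is locked against any other movement to or from cpupools, and
 * the data copied by schedule_cpu_rm_alloc() is modified only in case the
 * cpu in question is being moved from or to a cpupool.
 */
static struct cpu_rm_data *schedule_cpu_rm_alloc(unsigned int cpu)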

Will resend the series tomorrow morning if nobody objects.


Juergen

Patch

diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index 085a9dd335..d0b6513b6f 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -3237,6 +3237,65 @@  out:
     return ret;
 }
 
+static struct cpu_rm_data *schedule_cpu_rm_alloc(unsigned int cpu)
+{
+    struct cpu_rm_data *data;
+    const struct sched_resource *sr;
+    unsigned int idx;
+
+    rcu_read_lock(&sched_res_rculock);
+
+    sr = get_sched_res(cpu);
+    data = xmalloc_flex_struct(struct cpu_rm_data, sr, sr->granularity - 1);
+    if ( !data )
+        goto out;
+
+    data->old_ops = sr->scheduler;
+    data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
+    data->ppriv_old = sr->sched_priv;
+
+    for ( idx = 0; idx < sr->granularity - 1; idx++ )
+    {
+        data->sr[idx] = sched_alloc_res();
+        if ( data->sr[idx] )
+        {
+            data->sr[idx]->sched_unit_idle = sched_alloc_unit_mem();
+            if ( !data->sr[idx]->sched_unit_idle )
+            {
+                sched_res_free(&data->sr[idx]->rcu);
+                data->sr[idx] = NULL;
+            }
+        }
+        if ( !data->sr[idx] )
+        {
+            while ( idx > 0 )
+                sched_res_free(&data->sr[--idx]->rcu);
+            XFREE(data);
+            goto out;
+        }
+
+        data->sr[idx]->curr = data->sr[idx]->sched_unit_idle;
+        data->sr[idx]->scheduler = &sched_idle_ops;
+        data->sr[idx]->granularity = 1;
+
+        /* We want the lock not to change when replacing the resource. */
+        data->sr[idx]->schedule_lock = sr->schedule_lock;
+    }
+
+ out:
+    rcu_read_unlock(&sched_res_rculock);
+
+    return data;
+}
+
+static void schedule_cpu_rm_free(struct cpu_rm_data *mem, unsigned int cpu)
+{
+    sched_free_udata(mem->old_ops, mem->vpriv_old);
+    sched_free_pdata(mem->old_ops, mem->ppriv_old, cpu);
+
+    xfree(mem);
+}
+
 /*
  * Remove a pCPU from its cpupool. Its scheduler becomes &sched_idle_ops
  * (the idle scheduler).
@@ -3245,53 +3304,23 @@  out:
  */
 int schedule_cpu_rm(unsigned int cpu)
 {
-    void *ppriv_old, *vpriv_old;
-    struct sched_resource *sr, **sr_new = NULL;
+    struct sched_resource *sr;
+    struct cpu_rm_data *data;
     struct sched_unit *unit;
-    struct scheduler *old_ops;
     spinlock_t *old_lock;
     unsigned long flags;
-    int idx, ret = -ENOMEM;
+    int idx = 0;
     unsigned int cpu_iter;
 
+    data = schedule_cpu_rm_alloc(cpu);
+    if ( !data )
+        return -ENOMEM;
+
     rcu_read_lock(&sched_res_rculock);
 
     sr = get_sched_res(cpu);
-    old_ops = sr->scheduler;
 
-    if ( sr->granularity > 1 )
-    {
-        sr_new = xmalloc_array(struct sched_resource *, sr->granularity - 1);
-        if ( !sr_new )
-            goto out;
-        for ( idx = 0; idx < sr->granularity - 1; idx++ )
-        {
-            sr_new[idx] = sched_alloc_res();
-            if ( sr_new[idx] )
-            {
-                sr_new[idx]->sched_unit_idle = sched_alloc_unit_mem();
-                if ( !sr_new[idx]->sched_unit_idle )
-                {
-                    sched_res_free(&sr_new[idx]->rcu);
-                    sr_new[idx] = NULL;
-                }
-            }
-            if ( !sr_new[idx] )
-            {
-                for ( idx--; idx >= 0; idx-- )
-                    sched_res_free(&sr_new[idx]->rcu);
-                goto out;
-            }
-            sr_new[idx]->curr = sr_new[idx]->sched_unit_idle;
-            sr_new[idx]->scheduler = &sched_idle_ops;
-            sr_new[idx]->granularity = 1;
-
-            /* We want the lock not to change when replacing the resource. */
-            sr_new[idx]->schedule_lock = sr->schedule_lock;
-        }
-    }
-
-    ret = 0;
+    ASSERT(sr->granularity);
     ASSERT(sr->cpupool != NULL);
     ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus));
     ASSERT(!cpumask_test_cpu(cpu, sr->cpupool->cpu_valid));
@@ -3299,10 +3328,6 @@  int schedule_cpu_rm(unsigned int cpu)
     /* See comment in schedule_cpu_add() regarding lock switching. */
     old_lock = pcpu_schedule_lock_irqsave(cpu, &flags);
 
-    vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
-    ppriv_old = sr->sched_priv;
-
-    idx = 0;
     for_each_cpu ( cpu_iter, sr->cpus )
     {
         per_cpu(sched_res_idx, cpu_iter) = 0;
@@ -3316,27 +3341,27 @@  int schedule_cpu_rm(unsigned int cpu)
         else
         {
             /* Initialize unit. */
-            unit = sr_new[idx]->sched_unit_idle;
-            unit->res = sr_new[idx];
+            unit = data->sr[idx]->sched_unit_idle;
+            unit->res = data->sr[idx];
             unit->is_running = true;
             sched_unit_add_vcpu(unit, idle_vcpu[cpu_iter]);
             sched_domain_insert_unit(unit, idle_vcpu[cpu_iter]->domain);
 
             /* Adjust cpu masks of resources (old and new). */
             cpumask_clear_cpu(cpu_iter, sr->cpus);
-            cpumask_set_cpu(cpu_iter, sr_new[idx]->cpus);
+            cpumask_set_cpu(cpu_iter, data->sr[idx]->cpus);
             cpumask_set_cpu(cpu_iter, &sched_res_mask);
 
             /* Init timer. */
-            init_timer(&sr_new[idx]->s_timer, s_timer_fn, NULL, cpu_iter);
+            init_timer(&data->sr[idx]->s_timer, s_timer_fn, NULL, cpu_iter);
 
             /* Last resource initializations and insert resource pointer. */
-            sr_new[idx]->master_cpu = cpu_iter;
-            set_sched_res(cpu_iter, sr_new[idx]);
+            data->sr[idx]->master_cpu = cpu_iter;
+            set_sched_res(cpu_iter, data->sr[idx]);
 
             /* Last action: set the new lock pointer. */
             smp_mb();
-            sr_new[idx]->schedule_lock = &sched_free_cpu_lock;
+            data->sr[idx]->schedule_lock = &sched_free_cpu_lock;
 
             idx++;
         }
@@ -3352,16 +3377,12 @@  int schedule_cpu_rm(unsigned int cpu)
     /* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */
     spin_unlock_irqrestore(old_lock, flags);
 
-    sched_deinit_pdata(old_ops, ppriv_old, cpu);
-
-    sched_free_udata(old_ops, vpriv_old);
-    sched_free_pdata(old_ops, ppriv_old, cpu);
+    sched_deinit_pdata(data->old_ops, data->ppriv_old, cpu);
 
-out:
     rcu_read_unlock(&sched_res_rculock);
-    xfree(sr_new);
+    schedule_cpu_rm_free(data, cpu);
 
-    return ret;
+    return 0;
 }
 
 struct scheduler *scheduler_get_default(void)
diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
index 38251b1f7b..601d639699 100644
--- a/xen/common/sched/private.h
+++ b/xen/common/sched/private.h
@@ -600,6 +600,15 @@  struct affinity_masks {
 
 bool update_node_aff_alloc(struct affinity_masks *affinity);
 void update_node_aff_free(struct affinity_masks *affinity);
+
+/* Memory allocation related data for schedule_cpu_rm(). */
+struct cpu_rm_data {
+    const struct scheduler *old_ops;
+    void *ppriv_old;
+    void *vpriv_old;
+    struct sched_resource *sr[];
+};
+
 void sched_rm_cpu(unsigned int cpu);
 const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu);
 void schedule_dump(struct cpupool *c);