diff mbox

[for,4.7,4/4] xen: adopt .deinit_pdata and improve timer handling

Message ID 146231201861.25631.15476137738176988146.stgit@Solace.fritz.box (mailing list archive)
State New, archived
Headers show

Commit Message

Dario Faggioli May 3, 2016, 9:46 p.m. UTC
The scheduling hooks API is now used properly, and no
initialization or de-initialization happen in
alloc/free_pdata any longer.

In fact, just like it is for Credit2, there is no real
need for implementing alloc_pdata and free_pdata.

This also made it possible to improve the replenishment
timer handling logic, such that now the timer is always
kept on one of the pCPU of the scheduler it's servicing.
Before this commit, in fact, even if the pCPU where the
timer happened to be initialized at creation time was
moved to another cpupool, the timer stayed there,
potentially inferfearing with the new scheduler of the
pCPU itself.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
--
Cc: Meng Xu <mengxu@cis.upenn.edu>
Cc: George Dunlap <george.dunlap@citrix.com>
Cc: Tianyang Chen <tiche@seas.upenn.edu>
Cc: Wei Liu <wei.liu2@citrix.com>
---
 xen/common/sched_rt.c |   74 ++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 55 insertions(+), 19 deletions(-)

Comments

George Dunlap May 4, 2016, 3:51 p.m. UTC | #1
On 03/05/16 22:46, Dario Faggioli wrote:
> The scheduling hooks API is now used properly, and no
> initialization or de-initialization happen in
> alloc/free_pdata any longer.
> 
> In fact, just like it is for Credit2, there is no real
> need for implementing alloc_pdata and free_pdata.
> 
> This also made it possible to improve the replenishment
> timer handling logic, such that now the timer is always
> kept on one of the pCPU of the scheduler it's servicing.
> Before this commit, in fact, even if the pCPU where the
> timer happened to be initialized at creation time was
> moved to another cpupool, the timer stayed there,
> potentially inferfearing with the new scheduler of the

* interfering

> pCPU itself.
> 
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>

I don't know much about the logic, so I'll wait for Meng Xu to review it.

 -George

> --
> Cc: Meng Xu <mengxu@cis.upenn.edu>
> Cc: George Dunlap <george.dunlap@citrix.com>
> Cc: Tianyang Chen <tiche@seas.upenn.edu>
> Cc: Wei Liu <wei.liu2@citrix.com>
> ---
>  xen/common/sched_rt.c |   74 ++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 55 insertions(+), 19 deletions(-)
> 
> diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
> index 673fc92..7f8f411 100644
> --- a/xen/common/sched_rt.c
> +++ b/xen/common/sched_rt.c
> @@ -590,6 +590,10 @@ rt_init(struct scheduler *ops)
>      if ( prv == NULL )
>          return -ENOMEM;
>  
> +    prv->repl_timer = xzalloc(struct timer);
> +    if ( prv->repl_timer == NULL )
> +        return -ENOMEM;
> +
>      spin_lock_init(&prv->lock);
>      INIT_LIST_HEAD(&prv->sdom);
>      INIT_LIST_HEAD(&prv->runq);
> @@ -600,12 +604,6 @@ rt_init(struct scheduler *ops)
>  
>      ops->sched_data = prv;
>  
> -    /*
> -     * The timer initialization will happen later when
> -     * the first pcpu is added to this pool in alloc_pdata.
> -     */
> -    prv->repl_timer = NULL;
> -
>      return 0;
>  }
>  
> @@ -614,7 +612,8 @@ rt_deinit(struct scheduler *ops)
>  {
>      struct rt_private *prv = rt_priv(ops);
>  
> -    kill_timer(prv->repl_timer);
> +    ASSERT(prv->repl_timer->status == TIMER_STATUS_invalid ||
> +           prv->repl_timer->status == TIMER_STATUS_killed);
>      xfree(prv->repl_timer);
>  
>      ops->sched_data = NULL;
> @@ -632,9 +631,19 @@ rt_init_pdata(const struct scheduler *ops, void *pdata, int cpu)
>      spinlock_t *old_lock;
>      unsigned long flags;
>  
> -    /* Move the scheduler lock to our global runqueue lock.  */
>      old_lock = pcpu_schedule_lock_irqsave(cpu, &flags);
>  
> +    /*
> +     * TIMER_STATUS_invalid means we are the first cpu that sees the timer
> +     * allocated but not initialized, and so it's up to us to initialize it.
> +     */
> +    if ( prv->repl_timer->status == TIMER_STATUS_invalid )
> +    {
> +        init_timer(prv->repl_timer, repl_timer_handler, (void*) ops, cpu);
> +        dprintk(XENLOG_DEBUG, "RTDS: timer initialized on cpu %u\n", cpu);
> +    }
> +
> +    /* Move the scheduler lock to our global runqueue lock.  */
>      per_cpu(schedule_data, cpu).schedule_lock = &prv->lock;
>  
>      /* _Not_ pcpu_schedule_unlock(): per_cpu().schedule_lock changed! */
> @@ -659,6 +668,20 @@ rt_switch_sched(struct scheduler *new_ops, unsigned int cpu,
>       */
>      ASSERT(per_cpu(schedule_data, cpu).schedule_lock != &prv->lock);
>  
> +    /*
> +     * If we are the absolute first cpu being switched toward this
> +     * scheduler (in which case we'll see TIMER_STATUS_invalid), or the
> +     * first one that is added back to the cpupool that had all its cpus
> +     * removed (in which case we'll see TIMER_STATUS_killed), it's our
> +     * job to (re)initialize the timer.
> +     */
> +    if ( prv->repl_timer->status == TIMER_STATUS_invalid ||
> +         prv->repl_timer->status == TIMER_STATUS_killed )
> +    {
> +        init_timer(prv->repl_timer, repl_timer_handler, (void*) new_ops, cpu);
> +        dprintk(XENLOG_DEBUG, "RTDS: timer initialized on cpu %u\n", cpu);
> +    }
> +
>      idle_vcpu[cpu]->sched_priv = vdata;
>      per_cpu(scheduler, cpu) = new_ops;
>      per_cpu(schedule_data, cpu).sched_priv = NULL; /* no pdata */
> @@ -672,23 +695,36 @@ rt_switch_sched(struct scheduler *new_ops, unsigned int cpu,
>      per_cpu(schedule_data, cpu).schedule_lock = &prv->lock;
>  }
>  
> -static void *
> -rt_alloc_pdata(const struct scheduler *ops, int cpu)
> +static void
> +rt_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
>  {
> +    unsigned long flags;
>      struct rt_private *prv = rt_priv(ops);
>  
> -    if ( prv->repl_timer == NULL )
> -    {
> -        /* Allocate the timer on the first cpu of this pool. */
> -        prv->repl_timer = xzalloc(struct timer);
> +    spin_lock_irqsave(&prv->lock, flags);
>  
> -        if ( prv->repl_timer == NULL )
> -            return ERR_PTR(-ENOMEM);
> +    if ( prv->repl_timer->cpu == cpu )
> +    {
> +        struct cpupool *c = per_cpu(cpupool, cpu);
> +        unsigned int new_cpu = cpumask_cycle(cpu, cpupool_online_cpumask(c));
>  
> -        init_timer(prv->repl_timer, repl_timer_handler, (void *)ops, cpu);
> +        /*
> +         * Make sure the timer run on one of the cpus that are still available
> +         * to this scheduler. If there aren't any left, it means it's the time
> +         * to just kill it.
> +         */
> +        if ( new_cpu >= nr_cpu_ids )
> +        {
> +            kill_timer(prv->repl_timer);
> +            dprintk(XENLOG_DEBUG, "RTDS: timer killed on cpu %d\n", cpu);
> +        }
> +        else
> +        {
> +            migrate_timer(prv->repl_timer, new_cpu);
> +        }
>      }
>  
> -    return NULL;
> +    spin_unlock_irqrestore(&prv->lock, flags);
>  }
>  
>  static void *
> @@ -1433,9 +1469,9 @@ static const struct scheduler sched_rtds_def = {
>      .dump_settings  = rt_dump,
>      .init           = rt_init,
>      .deinit         = rt_deinit,
> -    .alloc_pdata    = rt_alloc_pdata,
>      .init_pdata     = rt_init_pdata,
>      .switch_sched   = rt_switch_sched,
> +    .deinit_pdata   = rt_deinit_pdata,
>      .alloc_domdata  = rt_alloc_domdata,
>      .free_domdata   = rt_free_domdata,
>      .init_domain    = rt_dom_init,
>
Meng Xu May 4, 2016, 3:53 p.m. UTC | #2
On Wed, May 4, 2016 at 11:51 AM, George Dunlap <george.dunlap@citrix.com> wrote:
> On 03/05/16 22:46, Dario Faggioli wrote:
>> The scheduling hooks API is now used properly, and no
>> initialization or de-initialization happen in
>> alloc/free_pdata any longer.
>>
>> In fact, just like it is for Credit2, there is no real
>> need for implementing alloc_pdata and free_pdata.
>>
>> This also made it possible to improve the replenishment
>> timer handling logic, such that now the timer is always
>> kept on one of the pCPU of the scheduler it's servicing.
>> Before this commit, in fact, even if the pCPU where the
>> timer happened to be initialized at creation time was
>> moved to another cpupool, the timer stayed there,
>> potentially inferfearing with the new scheduler of the
>
> * interfering
>
>> pCPU itself.
>>
>> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
>
> I don't know much about the logic, so I'll wait for Meng Xu to review it.
>

I will look at it this week...(I will try to do it asap...)

Meng
Dario Faggioli May 6, 2016, 11:05 p.m. UTC | #3
On Wed, 2016-05-04 at 11:53 -0400, Meng Xu wrote:
> On Wed, May 4, 2016 at 11:51 AM, George Dunlap <george.dunlap@citrix.
> com> wrote:
> > On 03/05/16 22:46, Dario Faggioli wrote:
> > > This also made it possible to improve the replenishment
> > > timer handling logic, such that now the timer is always
> > > kept on one of the pCPU of the scheduler it's servicing.
> > > Before this commit, in fact, even if the pCPU where the
> > > timer happened to be initialized at creation time was
> > > moved to another cpupool, the timer stayed there,
> > > potentially inferfearing with the new scheduler of the
> > * interfering
> > 
> > > 
> > > pCPU itself.
> > > 
> > > Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> > I don't know much about the logic, so I'll wait for Meng Xu to
> > review it.
> > 
> I will look at it this week...(I will try to do it asap...)
> 
Hey Meng... di d you have the chance? I hate to push, but we really
like to have the chance to get this into 4.7.

Thanks and Regards,
Dario
Meng Xu May 7, 2016, 9:19 p.m. UTC | #4
On Tue, May 3, 2016 at 5:46 PM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
>
> The scheduling hooks API is now used properly, and no
> initialization or de-initialization happen in
> alloc/free_pdata any longer.
>
> In fact, just like it is for Credit2, there is no real
> need for implementing alloc_pdata and free_pdata.
>
> This also made it possible to improve the replenishment
> timer handling logic, such that now the timer is always
> kept on one of the pCPU of the scheduler it's servicing.
> Before this commit, in fact, even if the pCPU where the
> timer happened to be initialized at creation time was
> moved to another cpupool, the timer stayed there,
> potentially inferfearing with the new scheduler of the
> pCPU itself.
>
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> --

Reviewed-and-Tested-by: Meng Xu <mengxu@cis.upenn.edu>

I do have a minor comment about the patch, although it is not
important at all and it is not really about this patch...

> @@ -614,7 +612,8 @@ rt_deinit(struct scheduler *ops)
>  {
>      struct rt_private *prv = rt_priv(ops);
>
> -    kill_timer(prv->repl_timer);
> +    ASSERT(prv->repl_timer->status == TIMER_STATUS_invalid ||
> +           prv->repl_timer->status == TIMER_STATUS_killed);

I found in xen/timer.h, the comment after the definition of the
TIMER_STATUS_invalid is

                #define TIMER_STATUS_invalid  0 /* Should never see
this.           */

This comment is a little contrary to how the status is used here.
Actually, what does it exactly means by "Should never see this"?

This _invalid status is used in timer.h and it is the status when a
timer is initialized by init_timer().

So I'm thinking maybe this comment can be better improved to avoid confusion?

Anyway, this is just a comment and should not be a blocker, IMO. I
just want to raise it up since I saw it... :-)


===About the testing I did===
---Below is how I tested it---
I booted up two vcpus, created one cpupool for each type of
schedulers, and migrated them around.
The scripts to run the test cases can be found at
https://github.com/PennPanda/scripts/tree/master/xen/xen-test

---Below is the testing scenarios---
echo "start test case 1..."
xl cpupool-list
xl cpupool-destroy cpupool-credit
xl cpupool-destroy cpupool-credit2
xl cpupool-destroy cpupool-rtds
xl cpupool-create ${cpupool_credit_file}
xl cpupool-create ${cpupool_credit2_file}
xl cpupool-create ${cpupool_rtds_file}
# Add cpus to each cpupool
echo "Add CPUs to each cpupool"
for ((i=0;i<5; i+=1));do
xl cpupool-cpu-remove Pool-0 ${i}
done
echo "xl cpupool-cpu-add cpupool-credit 0"
xl cpupool-cpu-add cpupool-credit 0
echo "xl cpupool-cpu-add cpupool-credit2 1,2"
xl cpupool-cpu-add cpupool-credit2 1
xl cpupool-cpu-add cpupool-credit2 2
echo "xl cpupool-cpu-add cpupool-rtds 3,4"
xl cpupool-cpu-add cpupool-rtds 3
xl cpupool-cpu-add cpupool-rtds 4
xl cpupool-list -c
xl cpupool-list
# Migrate vm1 among cpupools
echo "Migrate ${vm1_name} among cpupools"
xl cpupool-migrate ${vm1_name} cpupool-rtds
xl cpupool-migrate ${vm1_name} cpupool-credit2
xl cpupool-migrate ${vm1_name} cpupool-rtds
xl cpupool-migrate ${vm1_name} cpupool-credit
xl cpupool-migrate ${vm1_name} cpupool-rtds

Thank you very much and best regards,

Meng
Meng Xu May 8, 2016, 3:12 a.m. UTC | #5
On Sat, May 7, 2016 at 5:19 PM, Meng Xu <mengxu@cis.upenn.edu> wrote:
> On Tue, May 3, 2016 at 5:46 PM, Dario Faggioli
> <dario.faggioli@citrix.com> wrote:
>>
>> The scheduling hooks API is now used properly, and no
>> initialization or de-initialization happen in
>> alloc/free_pdata any longer.
>>
>> In fact, just like it is for Credit2, there is no real
>> need for implementing alloc_pdata and free_pdata.
>>
>> This also made it possible to improve the replenishment
>> timer handling logic, such that now the timer is always
>> kept on one of the pCPU of the scheduler it's servicing.
>> Before this commit, in fact, even if the pCPU where the
>> timer happened to be initialized at creation time was
>> moved to another cpupool, the timer stayed there,
>> potentially inferfearing with the new scheduler of the
>> pCPU itself.
>>
>> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
>> --
>
> Reviewed-and-Tested-by: Meng Xu <mengxu@cis.upenn.edu>

>
> ---Below is the testing scenarios---
> echo "start test case 1..."
> xl cpupool-list
> xl cpupool-destroy cpupool-credit
> xl cpupool-destroy cpupool-credit2
> xl cpupool-destroy cpupool-rtds
> xl cpupool-create ${cpupool_credit_file}
> xl cpupool-create ${cpupool_credit2_file}
> xl cpupool-create ${cpupool_rtds_file}
> # Add cpus to each cpupool
> echo "Add CPUs to each cpupool"
> for ((i=0;i<5; i+=1));do
> xl cpupool-cpu-remove Pool-0 ${i}
> done
> echo "xl cpupool-cpu-add cpupool-credit 0"
> xl cpupool-cpu-add cpupool-credit 0
> echo "xl cpupool-cpu-add cpupool-credit2 1,2"
> xl cpupool-cpu-add cpupool-credit2 1
> xl cpupool-cpu-add cpupool-credit2 2
> echo "xl cpupool-cpu-add cpupool-rtds 3,4"
> xl cpupool-cpu-add cpupool-rtds 3
> xl cpupool-cpu-add cpupool-rtds 4
> xl cpupool-list -c
> xl cpupool-list
> # Migrate vm1 among cpupools
> echo "Migrate ${vm1_name} among cpupools"
> xl cpupool-migrate ${vm1_name} cpupool-rtds
> xl cpupool-migrate ${vm1_name} cpupool-credit2
> xl cpupool-migrate ${vm1_name} cpupool-rtds
> xl cpupool-migrate ${vm1_name} cpupool-credit
> xl cpupool-migrate ${vm1_name} cpupool-rtds
>

I forgot one thing in the previous email.
When I tried to migrate Domain-0 from the Pool-0 (with rtds or credit
scheduler) to another newly created pool, say cpupool-credit, it
always fails.

This situation happens even when I boot into credit scheduler and try
to migrate Domain-0 to another cpupool.

I'm wondering if Domain-0 can be migrated among cpupools?
From the Xen wiki: http://wiki.xenproject.org/wiki/Cpupools_Howto, it
seems Domain-0 can be migrated....

Thanks,

Meng

-----------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/
Jürgen Groß May 9, 2016, 8:07 a.m. UTC | #6
On 08/05/16 05:12, Meng Xu wrote:
> On Sat, May 7, 2016 at 5:19 PM, Meng Xu <mengxu@cis.upenn.edu> wrote:
>> On Tue, May 3, 2016 at 5:46 PM, Dario Faggioli
>> <dario.faggioli@citrix.com> wrote:
>>>
>>> The scheduling hooks API is now used properly, and no
>>> initialization or de-initialization happen in
>>> alloc/free_pdata any longer.
>>>
>>> In fact, just like it is for Credit2, there is no real
>>> need for implementing alloc_pdata and free_pdata.
>>>
>>> This also made it possible to improve the replenishment
>>> timer handling logic, such that now the timer is always
>>> kept on one of the pCPU of the scheduler it's servicing.
>>> Before this commit, in fact, even if the pCPU where the
>>> timer happened to be initialized at creation time was
>>> moved to another cpupool, the timer stayed there,
>>> potentially inferfearing with the new scheduler of the
>>> pCPU itself.
>>>
>>> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
>>> --
>>
>> Reviewed-and-Tested-by: Meng Xu <mengxu@cis.upenn.edu>
> 
>>
>> ---Below is the testing scenarios---
>> echo "start test case 1..."
>> xl cpupool-list
>> xl cpupool-destroy cpupool-credit
>> xl cpupool-destroy cpupool-credit2
>> xl cpupool-destroy cpupool-rtds
>> xl cpupool-create ${cpupool_credit_file}
>> xl cpupool-create ${cpupool_credit2_file}
>> xl cpupool-create ${cpupool_rtds_file}
>> # Add cpus to each cpupool
>> echo "Add CPUs to each cpupool"
>> for ((i=0;i<5; i+=1));do
>> xl cpupool-cpu-remove Pool-0 ${i}
>> done
>> echo "xl cpupool-cpu-add cpupool-credit 0"
>> xl cpupool-cpu-add cpupool-credit 0
>> echo "xl cpupool-cpu-add cpupool-credit2 1,2"
>> xl cpupool-cpu-add cpupool-credit2 1
>> xl cpupool-cpu-add cpupool-credit2 2
>> echo "xl cpupool-cpu-add cpupool-rtds 3,4"
>> xl cpupool-cpu-add cpupool-rtds 3
>> xl cpupool-cpu-add cpupool-rtds 4
>> xl cpupool-list -c
>> xl cpupool-list
>> # Migrate vm1 among cpupools
>> echo "Migrate ${vm1_name} among cpupools"
>> xl cpupool-migrate ${vm1_name} cpupool-rtds
>> xl cpupool-migrate ${vm1_name} cpupool-credit2
>> xl cpupool-migrate ${vm1_name} cpupool-rtds
>> xl cpupool-migrate ${vm1_name} cpupool-credit
>> xl cpupool-migrate ${vm1_name} cpupool-rtds
>>
> 
> I forgot one thing in the previous email.
> When I tried to migrate Domain-0 from the Pool-0 (with rtds or credit
> scheduler) to another newly created pool, say cpupool-credit, it
> always fails.
> 
> This situation happens even when I boot into credit scheduler and try
> to migrate Domain-0 to another cpupool.
> 
> I'm wondering if Domain-0 can be migrated among cpupools?
> From the Xen wiki: http://wiki.xenproject.org/wiki/Cpupools_Howto, it
> seems Domain-0 can be migrated....

It can't. Domain-0 is always member of Pool-0.

I think at least an update of the xl man page would be a good idea.
I'll do a patch.


Juergen
Dario Faggioli May 9, 2016, 1:22 p.m. UTC | #7
On Sat, 2016-05-07 at 23:19 +0200, Meng Xu wrote:
> On Tue, May 3, 2016 at 5:46 PM, Dario Faggioli
> <dario.faggioli@citrix.com> wrote:
> > 
> > 
> > The scheduling hooks API is now used properly, and no
> > initialization or de-initialization happen in
> > alloc/free_pdata any longer.
> > 
> > In fact, just like it is for Credit2, there is no real
> > need for implementing alloc_pdata and free_pdata.
> > 
> > This also made it possible to improve the replenishment
> > timer handling logic, such that now the timer is always
> > kept on one of the pCPU of the scheduler it's servicing.
> > Before this commit, in fact, even if the pCPU where the
> > timer happened to be initialized at creation time was
> > moved to another cpupool, the timer stayed there,
> > potentially inferfearing with the new scheduler of the
> > pCPU itself.
> > 
> > Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> > --
> Reviewed-and-Tested-by: Meng Xu <mengxu@cis.upenn.edu>
> 
Thanks! :-)

> I do have a minor comment about the patch, although it is not
> important at all and it is not really about this patch...
> 
> > 
> > @@ -614,7 +612,8 @@ rt_deinit(struct scheduler *ops)
> >  {
> >      struct rt_private *prv = rt_priv(ops);
> > 
> > -    kill_timer(prv->repl_timer);
> > +    ASSERT(prv->repl_timer->status == TIMER_STATUS_invalid ||
> > +           prv->repl_timer->status == TIMER_STATUS_killed);
> I found in xen/timer.h, the comment after the definition of the
> TIMER_STATUS_invalid is
> 
>                 #define TIMER_STATUS_invalid  0 /* Should never see
> this.           */
> 
> This comment is a little contrary to how the status is used here.
> Actually, what does it exactly means by "Should never see this"?
> 
> This _invalid status is used in timer.h and it is the status when a
> timer is initialized by init_timer().
> 
As far as my understanding goes, this means that a timer, during its
operations, should never be found in this state.

In fact, this mark a situation where the timer has been allocated but
never initialized, and there are ASSERT()s around to enforce that.

However, if what one wants is _exactly_ to check whether the timer has
been allocated ut not initialized, I don't see why I can't use this.

> So I'm thinking maybe this comment can be better improved to avoid
> confusion?
> 
I don't think things are confusing, neither right now, nor after this
patch, but I'm open to others' opinion. :-)

Thanks and Regards,
Dario
Meng Xu May 9, 2016, 2:08 p.m. UTC | #8
>
>
>
> > I do have a minor comment about the patch, although it is not
> > important at all and it is not really about this patch...
> >
> > >
> > > @@ -614,7 +612,8 @@ rt_deinit(struct scheduler *ops)
> > >  {
> > >      struct rt_private *prv = rt_priv(ops);
> > >
> > > -    kill_timer(prv->repl_timer);
> > > +    ASSERT(prv->repl_timer->status == TIMER_STATUS_invalid ||
> > > +           prv->repl_timer->status == TIMER_STATUS_killed);
> > I found in xen/timer.h, the comment after the definition of the
> > TIMER_STATUS_invalid is
> >
> >                 #define TIMER_STATUS_invalid  0 /* Should never see
> > this.           */
> >
> > This comment is a little contrary to how the status is used here.
> > Actually, what does it exactly means by "Should never see this"?
> >
> > This _invalid status is used in timer.h and it is the status when a
> > timer is initialized by init_timer().
> >
> As far as my understanding goes, this means that a timer, during its
> operations, should never be found in this state.
>
> In fact, this mark a situation where the timer has been allocated but
> never initialized, and there are ASSERT()s around to enforce that.
>
> However, if what one wants is _exactly_ to check whether the timer has
> been allocated ut not initialized, I don't see why I can't use this.



You can use this. Actually, I agree with how you used this here.
Actually, this is also how the existing init_timer() uses it.


>
>
> > So I'm thinking maybe this comment can be better improved to avoid
> > confusion?
> >
> I don't think things are confusing, neither right now, nor after this
> patch, but I'm open to others' opinion. :-)


Hmm, I won't get confused with the comment from now on, but I'm unsure
if someone else will or not. The tricky thing is when I know it, I
won't feel weird. However, when I first read it, I feel a little
confusing if not reading the other parts of the code related to this
macro.

Anyway, I'm ok with either way: change the comment or not.

Best Regards,

Meng


-----------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/
George Dunlap May 9, 2016, 2:46 p.m. UTC | #9
On Sat, May 7, 2016 at 10:19 PM, Meng Xu <mengxu@cis.upenn.edu> wrote:
> On Tue, May 3, 2016 at 5:46 PM, Dario Faggioli
> <dario.faggioli@citrix.com> wrote:
>>
>> The scheduling hooks API is now used properly, and no
>> initialization or de-initialization happen in
>> alloc/free_pdata any longer.
>>
>> In fact, just like it is for Credit2, there is no real
>> need for implementing alloc_pdata and free_pdata.
>>
>> This also made it possible to improve the replenishment
>> timer handling logic, such that now the timer is always
>> kept on one of the pCPU of the scheduler it's servicing.
>> Before this commit, in fact, even if the pCPU where the
>> timer happened to be initialized at creation time was
>> moved to another cpupool, the timer stayed there,
>> potentially inferfearing with the new scheduler of the
>> pCPU itself.
>>
>> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
>> --
>
> Reviewed-and-Tested-by: Meng Xu <mengxu@cis.upenn.edu>

And on that basis:

Acked-by: George Dunlap <george.dunlap@citrix.com>

But it looks like it still needs a release ack?

 -George
Dario Faggioli May 9, 2016, 2:52 p.m. UTC | #10
On Mon, 2016-05-09 at 10:08 -0400, Meng Xu wrote:
> > I don't think things are confusing, neither right now, nor after
> > this
> > patch, but I'm open to others' opinion. :-)
> 
> Hmm, I won't get confused with the comment from now on, but I'm
> unsure
> if someone else will or not. The tricky thing is when I know it, I
> won't feel weird. However, when I first read it, I feel a little
> confusing if not reading the other parts of the code related to this
> macro.
> 
I don't feel the same, but I understand the concern.

I think we have two options here:
 1. we just do nothing;
 2. you send a patch that, according to your best judgement, improve 
    things (as we all do all the time! :-P).

:-D

> Anyway, I'm ok with either way: change the comment or not.
> 
Me too, and in fact, I'm not changing it, but I won't stop you tryingto
do so. :-)

Regards,
Dario
Wei Liu May 9, 2016, 2:58 p.m. UTC | #11
On Mon, May 09, 2016 at 03:46:23PM +0100, George Dunlap wrote:
> On Sat, May 7, 2016 at 10:19 PM, Meng Xu <mengxu@cis.upenn.edu> wrote:
> > On Tue, May 3, 2016 at 5:46 PM, Dario Faggioli
> > <dario.faggioli@citrix.com> wrote:
> >>
> >> The scheduling hooks API is now used properly, and no
> >> initialization or de-initialization happen in
> >> alloc/free_pdata any longer.
> >>
> >> In fact, just like it is for Credit2, there is no real
> >> need for implementing alloc_pdata and free_pdata.
> >>
> >> This also made it possible to improve the replenishment
> >> timer handling logic, such that now the timer is always
> >> kept on one of the pCPU of the scheduler it's servicing.
> >> Before this commit, in fact, even if the pCPU where the
> >> timer happened to be initialized at creation time was
> >> moved to another cpupool, the timer stayed there,
> >> potentially inferfearing with the new scheduler of the
> >> pCPU itself.
> >>
> >> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
> >> --
> >
> > Reviewed-and-Tested-by: Meng Xu <mengxu@cis.upenn.edu>
> 
> And on that basis:
> 
> Acked-by: George Dunlap <george.dunlap@citrix.com>
> 
> But it looks like it still needs a release ack?
> 

Release-acked-by: Wei Liu <wei.liu2@citrix.com>

>  -George
Meng Xu May 9, 2016, 2:58 p.m. UTC | #12
On Mon, May 9, 2016 at 10:52 AM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> On Mon, 2016-05-09 at 10:08 -0400, Meng Xu wrote:
>> > I don't think things are confusing, neither right now, nor after
>> > this
>> > patch, but I'm open to others' opinion. :-)
>>
>> Hmm, I won't get confused with the comment from now on, but I'm
>> unsure
>> if someone else will or not. The tricky thing is when I know it, I
>> won't feel weird. However, when I first read it, I feel a little
>> confusing if not reading the other parts of the code related to this
>> macro.
>>
> I don't feel the same, but I understand the concern.
>
> I think we have two options here:
>  1. we just do nothing;
>  2. you send a patch that, according to your best judgement, improve
>     things (as we all do all the time! :-P).
>
> :-D
>
>> Anyway, I'm ok with either way: change the comment or not.
>>
> Me too, and in fact, I'm not changing it, but I won't stop you tryingto
> do so. :-)
>

OK. I can do it... But is just one comment line change too small to be
a patch? ;-)

Thanks,

Meng

-----------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/
George Dunlap May 9, 2016, 3:35 p.m. UTC | #13
On Mon, May 9, 2016 at 3:58 PM, Wei Liu <wei.liu2@citrix.com> wrote:
> On Mon, May 09, 2016 at 03:46:23PM +0100, George Dunlap wrote:
>> On Sat, May 7, 2016 at 10:19 PM, Meng Xu <mengxu@cis.upenn.edu> wrote:
>> > On Tue, May 3, 2016 at 5:46 PM, Dario Faggioli
>> > <dario.faggioli@citrix.com> wrote:
>> >>
>> >> The scheduling hooks API is now used properly, and no
>> >> initialization or de-initialization happen in
>> >> alloc/free_pdata any longer.
>> >>
>> >> In fact, just like it is for Credit2, there is no real
>> >> need for implementing alloc_pdata and free_pdata.
>> >>
>> >> This also made it possible to improve the replenishment
>> >> timer handling logic, such that now the timer is always
>> >> kept on one of the pCPU of the scheduler it's servicing.
>> >> Before this commit, in fact, even if the pCPU where the
>> >> timer happened to be initialized at creation time was
>> >> moved to another cpupool, the timer stayed there,
>> >> potentially inferfearing with the new scheduler of the
>> >> pCPU itself.
>> >>
>> >> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
>> >> --
>> >
>> > Reviewed-and-Tested-by: Meng Xu <mengxu@cis.upenn.edu>
>>
>> And on that basis:
>>
>> Acked-by: George Dunlap <george.dunlap@citrix.com>
>>
>> But it looks like it still needs a release ack?
>>
>
> Release-acked-by: Wei Liu <wei.liu2@citrix.com>

And pushed.  Thanks.

 -George
diff mbox

Patch

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 673fc92..7f8f411 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -590,6 +590,10 @@  rt_init(struct scheduler *ops)
     if ( prv == NULL )
         return -ENOMEM;
 
+    prv->repl_timer = xzalloc(struct timer);
+    if ( prv->repl_timer == NULL )
+        return -ENOMEM;
+
     spin_lock_init(&prv->lock);
     INIT_LIST_HEAD(&prv->sdom);
     INIT_LIST_HEAD(&prv->runq);
@@ -600,12 +604,6 @@  rt_init(struct scheduler *ops)
 
     ops->sched_data = prv;
 
-    /*
-     * The timer initialization will happen later when
-     * the first pcpu is added to this pool in alloc_pdata.
-     */
-    prv->repl_timer = NULL;
-
     return 0;
 }
 
@@ -614,7 +612,8 @@  rt_deinit(struct scheduler *ops)
 {
     struct rt_private *prv = rt_priv(ops);
 
-    kill_timer(prv->repl_timer);
+    ASSERT(prv->repl_timer->status == TIMER_STATUS_invalid ||
+           prv->repl_timer->status == TIMER_STATUS_killed);
     xfree(prv->repl_timer);
 
     ops->sched_data = NULL;
@@ -632,9 +631,19 @@  rt_init_pdata(const struct scheduler *ops, void *pdata, int cpu)
     spinlock_t *old_lock;
     unsigned long flags;
 
-    /* Move the scheduler lock to our global runqueue lock.  */
     old_lock = pcpu_schedule_lock_irqsave(cpu, &flags);
 
+    /*
+     * TIMER_STATUS_invalid means we are the first cpu that sees the timer
+     * allocated but not initialized, and so it's up to us to initialize it.
+     */
+    if ( prv->repl_timer->status == TIMER_STATUS_invalid )
+    {
+        init_timer(prv->repl_timer, repl_timer_handler, (void*) ops, cpu);
+        dprintk(XENLOG_DEBUG, "RTDS: timer initialized on cpu %u\n", cpu);
+    }
+
+    /* Move the scheduler lock to our global runqueue lock.  */
     per_cpu(schedule_data, cpu).schedule_lock = &prv->lock;
 
     /* _Not_ pcpu_schedule_unlock(): per_cpu().schedule_lock changed! */
@@ -659,6 +668,20 @@  rt_switch_sched(struct scheduler *new_ops, unsigned int cpu,
      */
     ASSERT(per_cpu(schedule_data, cpu).schedule_lock != &prv->lock);
 
+    /*
+     * If we are the absolute first cpu being switched toward this
+     * scheduler (in which case we'll see TIMER_STATUS_invalid), or the
+     * first one that is added back to the cpupool that had all its cpus
+     * removed (in which case we'll see TIMER_STATUS_killed), it's our
+     * job to (re)initialize the timer.
+     */
+    if ( prv->repl_timer->status == TIMER_STATUS_invalid ||
+         prv->repl_timer->status == TIMER_STATUS_killed )
+    {
+        init_timer(prv->repl_timer, repl_timer_handler, (void*) new_ops, cpu);
+        dprintk(XENLOG_DEBUG, "RTDS: timer initialized on cpu %u\n", cpu);
+    }
+
     idle_vcpu[cpu]->sched_priv = vdata;
     per_cpu(scheduler, cpu) = new_ops;
     per_cpu(schedule_data, cpu).sched_priv = NULL; /* no pdata */
@@ -672,23 +695,36 @@  rt_switch_sched(struct scheduler *new_ops, unsigned int cpu,
     per_cpu(schedule_data, cpu).schedule_lock = &prv->lock;
 }
 
-static void *
-rt_alloc_pdata(const struct scheduler *ops, int cpu)
+static void
+rt_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
 {
+    unsigned long flags;
     struct rt_private *prv = rt_priv(ops);
 
-    if ( prv->repl_timer == NULL )
-    {
-        /* Allocate the timer on the first cpu of this pool. */
-        prv->repl_timer = xzalloc(struct timer);
+    spin_lock_irqsave(&prv->lock, flags);
 
-        if ( prv->repl_timer == NULL )
-            return ERR_PTR(-ENOMEM);
+    if ( prv->repl_timer->cpu == cpu )
+    {
+        struct cpupool *c = per_cpu(cpupool, cpu);
+        unsigned int new_cpu = cpumask_cycle(cpu, cpupool_online_cpumask(c));
 
-        init_timer(prv->repl_timer, repl_timer_handler, (void *)ops, cpu);
+        /*
+         * Make sure the timer run on one of the cpus that are still available
+         * to this scheduler. If there aren't any left, it means it's the time
+         * to just kill it.
+         */
+        if ( new_cpu >= nr_cpu_ids )
+        {
+            kill_timer(prv->repl_timer);
+            dprintk(XENLOG_DEBUG, "RTDS: timer killed on cpu %d\n", cpu);
+        }
+        else
+        {
+            migrate_timer(prv->repl_timer, new_cpu);
+        }
     }
 
-    return NULL;
+    spin_unlock_irqrestore(&prv->lock, flags);
 }
 
 static void *
@@ -1433,9 +1469,9 @@  static const struct scheduler sched_rtds_def = {
     .dump_settings  = rt_dump,
     .init           = rt_init,
     .deinit         = rt_deinit,
-    .alloc_pdata    = rt_alloc_pdata,
     .init_pdata     = rt_init_pdata,
     .switch_sched   = rt_switch_sched,
+    .deinit_pdata   = rt_deinit_pdata,
     .alloc_domdata  = rt_alloc_domdata,
     .free_domdata   = rt_free_domdata,
     .init_domain    = rt_dom_init,