diff mbox

[v5,RFC] xen: sched: convert RTDS from time to event driven model

Message ID 1454992407-5436-1-git-send-email-tiche@seas.upenn.edu (mailing list archive)
State New, archived
Headers show

Commit Message

Tianyang Chen Feb. 9, 2016, 4:33 a.m. UTC
Changes since v4:
    removed unnecessary replenishment queue checks in vcpu_wake()
    extended replq_remove() to all cases in vcpu_sleep()
    used _deadline_queue_insert() helper function for both queues
    _replq_insert() and _replq_remove() program timer internally

Changes since v3:
    removed running queue.
    added repl queue to keep track of repl events.
    timer is now per scheduler.
    timer is init on a valid cpu in a cpupool.

Signed-off-by: Tianyang Chen <tiche@seas.upenn.edu>
Signed-off-by: Meng Xu <mengxu@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgolomb@seas.upenn.edu>
---
 xen/common/sched_rt.c |  337 ++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 251 insertions(+), 86 deletions(-)

Comments

Meng Xu Feb. 16, 2016, 3:55 a.m. UTC | #1
Hi Tianyang,

Thanks for the patch! Great work and really quick action! :-)
I will just comment on a few things I quickly found, and would look
forward to Dario's comments.

On Mon, Feb 8, 2016 at 11:33 PM, Tianyang Chen <tiche@seas.upenn.edu> wrote:
> Changes since v4:
>     removed unnecessary replenishment queue checks in vcpu_wake()
>     extended replq_remove() to all cases in vcpu_sleep()
>     used _deadline_queue_insert() helper function for both queues
>     _replq_insert() and _replq_remove() program timer internally
>
> Changes since v3:
>     removed running queue.
>     added repl queue to keep track of repl events.
>     timer is now per scheduler.
>     timer is init on a valid cpu in a cpupool.
>
> Signed-off-by: Tianyang Chen <tiche@seas.upenn.edu>
> Signed-off-by: Meng Xu <mengxu@cis.upenn.edu>
> Signed-off-by: Dagaen Golomb <dgolomb@seas.upenn.edu>
> ---
>  xen/common/sched_rt.c |  337 ++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 251 insertions(+), 86 deletions(-)
>
> diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
> index 2e5430f..1f0bb7b 100644
> --- a/xen/common/sched_rt.c
> +++ b/xen/common/sched_rt.c
> @@ -16,6 +16,7 @@
>  #include <xen/delay.h>
>  #include <xen/event.h>
>  #include <xen/time.h>
> +#include <xen/timer.h>
>  #include <xen/perfc.h>
>  #include <xen/sched-if.h>
>  #include <xen/softirq.h>
> @@ -87,7 +88,7 @@
>  #define RTDS_DEFAULT_BUDGET     (MICROSECS(4000))
>
>  #define UPDATE_LIMIT_SHIFT      10
> -#define MAX_SCHEDULE            (MILLISECS(1))
> +
>  /*
>   * Flags
>   */
> @@ -142,6 +143,12 @@ static cpumask_var_t *_cpumask_scratch;
>   */
>  static unsigned int nr_rt_ops;
>
> +/* handler for the replenishment timer */
> +static void repl_handler(void *data);
> +
> +/* checks if a timer is active or not */
> +bool_t active_timer(struct timer* t);
> +
>  /*
>   * Systme-wide private data, include global RunQueue/DepletedQ
>   * Global lock is referenced by schedule_data.schedule_lock from all
> @@ -152,7 +159,9 @@ struct rt_private {
>      struct list_head sdom;      /* list of availalbe domains, used for dump */
>      struct list_head runq;      /* ordered list of runnable vcpus */
>      struct list_head depletedq; /* unordered list of depleted vcpus */
> +    struct list_head replq;     /* ordered list of vcpus that need replenishment */
>      cpumask_t tickled;          /* cpus been tickled */
> +    struct timer *repl_timer;   /* replenishment timer */
>  };
>
>  /*
> @@ -160,6 +169,7 @@ struct rt_private {
>   */
>  struct rt_vcpu {
>      struct list_head q_elem;    /* on the runq/depletedq list */
> +    struct list_head replq_elem;/* on the repl event list */
>
>      /* Up-pointers */
>      struct rt_dom *sdom;
> @@ -213,8 +223,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops)
>      return &rt_priv(ops)->depletedq;
>  }
>
> +static inline struct list_head *rt_replq(const struct scheduler *ops)
> +{
> +    return &rt_priv(ops)->replq;
> +}
> +
>  /*
> - * Queue helper functions for runq and depletedq
> + * Queue helper functions for runq, depletedq
> + * and replenishment event queue
>   */
>  static int
>  __vcpu_on_q(const struct rt_vcpu *svc)
> @@ -228,6 +244,18 @@ __q_elem(struct list_head *elem)
>      return list_entry(elem, struct rt_vcpu, q_elem);
>  }
>
> +static struct rt_vcpu *
> +__replq_elem(struct list_head *elem)
> +{
> +    return list_entry(elem, struct rt_vcpu, replq_elem);
> +}
> +
> +static int
> +__vcpu_on_replq(const struct rt_vcpu *svc)
> +{
> +   return !list_empty(&svc->replq_elem);
> +}
> +
>  /*
>   * Debug related code, dump vcpu/cpu information
>   */
> @@ -288,7 +316,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu)
>  static void
>  rt_dump(const struct scheduler *ops)
>  {
> -    struct list_head *runq, *depletedq, *iter;
> +    struct list_head *runq, *depletedq, *replq, *iter;
>      struct rt_private *prv = rt_priv(ops);
>      struct rt_vcpu *svc;
>      struct rt_dom *sdom;
> @@ -301,6 +329,7 @@ rt_dump(const struct scheduler *ops)
>
>      runq = rt_runq(ops);
>      depletedq = rt_depletedq(ops);
> +    replq = rt_replq(ops);
>
>      printk("Global RunQueue info:\n");
>      list_for_each( iter, runq )
> @@ -316,6 +345,13 @@ rt_dump(const struct scheduler *ops)
>          rt_dump_vcpu(ops, svc);
>      }
>
> +    printk("Global Replenishment Event info:\n");
> +    list_for_each( iter, replq )
> +    {
> +        svc = __replq_elem(iter);
> +        rt_dump_vcpu(ops, svc);
> +    }
> +
>      printk("Domain info:\n");
>      list_for_each( iter, &prv->sdom )
>      {
> @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc)
>  }
>
>  /*
> + * Removing a vcpu from the replenishment queue could
> + * re-program the timer for the next replenishment event
> + * if the timer is currently active
> + */
> +static inline void
> +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> +    struct rt_private *prv = rt_priv(ops);
> +    struct list_head *replq = rt_replq(ops);
> +    struct timer* repl_timer = prv->repl_timer;
> +
> +    if ( __vcpu_on_replq(svc) )
> +    {
> +        /*
> +         * disarm the timer if removing the first replenishment event
> +         * which is going to happen next
> +         */
> +        if( active_timer(repl_timer) )
> +        {
> +            struct rt_vcpu *next_repl = __replq_elem(replq->next);
> +
> +            if( next_repl->cur_deadline == svc->cur_deadline )
> +                repl_timer->expires = 0;
> +
> +            list_del_init(&svc->replq_elem);
> +
> +            /* re-arm the timer for the next replenishment event */
> +            if( !list_empty(replq) )
> +            {
> +                struct rt_vcpu *svc_next = __replq_elem(replq->next);
> +                set_timer(repl_timer, svc_next->cur_deadline);
> +            }
> +        }
> +

This blank line should go away; it is quite misleading. At first
glance, I thought it was the else for if ( __vcpu_on_replq(svc) ),
but on a second read I found it actually pairs with
if ( active_timer(repl_timer) ).

> +        else
> +            list_del_init(&svc->replq_elem);
> +    }
> +}
> +
> +/*
> + * An utility function that inserts a vcpu to a
> + * queue based on certain order (EDF)
> + */
> +static void
> +_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem),
> +    struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue)
> +{
> +    struct list_head *iter;
> +
> +    list_for_each(iter, queue)
> +    {
> +        struct rt_vcpu * iter_svc = (*_get_q_elem)(iter);
> +        if ( svc->cur_deadline <= iter_svc->cur_deadline )
> +                break;
> +    }
> +
> +    list_add_tail(elem, iter);
> +}
> +
> +/*
>   * Insert svc with budget in RunQ according to EDF:
>   * vcpus with smaller deadlines go first.
>   * Insert svc without budget in DepletedQ unsorted;
> @@ -397,7 +493,6 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
>  {
>      struct rt_private *prv = rt_priv(ops);
>      struct list_head *runq = rt_runq(ops);
> -    struct list_head *iter;
>
>      ASSERT( spin_is_locked(&prv->lock) );
>
> @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
>
>      /* add svc to runq if svc still has budget */
>      if ( svc->cur_budget > 0 )
> -    {
> -        list_for_each(iter, runq)
> -        {
> -            struct rt_vcpu * iter_svc = __q_elem(iter);
> -            if ( svc->cur_deadline <= iter_svc->cur_deadline )
> -                    break;
> -         }
> -        list_add_tail(&svc->q_elem, iter);
> -    }
> +        _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq);
>      else
> -    {
>          list_add(&svc->q_elem, &prv->depletedq);
> -    }
>  }
>
>  /*
> + * Insert svc into the repl even list:
> + * vcpus that needs to be repl earlier go first.
> + * scheduler private lock serializes this operation
> + * it could re-program the timer if it fires later than
> + * this vcpu's cur_deadline. Also, this is used to program
> + * the timer for the first time.
> + */
> +static void
> +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> +    struct list_head *replq = rt_replq(ops);
> +    struct rt_private *prv = rt_priv(ops);
> +    struct timer *repl_timer = prv->repl_timer;
> +
> +    ASSERT( !__vcpu_on_replq(svc) );
> +
> +    _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, replq);
> +
> +    if( repl_timer->expires == 0 ||
> +        ( active_timer(repl_timer) && repl_timer->expires > svc->cur_deadline ) )
> +        set_timer(repl_timer,svc->cur_deadline);
> +}
> +
> +
> +/*
>   * Init/Free related code
>   */
>  static int
> @@ -449,11 +559,18 @@ rt_init(struct scheduler *ops)
>      INIT_LIST_HEAD(&prv->sdom);
>      INIT_LIST_HEAD(&prv->runq);
>      INIT_LIST_HEAD(&prv->depletedq);
> +    INIT_LIST_HEAD(&prv->replq);
>
>      cpumask_clear(&prv->tickled);
>
>      ops->sched_data = prv;
>
> +    /*
> +     * The timer initialization will happen later when
> +     * the first pcpu is added to this pool in alloc_pdata
> +     */
> +    prv->repl_timer = NULL;
> +
>      return 0;
>
>   no_mem:
> @@ -473,6 +590,10 @@ rt_deinit(const struct scheduler *ops)
>          xfree(_cpumask_scratch);
>          _cpumask_scratch = NULL;
>      }
> +
> +    kill_timer(prv->repl_timer);
> +    xfree(prv->repl_timer);
> +
>      xfree(prv);
>  }
>
> @@ -493,6 +614,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu)
>      if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) )
>          return NULL;
>
> +    if( prv->repl_timer == NULL )
> +    {
> +        /* allocate the timer on the first cpu of this pool */
> +        prv->repl_timer = xzalloc(struct timer);
> +
> +        if(prv->repl_timer == NULL )
> +            return NULL;
> +
> +        init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu);
> +    }
> +
>      /* 1 indicates alloc. succeed in schedule.c */
>      return (void *)1;
>  }
> @@ -586,6 +718,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
>          return NULL;
>
>      INIT_LIST_HEAD(&svc->q_elem);
> +    INIT_LIST_HEAD(&svc->replq_elem);
>      svc->flags = 0U;
>      svc->sdom = dd;
>      svc->vcpu = vc;
> @@ -609,7 +742,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv)
>  }
>
>  /*
> - * This function is called in sched_move_domain() in schedule.c
> + * It is called in sched_move_domain() and sched_init_vcpu
> + * in schedule.c
>   * When move a domain to a new cpupool.
>   * It inserts vcpus of moving domain to the scheduler's RunQ in
>   * dest. cpupool.
> @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
>      lock = vcpu_schedule_lock_irq(vc);
>      if ( __vcpu_on_q(svc) )
>          __q_remove(svc);
> +
> +    if( __vcpu_on_replq(svc) )
> +        __replq_remove(ops,svc);
> +
>      vcpu_schedule_unlock_irq(lock, vc);
>  }
>
> @@ -785,44 +923,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask)
>  }
>
>  /*
> - * Update vcpu's budget and
> - * sort runq by insert the modifed vcpu back to runq
> - * lock is grabbed before calling this function
> - */
> -static void
> -__repl_update(const struct scheduler *ops, s_time_t now)
> -{
> -    struct list_head *runq = rt_runq(ops);
> -    struct list_head *depletedq = rt_depletedq(ops);
> -    struct list_head *iter;
> -    struct list_head *tmp;
> -    struct rt_vcpu *svc = NULL;
> -
> -    list_for_each_safe(iter, tmp, runq)
> -    {
> -        svc = __q_elem(iter);
> -        if ( now < svc->cur_deadline )
> -            break;
> -
> -        rt_update_deadline(now, svc);
> -        /* reinsert the vcpu if its deadline is updated */
> -        __q_remove(svc);
> -        __runq_insert(ops, svc);
> -    }
> -
> -    list_for_each_safe(iter, tmp, depletedq)
> -    {
> -        svc = __q_elem(iter);
> -        if ( now >= svc->cur_deadline )
> -        {
> -            rt_update_deadline(now, svc);
> -            __q_remove(svc); /* remove from depleted queue */
> -            __runq_insert(ops, svc); /* add to runq */
> -        }
> -    }
> -}
> -
> -/*
>   * schedule function for rt scheduler.
>   * The lock is already grabbed in schedule.c, no need to lock here
>   */
> @@ -841,7 +941,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
>      /* burn_budget would return for IDLE VCPU */
>      burn_budget(ops, scurr, now);
>
> -    __repl_update(ops, now);
>
>      if ( tasklet_work_scheduled )
>      {
> @@ -868,6 +967,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
>          set_bit(__RTDS_delayed_runq_add, &scurr->flags);
>
>      snext->last_start = now;
> +
> +    ret.time =  -1; /* if an idle vcpu is picked */
>      if ( !is_idle_vcpu(snext->vcpu) )
>      {
>          if ( snext != scurr )
> @@ -880,9 +981,11 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
>              snext->vcpu->processor = cpu;
>              ret.migrated = 1;
>          }
> +
> +        ret.time = snext->budget; /* invoke the scheduler next time */
> +
>      }
>
> -    ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
>      ret.task = snext->vcpu;
>
>      /* TRACE */
> @@ -914,7 +1017,7 @@ static void
>  rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
>  {
>      struct rt_vcpu * const svc = rt_vcpu(vc);
> -
> +

The next patch should avoid this. You may have some trailing whitespace in the line.
git has commands to highlight trailing whitespace.
You can have a look at this post:
http://stackoverflow.com/questions/5257553/coloring-white-space-in-git-diffs-output
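For example, these standard git options should do the trick (a quick
sketch; check them against your git version):

    $ git diff --check                          # complain about lines that add trailing whitespace
    $ git config core.whitespace trailing-space
    $ git diff                                  # with colors on, trailing whitespace is highlighted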

>      BUG_ON( is_idle_vcpu(vc) );
>      SCHED_STAT_CRANK(vcpu_sleep);
>
> @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
>          __q_remove(svc);
>      else if ( svc->flags & RTDS_delayed_runq_add )
>          clear_bit(__RTDS_delayed_runq_add, &svc->flags);
> +
> +    if( __vcpu_on_replq(svc) )
> +        __replq_remove(ops, svc);
>  }
>
>  /*
> @@ -1026,10 +1132,6 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
>  {
>      struct rt_vcpu * const svc = rt_vcpu(vc);
>      s_time_t now = NOW();
> -    struct rt_private *prv = rt_priv(ops);
> -    struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
> -    struct rt_dom *sdom = NULL;
> -    cpumask_t *online;
>
>      BUG_ON( is_idle_vcpu(vc) );
>
> @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
>      else
>          SCHED_STAT_CRANK(vcpu_wake_not_runnable);
>
> +    /* budget repl here is needed before inserting back to runq. If so,
> +     * it should be re-inserted back to the replenishment queue.
> +     */
> +    if ( now >= svc->cur_deadline)
> +    {
> +        rt_update_deadline(now, svc);
> +        __replq_remove(ops, svc);
> +    }
> +
> +    if( !__vcpu_on_replq(svc) )
> +        __replq_insert(ops, svc);
> +
>      /* If context hasn't been saved for this vcpu yet, we can't put it on
>       * the Runqueue/DepletedQ. Instead, we set a flag so that it will be
>       * put on the Runqueue/DepletedQ after the context has been saved.
> @@ -1061,22 +1175,10 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
>          return;
>      }
>
> -    if ( now >= svc->cur_deadline)
> -        rt_update_deadline(now, svc);
> -
>      /* insert svc to runq/depletedq because svc is not in queue now */
>      __runq_insert(ops, svc);
>
> -    __repl_update(ops, now);
> -
> -    ASSERT(!list_empty(&prv->sdom));
> -    sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
> -    online = cpupool_domain_cpumask(sdom->dom);
> -    snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */
> -
> -    runq_tickle(ops, snext);
> -
> -    return;
> +    runq_tickle(ops, svc);
>  }
>
>  /*
> @@ -1087,10 +1189,6 @@ static void
>  rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
>  {
>      struct rt_vcpu *svc = rt_vcpu(vc);
> -    struct rt_vcpu *snext = NULL;
> -    struct rt_dom *sdom = NULL;
> -    struct rt_private *prv = rt_priv(ops);
> -    cpumask_t *online;
>      spinlock_t *lock = vcpu_schedule_lock_irq(vc);
>
>      clear_bit(__RTDS_scheduled, &svc->flags);
> @@ -1102,14 +1200,7 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
>           likely(vcpu_runnable(vc)) )
>      {
>          __runq_insert(ops, svc);
> -        __repl_update(ops, NOW());
> -
> -        ASSERT(!list_empty(&prv->sdom));
> -        sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
> -        online = cpupool_domain_cpumask(sdom->dom);
> -        snext = __runq_pick(ops, online); /* pick snext from ALL cpus */
> -
> -        runq_tickle(ops, snext);
> +        runq_tickle(ops, svc);
>      }
>  out:
>      vcpu_schedule_unlock_irq(lock, vc);
> @@ -1168,6 +1259,80 @@ rt_dom_cntl(
>      return rc;
>  }
>
> +/*
> + * The replenishment timer handler picks vcpus
> + * from the replq and does the actual replenishment
> + */
> +static void repl_handler(void *data){
> +    unsigned long flags;
> +    s_time_t now = NOW();
> +    struct scheduler *ops = data;
> +    struct rt_private *prv = rt_priv(ops);
> +    struct list_head *replq = rt_replq(ops);
> +    struct timer *repl_timer = prv->repl_timer;
> +    struct list_head *iter, *tmp;
> +    struct rt_vcpu *svc = NULL;
> +
> +    spin_lock_irqsave(&prv->lock, flags);
> +
> +    stop_timer(repl_timer);
> +
> +    list_for_each_safe(iter, tmp, replq)
> +    {
> +        svc = __replq_elem(iter);
> +
> +        if ( now >= svc->cur_deadline )
> +        {
> +            rt_update_deadline(now, svc);
> +
> +            /*
> +             * when the replenishment happens
> +             * svc is either on a pcpu or on
> +             * runq/depletedq
> +             */
> +            if( __vcpu_on_q(svc) )
> +            {
> +                /* put back to runq */
> +                __q_remove(svc);
> +                __runq_insert(ops, svc);
> +            }
> +
> +            /*
> +             * tickle regardless where it's at
> +             * because a running vcpu could have
> +             * a later deadline than others after
> +             * replenishment
> +             */
> +            runq_tickle(ops, svc);
> +
> +            /* update replenishment event queue */
> +            __replq_remove(ops, svc);
> +            __replq_insert(ops, svc);
> +        }
> +

This blank line should be removed...

> +        else
> +            break;
> +    }
> +
> +    /*
> +     * use the vcpu that's on the top
> +     * or else don't program the timer
> +     */
> +    if( !list_empty(replq) )
> +        set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline);
> +
> +    spin_unlock_irqrestore(&prv->lock, flags);
> +
> +}
> +
> +/* checks if a timer has been stopped or not */
> +bool_t active_timer(struct timer *timer)
> +{
> +    ASSERT(timer->status >= TIMER_STATUS_inactive);
> +    ASSERT(timer->status <= TIMER_STATUS_in_list);
> +    return (timer->status >= TIMER_STATUS_in_heap);
> +}
> +
>  static struct rt_private _rt_priv;
>
>  static const struct scheduler sched_rtds_def = {
> --
> 1.7.9.5
>

Thanks and best regards,

Meng
Tianyang Chen Feb. 18, 2016, 1:55 a.m. UTC | #2
On 2/15/2016 10:55 PM, Meng Xu wrote:
> Hi Tianyang,
>
> Thanks for the patch! Great work and really quick action! :-)
> I will just comment on something I quickly find out and would look
> forwarding to Dario's comment.
>
> On Mon, Feb 8, 2016 at 11:33 PM, Tianyang Chen <tiche@seas.upenn.edu> wrote:
>  > [full patch quote trimmed]
>
> This blank line should go away; it is quite misleading. At first
> glance, I thought it was the else for if ( __vcpu_on_replq(svc) ),
> but on a second read I found it actually pairs with
> if ( active_timer(repl_timer) ).
>

Sure

>  > [...]
>  > @@ -914,7 +1017,7 @@ static void
>  >  rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
>  >  {
>  >      struct rt_vcpu * const svc = rt_vcpu(vc);
>  > -
>  > +
>
> The next patch should avoid this. You may have some trailing whitespace in the line.
> git has commands to highlight trailing whitespace.
> You can have a look at this post:
> http://stackoverflow.com/questions/5257553/coloring-white-space-in-git-diffs-output
>

OK, I will check that out.

Dario: Is there anything else we need to pick up in this patch?

Thanks,
Tianyang
Tianyang Chen Feb. 24, 2016, 3:23 p.m. UTC | #3
Hey Dario: We are aiming for the next release and would appreciate it if
you could leave some comments on this version. Thanks.


Tianyang

On 2/8/2016 11:33 PM, Tianyang Chen wrote:
> [full patch quoted again; trimmed]
Dario Faggioli Feb. 25, 2016, 2:02 a.m. UTC | #4
Hey,

Here I am, sorry for the delay. :-(

On Mon, 2016-02-08 at 23:33 -0500, Tianyang Chen wrote:
> Changes since v4:
>     removed unnecessary replenishment queue checks in vcpu_wake()
>     extended replq_remove() to all cases in vcpu_sleep()
>     used _deadline_queue_insert() helper function for both queues
>     _replq_insert() and _replq_remove() program timer internally
> 
> Changes since v3:
>     removed running queue.
>     added repl queue to keep track of repl events.
>     timer is now per scheduler.
>     timer is init on a valid cpu in a cpupool.
> 
> Signed-off-by: Tianyang Chen <tiche@seas.upenn.edu>
> Signed-off-by: Meng Xu <mengxu@cis.upenn.edu>
> Signed-off-by: Dagaen Golomb <dgolomb@seas.upenn.edu>
>
So, the actual changelog... why did it disappear? :-)

> diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
> index 2e5430f..1f0bb7b 100644
> --- a/xen/common/sched_rt.c
> +++ b/xen/common/sched_rt.c

> @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc)
>  }
>  
>  /*
> + * Removing a vcpu from the replenishment queue could
> + * re-program the timer for the next replenishment event
> + * if the timer is currently active
> + */
> +static inline void
> +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> +    struct rt_private *prv = rt_priv(ops);
> +    struct list_head *replq = rt_replq(ops);
> +    struct timer* repl_timer = prv->repl_timer;
> +
> +    if ( __vcpu_on_replq(svc) )
> +    {
So, can this be false? If yes, when and why?

> +        /*
> +         * disarm the timer if removing the first replenishment
> event
> +         * which is going to happen next
> +         */
> +        if( active_timer(repl_timer) )
> +        {
>
And here again, isn't it the case that, if there is a vcpu in the
replenishment queue, then the timer _must_ be active? (I.e., if the
above is true, isn't this also always true?)

> +            struct rt_vcpu *next_repl = __replq_elem(replq->next);
> +
> +            if( next_repl->cur_deadline == svc->cur_deadline )
> +                repl_timer->expires = 0;
> +
I think we need to call stop_timer() here, don't we? I know that
set_timer() does that itself, but I think it is indeed necessary. E.g.,
right now, you're manipulating the timer without the timer lock.
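Just to illustrate what I mean, an untested sketch, reusing only the
helpers already in this patch:

    if ( active_timer(repl_timer) )
    {
        struct rt_vcpu *next_repl = __replq_elem(replq->next);

        /* quiesce the timer before touching its fields */
        stop_timer(repl_timer);

        if ( next_repl->cur_deadline == svc->cur_deadline )
            repl_timer->expires = 0;

        list_del_init(&svc->replq_elem);

        /* re-arm the timer for the next replenishment event, if any */
        if ( !list_empty(replq) )
            set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline);
    }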

> +            list_del_init(&svc->replq_elem);
> +
> +            /* re-arm the timer for the next replenishment event */
> +            if( !list_empty(replq) )
> +            {
> +                struct rt_vcpu *svc_next = __replq_elem(replq->next);
> +                set_timer(repl_timer, svc_next->cur_deadline);
> +            }
> +        }
> +
> +        else
> +            list_del_init(&svc->replq_elem);
>
I don't like the structure of this function, and I'd ask for a
different if/then/else logic to be used, but let's first see --by
answering the questions above-- if some of these if-s can actually go
away. :-)

> +    }
> +}
> +
> +/*
> + * An utility function that inserts a vcpu to a
> + * queue based on certain order (EDF)
>
End long comments with a full stop.

> + */
> +static void
> +_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem),
>
There's really a lot of "underscore-prefixing" in this file.

This is, of course, not your fault, and should not be addressed in this
patch. But, at least when it comes to new code, let's avoid making
things worse.

So, "deadline_queue_insert()" is just ok, I think. Yes, I know I did
suggest to call it how it's called in this patch, underscore included,
but I now think it would be better to get rid of that.

> +    struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue)
> +{
> +    struct list_head *iter;
> +
> +    list_for_each(iter, queue)
> +    {
> +        struct rt_vcpu * iter_svc = (*_get_q_elem)(iter);
> +        if ( svc->cur_deadline <= iter_svc->cur_deadline )
> +                break;
>
This looks over-indented.

> @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
>  
>      /* add svc to runq if svc still has budget */
>      if ( svc->cur_budget > 0 )
> -    {
> -        list_for_each(iter, runq)
> -        {
> -            struct rt_vcpu * iter_svc = __q_elem(iter);
> -            if ( svc->cur_deadline <= iter_svc->cur_deadline )
> -                    break;
> -         }
> -        list_add_tail(&svc->q_elem, iter);
> -    }
> +        _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq);
>      else
> -    {
>          list_add(&svc->q_elem, &prv->depletedq);
> -    }
Can we ASSERT() something about a replenishment event being queued, in
either case?

As I said already, use full words in comments ("Insert svc in the
replenishment timer queue" or "Insert svc in the replenishment events
list").

> + * vcpus that needs to be repl earlier go first.
>
Ditto. And this can just be something like "in replenishment time
order", added to the sentence above.

> + * scheduler private lock serializes this operation
>
This is true for _everything_ in this scheduler right now, so there's
no real need to say it.

> + * it could re-program the timer if it fires later than
> + * this vcpu's cur_deadline. 
>
Do you mean "The timer may be re-programmed if svc is inserted at the
front of the events list" ?

> Also, this is used to program
> + * the timer for the first time.
>
"Also, this is what programs the timer the first time, when called from
xxx"

> + */
> +static void
> +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
> +{
> +    struct list_head *replq = rt_replq(ops);
> +    struct rt_private *prv = rt_priv(ops);
> +    struct timer *repl_timer = prv->repl_timer;
> +
> +    ASSERT( !__vcpu_on_replq(svc) );
> +
> +    _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, replq);
> +
> +    if( repl_timer->expires == 0 ||
> +        ( active_timer(repl_timer) && repl_timer->expires > svc->cur_deadline ) )
> +        set_timer(repl_timer,svc->cur_deadline);
>
Is it the case that you need to call set_timer() if (and only if) svc
has been inserted at the *front* of the replenishment queue? (If list
was empty, as in the case of first timer activation, it would be the
first and only.)

If that is the case, I'd just make deadline_queue_insert() return a
flag to that effect (e.g., it can return 1 if svc is the new first
element, 0 if it is not), and use the flag itself to guard the (re-)arm 
of the timer... What do you think?
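Something along these lines (again, just an untested sketch of the idea):

    static int
    deadline_queue_insert(struct rt_vcpu * (*qelem)(struct list_head *),
                          struct rt_vcpu *svc, struct list_head *elem,
                          struct list_head *queue)
    {
        struct list_head *iter;
        int pos = 0;

        list_for_each ( iter, queue )
        {
            struct rt_vcpu * iter_svc = qelem(iter);

            if ( svc->cur_deadline <= iter_svc->cur_deadline )
                break;
            pos++;
        }
        list_add_tail(elem, iter);

        /* 1 iff svc is now the head of the queue */
        return !pos;
    }

and then, in __replq_insert():

    if ( deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, replq) )
        set_timer(repl_timer, svc->cur_deadline);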

> @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
>      lock = vcpu_schedule_lock_irq(vc);
>      if ( __vcpu_on_q(svc) )
>          __q_remove(svc);
> +
> +    if( __vcpu_on_replq(svc) )
> +        __replq_remove(ops,svc);
> +
As the code in this patch stands, you're
checking __vcpu_on_replq(svc) twice. In fact, you do it here and inside
__replq_remove(). I've already said that I'd like for the check inside
__rqplq_remove() to go away so, keep this one here (if it's really
necessary) and kill the other one inside the function.

> @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
>          __q_remove(svc);
>      else if ( svc->flags & RTDS_delayed_runq_add )
>          clear_bit(__RTDS_delayed_runq_add, &svc->flags);
> +
> +    if( __vcpu_on_replq(svc) )
> +        __replq_remove(ops, svc);
>
Same here.

> @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
>      else
>          SCHED_STAT_CRANK(vcpu_wake_not_runnable);
>  
> +    /* budget repl here is needed before inserting back to runq. If so,
>
Comment style.

Also, what do you mean by that "If so"... "If so" what?
> +     * it should be re-inserted back to the replenishment queue.
> +     */
> +    if ( now >= svc->cur_deadline)
> +    {   
> +        rt_update_deadline(now, svc);
> +        __replq_remove(ops, svc);
> +    }
> +
> +    if( !__vcpu_on_replq(svc) )
> +        __replq_insert(ops, svc);
> +    
And here I am again: is it really necessary to check whether svc is not
in the replenishment queue? It looks to me that it really should not be
there... but maybe it can, because we remove the event from the queue
when the vcpu sleeps, but *not* when the vcpu blocks?

> @@ -1168,6 +1259,80 @@ rt_dom_cntl(
>      return rc;
>  }
>  
> +/*
> + * The replenishment timer handler picks vcpus
> + * from the replq and does the actual replenishment
> + */
> +static void repl_handler(void *data){
> +    unsigned long flags;
> +    s_time_t now = NOW();
> +    struct scheduler *ops = data; 
> +    struct rt_private *prv = rt_priv(ops);
> +    struct list_head *replq = rt_replq(ops);
> +    struct timer *repl_timer = prv->repl_timer;
> +    struct list_head *iter, *tmp;
> +    struct rt_vcpu *svc = NULL;
> +
> +    spin_lock_irqsave(&prv->lock, flags);
> +
> +    stop_timer(repl_timer);
> +
> +    list_for_each_safe(iter, tmp, replq)
> +    {
> +        svc = __replq_elem(iter);
> +
> +        if ( now >= svc->cur_deadline )
> +        {
> +            rt_update_deadline(now, svc);
> +
> +            /*
> +             * when the replenishment happens
> +             * svc is either on a pcpu or on
> +             * runq/depletedq
> +             */
> +            if( __vcpu_on_q(svc) )
> +            {
> +                /* put back to runq */
> +                __q_remove(svc);
> +                __runq_insert(ops, svc);
> +            }
> +
> +            /* 
> +             * tickle regardless where it's at 
> +             * because a running vcpu could have
> +             * a later deadline than others after
> +             * replenishment
> +             */
> +            runq_tickle(ops, svc);
> +
> +            /* update replenishment event queue */
> +            __replq_remove(ops, svc);
> +            __replq_insert(ops, svc);
> +        }
> +
> +        else
> +            break;
>
Invert the logic here:

    if ( now < svc->cur_deadline )
        break;

    rt_update_deadline(now, svc);
    ...

Thanks and Regards,
Dario
Tianyang Chen Feb. 25, 2016, 6:15 a.m. UTC | #5
On 2/24/2016 9:02 PM, Dario Faggioli wrote:
> Hey,
>
> Here I am, sorry for the delay. :-(
No problem, I think we are almost there.
>
> On Mon, 2016-02-08 at 23:33 -0500, Tianyang Chen wrote:
>> Changes since v4:
>>      removed unnecessary replenishment queue checks in vcpu_wake()
>>      extended replq_remove() to all cases in vcpu_sleep()
>>      used _deadline_queue_insert() helper function for both queues
>>      _replq_insert() and _replq_remove() program timer internally
>>
>> Changes since v3:
>>      removed running queue.
>>      added repl queue to keep track of repl events.
>>      timer is now per scheduler.
>>      timer is init on a valid cpu in a cpupool.
>>
>> Signed-off-by: Tianyang Chen <tiche@seas.upenn.edu>
>> Signed-off-by: Meng Xu <mengxu@cis.upenn.edu>
>> Signed-off-by: Dagaen Golomb <dgolomb@seas.upenn.edu>
>>
> So, the actual changelog... why did it disappear? :-)

I will add it in the next version.

>> diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
>> index 2e5430f..1f0bb7b 100644
>> --- a/xen/common/sched_rt.c
>> +++ b/xen/common/sched_rt.c
>> @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc)
>>   }
>>   
>>   /*
>> + * Removing a vcpu from the replenishment queue could
>> + * re-program the timer for the next replenishment event
>> + * if the timer is currently active
>> + */
>> +static inline void
>> +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
>> +{
>> +    struct rt_private *prv = rt_priv(ops);
>> +    struct list_head *replq = rt_replq(ops);
>> +    struct timer* repl_timer = prv->repl_timer;
>> +
>> +    if ( __vcpu_on_replq(svc) )
>> +    {
> So, can this be false? If yes, when and why?
>
>> +        /*
>> +         * disarm the timer if removing the first replenishment
>> event
>> +         * which is going to happen next
>> +         */
>> +        if( active_timer(repl_timer) )
>> +        {
>>
> And here again, isn't it the case that, if there is a vcpu in the
> replenishment queue, then the timer _must_ be active? (I.e., if the
> above is true, isn't this also always true?)

So, the reason for this check is that the code inside the timer handler
also calls this function while the timer is stopped. We don't want to
start the timer inside the timer handler while it's manipulating the
replenishment queue, right? It could be removing/adding back more than
one vcpu, and the timer should only be set at the very end of the timer
handler.

In fact, I copied a static function from timer.c just to check whether a
timer is active or not. Not sure if this is good practice.

>> +            struct rt_vcpu *next_repl = __replq_elem(replq->next);
>> +
>> +            if( next_repl->cur_deadline == svc->cur_deadline )
>> +                repl_timer->expires = 0;
>> +
> I think we need to call stop_timer() here, don't we? I know that
> set_timer() does that itself, but I think it is indeed necessary. E.g.,
> right now, you're manipulating the timer without the timer lock.

So, even when the timer handler is protected by a global lock, is it
still necessary to stop the timer? Also, the timer only runs on a
single CPU per scheduler.

>> +            list_del_init(&svc->replq_elem);
>> +
>> +            /* re-arm the timer for the next replenishment event */
>> +            if( !list_empty(replq) )
>> +            {
>> +                struct rt_vcpu *svc_next = __replq_elem(replq-
>>> next);
>> +                set_timer(repl_timer, svc_next->cur_deadline);
>> +            }
>> +        }
>> +
>> +        else
>> +            list_del_init(&svc->replq_elem);
>>
> I don't like the structure of this function, and I'd ask for a
> different if/then/else logic to be used, but let's first see --by
> answering the questions above-- if some of these if-s can actually go
> away. :-)

So this function reduces to:

         /*
          * disarm the timer if removing the first replenishment event
          * which is going to happen next
          */
         if( active_timer(repl_timer) )
         {
             stop_timer(repl_timer);
             repl_timer->expires = 0;

             list_del_init(&svc->replq_elem);

             /* re-arm the timer for the next replenishment event */
             if( !list_empty(replq) )
             {
                 struct rt_vcpu *svc_next = __replq_elem(replq->next);
                 set_timer(repl_timer, svc_next->cur_deadline);
             }
         }
         else
             list_del_init(&svc->replq_elem);


>> +    }
>> +}
>> +
>> +/*
>> + * An utility function that inserts a vcpu to a
>> + * queue based on certain order (EDF)
>>
> End long comments with a full stop.
>
>> + */
>> +static void
>> +_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct
>> list_head *elem),
>>
> There's really a lot of "underscore-prefixing" in this file.
>
> This is, of course, not your fault, and should not be addressed in this
> patch. But, at least when it comes to new code, let's avoid making
> things worse.
>
> So, "deadline_queue_insert()" is just ok, I think. Yes, I know I did
> suggest to call it how it's called in this patch, underscore included,
> but I now think it would be better to get rid of that.
Sure.
>> +    struct rt_vcpu *svc, struct list_head *elem, struct list_head
>> *queue)
>> +{
>> +    struct list_head *iter;
>> +
>> +    list_for_each(iter, queue)
>> +    {
>> +        struct rt_vcpu * iter_svc = (*_get_q_elem)(iter);
>> +        if ( svc->cur_deadline <= iter_svc->cur_deadline )
>> +                break;
>>
> This looks too much indented.
>
>> @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops,
>> struct rt_vcpu *svc)
>>   
>>       /* add svc to runq if svc still has budget */
>>       if ( svc->cur_budget > 0 )
>> -    {
>> -        list_for_each(iter, runq)
>> -        {
>> -            struct rt_vcpu * iter_svc = __q_elem(iter);
>> -            if ( svc->cur_deadline <= iter_svc->cur_deadline )
>> -                    break;
>> -         }
>> -        list_add_tail(&svc->q_elem, iter);
>> -    }
>> +        _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq);
>>       else
>> -    {
>>           list_add(&svc->q_elem, &prv->depletedq);
>> -    }
> Can we ASSERT() something about a replenishment event being queued, in
> either case?
Do you mean asserting that the svc being added is on the replenishment
queue after insertion, in either case?
> As I said already, use full words in comments ("Insert svc in the
> replenishment timer queue" or "Insert svc in the replenishment events
> list").
>
>> + * vcpus that needs to be repl earlier go first.
>>
> Ditto. And this can just be something like "in replenishment time
> order", added to the sentence above.
>
>> + * scheduler private lock serializes this operation
>>
> This is true for _everything_ in this scheduler right now, so there's
> no real need to say it.
>
>> + * it could re-program the timer if it fires later than
>> + * this vcpu's cur_deadline.
>>
> Do you mean "The timer may be re-programmed if svc is inserted at the
> front of the events list" ?
>
>> Also, this is used to program
>> + * the timer for the first time.
>>
> "Also, this is what programs the timer the first time, when called from
> xxx"
I agree with the above, and for this one: if svc is going to be at the
front of the list, it programs the timer. In the other case, it sees
that repl_timer->expires == 0, which happens either during system boot
or after the only vcpu on the replenishment list has been removed, in
replq_remove(). So it's not explicitly called "for the first time"
anywhere.
Maybe "It programs the timer for the first time, or when the only vcpu
on the replenishment event list has been removed"?
>> + */
>> +static void
>> +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
>> +{
>> +    struct list_head *replq = rt_replq(ops);
>> +    struct rt_private *prv = rt_priv(ops);
>> +    struct timer *repl_timer = prv->repl_timer;
>> +
>> +    ASSERT( !__vcpu_on_replq(svc) );
>> +
>> +    _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem,
>> replq);
>> +
>> +    if( repl_timer->expires == 0 ||
>> +        ( active_timer(repl_timer) && repl_timer->expires > svc-
>>> cur_deadline ) )
>> +        set_timer(repl_timer,svc->cur_deadline);
>>
> Is it the case that you need to call set_timer() if (and only if) svc
> has been inserted at the *front* of the replenishment queue? (If list
> was empty, as in the case of first timer activation, it would be the
> first and only.)
>
> If that is the case, I'd just make deadline_queue_insert() return a
> flag to that effect (e.g., it can return 1 if svc is the new first
> element, 0 if it is not), and use the flag itself to guard the (re-)arm
> of the timer... What do you think?
Yeah, good call.
>> @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops,
>> struct vcpu *vc)
>>       lock = vcpu_schedule_lock_irq(vc);
>>       if ( __vcpu_on_q(svc) )
>>           __q_remove(svc);
>> +
>> +    if( __vcpu_on_replq(svc) )
>> +        __replq_remove(ops,svc);
>> +
> As per the code in this patch looks like, you're
> checking __vcpu_on_replq(svc) twice. In fact, you do it here and inside
> __replq_remove(). I've already said that I'd like for the check inside
> __rqplq_remove() to go away so, keep this one here (if it's really
> necessary) and kill the other one inside the function.
>
>> @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops,
>> struct vcpu *vc)
>>           __q_remove(svc);
>>       else if ( svc->flags & RTDS_delayed_runq_add )
>>           clear_bit(__RTDS_delayed_runq_add, &svc->flags);
>> +
>> +    if( __vcpu_on_replq(svc) )
>> +        __replq_remove(ops, svc);
>>
> Same here.
So, what about __q_remove()? It looks like a couple of places are
double-checking as well.
>> @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops,
>> struct vcpu *vc)
>>       else
>>           SCHED_STAT_CRANK(vcpu_wake_not_runnable);
>>   
>> +    /* budget repl here is needed before inserting back to runq. If
>> so,
>>
> Comment style.
>
> Also, what do you mean with that "If so"... "If so" what?
I mean, if the budget is replenished here, then the vcpu should be
re-inserted back. I guess it's self-explanatory.
>> +     * it should be re-inserted back to the replenishment queue.
>> +     */
>> +    if ( now >= svc->cur_deadline)
>> +    {
>> +        rt_update_deadline(now, svc);
>> +        __replq_remove(ops, svc);
>> +    }
>> +
>> +    if( !__vcpu_on_replq(svc) )
>> +        __replq_insert(ops, svc);
>> +
> And here I am again: is it really necessary to check whether svc is not
> in the replenishment queue? It looks to me that it really should not be
> there... but maybe it can, because we remove the event from the queue
> when the vcpu sleeps, but *not* when the vcpu blocks?
Yeah. That is the case where I keep getting an assertion failure if the
check is removed. I'm thinking that when a vcpu unblocks, it could
potentially fall through here. And like you said, spurious sleep mostly
happens when a vcpu is running, though it could happen in other cases,
although rarely.

Thanks,
Tianyang
Dario Faggioli Feb. 25, 2016, 10:34 a.m. UTC | #6
On Thu, 2016-02-25 at 01:15 -0500, Tianyang Chen wrote:
> 
> On 2/24/2016 9:02 PM, Dario Faggioli wrote:
> > Hey,
> > 
> > Here I am, sorry for the delay. :-(
> No problem, I think we are almost there.
>
We probably are, although a few more adjustments in the timer logic
are required, IMO.

I'll try to be quick in replying to next rounds. :-)

> > On Mon, 2016-02-08 at 23:33 -0500, Tianyang Chen wrote:
> > > 
> > > diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
> > > index 2e5430f..1f0bb7b 100644
> > > --- a/xen/common/sched_rt.c
> > > +++ b/xen/common/sched_rt.c
> > > @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc)
> > >   }
> > >   
> > >   /*
> > > + * Removing a vcpu from the replenishment queue could
> > > + * re-program the timer for the next replenishment event
> > > + * if the timer is currently active
> > > + */
> > > +static inline void
> > > +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
> > > +{
> > > +    struct rt_private *prv = rt_priv(ops);
> > > +    struct list_head *replq = rt_replq(ops);
> > > +    struct timer* repl_timer = prv->repl_timer;
> > > +
> > > +    if ( __vcpu_on_replq(svc) )
> > > +    {
> > So, can this be false? If yes, when and why?
> > 
> > > +        /*
> > > +         * disarm the timer if removing the first replenishment
> > > event
> > > +         * which is going to happen next
> > > +         */
> > > +        if( active_timer(repl_timer) )
> > > +        {
> > > 
> > And here again, isn't it the case that, if there is a vcpu in the
> > replenishment queue, then the timer _must_ be active? (I.e., if the
> > above is true, isn't this also always true?)
> 
> So, the reason for this check is that the code inside timer handler
> also 
> calls this function when
> timer is stopped. We don't want to start the timer inside the timer 
> handler when it's manipulating
> the replenishment queue right? 
>
So, this function and the timer handler are serialized by the global
RTDS scheduler lock, so I don't see chances for races due to concurrent
execution of them from different pcpus.

And for the case when the timer handler calls this, if that is the
*only* reason why you put this check here, I think it's overkill.
Perhaps this can be dealt with in the timer handler, by just doing the
list_del_init() itself (ugly, but maybe with a comment...). Or we can
make __replq_insert() able to deal with re-insertions, i.e., cases when
the vcpu was already on the replenishment queue and we're basically
changing its position (slightly better, but we lose the ASSERT() in
__replq_insert() itself). Or we can even split this function in two:
one (e.g., _replq_remove()) only doing the actual dequeueing, and
another one (e.g., replq_remove()) calling _replq_remove() for the
dequeueing and then dealing with the timer (and in this case, the timer
handler can just call _replq_remove(), as, in that case, the
reprogramming will happen later anyway).

I can't be sure which solution would look and work best without
actually seeing them... I guess, pick one up and give it a try.
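
Just to illustrate the last option, a sketch of what the split might
look like (again, only a sketch, pick whatever names you prefer):

    /* dequeue only; safe to call from inside the timer handler */
    static void
    _replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
    {
        list_del_init(&svc->replq_elem);
    }

    /* dequeue and take care of the replenishment timer */
    static void
    replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
    {
        struct rt_private *prv = rt_priv(ops);
        struct list_head *replq = rt_replq(ops);

        _replq_remove(ops, svc);

        stop_timer(prv->repl_timer);
        if ( !list_empty(replq) )
            set_timer(prv->repl_timer,
                      __replq_elem(replq->next)->cur_deadline);
    }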

> Because it could be removing/adding back 
> more than one vcpu
> and the timer should only be set at the very end of the timer
> handler.
> 
Ok, so that's why there's a similar check (which I also did not like)
in __replq_insert() then, I see it now.

> In fact, I copied a static function from timer.c just to check if a 
> timer is active or not. Not sure if
> this is a good practice or not.
> 
Yes, I wanted to comment on that too, but then forgot.

Indeed it is *not* a good practice. If you need anything like that, you
should expose the original function (by making it !static and adding it
to a header). However, every time something like that happens, it means
that we're abusing the intended API of a particular subsystem, and it
should serve as a warning to think twice about whether there really is
no other way of doing things (of course, there are cases where there
isn't, but this should not be one of those).

> > > +            struct rt_vcpu *next_repl = __replq_elem(replq-
> > > >next);
> > > +
> > > +            if( next_repl->cur_deadline == svc->cur_deadline )
> > > +                repl_timer->expires = 0;
> > > +
> > I think we need to call stop_timer() here, don't we? I know that
> > set_timer() does that itself, but I think it is indeed necessary.
> > E.g.,
> > right now, you're manipulating the timer without the timer lock.
> 
> So when the timer handler is protected by a global lock, it is still 
> necessary to stop the timer?
>
What do you mean by "a global lock"? Timers are maintained in the
per-cpu heap. timer->expires is what's used to keep the heap... well...
a heap, not to mention that stopping a timer may call for some action
on the timers of a cpu (e.g., look at deactivate_timer() and at the
fact that it may or may not actually raise TIMER_SOFTIRQ).

So, stopping and starting a timer need to be done with the proper
timer API calls, because the timer infrastructure needs to know about
them, and the key of the heap can't just be changed behind the timer
infrastructure's back, and without the timer lock (check, in the whole
Xen source code, how often that happens outside of timer.c: basically
never! :-D).

> Also, the timer only runs on a single CPU for a scheduler too.
> 
That's how all timers work, and I don't see how that matters here.

>   So this function reduces to:
>     
>          /*
>           * disarm the timer if removing the first replenishment
> event
>           * which is going to happen next
>           */
>          if( active_timer(repl_timer) )
>
This needs to somehow go away.

>          {
>              stop_timer(replq_timer);
> 	    repl_timer->expires = 0;
>
And you shouldn't need to explicitly set this to 0. It's stopped, so it
won't fire, and that's it.

>              list_del_init(&svc->replq_elem);
> 
>              /* re-arm the timer for the next replenishment event */
>              if( !list_empty(replq) )
>              {
>                  struct rt_vcpu *svc_next = __replq_elem(replq-
> >next);
>                  set_timer(repl_timer, svc_next->cur_deadline);
>              }
>
BTW, here, if we're coming from the timer handler, it is certainly the
case that we are removing one or more vcpus from the front of the
replenishment queue, including the one that actually made the timer
trigger. Therefore, it's ok to restart the timer, if there are
replenishment events left, and the one that has now become the front of
the queue is the one we want to use for reprogramming.

But, if we come from, say, sleep, and we've removed a vcpu whose
replenishment event was, say, in 5th place in the queue, there should
be no need to re-program the timer, should there?

So, it looks to me that this function could also use some hints on
whether the timer should really be re-programmed or not.
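
In code, inside something like the replq_remove() sketched above, such
a hint could be as simple as checking, before dequeueing, whether svc
is the event at the front of the queue:

    /* only a sketch: reprogram only if we removed the next event */
    if ( replq->next == &svc->replq_elem )
    {
        _replq_remove(ops, svc);

        stop_timer(prv->repl_timer);
        if ( !list_empty(replq) )
            set_timer(prv->repl_timer,
                      __replq_elem(replq->next)->cur_deadline);
    }
    else
        _replq_remove(ops, svc);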

> @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops,
> > > struct rt_vcpu *svc)
> > >   
> > >       /* add svc to runq if svc still has budget */
> > >       if ( svc->cur_budget > 0 )
> > > -    {
> > > -        list_for_each(iter, runq)
> > > -        {
> > > -            struct rt_vcpu * iter_svc = __q_elem(iter);
> > > -            if ( svc->cur_deadline <= iter_svc->cur_deadline )
> > > -                    break;
> > > -         }
> > > -        list_add_tail(&svc->q_elem, iter);
> > > -    }
> > > +        _deadline_queue_insert(&__q_elem, svc, &svc->q_elem,
> > > runq);
> > >       else
> > > -    {
> > >           list_add(&svc->q_elem, &prv->depletedq);
> > > -    }
> > Can we ASSERT() something about a replenishment event being queued,
> > in
> > either case?
> Do you mean asserting if the svc added is on queue after being
> inserted 
> in either case?
>
Not sure... I'm wondering whether it is an invariant that, if a vcpu is
inserted in either the runqueue or the depletedq, there should be a
replenishment event queued for it. In both cases, it would be the event
that will give the vcpu its full budget back, no matter whether it
still has some available (in the former case) or has already run out of
it (in the latter case).

So, if that is the case, I think we should put down an
ASSERT(__vcpu_on_replq(svc)) (or something like that).
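
Concretely, I'd expect it to sit at the top of __runq_insert(), next to
the assertions that are already there (assuming, of course, that the
invariant actually holds everywhere):

    ASSERT( spin_is_locked(&prv->lock) );
    ASSERT( !__vcpu_on_q(svc) );
    /* a replenishment event must already be queued for svc */
    ASSERT( __vcpu_on_replq(svc) );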

> > > + * it could re-program the timer if it fires later than
> > > + * this vcpu's cur_deadline.
> > > 
> > Do you mean "The timer may be re-programmed if svc is inserted at
> > the
> > front of the events list" ?
> > 
> > > Also, this is used to program
> > > + * the timer for the first time.
> > > 
> > "Also, this is what programs the timer the first time, when called
> > from
> > xxx"
> I agree with above and for this one, if svc is going to be in the
> front 
> of the list, it programs the timer.
>
Good. :-)

> Or in the other case It sees that repl_timer->expires == 0, which is 
> either during system boot or one the only one vcpu
> on the replenishment list has been removed, in replq_remove(). So
> it's 
> not called anywhere in such sense.
>
Sorry, I don't get this last part... probably because I really don't
like (and find it rather hard to follow) this reasoning about expires
being (artificially) 0. I really think we should get rid of that part.

> Maybe "It programs the timer for the first timer, or when the only
> vcpu 
> on replenishment event list has
> been removed"?
>
Again, what does "or when the only vcpu on replenishment event list has
been removed" mean? If there is no one in the replenishment queue, why
do we need to reprogram? If it's for us, why are we not in the
replenishment queue? And if this is about some interdependency between
__replq_remove() and __replq_insert() in some special call sites, such
a special case needs to be dealt with locally, in that call site, not
by having obscure countermeasures here, for which any user of this
function pays the price (in terms of both performance and code being
harder to understand).

> > > @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops,
> > > struct vcpu *vc)
> > >       lock = vcpu_schedule_lock_irq(vc);
> > >       if ( __vcpu_on_q(svc) )
> > >           __q_remove(svc);
> > > +
> > > +    if( __vcpu_on_replq(svc) )
> > > +        __replq_remove(ops,svc);
> > > +
> > As per the code in this patch looks like, you're
> > checking __vcpu_on_replq(svc) twice. In fact, you do it here and
> > inside
> > __replq_remove(). I've already said that I'd like for the check
> > inside
> > __rqplq_remove() to go away so, keep this one here (if it's really
> > necessary) and kill the other one inside the function.
> > 
> > > @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops,
> > > struct vcpu *vc)
> > >           __q_remove(svc);
> > >       else if ( svc->flags & RTDS_delayed_runq_add )
> > >           clear_bit(__RTDS_delayed_runq_add, &svc->flags);
> > > +
> > > +    if( __vcpu_on_replq(svc) )
> > > +        __replq_remove(ops, svc);
> > > 
> > Same here.
> So, what about __q_remove()? It looks like couple places are double 
> checking as well.
>
Indeed they are. They're bugs. :-D

Not in the sense that they make the code explode, or misbehave, of
course, but I still dislike them rather passionately. If, after this
patch is done and checked in, you guys want to take a stab at
sanitizing that too, it would be awesome. :-)


> > > @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops,
> > > struct vcpu *vc)
> > >       else
> > >           SCHED_STAT_CRANK(vcpu_wake_not_runnable);
> > >   
> > > +    /* budget repl here is needed before inserting back to runq.
> > > If
> > > so,
> > > 
> > Comment style.
> > 
> > Also, what do you mean with that "If so"... "If so" what?
> I mean if the budget is replenished here, then re-insert back. I
> guess 
> it's self-explanatory.
>
I still can't parse that. Maybe:

"If deadline passed while svc was asleep/blocked, we need new
scheduling parameters (a new deadline and full budget), and we also
need to schedule a new replenishment event, according to them."

But then again, I think we should try to enforce the fact that, if a
vcpu is not runnable, there's no pending replenishment for it, and make
it an invariant we can throw ASSERT() against.

We're already doing it for sleeps, after all...
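
If we manage to enforce that, the tail of this hunk could ideally
shrink to something like this (just a sketch of where I'd like us to
get, eventually):

    /* not runnable => no replenishment event can be pending */
    ASSERT( !__vcpu_on_replq(svc) );

    if ( now >= svc->cur_deadline )
        rt_update_deadline(now, svc);

    __replq_insert(ops, svc);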

> > > +     * it should be re-inserted back to the replenishment queue.
> > > +     */
> > > +    if ( now >= svc->cur_deadline)
> > > +    {
> > > +        rt_update_deadline(now, svc);
> > > +        __replq_remove(ops, svc);
> > > +    }
> > > +
> > > +    if( !__vcpu_on_replq(svc) )
> > > +        __replq_insert(ops, svc);
> > > +
> > And here I am again: is it really necessary to check whether svc is
> > not
> > in the replenishment queue? It looks to me that it really should
> > not be
> > there... but maybe it can, because we remove the event from the
> > queue
> > when the vcpu sleeps, but *not* when the vcpu blocks?
> Yeah. That is the case where I keep getting assertion failure if
> it's 
> removed. 
>
Which one ASSERT() fails?

> I'm thinking when
> a vcpu unblocks, it could potentially fall through here. 
>
Well, when unblocking, wake() is certainly called, yes.

> And like you 
> said, mostly spurious sleep
> happens when a vcpu is running and it could happen in other cases, 
> although rare.
> 
I think I said already there's no such thing as "spurious sleep". Or at
least, I can't think of anything that I would define a spurious sleep,
if you do, please, explain what situation you're referring to.

In any case, one way of dealing with vcpus blocking/offlining/etc could
be to, in context_saved(), in case we are not adding the vcpu back to
the runq, cancel its replenishment event with __replq_remove().

(This may make it possible to avoid doing it in rt_vcpu_sleep() too,
but you'll need to check and test.)

Can you give this a try?
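
Something like this, I mean (only a sketch, and from memory of how
rt_context_saved() looks, so adapt as needed):

    /* in rt_context_saved(), after __RTDS_scheduled is cleared */
    if ( __test_and_clear_bit(__RTDS_delayed_runq_add, &svc->flags) &&
         likely(vcpu_runnable(vc)) )
    {
        __runq_insert(ops, svc);
        runq_tickle(ops, svc);
    }
    else
        /* the vcpu is not going back to the runq: cancel its event */
        __replq_remove(ops, svc);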

Thanks and Regards,
Dario
Tianyang Chen Feb. 25, 2016, 5:29 p.m. UTC | #7
On 2/25/2016 5:34 AM, Dario Faggioli wrote:
>>>> +     * it should be re-inserted back to the replenishment queue.
>>>> +     */
>>>> +    if ( now >= svc->cur_deadline)
>>>> +    {
>>>> +        rt_update_deadline(now, svc);
>>>> +        __replq_remove(ops, svc);
>>>> +    }
>>>> +
>>>> +    if( !__vcpu_on_replq(svc) )
>>>> +        __replq_insert(ops, svc);
>>>> +
>>> And here I am again: is it really necessary to check whether svc is
>>> not
>>> in the replenishment queue? It looks to me that it really should
>>> not be
>>> there... but maybe it can, because we remove the event from the
>>> queue
>>> when the vcpu sleeps, but *not* when the vcpu blocks?
>> Yeah. That is the case where I keep getting assertion failure if
>> it's
>> removed.
>>
> Which one ASSERT() fails?
>
The ASSERT() in replq_insert() fails because the vcpu is already on the
replenishment queue when rt_vcpu_wake() tries to insert it again.

(XEN) Assertion '!__vcpu_on_replq(svc)' failed at sched_rt.c:527
(XEN) ----[ Xen-4.7-unstable  x86_64  debug=y  Tainted:    C ]----
(XEN) CPU:    0
(XEN) RIP:    e008:[<ffff82d08012a003>] sched_rt.c#rt_vcpu_wake+0xf0/0x17f
(XEN) RFLAGS: 0000000000010002   CONTEXT: hypervisor (d0v0)
(XEN) rax: 0000000000000001   rbx: ffff83023b522940   rcx: 0000000000000001
(XEN) rdx: 00000031bb1b9980   rsi: ffff82d080342318   rdi: ffff83023b486ca0
(XEN) rbp: ffff8300bfcffd88   rsp: ffff8300bfcffd58   r8:  0000000000000004
(XEN) r9:  00000000deadbeef   r10: ffff82d08025f5c0   r11: 0000000000000206
(XEN) r12: ffff83023b486ca0   r13: ffff8300bfd46000   r14: ffff82d080299b80
(XEN) r15: ffff83023b522d80   cr0: 0000000080050033   cr4: 00000000000406a0
(XEN) cr3: 0000000231c0d000   cr2: ffff880001e80ba8
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen stack trace from rsp=ffff8300bfcffd58:
(XEN)    ffff8300bfcffd70 ffff8300bfd46000 0000000216110572 ffff83023b522940
(XEN)    ffff82d08032bc00 0000000000000282 ffff8300bfcffdd8 ffff82d08012be0c
(XEN)    ffff83023b4b5000 ffff83023b4f1000 ffff8300bfd47000 ffff8300bfd46000
(XEN)    0000000000000000 ffff83023b4b4280 0000000000014440 0000000000000001
(XEN)    ffff8300bfcffde8 ffff82d08012c327 ffff8300bfcffe08 ffff82d080169cea
(XEN)    ffff83023b4b5000 000000000000000a ffff8300bfcffe18 ffff82d080169d65
(XEN)    ffff8300bfcffe38 ffff82d08010762a ffff83023b4b4280 ffff83023b4b5000
(XEN)    ffff8300bfcffe68 ffff82d08010822a ffff8300bfcffe68 fffffffffffffff2
(XEN)    ffff88022056dcb4 ffff880230c34440 ffff8300bfcffef8 ffff82d0801096fc
(XEN)    ffff8300bfcffef8 ffff8300bfcfff18 ffff8300bfcffef8 ffff82d080240e85
(XEN)    ffff880200000001 0000000000000000 0000000000000246 ffffffff810013aa
(XEN)    000000000000000a ffffffff810013aa 000000000000e030 ffff8300bfd47000
(XEN)    ffff8802206597f0 ffff880230c34440 0000000000014440 0000000000000001
(XEN)    00007cff403000c7 ffff82d0802439e2 ffffffff8100140a 0000000000000020
(XEN)    ffff88022063c7d0 ffff88022063c7d0 0000000000000001 000000000000dca0
(XEN)    ffff88022056dcb8 ffff880230c34440 0000000000000206 0000000000000004
(XEN)    ffff8802230001a0 ffff880220619000 0000000000000020 ffffffff8100140a
(XEN)    0000000000000000 ffff88022056dcb4 0000000000000004 0001010000000000
(XEN)    ffffffff8100140a 000000000000e033 0000000000000206 ffff88022056dc90
(XEN)    000000000000e02b 0000000000000000 0000000000000000 0000000000000000
(XEN) Xen call trace:
(XEN)    [<ffff82d08012a003>] sched_rt.c#rt_vcpu_wake+0xf0/0x17f
(XEN)    [<ffff82d08012be0c>] vcpu_wake+0x213/0x3d4
(XEN)    [<ffff82d08012c327>] vcpu_unblock+0x4b/0x4d
(XEN)    [<ffff82d080169cea>] vcpu_kick+0x20/0x6f
(XEN)    [<ffff82d080169d65>] vcpu_mark_events_pending+0x2c/0x2f
(XEN)    [<ffff82d08010762a>] event_2l.c#evtchn_2l_set_pending+0xa9/0xb9
(XEN)    [<ffff82d08010822a>] evtchn_send+0x158/0x183
(XEN)    [<ffff82d0801096fc>] do_event_channel_op+0xe21/0x147d
(XEN)    [<ffff82d0802439e2>] lstar_enter+0xe2/0x13c
(XEN)
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 0:
(XEN) Assertion '!__vcpu_on_replq(svc)' failed at sched_rt.c:527
(XEN) ****************************************
>> I'm thinking when
>> a vcpu unblocks, it could potentially fall through here.
>>
> Well, when unblocking, wake() is certainly called, yes.
>
>> And like you
>> said, mostly spurious sleep
>> happens when a vcpu is running and it could happen in other cases,
>> although rare.
>>
> I think I said already there's no such thing as "spurious sleep". Or at
> least, I can't think of anything that I would define a spurious sleep,
> if you do, please, explain what situation you're referring to.
>
I meant to say spurious wakeup... If rt_vcpu_sleep() removes vcpus from
the replenishment queue, it's perfectly safe for rt_vcpu_wake() to
insert them back. So, I'm suspecting it's spurious wakeups that are
causing trouble, because vcpus are not removed prior to rt_vcpu_wake().
However, the two RETURNs at the beginning of rt_vcpu_wake() should
catch that, shouldn't they?
> In any case, one way of dealing with vcpus blocking/offlining/etc could
> be to, in context_saved(), in case we are not adding the vcpu back to
> the runq, cancel its replenishment event with __replq_remove().
>
> (This may make it possible to avoid doing it in rt_vcpu_sleep() too,
> but you'll need to check and test.)
>
> Can you give this a try.
That makes sense. Doing it in context_saved() kind of implies that if a
vcpu is sleeping and taken off, its replenishment event should be
removed. On the other hand, the logic is the same as removing it in
rt_vcpu_sleep(), just at a different time. Well, I have tried it, and
the check still needs to be there in rt_vcpu_wake(). I will send the
next version so it's easier to look at.

Thanks,
Tianyang
Dario Faggioli Feb. 25, 2016, 5:51 p.m. UTC | #8
On Thu, 2016-02-25 at 12:29 -0500, Tianyang Chen wrote:
> On 2/25/2016 5:34 AM, Dario Faggioli wrote:
> > > 
> > Which one ASSERT() fails?
> > 
> The replq_insert() fails because it's already on the replenishment
> queue 
> when rt_vcpu_wake() is trying to insert a vcpu again.

> (XEN) Xen call trace:
> (XEN)    [<ffff82d08012a003>] sched_rt.c#rt_vcpu_wake+0xf0/0x17f
> (XEN)    [<ffff82d08012be0c>] vcpu_wake+0x213/0x3d4
> (XEN)    [<ffff82d08012c327>] vcpu_unblock+0x4b/0x4d
> (XEN)    [<ffff82d080169cea>] vcpu_kick+0x20/0x6f
> (XEN)    [<ffff82d080169d65>] vcpu_mark_events_pending+0x2c/0x2f
> (XEN)    [<ffff82d08010762a>]
> event_2l.c#evtchn_2l_set_pending+0xa9/0xb9
> (XEN)    [<ffff82d08010822a>] evtchn_send+0x158/0x183
> (XEN)    [<ffff82d0801096fc>] do_event_channel_op+0xe21/0x147d
> (XEN)    [<ffff82d0802439e2>] lstar_enter+0xe2/0x13c
> (XEN)
> (XEN)
> (XEN) ****************************************
> (XEN) Panic on CPU 0:
> (XEN) Assertion '!__vcpu_on_replq(svc)' failed at sched_rt.c:527
> (XEN) ****************************************
>
Ok, makes sense.

> > > And like you
> > > said, mostly spurious sleep
> > > happens when a vcpu is running and it could happen in other
> > > cases,
> > > although rare.
> > > 
> > I think I said already there's no such thing as "spurious sleep".
> > Or at
> > least, I can't think of anything that I would define a spurious
> > sleep,
> > if you do, please, explain what situation you're referring to.
> > 
> I meant to say spurious wakeup... 
>
If, for spurious wakeup, we refer to wakeups happening when the vcpu is
either running or runnable (and hence in the runqueue already), which I
do, we don't even get to call __replq_insert() in those cases. I mean,
we leave rt_vcpu_wake() before that, don't we?

> If rt_vcpu_sleep() removes vcpus from 
> replenishment queue, it's perfectly safe for rt_vcpu_wake() to
> insert 
> them back. 
>
It's safe against sleeps, not against blockings. That's the point I'm
trying to make.

> So, I'm suspecting it's the spurious wakeup that's causing 
> troubles because vcpus are not removed prior to rt_vcpu_wake().
> However, 
> the two RETURNs at the beginning of rt_vcpu_wake() should catch that 
> shouldn't it?
>
Exactly!
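
I.e., these ones, at the very top of rt_vcpu_wake() (quoting from
memory, so double check against the actual code):

    if ( unlikely(curr_on_cpu(vc->processor) == vc) )
    {
        SCHED_STAT_CRANK(vcpu_wake_running);
        return;
    }

    /* on the runq/depletedq, just updating the info is ok */
    if ( unlikely(__vcpu_on_q(svc)) )
    {
        SCHED_STAT_CRANK(vcpu_wake_onrunq);
        return;
    }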

> > In any case, one way of dealing with vcpus blocking/offlining/etc
> > could
> > be to, in context_saved(), in case we are not adding the vcpu back
> > to
> > the runq, cancel its replenishment event with __replq_remove().
> > 
> > (This may make it possible to avoid doing it in rt_vcpu_sleep()
> > too,
> > but you'll need to check and test.)
> > 
> > Can you give this a try.
> That makes sense. Doing it in context_saved() kinda implies that if
> a 
> vcpu is sleeping and taken off, its replenishment event should be 
> removed. On the other hand, the logic is the same as removing it in 
> rt_vcpu_sleep() but just at different times.
>
It is, but, if done properly, it catches more cases, or at least that's
what I'm after.

> Well, I have tried it and 
> the check still needs to be there in rt_vcpu_wake(). I will send the 
> next version so it's easier to look at.
> 
If you're still seeing assertion failures, perhaps context_saved() is
not the right place to do that... I'll think more about this...

Anyway, yes, let's see the code. Please also send again, as a follow-up
email, the assertion failure log you get if you remove the check.

Thanks and Regards,
Dario
diff mbox

Patch

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 2e5430f..1f0bb7b 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -16,6 +16,7 @@ 
 #include <xen/delay.h>
 #include <xen/event.h>
 #include <xen/time.h>
+#include <xen/timer.h>
 #include <xen/perfc.h>
 #include <xen/sched-if.h>
 #include <xen/softirq.h>
@@ -87,7 +88,7 @@ 
 #define RTDS_DEFAULT_BUDGET     (MICROSECS(4000))
 
 #define UPDATE_LIMIT_SHIFT      10
-#define MAX_SCHEDULE            (MILLISECS(1))
+
 /*
  * Flags
  */
@@ -142,6 +143,12 @@  static cpumask_var_t *_cpumask_scratch;
  */
 static unsigned int nr_rt_ops;
 
+/* handler for the replenishment timer */
+static void repl_handler(void *data);
+
+/* checks if a timer is active or not */
+bool_t active_timer(struct timer* t);
+
 /*
  * Systme-wide private data, include global RunQueue/DepletedQ
  * Global lock is referenced by schedule_data.schedule_lock from all
@@ -152,7 +159,9 @@  struct rt_private {
     struct list_head sdom;      /* list of availalbe domains, used for dump */
     struct list_head runq;      /* ordered list of runnable vcpus */
     struct list_head depletedq; /* unordered list of depleted vcpus */
+    struct list_head replq;     /* ordered list of vcpus that need replenishment */
     cpumask_t tickled;          /* cpus been tickled */
+    struct timer *repl_timer;   /* replenishment timer */
 };
 
 /*
@@ -160,6 +169,7 @@  struct rt_private {
  */
 struct rt_vcpu {
     struct list_head q_elem;    /* on the runq/depletedq list */
+    struct list_head replq_elem;/* on the repl event list */
 
     /* Up-pointers */
     struct rt_dom *sdom;
@@ -213,8 +223,14 @@  static inline struct list_head *rt_depletedq(const struct scheduler *ops)
     return &rt_priv(ops)->depletedq;
 }
 
+static inline struct list_head *rt_replq(const struct scheduler *ops)
+{
+    return &rt_priv(ops)->replq;
+}
+
 /*
- * Queue helper functions for runq and depletedq
+ * Queue helper functions for runq, depletedq
+ * and replenishment event queue
  */
 static int
 __vcpu_on_q(const struct rt_vcpu *svc)
@@ -228,6 +244,18 @@  __q_elem(struct list_head *elem)
     return list_entry(elem, struct rt_vcpu, q_elem);
 }
 
+static struct rt_vcpu *
+__replq_elem(struct list_head *elem)
+{
+    return list_entry(elem, struct rt_vcpu, replq_elem);
+}
+
+static int
+__vcpu_on_replq(const struct rt_vcpu *svc)
+{
+   return !list_empty(&svc->replq_elem);
+}
+
 /*
  * Debug related code, dump vcpu/cpu information
  */
@@ -288,7 +316,7 @@  rt_dump_pcpu(const struct scheduler *ops, int cpu)
 static void
 rt_dump(const struct scheduler *ops)
 {
-    struct list_head *runq, *depletedq, *iter;
+    struct list_head *runq, *depletedq, *replq, *iter;
     struct rt_private *prv = rt_priv(ops);
     struct rt_vcpu *svc;
     struct rt_dom *sdom;
@@ -301,6 +329,7 @@  rt_dump(const struct scheduler *ops)
 
     runq = rt_runq(ops);
     depletedq = rt_depletedq(ops);
+    replq = rt_replq(ops);
 
     printk("Global RunQueue info:\n");
     list_for_each( iter, runq )
@@ -316,6 +345,13 @@  rt_dump(const struct scheduler *ops)
         rt_dump_vcpu(ops, svc);
     }
 
+    printk("Global Replenishment Event info:\n");
+    list_for_each( iter, replq )
+    {
+        svc = __replq_elem(iter);
+        rt_dump_vcpu(ops, svc);
+    }
+
     printk("Domain info:\n");
     list_for_each( iter, &prv->sdom )
     {
@@ -388,6 +424,66 @@  __q_remove(struct rt_vcpu *svc)
 }
 
 /*
+ * Removing a vcpu from the replenishment queue could
+ * re-program the timer for the next replenishment event
+ * if the timer is currently active
+ */
+static inline void
+__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
+{
+    struct rt_private *prv = rt_priv(ops);
+    struct list_head *replq = rt_replq(ops);
+    struct timer* repl_timer = prv->repl_timer;
+
+    if ( __vcpu_on_replq(svc) )
+    {
+        /*
+         * disarm the timer if removing the first replenishment event
+         * which is going to happen next
+         */
+        if( active_timer(repl_timer) )
+        {
+            struct rt_vcpu *next_repl = __replq_elem(replq->next);
+
+            if( next_repl->cur_deadline == svc->cur_deadline )
+                repl_timer->expires = 0;
+
+            list_del_init(&svc->replq_elem);
+
+            /* re-arm the timer for the next replenishment event */
+            if( !list_empty(replq) )
+            {
+                struct rt_vcpu *svc_next = __replq_elem(replq->next);
+                set_timer(repl_timer, svc_next->cur_deadline);
+            }
+        }
+
+        else
+            list_del_init(&svc->replq_elem);
+    }
+}
+
+/*
+ * An utility function that inserts a vcpu to a
+ * queue based on certain order (EDF)
+ */
+static void
+_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem),
+    struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue)
+{
+    struct list_head *iter;
+
+    list_for_each(iter, queue)
+    {
+        struct rt_vcpu * iter_svc = (*_get_q_elem)(iter);
+        if ( svc->cur_deadline <= iter_svc->cur_deadline )
+                break;
+    }
+
+    list_add_tail(elem, iter);
+}
+
+/*
  * Insert svc with budget in RunQ according to EDF:
  * vcpus with smaller deadlines go first.
  * Insert svc without budget in DepletedQ unsorted;
@@ -397,7 +493,6 @@  __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
 {
     struct rt_private *prv = rt_priv(ops);
     struct list_head *runq = rt_runq(ops);
-    struct list_head *iter;
 
     ASSERT( spin_is_locked(&prv->lock) );
 
@@ -405,22 +500,37 @@  __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
 
     /* add svc to runq if svc still has budget */
     if ( svc->cur_budget > 0 )
-    {
-        list_for_each(iter, runq)
-        {
-            struct rt_vcpu * iter_svc = __q_elem(iter);
-            if ( svc->cur_deadline <= iter_svc->cur_deadline )
-                    break;
-         }
-        list_add_tail(&svc->q_elem, iter);
-    }
+        _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq);
     else
-    {
         list_add(&svc->q_elem, &prv->depletedq);
-    }
 }
 
 /*
+ * Insert svc into the repl even list:
+ * vcpus that needs to be repl earlier go first.
+ * scheduler private lock serializes this operation
+ * it could re-program the timer if it fires later than
+ * this vcpu's cur_deadline. Also, this is used to program
+ * the timer for the first time.
+ */
+static void
+__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
+{
+    struct list_head *replq = rt_replq(ops);
+    struct rt_private *prv = rt_priv(ops);
+    struct timer *repl_timer = prv->repl_timer;
+
+    ASSERT( !__vcpu_on_replq(svc) );
+
+    _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, replq);
+
+    if( repl_timer->expires == 0 ||
+        ( active_timer(repl_timer) && repl_timer->expires > svc->cur_deadline ) )
+        set_timer(repl_timer,svc->cur_deadline);
+}
+
+
+/*
  * Init/Free related code
  */
 static int
@@ -449,11 +559,18 @@  rt_init(struct scheduler *ops)
     INIT_LIST_HEAD(&prv->sdom);
     INIT_LIST_HEAD(&prv->runq);
     INIT_LIST_HEAD(&prv->depletedq);
+    INIT_LIST_HEAD(&prv->replq);
 
     cpumask_clear(&prv->tickled);
 
     ops->sched_data = prv;
 
+    /* 
+     * The timer initialization will happen later when 
+     * the first pcpu is added to this pool in alloc_pdata
+     */
+    prv->repl_timer = NULL;
+
     return 0;
 
  no_mem:
@@ -473,6 +590,10 @@  rt_deinit(const struct scheduler *ops)
         xfree(_cpumask_scratch);
         _cpumask_scratch = NULL;
     }
+
+    kill_timer(prv->repl_timer);
+    xfree(prv->repl_timer);
+
     xfree(prv);
 }
 
@@ -493,6 +614,17 @@  rt_alloc_pdata(const struct scheduler *ops, int cpu)
     if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) )
         return NULL;
 
+    if( prv->repl_timer == NULL )
+    {   
+        /* allocate the timer on the first cpu of this pool */
+        prv->repl_timer = xzalloc(struct timer);
+
+        if(prv->repl_timer == NULL )
+            return NULL;
+
+        init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu);
+    }
+
     /* 1 indicates alloc. succeed in schedule.c */
     return (void *)1;
 }
@@ -586,6 +718,7 @@  rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
         return NULL;
 
     INIT_LIST_HEAD(&svc->q_elem);
+    INIT_LIST_HEAD(&svc->replq_elem);
     svc->flags = 0U;
     svc->sdom = dd;
     svc->vcpu = vc;
@@ -609,7 +742,8 @@  rt_free_vdata(const struct scheduler *ops, void *priv)
 }
 
 /*
- * This function is called in sched_move_domain() in schedule.c
+ * It is called in sched_move_domain() and sched_init_vcpu
+ * in schedule.c
  * When move a domain to a new cpupool.
  * It inserts vcpus of moving domain to the scheduler's RunQ in
  * dest. cpupool.
@@ -651,6 +785,10 @@  rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
     lock = vcpu_schedule_lock_irq(vc);
     if ( __vcpu_on_q(svc) )
         __q_remove(svc);
+
+    if( __vcpu_on_replq(svc) )
+        __replq_remove(ops,svc);
+
     vcpu_schedule_unlock_irq(lock, vc);
 }
 
@@ -785,44 +923,6 @@  __runq_pick(const struct scheduler *ops, const cpumask_t *mask)
 }
 
 /*
- * Update vcpu's budget and
- * sort runq by insert the modifed vcpu back to runq
- * lock is grabbed before calling this function
- */
-static void
-__repl_update(const struct scheduler *ops, s_time_t now)
-{
-    struct list_head *runq = rt_runq(ops);
-    struct list_head *depletedq = rt_depletedq(ops);
-    struct list_head *iter;
-    struct list_head *tmp;
-    struct rt_vcpu *svc = NULL;
-
-    list_for_each_safe(iter, tmp, runq)
-    {
-        svc = __q_elem(iter);
-        if ( now < svc->cur_deadline )
-            break;
-
-        rt_update_deadline(now, svc);
-        /* reinsert the vcpu if its deadline is updated */
-        __q_remove(svc);
-        __runq_insert(ops, svc);
-    }
-
-    list_for_each_safe(iter, tmp, depletedq)
-    {
-        svc = __q_elem(iter);
-        if ( now >= svc->cur_deadline )
-        {
-            rt_update_deadline(now, svc);
-            __q_remove(svc); /* remove from depleted queue */
-            __runq_insert(ops, svc); /* add to runq */
-        }
-    }
-}
-
-/*
  * schedule function for rt scheduler.
  * The lock is already grabbed in schedule.c, no need to lock here
  */
@@ -841,7 +941,6 @@  rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
     /* burn_budget would return for IDLE VCPU */
     burn_budget(ops, scurr, now);
 
-    __repl_update(ops, now);
 
     if ( tasklet_work_scheduled )
     {
@@ -868,6 +967,8 @@  rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
         set_bit(__RTDS_delayed_runq_add, &scurr->flags);
 
     snext->last_start = now;
+
+    ret.time =  -1; /* if an idle vcpu is picked */ 
     if ( !is_idle_vcpu(snext->vcpu) )
     {
         if ( snext != scurr )
@@ -880,9 +981,11 @@  rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
             snext->vcpu->processor = cpu;
             ret.migrated = 1;
         }
+
+        ret.time = snext->budget; /* invoke the scheduler next time */
+
     }
 
-    ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
     ret.task = snext->vcpu;
 
     /* TRACE */
@@ -914,7 +1017,7 @@  static void
 rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
 {
     struct rt_vcpu * const svc = rt_vcpu(vc);
-
+ 
     BUG_ON( is_idle_vcpu(vc) );
     SCHED_STAT_CRANK(vcpu_sleep);
 
@@ -924,6 +1027,9 @@  rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
         __q_remove(svc);
     else if ( svc->flags & RTDS_delayed_runq_add )
         clear_bit(__RTDS_delayed_runq_add, &svc->flags);
+
+    if( __vcpu_on_replq(svc) )
+        __replq_remove(ops, svc);
 }
 
 /*
@@ -1026,10 +1132,6 @@  rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
 {
     struct rt_vcpu * const svc = rt_vcpu(vc);
     s_time_t now = NOW();
-    struct rt_private *prv = rt_priv(ops);
-    struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
-    struct rt_dom *sdom = NULL;
-    cpumask_t *online;
 
     BUG_ON( is_idle_vcpu(vc) );
 
@@ -1051,6 +1153,18 @@  rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
     else
         SCHED_STAT_CRANK(vcpu_wake_not_runnable);
 
+    /* budget repl here is needed before inserting back to runq. If so,
+     * it should be re-inserted back to the replenishment queue.
+     */
+    if ( now >= svc->cur_deadline)
+    {   
+        rt_update_deadline(now, svc);
+        __replq_remove(ops, svc);
+    }
+
+    if( !__vcpu_on_replq(svc) )
+        __replq_insert(ops, svc);
+    
     /* If context hasn't been saved for this vcpu yet, we can't put it on
      * the Runqueue/DepletedQ. Instead, we set a flag so that it will be
      * put on the Runqueue/DepletedQ after the context has been saved.
@@ -1061,22 +1175,10 @@  rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
         return;
     }
 
-    if ( now >= svc->cur_deadline)
-        rt_update_deadline(now, svc);
-
     /* insert svc to runq/depletedq because svc is not in queue now */
     __runq_insert(ops, svc);
 
-    __repl_update(ops, now);
-
-    ASSERT(!list_empty(&prv->sdom));
-    sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
-    online = cpupool_domain_cpumask(sdom->dom);
-    snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */
-
-    runq_tickle(ops, snext);
-
-    return;
+    runq_tickle(ops, svc);
 }
 
 /*
@@ -1087,10 +1189,6 @@  static void
 rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
 {
     struct rt_vcpu *svc = rt_vcpu(vc);
-    struct rt_vcpu *snext = NULL;
-    struct rt_dom *sdom = NULL;
-    struct rt_private *prv = rt_priv(ops);
-    cpumask_t *online;
     spinlock_t *lock = vcpu_schedule_lock_irq(vc);
 
     clear_bit(__RTDS_scheduled, &svc->flags);
@@ -1102,14 +1200,7 @@  rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
          likely(vcpu_runnable(vc)) )
     {
         __runq_insert(ops, svc);
-        __repl_update(ops, NOW());
-
-        ASSERT(!list_empty(&prv->sdom));
-        sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
-        online = cpupool_domain_cpumask(sdom->dom);
-        snext = __runq_pick(ops, online); /* pick snext from ALL cpus */
-
-        runq_tickle(ops, snext);
+        runq_tickle(ops, svc);
     }
 out:
     vcpu_schedule_unlock_irq(lock, vc);
@@ -1168,6 +1259,80 @@  rt_dom_cntl(
     return rc;
 }
 
+/*
+ * The replenishment timer handler picks vcpus
+ * from the replq and does the actual replenishment
+ */
+static void repl_handler(void *data){
+    unsigned long flags;
+    s_time_t now = NOW();
+    struct scheduler *ops = data; 
+    struct rt_private *prv = rt_priv(ops);
+    struct list_head *replq = rt_replq(ops);
+    struct timer *repl_timer = prv->repl_timer;
+    struct list_head *iter, *tmp;
+    struct rt_vcpu *svc = NULL;
+
+    spin_lock_irqsave(&prv->lock, flags);
+
+    stop_timer(repl_timer);
+
+    list_for_each_safe(iter, tmp, replq)
+    {
+        svc = __replq_elem(iter);
+
+        if ( now >= svc->cur_deadline )
+        {
+            rt_update_deadline(now, svc);
+
+            /*
+             * when the replenishment happens
+             * svc is either on a pcpu or on
+             * runq/depletedq
+             */
+            if( __vcpu_on_q(svc) )
+            {
+                /* put back to runq */
+                __q_remove(svc);
+                __runq_insert(ops, svc);
+            }
+
+            /* 
+             * tickle regardless where it's at 
+             * because a running vcpu could have
+             * a later deadline than others after
+             * replenishment
+             */
+            runq_tickle(ops, svc);
+
+            /* update replenishment event queue */
+            __replq_remove(ops, svc);
+            __replq_insert(ops, svc);
+        }
+
+        else
+            break;
+    }
+
+    /* 
+     * use the vcpu that's on the top
+     * or else don't program the timer
+     */
+    if( !list_empty(replq) )
+        set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline);
+
+    spin_unlock_irqrestore(&prv->lock, flags);
+
+}
+
+/* checks if a timer has been stopped or not */
+bool_t active_timer(struct timer *timer)
+{
+    ASSERT(timer->status >= TIMER_STATUS_inactive);
+    ASSERT(timer->status <= TIMER_STATUS_in_list);
+    return (timer->status >= TIMER_STATUS_in_heap);
+}
+
 static struct rt_private _rt_priv;
 
 static const struct scheduler sched_rtds_def = {