Message ID | 1454992407-5436-1-git-send-email-tiche@seas.upenn.edu (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Tianyang, Thanks for the patch! Great work and really quick action! :-) I will just comment on some things I quickly found out and would look forward to Dario's comments. On Mon, Feb 8, 2016 at 11:33 PM, Tianyang Chen <tiche@seas.upenn.edu> wrote: > Changes since v4: > removed unnecessary replenishment queue checks in vcpu_wake() > extended replq_remove() to all cases in vcpu_sleep() > used _deadline_queue_insert() helper function for both queues > _replq_insert() and _replq_remove() program timer internally > > Changes since v3: > removed running queue. > added repl queue to keep track of repl events. > timer is now per scheduler. > timer is init on a valid cpu in a cpupool. > > Signed-off-by: Tianyang Chen <tiche@seas.upenn.edu> > Signed-off-by: Meng Xu <mengxu@cis.upenn.edu> > Signed-off-by: Dagaen Golomb <dgolomb@seas.upenn.edu> > --- > xen/common/sched_rt.c | 337 ++++++++++++++++++++++++++++++++++++------------- > 1 file changed, 251 insertions(+), 86 deletions(-) > > diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c > index 2e5430f..1f0bb7b 100644 > --- a/xen/common/sched_rt.c > +++ b/xen/common/sched_rt.c > @@ -16,6 +16,7 @@ > #include <xen/delay.h> > #include <xen/event.h> > #include <xen/time.h> > +#include <xen/timer.h> > #include <xen/perfc.h> > #include <xen/sched-if.h> > #include <xen/softirq.h> > @@ -87,7 +88,7 @@ > #define RTDS_DEFAULT_BUDGET (MICROSECS(4000)) > > #define UPDATE_LIMIT_SHIFT 10 > -#define MAX_SCHEDULE (MILLISECS(1)) > + > /* > * Flags > */ > @@ -142,6 +143,12 @@ static cpumask_var_t *_cpumask_scratch; > */ > static unsigned int nr_rt_ops; > > +/* handler for the replenishment timer */ > +static void repl_handler(void *data); > + > +/* checks if a timer is active or not */ > +bool_t active_timer(struct timer* t); > + > /* > * Systme-wide private data, include global RunQueue/DepletedQ > * Global lock is referenced by schedule_data.schedule_lock from all > @@ -152,7 +159,9 @@ struct rt_private { > struct list_head sdom; /* 
list of availalbe domains, used for dump */ > struct list_head runq; /* ordered list of runnable vcpus */ > struct list_head depletedq; /* unordered list of depleted vcpus */ > + struct list_head replq; /* ordered list of vcpus that need replenishment */ > cpumask_t tickled; /* cpus been tickled */ > + struct timer *repl_timer; /* replenishment timer */ > }; > > /* > @@ -160,6 +169,7 @@ struct rt_private { > */ > struct rt_vcpu { > struct list_head q_elem; /* on the runq/depletedq list */ > + struct list_head replq_elem;/* on the repl event list */ > > /* Up-pointers */ > struct rt_dom *sdom; > @@ -213,8 +223,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops) > return &rt_priv(ops)->depletedq; > } > > +static inline struct list_head *rt_replq(const struct scheduler *ops) > +{ > + return &rt_priv(ops)->replq; > +} > + > /* > - * Queue helper functions for runq and depletedq > + * Queue helper functions for runq, depletedq > + * and replenishment event queue > */ > static int > __vcpu_on_q(const struct rt_vcpu *svc) > @@ -228,6 +244,18 @@ __q_elem(struct list_head *elem) > return list_entry(elem, struct rt_vcpu, q_elem); > } > > +static struct rt_vcpu * > +__replq_elem(struct list_head *elem) > +{ > + return list_entry(elem, struct rt_vcpu, replq_elem); > +} > + > +static int > +__vcpu_on_replq(const struct rt_vcpu *svc) > +{ > + return !list_empty(&svc->replq_elem); > +} > + > /* > * Debug related code, dump vcpu/cpu information > */ > @@ -288,7 +316,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu) > static void > rt_dump(const struct scheduler *ops) > { > - struct list_head *runq, *depletedq, *iter; > + struct list_head *runq, *depletedq, *replq, *iter; > struct rt_private *prv = rt_priv(ops); > struct rt_vcpu *svc; > struct rt_dom *sdom; > @@ -301,6 +329,7 @@ rt_dump(const struct scheduler *ops) > > runq = rt_runq(ops); > depletedq = rt_depletedq(ops); > + replq = rt_replq(ops); > > printk("Global RunQueue info:\n"); > 
list_for_each( iter, runq ) > @@ -316,6 +345,13 @@ rt_dump(const struct scheduler *ops) > rt_dump_vcpu(ops, svc); > } > > + printk("Global Replenishment Event info:\n"); > + list_for_each( iter, replq ) > + { > + svc = __replq_elem(iter); > + rt_dump_vcpu(ops, svc); > + } > + > printk("Domain info:\n"); > list_for_each( iter, &prv->sdom ) > { > @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc) > } > > /* > + * Removing a vcpu from the replenishment queue could > + * re-program the timer for the next replenishment event > + * if the timer is currently active > + */ > +static inline void > +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct rt_private *prv = rt_priv(ops); > + struct list_head *replq = rt_replq(ops); > + struct timer* repl_timer = prv->repl_timer; > + > + if ( __vcpu_on_replq(svc) ) > + { > + /* > + * disarm the timer if removing the first replenishment event > + * which is going to happen next > + */ > + if( active_timer(repl_timer) ) > + { > + struct rt_vcpu *next_repl = __replq_elem(replq->next); > + > + if( next_repl->cur_deadline == svc->cur_deadline ) > + repl_timer->expires = 0; > + > + list_del_init(&svc->replq_elem); > + > + /* re-arm the timer for the next replenishment event */ > + if( !list_empty(replq) ) > + { > + struct rt_vcpu *svc_next = __replq_elem(replq->next); > + set_timer(repl_timer, svc_next->cur_deadline); > + } > + } > + This blank line should go away.. It is quite misleading. 
At very first glance, I thought it is the else for if ( __vcpu_on_replq(svc) ); But after second read, I found it is actually for the if( active_timer(repl_timer) ) > + else > + list_del_init(&svc->replq_elem); > + } > +} > + > +/* > + * An utility function that inserts a vcpu to a > + * queue based on certain order (EDF) > + */ > +static void > +_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem), > + struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue) > +{ > + struct list_head *iter; > + > + list_for_each(iter, queue) > + { > + struct rt_vcpu * iter_svc = (*_get_q_elem)(iter); > + if ( svc->cur_deadline <= iter_svc->cur_deadline ) > + break; > + } > + > + list_add_tail(elem, iter); > +} > + > +/* > * Insert svc with budget in RunQ according to EDF: > * vcpus with smaller deadlines go first. > * Insert svc without budget in DepletedQ unsorted; > @@ -397,7 +493,6 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > { > struct rt_private *prv = rt_priv(ops); > struct list_head *runq = rt_runq(ops); > - struct list_head *iter; > > ASSERT( spin_is_locked(&prv->lock) ); > > @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > > /* add svc to runq if svc still has budget */ > if ( svc->cur_budget > 0 ) > - { > - list_for_each(iter, runq) > - { > - struct rt_vcpu * iter_svc = __q_elem(iter); > - if ( svc->cur_deadline <= iter_svc->cur_deadline ) > - break; > - } > - list_add_tail(&svc->q_elem, iter); > - } > + _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq); > else > - { > list_add(&svc->q_elem, &prv->depletedq); > - } > } > > /* > + * Insert svc into the repl even list: > + * vcpus that needs to be repl earlier go first. > + * scheduler private lock serializes this operation > + * it could re-program the timer if it fires later than > + * this vcpu's cur_deadline. Also, this is used to program > + * the timer for the first time. 
> + */ > +static void > +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct list_head *replq = rt_replq(ops); > + struct rt_private *prv = rt_priv(ops); > + struct timer *repl_timer = prv->repl_timer; > + > + ASSERT( !__vcpu_on_replq(svc) ); > + > + _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, replq); > + > + if( repl_timer->expires == 0 || > + ( active_timer(repl_timer) && repl_timer->expires > svc->cur_deadline ) ) > + set_timer(repl_timer,svc->cur_deadline); > +} > + > + > +/* > * Init/Free related code > */ > static int > @@ -449,11 +559,18 @@ rt_init(struct scheduler *ops) > INIT_LIST_HEAD(&prv->sdom); > INIT_LIST_HEAD(&prv->runq); > INIT_LIST_HEAD(&prv->depletedq); > + INIT_LIST_HEAD(&prv->replq); > > cpumask_clear(&prv->tickled); > > ops->sched_data = prv; > > + /* > + * The timer initialization will happen later when > + * the first pcpu is added to this pool in alloc_pdata > + */ > + prv->repl_timer = NULL; > + > return 0; > > no_mem: > @@ -473,6 +590,10 @@ rt_deinit(const struct scheduler *ops) > xfree(_cpumask_scratch); > _cpumask_scratch = NULL; > } > + > + kill_timer(prv->repl_timer); > + xfree(prv->repl_timer); > + > xfree(prv); > } > > @@ -493,6 +614,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu) > if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) ) > return NULL; > > + if( prv->repl_timer == NULL ) > + { > + /* allocate the timer on the first cpu of this pool */ > + prv->repl_timer = xzalloc(struct timer); > + > + if(prv->repl_timer == NULL ) > + return NULL; > + > + init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu); > + } > + > /* 1 indicates alloc. 
succeed in schedule.c */ > return (void *)1; > } > @@ -586,6 +718,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd) > return NULL; > > INIT_LIST_HEAD(&svc->q_elem); > + INIT_LIST_HEAD(&svc->replq_elem); > svc->flags = 0U; > svc->sdom = dd; > svc->vcpu = vc; > @@ -609,7 +742,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv) > } > > /* > - * This function is called in sched_move_domain() in schedule.c > + * It is called in sched_move_domain() and sched_init_vcpu > + * in schedule.c > * When move a domain to a new cpupool. > * It inserts vcpus of moving domain to the scheduler's RunQ in > * dest. cpupool. > @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc) > lock = vcpu_schedule_lock_irq(vc); > if ( __vcpu_on_q(svc) ) > __q_remove(svc); > + > + if( __vcpu_on_replq(svc) ) > + __replq_remove(ops,svc); > + > vcpu_schedule_unlock_irq(lock, vc); > } > > @@ -785,44 +923,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask) > } > > /* > - * Update vcpu's budget and > - * sort runq by insert the modifed vcpu back to runq > - * lock is grabbed before calling this function > - */ > -static void > -__repl_update(const struct scheduler *ops, s_time_t now) > -{ > - struct list_head *runq = rt_runq(ops); > - struct list_head *depletedq = rt_depletedq(ops); > - struct list_head *iter; > - struct list_head *tmp; > - struct rt_vcpu *svc = NULL; > - > - list_for_each_safe(iter, tmp, runq) > - { > - svc = __q_elem(iter); > - if ( now < svc->cur_deadline ) > - break; > - > - rt_update_deadline(now, svc); > - /* reinsert the vcpu if its deadline is updated */ > - __q_remove(svc); > - __runq_insert(ops, svc); > - } > - > - list_for_each_safe(iter, tmp, depletedq) > - { > - svc = __q_elem(iter); > - if ( now >= svc->cur_deadline ) > - { > - rt_update_deadline(now, svc); > - __q_remove(svc); /* remove from depleted queue */ > - __runq_insert(ops, svc); /* add to runq */ > - } > - } > -} > - > -/* > * schedule 
function for rt scheduler. > * The lock is already grabbed in schedule.c, no need to lock here > */ > @@ -841,7 +941,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > /* burn_budget would return for IDLE VCPU */ > burn_budget(ops, scurr, now); > > - __repl_update(ops, now); > > if ( tasklet_work_scheduled ) > { > @@ -868,6 +967,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > set_bit(__RTDS_delayed_runq_add, &scurr->flags); > > snext->last_start = now; > + > + ret.time = -1; /* if an idle vcpu is picked */ > if ( !is_idle_vcpu(snext->vcpu) ) > { > if ( snext != scurr ) > @@ -880,9 +981,11 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > snext->vcpu->processor = cpu; > ret.migrated = 1; > } > + > + ret.time = snext->budget; /* invoke the scheduler next time */ > + > } > > - ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */ > ret.task = snext->vcpu; > > /* TRACE */ > @@ -914,7 +1017,7 @@ static void > rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu * const svc = rt_vcpu(vc); > - > + Next patch should avoid this. You may have some trailing space in the line. git has some command to mark the trailing whitespace. 
You can have a look at this post: http://stackoverflow.com/questions/5257553/coloring-white-space-in-git-diffs-output > BUG_ON( is_idle_vcpu(vc) ); > SCHED_STAT_CRANK(vcpu_sleep); > > @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) > __q_remove(svc); > else if ( svc->flags & RTDS_delayed_runq_add ) > clear_bit(__RTDS_delayed_runq_add, &svc->flags); > + > + if( __vcpu_on_replq(svc) ) > + __replq_remove(ops, svc); > } > > /* > @@ -1026,10 +1132,6 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu * const svc = rt_vcpu(vc); > s_time_t now = NOW(); > - struct rt_private *prv = rt_priv(ops); > - struct rt_vcpu *snext = NULL; /* highest priority on RunQ */ > - struct rt_dom *sdom = NULL; > - cpumask_t *online; > > BUG_ON( is_idle_vcpu(vc) ); > > @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > else > SCHED_STAT_CRANK(vcpu_wake_not_runnable); > > + /* budget repl here is needed before inserting back to runq. If so, > + * it should be re-inserted back to the replenishment queue. > + */ > + if ( now >= svc->cur_deadline) > + { > + rt_update_deadline(now, svc); > + __replq_remove(ops, svc); > + } > + > + if( !__vcpu_on_replq(svc) ) > + __replq_insert(ops, svc); > + > /* If context hasn't been saved for this vcpu yet, we can't put it on > * the Runqueue/DepletedQ. Instead, we set a flag so that it will be > * put on the Runqueue/DepletedQ after the context has been saved. 
> @@ -1061,22 +1175,10 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > return; > } > > - if ( now >= svc->cur_deadline) > - rt_update_deadline(now, svc); > - > /* insert svc to runq/depletedq because svc is not in queue now */ > __runq_insert(ops, svc); > > - __repl_update(ops, now); > - > - ASSERT(!list_empty(&prv->sdom)); > - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); > - online = cpupool_domain_cpumask(sdom->dom); > - snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */ > - > - runq_tickle(ops, snext); > - > - return; > + runq_tickle(ops, svc); > } > > /* > @@ -1087,10 +1189,6 @@ static void > rt_context_saved(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu *svc = rt_vcpu(vc); > - struct rt_vcpu *snext = NULL; > - struct rt_dom *sdom = NULL; > - struct rt_private *prv = rt_priv(ops); > - cpumask_t *online; > spinlock_t *lock = vcpu_schedule_lock_irq(vc); > > clear_bit(__RTDS_scheduled, &svc->flags); > @@ -1102,14 +1200,7 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc) > likely(vcpu_runnable(vc)) ) > { > __runq_insert(ops, svc); > - __repl_update(ops, NOW()); > - > - ASSERT(!list_empty(&prv->sdom)); > - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); > - online = cpupool_domain_cpumask(sdom->dom); > - snext = __runq_pick(ops, online); /* pick snext from ALL cpus */ > - > - runq_tickle(ops, snext); > + runq_tickle(ops, svc); > } > out: > vcpu_schedule_unlock_irq(lock, vc); > @@ -1168,6 +1259,80 @@ rt_dom_cntl( > return rc; > } > > +/* > + * The replenishment timer handler picks vcpus > + * from the replq and does the actual replenishment > + */ > +static void repl_handler(void *data){ > + unsigned long flags; > + s_time_t now = NOW(); > + struct scheduler *ops = data; > + struct rt_private *prv = rt_priv(ops); > + struct list_head *replq = rt_replq(ops); > + struct timer *repl_timer = prv->repl_timer; > + struct list_head *iter, *tmp; > + struct rt_vcpu *svc = 
NULL; > + > + spin_lock_irqsave(&prv->lock, flags); > + > + stop_timer(repl_timer); > + > + list_for_each_safe(iter, tmp, replq) > + { > + svc = __replq_elem(iter); > + > + if ( now >= svc->cur_deadline ) > + { > + rt_update_deadline(now, svc); > + > + /* > + * when the replenishment happens > + * svc is either on a pcpu or on > + * runq/depletedq > + */ > + if( __vcpu_on_q(svc) ) > + { > + /* put back to runq */ > + __q_remove(svc); > + __runq_insert(ops, svc); > + } > + > + /* > + * tickle regardless where it's at > + * because a running vcpu could have > + * a later deadline than others after > + * replenishment > + */ > + runq_tickle(ops, svc); > + > + /* update replenishment event queue */ > + __replq_remove(ops, svc); > + __replq_insert(ops, svc); > + } > + This white line should be removed... > + else > + break; > + } > + > + /* > + * use the vcpu that's on the top > + * or else don't program the timer > + */ > + if( !list_empty(replq) ) > + set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline); > + > + spin_unlock_irqrestore(&prv->lock, flags); > + > +} > + > +/* checks if a timer has been stopped or not */ > +bool_t active_timer(struct timer *timer) > +{ > + ASSERT(timer->status >= TIMER_STATUS_inactive); > + ASSERT(timer->status <= TIMER_STATUS_in_list); > + return (timer->status >= TIMER_STATUS_in_heap); > +} > + > static struct rt_private _rt_priv; > > static const struct scheduler sched_rtds_def = { > -- > 1.7.9.5 > Thanks and best regards, Meng
On 2/15/2016 10:55 PM, Meng Xu wrote: > Hi Tianyang, > > Thanks for the patch! Great work and really quick action! :-) > I will just comment on something I quickly find out and would look > forwarding to Dario's comment. > > On Mon, Feb 8, 2016 at 11:33 PM, Tianyang Chen <tiche@seas.upenn.edu > <mailto:tiche@seas.upenn.edu>> wrote: > > Changes since v4: > > removed unnecessary replenishment queue checks in vcpu_wake() > > extended replq_remove() to all cases in vcpu_sleep() > > used _deadline_queue_insert() helper function for both queues > > _replq_insert() and _replq_remove() program timer internally > > > > Changes since v3: > > removed running queue. > > added repl queue to keep track of repl events. > > timer is now per scheduler. > > timer is init on a valid cpu in a cpupool. > > > > Signed-off-by: Tianyang Chen <tiche@seas.upenn.edu > <mailto:tiche@seas.upenn.edu>> > > Signed-off-by: Meng Xu <mengxu@cis.upenn.edu > <mailto:mengxu@cis.upenn.edu>> > > Signed-off-by: Dagaen Golomb <dgolomb@seas.upenn.edu > <mailto:dgolomb@seas.upenn.edu>> > > --- > > xen/common/sched_rt.c | 337 > ++++++++++++++++++++++++++++++++++++------------- > > 1 file changed, 251 insertions(+), 86 deletions(-) > > > > diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c > > index 2e5430f..1f0bb7b 100644 > > --- a/xen/common/sched_rt.c > > +++ b/xen/common/sched_rt.c > > @@ -16,6 +16,7 @@ > > #include <xen/delay.h> > > #include <xen/event.h> > > #include <xen/time.h> > > +#include <xen/timer.h> > > #include <xen/perfc.h> > > #include <xen/sched-if.h> > > #include <xen/softirq.h> > > @@ -87,7 +88,7 @@ > > #define RTDS_DEFAULT_BUDGET (MICROSECS(4000)) > > > > #define UPDATE_LIMIT_SHIFT 10 > > -#define MAX_SCHEDULE (MILLISECS(1)) > > + > > /* > > * Flags > > */ > > @@ -142,6 +143,12 @@ static cpumask_var_t *_cpumask_scratch; > > */ > > static unsigned int nr_rt_ops; > > > > +/* handler for the replenishment timer */ > > +static void repl_handler(void *data); > > + > > +/* checks if a 
timer is active or not */ > > +bool_t active_timer(struct timer* t); > > + > > /* > > * Systme-wide private data, include global RunQueue/DepletedQ > > * Global lock is referenced by schedule_data.schedule_lock from all > > @@ -152,7 +159,9 @@ struct rt_private { > > struct list_head sdom; /* list of availalbe domains, used > for dump */ > > struct list_head runq; /* ordered list of runnable vcpus */ > > struct list_head depletedq; /* unordered list of depleted vcpus */ > > + struct list_head replq; /* ordered list of vcpus that need > replenishment */ > > cpumask_t tickled; /* cpus been tickled */ > > + struct timer *repl_timer; /* replenishment timer */ > > }; > > > > /* > > @@ -160,6 +169,7 @@ struct rt_private { > > */ > > struct rt_vcpu { > > struct list_head q_elem; /* on the runq/depletedq list */ > > + struct list_head replq_elem;/* on the repl event list */ > > > > /* Up-pointers */ > > struct rt_dom *sdom; > > @@ -213,8 +223,14 @@ static inline struct list_head > *rt_depletedq(const struct scheduler *ops) > > return &rt_priv(ops)->depletedq; > > } > > > > +static inline struct list_head *rt_replq(const struct scheduler *ops) > > +{ > > + return &rt_priv(ops)->replq; > > +} > > + > > /* > > - * Queue helper functions for runq and depletedq > > + * Queue helper functions for runq, depletedq > > + * and replenishment event queue > > */ > > static int > > __vcpu_on_q(const struct rt_vcpu *svc) > > @@ -228,6 +244,18 @@ __q_elem(struct list_head *elem) > > return list_entry(elem, struct rt_vcpu, q_elem); > > } > > > > +static struct rt_vcpu * > > +__replq_elem(struct list_head *elem) > > +{ > > + return list_entry(elem, struct rt_vcpu, replq_elem); > > +} > > + > > +static int > > +__vcpu_on_replq(const struct rt_vcpu *svc) > > +{ > > + return !list_empty(&svc->replq_elem); > > +} > > + > > /* > > * Debug related code, dump vcpu/cpu information > > */ > > @@ -288,7 +316,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu) > > static void > > rt_dump(const 
struct scheduler *ops) > > { > > - struct list_head *runq, *depletedq, *iter; > > + struct list_head *runq, *depletedq, *replq, *iter; > > struct rt_private *prv = rt_priv(ops); > > struct rt_vcpu *svc; > > struct rt_dom *sdom; > > @@ -301,6 +329,7 @@ rt_dump(const struct scheduler *ops) > > > > runq = rt_runq(ops); > > depletedq = rt_depletedq(ops); > > + replq = rt_replq(ops); > > > > printk("Global RunQueue info:\n"); > > list_for_each( iter, runq ) > > @@ -316,6 +345,13 @@ rt_dump(const struct scheduler *ops) > > rt_dump_vcpu(ops, svc); > > } > > > > + printk("Global Replenishment Event info:\n"); > > + list_for_each( iter, replq ) > > + { > > + svc = __replq_elem(iter); > > + rt_dump_vcpu(ops, svc); > > + } > > + > > printk("Domain info:\n"); > > list_for_each( iter, &prv->sdom ) > > { > > @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc) > > } > > > > /* > > + * Removing a vcpu from the replenishment queue could > > + * re-program the timer for the next replenishment event > > + * if the timer is currently active > > + */ > > +static inline void > > +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc) > > +{ > > + struct rt_private *prv = rt_priv(ops); > > + struct list_head *replq = rt_replq(ops); > > + struct timer* repl_timer = prv->repl_timer; > > + > > + if ( __vcpu_on_replq(svc) ) > > + { > > + /* > > + * disarm the timer if removing the first replenishment event > > + * which is going to happen next > > + */ > > + if( active_timer(repl_timer) ) > > + { > > + struct rt_vcpu *next_repl = __replq_elem(replq->next); > > + > > + if( next_repl->cur_deadline == svc->cur_deadline ) > > + repl_timer->expires = 0; > > + > > + list_del_init(&svc->replq_elem); > > + > > + /* re-arm the timer for the next replenishment event */ > > + if( !list_empty(replq) ) > > + { > > + struct rt_vcpu *svc_next = __replq_elem(replq->next); > > + set_timer(repl_timer, svc_next->cur_deadline); > > + } > > + } > > + > > This blank line should go away.. 
It is quite misleading. At very first > glance, I thought it is the else for if ( __vcpu_on_replq(svc) ); > But after second read, I found it is actually for the if( > active_timer(repl_timer) ) > Sure > > @@ -880,9 +981,11 @@ rt_schedule(const struct scheduler *ops, > s_time_t now, bool_t tasklet_work_sched > > snext->vcpu->processor = cpu; > > ret.migrated = 1; > > } > > + > > + ret.time = snext->budget; /* invoke the scheduler next time */ > > + > > } > > > > - ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */ > > ret.task = snext->vcpu; > > > > /* TRACE */ > > @@ -914,7 +1017,7 @@ static void > > rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) > > { > > struct rt_vcpu * const svc = rt_vcpu(vc); > > - > > + > > Next patch should avoid this. You may have some trailing space in the line. > git has some command to mark the trailing whitespace. > You can have a look at this post: > http://stackoverflow.com/questions/5257553/coloring-white-space-in-git-diffs-output > ok I will check that out. Dario: Is there anything else we need to pick up in this patch? Thanks, Tianyang
Hey Dario: We are aiming for the next release and would appreciate it if you can leave some comments on this version. Thanks. Tianyang On 2/8/2016 11:33 PM, Tianyang Chen wrote: > Changes since v4: > removed unnecessary replenishment queue checks in vcpu_wake() > extended replq_remove() to all cases in vcpu_sleep() > used _deadline_queue_insert() helper function for both queues > _replq_insert() and _replq_remove() program timer internally > > Changes since v3: > removed running queue. > added repl queue to keep track of repl events. > timer is now per scheduler. > timer is init on a valid cpu in a cpupool. > > Signed-off-by: Tianyang Chen <tiche@seas.upenn.edu> > Signed-off-by: Meng Xu <mengxu@cis.upenn.edu> > Signed-off-by: Dagaen Golomb <dgolomb@seas.upenn.edu> > --- > xen/common/sched_rt.c | 337 ++++++++++++++++++++++++++++++++++++------------- > 1 file changed, 251 insertions(+), 86 deletions(-) > > diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c > index 2e5430f..1f0bb7b 100644 > --- a/xen/common/sched_rt.c > +++ b/xen/common/sched_rt.c > @@ -16,6 +16,7 @@ > #include <xen/delay.h> > #include <xen/event.h> > #include <xen/time.h> > +#include <xen/timer.h> > #include <xen/perfc.h> > #include <xen/sched-if.h> > #include <xen/softirq.h> > @@ -87,7 +88,7 @@ > #define RTDS_DEFAULT_BUDGET (MICROSECS(4000)) > > #define UPDATE_LIMIT_SHIFT 10 > -#define MAX_SCHEDULE (MILLISECS(1)) > + > /* > * Flags > */ > @@ -142,6 +143,12 @@ static cpumask_var_t *_cpumask_scratch; > */ > static unsigned int nr_rt_ops; > > +/* handler for the replenishment timer */ > +static void repl_handler(void *data); > + > +/* checks if a timer is active or not */ > +bool_t active_timer(struct timer* t); > + > /* > * Systme-wide private data, include global RunQueue/DepletedQ > * Global lock is referenced by schedule_data.schedule_lock from all > @@ -152,7 +159,9 @@ struct rt_private { > struct list_head sdom; /* list of availalbe domains, used for dump */ > struct list_head runq; /* 
ordered list of runnable vcpus */ > struct list_head depletedq; /* unordered list of depleted vcpus */ > + struct list_head replq; /* ordered list of vcpus that need replenishment */ > cpumask_t tickled; /* cpus been tickled */ > + struct timer *repl_timer; /* replenishment timer */ > }; > > /* > @@ -160,6 +169,7 @@ struct rt_private { > */ > struct rt_vcpu { > struct list_head q_elem; /* on the runq/depletedq list */ > + struct list_head replq_elem;/* on the repl event list */ > > /* Up-pointers */ > struct rt_dom *sdom; > @@ -213,8 +223,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops) > return &rt_priv(ops)->depletedq; > } > > +static inline struct list_head *rt_replq(const struct scheduler *ops) > +{ > + return &rt_priv(ops)->replq; > +} > + > /* > - * Queue helper functions for runq and depletedq > + * Queue helper functions for runq, depletedq > + * and replenishment event queue > */ > static int > __vcpu_on_q(const struct rt_vcpu *svc) > @@ -228,6 +244,18 @@ __q_elem(struct list_head *elem) > return list_entry(elem, struct rt_vcpu, q_elem); > } > > +static struct rt_vcpu * > +__replq_elem(struct list_head *elem) > +{ > + return list_entry(elem, struct rt_vcpu, replq_elem); > +} > + > +static int > +__vcpu_on_replq(const struct rt_vcpu *svc) > +{ > + return !list_empty(&svc->replq_elem); > +} > + > /* > * Debug related code, dump vcpu/cpu information > */ > @@ -288,7 +316,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu) > static void > rt_dump(const struct scheduler *ops) > { > - struct list_head *runq, *depletedq, *iter; > + struct list_head *runq, *depletedq, *replq, *iter; > struct rt_private *prv = rt_priv(ops); > struct rt_vcpu *svc; > struct rt_dom *sdom; > @@ -301,6 +329,7 @@ rt_dump(const struct scheduler *ops) > > runq = rt_runq(ops); > depletedq = rt_depletedq(ops); > + replq = rt_replq(ops); > > printk("Global RunQueue info:\n"); > list_for_each( iter, runq ) > @@ -316,6 +345,13 @@ rt_dump(const struct scheduler 
*ops) > rt_dump_vcpu(ops, svc); > } > > + printk("Global Replenishment Event info:\n"); > + list_for_each( iter, replq ) > + { > + svc = __replq_elem(iter); > + rt_dump_vcpu(ops, svc); > + } > + > printk("Domain info:\n"); > list_for_each( iter, &prv->sdom ) > { > @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc) > } > > /* > + * Removing a vcpu from the replenishment queue could > + * re-program the timer for the next replenishment event > + * if the timer is currently active > + */ > +static inline void > +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct rt_private *prv = rt_priv(ops); > + struct list_head *replq = rt_replq(ops); > + struct timer* repl_timer = prv->repl_timer; > + > + if ( __vcpu_on_replq(svc) ) > + { > + /* > + * disarm the timer if removing the first replenishment event > + * which is going to happen next > + */ > + if( active_timer(repl_timer) ) > + { > + struct rt_vcpu *next_repl = __replq_elem(replq->next); > + > + if( next_repl->cur_deadline == svc->cur_deadline ) > + repl_timer->expires = 0; > + > + list_del_init(&svc->replq_elem); > + > + /* re-arm the timer for the next replenishment event */ > + if( !list_empty(replq) ) > + { > + struct rt_vcpu *svc_next = __replq_elem(replq->next); > + set_timer(repl_timer, svc_next->cur_deadline); > + } > + } > + > + else > + list_del_init(&svc->replq_elem); > + } > +} > + > +/* > + * An utility function that inserts a vcpu to a > + * queue based on certain order (EDF) > + */ > +static void > +_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem), > + struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue) > +{ > + struct list_head *iter; > + > + list_for_each(iter, queue) > + { > + struct rt_vcpu * iter_svc = (*_get_q_elem)(iter); > + if ( svc->cur_deadline <= iter_svc->cur_deadline ) > + break; > + } > + > + list_add_tail(elem, iter); > +} > + > +/* > * Insert svc with budget in RunQ according to EDF: > * vcpus with smaller 
deadlines go first. > * Insert svc without budget in DepletedQ unsorted; > @@ -397,7 +493,6 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > { > struct rt_private *prv = rt_priv(ops); > struct list_head *runq = rt_runq(ops); > - struct list_head *iter; > > ASSERT( spin_is_locked(&prv->lock) ); > > @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > > /* add svc to runq if svc still has budget */ > if ( svc->cur_budget > 0 ) > - { > - list_for_each(iter, runq) > - { > - struct rt_vcpu * iter_svc = __q_elem(iter); > - if ( svc->cur_deadline <= iter_svc->cur_deadline ) > - break; > - } > - list_add_tail(&svc->q_elem, iter); > - } > + _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq); > else > - { > list_add(&svc->q_elem, &prv->depletedq); > - } > } > > /* > + * Insert svc into the repl even list: > + * vcpus that needs to be repl earlier go first. > + * scheduler private lock serializes this operation > + * it could re-program the timer if it fires later than > + * this vcpu's cur_deadline. Also, this is used to program > + * the timer for the first time. 
> + */ > +static void > +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct list_head *replq = rt_replq(ops); > + struct rt_private *prv = rt_priv(ops); > + struct timer *repl_timer = prv->repl_timer; > + > + ASSERT( !__vcpu_on_replq(svc) ); > + > + _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, replq); > + > + if( repl_timer->expires == 0 || > + ( active_timer(repl_timer) && repl_timer->expires > svc->cur_deadline ) ) > + set_timer(repl_timer,svc->cur_deadline); > +} > + > + > +/* > * Init/Free related code > */ > static int > @@ -449,11 +559,18 @@ rt_init(struct scheduler *ops) > INIT_LIST_HEAD(&prv->sdom); > INIT_LIST_HEAD(&prv->runq); > INIT_LIST_HEAD(&prv->depletedq); > + INIT_LIST_HEAD(&prv->replq); > > cpumask_clear(&prv->tickled); > > ops->sched_data = prv; > > + /* > + * The timer initialization will happen later when > + * the first pcpu is added to this pool in alloc_pdata > + */ > + prv->repl_timer = NULL; > + > return 0; > > no_mem: > @@ -473,6 +590,10 @@ rt_deinit(const struct scheduler *ops) > xfree(_cpumask_scratch); > _cpumask_scratch = NULL; > } > + > + kill_timer(prv->repl_timer); > + xfree(prv->repl_timer); > + > xfree(prv); > } > > @@ -493,6 +614,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu) > if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) ) > return NULL; > > + if( prv->repl_timer == NULL ) > + { > + /* allocate the timer on the first cpu of this pool */ > + prv->repl_timer = xzalloc(struct timer); > + > + if(prv->repl_timer == NULL ) > + return NULL; > + > + init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu); > + } > + > /* 1 indicates alloc. 
succeed in schedule.c */ > return (void *)1; > } > @@ -586,6 +718,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd) > return NULL; > > INIT_LIST_HEAD(&svc->q_elem); > + INIT_LIST_HEAD(&svc->replq_elem); > svc->flags = 0U; > svc->sdom = dd; > svc->vcpu = vc; > @@ -609,7 +742,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv) > } > > /* > - * This function is called in sched_move_domain() in schedule.c > + * It is called in sched_move_domain() and sched_init_vcpu > + * in schedule.c > * When move a domain to a new cpupool. > * It inserts vcpus of moving domain to the scheduler's RunQ in > * dest. cpupool. > @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc) > lock = vcpu_schedule_lock_irq(vc); > if ( __vcpu_on_q(svc) ) > __q_remove(svc); > + > + if( __vcpu_on_replq(svc) ) > + __replq_remove(ops,svc); > + > vcpu_schedule_unlock_irq(lock, vc); > } > > @@ -785,44 +923,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask) > } > > /* > - * Update vcpu's budget and > - * sort runq by insert the modifed vcpu back to runq > - * lock is grabbed before calling this function > - */ > -static void > -__repl_update(const struct scheduler *ops, s_time_t now) > -{ > - struct list_head *runq = rt_runq(ops); > - struct list_head *depletedq = rt_depletedq(ops); > - struct list_head *iter; > - struct list_head *tmp; > - struct rt_vcpu *svc = NULL; > - > - list_for_each_safe(iter, tmp, runq) > - { > - svc = __q_elem(iter); > - if ( now < svc->cur_deadline ) > - break; > - > - rt_update_deadline(now, svc); > - /* reinsert the vcpu if its deadline is updated */ > - __q_remove(svc); > - __runq_insert(ops, svc); > - } > - > - list_for_each_safe(iter, tmp, depletedq) > - { > - svc = __q_elem(iter); > - if ( now >= svc->cur_deadline ) > - { > - rt_update_deadline(now, svc); > - __q_remove(svc); /* remove from depleted queue */ > - __runq_insert(ops, svc); /* add to runq */ > - } > - } > -} > - > -/* > * schedule 
function for rt scheduler. > * The lock is already grabbed in schedule.c, no need to lock here > */ > @@ -841,7 +941,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > /* burn_budget would return for IDLE VCPU */ > burn_budget(ops, scurr, now); > > - __repl_update(ops, now); > > if ( tasklet_work_scheduled ) > { > @@ -868,6 +967,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > set_bit(__RTDS_delayed_runq_add, &scurr->flags); > > snext->last_start = now; > + > + ret.time = -1; /* if an idle vcpu is picked */ > if ( !is_idle_vcpu(snext->vcpu) ) > { > if ( snext != scurr ) > @@ -880,9 +981,11 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > snext->vcpu->processor = cpu; > ret.migrated = 1; > } > + > + ret.time = snext->budget; /* invoke the scheduler next time */ > + > } > > - ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */ > ret.task = snext->vcpu; > > /* TRACE */ > @@ -914,7 +1017,7 @@ static void > rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu * const svc = rt_vcpu(vc); > - > + > BUG_ON( is_idle_vcpu(vc) ); > SCHED_STAT_CRANK(vcpu_sleep); > > @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) > __q_remove(svc); > else if ( svc->flags & RTDS_delayed_runq_add ) > clear_bit(__RTDS_delayed_runq_add, &svc->flags); > + > + if( __vcpu_on_replq(svc) ) > + __replq_remove(ops, svc); > } > > /* > @@ -1026,10 +1132,6 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu * const svc = rt_vcpu(vc); > s_time_t now = NOW(); > - struct rt_private *prv = rt_priv(ops); > - struct rt_vcpu *snext = NULL; /* highest priority on RunQ */ > - struct rt_dom *sdom = NULL; > - cpumask_t *online; > > BUG_ON( is_idle_vcpu(vc) ); > > @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > else > SCHED_STAT_CRANK(vcpu_wake_not_runnable); > > + /* 
budget repl here is needed before inserting back to runq. If so, > + * it should be re-inserted back to the replenishment queue. > + */ > + if ( now >= svc->cur_deadline) > + { > + rt_update_deadline(now, svc); > + __replq_remove(ops, svc); > + } > + > + if( !__vcpu_on_replq(svc) ) > + __replq_insert(ops, svc); > + > /* If context hasn't been saved for this vcpu yet, we can't put it on > * the Runqueue/DepletedQ. Instead, we set a flag so that it will be > * put on the Runqueue/DepletedQ after the context has been saved. > @@ -1061,22 +1175,10 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > return; > } > > - if ( now >= svc->cur_deadline) > - rt_update_deadline(now, svc); > - > /* insert svc to runq/depletedq because svc is not in queue now */ > __runq_insert(ops, svc); > > - __repl_update(ops, now); > - > - ASSERT(!list_empty(&prv->sdom)); > - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); > - online = cpupool_domain_cpumask(sdom->dom); > - snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */ > - > - runq_tickle(ops, snext); > - > - return; > + runq_tickle(ops, svc); > } > > /* > @@ -1087,10 +1189,6 @@ static void > rt_context_saved(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu *svc = rt_vcpu(vc); > - struct rt_vcpu *snext = NULL; > - struct rt_dom *sdom = NULL; > - struct rt_private *prv = rt_priv(ops); > - cpumask_t *online; > spinlock_t *lock = vcpu_schedule_lock_irq(vc); > > clear_bit(__RTDS_scheduled, &svc->flags); > @@ -1102,14 +1200,7 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc) > likely(vcpu_runnable(vc)) ) > { > __runq_insert(ops, svc); > - __repl_update(ops, NOW()); > - > - ASSERT(!list_empty(&prv->sdom)); > - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); > - online = cpupool_domain_cpumask(sdom->dom); > - snext = __runq_pick(ops, online); /* pick snext from ALL cpus */ > - > - runq_tickle(ops, snext); > + runq_tickle(ops, svc); > } > out: > 
vcpu_schedule_unlock_irq(lock, vc); > @@ -1168,6 +1259,80 @@ rt_dom_cntl( > return rc; > } > > +/* > + * The replenishment timer handler picks vcpus > + * from the replq and does the actual replenishment > + */ > +static void repl_handler(void *data){ > + unsigned long flags; > + s_time_t now = NOW(); > + struct scheduler *ops = data; > + struct rt_private *prv = rt_priv(ops); > + struct list_head *replq = rt_replq(ops); > + struct timer *repl_timer = prv->repl_timer; > + struct list_head *iter, *tmp; > + struct rt_vcpu *svc = NULL; > + > + spin_lock_irqsave(&prv->lock, flags); > + > + stop_timer(repl_timer); > + > + list_for_each_safe(iter, tmp, replq) > + { > + svc = __replq_elem(iter); > + > + if ( now >= svc->cur_deadline ) > + { > + rt_update_deadline(now, svc); > + > + /* > + * when the replenishment happens > + * svc is either on a pcpu or on > + * runq/depletedq > + */ > + if( __vcpu_on_q(svc) ) > + { > + /* put back to runq */ > + __q_remove(svc); > + __runq_insert(ops, svc); > + } > + > + /* > + * tickle regardless where it's at > + * because a running vcpu could have > + * a later deadline than others after > + * replenishment > + */ > + runq_tickle(ops, svc); > + > + /* update replenishment event queue */ > + __replq_remove(ops, svc); > + __replq_insert(ops, svc); > + } > + > + else > + break; > + } > + > + /* > + * use the vcpu that's on the top > + * or else don't program the timer > + */ > + if( !list_empty(replq) ) > + set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline); > + > + spin_unlock_irqrestore(&prv->lock, flags); > + > +} > + > +/* checks if a timer has been stopped or not */ > +bool_t active_timer(struct timer *timer) > +{ > + ASSERT(timer->status >= TIMER_STATUS_inactive); > + ASSERT(timer->status <= TIMER_STATUS_in_list); > + return (timer->status >= TIMER_STATUS_in_heap); > +} > + > static struct rt_private _rt_priv; > > static const struct scheduler sched_rtds_def = { >
Hey, Here I am, sorry for the delay. :-( On Mon, 2016-02-08 at 23:33 -0500, Tianyang Chen wrote: > Changes since v4: > removed unnecessary replenishment queue checks in vcpu_wake() > extended replq_remove() to all cases in vcpu_sleep() > used _deadline_queue_insert() helper function for both queues > _replq_insert() and _replq_remove() program timer internally > > Changes since v3: > removed running queue. > added repl queue to keep track of repl events. > timer is now per scheduler. > timer is init on a valid cpu in a cpupool. > > Signed-off-by: Tianyang Chen <tiche@seas.upenn.edu> > Signed-off-by: Meng Xu <mengxu@cis.upenn.edu> > Signed-off-by: Dagaen Golomb <dgolomb@seas.upenn.edu> > So, the actual changelog... why did it disappear? :-) > diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c > index 2e5430f..1f0bb7b 100644 > --- a/xen/common/sched_rt.c > +++ b/xen/common/sched_rt.c > @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc) > } > > /* > + * Removing a vcpu from the replenishment queue could > + * re-program the timer for the next replenishment event > + * if the timer is currently active > + */ > +static inline void > +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct rt_private *prv = rt_priv(ops); > + struct list_head *replq = rt_replq(ops); > + struct timer* repl_timer = prv->repl_timer; > + > + if ( __vcpu_on_replq(svc) ) > + { So, can this be false? If yes, when and why? > + /* > + * disarm the timer if removing the first replenishment > event > + * which is going to happen next > + */ > + if( active_timer(repl_timer) ) > + { > And here again, isn't it the case that, if there is a vcpu in the replenishment queue, then the timer _must_ be active? (I.e., if the above is true, isn't this also always true?) > + struct rt_vcpu *next_repl = __replq_elem(replq->next); > + > + if( next_repl->cur_deadline == svc->cur_deadline ) > + repl_timer->expires = 0; > + I think we need to call stop_timer() here, don't we? 
I know that set_timer() does that itself, but I think it is indeed necessary. E.g., right now, you're manipulating the timer without the timer lock. > + list_del_init(&svc->replq_elem); > + > + /* re-arm the timer for the next replenishment event */ > + if( !list_empty(replq) ) > + { > + struct rt_vcpu *svc_next = __replq_elem(replq- > >next); > + set_timer(repl_timer, svc_next->cur_deadline); > + } > + } > + > + else > + list_del_init(&svc->replq_elem); > I don't like the structure of this function, and I'd ask for a different if/then/else logic to be used, but let's first see --by answering the questions above-- if some of these if-s can actually go away. :-) > + } > +} > + > +/* > + * An utility function that inserts a vcpu to a > + * queue based on certain order (EDF) > End long comments with a full stop. > + */ > +static void > +_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct > list_head *elem), > There's really a lot of "underscore-prefixing" in this file. This is, of course, not your fault, and should not be addressed in this patch. But, at least when it comes to new code, let's avoid making things worse. So, "deadline_queue_insert()" is just ok, I think. Yes, I know I did suggest to call it how it's called in this patch, underscore included, but I now think it would be better to get rid of that. > + struct rt_vcpu *svc, struct list_head *elem, struct list_head > *queue) > +{ > + struct list_head *iter; > + > + list_for_each(iter, queue) > + { > + struct rt_vcpu * iter_svc = (*_get_q_elem)(iter); > + if ( svc->cur_deadline <= iter_svc->cur_deadline ) > + break; > This looks too much indented. 
> @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, > struct rt_vcpu *svc) > > /* add svc to runq if svc still has budget */ > if ( svc->cur_budget > 0 ) > - { > - list_for_each(iter, runq) > - { > - struct rt_vcpu * iter_svc = __q_elem(iter); > - if ( svc->cur_deadline <= iter_svc->cur_deadline ) > - break; > - } > - list_add_tail(&svc->q_elem, iter); > - } > + _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq); > else > - { > list_add(&svc->q_elem, &prv->depletedq); > - } Can we ASSERT() something about a replenishment event being queued, in either case? As I said already, use full words in comments ("Insert svc in the replenishment timer queue" or "Insert svc in the replenishment events list"). > + * vcpus that needs to be repl earlier go first. > Ditto. And this can just be something like "in replenishment time order", added to the sentence above. > + * scheduler private lock serializes this operation > This is true for _everything_ in this scheduler right now, so there's no real need to say it. > + * it could re-program the timer if it fires later than > + * this vcpu's cur_deadline. > Do you mean "The timer may be re-programmed if svc is inserted at the front of the events list" ? > Also, this is used to program > + * the timer for the first time. 
> "Also, this is what programs the timer the first time, when called from xxx" > + */ > +static void > +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct list_head *replq = rt_replq(ops); > + struct rt_private *prv = rt_priv(ops); > + struct timer *repl_timer = prv->repl_timer; > + > + ASSERT( !__vcpu_on_replq(svc) ); > + > + _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, > replq); > + > + if( repl_timer->expires == 0 || > + ( active_timer(repl_timer) && repl_timer->expires > svc- > >cur_deadline ) ) > + set_timer(repl_timer,svc->cur_deadline); > Is it the case that you need to call set_timer() if (and only if) svc has been inserted at the *front* of the replenishment queue? (If list was empty, as in the case of first timer activation, it would be the first and only.) If that is the case, I'd just make deadline_queue_insert() return a flag to that effect (e.g., it can return 1 if svc is the new first element, 0 if it is not), and use the flag itself to guard the (re-)arm of the timer... What do you think? > @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, > struct vcpu *vc) > lock = vcpu_schedule_lock_irq(vc); > if ( __vcpu_on_q(svc) ) > __q_remove(svc); > + > + if( __vcpu_on_replq(svc) ) > + __replq_remove(ops,svc); > + As per the code in this patch looks like, you're checking __vcpu_on_replq(svc) twice. In fact, you do it here and inside __replq_remove(). I've already said that I'd like for the check inside __rqplq_remove() to go away so, keep this one here (if it's really necessary) and kill the other one inside the function. > @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, > struct vcpu *vc) > __q_remove(svc); > else if ( svc->flags & RTDS_delayed_runq_add ) > clear_bit(__RTDS_delayed_runq_add, &svc->flags); > + > + if( __vcpu_on_replq(svc) ) > + __replq_remove(ops, svc); > Same here. 
> @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, > struct vcpu *vc) > else > SCHED_STAT_CRANK(vcpu_wake_not_runnable); > > + /* budget repl here is needed before inserting back to runq. If > so, > Comment style. Also, what do you mean with that "If so"... "If so" what? > + * it should be re-inserted back to the replenishment queue. > + */ > + if ( now >= svc->cur_deadline) > + { > + rt_update_deadline(now, svc); > + __replq_remove(ops, svc); > + } > + > + if( !__vcpu_on_replq(svc) ) > + __replq_insert(ops, svc); > + And here I am again: is it really necessary to check whether svc is not in the replenishment queue? It looks to me that it really should not be there... but maybe it can, because we remove the event from the queue when the vcpu sleeps, but *not* when the vcpu blocks? > @@ -1168,6 +1259,80 @@ rt_dom_cntl( > return rc; > } > > +/* > + * The replenishment timer handler picks vcpus > + * from the replq and does the actual replenishment > + */ > +static void repl_handler(void *data){ > + unsigned long flags; > + s_time_t now = NOW(); > + struct scheduler *ops = data; > + struct rt_private *prv = rt_priv(ops); > + struct list_head *replq = rt_replq(ops); > + struct timer *repl_timer = prv->repl_timer; > + struct list_head *iter, *tmp; > + struct rt_vcpu *svc = NULL; > + > + spin_lock_irqsave(&prv->lock, flags); > + > + stop_timer(repl_timer); > + > + list_for_each_safe(iter, tmp, replq) > + { > + svc = __replq_elem(iter); > + > + if ( now >= svc->cur_deadline ) > + { > + rt_update_deadline(now, svc); > + > + /* > + * when the replenishment happens > + * svc is either on a pcpu or on > + * runq/depletedq > + */ > + if( __vcpu_on_q(svc) ) > + { > + /* put back to runq */ > + __q_remove(svc); > + __runq_insert(ops, svc); > + } > + > + /* > + * tickle regardless where it's at > + * because a running vcpu could have > + * a later deadline than others after > + * replenishment > + */ > + runq_tickle(ops, svc); > + > + /* update replenishment event 
queue */ > + __replq_remove(ops, svc); > + __replq_insert(ops, svc); > + } > + > + else > + break; > Invert the logic here: if ( now < svc->cur_deadline ) break; rt_update_deadline(now, svc); ... Thanks and Regards, Dario
On 2/24/2016 9:02 PM, Dario Faggioli wrote: > Hey, > > Here I am, sorry for the delay. :-( No problem, I think we are almost there. > > On Mon, 2016-02-08 at 23:33 -0500, Tianyang Chen wrote: >> Changes since v4: >> removed unnecessary replenishment queue checks in vcpu_wake() >> extended replq_remove() to all cases in vcpu_sleep() >> used _deadline_queue_insert() helper function for both queues >> _replq_insert() and _replq_remove() program timer internally >> >> Changes since v3: >> removed running queue. >> added repl queue to keep track of repl events. >> timer is now per scheduler. >> timer is init on a valid cpu in a cpupool. >> >> Signed-off-by: Tianyang Chen <tiche@seas.upenn.edu> >> Signed-off-by: Meng Xu <mengxu@cis.upenn.edu> >> Signed-off-by: Dagaen Golomb <dgolomb@seas.upenn.edu> >> > So, the actual changelog... why did it disappear? :-) I will add it in the next version. >> diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c >> index 2e5430f..1f0bb7b 100644 >> --- a/xen/common/sched_rt.c >> +++ b/xen/common/sched_rt.c >> @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc) >> } >> >> /* >> + * Removing a vcpu from the replenishment queue could >> + * re-program the timer for the next replenishment event >> + * if the timer is currently active >> + */ >> +static inline void >> +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc) >> +{ >> + struct rt_private *prv = rt_priv(ops); >> + struct list_head *replq = rt_replq(ops); >> + struct timer* repl_timer = prv->repl_timer; >> + >> + if ( __vcpu_on_replq(svc) ) >> + { > So, can this be false? If yes, when and why? > >> + /* >> + * disarm the timer if removing the first replenishment >> event >> + * which is going to happen next >> + */ >> + if( active_timer(repl_timer) ) >> + { >> > And here again, isn't it the case that, if there is a vcpu in the > replenishment queue, then the timer _must_ be active? (I.e., if the > above is true, isn't this also always true?) 
So, the reason for this check is that the code inside timer handler also calls this function when timer is stopped. We don't want to start the timer inside the timer handler when it's manipulating the replenishment queue right? Because it could be removing/adding back more than one vcpu and the timer should only be set at the very end of the timer handler. In fact, I copied a static function from timer.c just to check if a timer is active or not. Not sure if this is a good practice or not. >> + struct rt_vcpu *next_repl = __replq_elem(replq->next); >> + >> + if( next_repl->cur_deadline == svc->cur_deadline ) >> + repl_timer->expires = 0; >> + > I think we need to call stop_timer() here, don't we? I know that > set_timer() does that itself, but I think it is indeed necessary. E.g., > right now, you're manipulating the timer without the timer lock. So when the timer handler is protected by a global lock, it is still necessary to stop the timer? Also, the timer only runs on a single CPU for a scheduler too. >> + list_del_init(&svc->replq_elem); >> + >> + /* re-arm the timer for the next replenishment event */ >> + if( !list_empty(replq) ) >> + { >> + struct rt_vcpu *svc_next = __replq_elem(replq- >>> next); >> + set_timer(repl_timer, svc_next->cur_deadline); >> + } >> + } >> + >> + else >> + list_del_init(&svc->replq_elem); >> > I don't like the structure of this function, and I'd ask for a > different if/then/else logic to be used, but let's first see --by > answering the questions above-- if some of these if-s can actually go > away. 
:-) So this function reduces to: /* * disarm the timer if removing the first replenishment event * which is going to happen next */ if( active_timer(repl_timer) ) { stop_timer(repl_timer); repl_timer->expires = 0; list_del_init(&svc->replq_elem); /* re-arm the timer for the next replenishment event */ if( !list_empty(replq) ) { struct rt_vcpu *svc_next = __replq_elem(replq->next); set_timer(repl_timer, svc_next->cur_deadline); } } else list_del_init(&svc->replq_elem); >> + } >> +} >> + >> +/* >> + * An utility function that inserts a vcpu to a >> + * queue based on certain order (EDF) >> > End long comments with a full stop. > >> + */ >> +static void >> +_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct >> list_head *elem), >> > There's really a lot of "underscore-prefixing" in this file. > > This is, of course, not your fault, and should not be addressed in this > patch. But, at least when it comes to new code, let's avoid making > things worse. > > So, "deadline_queue_insert()" is just ok, I think. Yes, I know I did > suggest to call it how it's called in this patch, underscore included, > but I now think it would be better to get rid of that. Sure. >> + struct rt_vcpu *svc, struct list_head *elem, struct list_head >> *queue) >> +{ >> + struct list_head *iter; >> + >> + list_for_each(iter, queue) >> + { >> + struct rt_vcpu * iter_svc = (*_get_q_elem)(iter); >> + if ( svc->cur_deadline <= iter_svc->cur_deadline ) >> + break; >> > This looks too much indented.
>> @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, >> struct rt_vcpu *svc) >> >> /* add svc to runq if svc still has budget */ >> if ( svc->cur_budget > 0 ) >> - { >> - list_for_each(iter, runq) >> - { >> - struct rt_vcpu * iter_svc = __q_elem(iter); >> - if ( svc->cur_deadline <= iter_svc->cur_deadline ) >> - break; >> - } >> - list_add_tail(&svc->q_elem, iter); >> - } >> + _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq); >> else >> - { >> list_add(&svc->q_elem, &prv->depletedq); >> - } > Can we ASSERT() something about a replenishment event being queued, in > either case? Do you mean asserting if the svc added is on queue after being inserted in either case? > As I said already, use full words in comments ("Insert svc in the > replenishment timer queue" or "Insert svc in the replenishment events > list"). > >> + * vcpus that needs to be repl earlier go first. >> > Ditto. And this can just be something like "in replenishment time > order", added to the sentence above. > >> + * scheduler private lock serializes this operation >> > This is true for _everything_ in this scheduler right now, so there's > no real need to say it. > >> + * it could re-program the timer if it fires later than >> + * this vcpu's cur_deadline. >> > Do you mean "The timer may be re-programmed if svc is inserted at the > front of the events list" ? > >> Also, this is used to program >> + * the timer for the first time. >> > "Also, this is what programs the timer the first time, when called from > xxx" I agree with above and for this one, if svc is going to be in the front of the list, it programs the timer. Or in the other case it sees that repl_timer->expires == 0, which is either during system boot or when the only vcpu on the replenishment list has been removed, in replq_remove(). So it's not called anywhere in such sense. Maybe "It programs the timer for the first time, or when the only vcpu on the replenishment event list has been removed"?
>> + */ >> +static void >> +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc) >> +{ >> + struct list_head *replq = rt_replq(ops); >> + struct rt_private *prv = rt_priv(ops); >> + struct timer *repl_timer = prv->repl_timer; >> + >> + ASSERT( !__vcpu_on_replq(svc) ); >> + >> + _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, >> replq); >> + >> + if( repl_timer->expires == 0 || >> + ( active_timer(repl_timer) && repl_timer->expires > svc- >>> cur_deadline ) ) >> + set_timer(repl_timer,svc->cur_deadline); >> > Is it the case that you need to call set_timer() if (and only if) svc > has been inserted at the *front* of the replenishment queue? (If list > was empty, as in the case of first timer activation, it would be the > first and only.) > > If that is the case, I'd just make deadline_queue_insert() return a > flag to that effect (e.g., it can return 1 if svc is the new first > element, 0 if it is not), and use the flag itself to guard the (re-)arm > of the timer... What do you think? Yeah good call. >> @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, >> struct vcpu *vc) >> lock = vcpu_schedule_lock_irq(vc); >> if ( __vcpu_on_q(svc) ) >> __q_remove(svc); >> + >> + if( __vcpu_on_replq(svc) ) >> + __replq_remove(ops,svc); >> + > As per the code in this patch looks like, you're > checking __vcpu_on_replq(svc) twice. In fact, you do it here and inside > __replq_remove(). I've already said that I'd like for the check inside > __rqplq_remove() to go away so, keep this one here (if it's really > necessary) and kill the other one inside the function. > >> @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, >> struct vcpu *vc) >> __q_remove(svc); >> else if ( svc->flags & RTDS_delayed_runq_add ) >> clear_bit(__RTDS_delayed_runq_add, &svc->flags); >> + >> + if( __vcpu_on_replq(svc) ) >> + __replq_remove(ops, svc); >> > Same here. So, what about __q_remove()? It looks like couple places are double checking as well. 
>> @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, >> struct vcpu *vc) >> else >> SCHED_STAT_CRANK(vcpu_wake_not_runnable); >> >> + /* budget repl here is needed before inserting back to runq. If >> so, >> > Comment style. > > Also, what do you mean with that "If so"... "If so" what? I mean if the budget is replenished here, then re-insert back. I guess it's self-explanatory. >> + * it should be re-inserted back to the replenishment queue. >> + */ >> + if ( now >= svc->cur_deadline) >> + { >> + rt_update_deadline(now, svc); >> + __replq_remove(ops, svc); >> + } >> + >> + if( !__vcpu_on_replq(svc) ) >> + __replq_insert(ops, svc); >> + > And here I am again: is it really necessary to check whether svc is not > in the replenishment queue? It looks to me that it really should not be > there... but maybe it can, because we remove the event from the queue > when the vcpu sleeps, but *not* when the vcpu blocks? Yeah. That is the case where I keep getting assertion failure if it's removed. I'm thinking when a vcpu unblocks, it could potentially fall through here. And like you said, mostly spurious sleep happens when a vcpu is running and it could happen in other cases, although rare. Thanks, Tianyang
On Thu, 2016-02-25 at 01:15 -0500, Tianyang Chen wrote: > > On 2/24/2016 9:02 PM, Dario Faggioli wrote: > > Hey, > > > > Here I am, sorry for the delay. :-( > No problem, I think we are almost there. > We probably are, although a few more adjustments in the timer logic is required, IMO. I'll try to be quick in replying to next rounds. :-) > > On Mon, 2016-02-08 at 23:33 -0500, Tianyang Chen wrote: > > > > > > diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c > > > index 2e5430f..1f0bb7b 100644 > > > --- a/xen/common/sched_rt.c > > > +++ b/xen/common/sched_rt.c > > > @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc) > > > } > > > > > > /* > > > + * Removing a vcpu from the replenishment queue could > > > + * re-program the timer for the next replenishment event > > > + * if the timer is currently active > > > + */ > > > +static inline void > > > +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc) > > > +{ > > > + struct rt_private *prv = rt_priv(ops); > > > + struct list_head *replq = rt_replq(ops); > > > + struct timer* repl_timer = prv->repl_timer; > > > + > > > + if ( __vcpu_on_replq(svc) ) > > > + { > > So, can this be false? If yes, when and why? > > > > > + /* > > > + * disarm the timer if removing the first replenishment > > > event > > > + * which is going to happen next > > > + */ > > > + if( active_timer(repl_timer) ) > > > + { > > > > > And here again, isn't it the case that, if there is a vcpu in the > > replenishment queue, then the timer _must_ be active? (I.e., if the > > above is true, isn't this also always true?) > > So, the reason for this check is that the code inside timer handler > also > calls this function when > timer is stopped. We don't want to start the timer inside the timer > handler when it's manipulating > the replenishment queue right? 
> So, this function and the timer handler are serialized by the global RTDS scheduler lock, so I don't see chances for races due to concurrent execution of them from different pcpus. And for the case when the timer handler calls this, if that is the *only* reason why you put this check here, I think it's overkill. Perhaps this can be dealt with in the timer handler, by just doing the list_del_init() itself (ugly, but maybe with a comment...). Or we can make __replq_inset() able to deal with re-insertions, i.e., cases when the vcpu was already on the replenishment queue, and we're basically changing it's position (slightly better, but we loose the ASSERT() in __replq_insert() itself). Or we can even split this function in two, one (e.g., _replq_remove()) only doing the actual dequeueing and another one (e.g., replq_remove()) calling _replq_remove() for the dequeueing and then dealing with the timer (and in this case, the timer handler can just call _replq_remove(), as, in that case, the reprogramming will be later anyway. I can't be sure which solution would look and work best without actually seeing them... I guess, pick one up and give it a try. > Because it could be removing/adding back > more than one vcpu > and the timer should only be set at the very end of the timer > handler. > Ok, so that's why there's a similar check (which I also did not like) in __replq_insert() then, I see it now. > In fact, I copied a static function from timer.c just to check if a > timer is active or not. Not sure if > this is a good practice or not. > Yes, I wanted to comment on that too, but then forgot. Indeed it is *not* a good practise. If you need anything like that, you should expose the original function (by making it !static and adding it to an header). 
However, all the times that something like that happens, it means that we're abusing the intended API for a particular subsystem, and should serve as a warning for thinking twice whether there really is not another way of doing things (of course, there are cases where there aren't, but this should not be one of those). > > > + struct rt_vcpu *next_repl = __replq_elem(replq- > > > >next); > > > + > > > + if( next_repl->cur_deadline == svc->cur_deadline ) > > > + repl_timer->expires = 0; > > > + > > I think we need to call stop_timer() here, don't we? I know that > > set_timer() does that itself, but I think it is indeed necessary. > > E.g., > > right now, you're manipulating the timer without the timer lock. > > So when the timer handler is protected by a global lock, it is still > necessary to stop the timer? > What do you mean with "a global lock"? Timers are maintained in the per-cpu heap. timer->expires is what's used to keep the heap... well.. a heap, not to mention that stopping a timer may call for some action on the timers of a cpu (e.g., look at deactivate_timer() and at the fact that it may or may not actually raise TIMER_SOFTIRQ). So, stopping and starting a timer needs to be done with the proper timer API calls, because the timer infrastructure needs to know that, and the key of the heap can't be just changed behind the timer infrastructure's back, and without the timer lock (check, in the whole xen source code, how often that happens, outside of timer.c: basically never! :-D). > Also, the timer only runs on a single CPU for a scheduler too. > That's how all timers work, and I don't see how that matters here. > So this function reduces to: > > /* > * disarm the timer if removing the first replenishment > event > * which is going to happen next > */ > if( active_timer(repl_timer) ) > This needs to somehow go away. > { > stop_timer(replq_timer); > repl_timer->expires = 0; > And you shouldn't need to explicitly set this to 0.
It's stopped, so it won't fire, and that's it. > list_del_init(&svc->replq_elem); > > /* re-arm the timer for the next replenishment event */ > if( !list_empty(replq) ) > { > struct rt_vcpu *svc_next = __replq_elem(replq- > >next); > set_timer(repl_timer, svc_next->cur_deadline); > } > BTW, here, if we're coming from the timer handler, it is certainly the case that we are removing one or more vcpus from the front of the replenishment queue, including the one that actually made the timer trigger. Therefore, it's ok to restart the timer, if there are replenishment events left, and the one that now becomes the front of the queue, is the one we want to use for reprogramming. But, if we come from, say, sleep, and we've removed a vcpu the replenishment event for which was, say, at 5th place in the queue, there should be no need to re-program the timer, should it? So, it looks to me that this function can also use some hints on whether the timer should really be re-programmed or not. > @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, > > > struct rt_vcpu *svc) > > > > > > /* add svc to runq if svc still has budget */ > > > if ( svc->cur_budget > 0 ) > > > - { > > > - list_for_each(iter, runq) > > > - { > > > - struct rt_vcpu * iter_svc = __q_elem(iter); > > > - if ( svc->cur_deadline <= iter_svc->cur_deadline ) > > > - break; > > > - } > > > - list_add_tail(&svc->q_elem, iter); > > > - } > > > + _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, > > > runq); > > > else > > > - { > > > list_add(&svc->q_elem, &prv->depletedq); > > > - } > > Can we ASSERT() something about a replenishment event being queued, > > in > > either case? > Do you mean asserting if the svc added is on queue after being > inserted > in either case? > Not sure... I'm thinking whether it is an invariant that, if a vcpu is inserted in either the runqueue or the depletedq, there should be a replenishment event queued for it.
In both cases, it would be the event that will give the vcpu itself its full budget, no matter whether it has still some available (in the former case) or has already run out of it (in the latter case). So, if that is the case, I think we should put down an ASSERT(__vcpu_on_replq(svc)) (or something like that). > > > + * it could re-program the timer if it fires later than > > > + * this vcpu's cur_deadline. > > > > > Do you mean "The timer may be re-programmed if svc is inserted at > > the > > front of the events list" ? > > > > > Also, this is used to program > > > + * the timer for the first time. > > > > > "Also, this is what programs the timer the first time, when called > > from > > xxx" > I agree with above and for this one, if svc is going to be in the > front > of the list, it programs the timer. > Good. :-) > Or in the other case It sees that repl_timer->expires == 0, which is > either during system boot or one the only one vcpu > on the replenishment list has been removed, in replq_remove(). So > it's > not called anywhere in such sense. > Sorry, I don't get this last part... probably because I really don't like (and find it rather hard to follow) this reasoning about expires being (artificially) 0. I really think we should get rid of that part. > Maybe "It programs the timer for the first timer, or when the only > vcpu > on replenishment event list has > been removed"? > Again, what does it mean "or when the only vcpu on replenishment event list has been removed"? If there is no one in the replenishment queue, why do we need to reprogram? If it's for us, why we're not in the replenishment queue? And if this is about some interdependency between __replq_remove() and __replq_insert() in some special call sites, such special case needs to be dealt with locally, in that call site, not by having obscure counter measures here, for which any user of this function pays the price (in terms of both performance and code being harder to understand). 
> > > @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, > > > struct vcpu *vc) > > > lock = vcpu_schedule_lock_irq(vc); > > > if ( __vcpu_on_q(svc) ) > > > __q_remove(svc); > > > + > > > + if( __vcpu_on_replq(svc) ) > > > + __replq_remove(ops,svc); > > > + > > As per the code in this patch looks like, you're > > checking __vcpu_on_replq(svc) twice. In fact, you do it here and > > inside > > __replq_remove(). I've already said that I'd like for the check > > inside > > __rqplq_remove() to go away so, keep this one here (if it's really > > necessary) and kill the other one inside the function. > > > > > @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, > > > struct vcpu *vc) > > > __q_remove(svc); > > > else if ( svc->flags & RTDS_delayed_runq_add ) > > > clear_bit(__RTDS_delayed_runq_add, &svc->flags); > > > + > > > + if( __vcpu_on_replq(svc) ) > > > + __replq_remove(ops, svc); > > > > > Same here. > So, what about __q_remove()? It looks like couple places are double > checking as well. > Indeed they are. They're bugs. :-D Not in the sense that they make the code explode, or misbehave, of course, but I still dislike them rather passionately. If, after this patch will be done and checked in, you guys would want to take a stab at sanitizing that too, it would be awesome. :-) > > > @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, > > > struct vcpu *vc) > > > else > > > SCHED_STAT_CRANK(vcpu_wake_not_runnable); > > > > > > + /* budget repl here is needed before inserting back to runq. > > > If > > > so, > > > > > Comment style. > > > > Also, what do you mean with that "If so"... "If so" what? > I mean if the budget is replenished here, then re-insert back. I > guess > it's self-explanatory. > I still can't parse that. Maybe: "If deadline passed while svc was asleep/blocked, we need new scheduling parameters (a new deadline and full budget), and we also need to schedule a new replenishment event, according to them." 
But then again, I think we should try to enforce the fact that, if a vcpu is not runnable, there's no pending replenishment for it, and make it an invariant we can throw ASSERT() against. We're already doing it for sleeps, after all... > > > + * it should be re-inserted back to the replenishment queue. > > > + */ > > > + if ( now >= svc->cur_deadline) > > > + { > > > + rt_update_deadline(now, svc); > > > + __replq_remove(ops, svc); > > > + } > > > + > > > + if( !__vcpu_on_replq(svc) ) > > > + __replq_insert(ops, svc); > > > + > > And here I am again: is it really necessary to check whether svc is > > not > > in the replenishment queue? It looks to me that it really should > > not be > > there... but maybe it can, because we remove the event from the > > queue > > when the vcpu sleeps, but *not* when the vcpu blocks? > Yeah. That is the case where I keep getting assertion failure if > it's > removed. > Which one ASSERT() fails? > I'm thinking when > a vcpu unblocks, it could potentially fall through here. > Well, when unblocking, wake() is certainly called, yes. > And like you > said, mostly spurious sleep > happens when a vcpu is running and it could happen in other cases, > although rare. > I think I said already there's no such thing as "spurious sleep". Or at least, I can't think of anything that I would define a spurious sleep, if you do, please, explain what situation you're referring to. In any case, one way of dealing with vcpus blocking/offlining/etc could be to, in context_saved(), in case we are not adding the vcpu back to the runq, cancel its replenishment event with __replq_remove(). (This may make it possible to avoid doing it in rt_vcpu_sleep() too, but you'll need to check and test.) Can you give this a try. Thanks and Regards, Dario
On 2/25/2016 5:34 AM, Dario Faggioli wrote: >>>> + * it should be re-inserted back to the replenishment queue. >>>> + */ >>>> + if ( now >= svc->cur_deadline) >>>> + { >>>> + rt_update_deadline(now, svc); >>>> + __replq_remove(ops, svc); >>>> + } >>>> + >>>> + if( !__vcpu_on_replq(svc) ) >>>> + __replq_insert(ops, svc); >>>> + >>> And here I am again: is it really necessary to check whether svc is >>> not >>> in the replenishment queue? It looks to me that it really should >>> not be >>> there... but maybe it can, because we remove the event from the >>> queue >>> when the vcpu sleeps, but *not* when the vcpu blocks? >> Yeah. That is the case where I keep getting assertion failure if >> it's >> removed. >> > Which one ASSERT() fails? > The replq_insert() fails because it's already on the replenishment queue when rt_vcpu_wake() is trying to insert a vcpu again. (XEN) Assertion '!__vcpu_on_replq(svc)' failed at sched_rt.c:527 (XEN) ----[ Xen-4.7-unstable x86_64 debug=y Tainted: C ]---- (XEN) CPU: 0 (XEN) RIP: e008:[<ffff82d08012a003>] sched_rt.c#rt_vcpu_wake+0xf0/0x17f (XEN) RFLAGS: 0000000000010002 CONTEXT: hypervisor (d0v0) (XEN) rax: 0000000000000001 rbx: ffff83023b522940 rcx: 0000000000000001 (XEN) rdx: 00000031bb1b9980 rsi: ffff82d080342318 rdi: ffff83023b486ca0 (XEN) rbp: ffff8300bfcffd88 rsp: ffff8300bfcffd58 r8: 0000000000000004 (XEN) r9: 00000000deadbeef r10: ffff82d08025f5c0 r11: 0000000000000206 (XEN) r12: ffff83023b486ca0 r13: ffff8300bfd46000 r14: ffff82d080299b80 (XEN) r15: ffff83023b522d80 cr0: 0000000080050033 cr4: 00000000000406a0 (XEN) cr3: 0000000231c0d000 cr2: ffff880001e80ba8 (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: e010 cs: e008 (XEN) Xen stack trace from rsp=ffff8300bfcffd58: (XEN) ffff8300bfcffd70 ffff8300bfd46000 0000000216110572 ffff83023b522940 (XEN) ffff82d08032bc00 0000000000000282 ffff8300bfcffdd8 ffff82d08012be0c (XEN) ffff83023b4b5000 ffff83023b4f1000 ffff8300bfd47000 ffff8300bfd46000 (XEN) 0000000000000000 ffff83023b4b4280 
0000000000014440 0000000000000001 (XEN) ffff8300bfcffde8 ffff82d08012c327 ffff8300bfcffe08 ffff82d080169cea (XEN) ffff83023b4b5000 000000000000000a ffff8300bfcffe18 ffff82d080169d65 (XEN) ffff8300bfcffe38 ffff82d08010762a ffff83023b4b4280 ffff83023b4b5000 (XEN) ffff8300bfcffe68 ffff82d08010822a ffff8300bfcffe68 fffffffffffffff2 (XEN) ffff88022056dcb4 ffff880230c34440 ffff8300bfcffef8 ffff82d0801096fc (XEN) ffff8300bfcffef8 ffff8300bfcfff18 ffff8300bfcffef8 ffff82d080240e85 (XEN) ffff880200000001 0000000000000000 0000000000000246 ffffffff810013aa (XEN) 000000000000000a ffffffff810013aa 000000000000e030 ffff8300bfd47000 (XEN) ffff8802206597f0 ffff880230c34440 0000000000014440 0000000000000001 (XEN) 00007cff403000c7 ffff82d0802439e2 ffffffff8100140a 0000000000000020 (XEN) ffff88022063c7d0 ffff88022063c7d0 0000000000000001 000000000000dca0 (XEN) ffff88022056dcb8 ffff880230c34440 0000000000000206 0000000000000004 (XEN) ffff8802230001a0 ffff880220619000 0000000000000020 ffffffff8100140a (XEN) 0000000000000000 ffff88022056dcb4 0000000000000004 0001010000000000 (XEN) ffffffff8100140a 000000000000e033 0000000000000206 ffff88022056dc90 (XEN) 000000000000e02b 0000000000000000 0000000000000000 0000000000000000 (XEN) Xen call trace: (XEN) [<ffff82d08012a003>] sched_rt.c#rt_vcpu_wake+0xf0/0x17f (XEN) [<ffff82d08012be0c>] vcpu_wake+0x213/0x3d4 (XEN) [<ffff82d08012c327>] vcpu_unblock+0x4b/0x4d (XEN) [<ffff82d080169cea>] vcpu_kick+0x20/0x6f (XEN) [<ffff82d080169d65>] vcpu_mark_events_pending+0x2c/0x2f (XEN) [<ffff82d08010762a>] event_2l.c#evtchn_2l_set_pending+0xa9/0xb9 (XEN) [<ffff82d08010822a>] evtchn_send+0x158/0x183 (XEN) [<ffff82d0801096fc>] do_event_channel_op+0xe21/0x147d (XEN) [<ffff82d0802439e2>] lstar_enter+0xe2/0x13c (XEN) (XEN) (XEN) **************************************** (XEN) Panic on CPU 0: (XEN) Assertion '!__vcpu_on_replq(svc)' failed at sched_rt.c:527 (XEN) **************************************** >> I'm thinking when >> a vcpu unblocks, it could potentially 
fall through here. >> > Well, when unblocking, wake() is certainly called, yes. > >> And like you >> said, mostly spurious sleep >> happens when a vcpu is running and it could happen in other cases, >> although rare. >> > I think I said already there's no such thing as "spurious sleep". Or at > least, I can't think of anything that I would define a spurious sleep, > if you do, please, explain what situation you're referring to. > I meant to say spurious wakeup... If rt_vcpu_sleep() removes vcpus from replenishment queue, it's perfectly safe for rt_vcpu_wake() to insert them back. So, I'm suspecting it's the spurious wakeup that's causing troubles because vcpus are not removed prior to rt_vcpu_wake(). However, the two RETURNs at the beginning of rt_vcpu_wake() should catch that shouldn't it? > In any case, one way of dealing with vcpus blocking/offlining/etc could > be to, in context_saved(), in case we are not adding the vcpu back to > the runq, cancel its replenishment event with __replq_remove(). > > (This may make it possible to avoid doing it in rt_vcpu_sleep() too, > but you'll need to check and test.) > > Can you give this a try. That makes sense. Doing it in context_saved() kinda implies that if a vcpu is sleeping and taken off, its replenishment event should be removed. On the other hand, the logic is the same as removing it in rt_vcpu_sleep() but just at different times. Well, I have tried it and the check still needs to be there in rt_vcpu_wake(). I will send the next version so it's easier to look at. Thanks, Tianyang
On Thu, 2016-02-25 at 12:29 -0500, Tianyang Chen wrote: > On 2/25/2016 5:34 AM, Dario Faggioli wrote: > > > > > Which one ASSERT() fails? > > > The replq_insert() fails because it's already on the replenishment > queue > when rt_vcpu_wake() is trying to insert a vcpu again. > > (XEN) Xen call trace: > (XEN) [<ffff82d08012a003>] sched_rt.c#rt_vcpu_wake+0xf0/0x17f > (XEN) [<ffff82d08012be0c>] vcpu_wake+0x213/0x3d4 > (XEN) [<ffff82d08012c327>] vcpu_unblock+0x4b/0x4d > (XEN) [<ffff82d080169cea>] vcpu_kick+0x20/0x6f > (XEN) [<ffff82d080169d65>] vcpu_mark_events_pending+0x2c/0x2f > (XEN) [<ffff82d08010762a>] > event_2l.c#evtchn_2l_set_pending+0xa9/0xb9 > (XEN) [<ffff82d08010822a>] evtchn_send+0x158/0x183 > (XEN) [<ffff82d0801096fc>] do_event_channel_op+0xe21/0x147d > (XEN) [<ffff82d0802439e2>] lstar_enter+0xe2/0x13c > (XEN) > (XEN) > (XEN) **************************************** > (XEN) Panic on CPU 0: > (XEN) Assertion '!__vcpu_on_replq(svc)' failed at sched_rt.c:527 > (XEN) **************************************** > Ok, makes sense. > > > And like you > > > said, mostly spurious sleep > > > happens when a vcpu is running and it could happen in other > > > cases, > > > although rare. > > > > > I think I said already there's no such thing as "spurious sleep". > > Or at > > least, I can't think of anything that I would define a spurious > > sleep, > > if you do, please, explain what situation you're referring to. > > > I meant to say spurious wakeup... > If, for spurious wakeup, we refer to wakeups happening when the vcpu is either running or runnable (and hence in the runqueue already), which I do, we don't even get to call __replq_insert() in those cases. I mean, we leave rt_vcpu_wake() before that, don't we? > If rt_vcpu_sleep() removes vcpus from > replenishment queue, it's perfectly safe for rt_vcpu_wake() to > insert > them back. > It's safe against sleeps, not against blockings. That's the point I'm trying to make. 
> So, I'm suspecting it's the spurious wakeup that's causing > troubles because vcpus are not removed prior to rt_vcpu_wake(). > However, > the two RETURNs at the beginning of rt_vcpu_wake() should catch that > shouldn't it? > Exactly! > > In any case, one way of dealing with vcpus blocking/offlining/etc > > could > > be to, in context_saved(), in case we are not adding the vcpu back > > to > > the runq, cancel its replenishment event with __replq_remove(). > > > > (This may make it possible to avoid doing it in rt_vcpu_sleep() > > too, > > but you'll need to check and test.) > > > > Can you give this a try. > That makes sense. Doing it in context_saved() kinda implies that if > a > vcpu is sleeping and taken off, its replenishment event should be > removed. On the other hand, the logic is the same as removing it in > rt_vcpu_sleep() but just at different times. > It is, but, if done properly, it catches more cases, or at least that's what I'm after. > Well, I have tried it and > the check still needs to be there in rt_vcpu_wake(). I will send the > next version so it's easier to look at. > If you're still seeing assertion failures, perhaps context_saved() is not the right place where to do that... I'll think more about this... Anyway, yes, let's see the code. Please, also send again, as a follow up email, the assertion failure log you get if you remove the check. Thanks and Regards, Dario
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c index 2e5430f..1f0bb7b 100644 --- a/xen/common/sched_rt.c +++ b/xen/common/sched_rt.c @@ -16,6 +16,7 @@ #include <xen/delay.h> #include <xen/event.h> #include <xen/time.h> +#include <xen/timer.h> #include <xen/perfc.h> #include <xen/sched-if.h> #include <xen/softirq.h> @@ -87,7 +88,7 @@ #define RTDS_DEFAULT_BUDGET (MICROSECS(4000)) #define UPDATE_LIMIT_SHIFT 10 -#define MAX_SCHEDULE (MILLISECS(1)) + /* * Flags */ @@ -142,6 +143,12 @@ static cpumask_var_t *_cpumask_scratch; */ static unsigned int nr_rt_ops; +/* handler for the replenishment timer */ +static void repl_handler(void *data); + +/* checks if a timer is active or not */ +bool_t active_timer(struct timer* t); + /* * Systme-wide private data, include global RunQueue/DepletedQ * Global lock is referenced by schedule_data.schedule_lock from all @@ -152,7 +159,9 @@ struct rt_private { struct list_head sdom; /* list of availalbe domains, used for dump */ struct list_head runq; /* ordered list of runnable vcpus */ struct list_head depletedq; /* unordered list of depleted vcpus */ + struct list_head replq; /* ordered list of vcpus that need replenishment */ cpumask_t tickled; /* cpus been tickled */ + struct timer *repl_timer; /* replenishment timer */ }; /* @@ -160,6 +169,7 @@ struct rt_private { */ struct rt_vcpu { struct list_head q_elem; /* on the runq/depletedq list */ + struct list_head replq_elem;/* on the repl event list */ /* Up-pointers */ struct rt_dom *sdom; @@ -213,8 +223,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops) return &rt_priv(ops)->depletedq; } +static inline struct list_head *rt_replq(const struct scheduler *ops) +{ + return &rt_priv(ops)->replq; +} + /* - * Queue helper functions for runq and depletedq + * Queue helper functions for runq, depletedq + * and replenishment event queue */ static int __vcpu_on_q(const struct rt_vcpu *svc) @@ -228,6 +244,18 @@ __q_elem(struct list_head *elem) return 
list_entry(elem, struct rt_vcpu, q_elem); } +static struct rt_vcpu * +__replq_elem(struct list_head *elem) +{ + return list_entry(elem, struct rt_vcpu, replq_elem); +} + +static int +__vcpu_on_replq(const struct rt_vcpu *svc) +{ + return !list_empty(&svc->replq_elem); +} + /* * Debug related code, dump vcpu/cpu information */ @@ -288,7 +316,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu) static void rt_dump(const struct scheduler *ops) { - struct list_head *runq, *depletedq, *iter; + struct list_head *runq, *depletedq, *replq, *iter; struct rt_private *prv = rt_priv(ops); struct rt_vcpu *svc; struct rt_dom *sdom; @@ -301,6 +329,7 @@ rt_dump(const struct scheduler *ops) runq = rt_runq(ops); depletedq = rt_depletedq(ops); + replq = rt_replq(ops); printk("Global RunQueue info:\n"); list_for_each( iter, runq ) @@ -316,6 +345,13 @@ rt_dump(const struct scheduler *ops) rt_dump_vcpu(ops, svc); } + printk("Global Replenishment Event info:\n"); + list_for_each( iter, replq ) + { + svc = __replq_elem(iter); + rt_dump_vcpu(ops, svc); + } + printk("Domain info:\n"); list_for_each( iter, &prv->sdom ) { @@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc) } /* + * Removing a vcpu from the replenishment queue could + * re-program the timer for the next replenishment event + * if the timer is currently active + */ +static inline void +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc) +{ + struct rt_private *prv = rt_priv(ops); + struct list_head *replq = rt_replq(ops); + struct timer* repl_timer = prv->repl_timer; + + if ( __vcpu_on_replq(svc) ) + { + /* + * disarm the timer if removing the first replenishment event + * which is going to happen next + */ + if( active_timer(repl_timer) ) + { + struct rt_vcpu *next_repl = __replq_elem(replq->next); + + if( next_repl->cur_deadline == svc->cur_deadline ) + repl_timer->expires = 0; + + list_del_init(&svc->replq_elem); + + /* re-arm the timer for the next replenishment event */ + if( !list_empty(replq) ) + { + 
struct rt_vcpu *svc_next = __replq_elem(replq->next); + set_timer(repl_timer, svc_next->cur_deadline); + } + } + + else + list_del_init(&svc->replq_elem); + } +} + +/* + * An utility function that inserts a vcpu to a + * queue based on certain order (EDF) + */ +static void +_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem), + struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue) +{ + struct list_head *iter; + + list_for_each(iter, queue) + { + struct rt_vcpu * iter_svc = (*_get_q_elem)(iter); + if ( svc->cur_deadline <= iter_svc->cur_deadline ) + break; + } + + list_add_tail(elem, iter); +} + +/* * Insert svc with budget in RunQ according to EDF: * vcpus with smaller deadlines go first. * Insert svc without budget in DepletedQ unsorted; @@ -397,7 +493,6 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc) { struct rt_private *prv = rt_priv(ops); struct list_head *runq = rt_runq(ops); - struct list_head *iter; ASSERT( spin_is_locked(&prv->lock) ); @@ -405,22 +500,37 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc) /* add svc to runq if svc still has budget */ if ( svc->cur_budget > 0 ) - { - list_for_each(iter, runq) - { - struct rt_vcpu * iter_svc = __q_elem(iter); - if ( svc->cur_deadline <= iter_svc->cur_deadline ) - break; - } - list_add_tail(&svc->q_elem, iter); - } + _deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq); else - { list_add(&svc->q_elem, &prv->depletedq); - } } /* + * Insert svc into the repl even list: + * vcpus that needs to be repl earlier go first. + * scheduler private lock serializes this operation + * it could re-program the timer if it fires later than + * this vcpu's cur_deadline. Also, this is used to program + * the timer for the first time. 
+ */ +static void +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc) +{ + struct list_head *replq = rt_replq(ops); + struct rt_private *prv = rt_priv(ops); + struct timer *repl_timer = prv->repl_timer; + + ASSERT( !__vcpu_on_replq(svc) ); + + _deadline_queue_insert(&__replq_elem, svc, &svc->replq_elem, replq); + + if( repl_timer->expires == 0 || + ( active_timer(repl_timer) && repl_timer->expires > svc->cur_deadline ) ) + set_timer(repl_timer,svc->cur_deadline); +} + + +/* * Init/Free related code */ static int @@ -449,11 +559,18 @@ rt_init(struct scheduler *ops) INIT_LIST_HEAD(&prv->sdom); INIT_LIST_HEAD(&prv->runq); INIT_LIST_HEAD(&prv->depletedq); + INIT_LIST_HEAD(&prv->replq); cpumask_clear(&prv->tickled); ops->sched_data = prv; + /* + * The timer initialization will happen later when + * the first pcpu is added to this pool in alloc_pdata + */ + prv->repl_timer = NULL; + return 0; no_mem: @@ -473,6 +590,10 @@ rt_deinit(const struct scheduler *ops) xfree(_cpumask_scratch); _cpumask_scratch = NULL; } + + kill_timer(prv->repl_timer); + xfree(prv->repl_timer); + xfree(prv); } @@ -493,6 +614,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu) if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) ) return NULL; + if( prv->repl_timer == NULL ) + { + /* allocate the timer on the first cpu of this pool */ + prv->repl_timer = xzalloc(struct timer); + + if(prv->repl_timer == NULL ) + return NULL; + + init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu); + } + /* 1 indicates alloc. 
succeed in schedule.c */ return (void *)1; } @@ -586,6 +718,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd) return NULL; INIT_LIST_HEAD(&svc->q_elem); + INIT_LIST_HEAD(&svc->replq_elem); svc->flags = 0U; svc->sdom = dd; svc->vcpu = vc; @@ -609,7 +742,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv) } /* - * This function is called in sched_move_domain() in schedule.c + * It is called in sched_move_domain() and sched_init_vcpu + * in schedule.c * When move a domain to a new cpupool. * It inserts vcpus of moving domain to the scheduler's RunQ in * dest. cpupool. @@ -651,6 +785,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc) lock = vcpu_schedule_lock_irq(vc); if ( __vcpu_on_q(svc) ) __q_remove(svc); + + if( __vcpu_on_replq(svc) ) + __replq_remove(ops,svc); + vcpu_schedule_unlock_irq(lock, vc); } @@ -785,44 +923,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask) } /* - * Update vcpu's budget and - * sort runq by insert the modifed vcpu back to runq - * lock is grabbed before calling this function - */ -static void -__repl_update(const struct scheduler *ops, s_time_t now) -{ - struct list_head *runq = rt_runq(ops); - struct list_head *depletedq = rt_depletedq(ops); - struct list_head *iter; - struct list_head *tmp; - struct rt_vcpu *svc = NULL; - - list_for_each_safe(iter, tmp, runq) - { - svc = __q_elem(iter); - if ( now < svc->cur_deadline ) - break; - - rt_update_deadline(now, svc); - /* reinsert the vcpu if its deadline is updated */ - __q_remove(svc); - __runq_insert(ops, svc); - } - - list_for_each_safe(iter, tmp, depletedq) - { - svc = __q_elem(iter); - if ( now >= svc->cur_deadline ) - { - rt_update_deadline(now, svc); - __q_remove(svc); /* remove from depleted queue */ - __runq_insert(ops, svc); /* add to runq */ - } - } -} - -/* * schedule function for rt scheduler. 
* The lock is already grabbed in schedule.c, no need to lock here */ @@ -841,7 +941,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched /* burn_budget would return for IDLE VCPU */ burn_budget(ops, scurr, now); - __repl_update(ops, now); if ( tasklet_work_scheduled ) { @@ -868,6 +967,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched set_bit(__RTDS_delayed_runq_add, &scurr->flags); snext->last_start = now; + + ret.time = -1; /* if an idle vcpu is picked */ if ( !is_idle_vcpu(snext->vcpu) ) { if ( snext != scurr ) @@ -880,9 +981,11 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched snext->vcpu->processor = cpu; ret.migrated = 1; } + + ret.time = snext->budget; /* invoke the scheduler next time */ + } - ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */ ret.task = snext->vcpu; /* TRACE */ @@ -914,7 +1017,7 @@ static void rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) { struct rt_vcpu * const svc = rt_vcpu(vc); - + BUG_ON( is_idle_vcpu(vc) ); SCHED_STAT_CRANK(vcpu_sleep); @@ -924,6 +1027,9 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) __q_remove(svc); else if ( svc->flags & RTDS_delayed_runq_add ) clear_bit(__RTDS_delayed_runq_add, &svc->flags); + + if( __vcpu_on_replq(svc) ) + __replq_remove(ops, svc); } /* @@ -1026,10 +1132,6 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) { struct rt_vcpu * const svc = rt_vcpu(vc); s_time_t now = NOW(); - struct rt_private *prv = rt_priv(ops); - struct rt_vcpu *snext = NULL; /* highest priority on RunQ */ - struct rt_dom *sdom = NULL; - cpumask_t *online; BUG_ON( is_idle_vcpu(vc) ); @@ -1051,6 +1153,18 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) else SCHED_STAT_CRANK(vcpu_wake_not_runnable); + /* budget repl here is needed before inserting back to runq. If so, + * it should be re-inserted back to the replenishment queue. 
+ */ + if ( now >= svc->cur_deadline) + { + rt_update_deadline(now, svc); + __replq_remove(ops, svc); + } + + if( !__vcpu_on_replq(svc) ) + __replq_insert(ops, svc); + /* If context hasn't been saved for this vcpu yet, we can't put it on * the Runqueue/DepletedQ. Instead, we set a flag so that it will be * put on the Runqueue/DepletedQ after the context has been saved. @@ -1061,22 +1175,10 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) return; } - if ( now >= svc->cur_deadline) - rt_update_deadline(now, svc); - /* insert svc to runq/depletedq because svc is not in queue now */ __runq_insert(ops, svc); - __repl_update(ops, now); - - ASSERT(!list_empty(&prv->sdom)); - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); - online = cpupool_domain_cpumask(sdom->dom); - snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */ - - runq_tickle(ops, snext); - - return; + runq_tickle(ops, svc); } /* @@ -1087,10 +1189,6 @@ static void rt_context_saved(const struct scheduler *ops, struct vcpu *vc) { struct rt_vcpu *svc = rt_vcpu(vc); - struct rt_vcpu *snext = NULL; - struct rt_dom *sdom = NULL; - struct rt_private *prv = rt_priv(ops); - cpumask_t *online; spinlock_t *lock = vcpu_schedule_lock_irq(vc); clear_bit(__RTDS_scheduled, &svc->flags); @@ -1102,14 +1200,7 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc) likely(vcpu_runnable(vc)) ) { __runq_insert(ops, svc); - __repl_update(ops, NOW()); - - ASSERT(!list_empty(&prv->sdom)); - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); - online = cpupool_domain_cpumask(sdom->dom); - snext = __runq_pick(ops, online); /* pick snext from ALL cpus */ - - runq_tickle(ops, snext); + runq_tickle(ops, svc); } out: vcpu_schedule_unlock_irq(lock, vc); @@ -1168,6 +1259,80 @@ rt_dom_cntl( return rc; } +/* + * The replenishment timer handler picks vcpus + * from the replq and does the actual replenishment + */ +static void repl_handler(void *data){ + unsigned long flags; + 
s_time_t now = NOW(); + struct scheduler *ops = data; + struct rt_private *prv = rt_priv(ops); + struct list_head *replq = rt_replq(ops); + struct timer *repl_timer = prv->repl_timer; + struct list_head *iter, *tmp; + struct rt_vcpu *svc = NULL; + + spin_lock_irqsave(&prv->lock, flags); + + stop_timer(repl_timer); + + list_for_each_safe(iter, tmp, replq) + { + svc = __replq_elem(iter); + + if ( now >= svc->cur_deadline ) + { + rt_update_deadline(now, svc); + + /* + * when the replenishment happens + * svc is either on a pcpu or on + * runq/depletedq + */ + if( __vcpu_on_q(svc) ) + { + /* put back to runq */ + __q_remove(svc); + __runq_insert(ops, svc); + } + + /* + * tickle regardless where it's at + * because a running vcpu could have + * a later deadline than others after + * replenishment + */ + runq_tickle(ops, svc); + + /* update replenishment event queue */ + __replq_remove(ops, svc); + __replq_insert(ops, svc); + } + + else + break; + } + + /* + * use the vcpu that's on the top + * or else don't program the timer + */ + if( !list_empty(replq) ) + set_timer(repl_timer, __replq_elem(replq->next)->cur_deadline); + + spin_unlock_irqrestore(&prv->lock, flags); + +} + +/* checks if a timer has been stopped or not */ +bool_t active_timer(struct timer *timer) +{ + ASSERT(timer->status >= TIMER_STATUS_inactive); + ASSERT(timer->status <= TIMER_STATUS_in_list); + return (timer->status >= TIMER_STATUS_in_heap); +} + static struct rt_private _rt_priv; static const struct scheduler sched_rtds_def = {