Message ID | 1457724196-4760-1-git-send-email-tiche@seas.upenn.edu (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
I'm focusing on the style and the logic in the replenish handler: > /* > @@ -160,6 +180,7 @@ struct rt_private { > */ > struct rt_vcpu { > struct list_head q_elem; /* on the runq/depletedq list */ > + struct list_head replq_elem;/* on the repl event list */ missing space before /* > > /* Up-pointers */ > struct rt_dom *sdom; > @@ -213,8 +234,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops) > return &rt_priv(ops)->depletedq; > } > > +static inline struct list_head *rt_replq(const struct scheduler *ops) > +{ > + return &rt_priv(ops)->replq; > +} > + > /* > - * Queue helper functions for runq and depletedq > + * Helper functions for manipulating the runqueue, the depleted queue, > + * and the replenishment events queue. > */ > static int > __vcpu_on_q(const struct rt_vcpu *svc) > @@ -228,6 +255,18 @@ __q_elem(struct list_head *elem) > return list_entry(elem, struct rt_vcpu, q_elem); > } > > +static struct rt_vcpu * > +replq_elem(struct list_head *elem) > +{ > + return list_entry(elem, struct rt_vcpu, replq_elem); > +} > + > +static int > +vcpu_on_replq(const struct rt_vcpu *svc) > +{ > + return !list_empty(&svc->replq_elem); > +} > + > /* > * Debug related code, dump vcpu/cpu information > */ > @@ -288,7 +327,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu) > static void > rt_dump(const struct scheduler *ops) > { > - struct list_head *runq, *depletedq, *iter; > + struct list_head *runq, *depletedq, *replq, *iter; > struct rt_private *prv = rt_priv(ops); > struct rt_vcpu *svc; > struct rt_dom *sdom; > @@ -301,6 +340,7 @@ rt_dump(const struct scheduler *ops) > > runq = rt_runq(ops); > depletedq = rt_depletedq(ops); > + replq = rt_replq(ops); > > printk("Global RunQueue info:\n"); > list_for_each( iter, runq ) > @@ -316,6 +356,13 @@ rt_dump(const struct scheduler *ops) > rt_dump_vcpu(ops, svc); > } > > + printk("Global Replenishment Event info:\n"); > + list_for_each( iter, replq ) > + { > + svc = replq_elem(iter); > + 
rt_dump_vcpu(ops, svc); > + } > + > printk("Domain info:\n"); > list_for_each( iter, &prv->sdom ) > { > @@ -380,11 +427,77 @@ rt_update_deadline(s_time_t now, struct rt_vcpu *svc) > return; > } > > +/* > + * A helper function that only removes a vcpu from a queue > + * and it returns 1 if the vcpu was in front of the list. > + */ > +static inline int > +deadline_queue_remove(struct list_head *queue, struct list_head *elem) > +{ > + int pos = 0; Add an empty line here Usually, the variable definition and the main function has an empty line for seperation. > + if ( queue->next != elem ) > + pos = 1; > + > + list_del_init(elem); > + return !pos; > +} > + > static inline void > __q_remove(struct rt_vcpu *svc) > { > - if ( __vcpu_on_q(svc) ) > - list_del_init(&svc->q_elem); > + ASSERT( __vcpu_on_q(svc) ); > + list_del_init(&svc->q_elem); > +} > + > +/* > + * Removing a vcpu from the replenishment queue could > + * re-program the timer for the next replenishment event > + * if it was at the front of the list. > + */ > +static inline void > +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct rt_private *prv = rt_priv(ops); > + struct list_head *replq = rt_replq(ops); > + struct timer* repl_timer = prv->repl_timer; > + > + ASSERT( vcpu_on_replq(svc) ); > + > + if ( deadline_queue_remove(replq, &svc->replq_elem) ) > + { > + /* re-arm the timer for the next replenishment event */ > + if ( !list_empty(replq) ) > + { > + struct rt_vcpu *svc_next = replq_elem(replq->next); > + set_timer(repl_timer, svc_next->cur_deadline); > + } > + else > + stop_timer(repl_timer); > + } > +} > + > +/* > + * An utility function that inserts a vcpu to a > + * queue based on certain order (EDF). The returned > + * value is 1 if a vcpu has been inserted to the > + * front of a list. 
> + */ > +static int > +deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem), > + struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue) > +{ > + struct list_head *iter; > + int pos = 0; > + > + list_for_each(iter, queue) space after ( and before ) If not sure about the space, we can refer to the sched_credit.c for the style as well, beside the CODING_STYLE file. :-) > + { > + struct rt_vcpu * iter_svc = (*_get_q_elem)(iter); > + if ( svc->cur_deadline <= iter_svc->cur_deadline ) > + break; > + pos++; > + } > + list_add_tail(elem, iter); > + return !pos; > } > > /* > @@ -397,27 +510,62 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > { > struct rt_private *prv = rt_priv(ops); > struct list_head *runq = rt_runq(ops); > - struct list_head *iter; > > ASSERT( spin_is_locked(&prv->lock) ); > - > ASSERT( !__vcpu_on_q(svc) ); > + ASSERT( vcpu_on_replq(svc) ); > > /* add svc to runq if svc still has budget */ > if ( svc->cur_budget > 0 ) > - { > - list_for_each(iter, runq) > - { > - struct rt_vcpu * iter_svc = __q_elem(iter); > - if ( svc->cur_deadline <= iter_svc->cur_deadline ) > - break; > - } > - list_add_tail(&svc->q_elem, iter); > - } > + deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq); > else > - { > list_add(&svc->q_elem, &prv->depletedq); > +} > + > +/* > + * Insert svc into the replenishment event list > + * in replenishment time order. > + * vcpus that need to be replished earlier go first. > + * The timer may be re-programmed if svc is inserted > + * at the front of the event list. 
> + */ > +static void > +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct list_head *replq = rt_replq(ops); > + struct rt_private *prv = rt_priv(ops); > + struct timer *repl_timer = prv->repl_timer; > + > + ASSERT( !vcpu_on_replq(svc) ); > + > + if ( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) ) > + set_timer(repl_timer, svc->cur_deadline); > +} > + > +/* > + * Removes and re-inserts an event to the replenishment queue. > + */ > +static void > +replq_reinsert(const struct scheduler *ops, struct rt_vcpu *svc) > +{ > + struct list_head *replq = rt_replq(ops); > + struct rt_private *prv = rt_priv(ops); > + struct timer *repl_timer = prv->repl_timer; > + > + ASSERT( vcpu_on_replq(svc) ); > + > + if ( deadline_queue_remove(replq, &svc->replq_elem) ) > + { > + if ( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) ) > + set_timer(repl_timer, svc->cur_deadline); > + else > + { > + struct rt_vcpu *svc_next = replq_elem(replq->next); > + set_timer(repl_timer, svc_next->cur_deadline); > + } > } > + else if ( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) ) > + set_timer(repl_timer, svc->cur_deadline); > } > > /* > @@ -449,11 +597,18 @@ rt_init(struct scheduler *ops) > INIT_LIST_HEAD(&prv->sdom); > INIT_LIST_HEAD(&prv->runq); > INIT_LIST_HEAD(&prv->depletedq); > + INIT_LIST_HEAD(&prv->replq); > > cpumask_clear(&prv->tickled); > > ops->sched_data = prv; > > + /* > + * The timer initialization will happen later when > + * the first pcpu is added to this pool in alloc_pdata. 
> + */ > + prv->repl_timer = NULL; > + > return 0; > > no_mem: > @@ -473,6 +628,10 @@ rt_deinit(struct scheduler *ops) > xfree(_cpumask_scratch); > _cpumask_scratch = NULL; > } > + > + kill_timer(prv->repl_timer); > + xfree(prv->repl_timer); > + > ops->sched_data = NULL; > xfree(prv); > } > @@ -494,6 +653,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu) > if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) ) > return NULL; > > + if ( prv->repl_timer == NULL ) > + { > + /* Allocate the timer on the first cpu of this pool. */ > + prv->repl_timer = xzalloc(struct timer); > + > + if ( prv->repl_timer == NULL ) > + return NULL; > + > + init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu); > + } > + > /* 1 indicates alloc. succeed in schedule.c */ > return (void *)1; > } > @@ -587,6 +757,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd) > return NULL; > > INIT_LIST_HEAD(&svc->q_elem); > + INIT_LIST_HEAD(&svc->replq_elem); > svc->flags = 0U; > svc->sdom = dd; > svc->vcpu = vc; > @@ -610,7 +781,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv) > } > > /* > - * This function is called in sched_move_domain() in schedule.c > + * It is called in sched_move_domain() and sched_init_vcpu > + * in schedule.c. > * When move a domain to a new cpupool. > * It inserts vcpus of moving domain to the scheduler's RunQ in > * dest. cpupool. 
> @@ -628,8 +800,13 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu *vc) > if ( now >= svc->cur_deadline ) > rt_update_deadline(now, svc); > > - if ( !__vcpu_on_q(svc) && vcpu_runnable(vc) && !vc->is_running ) > - __runq_insert(ops, svc); > + if ( !__vcpu_on_q(svc) && vcpu_runnable(vc) ) > + { > + __replq_insert(ops, svc); > + > + if ( !vc->is_running ) > + __runq_insert(ops, svc); > + } > vcpu_schedule_unlock_irq(lock, vc); > > SCHED_STAT_CRANK(vcpu_insert); > @@ -652,6 +829,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc) > lock = vcpu_schedule_lock_irq(vc); > if ( __vcpu_on_q(svc) ) > __q_remove(svc); > + > + if ( vcpu_on_replq(svc) ) > + __replq_remove(ops,svc); > + > vcpu_schedule_unlock_irq(lock, vc); > } > > @@ -707,7 +888,14 @@ burn_budget(const struct scheduler *ops, struct rt_vcpu *svc, s_time_t now) > svc->cur_budget -= delta; > > if ( svc->cur_budget < 0 ) Although this line is not changed in the patch. I'm thinking maybe it should be if ( svc->cur_budget <= 0 ) because budget == 0 also means depleted budget. > + { > svc->cur_budget = 0; > + /* > + * Set __RTDS_was_depleted so the replenishment > + * handler can let this vcpu tickle if it was refilled. 
> + */ > + set_bit(__RTDS_was_depleted, &svc->flags); > + } > > /* TRACE */ > { > @@ -784,44 +972,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask) > } > > /* > - * Update vcpu's budget and > - * sort runq by insert the modifed vcpu back to runq > - * lock is grabbed before calling this function > - */ > -static void > -__repl_update(const struct scheduler *ops, s_time_t now) > -{ > - struct list_head *runq = rt_runq(ops); > - struct list_head *depletedq = rt_depletedq(ops); > - struct list_head *iter; > - struct list_head *tmp; > - struct rt_vcpu *svc = NULL; > - > - list_for_each_safe(iter, tmp, runq) > - { > - svc = __q_elem(iter); > - if ( now < svc->cur_deadline ) > - break; > - > - rt_update_deadline(now, svc); > - /* reinsert the vcpu if its deadline is updated */ > - __q_remove(svc); > - __runq_insert(ops, svc); > - } > - > - list_for_each_safe(iter, tmp, depletedq) > - { > - svc = __q_elem(iter); > - if ( now >= svc->cur_deadline ) > - { > - rt_update_deadline(now, svc); > - __q_remove(svc); /* remove from depleted queue */ > - __runq_insert(ops, svc); /* add to runq */ > - } > - } > -} > - > -/* > * schedule function for rt scheduler. 
> * The lock is already grabbed in schedule.c, no need to lock here > */ > @@ -840,8 +990,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > /* burn_budget would return for IDLE VCPU */ > burn_budget(ops, scurr, now); > > - __repl_update(ops, now); > - > if ( tasklet_work_scheduled ) > { > trace_var(TRC_RTDS_SCHED_TASKLET, 1, 0, NULL); > @@ -868,6 +1016,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > set_bit(__RTDS_delayed_runq_add, &scurr->flags); > > snext->last_start = now; > + ret.time = -1; /* if an idle vcpu is picked */ > if ( !is_idle_vcpu(snext->vcpu) ) > { > if ( snext != scurr ) > @@ -880,9 +1029,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched > snext->vcpu->processor = cpu; > ret.migrated = 1; > } > + ret.time = snext->budget; /* invoke the scheduler next time */ > } > - > - ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */ > ret.task = snext->vcpu; > > return ret; > @@ -903,7 +1051,10 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) > if ( curr_on_cpu(vc->processor) == vc ) > cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); > else if ( __vcpu_on_q(svc) ) > + { > __q_remove(svc); > + __replq_remove(ops, svc); > + } > else if ( svc->flags & RTDS_delayed_runq_add ) > clear_bit(__RTDS_delayed_runq_add, &svc->flags); > } > @@ -1008,10 +1159,7 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu * const svc = rt_vcpu(vc); > s_time_t now = NOW(); > - struct rt_private *prv = rt_priv(ops); > - struct rt_vcpu *snext = NULL; /* highest priority on RunQ */ > - struct rt_dom *sdom = NULL; > - cpumask_t *online; > + bool_t missed; > > BUG_ON( is_idle_vcpu(vc) ); > > @@ -1033,32 +1181,42 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) > else > SCHED_STAT_CRANK(vcpu_wake_not_runnable); > > - /* If context hasn't been saved for this vcpu yet, we can't put it on > - * the 
Runqueue/DepletedQ. Instead, we set a flag so that it will be > - * put on the Runqueue/DepletedQ after the context has been saved. > + /* > + * If a deadline passed while svc was asleep/blocked, we need new > + * scheduling parameters (a new deadline and full budget). > + */ > + > + missed = ( now >= svc->cur_deadline ); > + if ( missed ) > + rt_update_deadline(now, svc); > + > + /* > + * If context hasn't been saved for this vcpu yet, we can't put it on > + * the run-queue/depleted-queue. Instead, we set the appropriate flag, > + * the vcpu will be put back on queue after the context has been saved > + * (in rt_context_save()). > */ > if ( unlikely(svc->flags & RTDS_scheduled) ) > { > set_bit(__RTDS_delayed_runq_add, &svc->flags); > + /* > + * The vcpu is waking up already, and we didn't even had the time to > + * remove its next replenishment event from the replenishment queue > + * when it blocked! No big deal. If we did not miss the deadline in > + * the meantime, let's just leave it there. If we did, let's remove it > + * and queue a new one (to occur at our new deadline). > + */ > + if ( missed ) > + replq_reinsert(ops, svc); > return; > } > > - if ( now >= svc->cur_deadline) > - rt_update_deadline(now, svc); > - > + /* Replenishment event got cancelled when we blocked. Add it back. 
*/ > + __replq_insert(ops, svc); > /* insert svc to runq/depletedq because svc is not in queue now */ > __runq_insert(ops, svc); > > - __repl_update(ops, now); > - > - ASSERT(!list_empty(&prv->sdom)); > - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); > - online = cpupool_domain_cpumask(sdom->dom); > - snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */ > - > - runq_tickle(ops, snext); > - > - return; > + runq_tickle(ops, svc); > } > > /* > @@ -1069,10 +1227,6 @@ static void > rt_context_saved(const struct scheduler *ops, struct vcpu *vc) > { > struct rt_vcpu *svc = rt_vcpu(vc); > - struct rt_vcpu *snext = NULL; > - struct rt_dom *sdom = NULL; > - struct rt_private *prv = rt_priv(ops); > - cpumask_t *online; > spinlock_t *lock = vcpu_schedule_lock_irq(vc); > > clear_bit(__RTDS_scheduled, &svc->flags); > @@ -1084,15 +1238,11 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc) > likely(vcpu_runnable(vc)) ) > { > __runq_insert(ops, svc); > - __repl_update(ops, NOW()); > - > - ASSERT(!list_empty(&prv->sdom)); > - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); > - online = cpupool_domain_cpumask(sdom->dom); > - snext = __runq_pick(ops, online); /* pick snext from ALL cpus */ > - > - runq_tickle(ops, snext); > + runq_tickle(ops, svc); > } > + else > + __replq_remove(ops, svc); > + > out: > vcpu_schedule_unlock_irq(lock, vc); > } > @@ -1150,6 +1300,101 @@ rt_dom_cntl( > return rc; > } > > +/* > + * The replenishment timer handler picks vcpus > + * from the replq and does the actual replenishment. 
> + */ > +static void repl_handler(void *data){ > + unsigned long flags; > + s_time_t now = NOW(); > + struct scheduler *ops = data; > + struct rt_private *prv = rt_priv(ops); > + struct list_head *replq = rt_replq(ops); > + struct list_head *runq = rt_runq(ops); > + struct timer *repl_timer = prv->repl_timer; > + struct list_head *iter, *tmp; > + struct list_head tmp_replq; > + struct rt_vcpu *svc = NULL; > + > + spin_lock_irqsave(&prv->lock, flags); Hmm, I haven't thought hard about the choice between spin_lock_irqsave() and spin_lock_irq(), before. Hi Dario, Is it better to use spin_lock_irqsave() or spin_lock_irq() at this place? I'm not quite sure if the handler can be called in an interrupt disabled context? Can it? If interrupt is disabled, how can this handled be triggered? If not, can we use spi_lock_irq, instead? When I used the spin_lock_irq(save), I just refered to what credit2 scheduler does, but didn't think hard enough about which one has better performance. IMO, spin_lock_irqsave is safer, but may take more time. If spin_lock_irq is enough, we don't need overuse spin_lock_irqsave(), do we? > + > + stop_timer(repl_timer); > + > + /* > + * A temperary queue for updated vcpus. > + * It is used to tickle. > + */ > + INIT_LIST_HEAD(&tmp_replq); > + > + list_for_each_safe(iter, tmp, replq) > + { > + svc = replq_elem(iter); > + > + if ( now < svc->cur_deadline ) > + break; > + > + rt_update_deadline(now, svc); > + /* > + * When a vcpu is replenished, it is moved > + * to a temporary queue to tickle. > + */ > + list_del(&svc->replq_elem); > + list_add(&svc->replq_elem, &tmp_replq); > + > + /* > + * If svc is on run queue, we need > + * to put it to the correct place since > + * its deadline changes. > + */ > + if( __vcpu_on_q(svc) ) space before ( > + { > + /* put back to runq */ > + __q_remove(svc); > + __runq_insert(ops, svc); > + } > + } > + > + /* Iterate through the list of updated vcpus. 
*/ > + list_for_each_safe(iter, tmp, &tmp_replq) > + { > + struct vcpu* vc; > + svc = replq_elem(iter); > + vc = svc->vcpu; > + > + if ( curr_on_cpu(vc->processor) == vc && > + ( !list_empty(runq) ) ) ( !list_empty(runq) ) should be indented to curr_on_cpu in the above line. > + { > + /* > + * If the vcpu is running, let the head > + * of the run queue tickle if it has a > + * higher priority. > + */ > + struct rt_vcpu *next_on_runq = __q_elem(runq->next); > + if ( svc->cur_deadline >= next_on_runq->cur_deadline ) It's better to be if ( svc->cur_deadline > next_on_runq->cur_deadline ), to avoid the unnecessary tickle when they have same priority. We assume priority tie is broken arbitrarily. > + runq_tickle(ops, next_on_runq); > + } > + else if ( __vcpu_on_q(svc) ) > + { > + /* Only tickle a vcpu that was depleted. */ Change comment to /* Only need to tickle a vcpu that was depleted. */ to make it clearer. > + if ( test_and_clear_bit(__RTDS_was_depleted, &svc->flags) ) > + runq_tickle(ops, svc); > + } > + > + list_del(&svc->replq_elem); > + /* Move it back to the replenishment event queue. */ > + deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq); > + } > + > + /* > + * Use the vcpu that's on the top of the run queue. > + * Otherwise don't program the timer. > + */ > + if( !list_empty(replq) ) space before ( > + set_timer(repl_timer, replq_elem(replq->next)->cur_deadline); > + > + spin_unlock_irqrestore(&prv->lock, flags); > +} > + > static const struct scheduler sched_rtds_def = { > .name = "SMP RTDS Scheduler", > .opt_name = "rtds", Great and expedite work, Tianyang! This version looks good. Can you set up a repo. with the previous version of the patch and this version of the patch so that I can diff. these two versions to make sure I didn't miss anything you modified from the last version. One more thing we should think about is: How can we "prove/test" the correctness of the scheduler? 
Can we use xentrace to record the scheduling trace and then write a userspace program to check the scheduling trace is obeying the priority rules of the scheduler. Thanks and Best Regards, Meng ----------- Meng Xu PhD Student in Computer and Information Science University of Pennsylvania http://www.cis.upenn.edu/~mengxu/
On 03/11/2016 11:54 PM, Meng Xu wrote: > I'm focusing on the style and the logic in the replenish handler: > >> /* >> @@ -160,6 +180,7 @@ struct rt_private { >> */ >> struct rt_vcpu { >> struct list_head q_elem; /* on the runq/depletedq list */ >> + struct list_head replq_elem;/* on the repl event list */ > > missing space before /* > I wasn't sure if all the comments should be lined up or not. Maybe there should be one more space for all the fields so they nicely line up? >> +static int >> +deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem), >> + struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue) >> +{ >> + struct list_head *iter; >> + int pos = 0; >> + >> + list_for_each(iter, queue) > > space after ( and before ) > If not sure about the space, we can refer to the sched_credit.c for > the style as well, beside the CODING_STYLE file. :-) > Ok. But in __runq_pick() there is no space. Also there is no space in the definition of this macro: #define list_for_each(pos, head) \ Which one should I follow? >> @@ -707,7 +888,14 @@ burn_budget(const struct scheduler *ops, struct rt_vcpu *svc, s_time_t now) >> svc->cur_budget -= delta; >> >> if ( svc->cur_budget < 0 ) > > Although this line is not changed in the patch. I'm thinking maybe it should be > if ( svc->cur_budget <= 0 ) > because budget == 0 also means depleted budget. > Right. >> + /* >> + * If the vcpu is running, let the head >> + * of the run queue tickle if it has a >> + * higher priority. >> + */ >> + struct rt_vcpu *next_on_runq = __q_elem(runq->next); >> + if ( svc->cur_deadline >= next_on_runq->cur_deadline ) > > It's better to be if ( svc->cur_deadline > next_on_runq->cur_deadline > ), to avoid the unnecessary tickle when they have same priority. > > We assume priority tie is broken arbitrarily. OK. > > Great and expedite work, Tianyang! > This version looks good. > Can you set up a repo. 
with the previous version of the patch and this > version of the patch so that I can diff. these two versions to make > sure I didn't miss anything you modified from the last version. > Sure. > One more thing we should think about is: > How can we "prove/test" the correctness of the scheduler? > Can we use xentrace to record the scheduling trace and then write a > userspace program to check the scheduling trace is obeying the > priority rules of the scheduler. > Thanks for the review Meng, I am still exploring xentrace and it can output scheduling events such as which vcpu is running on a pcpu. I think it's possible for the userspace program to check RTDS, based on cur_budget and cur_deadline. We need to have a very clear outline of rules, for the things we are concerned about. When you say correctness, what does it include? I'm thinking about rules for when a vcpu should preempt, tickle and actually be picked. Thanks, Tianyang
On 12/03/2016 22:21, Chen, Tianyang wrote: > > > On 03/11/2016 11:54 PM, Meng Xu wrote: >> I'm focusing on the style and the logic in the replenish handler: >> >>> /* >>> @@ -160,6 +180,7 @@ struct rt_private { >>> */ >>> struct rt_vcpu { >>> struct list_head q_elem; /* on the runq/depletedq list */ >>> + struct list_head replq_elem;/* on the repl event list */ >> >> missing space before /* >> > > I wasn't sure if all the comments should be lined up or not. Maybe > there should be one more space for all the fields so they nicely line up? At the very least, there should be a space between the ; and / If you were introducing the entire structure at once then aligned comments would be better, or if you were submitting a larger series with some cleanup patches, then modifying the existing layout would be acceptable. In this case however, I would avoid aligning all the comments, as it detracts from the clarity of the patch (whose purpose is a functional change). > >>> +static int >>> +deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct >>> list_head *elem), >>> + struct rt_vcpu *svc, struct list_head *elem, struct list_head >>> *queue) >>> +{ >>> + struct list_head *iter; >>> + int pos = 0; >>> + >>> + list_for_each(iter, queue) >> >> space after ( and before ) >> If not sure about the space, we can refer to the sched_credit.c for >> the style as well, beside the CODING_STYLE file. :-) >> > > Ok. But in __runq_pick() there is no space. Also there is no space in > the definition of this macro: > #define list_for_each(pos, head) \ > Which one should I follow? > Apologies for that. The code is not in a particularly good state, but we do request that all new code adheres to the guidelines, in a hope that eventually it will be consistent. The macro definition won't have spaces because CPP syntax requires that the ( be immediately following the macro name. 
The Xen coding guidelines require that control structures including if, for, while, and these macro structures (being an extension of a for loop) have spaces between the control name, and immediately inside the control brackets. Therefore, it should end up as ... list_for_each ( iter, queue ) { ... If you wish, you are more than welcome to submit an additional patch whose purpose is to specifically fix the pre-existing style issues, but you are under no obligation to. ~Andrew
On 03/12/2016 05:36 PM, Andrew Cooper wrote: > On 12/03/2016 22:21, Chen, Tianyang wrote: >> >> >> On 03/11/2016 11:54 PM, Meng Xu wrote: >>> I'm focusing on the style and the logic in the replenish handler: >>> >>>> /* >>>> @@ -160,6 +180,7 @@ struct rt_private { >>>> */ >>>> struct rt_vcpu { >>>> struct list_head q_elem; /* on the runq/depletedq list */ >>>> + struct list_head replq_elem;/* on the repl event list */ >>> >>> missing space before /* >>> >> >> I wasn't sure if all the comments should be lined up or not. Maybe >> there should be one more space for all the fields so they nicely line up? > > At the very least, there should be a space between the ; and / > > If you were introducing the entire structure at once then aligned > comments would be better, or if you were submitting a larger series with > some cleanup patches, then modifying the existing layout would be > acceptable. > > In this case however, I would avoid aligning all the comments, as it > detracts from the clarity of the patch (whose purpose is a functional > change). > Right. >> >>>> +static int >>>> +deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct >>>> list_head *elem), >>>> + struct rt_vcpu *svc, struct list_head *elem, struct list_head >>>> *queue) >>>> +{ >>>> + struct list_head *iter; >>>> + int pos = 0; >>>> + >>>> + list_for_each(iter, queue) >>> >>> space after ( and before ) >>> If not sure about the space, we can refer to the sched_credit.c for >>> the style as well, beside the CODING_STYLE file. :-) >>> >> >> Ok. But in __runq_pick() there is no space. Also there is no space in >> the definition of this macro: >> #define list_for_each(pos, head) \ >> Which one should I follow? >> > > Apologies for that. The code is not in a particularly good state, but > we do request that all new code adheres to the guidelines, in a hope > that eventually it will be consistent. 
> > The macro definition won't have spaces because CPP syntax requires that > the ( be immediately following the macro name. The Xen coding > guidelines require that control structures including if, for, while, and > these macro structures (being an extension of a for loop) have spaces > between the control name, and immediately inside the control brackets. > > Therefore, it should end up as > > ... > list_for_each ( iter, queue ) > { > ... > > If you wish, you are more than welcome to submit an additional patch > whose purpose is to specifically fix the pre-existing style issues, but > you are under no obligation to. > > ~Andrew > Thanks for the clarification. Tianyang Chen
On Sat, Mar 12, 2016 at 5:21 PM, Chen, Tianyang <tiche@seas.upenn.edu> wrote: > > > On 03/11/2016 11:54 PM, Meng Xu wrote: >> >> I'm focusing on the style and the logic in the replenish handler: >> >>> /* >>> @@ -160,6 +180,7 @@ struct rt_private { >>> */ >>> struct rt_vcpu { >>> struct list_head q_elem; /* on the runq/depletedq list */ >>> + struct list_head replq_elem;/* on the repl event list */ >> >> >> missing space before /* >> > > I wasn't sure if all the comments should be lined up or not. Maybe there > should be one more space for all the fields so they nicely line up? > >>> +static int >>> +deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head >>> *elem), >>> + struct rt_vcpu *svc, struct list_head *elem, struct list_head >>> *queue) >>> +{ >>> + struct list_head *iter; >>> + int pos = 0; >>> + >>> + list_for_each(iter, queue) >> >> >> space after ( and before ) >> If not sure about the space, we can refer to the sched_credit.c for >> the style as well, beside the CODING_STYLE file. :-) >> > > Ok. But in __runq_pick() there is no space. Also there is no space in the > definition of this macro: > #define list_for_each(pos, head) \ > Which one should I follow? It should have space before (. Some of the style in the old files may not be correctly updated. :-( But when we introduce new code, we'd better follow the rules.... > >> One more thing we should think about is: >> How can we "prove/test" the correctness of the scheduler? >> Can we use xentrace to record the scheduling trace and then write a >> userspace program to check the scheduling trace is obeying the >> priority rules of the scheduler. >> > Thanks for the review Meng, I am still exploring xentrace and it can output > scheduling events such as which vcpu is running on a pcpu. I think it's > possible for the userspace program to check RTDS, based on cur_budget and > cur_deadline. We need to have a very clear outline of rules, for the things > we are concerned about. 
When you say correctness, what does it include? I'm > thinking about rules for when a vcpu should preempt, tickle and actually be > picked. What you said should be included... What's in my mind is checking the invariants in the EDF scheduling policy. For example, at any time, the running VCPUs should have higher priority than the VCPUs in runq; At any time, the runq and replenish queue should be sorted based on EDF policy. Meng
On Sun, 2016-03-13 at 11:43 -0400, Meng Xu wrote: > On Sat, Mar 12, 2016 at 5:21 PM, Chen, Tianyang <tiche@seas.upenn.edu > > wrote: > > On 03/11/2016 11:54 PM, Meng Xu wrote: > > > One more thing we should think about is: > > > How can we "prove/test" the correctness of the scheduler? > > > Can we use xentrace to record the scheduling trace and then write > > > a > > > userspace program to check the scheduling trace is obeying the > > > priority rules of the scheduler. > > > > > Thanks for the review Meng, I am still exploring xentrace and it > > can output > > scheduling events such as which vcpu is running on a pcpu. I think > > it's > > possible for the userspace program to check RTDS, based on > > cur_budget and > > cur_deadline. We need to have a very clear outline of rules, for > > the things > > we are concerned about. When you say correctness, what does it > > include? I'm > > thinking about rules for when a vcpu should preempt, tickle and > > actually be > > picked. > What you said should be included... > What's in my mind is checking the invariants in the EDF scheduling > policy. > For example, at any time, the running VCPUs should have higher > priority than the VCPUs in runq; > At any time, the runq and replenish queue should be sorted based on > EDF policy. > This would be rather useful, but it's really difficult. It was "a thing" already when I was doing research on RT systems, i.e., a few years ago. Fact is, there will always be (transitory, hopefully) situations where the schedule is not compliant with EDF, because of scheduling overhead, timer resolution, irq waiting being re-enabled, etc. The problem, as far as I can remember, is how to distinguish between an actual transient state and a real bug in the coding of the algorithm. At the time, there was some work on it, and my research group was also interested in doing something similar for the EDF scheduler we were pushing to Linux. 
We never got to do much, though, and the only reference I can recall of and find, right now, of others' work is this: https://www.cs.unc.edu/~mollison/unit-trace/index.html http://www.cs.unc.edu/~mollison/pubs/ospert09.pdf It was for LITMUS-RT, so adaptation would be required to make it process a xentrace/xenalyze event log (provided it's finished and working, which I don't think). I can ask my old colleagues if they remember more, and whether anything happened since I left, in the RT community about that (although, the latter, you guys are in a way better position than me to check! :-P). Regards, Dario
On Fri, 2016-03-11 at 23:54 -0500, Meng Xu wrote: > > > @@ -1150,6 +1300,101 @@ rt_dom_cntl( > > return rc; > > } > > > > +/* > > + * The replenishment timer handler picks vcpus > > + * from the replq and does the actual replenishment. > > + */ > > +static void repl_handler(void *data){ > > + unsigned long flags; > > + s_time_t now = NOW(); > > + struct scheduler *ops = data; > > + struct rt_private *prv = rt_priv(ops); > > + struct list_head *replq = rt_replq(ops); > > + struct list_head *runq = rt_runq(ops); > > + struct timer *repl_timer = prv->repl_timer; > > + struct list_head *iter, *tmp; > > + struct list_head tmp_replq; > > + struct rt_vcpu *svc = NULL; > > + > > + spin_lock_irqsave(&prv->lock, flags); > Hmm, I haven't thought hard about the choice between > spin_lock_irqsave() and spin_lock_irq(), before. > > Hi Dario, > Is it better to use spin_lock_irqsave() or spin_lock_irq() at this > place? > Just very quickly about this (I'll comment about the rest of the patch later). > I'm not quite sure if the handler can be called in an interrupt > disabled context? Can it? > I recommend looking at what happens inside init_timer(), i.e., where a pointer to this function is stashed. Then, still in xen/common/timer.c, check where this (and, in general, a timer handling function provided to timer_init()) is actually invoked. When you'll find that spot, the answer to whether spin_lock_irq() is safe or not in here, will appear quite evident. :-) > When I used the spin_lock_irq(save), I just refered to what credit2 > scheduler does, but didn't think hard enough about which one has > better performance. > I'm not sure what you mean when you talk about Credit2, as there are no timers in there. In any case, it is indeed the case that, if just _irq() is safe, we should use it, as it's cheaper. Regards, Dario
On Mon, Mar 14, 2016 at 7:48 AM, Dario Faggioli <dario.faggioli@citrix.com> wrote: > On Sun, 2016-03-13 at 11:43 -0400, Meng Xu wrote: >> On Sat, Mar 12, 2016 at 5:21 PM, Chen, Tianyang <tiche@seas.upenn.edu >> > wrote: >> > On 03/11/2016 11:54 PM, Meng Xu wrote: >> > > One more thing we should think about is: >> > > How can we "prove/test" the correctness of the scheduler? >> > > Can we use xentrace to record the scheduling trace and then write >> > > a >> > > userspace program to check the scheduling trace is obeying the >> > > priority rules of the scheduler. >> > > >> > Thanks for the review Meng, I am still exploring xentrace and it >> > can output >> > scheduling events such as which vcpu is running on a pcpu. I think >> > it's >> > possible for the userspace program to check RTDS, based on >> > cur_budget and >> > cur_deadline. We need to have a very clear outline of rules, for >> > the things >> > we are concerned about. When you say correctness, what does it >> > include? I'm >> > thinking about rules for when a vcpu should preempt, tickle and >> > actually be >> > picked. >> What you said should be included... >> What's in my mind is checking the invariants in the EDF scheduling >> policy. >> For example, at any time, the running VCPUs should have higher >> priority than the VCPUs in runq; >> At any time, the runq and replenish queue should be sorted based on >> EDF policy. >> > This would be rather useful, but it's really difficult. It was "a > thing" already when I was doing research on RT systems, i.e., a few > years ago. > > Fact is, there always be (transitory, hopefully) situations where the > schedule is not compliant with EDF, because of scheduling overhead, > timers resolution, irq waiting being re-enabled, etc. > The, as far as I can remember, is how to distinguish with an actual > transient state and a real bug in the coding of the algorithm. 
> > At the time, there was some work on it, and my research group was also > interested in doing something similar for the EDF scheduler we were > pushing to Linux. We never got to do much, though, and the only > reference I can recall of and find, right now, of others' work is this: > > https://www.cs.unc.edu/~mollison/unit-trace/index.html > http://www.cs.unc.edu/~mollison/pubs/ospert09.pdf Right! I knew this one in LITMUS and it is great! Every time when Bjorn update LITMUS, he only needs to run a bunches of test to make sure the update does not mess things up. If we could have something like this, that will be awesome! I suspect that they should also have the similar situation as we face here. They also have the scheduling latency, timers resolution, etc. We could probably ask them. Actually, we can bound the time spent in the transient state, that will be also useful! This will at least tell us how well the scheduler follows the gEDF scheduling policy. :-) > > It was for LITMUS-RT, so adaptation would be required to make it > process a xentrace/xenalyze event log (provided it's finished and > working, which I don't think). > > I can ask my old colleagues if they remember more, and whether anything > happened since I left, in the RT community about that (although, the > latter, you guys are in a way better position than me to check! :-P). > Sure! I will also ask around and will get back to this list later. Thanks, Meng ----------- Meng Xu PhD Student in Computer and Information Science University of Pennsylvania http://www.cis.upenn.edu/~mengxu/
Hi Dario, On Mon, Mar 14, 2016 at 7:58 AM, Dario Faggioli <dario.faggioli@citrix.com> wrote: > On Fri, 2016-03-11 at 23:54 -0500, Meng Xu wrote: >> >> > @@ -1150,6 +1300,101 @@ rt_dom_cntl( >> > return rc; >> > } >> > >> > +/* >> > + * The replenishment timer handler picks vcpus >> > + * from the replq and does the actual replenishment. >> > + */ >> > +static void repl_handler(void *data){ >> > + unsigned long flags; >> > + s_time_t now = NOW(); >> > + struct scheduler *ops = data; >> > + struct rt_private *prv = rt_priv(ops); >> > + struct list_head *replq = rt_replq(ops); >> > + struct list_head *runq = rt_runq(ops); >> > + struct timer *repl_timer = prv->repl_timer; >> > + struct list_head *iter, *tmp; >> > + struct list_head tmp_replq; >> > + struct rt_vcpu *svc = NULL; >> > + >> > + spin_lock_irqsave(&prv->lock, flags); >> Hmm, I haven't thought hard about the choice between >> spin_lock_irqsave() and spin_lock_irq(), before. >> >> Hi Dario, >> Is it better to use spin_lock_irqsave() or spin_lock_irq() at this >> place? >> > Just very quickly about this (I'll comment about the rest of the patch > later). > >> I'm not quite sure if the handler can be called in an interrupt >> disabled context? Can it? >> > I recommend looking at what happens inside init_timer(), i.e., where a > pointer to this function is stashed. Then, still in xen/common/timer.c, > check where this (and, in general, a timer handling function provided > to timer_init()) is actually invoked. > > When you'll find that spot, the answer to whether spin_lock_irq() is > safe or not in here, will appear quite evident. :-) Thanks for the pointer! However, it just makes me think that spin_lock_irq() can be used. timer_softirq_action() will call the execute_timer(), which will call the handler function. 
static void execute_timer(struct timers *ts, struct timer *t) { void (*fn)(void *) = t->function; void *data = t->data; t->status = TIMER_STATUS_inactive; list_add(&t->inactive, &ts->inactive); ts->running = t; spin_unlock_irq(&ts->lock); (*fn)(data); spin_lock_irq(&ts->lock); ts->running = NULL; } I looked into the spin_unlock_irq() void _spin_unlock_irq(spinlock_t *lock) { _spin_unlock(lock); local_irq_enable(); } in which, local_irq_enable() will set the interrupt flag: #define local_irq_enable() asm volatile ( "sti" : : : "memory" ) So IMHO, the replenishment handler will be called in an interrupt handler *and* with interrupts enabled. The only difference between lock_irq() and lock_irqsave() is that lock_irq() can only be called with interrupts enabled. (lock_irq will check that the interrupt is enabled before it disables the interrupt.) So I think it is safe to use lock_irq() inside the replenishment handler, unless something somewhere calls this handler *with interrupts disabled*. OK. I changed the spin_lock_irqsave to spin_lock_irq based on this patch. The system works well: the system can boot up and will not crash after I create a VM. So I think spin_lock_irq should work... Of course, spin_lock_irqsave() is a safer choice, with a few extra instructions of overhead. And in order to be sure _irq works, we need further tests for sure. I actually checked elsewhere in schedule.c and couldn't find a place where the priv->lock is used in an irq context with interrupts disabled. So I guess maybe we overuse spin_lock_irqsave() in the scheduler? What do you think? I'm ok that we keep using spin_lock_irqsave() for now. But maybe later, it will be a better idea to explore if spin_lock_irq() can replace all spin_lock_irqsave() in the RTDS scheduler? BTW, I'm unsure about the impact of such change on ARM right now. > >> When I used the spin_lock_irq(save), I just refered to what credit2 >> scheduler does, but didn't think hard enough about which one has >> better performance. 
>> > I'm not sure what you mean when you talk about Credit2, as there are no > timers in there. In any case, it is indeed the case that, if just > _irq() is safe, we should use it, as it's cheaper. I mean when I first wrote the RTDS scheduler, I just use spin_lock_irqsave() instead of spin_lock_irq() without considering the overhead. At that time, I just refer to credit2 and see how it uses locks. Since it uses spin_lock_irqsave() in other functions, say _dom_cntl(), I just use the same function and it worked. ;-) (Well, I should have thought more about the better choice as what I'm doing now. :-)) Meng
On Mon, Mar 14, 2016 at 11:38 AM, Meng Xu <mengxu@cis.upenn.edu> wrote: > Hi Dario, > > On Mon, Mar 14, 2016 at 7:58 AM, Dario Faggioli > <dario.faggioli@citrix.com> wrote: >> On Fri, 2016-03-11 at 23:54 -0500, Meng Xu wrote: >>> >>> > @@ -1150,6 +1300,101 @@ rt_dom_cntl( >>> > return rc; >>> > } >>> > >>> > +/* >>> > + * The replenishment timer handler picks vcpus >>> > + * from the replq and does the actual replenishment. >>> > + */ >>> > +static void repl_handler(void *data){ >>> > + unsigned long flags; >>> > + s_time_t now = NOW(); >>> > + struct scheduler *ops = data; >>> > + struct rt_private *prv = rt_priv(ops); >>> > + struct list_head *replq = rt_replq(ops); >>> > + struct list_head *runq = rt_runq(ops); >>> > + struct timer *repl_timer = prv->repl_timer; >>> > + struct list_head *iter, *tmp; >>> > + struct list_head tmp_replq; >>> > + struct rt_vcpu *svc = NULL; >>> > + >>> > + spin_lock_irqsave(&prv->lock, flags); >>> Hmm, I haven't thought hard about the choice between >>> spin_lock_irqsave() and spin_lock_irq(), before. >>> >>> Hi Dario, >>> Is it better to use spin_lock_irqsave() or spin_lock_irq() at this >>> place? >>> >> Just very quickly about this (I'll comment about the rest of the patch >> later). >> >>> I'm not quite sure if the handler can be called in an interrupt >>> disabled context? Can it? >>> >> I recommend looking at what happens inside init_timer(), i.e., where a >> pointer to this function is stashed. Then, still in xen/common/timer.c, >> check where this (and, in general, a timer handling function provided >> to timer_init()) is actually invoked. >> >> When you'll find that spot, the answer to whether spin_lock_irq() is >> safe or not in here, will appear quite evident. :-) > > Thanks for the pointer! However, it just makes me think that > spin_lock_irq() can be used. > > timer_softirq_action() will call the execute_timer(), which will call > the handler function. 
> > static void execute_timer(struct timers *ts, struct timer *t) > > { > > void (*fn)(void *) = t->function; > > void *data = t->data; > > > t->status = TIMER_STATUS_inactive; > > list_add(&t->inactive, &ts->inactive); > > > ts->running = t; > > spin_unlock_irq(&ts->lock); > > (*fn)(data); > > spin_lock_irq(&ts->lock); > > ts->running = NULL; > > } > > I loooked into the spin_unlock_irq() > > void _spin_unlock_irq(spinlock_t *lock) > > { > > _spin_unlock(lock); > > local_irq_enable(); > > } > > in which, lock_irq_enable() will set the interrupt flag: > #define local_irq_enable() asm volatile ( "sti" : : : "memory" ) > > So IMHO, the replenishment handler will be called in interrupt handler > *and* with interrupt enabled. > The only difference between lock_irq() and lock_irqsave() is that > lock_irq() can only be called with interrupt enabled. (lock_irq will > check if the interrupt is enabled before it disable the interrupt.) > So I think it is safe to use lock_irq() inside replenishment handler, > unless there is somewhere call this handler *with interrupt disabled*. > > OK. I changed the spin_lock_irqsave to spin_lock_irq based on this > patch. The system works well: system can boot up and will not crash > after I create a VM > So I think spin_lock_irq should work... > Of course, spin_lock_irqsave() is a more safe choice, with several > extra instruction overhead. > And in order to be sure _irq works, we need further tests for sure. > > I actually checked somewhere else in schedule.c and couldn't find a > place that the priv->lock is used in an irq context with interrupt > disabled. > So I guess maybe we overuse the spin_lock_irqsave() in the scheduler? > > What do you think? > > I'm ok that we keep using spin_lock_irqsave() for now. But maybe > later, it will be a better idea to explore if spin_lock_irq() can > replace all spin_lock_irqsave() in the RTDS scheduler? > > BTW, I'm unsure about the impact of such change on ARM right now. 
> I rethought about the choice of replacing spin_lock_irqsave with spin_lock_irq(). If, in the future, we introduce new locks, there may exist the situation when we want to lock two locks in the same function. In that case, we won't use spin_lock_irq() but have to use spin_lock_irqsave(). If we can mix up spin_lock_irq() with spin_lock_irqsave() in different functions for the same lock, which I think we can (right?), we should be fine. Otherwise, we will have to keep using spin_lock_irqsave(). Thanks, Meng ----------- Meng Xu PhD Student in Computer and Information Science University of Pennsylvania http://www.cis.upenn.edu/~mengxu/
On Mon, 2016-03-14 at 11:38 -0400, Meng Xu wrote: > Hi Dario, > Hi, > On Mon, Mar 14, 2016 at 7:58 AM, Dario Faggioli > <dario.faggioli@citrix.com> wrote: > > I recommend looking at what happens inside init_timer(), i.e., > > where a > > pointer to this function is stashed. Then, still in > > xen/common/timer.c, > > check where this (and, in general, a timer handling function > > provided > > to timer_init()) is actually invoked. > > > > When you'll find that spot, the answer to whether spin_lock_irq() > > is > > safe or not in here, will appear quite evident. :-) > Thanks for the pointer! However, it just makes me think that > spin_lock_irq() can be used. > Well, why the "just" ? :-) > So IMHO, the replenishment handler will be called in interrupt > handler > *and* with interrupt enabled. > The only difference between lock_irq() and lock_irqsave() is that > lock_irq() can only be called with interrupt enabled. (lock_irq will > check if the interrupt is enabled before it disable the interrupt.) > So I think it is safe to use lock_irq() inside replenishment handler, > unless there is somewhere call this handler *with interrupt > disabled*. > I totally agree: _irq() is ok. (Last sentence it's a bit pointless, considering that this function is a timer handler, and I would not expect to have a timer handler called by much else than one of the timer's interupt. :-) ) > OK. I changed the spin_lock_irqsave to spin_lock_irq based on this > patch. The system works well: system can boot up and will not crash > after I create a VM > So I think spin_lock_irq should work... > Glad to hear that. > Of course, spin_lock_irqsave() is a more safe choice, with several > extra instruction overhead. > And in order to be sure _irq works, we need further tests for sure. > Nah, we're talking about correctness, which is something one should be really be able to assess by looking at the code, rather than by testing! 
Looking at the code, one concludes that spin_lock_irq() is what should be used. If testing causes crashes due to using it, either: - we were wrong when looking at the code, and we should look better! - there is a bug somewhere else. As said, I'm glad that testing so far is confirming the analysis that seemed the correct one. :-) And I wouldn't say that _irqsave() is "more safe choice". It's either necessary, and then it should be used, or unnecessary, and then it should not. There may be situations whether it is hard or impossible to tell whether interrupt are disabled already or not (because, e.g., there are multiple call paths, with either IRQs disabled or not), and in those cases, using _irqsave() can be seen as wanting to be on the safer side... But this is not one of those cases, so using _irq() is what's right and not less safe than anything else. > I actually checked somewhere else in schedule.c and couldn't find a > place that the priv->lock is used in an irq context with interrupt > disabled. > So I guess maybe we overuse the spin_lock_irqsave() in the scheduler? > Perhaps, but I've got a patch series, which I'm trying to find some time to finish, where I basically convert almost all locking in schedule.c in just plain spin_lock() --i.e., allowing us to keep IRQs enabled. If I manage to, _irq() or _irqsave() would not matter any longer. :-) But I'm confused, by the fact that you mention schedule.c, whether you are talking about locking that happens inside RTDS code, or in schedule.c (me, I'm talking about the latter). > I'm ok that we keep using spin_lock_irqsave() for now. > If you're talking about this patch, then no, _irq() should be used. > But maybe > later, it will be a better idea to explore if spin_lock_irq() can > replace all spin_lock_irqsave() in the RTDS scheduler? > Maybe, but as I said, I'd like to explore other approaches (I am already, actually). > BTW, I'm unsure about the impact of such change on ARM right now. 
> And I'm unsure about why you think this is, or should be, architecture dependant... can you elaborate? Regards, Dario
On Mon, 2016-03-14 at 12:03 -0400, Meng Xu wrote: > On Mon, Mar 14, 2016 at 11:38 AM, Meng Xu <mengxu@cis.upenn.edu> > wrote: > > > > I'm ok that we keep using spin_lock_irqsave() for now. But maybe > > later, it will be a better idea to explore if spin_lock_irq() can > > replace all spin_lock_irqsave() in the RTDS scheduler? > > > I rethink about the choice of replacing spin_lock_irqsave with > spin_lock_irq(). > If in the future ,we will introduce new locks and there may exit the > situaiton when we want to lock two locks in the same function. In > that > case, we won't use spin_lock_irq() but have to use > spin_lock_irqsave(). If we can mix up spin_lock_irq() with > spin_lock_irqsave() in different fucntiosn for the same lock, which I > think we can (right?), we should be fine. Otherwise, we will have to > keep using spin_lock_irqsave(). > Mixing per se is not a problem, it's how you mix... If you call spin_unlock_irq() within a critical section protected by either spin_lock_irq() or spin_lock_irqsave(), that is not a good mix! :-) if you call _irqsave() inside a critical section protected by either _irq() or _irqsave(), that's what should be done (it's the purpose of _irqsave(), actually!). Actually, in case of nesting, most of the time the inner lock can be taken by just spin_lock(). Look, for instance, at csched2_dump_pcpu(). With more locks (which I agree is something we want for RTDS), the biggest issue is going to be getting the actual nesting right, rather than the various _irq* variants. :-) Regards, Dario
> >> So IMHO, the replenishment handler will be called in interrupt >> handler >> *and* with interrupt enabled. >> The only difference between lock_irq() and lock_irqsave() is that >> lock_irq() can only be called with interrupt enabled. (lock_irq will >> check if the interrupt is enabled before it disable the interrupt.) >> So I think it is safe to use lock_irq() inside replenishment handler, >> unless there is somewhere call this handler *with interrupt >> disabled*. >> > I totally agree: _irq() is ok. (Last sentence it's a bit pointless, > considering that this function is a timer handler, and I would not > expect to have a timer handler called by much else than one of the > timer's interupt. :-) ) > >> OK. I changed the spin_lock_irqsave to spin_lock_irq based on this >> patch. The system works well: system can boot up and will not crash >> after I create a VM >> So I think spin_lock_irq should work... >> > Glad to hear that. > >> Of course, spin_lock_irqsave() is a more safe choice, with several >> extra instruction overhead. >> And in order to be sure _irq works, we need further tests for sure. >> > Nah, we're talking about correctness, which is something one should be > really be able to assess by looking at the code, rather than by > testing! Looking at the code, one concludes that spin_lock_irq() is > what should be used. If testing causes crashes due to using it, either: > - we were wrong when looking at the code, and we should look better! > - there is a bug somewhere else. > > As said, I'm glad that testing so far is confirming the analysis that > seemed the correct one. :-) > > And I wouldn't say that _irqsave() is "more safe choice". It's either > necessary, and then it should be used, or unnecessary, and then it > should not. OK. From what I saw/read in the code, _irq is enough. Further testing just to "make sure" I didn't miss anything. 
:-) > > There may be situations whether it is hard or impossible to tell > whether interrupt are disabled already or not (because, e.g., there are > multiple call paths, with either IRQs disabled or not), and in those > cases, using _irqsave() can be seen as wanting to be on the safer > side... But this is not one of those cases, so using _irq() is what's > right and not less safe than anything else. > >> I actually checked somewhere else in schedule.c and couldn't find a >> place that the priv->lock is used in an irq context with interrupt >> disabled. >> So I guess maybe we overuse the spin_lock_irqsave() in the scheduler? >> > Perhaps, but I've got a patch series, which I'm trying to find some > time to finish, where I basically convert almost all locking in > schedule.c in just plain spin_lock() --i.e., allowing us to keep IRQs > enabled. If I manage to, _irq() or _irqsave() would not matter any > longer. :-) > > But I'm confused, by the fact that you mention schedule.c, whether you > are talking about locking that happens inside RTDS code, or in > schedule.c (me, I'm talking about the latter). Some of the functions in RTDS code assumes that lock is grab in the caller, which is in the schedule.c. That's what I meant by looking at the locking inside schedule.c. When RTDS functions is called by function in schedule.c with lock held, I want to make sure the lock is not held in an interrupt disabled context, i.e., interrupt has not been disabled before it tries to grab the lock. > >> I'm ok that we keep using spin_lock_irqsave() for now. >> > If you're talking about this patch, then no, _irq() should be used. > OK. then let's use _irq() then unless it causes problem. >> But maybe >> later, it will be a better idea to explore if spin_lock_irq() can >> replace all spin_lock_irqsave() in the RTDS scheduler? >> > Maybe, but as I said, I'd like to explore other approaches (I am > already, actually). Looking forward to your findings. 
:-) > >> BTW, I'm unsure about the impact of such change on ARM right now. >> > And I'm unsure about why you think this is, or should be, architecture > dependant... can you elaborate? I don't think it should make a difference between x86 and ARM. However, previously, I remembered that RTDS did not work on ARM because the critical section in the context switch on ARM is much longer than that on x86. That's why RTDS reported errors on ARM in terms of locks, which was fixed by the GlobalLogic guys. That's why I'm a little bit concerned and hold my opinion on the impact on ARM, since I haven't run the code on ARM yet and have no test data to back up my understanding. Thanks and Best Regards, Meng ----------- Meng Xu PhD Student in Computer and Information Science University of Pennsylvania http://www.cis.upenn.edu/~mengxu/
On Mon, Mar 14, 2016 at 12:35 PM, Dario Faggioli <dario.faggioli@citrix.com> wrote: > On Mon, 2016-03-14 at 12:03 -0400, Meng Xu wrote: >> On Mon, Mar 14, 2016 at 11:38 AM, Meng Xu <mengxu@cis.upenn.edu> >> wrote: >> > >> > I'm ok that we keep using spin_lock_irqsave() for now. But maybe >> > later, it will be a better idea to explore if spin_lock_irq() can >> > replace all spin_lock_irqsave() in the RTDS scheduler? >> > >> I rethink about the choice of replacing spin_lock_irqsave with >> spin_lock_irq(). >> If in the future ,we will introduce new locks and there may exit the >> situaiton when we want to lock two locks in the same function. In >> that >> case, we won't use spin_lock_irq() but have to use >> spin_lock_irqsave(). If we can mix up spin_lock_irq() with >> spin_lock_irqsave() in different fucntiosn for the same lock, which I >> think we can (right?), we should be fine. Otherwise, we will have to >> keep using spin_lock_irqsave(). >> > Mixing per se is not a problem, it's how you mix... > > If you call spin_unlock_irq() within a critical section protected by > either spin_lock_irq() or spin_lock_irqsave(), that is not a good mix! > :-) > > if you call _irqsave() inside a critical section protected by either > _irq() or _irqsave(), that's what should be done (it's the purpose of > _irqsave(), actually!). > > Actually, in case of nesting, most of the time the inner lock can be > taken by just spin_lock(). Look, for instance, at csched2_dump_pcpu(). Yes. I totally agree. Then we can use _irq() lock in this patch first. Then we can try to replace _irqsave() lock with irq() lock in another patch for the RTDS scheduler and see if it works. (It should work if I didn't miss something in the corner case.) As to the nested lock issues, we can handle it later when we are facing the issue, as long as we can mix the lock in a correct way. :-D Thanks, Meng
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c index bfed2e2..58189cd 100644 --- a/xen/common/sched_rt.c +++ b/xen/common/sched_rt.c @@ -3,7 +3,9 @@ * EDF scheduling is a real-time scheduling algorithm used in embedded field. * * by Sisu Xi, 2013, Washington University in Saint Louis - * and Meng Xu, 2014, University of Pennsylvania + * Meng Xu, 2014-2016, University of Pennsylvania + * Tianyang Chen, 2016, University of Pennsylvania + * Dagaen Golomb, 2016, University of Pennsylvania * * based on the code of credit Scheduler */ @@ -16,6 +18,7 @@ #include <xen/delay.h> #include <xen/event.h> #include <xen/time.h> +#include <xen/timer.h> #include <xen/perfc.h> #include <xen/sched-if.h> #include <xen/softirq.h> @@ -87,7 +90,7 @@ #define RTDS_DEFAULT_BUDGET (MICROSECS(4000)) #define UPDATE_LIMIT_SHIFT 10 -#define MAX_SCHEDULE (MILLISECS(1)) + /* * Flags */ @@ -115,6 +118,18 @@ #define RTDS_delayed_runq_add (1<<__RTDS_delayed_runq_add) /* + * The replenishment timer handler needs to check this bit + * to know where a replenished vcpu was, when deciding which + * vcpu should tickle. + * A replenished vcpu should tickle if it was moved from the + * depleted queue to the run queue. + * + Set in burn_budget() if a vcpu has zero budget left. + * + Cleared and checked in the repenishment handler. + */ +#define __RTDS_was_depleted 3 +#define RTDS_was_depleted (1<<__RTDS_was_depleted) + +/* * rt tracing events ("only" 512 available!). Check * include/public/trace.h for more details. 
*/ @@ -142,6 +157,9 @@ static cpumask_var_t *_cpumask_scratch; */ static unsigned int nr_rt_ops; +/* handler for the replenishment timer */ +static void repl_handler(void *data); + /* * Systme-wide private data, include global RunQueue/DepletedQ * Global lock is referenced by schedule_data.schedule_lock from all @@ -152,7 +170,9 @@ struct rt_private { struct list_head sdom; /* list of availalbe domains, used for dump */ struct list_head runq; /* ordered list of runnable vcpus */ struct list_head depletedq; /* unordered list of depleted vcpus */ + struct list_head replq; /* ordered list of vcpus that need replenishment */ cpumask_t tickled; /* cpus been tickled */ + struct timer *repl_timer; /* replenishment timer */ }; /* @@ -160,6 +180,7 @@ struct rt_private { */ struct rt_vcpu { struct list_head q_elem; /* on the runq/depletedq list */ + struct list_head replq_elem;/* on the repl event list */ /* Up-pointers */ struct rt_dom *sdom; @@ -213,8 +234,14 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops) return &rt_priv(ops)->depletedq; } +static inline struct list_head *rt_replq(const struct scheduler *ops) +{ + return &rt_priv(ops)->replq; +} + /* - * Queue helper functions for runq and depletedq + * Helper functions for manipulating the runqueue, the depleted queue, + * and the replenishment events queue. 
*/ static int __vcpu_on_q(const struct rt_vcpu *svc) @@ -228,6 +255,18 @@ __q_elem(struct list_head *elem) return list_entry(elem, struct rt_vcpu, q_elem); } +static struct rt_vcpu * +replq_elem(struct list_head *elem) +{ + return list_entry(elem, struct rt_vcpu, replq_elem); +} + +static int +vcpu_on_replq(const struct rt_vcpu *svc) +{ + return !list_empty(&svc->replq_elem); +} + /* * Debug related code, dump vcpu/cpu information */ @@ -288,7 +327,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu) static void rt_dump(const struct scheduler *ops) { - struct list_head *runq, *depletedq, *iter; + struct list_head *runq, *depletedq, *replq, *iter; struct rt_private *prv = rt_priv(ops); struct rt_vcpu *svc; struct rt_dom *sdom; @@ -301,6 +340,7 @@ rt_dump(const struct scheduler *ops) runq = rt_runq(ops); depletedq = rt_depletedq(ops); + replq = rt_replq(ops); printk("Global RunQueue info:\n"); list_for_each( iter, runq ) @@ -316,6 +356,13 @@ rt_dump(const struct scheduler *ops) rt_dump_vcpu(ops, svc); } + printk("Global Replenishment Event info:\n"); + list_for_each( iter, replq ) + { + svc = replq_elem(iter); + rt_dump_vcpu(ops, svc); + } + printk("Domain info:\n"); list_for_each( iter, &prv->sdom ) { @@ -380,11 +427,77 @@ rt_update_deadline(s_time_t now, struct rt_vcpu *svc) return; } +/* + * A helper function that only removes a vcpu from a queue + * and it returns 1 if the vcpu was in front of the list. + */ +static inline int +deadline_queue_remove(struct list_head *queue, struct list_head *elem) +{ + int pos = 0; + if ( queue->next != elem ) + pos = 1; + + list_del_init(elem); + return !pos; +} + static inline void __q_remove(struct rt_vcpu *svc) { - if ( __vcpu_on_q(svc) ) - list_del_init(&svc->q_elem); + ASSERT( __vcpu_on_q(svc) ); + list_del_init(&svc->q_elem); +} + +/* + * Removing a vcpu from the replenishment queue could + * re-program the timer for the next replenishment event + * if it was at the front of the list. 
+ */ +static inline void +__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc) +{ + struct rt_private *prv = rt_priv(ops); + struct list_head *replq = rt_replq(ops); + struct timer* repl_timer = prv->repl_timer; + + ASSERT( vcpu_on_replq(svc) ); + + if ( deadline_queue_remove(replq, &svc->replq_elem) ) + { + /* re-arm the timer for the next replenishment event */ + if ( !list_empty(replq) ) + { + struct rt_vcpu *svc_next = replq_elem(replq->next); + set_timer(repl_timer, svc_next->cur_deadline); + } + else + stop_timer(repl_timer); + } +} + +/* + * An utility function that inserts a vcpu to a + * queue based on certain order (EDF). The returned + * value is 1 if a vcpu has been inserted to the + * front of a list. + */ +static int +deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct list_head *elem), + struct rt_vcpu *svc, struct list_head *elem, struct list_head *queue) +{ + struct list_head *iter; + int pos = 0; + + list_for_each(iter, queue) + { + struct rt_vcpu * iter_svc = (*_get_q_elem)(iter); + if ( svc->cur_deadline <= iter_svc->cur_deadline ) + break; + pos++; + } + list_add_tail(elem, iter); + return !pos; } /* @@ -397,27 +510,62 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc) { struct rt_private *prv = rt_priv(ops); struct list_head *runq = rt_runq(ops); - struct list_head *iter; ASSERT( spin_is_locked(&prv->lock) ); - ASSERT( !__vcpu_on_q(svc) ); + ASSERT( vcpu_on_replq(svc) ); /* add svc to runq if svc still has budget */ if ( svc->cur_budget > 0 ) - { - list_for_each(iter, runq) - { - struct rt_vcpu * iter_svc = __q_elem(iter); - if ( svc->cur_deadline <= iter_svc->cur_deadline ) - break; - } - list_add_tail(&svc->q_elem, iter); - } + deadline_queue_insert(&__q_elem, svc, &svc->q_elem, runq); else - { list_add(&svc->q_elem, &prv->depletedq); +} + +/* + * Insert svc into the replenishment event list + * in replenishment time order. + * vcpus that need to be replished earlier go first. 
+ * The timer may be re-programmed if svc is inserted + * at the front of the event list. + */ +static void +__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc) +{ + struct list_head *replq = rt_replq(ops); + struct rt_private *prv = rt_priv(ops); + struct timer *repl_timer = prv->repl_timer; + + ASSERT( !vcpu_on_replq(svc) ); + + if ( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) ) + set_timer(repl_timer, svc->cur_deadline); +} + +/* + * Removes and re-inserts an event to the replenishment queue. + */ +static void +replq_reinsert(const struct scheduler *ops, struct rt_vcpu *svc) +{ + struct list_head *replq = rt_replq(ops); + struct rt_private *prv = rt_priv(ops); + struct timer *repl_timer = prv->repl_timer; + + ASSERT( vcpu_on_replq(svc) ); + + if ( deadline_queue_remove(replq, &svc->replq_elem) ) + { + if ( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) ) + set_timer(repl_timer, svc->cur_deadline); + else + { + struct rt_vcpu *svc_next = replq_elem(replq->next); + set_timer(repl_timer, svc_next->cur_deadline); + } } + else if ( deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq) ) + set_timer(repl_timer, svc->cur_deadline); } /* @@ -449,11 +597,18 @@ rt_init(struct scheduler *ops) INIT_LIST_HEAD(&prv->sdom); INIT_LIST_HEAD(&prv->runq); INIT_LIST_HEAD(&prv->depletedq); + INIT_LIST_HEAD(&prv->replq); cpumask_clear(&prv->tickled); ops->sched_data = prv; + /* + * The timer initialization will happen later when + * the first pcpu is added to this pool in alloc_pdata. 
+ */ + prv->repl_timer = NULL; + return 0; no_mem: @@ -473,6 +628,10 @@ rt_deinit(struct scheduler *ops) xfree(_cpumask_scratch); _cpumask_scratch = NULL; } + + kill_timer(prv->repl_timer); + xfree(prv->repl_timer); + ops->sched_data = NULL; xfree(prv); } @@ -494,6 +653,17 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu) if ( !alloc_cpumask_var(&_cpumask_scratch[cpu]) ) return NULL; + if ( prv->repl_timer == NULL ) + { + /* Allocate the timer on the first cpu of this pool. */ + prv->repl_timer = xzalloc(struct timer); + + if ( prv->repl_timer == NULL ) + return NULL; + + init_timer(prv->repl_timer, repl_handler, (void *)ops, cpu); + } + /* 1 indicates alloc. succeed in schedule.c */ return (void *)1; } @@ -587,6 +757,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd) return NULL; INIT_LIST_HEAD(&svc->q_elem); + INIT_LIST_HEAD(&svc->replq_elem); svc->flags = 0U; svc->sdom = dd; svc->vcpu = vc; @@ -610,7 +781,8 @@ rt_free_vdata(const struct scheduler *ops, void *priv) } /* - * This function is called in sched_move_domain() in schedule.c + * It is called in sched_move_domain() and sched_init_vcpu + * in schedule.c. * When move a domain to a new cpupool. * It inserts vcpus of moving domain to the scheduler's RunQ in * dest. cpupool. 
@@ -628,8 +800,13 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu *vc) if ( now >= svc->cur_deadline ) rt_update_deadline(now, svc); - if ( !__vcpu_on_q(svc) && vcpu_runnable(vc) && !vc->is_running ) - __runq_insert(ops, svc); + if ( !__vcpu_on_q(svc) && vcpu_runnable(vc) ) + { + __replq_insert(ops, svc); + + if ( !vc->is_running ) + __runq_insert(ops, svc); + } vcpu_schedule_unlock_irq(lock, vc); SCHED_STAT_CRANK(vcpu_insert); @@ -652,6 +829,10 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc) lock = vcpu_schedule_lock_irq(vc); if ( __vcpu_on_q(svc) ) __q_remove(svc); + + if ( vcpu_on_replq(svc) ) + __replq_remove(ops,svc); + vcpu_schedule_unlock_irq(lock, vc); } @@ -707,7 +888,14 @@ burn_budget(const struct scheduler *ops, struct rt_vcpu *svc, s_time_t now) svc->cur_budget -= delta; if ( svc->cur_budget < 0 ) + { svc->cur_budget = 0; + /* + * Set __RTDS_was_depleted so the replenishment + * handler can let this vcpu tickle if it was refilled. + */ + set_bit(__RTDS_was_depleted, &svc->flags); + } /* TRACE */ { @@ -784,44 +972,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask) } /* - * Update vcpu's budget and - * sort runq by insert the modifed vcpu back to runq - * lock is grabbed before calling this function - */ -static void -__repl_update(const struct scheduler *ops, s_time_t now) -{ - struct list_head *runq = rt_runq(ops); - struct list_head *depletedq = rt_depletedq(ops); - struct list_head *iter; - struct list_head *tmp; - struct rt_vcpu *svc = NULL; - - list_for_each_safe(iter, tmp, runq) - { - svc = __q_elem(iter); - if ( now < svc->cur_deadline ) - break; - - rt_update_deadline(now, svc); - /* reinsert the vcpu if its deadline is updated */ - __q_remove(svc); - __runq_insert(ops, svc); - } - - list_for_each_safe(iter, tmp, depletedq) - { - svc = __q_elem(iter); - if ( now >= svc->cur_deadline ) - { - rt_update_deadline(now, svc); - __q_remove(svc); /* remove from depleted queue */ - __runq_insert(ops, svc); /* 
add to runq */ - } - } -} - -/* * schedule function for rt scheduler. * The lock is already grabbed in schedule.c, no need to lock here */ @@ -840,8 +990,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched /* burn_budget would return for IDLE VCPU */ burn_budget(ops, scurr, now); - __repl_update(ops, now); - if ( tasklet_work_scheduled ) { trace_var(TRC_RTDS_SCHED_TASKLET, 1, 0, NULL); @@ -868,6 +1016,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched set_bit(__RTDS_delayed_runq_add, &scurr->flags); snext->last_start = now; + ret.time = -1; /* if an idle vcpu is picked */ if ( !is_idle_vcpu(snext->vcpu) ) { if ( snext != scurr ) @@ -880,9 +1029,8 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched snext->vcpu->processor = cpu; ret.migrated = 1; } + ret.time = snext->budget; /* invoke the scheduler next time */ } - - ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */ ret.task = snext->vcpu; return ret; @@ -903,7 +1051,10 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) if ( curr_on_cpu(vc->processor) == vc ) cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); else if ( __vcpu_on_q(svc) ) + { __q_remove(svc); + __replq_remove(ops, svc); + } else if ( svc->flags & RTDS_delayed_runq_add ) clear_bit(__RTDS_delayed_runq_add, &svc->flags); } @@ -1008,10 +1159,7 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) { struct rt_vcpu * const svc = rt_vcpu(vc); s_time_t now = NOW(); - struct rt_private *prv = rt_priv(ops); - struct rt_vcpu *snext = NULL; /* highest priority on RunQ */ - struct rt_dom *sdom = NULL; - cpumask_t *online; + bool_t missed; BUG_ON( is_idle_vcpu(vc) ); @@ -1033,32 +1181,42 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) else SCHED_STAT_CRANK(vcpu_wake_not_runnable); - /* If context hasn't been saved for this vcpu yet, we can't put it on - * the Runqueue/DepletedQ. 
Instead, we set a flag so that it will be - * put on the Runqueue/DepletedQ after the context has been saved. + /* + * If a deadline passed while svc was asleep/blocked, we need new + * scheduling parameters (a new deadline and full budget). + */ + + missed = ( now >= svc->cur_deadline ); + if ( missed ) + rt_update_deadline(now, svc); + + /* + * If context hasn't been saved for this vcpu yet, we can't put it on + * the run-queue/depleted-queue. Instead, we set the appropriate flag, + * the vcpu will be put back on queue after the context has been saved + * (in rt_context_save()). */ if ( unlikely(svc->flags & RTDS_scheduled) ) { set_bit(__RTDS_delayed_runq_add, &svc->flags); + /* + * The vcpu is waking up already, and we didn't even had the time to + * remove its next replenishment event from the replenishment queue + * when it blocked! No big deal. If we did not miss the deadline in + * the meantime, let's just leave it there. If we did, let's remove it + * and queue a new one (to occur at our new deadline). + */ + if ( missed ) + replq_reinsert(ops, svc); return; } - if ( now >= svc->cur_deadline) - rt_update_deadline(now, svc); - + /* Replenishment event got cancelled when we blocked. Add it back. 
*/ + __replq_insert(ops, svc); /* insert svc to runq/depletedq because svc is not in queue now */ __runq_insert(ops, svc); - __repl_update(ops, now); - - ASSERT(!list_empty(&prv->sdom)); - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); - online = cpupool_domain_cpumask(sdom->dom); - snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */ - - runq_tickle(ops, snext); - - return; + runq_tickle(ops, svc); } /* @@ -1069,10 +1227,6 @@ static void rt_context_saved(const struct scheduler *ops, struct vcpu *vc) { struct rt_vcpu *svc = rt_vcpu(vc); - struct rt_vcpu *snext = NULL; - struct rt_dom *sdom = NULL; - struct rt_private *prv = rt_priv(ops); - cpumask_t *online; spinlock_t *lock = vcpu_schedule_lock_irq(vc); clear_bit(__RTDS_scheduled, &svc->flags); @@ -1084,15 +1238,11 @@ rt_context_saved(const struct scheduler *ops, struct vcpu *vc) likely(vcpu_runnable(vc)) ) { __runq_insert(ops, svc); - __repl_update(ops, NOW()); - - ASSERT(!list_empty(&prv->sdom)); - sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem); - online = cpupool_domain_cpumask(sdom->dom); - snext = __runq_pick(ops, online); /* pick snext from ALL cpus */ - - runq_tickle(ops, snext); + runq_tickle(ops, svc); } + else + __replq_remove(ops, svc); + out: vcpu_schedule_unlock_irq(lock, vc); } @@ -1150,6 +1300,101 @@ rt_dom_cntl( return rc; } +/* + * The replenishment timer handler picks vcpus + * from the replq and does the actual replenishment. + */ +static void repl_handler(void *data){ + unsigned long flags; + s_time_t now = NOW(); + struct scheduler *ops = data; + struct rt_private *prv = rt_priv(ops); + struct list_head *replq = rt_replq(ops); + struct list_head *runq = rt_runq(ops); + struct timer *repl_timer = prv->repl_timer; + struct list_head *iter, *tmp; + struct list_head tmp_replq; + struct rt_vcpu *svc = NULL; + + spin_lock_irqsave(&prv->lock, flags); + + stop_timer(repl_timer); + + /* + * A temporary queue for updated vcpus. + * It is used to tickle. 
+ */ + INIT_LIST_HEAD(&tmp_replq); + + list_for_each_safe(iter, tmp, replq) + { + svc = replq_elem(iter); + + if ( now < svc->cur_deadline ) + break; + + rt_update_deadline(now, svc); + /* + * When a vcpu is replenished, it is moved + * to a temporary queue to tickle. + */ + list_del(&svc->replq_elem); + list_add(&svc->replq_elem, &tmp_replq); + + /* + * If svc is on run queue, we need + * to put it to the correct place since + * its deadline changes. + */ + if( __vcpu_on_q(svc) ) + { + /* put back to runq */ + __q_remove(svc); + __runq_insert(ops, svc); + } + } + + /* Iterate through the list of updated vcpus. */ + list_for_each_safe(iter, tmp, &tmp_replq) + { + struct vcpu* vc; + svc = replq_elem(iter); + vc = svc->vcpu; + + if ( curr_on_cpu(vc->processor) == vc && + ( !list_empty(runq) ) ) + { + /* + * If the vcpu is running, let the head + * of the run queue tickle if it has a + * higher priority. + */ + struct rt_vcpu *next_on_runq = __q_elem(runq->next); + if ( svc->cur_deadline >= next_on_runq->cur_deadline ) + runq_tickle(ops, next_on_runq); + } + else if ( __vcpu_on_q(svc) ) + { + /* Only tickle a vcpu that was depleted. */ + if ( test_and_clear_bit(__RTDS_was_depleted, &svc->flags) ) + runq_tickle(ops, svc); + } + + list_del(&svc->replq_elem); + /* Move it back to the replenishment event queue. */ + deadline_queue_insert(&replq_elem, svc, &svc->replq_elem, replq); + } + + /* + * Use the vcpu that's on the top of the run queue. + * Otherwise don't program the timer. + */ + if( !list_empty(replq) ) + set_timer(repl_timer, replq_elem(replq->next)->cur_deadline); + + spin_unlock_irqrestore(&prv->lock, flags); +} + static const struct scheduler sched_rtds_def = { .name = "SMP RTDS Scheduler", .opt_name = "rtds",