@@ -16,6 +16,7 @@
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/time.h>
+#include <xen/timer.h>
#include <xen/perfc.h>
#include <xen/sched-if.h>
#include <xen/softirq.h>
@@ -87,7 +88,7 @@
#define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
#define UPDATE_LIMIT_SHIFT 10
-#define MAX_SCHEDULE (MILLISECS(1))
+
/*
* Flags
*/
@@ -147,11 +148,19 @@ static unsigned int nr_rt_ops;
* Global lock is referenced by schedule_data.schedule_lock from all
* physical cpus. It can be grabbed via vcpu_schedule_lock_irq()
*/
+
+/* dedicated timer for replenishment */
+static struct timer repl_timer;
+
+/* handler for the replenishment timer */
+static void repl_handler(void *data);
+
struct rt_private {
spinlock_t lock; /* the global coarse grand lock */
struct list_head sdom; /* list of availalbe domains, used for dump */
struct list_head runq; /* ordered list of runnable vcpus */
struct list_head depletedq; /* unordered list of depleted vcpus */
+ struct list_head runningq; /* deadline-ordered list of currently running vcpus */
cpumask_t tickled; /* cpus been tickled */
};
@@ -160,6 +169,7 @@ struct rt_private {
*/
struct rt_vcpu {
struct list_head q_elem; /* on the runq/depletedq list */
+ struct list_head runningq_elem; /* on the runningq list */
struct list_head sdom_elem; /* on the domain VCPU list */
/* Up-pointers */
@@ -215,6 +225,11 @@ static inline struct list_head *rt_depletedq(const struct scheduler *ops)
return &rt_priv(ops)->depletedq;
}
+static inline struct list_head *rt_runningq(const struct scheduler *ops)
+{
+ return &rt_priv(ops)->runningq;
+}
+
/*
* Queue helper functions for runq and depletedq
*/
@@ -230,6 +245,18 @@ __q_elem(struct list_head *elem)
return list_entry(elem, struct rt_vcpu, q_elem);
}
+static int
+__vcpu_on_runningq(const struct rt_vcpu *svc)
+{
+ return !list_empty(&svc->runningq_elem);
+}
+
+static struct rt_vcpu *
+__runningq_elem(struct list_head *elem)
+{
+ return list_entry(elem, struct rt_vcpu, runningq_elem);
+}
+
/*
* Debug related code, dump vcpu/cpu information
*/
@@ -290,7 +317,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu)
static void
rt_dump(const struct scheduler *ops)
{
- struct list_head *iter_sdom, *iter_svc, *runq, *depletedq, *iter;
+ struct list_head *iter_sdom, *iter_svc, *runq, *depletedq, *runningq, *iter;
struct rt_private *prv = rt_priv(ops);
struct rt_vcpu *svc;
struct rt_dom *sdom;
@@ -303,6 +330,7 @@ rt_dump(const struct scheduler *ops)
runq = rt_runq(ops);
depletedq = rt_depletedq(ops);
+ runningq = rt_runningq(ops);
printk("Global RunQueue info:\n");
list_for_each( iter, runq )
@@ -318,6 +346,13 @@ rt_dump(const struct scheduler *ops)
rt_dump_vcpu(ops, svc);
}
+ printk("Global RunningQueue info:\n");
+ list_for_each( iter, runningq )
+ {
+ svc = __runningq_elem(iter);
+ rt_dump_vcpu(ops, svc);
+ }
+
printk("Domain info:\n");
list_for_each( iter_sdom, &prv->sdom )
{
@@ -387,6 +422,13 @@ __q_remove(struct rt_vcpu *svc)
list_del_init(&svc->q_elem);
}
+static inline void
+__runningq_remove(struct rt_vcpu *svc)
+{
+ if ( __vcpu_on_runningq(svc) )
+ list_del_init(&svc->runningq_elem);
+}
+
/*
* Insert svc with budget in RunQ according to EDF:
* vcpus with smaller deadlines go first.
@@ -420,6 +462,27 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
}
}
+static void
+__runningq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
+{
+ struct rt_private *prv = rt_priv(ops);
+ struct list_head *runningq = rt_runningq(ops);
+ struct list_head *iter;
+
+ ASSERT( spin_is_locked(&prv->lock) );
+
+ ASSERT( !__vcpu_on_runningq(svc) );
+
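+ /* keep the runningq sorted by current deadline (EDF order); e.g. with
+ * deadlines 10, 30, 50 already queued, a vcpu with deadline 20 stops the
+ * scan at the "30" entry and list_add_tail() places it just before it
+ */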
+ list_for_each(iter, runningq)
+ {
+ struct rt_vcpu * iter_svc = __runningq_elem(iter);
+ if ( svc->cur_deadline <= iter_svc->cur_deadline )
+ break;
+ }
+
+ list_add_tail(&svc->runningq_elem, iter);
+}
+
/*
* Init/Free related code
*/
@@ -449,11 +512,14 @@ rt_init(struct scheduler *ops)
INIT_LIST_HEAD(&prv->sdom);
INIT_LIST_HEAD(&prv->runq);
INIT_LIST_HEAD(&prv->depletedq);
+ INIT_LIST_HEAD(&prv->runningq);
cpumask_clear(&prv->tickled);
ops->sched_data = prv;
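+ /* one global replenishment timer for the scheduler; its handler runs on CPU 0 */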
+ init_timer(&repl_timer, repl_handler, ops, 0);
+
return 0;
no_mem:
@@ -473,6 +539,9 @@ rt_deinit(const struct scheduler *ops)
xfree(_cpumask_scratch);
_cpumask_scratch = NULL;
}
+
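+ /* make sure the replenishment timer is stopped and its handler cannot run again */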
+ kill_timer(&repl_timer);
+
xfree(prv);
}
@@ -587,6 +656,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd)
return NULL;
INIT_LIST_HEAD(&svc->q_elem);
+ INIT_LIST_HEAD(&svc->runningq_elem);
INIT_LIST_HEAD(&svc->sdom_elem);
svc->flags = 0U;
svc->sdom = dd;
@@ -792,44 +862,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t *mask)
}
/*
- * Update vcpu's budget and
- * sort runq by insert the modifed vcpu back to runq
- * lock is grabbed before calling this function
- */
-static void
-__repl_update(const struct scheduler *ops, s_time_t now)
-{
- struct list_head *runq = rt_runq(ops);
- struct list_head *depletedq = rt_depletedq(ops);
- struct list_head *iter;
- struct list_head *tmp;
- struct rt_vcpu *svc = NULL;
-
- list_for_each_safe(iter, tmp, runq)
- {
- svc = __q_elem(iter);
- if ( now < svc->cur_deadline )
- break;
-
- rt_update_deadline(now, svc);
- /* reinsert the vcpu if its deadline is updated */
- __q_remove(svc);
- __runq_insert(ops, svc);
- }
-
- list_for_each_safe(iter, tmp, depletedq)
- {
- svc = __q_elem(iter);
- if ( now >= svc->cur_deadline )
- {
- rt_update_deadline(now, svc);
- __q_remove(svc); /* remove from depleted queue */
- __runq_insert(ops, svc); /* add to runq */
- }
- }
-}
-
-/*
* schedule function for rt scheduler.
* The lock is already grabbed in schedule.c, no need to lock here
*/
@@ -848,8 +880,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
/* burn_budget would return for IDLE VCPU */
burn_budget(ops, scurr, now);
- __repl_update(ops, now);
-
if ( tasklet_work_scheduled )
{
snext = rt_vcpu(idle_vcpu[cpu]);
@@ -875,6 +905,9 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
set_bit(__RTDS_delayed_runq_add, &scurr->flags);
snext->last_start = now;
+
+ ret.time = -1; /* -1 means no limit, e.g. when the idle vcpu is picked */
+
if ( !is_idle_vcpu(snext->vcpu) )
{
if ( snext != scurr )
@@ -887,9 +920,10 @@ rt_schedule(const struct scheduler *ops, s_time_t now, bool_t tasklet_work_sched
snext->vcpu->processor = cpu;
ret.migrated = 1;
}
- }
- ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
+ ret.time = snext->budget; /* re-invoke the scheduler when this budget expires */
+ }
+
ret.task = snext->vcpu;
/* TRACE */
@@ -1033,20 +1067,17 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
{
struct rt_vcpu * const svc = rt_vcpu(vc);
s_time_t now = NOW();
- struct rt_private *prv = rt_priv(ops);
- struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
- struct rt_dom *sdom = NULL;
- cpumask_t *online;
BUG_ON( is_idle_vcpu(vc) );
+ /* the vcpu is already running on a pcpu */
if ( unlikely(curr_on_cpu(vc->processor) == vc) )
{
SCHED_STAT_CRANK(vcpu_wake_running);
return;
}
- /* on RunQ/DepletedQ, just update info is ok */
+ /* on RunQ/DepletedQ */
if ( unlikely(__vcpu_on_q(svc)) )
{
SCHED_STAT_CRANK(vcpu_wake_onrunq);
@@ -1073,52 +1104,47 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
/* insert svc to runq/depletedq because svc is not in queue now */
__runq_insert(ops, svc);
+
+ if ( repl_timer.expires == 0 || repl_timer.expires > svc->cur_deadline )
+ {
+ /* a newly woken-up vcpu could have an earlier release time,
+ * or it could be the first vcpu to program the timer
+ */
+ set_timer(&repl_timer, svc->cur_deadline);
+ }
- __repl_update(ops, now);
-
- ASSERT(!list_empty(&prv->sdom));
- sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
- online = cpupool_scheduler_cpumask(sdom->dom->cpupool);
- snext = __runq_pick(ops, online); /* pick snext from ALL valid cpus */
-
- runq_tickle(ops, snext);
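+ /* tickle a pcpu for the waking vcpu itself, instead of re-picking from the runq */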
+ runq_tickle(ops, svc);
return;
}
/*
- * scurr has finished context switch, insert it back to the RunQ,
- * and then pick the highest priority vcpu from runq to run
+ * scurr has finished its context switch; update the RunQ
+ * and the RunningQ accordingly
*/
static void
rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
{
struct rt_vcpu *svc = rt_vcpu(vc);
- struct rt_vcpu *snext = NULL;
- struct rt_dom *sdom = NULL;
- struct rt_private *prv = rt_priv(ops);
- cpumask_t *online;
+ struct rt_vcpu *running = rt_vcpu(current);
spinlock_t *lock = vcpu_schedule_lock_irq(vc);
-
+
clear_bit(__RTDS_scheduled, &svc->flags);
- /* not insert idle vcpu to runq */
- if ( is_idle_vcpu(vc) )
- goto out;
- if ( test_and_clear_bit(__RTDS_delayed_runq_add, &svc->flags) &&
- likely(vcpu_runnable(vc)) )
+ if ( !is_idle_vcpu(vc) )
{
- __runq_insert(ops, svc);
- __repl_update(ops, NOW());
-
- ASSERT(!list_empty(&prv->sdom));
- sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
- online = cpupool_scheduler_cpumask(sdom->dom->cpupool);
- snext = __runq_pick(ops, online); /* pick snext from ALL cpus */
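+ /* the vcpu has been descheduled; take it off the runningq */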
+ if ( __vcpu_on_runningq(svc) )
+ __runningq_remove(svc);
- runq_tickle(ops, snext);
+ if ( test_and_clear_bit(__RTDS_delayed_runq_add, &svc->flags) &&
+ likely(vcpu_runnable(vc)) )
+ /* insert the pre-empted vcpu back */
+ __runq_insert(ops, svc);
}
-out:
+
+ if ( !is_idle_vcpu(current) )
+ __runningq_insert(ops, running);
+
vcpu_schedule_unlock_irq(lock, vc);
}
@@ -1167,6 +1193,115 @@ rt_dom_cntl(
return rc;
}
+/* The replenishment timer handler replenishes vcpus whose deadlines
+ * have passed, then scans all three queues to find the most imminent
+ * release time. If no vcpu needs the timer, it is left disarmed and
+ * re-programmed in rt_vcpu_wake()
+ */
+static void repl_handler(void *data)
+{
+ unsigned long flags;
+ s_time_t now = NOW();
+ s_time_t min_repl = LONG_MAX; /* upper bound used in the comparisons below */
+ struct scheduler *ops = data;
+ struct rt_private *prv = rt_priv(ops);
+ struct list_head *runq = rt_runq(ops);
+ struct list_head *depletedq = rt_depletedq(ops);
+ struct list_head *runningq = rt_runningq(ops);
+ struct list_head *iter;
+ struct list_head *tmp;
+ struct rt_vcpu *svc = NULL;
+
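+ /* stop the timer while the queues are scanned; it is re-armed (or left
+ * disarmed) at the end of this handler
+ */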
+ stop_timer(&repl_timer);
+
+ spin_lock_irqsave(&prv->lock, flags);
+
+ /* update runq */
+ list_for_each_safe(iter, tmp, runq)
+ {
+ svc = __q_elem(iter);
+ if ( now < svc->cur_deadline )
+ break;
+
+ rt_update_deadline(now, svc);
+
+ __q_remove(svc);
+ __runq_insert(ops, svc);
+ runq_tickle(ops, svc);
+ }
+
+ /* update depletedq */
+ list_for_each_safe(iter, tmp, depletedq)
+ {
+ svc = __q_elem(iter);
+ if ( now >= svc->cur_deadline )
+ {
+ rt_update_deadline(now, svc);
+
+ __q_remove(svc);
+ __runq_insert(ops, svc);
+ runq_tickle(ops, svc);
+ }
+ }
+
+ /* update runningq */
+ list_for_each_safe(iter, tmp, runningq)
+ {
+ svc = __runningq_elem(iter);
+ if ( now >= svc->cur_deadline )
+ {
+ rt_update_deadline(now, svc);
+
+ __runningq_remove(svc);
+ __runningq_insert(ops, svc);
+
+ }
+ }
+
+ /* find the next release time in runq */
+ if ( !list_empty(runq) )
+ {
+ svc = __q_elem(runq->next);
+ min_repl = svc->cur_deadline;
+ }
+
+ /* find the next release time in the depletedq; it is unordered,
+ * so the whole queue has to be scanned
+ */
+ list_for_each(iter, depletedq)
+ {
+ svc = __q_elem(iter);
+ if ( min_repl > svc->cur_deadline )
+ min_repl = svc->cur_deadline;
+ }
+
+ /* find the next release time in the runningq */
+ list_for_each(iter, runningq)
+ {
+ svc = __runningq_elem(iter);
+ /* it is possible that the entry with the smallest deadline
+ * has been sleeping on the runningq; only deadlines in the
+ * future are candidates
+ */
+ if ( min_repl > svc->cur_deadline && svc->cur_deadline > now )
+ {
+ min_repl = svc->cur_deadline;
+ break;
+ }
+ }
+
+ /* if the timer fired but there are no vcpus waiting for a
+ * replenishment, set timer.expires to 0 so that the next vcpu
+ * that wakes up can reprogram the timer
+ */
+ if ( min_repl == LONG_MAX )
+ repl_timer.expires = 0;
+ else
+ /* reprogram the timer using the most imminent release time */
+ set_timer(&repl_timer, min_repl);
+
+ spin_unlock_irqrestore(&prv->lock, flags);
+}
+
static struct rt_private _rt_priv;
const struct scheduler sched_rtds_def = {