new file mode 100755
@@ -0,0 +1,6 @@
+#!/bin/sh
+rmmod kvm_intel
+rmmod kvm
+
+insmod $1/kvm.ko
+insmod $1/kvm-intel.ko setaffinity=0x`grep -w sched_setaffinity /proc/kallsyms | cut -d" " -f1`
new file mode 100644
@@ -0,0 +1,9 @@
+#ifndef IPI_H
+#define IPI_H
+extern void init_pending_ipi_buf(int cpu);
+extern void destroy_pending_ipi_buf(int cpu);
+extern void preempt_safe_send_ipi(cpumask_t mask, void (*func)(void*), void* data, int sync);
+extern int insert_pending_ipi(int cpu, cpumask_t mask, void (*func)(void*), void* data, int sync);
+extern bool pending_ipi_buf_empty(int cpu);
+
+#endif
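A minimal usage sketch of the pending-IPI helpers declared above (not part of the patch; the kick_vcpu_ipi/kick_remote_vcpu names and the meaning of sync == 0 are assumptions):

static void kick_vcpu_ipi(void *data)
{
	struct kvm_vcpu *v = data;

	v->status = VCPU_YIELD;		/* ask the vcpu thread to reschedule */
}

static void kick_remote_vcpu(struct kvm_vcpu *v)
{
	cpumask_t mask = cpumask_of_cpu(v->processor);

	/* Usable from preemptible context; assuming sync == 0 means
	 * "queue and return" rather than waiting for completion. */
	preempt_safe_send_ipi(mask, kick_vcpu_ipi, v, 0);
}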
@@ -14,6 +14,11 @@
#define KVM_API_VERSION 12
+/* A pid fits in 32 bits, so it is at most 10 decimal digits. */
+#define MAX_PROCESSID_LEN 11	/* strlen("4294967295") + 1 for the NUL */
+#define MAX_PROCESSID ((unsigned long)0x0ffffffff)
+
+
/* for KVM_TRACE_ENABLE */
struct kvm_user_trace_setup {
__u32 buf_size; /* sub_buffer size of each per-cpu */
@@ -63,6 +63,48 @@ struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
struct kvm_io_device *dev);
+
+/* VCPU is currently running on a physical CPU. */
+#define RUNSTATE_running 0
+
+/* VCPU is runnable, but not currently scheduled on any physical CPU. */
+#define RUNSTATE_runnable 1
+
+/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
+#define RUNSTATE_blocked 2
+
+/*
+ * VCPU is not runnable, but it is not blocked.
+ * This is a 'catch all' state for things like hotplug and pauses by the
+ * system administrator (or for critical sections in the hypervisor).
+ * RUNSTATE_blocked dominates this state (it is the preferred state).
+ */
+#define RUNSTATE_offline 3
+
+
+/*
+ * vcpu status
+ * Two variables describe a vcpu's scheduling state:
+ * pause_flags: the scheduler uses this to choose a ready (runnable) vcpu.
+ * status:      set to VCPU_RUNNING when the vcpu is picked to run and to
+ *              VCPU_YIELD when it is scheduled out; the Linux kernel side
+ *              uses it to tell whether the vcpu is runnable.
+ */
+#define VCPU_RUNNING 1	/* the vcpu is currently running */
+#define VCPU_YIELD   2	/* the vcpu should give up the cpu */
+
+struct vcpu_runstate_info {
+ /* VCPU's current state (RUNSTATE_*). */
+ int state;
+ /* When was current state entered (system time, ns)? */
+ uint64_t state_entry_time;
+ /*
+ * Time spent in each RUNSTATE_* (ns). The sum of these times is
+ * guaranteed not to drift from system time.
+ */
+ uint64_t time[4];
+};
+
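As an illustration of how these fields are meant to stay consistent (a sketch, not the patch's actual helper; the name vcpu_runstate_change is hypothetical): every state transition charges the time spent in the old state to time[] before recording the new state and its entry timestamp.

static inline void vcpu_runstate_change(struct kvm_vcpu *v,
					int new_state, s64 now)
{
	struct vcpu_runstate_info *rs = &v->runstate;

	rs->time[rs->state] += now - rs->state_entry_time;	/* charge old state */
	rs->state = new_state;
	rs->state_entry_time = now;
}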
struct kvm_vcpu {
struct kvm *kvm;
#ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -92,6 +134,21 @@ struct kvm_vcpu {
#endif
struct kvm_vcpu_arch arch;
+
+ /* Added data structures */
+ volatile unsigned int status;
+	bool is_running;		/* TODO: 'status' and 'is_running' track the same thing */
+
+ // struct hrtimer timer;
+ struct task_struct* thread;
+ cpumask_t cpu_affinity;
+
+ unsigned long pause_flags;
+ atomic_t pause_count;
+ struct vcpu_runstate_info runstate;
+ void *sched_priv; /* scheduler-specific data */
+ bool set_rt;
+	int processor;		/* the physical cpu the scheduler believes this vcpu is on */
};
struct kvm_memory_slot {
@@ -122,6 +179,10 @@ struct kvm_kernel_irq_routing_entry {
struct list_head link;
};
+typedef pid_t vmid_t;
+#define IDLE_VM_ID ((vmid_t)0x0ffffffff)
+#define HOST_VM_ID 1
+#define ANONY_VM_ID 0
struct kvm {
struct mutex lock; /* protects the vcpus array and APIC accesses */
spinlock_t mmu_lock;
@@ -152,6 +213,12 @@ struct kvm {
unsigned long mmu_notifier_seq;
long mmu_notifier_count;
#endif
+	void *sched_priv;		/* scheduler-specific data */
+ atomic_t pause_count;
+ bool is_paused_by_controller;
+ bool is_dying;
+ vmid_t vmid;
+ struct list_head vm_link;
};
/* The guest did something we don't support. */
@@ -165,6 +232,12 @@ struct kvm {
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
+static inline int vcpu_runnable(struct kvm_vcpu *v)
+{
+ return !(v->pause_flags |
+ atomic_read(&v->pause_count) |
+ atomic_read(&v->kvm->pause_count));
+}
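For illustration only, a per-vcpu pause built on these counters might look like this (hypothetical helpers; vcpu_sleep_sync() and vcpu_wake() are declared in sched-if.h later in the series):

static inline void kvm_vcpu_pause(struct kvm_vcpu *v)
{
	atomic_inc(&v->pause_count);	/* vcpu_runnable() is now false */
	vcpu_sleep_sync(v);		/* wait until it is off the cpu */
}

static inline void kvm_vcpu_unpause(struct kvm_vcpu *v)
{
	if (atomic_dec_and_test(&v->pause_count))
		vcpu_wake(v);
}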
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
@@ -178,6 +251,12 @@ void kvm_exit(void);
void kvm_get_kvm(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
+extern long (*sched_setaffinity_p)(pid_t pid, cpumask_t* new_mask);
+static inline long kvm_sched_setaffinity(pid_t pid, cpumask_t new_mask)
+{
+ return sched_setaffinity_p(pid, &new_mask);
+}
+
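The loader script at the top of the series resolves sched_setaffinity from /proc/kallsyms and passes its address as the setaffinity module parameter; a plausible sketch of how that address becomes sched_setaffinity_p (the parameter name comes from the script, the rest is an assumption):

static unsigned long setaffinity;	/* kernel address supplied by the load script */
module_param(setaffinity, ulong, 0444);

long (*sched_setaffinity_p)(pid_t pid, cpumask_t *new_mask);

static int __init kvm_resolve_setaffinity(void)
{
	if (!setaffinity)
		return -EINVAL;
	sched_setaffinity_p = (long (*)(pid_t, cpumask_t *))setaffinity;
	return 0;
}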
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
@@ -524,4 +603,11 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
#endif
+#define test_and_set_bool(b) xchg(&(b), 1)
+#define test_and_clear_bool(b) xchg(&(b), 0)
+static inline s64 NOW(void)
+{
+ struct timespec t = current_kernel_time();
+ return timespec_to_ns(&t);
+}
#endif
new file mode 100644
@@ -0,0 +1,225 @@
+#ifndef SCHED_IF_H
+#define SCHED_IF_H
+
+#include <linux/schedule.h>
+
+#if 0
+extern long (*sched_setaffinity_p)(pid_t pid, cpumask_t new_mask);
+static inline long kvm_sched_setaffinity(pid_t pid, cpumask_t new_mask)
+{
+ return sched_setaffinity_p(pid, new_mask);
+}
+extern long (*sched_setaffinity_p)(pid_t pid, cpumask_t* in_mask);
+static inline long kvm_sched_setaffinity(pid_t pid, cpumask_t new_mask)
+{
+ return sched_setaffinity_p(pid, &new_mask);
+}
+#endif
+
+#define IDLE_VM ((unsigned int)(-1))
+#define NORMAL_VM 0
+#define HOST_VM 1
+
+#define MAX_PARAMS 1
+DECLARE_PER_CPU(rwlock_t, pseudo_cli);
+
+#ifdef CONFIG_PREEMPT
+#define thread_preemptible() (preempt_count() == 0)
+#else
+#define thread_preemptible() 0
+#endif
+
+static inline int vcpu_schedule_try_lock(struct kvm_vcpu *v)
+{
+ unsigned int cpu;
+ struct schedule_data *sd;
+
+ for ( ; ; ){
+ int r;
+ cpu = v->processor;
+ sd = &per_cpu(schedule_data, cpu);
+ r = spin_trylock(&sd->schedule_lock);
+ if (!r) return 0;
+ if (likely(v->processor == cpu))
+ return 1;
+ spin_unlock(&sd->schedule_lock);
+ }
+}
+static inline struct schedule_data* vcpu_schedule_lock(struct kvm_vcpu *v)
+{
+ unsigned int cpu;
+ struct schedule_data *sd;
+
+ for ( ; ; )
+ {
+ cpu = v->processor;
+ sd = &per_cpu(schedule_data, cpu);
+ spin_lock(&sd->schedule_lock);
+ if ( likely(v->processor == cpu) ){
+ return sd;
+ }
+ spin_unlock(&sd->schedule_lock);
+ }
+}
+
+static inline int pseudo_irq_cli(void)
+{
+ int cpu = raw_smp_processor_id();
+ struct schedule_data *sd = &per_cpu(schedule_data, cpu);
+
+ tasklet_disable(&sd->sched_tasklet);
+ tasklet_disable(&sd->tick_tasklet);
+
+	/* The tasklets may already be running; claim sched_state for user
+	 * context, rescheduling until the kernel side releases it.
+	 */
+ while(cmpxchg(&sd->sched_state,
+ SCHEDULER_FREE, SCHEDULER_USER) != SCHEDULER_FREE)
+ schedule();
+
+ return 1;
+}
+static inline int pseudo_irq_save(int flags)
+{
+ int cpu = raw_smp_processor_id();
+ struct schedule_data *sd = &per_cpu(schedule_data, cpu);
+	BUG_ON(thread_preemptible());
+
+ tasklet_disable(&sd->sched_tasklet);
+ tasklet_disable(&sd->tick_tasklet);
+
+	/* The tasklets may already be running; claim sched_state for user
+	 * context, rescheduling until the kernel side releases it.
+	 */
+ while(cmpxchg(&sd->sched_state,
+ SCHEDULER_FREE, SCHEDULER_USER) != SCHEDULER_FREE)
+ schedule();
+
+ return 1;
+}
+static inline void pseudo_irq_sti(void)
+{
+ int cpu = raw_smp_processor_id();
+ struct schedule_data *sd = &per_cpu(schedule_data, cpu);
+	BUG_ON(thread_preemptible());
+ sd->sched_state = SCHEDULER_FREE;
+ barrier();
+ tasklet_enable(&sd->sched_tasklet);
+ tasklet_enable(&sd->tick_tasklet);
+}
+
+static inline void pseudo_irq_restore(int flags)
+{
+ pseudo_irq_sti();
+}
+
+#define vcpu_schedule_lock_irqsave(v, flags) do {	\
+	int r = pseudo_irq_save(flags);			\
+	BUG_ON(thread_preemptible());			\
+	BUG_ON(!r);					\
+	vcpu_schedule_lock((v));			\
+} while ( 0 )
+
+#define vcpu_schedule_lock_irq(v) do {			\
+	BUG_ON(thread_preemptible());			\
+	BUG_ON(!pseudo_irq_cli());			\
+	vcpu_schedule_lock((v));			\
+} while ( 0 )
+
+static inline void vcpu_schedule_unlock(struct kvm_vcpu *v)
+{
+ spin_unlock(&per_cpu(schedule_data, v->processor).schedule_lock);
+}
+
+#define vcpu_schedule_unlock_irq(v) do { \
+ vcpu_schedule_unlock(v); \
+ pseudo_irq_sti(); \
+} while ( 0 )
+#define vcpu_schedule_unlock_irqrestore(v, flags) do { \
+ vcpu_schedule_unlock(v); \
+ pseudo_irq_restore(flags); \
+} while ( 0 )
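A sketch of the intended calling pattern (illustrative caller, not from the patch): the pseudo-irq disable keeps the per-cpu scheduler and tick tasklets quiet while the per-cpu schedule_lock is held, mirroring how an irq-safe spinlock would be used.

static void example_block_vcpu(struct kvm_vcpu *v)
{
	vcpu_schedule_lock_irq(v);	/* tasklets disabled + schedule_lock held */

	set_bit(_VPF_blocked, &v->pause_flags);	/* now not runnable to the scheduler */

	vcpu_schedule_unlock_irq(v);	/* release the lock, re-enable tasklets */
}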
+
+struct kvm;
+struct scheduler {
+ char *name; /* full name for this scheduler */
+ char *opt_name; /* option name for this scheduler */
+ unsigned int sched_id; /* ID for this scheduler */
+
+ void (*init) (void);
+
+ int (*init_vm) (struct kvm*);
+ void (*destroy_vm) (struct kvm*);
+
+ int (*init_vcpu) (struct kvm_vcpu *);
+ void (*destroy_vcpu) (struct kvm_vcpu *);
+
+ void (*sleep) (struct kvm_vcpu *);
+ void (*wake) (struct kvm_vcpu *);
+
+ struct task_slice (*do_schedule) (s_time_t);
+
+ void (*disable_scheduler) (int cpu);
+ int (*start_scheduler) (int cpu);
+
+ void (*stop_schedule) (int cpu);
+
+ int (*pick_cpu) (struct kvm_vcpu *);
+ int (*read_schedule_info) (struct kvm*, char*, int sz);
+ int (*write_schedule_info) (struct kvm*, char*);
+ void (*dump_settings) (void);
+ void (*dump_cpu_state) (int);
+};
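For orientation, a concrete scheduler would instantiate this ops table roughly as below (a sketch: the csched_* names, the 30 ms slice, and the sched_id value are placeholders, not taken from the patch):

static struct task_slice csched_do_schedule(s_time_t now)
{
	struct schedule_data *sd = &per_cpu(schedule_data, raw_smp_processor_id());
	struct task_slice ret;

	ret.task = sd->idle;		/* a real scheduler picks from its runqueue */
	ret.time = 30 * 1000000LL;	/* let it run for 30 ms */
	return ret;
}

static struct scheduler sched_credit = {
	.name        = "credit-like scheduler (sketch)",
	.opt_name    = "credit",
	.sched_id    = 1,
	.do_schedule = csched_do_schedule,
	/* .init, .init_vm, .init_vcpu, .sleep, .wake, ... filled in likewise */
};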
+
+extern struct kvm *idle_vm_kvm;
+extern struct kvm *host_vm_kvm;
+#define is_idle_vm(kvm) ((kvm) == idle_vm_kvm)
+#define is_host_vm(kvm) ((kvm) == host_vm_kvm)
+
+extern bool shutting_down;
+
+#define is_idle_vcpu(vcpu) (is_idle_vm((vcpu)->kvm))
+#define is_host_vcpu(vcpu) (is_host_vm((vcpu)->kvm))
+
+/* VCPU is blocked, waiting for an event. */
+#define _VPF_blocked 0
+#define VPF_blocked (1UL<<_VPF_blocked)
+
+ /* VCPU is offline. */
+#define _VPF_down 1
+#define VPF_down (1UL<<_VPF_down)
+
+/* VCPU is being migrated to another physical cpu. */
+#define _VPF_migrating 3
+#define VPF_migrating (1UL<<_VPF_migrating)
+
+extern void sched_destroy_vcpu(struct kvm_vcpu *v);
+extern void sched_destroy_vm(struct kvm *);
+extern int sched_init_vcpu(struct kvm_vcpu *v, unsigned int processor);
+extern int sched_init_vm(struct kvm *kvm);
+extern void vcpu_sleep_nosync(struct kvm_vcpu *v);
+extern void vcpu_sleep_sync(struct kvm_vcpu *v);
+extern void vcpu_wake(struct kvm_vcpu *v);
+extern void vm_pause(struct kvm *kvm);
+extern void vm_unpause(struct kvm *kvm);
+extern void scheduler_init(void);
+extern void wait_scheduler_stops(void);
+extern void scheduler_destroy(void);
+extern void scheduler_stop_tickers(void);
+extern void vm_pause_by_systemcontroller(struct kvm* kvm);
+extern void vm_unpause_by_systemcontroller(struct kvm* kvm);
+extern void stop_auto_schedule(void);
+extern void scheduler_start(void);
+
+#define current_vcpu (per_cpu(schedule_data, raw_smp_processor_id()).curr)
+#endif
new file mode 100644
@@ -0,0 +1,35 @@
+#ifndef _SCHEDULE_H
+#define _SCHEDULE_H
+#include <linux/interrupt.h>
+
+typedef s64 s_time_t;
+
+struct task_slice {
+ struct kvm_vcpu *task;
+ s_time_t time;
+};
+
+#define SCHEDULER_FREE   0	/* no one is using the scheduler */
+#define SCHEDULER_USER   1	/* claimed on behalf of a user (thread-context) request */
+#define SCHEDULER_KERNEL 2	/* claimed by the scheduler or ticker tasklet */
+
+struct schedule_data {
+ spinlock_t schedule_lock; /* spinlock protecting curr */
+ struct kvm_vcpu *curr; /* current task */
+ struct kvm_vcpu *idle; /* idle task for this cpu */
+ void *sched_priv;
+ struct hrtimer s_timer; /* scheduling timer */
+	int id;				/* the cpu id */
+ struct hrtimer watchdog; /* the watchdog timer */
+ struct tasklet_struct sched_tasklet; /* per cpu schedule tasklet */
+ wait_queue_head_t ipi_wq; /* ipi helper thread waitqueue */
+ volatile bool ipi_quit; /* the ipi helper should quit */
+ struct tasklet_struct tick_tasklet; /* per cpu tick tasklet */
+ volatile int sched_state; /* the scheduler status */
+	volatile bool in_use;			/* whether scheduling is enabled on this cpu */
+ volatile bool can_migrate;
+};
+
+DECLARE_PER_CPU(struct schedule_data, schedule_data);
+
+#endif
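A sketch of how the core is expected to drive these fields (assumed shape, for illustration): do_schedule() returns the next vcpu and its slice, curr is switched under schedule_lock, and s_timer is re-armed so that slice expiry re-triggers scheduling.

static void kvm_schedule_once(struct schedule_data *sd, struct scheduler *ops)
{
	struct task_slice next;

	spin_lock(&sd->schedule_lock);
	next = ops->do_schedule(NOW());
	sd->curr = next.task ? next.task : sd->idle;
	spin_unlock(&sd->schedule_lock);

	/* fire again when the chosen vcpu's slice is used up */
	hrtimer_start(&sd->s_timer, ns_to_ktime(next.time), HRTIMER_MODE_REL);
}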
new file mode 100755
@@ -0,0 +1,60 @@
+#ifndef TRACE_H
+#define TRACE_H
+
+#define NR_TRACES 500
+struct t_rec {
+	u64 cycles;		/* local cpu tsc (assumed synchronized across cpus) */
+ u32 event; /* event id */
+ unsigned long data[5]; /* event data items */
+};
+struct trace_logger {
+ struct t_rec* buf;
+ spinlock_t lock;
+ int ptr;
+};
+#define WATCHDOG_NS 5000000000LL	/* 5 s watchdog period, in ns */
+extern int init_trace_buf(int cpu);
+extern void free_trace_buf(int cpu);
+extern enum hrtimer_restart dump_traces(void*);
+extern enum hrtimer_restart dump_cpu_trace(struct hrtimer*);
+extern void trace(u32 event, unsigned long d0, unsigned long d1,
+ unsigned long d2, unsigned long d3, unsigned long d4);
+
+#define TRC_SCHED_VM_ADD 0
+#define TRC_SCHED_SLEEP 1
+#define TRC_SCHED_WAKE 2
+#define TRC_SCHED_YIELD 3
+#define TRC_SCHED_SWITCH_INFPREV 4
+#define TRC_SCHED_SWITCH_INFNEXT 5
+#define TRC_SCHED_SWITCH 6
+
+#define TRC_SCHED_TIMER 7
+#define TRC_SCHED_RELAYED_TIMER 8
+#define TRC_VCPU_SCHEDULE 9
+#define TRC_RUNQ_TICKLE 10
+#define TRC_TASKLET_SCHEDULE 11
+#define TRC_CSCHED_TICK 12
+#define TRC_CSCHED_SLEEP 13
+#define TRC_CSCHED_WAKE 14
+#define TRC_INTERNAL_CSCHED_TICK 15
+#define TRC_DO_PENDING 16
+#define TRC_PSEUDO_CLI 17
+#define TRC_PSEUDO_STI 18
+#define TRC_LOAD_BALANCE 19
+#define TRC_CSCHED_SCHEDULE 20
+#define TRC_PSEUDO_INTR 21
+#define TRC_PSEUDO_EOI 22
+#define TRC_PSEUDO_OPEN_INTR 23
+#define TRC_TIMER_FN 24
+#define TRC_SEND_IPI 25
+#define TRC_INJECT_PEND_IPI 26
+#define TRC_PEND_INTR 27
+#define TRC_RUNQ_SORT 28
+
+#define TRACE_0D(e) trace((e), 0, 0, 0, 0, 0)
+#define TRACE_1D(e,d) trace((e), (d), 0, 0, 0, 0)
+#define TRACE_2D(e,d1,d2) trace((e), (d1), (d2), 0, 0, 0)
+#define TRACE_3D(e,d1,d2,d3) trace((e), (d1), (d2), (d3), 0, 0)
+#define TRACE_4D(e,d1,d2,d3,d4) trace((e), (d1), (d2), (d3), (d4), 0)
+#define TRACE_5D(e,d1,d2,d3,d4,d5) trace((e), (d1), (d2), (d3), (d4), (d5))
+#endif
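A usage sketch for the tracing facility (the event choice and the watchdog wiring are illustrative, not from the patch): record a context switch with the outgoing and incoming vmids, and arm the per-cpu watchdog hrtimer so dump_cpu_trace() periodically flushes the buffer.

static void trace_vcpu_switch(struct kvm_vcpu *prev, struct kvm_vcpu *next)
{
	/* one t_rec: tsc, TRC_SCHED_SWITCH, and the two vm ids */
	TRACE_2D(TRC_SCHED_SWITCH, prev->kvm->vmid, next->kvm->vmid);
}

static void start_trace_watchdog(struct schedule_data *sd)
{
	hrtimer_init(&sd->watchdog, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	sd->watchdog.function = dump_cpu_trace;
	hrtimer_start(&sd->watchdog, ns_to_ktime(WATCHDOG_NS), HRTIMER_MODE_REL);
}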