
patch for virtual machine oriented scheduling (2)

Message ID 820ac2e90904220752u4b92994as9afb6c346554eee6@mail.gmail.com (mailing list archive)
State New, archived

Commit Message

alex April 22, 2009, 2:52 p.m. UTC
If your Linux kernel is older than 2.6.27, do not use the
smp_call_function_xxx() helpers; use the preempt_safe_send_ipi()
function defined here instead.
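
At a call site the change is mechanical. A minimal sketch, assuming a
hypothetical vcpu_kick_handler()/kick_vcpus() pair (only
preempt_safe_send_ipi() itself comes from this patch):

    /* Hypothetical caller: vcpu_kick_handler() and kick_vcpus() are
     * illustration only. */
    static void vcpu_kick_handler(void *info)
    {
        /* runs on every CPU in the destination mask, in IPI context */
    }

    static void kick_vcpus(cpumask_t mask)
    {
        /* was: smp_call_function_mask(mask, vcpu_kick_handler, NULL, 0); */
        preempt_safe_send_ipi(mask, vcpu_kick_handler, NULL, 0);
    }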

Patch

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b43c4ef..b8ebe93 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -13,7 +13,7 @@  endif

 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm

-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+kvm-objs := $(common-objs) ipi.o trace.o schedule.o sched_credit.o x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
       i8254.o timer.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
diff --git a/arch/x86/kvm/ipi.c b/arch/x86/kvm/ipi.c
new file mode 100644
index 0000000..a110e7e
--- /dev/null
+++ b/arch/x86/kvm/ipi.c
@@ -0,0 +1,129 @@ 
+#include <linux/kvm_host.h>
+#include <linux/schedule.h>
+#include <linux/sched-if.h>
+#include <linux/ipi.h>
+#include <linux/trace.h>
+
+struct ipi_info{
+    cpumask_t   dest;
+    void (*func)(void *);   /* the function to be called*/
+    void *data;                    /* the function argument */
+    int sync;
+};
+#define MAX_PENDING_IPI            10
+
+#define NOT_SENDING_IPI            0
+#define SENDING_IPI        ((unsigned int)(-1))
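+/*
+ * Per-CPU ring of deferred cross-CPU calls.  prod and cons are
+ * free-running counters; an entry lives at (counter % MAX_PENDING_IPI),
+ * so the ring is full when prod == cons + MAX_PENDING_IPI.  sending_ipi
+ * marks that someone currently owns the smp_call_function_mask() path.
+ */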
+struct pending_ipis {
+    struct ipi_info*   pi;
+    volatile unsigned int cons;
+    volatile unsigned int prod;
+    volatile unsigned int sending_ipi;
+    volatile bool   tasklet_running;
+};
+DEFINE_PER_CPU(struct pending_ipis, pending_ipi);
+DEFINE_PER_CPU(spinlock_t, ipi_lock);
+
+void init_pending_ipi_buf(int cpu)
+{
+    struct pending_ipis* pendings = &per_cpu(pending_ipi, cpu);
+
+    pendings->cons = pendings->prod = 0;
+    pendings->sending_ipi = NOT_SENDING_IPI;
+    pendings->tasklet_running = false;
+    pendings->pi = kzalloc(sizeof(struct ipi_info)*MAX_PENDING_IPI, GFP_KERNEL);
+    spin_lock_init(&per_cpu(ipi_lock, cpu));
+    BUG_ON(!pendings->pi);
+}
+void destroy_pending_ipi_buf(int cpu)
+{
+    struct pending_ipis* pendings = &per_cpu(pending_ipi, cpu);
+    if(pendings->prod != pendings->cons)
+       printk(" pendings are %d %d\n", pendings->prod, pendings->cons);
+    kfree(pendings->pi);
+}
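+/*
+ * Queue a deferred cross-CPU call on @cpu's ring.  Returns 0 on
+ * success, 1 if the ring is full.  The entry is consumed later by the
+ * per-CPU IPI thread in inject_pending_ipi().
+ */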
+int insert_pending_ipi(int cpu, cpumask_t mask, void (*func)(void*), void* data, int sync)
+{
+    struct pending_ipis* pendings = &per_cpu(pending_ipi, cpu);
+    struct ipi_info *pend_ipi_info;
+    unsigned long flags;
+
+    local_irq_save(flags);
+    if(pendings->prod == (pendings->cons + MAX_PENDING_IPI)){
+       printk("pending IPI buffer full!\n");
+       local_irq_restore(flags);
+       return 1;
+    }
+    pend_ipi_info = pendings->pi + (pendings->prod % MAX_PENDING_IPI);
+    pendings->prod++;
+
+    BUG_ON(!pendings->pi);
+    BUG_ON(!pend_ipi_info);
+
+    pend_ipi_info->func = func;
+    pend_ipi_info->dest = mask;
+    pend_ipi_info->data = data;
+    pend_ipi_info->sync = sync;
+    local_irq_restore(flags);
+    return 0;
+}
+bool pending_ipi_buf_empty(int cpu)
+{
+    struct pending_ipis* pi;
+    pi = &per_cpu(pending_ipi, cpu);
+    return  (pi->prod <= pi->cons);
+}
+extern bool shutting_down;
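+/*
+ * Body of the per-CPU IPI thread: run as SCHED_RR, pin ourselves to
+ * @cpu, then sleep until entries appear on the ring and drain them
+ * with smp_call_function_mask() from a context where that is legal,
+ * until sd->ipi_quit asks us to exit.
+ */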
+void inject_pending_ipi(int cpu)
+{
+    struct sched_param param = { .sched_priority = MAX_RT_PRIO-2 };
+    struct schedule_data *sd = &per_cpu(schedule_data, cpu);
+    struct pending_ipis *pi = &per_cpu(pending_ipi, cpu);
+
+    sched_setscheduler(current, SCHED_RR, &param);
+    kvm_sched_setaffinity(current->pid, cpumask_of_cpu(cpu));
+
+    while(1) {
+       wait_event_interruptible(sd->ipi_wq, (pi->cons < pi->prod) || sd->ipi_quit);
+
+       if(sd->ipi_quit) break;
+       preempt_disable();
+       if(cmpxchg(&pi->sending_ipi, NOT_SENDING_IPI, SENDING_IPI) != NOT_SENDING_IPI){
+           printk("impossible case!\n");
+           preempt_enable();
+           continue;
+       }
+
+       /* pi->prod != pi->cons not OK? */
+       while(pi->prod > pi->cons){
+           struct ipi_info *ipi = pi->pi+ (pi->cons % MAX_PENDING_IPI);
+           smp_call_function_mask(ipi->dest, ipi->func, ipi->data, ipi->sync);
+           pi->cons++;
+       }
+       barrier();
+       pi->sending_ipi = NOT_SENDING_IPI;
+       preempt_enable();
+       barrier();
+    }
+    sd->ipi_quit = false;
+    printk("ipi thread %d exited\n", cpu);
+}
+
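+/*
+ * Preemption-safe wrapper around smp_call_function_mask(): disable
+ * preemption via get_cpu(), claim this CPU's sending_ipi flag, issue
+ * the call, then clear the flag and drop the CPU reference.
+ */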
+void preempt_safe_send_ipi(cpumask_t mask, void (*func)(void*), void* data, int sync)
+{
+    int cpu = get_cpu();
+    struct pending_ipis* pi = &per_cpu(pending_ipi, cpu);
+
+    if(cmpxchg(&pi->sending_ipi, NOT_SENDING_IPI, SENDING_IPI) != NOT_SENDING_IPI) {
+       BUG_ON(1);
+       put_cpu();
+       return;
+    }
+    smp_call_function_mask(mask, func, data, sync);
+
+    barrier();
+    pi->sending_ipi = NOT_SENDING_IPI;
+    TRACE_2D(TRC_SEND_IPI, cpu, __LINE__);
+    put_cpu();
+}
+EXPORT_SYMBOL_GPL(preempt_safe_send_ipi);