
[RFC,47/49] xen/sched: support core scheduling in continue_running()

Message ID 20190329150934.17694-48-jgross@suse.com (mailing list archive)
State Superseded
Series xen: add core scheduling support

Commit Message

Jürgen Groß March 29, 2019, 3:09 p.m. UTC
For core scheduling, the transition from an offline vcpu to a running
one must be special-cased: the vcpu might be in guest idle, but its
context has to be loaded as if a full context switch were being done.
For that purpose add a flag to the vcpu structure which indicates this
condition. The flag is tested in continue_running(), and if it is set
the context is loaded as needed.

Carve some of the context loading functionality out of
__context_switch() into a new helper, as continue_running() now needs
it, too.
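
The intended flow, reduced to its essentials, looks roughly like the
sketch below (hypothetical code with made-up *_sketch names, not the
hunks added by this patch): the flag is set wherever a vcpu leaves the
"down" state and is consumed on the continue_running() path.

    #include <stdbool.h>

    struct vcpu_sketch {
        bool reload_context;  /* vcpu was down; context must be (re)loaded */
    };

    /* Bringing a vcpu online: its context is not loaded on any pcpu yet. */
    static void vcpu_online_sketch(struct vcpu_sketch *v)
    {
        v->reload_context = true;
        /* ... clear _VPF_down and wake the vcpu ... */
    }

    /*
     * continue_running() path: the same vcpu keeps the pcpu, but if it
     * was offline before, its context still has to be loaded as on a
     * real context switch.
     */
    static void continue_running_sketch(struct vcpu_sketch *v)
    {
        if ( v->reload_context )
        {
            /* load registers, FPU/XSAVE state, GDT and page tables here */
            v->reload_context = false;
        }
    }

In the actual patch the consuming side lives in __continue_running(),
which reuses the context loading carved out of __context_switch().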

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/arch/x86/domain.c     | 114 +++++++++++++++++++++++++++++++++++++++-------
 xen/arch/x86/hvm/hvm.c    |   2 +
 xen/arch/x86/hvm/vlapic.c |   1 +
 xen/common/domain.c       |   2 +
 xen/common/schedule.c     |  19 +++++---
 xen/include/xen/sched.h   |   3 ++
 6 files changed, 117 insertions(+), 24 deletions(-)

Patch

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 9acf2e9792..7a51064de0 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1171,7 +1171,10 @@  int arch_set_info_guest(
 
  out:
     if ( flags & VGCF_online )
+    {
+        v->reload_context = true;
         clear_bit(_VPF_down, &v->pause_flags);
+    }
     else
         set_bit(_VPF_down, &v->pause_flags);
     return 0;
@@ -1663,6 +1666,24 @@  static inline void load_default_gdt(seg_desc_t *gdt, unsigned int cpu)
     per_cpu(full_gdt_loaded, cpu) = false;
 }
 
+static inline void csw_load_regs(struct vcpu *v,
+                                 struct cpu_user_regs *stack_regs)
+{
+    memcpy(stack_regs, &v->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
+    if ( cpu_has_xsave )
+    {
+        u64 xcr0 = v->arch.xcr0 ?: XSTATE_FP_SSE;
+
+        if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
+            BUG();
+
+        if ( cpu_has_xsaves && is_hvm_vcpu(v) )
+            set_msr_xss(v->arch.hvm.msr_xss);
+    }
+    vcpu_restore_fpu_nonlazy(v, false);
+    v->domain->arch.ctxt_switch->to(v);
+}
+
 static void __context_switch(void)
 {
     struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
@@ -1676,7 +1697,7 @@  static void __context_switch(void)
     ASSERT(p != n);
     ASSERT(!vcpu_cpu_dirty(n));
 
-    if ( !is_idle_domain(pd) )
+    if ( !is_idle_domain(pd) && is_vcpu_online(p) && !p->reload_context )
     {
         memcpy(&p->arch.user_regs, stack_regs, CTXT_SWITCH_STACK_BYTES);
         vcpu_save_fpu(p);
@@ -1692,22 +1713,8 @@  static void __context_switch(void)
         cpumask_set_cpu(cpu, nd->dirty_cpumask);
     write_atomic(&n->dirty_cpu, cpu);
 
-    if ( !is_idle_domain(nd) )
-    {
-        memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
-        if ( cpu_has_xsave )
-        {
-            u64 xcr0 = n->arch.xcr0 ?: XSTATE_FP_SSE;
-
-            if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) )
-                BUG();
-
-            if ( cpu_has_xsaves && is_hvm_vcpu(n) )
-                set_msr_xss(n->arch.hvm.msr_xss);
-        }
-        vcpu_restore_fpu_nonlazy(n, false);
-        nd->arch.ctxt_switch->to(n);
-    }
+    if ( !is_idle_domain(nd) && is_vcpu_online(n) )
+        csw_load_regs(n, stack_regs);
 
     psr_ctxt_switch_to(nd);
 
@@ -1775,6 +1782,72 @@  static void context_wait_rendezvous_out(struct sched_item *item,
         context_saved(prev);
 }
 
+static void __continue_running(struct vcpu *same)
+{
+    struct domain *d = same->domain;
+    seg_desc_t *gdt;
+    bool full_gdt = need_full_gdt(d);
+    unsigned int cpu = smp_processor_id();
+
+    gdt = !is_pv_32bit_domain(d) ? per_cpu(gdt_table, cpu) :
+                                   per_cpu(compat_gdt_table, cpu);
+
+    if ( same->reload_context )
+    {
+        struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
+
+        get_cpu_info()->use_pv_cr3 = false;
+        get_cpu_info()->xen_cr3 = 0;
+
+        local_irq_disable();
+
+        csw_load_regs(same, stack_regs);
+
+        psr_ctxt_switch_to(d);
+
+        if ( full_gdt )
+            write_full_gdt_ptes(gdt, same);
+
+        write_ptbase(same);
+
+#if defined(CONFIG_PV) && defined(CONFIG_HVM)
+        /* Prefetch the VMCB if we expect to use it later in context switch */
+        if ( cpu_has_svm && is_pv_domain(d) && !is_pv_32bit_domain(d) &&
+             !(read_cr4() & X86_CR4_FSGSBASE) )
+            svm_load_segs(0, 0, 0, 0, 0, 0, 0);
+#endif
+
+        if ( full_gdt )
+            load_full_gdt(same, cpu);
+
+        local_irq_enable();
+
+        if ( is_pv_domain(d) )
+            load_segments(same);
+
+        same->reload_context = false;
+
+        _update_runstate_area(same);
+
+        update_vcpu_system_time(same);
+    }
+    else if ( !is_idle_vcpu(same) && full_gdt != per_cpu(full_gdt_loaded, cpu) )
+    {
+        local_irq_disable();
+
+        if ( full_gdt )
+        {
+            write_full_gdt_ptes(gdt, same);
+            write_ptbase(same);
+            load_full_gdt(same, cpu);
+        }
+        else
+            load_default_gdt(gdt, cpu);
+
+        local_irq_enable();
+    }
+}
+
 void context_switch(struct vcpu *prev, struct vcpu *next)
 {
     unsigned int cpu = smp_processor_id();
@@ -1811,6 +1884,9 @@  void context_switch(struct vcpu *prev, struct vcpu *next)
          (is_idle_domain(nextd) && cpu_online(cpu)) )
     {
         local_irq_enable();
+
+        if ( !is_idle_domain(nextd) )
+            __continue_running(next);
     }
     else
     {
@@ -1822,6 +1898,8 @@  void context_switch(struct vcpu *prev, struct vcpu *next)
         if ( is_pv_domain(nextd) )
             load_segments(next);
 
+        next->reload_context = false;
+
         ctxt_switch_levelling(next);
 
         if ( opt_ibpb && !is_idle_domain(nextd) )
@@ -1886,6 +1964,8 @@  void continue_running(struct vcpu *same)
     if ( !vcpu_runnable(same) )
         sched_vcpu_idle(same);
 
+    __continue_running(same);
+
     /* See the comment above. */
     same->domain->arch.ctxt_switch->tail(same);
     BUG();
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 6668df9f3b..12a6d62dc8 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1133,6 +1133,7 @@  static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
 
     /* Auxiliary processors should be woken immediately. */
     v->is_initialised = 1;
+    v->reload_context = true;
     clear_bit(_VPF_down, &v->pause_flags);
     vcpu_wake(v);
 
@@ -3913,6 +3914,7 @@  void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, uint16_t ip)
 
     v->arch.flags |= TF_kernel_mode;
     v->is_initialised = 1;
+    v->reload_context = true;
     clear_bit(_VPF_down, &v->pause_flags);
 
  out:
diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index a1a43cd792..41f8050c02 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -367,6 +367,7 @@  static void vlapic_accept_irq(struct vcpu *v, uint32_t icr_low)
             domain_lock(v->domain);
             if ( v->is_initialised )
                 wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
+            v->reload_context = wake;
             domain_unlock(v->domain);
             if ( wake )
                 vcpu_wake(v);
diff --git a/xen/common/domain.c b/xen/common/domain.c
index d338a2204c..b467197f05 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -1383,6 +1383,8 @@  long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
                 rc = -EINVAL;
             else
                 wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
+            if ( wake )
+                v->reload_context = true;
             domain_unlock(d);
             if ( wake )
                 vcpu_wake(v);
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index f43d00b59f..7b30a153df 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -1775,17 +1775,22 @@  static struct sched_item *sched_wait_rendezvous_in(struct sched_item *prev,
     {
         next = do_schedule(prev, now);
         atomic_set(&next->rendezvous_out_cnt, sched_granularity + 1);
-        return next;
     }
-
-    while ( prev->rendezvous_in_cnt )
+    else
     {
-        pcpu_schedule_unlock_irq(lock, cpu);
-        cpu_relax();
-        pcpu_schedule_lock_irq(cpu);
+        while ( prev->rendezvous_in_cnt )
+        {
+            pcpu_schedule_unlock_irq(lock, cpu);
+            cpu_relax();
+            pcpu_schedule_lock_irq(cpu);
+        }
+        next = prev->next_task;
     }
 
-    return prev->next_task;
+    if ( unlikely(prev == next) )
+        vcpu_runstate_helper(current, RUNSTATE_running, now);
+
+    return next;
 }
 
 static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 51b8b6a44f..13085ddf90 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -206,6 +206,9 @@  struct vcpu
     bool             hcall_compat;
 #endif
 
+    /* VCPU was down before (context might need to be reloaded). */
+    bool             reload_context;
+
     /* The CPU, if any, which is holding onto this VCPU's state. */
 #define VCPU_CPU_CLEAN (~0u)
     unsigned int     dirty_cpu;