diff mbox series

[RFC,V2,37/45] x86: optimize loading of GDT at context switch

Message ID 20190506065644.7415-38-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series xen: add core scheduling support | expand

Commit Message

Jürgen Groß May 6, 2019, 6:56 a.m. UTC
Instead of dynamically deciding whether the previous vcpu was using the
full or the default GDT, just add a percpu variable recording which one
is loaded. This also removes the need to test twice whether the vcpu_ids
differ.

Cache the need_full_gdt(nd) value in a local variable.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
RFC V2: new patch (split from previous one)
---
 xen/arch/x86/domain.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

Comments

Jan Beulich May 16, 2019, 12:42 p.m. UTC | #1
>>> On 06.05.19 at 08:56, <jgross@suse.com> wrote:
> Instead of dynamically decide whether the previous vcpu was using full
> or default GDT just add a percpu variable for that purpose. This at
> once removes the need for testing vcpu_ids to differ twice.
> 
> Cache the need_full_gdt(nd) value in a local variable.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

To be honest I'm not entirely convinced this is a good move. But
since you've done the work, and since the larger source size is
hopefully outweighed by slightly smaller binary size (per-CPU
data accesses aren't entirely cheap either), I'm not going to
object.

> @@ -1658,6 +1664,7 @@ static void __context_switch(void)
>      struct vcpu          *n = current;
>      struct domain        *pd = p->domain, *nd = n->domain;
>      seg_desc_t           *gdt;
> +    bool                  need_full_gdt_n;

This variable name is too long, or more precisely has too many
underscores for my taste. Seeing that only a single invocation of
need_full_gdt() remains, I don't think just "full_gdt" would be
ambiguous in any way.
At which point
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan
Jürgen Groß May 16, 2019, 1:10 p.m. UTC | #2
On 16/05/2019 14:42, Jan Beulich wrote:
>>>> On 06.05.19 at 08:56, <jgross@suse.com> wrote:
>> Instead of dynamically decide whether the previous vcpu was using full
>> or default GDT just add a percpu variable for that purpose. This at
>> once removes the need for testing vcpu_ids to differ twice.
>>
>> Cache the need_full_gdt(nd) value in a local variable.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
> 
> To be honest I'm not entirely convinced this is a good move. But
> since you've done the work, and since the larger source size is
> hopefully outweighed by slightly smaller binary size (per-CPU
> data accesses aren't entirely cheap either), I'm not going to
> object.
> 
>> @@ -1658,6 +1664,7 @@ static void __context_switch(void)
>>      struct vcpu          *n = current;
>>      struct domain        *pd = p->domain, *nd = n->domain;
>>      seg_desc_t           *gdt;
>> +    bool                  need_full_gdt_n;
> 
> This variable is too long, or more precisely has too many underscores
> for my taste. Seeing that only a single invocation of need_full_gdt()
> remains, I don't think just "full_gdt" would be ambiguous in any way.

Fine with me.

> At which point
> Reviewed-by: Jan Beulich <jbeulich@suse.com>

Thanks,


Juergen
diff mbox series

Patch

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 72a365ff6a..d04e704116 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -72,6 +72,8 @@ 
 
 DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
 
+static DEFINE_PER_CPU(bool, full_gdt_loaded);
+
 static void default_idle(void);
 void (*pm_idle) (void) __read_mostly = default_idle;
 void (*dead_idle) (void) __read_mostly = default_dead_idle;
@@ -1638,6 +1640,8 @@  static inline void load_full_gdt(struct vcpu *v, unsigned int cpu)
     gdt_desc.base = GDT_VIRT_START(v);
 
     lgdt(&gdt_desc);
+
+    per_cpu(full_gdt_loaded, cpu) = true;
 }
 
 static inline void load_default_gdt(seg_desc_t *gdt, unsigned int cpu)
@@ -1648,6 +1652,8 @@  static inline void load_default_gdt(seg_desc_t *gdt, unsigned int cpu)
     gdt_desc.base  = (unsigned long)(gdt - FIRST_RESERVED_GDT_ENTRY);
 
     lgdt(&gdt_desc);
+
+    per_cpu(full_gdt_loaded, cpu) = false;
 }
 
 static void __context_switch(void)
@@ -1658,6 +1664,7 @@  static void __context_switch(void)
     struct vcpu          *n = current;
     struct domain        *pd = p->domain, *nd = n->domain;
     seg_desc_t           *gdt;
+    bool                  need_full_gdt_n;
 
     ASSERT(p != n);
     ASSERT(!vcpu_cpu_dirty(n));
@@ -1700,11 +1707,13 @@  static void __context_switch(void)
     gdt = !is_pv_32bit_domain(nd) ? per_cpu(gdt_table, cpu) :
                                     per_cpu(compat_gdt_table, cpu);
 
-    if ( need_full_gdt(nd) )
+    need_full_gdt_n = need_full_gdt(nd);
+
+    if ( need_full_gdt_n )
         write_full_gdt_ptes(gdt, n);
 
-    if ( need_full_gdt(pd) &&
-         ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(nd)) )
+    if ( per_cpu(full_gdt_loaded, cpu) &&
+         ((p->vcpu_id != n->vcpu_id) || !need_full_gdt_n) )
         load_default_gdt(gdt, cpu);
 
     write_ptbase(n);
@@ -1716,8 +1725,7 @@  static void __context_switch(void)
         svm_load_segs(0, 0, 0, 0, 0, 0, 0);
 #endif
 
-    if ( need_full_gdt(nd) &&
-         ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(pd)) )
+    if ( need_full_gdt_n && !per_cpu(full_gdt_loaded, cpu) )
         load_full_gdt(n, cpu);
 
     if ( pd != nd )