diff mbox

[v4,16/26] x86/cpu: Context switch cpuid masks and faulting state in context_switch()

Message ID 1458750989-28967-17-git-send-email-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andrew Cooper March 23, 2016, 4:36 p.m. UTC
A single ctxt_switch_levelling() function pointer is provided
(defaulting to an empty nop), which is overridden in the appropriate
$VENDOR_init_levelling().

set_cpuid_faulting() is made private and included within
intel_ctxt_switch_levelling().

One functional change is that the faulting configuration is no longer special
cased for dom0.  There was never any need to, and it will cause dom0 to
observe the same information through native and enlightened cpuid.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <JBeulich@suse.com>
---
v3:
 * Don't leave cpuid masking/faulting active for the kexec kernel.
v2:
 * Style fixes
 * ASSERT() that faulting is available in set_cpuid_faulting()
---
 xen/arch/x86/cpu/amd.c          |  3 +++
 xen/arch/x86/cpu/common.c       |  7 +++++++
 xen/arch/x86/cpu/intel.c        | 20 +++++++++++++++-----
 xen/arch/x86/crash.c            |  3 +++
 xen/arch/x86/domain.c           |  4 +---
 xen/include/asm-x86/processor.h |  2 +-
 6 files changed, 30 insertions(+), 9 deletions(-)

Comments

Konrad Rzeszutek Wilk March 28, 2016, 7:27 p.m. UTC | #1
On Wed, Mar 23, 2016 at 04:36:19PM +0000, Andrew Cooper wrote:
> A single ctxt_switch_levelling() function pointer is provided
> (defaulting to an empty nop), which is overridden in the appropriate
> $VENDOR_init_levelling().
> 
> set_cpuid_faulting() is made private and included within
> intel_ctxt_switch_levelling().
> 
> One functional change is that the faulting configuration is no longer special
> cased for dom0.  There was never any need to, and it will cause dom0 to

There was. See 1d6ffea6
    ACPI: add _PDC input override mechanism

And in Linux see xen_check_mwait().

> observe the same information through native and enlightened cpuid.

Which will be a regression when it comes to ACPI C-states  - as we won't
expose the deeper ones (C6 or such) on SandyBridge CPUs.

But looking at this document:
http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/virtualization-technology-flexmigration-application-note.pdf

The CPUID Masking all talks about VM guests - but PV guests are not
really VM (no VMCS container for them). Does that mean that if a PV
guest does a 'native' CPUID, the CPUID results are not masked
by CPUID masking (or faulting?). I would think not since:

> @@ -154,6 +156,11 @@ static void intel_ctxt_switch_levelling(const struct domain *nextd)
>  	struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
>  	const struct cpuidmasks *masks = &cpuidmask_defaults;
>  
> +	if (cpu_has_cpuid_faulting) {
> +		set_cpuid_faulting(nextd && is_pv_domain(nextd));

Which would give us a NULL for Dom0. So no engaging of CPUID faulting for PV guests.
And I suppose the CPUID masking is only for guests in VMCS container?
Andrew Cooper April 5, 2016, 6:34 p.m. UTC | #2
On 28/03/16 20:27, Konrad Rzeszutek Wilk wrote:
> On Wed, Mar 23, 2016 at 04:36:19PM +0000, Andrew Cooper wrote:
>> A single ctxt_switch_levelling() function pointer is provided
>> (defaulting to an empty nop), which is overridden in the appropriate
>> $VENDOR_init_levelling().
>>
>> set_cpuid_faulting() is made private and included within
>> intel_ctxt_switch_levelling().
>>
>> One functional change is that the faulting configuration is no longer special
>> cased for dom0.  There was never any need to, and it will cause dom0 to
> There was. See 1d6ffea6
>     ACPI: add _PDC input override mechanism
>
> And in Linux see xen_check_mwait().

This logic is fundamentally broken.  It is, and has *always* been wrong
to try and equate dom0's view of cpuid with the host's view of cpuid...

>
>> observe the same information through native and enlightened cpuid.
> Which will be a regression when it comes to ACPI C-states

... it can't possibly work for an HVM-based dom0, where there is no
distinction between a native and enlightened cpuid.

I will clearly have to re-break this in a similar way to the MTRR bits,
whitelisted for the hardware domain only, but Linux will have to be
changed to get working deep cstates for PVH/HVMLite.

~Andrew
diff mbox

Patch

diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
index 0e1c8b9..484d4b0 100644
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -326,6 +326,9 @@  static void __init noinline amd_init_levelling(void)
 		       (uint32_t)cpuidmask_defaults._7ab0,
 		       (uint32_t)cpuidmask_defaults._6c);
 	}
+
+	if (levelling_caps)
+		ctxt_switch_levelling = amd_ctxt_switch_levelling;
 }
 
 /*
diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 7ef75b0..fe6eab4 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -88,6 +88,13 @@  static const struct cpu_dev default_cpu = {
 };
 static const struct cpu_dev *this_cpu = &default_cpu;
 
+static void default_ctxt_switch_levelling(const struct domain *nextd)
+{
+	/* Nop */
+}
+void (* __read_mostly ctxt_switch_levelling)(const struct domain *nextd) =
+	default_ctxt_switch_levelling;
+
 bool_t opt_cpu_info;
 boolean_param("cpuinfo", opt_cpu_info);
 
diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
index b2666a8..71b1199 100644
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -32,13 +32,15 @@  static bool_t __init probe_intel_cpuid_faulting(void)
 	return 1;
 }
 
-static DEFINE_PER_CPU(bool_t, cpuid_faulting_enabled);
-void set_cpuid_faulting(bool_t enable)
+static void set_cpuid_faulting(bool_t enable)
 {
+	static DEFINE_PER_CPU(bool_t, cpuid_faulting_enabled);
+	bool_t *this_enabled = &this_cpu(cpuid_faulting_enabled);
 	uint32_t hi, lo;
 
-	if (!cpu_has_cpuid_faulting ||
-	    this_cpu(cpuid_faulting_enabled) == enable )
+	ASSERT(cpu_has_cpuid_faulting);
+
+	if (*this_enabled == enable)
 		return;
 
 	rdmsr(MSR_INTEL_MISC_FEATURES_ENABLES, lo, hi);
@@ -47,7 +49,7 @@  void set_cpuid_faulting(bool_t enable)
 		lo |= MSR_MISC_FEATURES_CPUID_FAULTING;
 	wrmsr(MSR_INTEL_MISC_FEATURES_ENABLES, lo, hi);
 
-	this_cpu(cpuid_faulting_enabled) = enable;
+	*this_enabled = enable;
 }
 
 /*
@@ -154,6 +156,11 @@  static void intel_ctxt_switch_levelling(const struct domain *nextd)
 	struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
 	const struct cpuidmasks *masks = &cpuidmask_defaults;
 
+	if (cpu_has_cpuid_faulting) {
+		set_cpuid_faulting(nextd && is_pv_domain(nextd));
+		return;
+	}
+
 #define LAZY(msr, field)						\
 	({								\
 		if (unlikely(these_masks->field != masks->field) &&	\
@@ -227,6 +234,9 @@  static void __init noinline intel_init_levelling(void)
 			       (uint32_t)cpuidmask_defaults.e1cd,
 			       (uint32_t)cpuidmask_defaults.Da1);
 	}
+
+	if (levelling_caps)
+		ctxt_switch_levelling = intel_ctxt_switch_levelling;
 }
 
 static void early_init_intel(struct cpuinfo_x86 *c)
diff --git a/xen/arch/x86/crash.c b/xen/arch/x86/crash.c
index 888a214..f28f527 100644
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -189,6 +189,9 @@  void machine_crash_shutdown(void)
 
     nmi_shootdown_cpus();
 
+    /* Reset CPUID masking and faulting to the host's default. */
+    ctxt_switch_levelling(NULL);
+
     info = kexec_crash_save_info();
     info->xen_phys_start = xen_phys_start;
     info->dom0_pfn_to_mfn_frame_list_list =
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 6ec7554..abc7194 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2088,9 +2088,7 @@  void context_switch(struct vcpu *prev, struct vcpu *next)
             load_segments(next);
         }
 
-        set_cpuid_faulting(is_pv_domain(nextd) &&
-                           !is_control_domain(nextd) &&
-                           !is_hardware_domain(nextd));
+        ctxt_switch_levelling(nextd);
     }
 
     context_saved(prev);
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index a950554..f29d370 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -209,7 +209,7 @@  extern struct cpuinfo_x86 boot_cpu_data;
 extern struct cpuinfo_x86 cpu_data[];
 #define current_cpu_data cpu_data[smp_processor_id()]
 
-extern void set_cpuid_faulting(bool_t enable);
+extern void (*ctxt_switch_levelling)(const struct domain *nextd);
 
 extern u64 host_pat;
 extern bool_t opt_cpu_info;