diff mbox

[v2,18/30] x86/cpu: Rework AMD masking MSR setup

Message ID 1454679743-18133-19-git-send-email-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andrew Cooper Feb. 5, 2016, 1:42 p.m. UTC
This patch is best reviewed as its end result rather than as a diff, as it
rewrites almost all of the setup.

On the BSP, cpuid information is used to evaluate the potential available set
of masking MSRs, and they are unconditionally probed, filling in the
availability information and hardware defaults.

The command line parameters are then combined with the hardware defaults to
further restrict the Xen default masking level.  Each cpu is then context
switched into the default levelling state.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>

v2:
 * Provide extra information if opt_cpu_info
 * Extra comment indicating the expected use of amd_ctxt_switch_levelling()
---
 xen/arch/x86/cpu/amd.c | 267 +++++++++++++++++++++++++++++++------------------
 1 file changed, 170 insertions(+), 97 deletions(-)

Comments

Jan Beulich Feb. 17, 2016, 7:40 a.m. UTC | #1
>>> On 05.02.16 at 14:42, <andrew.cooper3@citrix.com> wrote:
> @@ -126,126 +133,189 @@ static const struct cpuidmask *__init noinline get_cpuidmask(const char *opt)
>  }
>  
>  /*
> + * Sets caps in expected_levelling_cap, probes for the specified mask MSR, and
> + * set caps in levelling_caps if it is found.  Processors prior to Fam 10h
> + * required a 32-bit password for masking MSRs.  Reads the default value into
> + * msr_val.
> + */
> +static void __init __probe_mask_msr(unsigned int msr, uint64_t caps,

Please reduce the leading underscores to at most one.

> +                                    uint64_t *msr_val)
> +{
> +	unsigned int hi, lo;
> +
> +        expected_levelling_cap |= caps;

Indentation.

> +	if ((rdmsr_amd_safe(msr, &lo, &hi) == 0) &&
> +	    (wrmsr_amd_safe(msr, lo, hi) == 0))
> +		levelling_caps |= caps;
> +
> +	*msr_val = ((uint64_t)hi << 32) | lo;
> +}

Why can't this function, currently returning void, simply return the
value read?

> +/*
> + * Context switch levelling state to the next domain.  A parameter of NULL is
> + * used to context switch to the default host state, and is used by the BSP/AP
> + * startup code.
> + */
> +static void amd_ctxt_switch_levelling(const struct domain *nextd)
> +{
> +	struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
> +	const struct cpuidmasks *masks = &cpuidmask_defaults;

May I suggest naming this "defaults", to aid clarity of the code
below?

> +#define LAZY(cap, msr, field)						\
> +	({								\
> +		if (((levelling_caps & cap) == cap) &&			\
> +		    (these_masks->field != masks->field))		\

Perhaps worth swapping the operands of the && and wrapping what then
becomes its left side in unlikely(), to hopefully make the most
desirable route through this function a branch-less one?

> +static void __init noinline amd_init_levelling(void)
>  {
> -	static unsigned int feat_ecx, feat_edx;
> -	static unsigned int extfeat_ecx, extfeat_edx;
> -	static unsigned int l7s0_eax, l7s0_ebx;
> -	static unsigned int thermal_ecx;
> -	static bool_t skip_feat, skip_extfeat;
> -	static bool_t skip_l7s0_eax_ebx, skip_thermal_ecx;
> -	static enum { not_parsed, no_mask, set_mask } status;
> -	unsigned int eax, ebx, ecx, edx;
> -
> -	if (status == no_mask)
> -		return;
> +	const struct cpuidmask *m = NULL;
>  
> -	if (status == set_mask)
> -		goto setmask;
> +	probe_masking_msrs();
>  
> -	ASSERT((status == not_parsed) && (c == &boot_cpu_data));
> -	status = no_mask;
> +	if (*opt_famrev != '\0') {
> +		m = get_cpuidmask(opt_famrev);
>  
> -	/* Fam11 doesn't support masking at all. */
> -	if (c->x86 == 0x11)
> -		return;
> +		if (!m)
> +			printk("Invalid processor string: %s\n", opt_famrev);
> +	}
>  
> -	if (~(opt_cpuid_mask_ecx & opt_cpuid_mask_edx &
> -	      opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx &
> -	      opt_cpuid_mask_l7s0_eax & opt_cpuid_mask_l7s0_ebx &
> -	      opt_cpuid_mask_thermal_ecx)) {
> -		feat_ecx = opt_cpuid_mask_ecx;
> -		feat_edx = opt_cpuid_mask_edx;
> -		extfeat_ecx = opt_cpuid_mask_ext_ecx;
> -		extfeat_edx = opt_cpuid_mask_ext_edx;
> -		l7s0_eax = opt_cpuid_mask_l7s0_eax;
> -		l7s0_ebx = opt_cpuid_mask_l7s0_ebx;
> -		thermal_ecx = opt_cpuid_mask_thermal_ecx;
> -	} else if (*opt_famrev == '\0') {
> -		return;
> -	} else {
> -		const struct cpuidmask *m = get_cpuidmask(opt_famrev);
> +	if ((levelling_caps & LCAP_1cd) == LCAP_1cd) {
> +		uint32_t ecx, edx, tmp;
>  
> -		if (!m) {
> -			printk("Invalid processor string: %s\n", opt_famrev);
> -			printk("CPUID will not be masked\n");
> -			return;
> +		cpuid(0x00000001, &tmp, &tmp, &ecx, &edx);

Didn't you collect raw CPUID output already?

> +		if(~(opt_cpuid_mask_ecx & opt_cpuid_mask_edx)) {
> +			ecx &= opt_cpuid_mask_ecx;
> +			edx &= opt_cpuid_mask_edx;
> +		} else if ( m ) {

Partial Xen coding style slipped in here.

Jan
Andrew Cooper Feb. 17, 2016, 10:56 a.m. UTC | #2
On 17/02/16 07:40, Jan Beulich wrote:
>
>> +	if ((rdmsr_amd_safe(msr, &lo, &hi) == 0) &&
>> +	    (wrmsr_amd_safe(msr, lo, hi) == 0))
>> +		levelling_caps |= caps;
>> +
>> +	*msr_val = ((uint64_t)hi << 32) | lo;
>> +}
> Why can't this function, currently returning void, simply return the
> value read?

Hmm - it can.  This current layout is an artefact of several changes in
design.

>> +static void __init noinline amd_init_levelling(void)
>>  {
>> -	static unsigned int feat_ecx, feat_edx;
>> -	static unsigned int extfeat_ecx, extfeat_edx;
>> -	static unsigned int l7s0_eax, l7s0_ebx;
>> -	static unsigned int thermal_ecx;
>> -	static bool_t skip_feat, skip_extfeat;
>> -	static bool_t skip_l7s0_eax_ebx, skip_thermal_ecx;
>> -	static enum { not_parsed, no_mask, set_mask } status;
>> -	unsigned int eax, ebx, ecx, edx;
>> -
>> -	if (status == no_mask)
>> -		return;
>> +	const struct cpuidmask *m = NULL;
>>  
>> -	if (status == set_mask)
>> -		goto setmask;
>> +	probe_masking_msrs();
>>  
>> -	ASSERT((status == not_parsed) && (c == &boot_cpu_data));
>> -	status = no_mask;
>> +	if (*opt_famrev != '\0') {
>> +		m = get_cpuidmask(opt_famrev);
>>  
>> -	/* Fam11 doesn't support masking at all. */
>> -	if (c->x86 == 0x11)
>> -		return;
>> +		if (!m)
>> +			printk("Invalid processor string: %s\n", opt_famrev);
>> +	}
>>  
>> -	if (~(opt_cpuid_mask_ecx & opt_cpuid_mask_edx &
>> -	      opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx &
>> -	      opt_cpuid_mask_l7s0_eax & opt_cpuid_mask_l7s0_ebx &
>> -	      opt_cpuid_mask_thermal_ecx)) {
>> -		feat_ecx = opt_cpuid_mask_ecx;
>> -		feat_edx = opt_cpuid_mask_edx;
>> -		extfeat_ecx = opt_cpuid_mask_ext_ecx;
>> -		extfeat_edx = opt_cpuid_mask_ext_edx;
>> -		l7s0_eax = opt_cpuid_mask_l7s0_eax;
>> -		l7s0_ebx = opt_cpuid_mask_l7s0_ebx;
>> -		thermal_ecx = opt_cpuid_mask_thermal_ecx;
>> -	} else if (*opt_famrev == '\0') {
>> -		return;
>> -	} else {
>> -		const struct cpuidmask *m = get_cpuidmask(opt_famrev);
>> +	if ((levelling_caps & LCAP_1cd) == LCAP_1cd) {
>> +		uint32_t ecx, edx, tmp;
>>  
>> -		if (!m) {
>> -			printk("Invalid processor string: %s\n", opt_famrev);
>> -			printk("CPUID will not be masked\n");
>> -			return;
>> +		cpuid(0x00000001, &tmp, &tmp, &ecx, &edx);
> Didn't you collect raw CPUID output already?

This is now in c_early_init(), which runs ahead of populating c->x86_capability.

~Andrew
diff mbox

Patch

diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
index 5908cba..1708dd9 100644
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -80,6 +80,13 @@  static inline int wrmsr_amd_safe(unsigned int msr, unsigned int lo,
 	return err;
 }
 
+static void wrmsr_amd(unsigned int msr, uint64_t val)
+{
+	asm volatile("wrmsr" ::	/* unchecked write: nothing is returned to the caller */
+		     "c" (msr), "a" ((uint32_t)val),	/* MSR index; low 32 bits of val */
+		     "d" (val >> 32), "D" (0x9c5a203a));	/* high 32 bits; "D" is presumably the AMD password - TODO confirm */
+}
+
 static const struct cpuidmask {
 	uint16_t fam;
 	char rev[2];
@@ -126,126 +133,189 @@  static const struct cpuidmask *__init noinline get_cpuidmask(const char *opt)
 }
 
 /*
+ * Sets caps in expected_levelling_cap, probes for the specified mask MSR, and
+ * sets caps in levelling_caps if it can be read and written back.  Processors
+ * prior to Fam 10h required a 32-bit password for masking MSRs.  Stores the
+ * value read in *msr_val (NOTE(review): even on failure - confirm intent).
+ */
+static void __init __probe_mask_msr(unsigned int msr, uint64_t caps,
+                                    uint64_t *msr_val)
+{
+	unsigned int hi, lo;
+
+	expected_levelling_cap |= caps;
+
+	if ((rdmsr_amd_safe(msr, &lo, &hi) == 0) &&
+	    (wrmsr_amd_safe(msr, lo, hi) == 0))
+		levelling_caps |= caps;
+
+	*msr_val = ((uint64_t)hi << 32) | lo;
+}
+
+/*
+ * Probe for the existence of the expected masking MSRs.  They might easily
+ * not be available if Xen is running virtualised.
+ */
+static void __init noinline probe_masking_msrs(void)
+{
+	const struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	/*
+	 * First, work out which masking MSRs we should have, based on
+	 * revision and cpuid.
+	 */
+
+	/* Fam11 doesn't support masking at all. */
+	if (c->x86 == 0x11)
+		return;
+
+	__probe_mask_msr(MSR_K8_FEATURE_MASK, LCAP_1cd,
+			 &cpuidmask_defaults._1cd);
+	__probe_mask_msr(MSR_K8_EXT_FEATURE_MASK, LCAP_e1cd,
+			 &cpuidmask_defaults.e1cd);
+
+	if (c->cpuid_level >= 7)
+		__probe_mask_msr(MSR_AMD_L7S0_FEATURE_MASK, LCAP_7ab0,
+				 &cpuidmask_defaults._7ab0);
+
+	if (c->x86 == 0x15 && c->cpuid_level >= 6 && cpuid_ecx(6))
+		__probe_mask_msr(MSR_AMD_THRM_FEATURE_MASK, LCAP_6c,
+				 &cpuidmask_defaults._6c);
+
+	/*
+	 * Don't bother warning about a mismatch if virtualised.  These MSRs
+	 * are not architectural and almost never virtualised.
+	 */
+	if ((expected_levelling_cap == levelling_caps) ||
+	    cpu_has_hypervisor)
+		return;
+
+	printk(XENLOG_WARNING "Mismatch between expected (%#x) "
+	       "and real (%#x) levelling caps: missing %#x\n",
+	       expected_levelling_cap, levelling_caps,
+	       expected_levelling_cap & ~levelling_caps); /* expected, not found */
+	printk(XENLOG_WARNING "Fam %#x, model %#x level %#x\n",
+	       c->x86, c->x86_model, c->cpuid_level);
+	printk(XENLOG_WARNING
+	       "If not running virtualised, please report a bug\n");
+}
+
+/*
+ * Switch this CPU's levelling MSRs to the state wanted by the next domain.
+ * Passing NULL selects the default host state, as the BSP/AP startup code
+ * does.  An MSR is only written when its cached per-cpu value is stale.
+ */
+static void amd_ctxt_switch_levelling(const struct domain *nextd)
+{
+	struct cpuidmasks *cur = &this_cpu(cpuidmasks);
+	const struct cpuidmasks *defaults = &cpuidmask_defaults;
+
+#define LAZY(cap, msr, field)						\
+	do {								\
+		if ((cur->field != defaults->field) &&			\
+		    ((levelling_caps & cap) == cap))			\
+		{							\
+			wrmsr_amd(msr, defaults->field);		\
+			cur->field = defaults->field;			\
+		}							\
+	} while (0)
+
+	LAZY(LCAP_1cd,  MSR_K8_FEATURE_MASK,       _1cd);
+	LAZY(LCAP_e1cd, MSR_K8_EXT_FEATURE_MASK,   e1cd);
+	LAZY(LCAP_7ab0, MSR_AMD_L7S0_FEATURE_MASK, _7ab0);
+	LAZY(LCAP_6c,   MSR_AMD_THRM_FEATURE_MASK, _6c);
+
+#undef LAZY
+}
+
+/*
  * Mask the features and extended features returned by CPUID.  Parameters are
  * set from the boot line via two methods:
  *
  *   1) Specific processor revision string
  *   2) User-defined masks
  *
- * The processor revision string parameter has precedene.
+ * The user-defined masks take precedence.
  */
-static void set_cpuidmask(const struct cpuinfo_x86 *c)
+static void __init noinline amd_init_levelling(void)
 {
-	static unsigned int feat_ecx, feat_edx;
-	static unsigned int extfeat_ecx, extfeat_edx;
-	static unsigned int l7s0_eax, l7s0_ebx;
-	static unsigned int thermal_ecx;
-	static bool_t skip_feat, skip_extfeat;
-	static bool_t skip_l7s0_eax_ebx, skip_thermal_ecx;
-	static enum { not_parsed, no_mask, set_mask } status;
-	unsigned int eax, ebx, ecx, edx;
-
-	if (status == no_mask)
-		return;
+	const struct cpuidmask *m = NULL;
 
-	if (status == set_mask)
-		goto setmask;
+	probe_masking_msrs();
 
-	ASSERT((status == not_parsed) && (c == &boot_cpu_data));
-	status = no_mask;
+	if (*opt_famrev != '\0') {
+		m = get_cpuidmask(opt_famrev);
 
-	/* Fam11 doesn't support masking at all. */
-	if (c->x86 == 0x11)
-		return;
+		if (!m)
+			printk("Invalid processor string: %s\n", opt_famrev);
+	}
 
-	if (~(opt_cpuid_mask_ecx & opt_cpuid_mask_edx &
-	      opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx &
-	      opt_cpuid_mask_l7s0_eax & opt_cpuid_mask_l7s0_ebx &
-	      opt_cpuid_mask_thermal_ecx)) {
-		feat_ecx = opt_cpuid_mask_ecx;
-		feat_edx = opt_cpuid_mask_edx;
-		extfeat_ecx = opt_cpuid_mask_ext_ecx;
-		extfeat_edx = opt_cpuid_mask_ext_edx;
-		l7s0_eax = opt_cpuid_mask_l7s0_eax;
-		l7s0_ebx = opt_cpuid_mask_l7s0_ebx;
-		thermal_ecx = opt_cpuid_mask_thermal_ecx;
-	} else if (*opt_famrev == '\0') {
-		return;
-	} else {
-		const struct cpuidmask *m = get_cpuidmask(opt_famrev);
+	if ((levelling_caps & LCAP_1cd) == LCAP_1cd) {
+		uint32_t ecx, edx, tmp;
 
-		if (!m) {
-			printk("Invalid processor string: %s\n", opt_famrev);
-			printk("CPUID will not be masked\n");
-			return;
+		cpuid(0x00000001, &tmp, &tmp, &ecx, &edx);
+
+		if (~(opt_cpuid_mask_ecx & opt_cpuid_mask_edx)) {
+			ecx &= opt_cpuid_mask_ecx;
+			edx &= opt_cpuid_mask_edx;
+		} else if (m) {
+			ecx &= m->ecx;
+			edx &= m->edx;
 		}
-		feat_ecx = m->ecx;
-		feat_edx = m->edx;
-		extfeat_ecx = m->ext_ecx;
-		extfeat_edx = m->ext_edx;
+
+		cpuidmask_defaults._1cd &= ((uint64_t)ecx << 32) | edx;
 	}
 
-        /* Setting bits in the CPUID mask MSR that are not set in the
-         * unmasked CPUID response can cause those bits to be set in the
-         * masked response.  Avoid that by explicitly masking in software. */
-        feat_ecx &= cpuid_ecx(0x00000001);
-        feat_edx &= cpuid_edx(0x00000001);
-        extfeat_ecx &= cpuid_ecx(0x80000001);
-        extfeat_edx &= cpuid_edx(0x80000001);
+	if ((levelling_caps & LCAP_e1cd) == LCAP_e1cd) {
+		uint32_t ecx, edx, tmp;
 
-	status = set_mask;
-	printk("Writing CPUID feature mask ECX:EDX -> %08Xh:%08Xh\n", 
-	       feat_ecx, feat_edx);
-	printk("Writing CPUID extended feature mask ECX:EDX -> %08Xh:%08Xh\n", 
-	       extfeat_ecx, extfeat_edx);
+		cpuid(0x80000001, &tmp, &tmp, &ecx, &edx);
 
-	if (c->cpuid_level >= 7)
-		cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
-	else
-		ebx = eax = 0;
-	if ((eax | ebx) && ~(l7s0_eax & l7s0_ebx)) {
-		if (l7s0_eax > eax)
-			l7s0_eax = eax;
-		l7s0_ebx &= ebx;
-		printk("Writing CPUID leaf 7 subleaf 0 feature mask EAX:EBX -> %08Xh:%08Xh\n",
-		       l7s0_eax, l7s0_ebx);
-	} else
-		skip_l7s0_eax_ebx = 1;
-
-	/* Only Fam15 has the respective MSR. */
-	ecx = c->x86 == 0x15 && c->cpuid_level >= 6 ? cpuid_ecx(6) : 0;
-	if (ecx && ~thermal_ecx) {
-		thermal_ecx &= ecx;
-		printk("Writing CPUID thermal/power feature mask ECX -> %08Xh\n",
-		       thermal_ecx);
-	} else
-		skip_thermal_ecx = 1;
-
- setmask:
-	/* AMD processors prior to family 10h required a 32-bit password */
-	if (!skip_feat &&
-	    wrmsr_amd_safe(MSR_K8_FEATURE_MASK, feat_edx, feat_ecx)) {
-		skip_feat = 1;
-		printk("Failed to set CPUID feature mask\n");
+		if (~(opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx)) {
+			ecx &= opt_cpuid_mask_ext_ecx;
+			edx &= opt_cpuid_mask_ext_edx;
+		} else if (m) {
+			ecx &= m->ext_ecx;
+			edx &= m->ext_edx;
+		}
+
+		cpuidmask_defaults.e1cd &= ((uint64_t)ecx << 32) | edx;
 	}
 
-	if (!skip_extfeat &&
-	    wrmsr_amd_safe(MSR_K8_EXT_FEATURE_MASK, extfeat_edx, extfeat_ecx)) {
-		skip_extfeat = 1;
-		printk("Failed to set CPUID extended feature mask\n");
+	if ((levelling_caps & LCAP_7ab0) == LCAP_7ab0) {
+		uint32_t eax, ebx, tmp;
+
+		cpuid_count(7, 0, &eax, &ebx, &tmp, &tmp); /* subleaf 0, explicitly */
+
+		if (~(opt_cpuid_mask_l7s0_eax & opt_cpuid_mask_l7s0_ebx)) {
+			eax &= opt_cpuid_mask_l7s0_eax;
+			ebx &= opt_cpuid_mask_l7s0_ebx;
+		}
+
+		cpuidmask_defaults._7ab0 &= ((uint64_t)eax << 32) | ebx;
 	}
 
-	if (!skip_l7s0_eax_ebx &&
-	    wrmsr_amd_safe(MSR_AMD_L7S0_FEATURE_MASK, l7s0_ebx, l7s0_eax)) {
-		skip_l7s0_eax_ebx = 1;
-		printk("Failed to set CPUID leaf 7 subleaf 0 feature mask\n");
+	if ((levelling_caps & LCAP_6c) == LCAP_6c) {
+		uint32_t ecx = cpuid_ecx(6);
+
+		if (~opt_cpuid_mask_thermal_ecx)
+			ecx &= opt_cpuid_mask_thermal_ecx;
+
+		cpuidmask_defaults._6c &= (~0ULL << 32) | ecx;
 	}
 
-	if (!skip_thermal_ecx &&
-	    (rdmsr_amd_safe(MSR_AMD_THRM_FEATURE_MASK, &eax, &edx) ||
-	     wrmsr_amd_safe(MSR_AMD_THRM_FEATURE_MASK, thermal_ecx, edx))){
-		skip_thermal_ecx = 1;
-		printk("Failed to set CPUID thermal/power feature mask\n");
+	if (opt_cpu_info) {
+		printk(XENLOG_INFO "Levelling caps: %#x\n", levelling_caps);
+		printk(XENLOG_INFO
+		       "MSR defaults: 1d 0x%08x, 1c 0x%08x, e1d 0x%08x, "
+		       "e1c 0x%08x, 7a0 0x%08x, 7b0 0x%08x, 6c 0x%08x\n",
+		       (uint32_t)cpuidmask_defaults._1cd,
+		       (uint32_t)(cpuidmask_defaults._1cd >> 32),
+		       (uint32_t)cpuidmask_defaults.e1cd,
+		       (uint32_t)(cpuidmask_defaults.e1cd >> 32),
+		       (uint32_t)(cpuidmask_defaults._7ab0 >> 32),
+		       (uint32_t)cpuidmask_defaults._7ab0,
+		       (uint32_t)cpuidmask_defaults._6c);
 	}
 }
 
@@ -409,7 +479,10 @@  static void amd_get_topology(struct cpuinfo_x86 *c)
 
 static void early_init_amd(struct cpuinfo_x86 *c)
 {
-	set_cpuidmask(c);
+	if (c == &boot_cpu_data)	/* boot CPU only: probe MSRs and compute the defaults */
+		amd_init_levelling();
+
+	amd_ctxt_switch_levelling(NULL);	/* NULL: switch to the default host state */
 }
 
 static void init_amd(struct cpuinfo_x86 *c)