diff mbox series

[for-4.18,2/2] x86/amd: Prevent potentially fetching wrong instruction bytes on Zen4

Message ID 20231012172601.22600-3-alejandro.vallejo@cloud.com (mailing list archive)
State New, archived
Headers show
Series Fix AMD erratum #1485 on AMD Zen4 | expand

Commit Message

Alejandro Vallejo Oct. 12, 2023, 5:26 p.m. UTC
If Zen4 runs with SMT and without STIBP, then it may corrupt its own code
stream. This is erratum #1485 in the AMD specs.

Fix adapted off Linux's mailing list:
  https://lore.kernel.org/lkml/D99589F4-BC5D-430B-87B2-72C20370CF57@exactcode.com/T/#u

Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
---
 xen/arch/x86/cpu/amd.c               | 11 +++++++++++
 xen/arch/x86/include/asm/amd.h       |  8 ++++++++
 xen/arch/x86/include/asm/msr-index.h |  2 ++
 3 files changed, 21 insertions(+)

Comments

Andrew Cooper Oct. 13, 2023, 2:28 a.m. UTC | #1
On 13/10/2023 1:26 am, Alejandro Vallejo wrote:
> If Zen4 runs with SMT and without STIBP, then it may corrupt its own code
> stream. This is erratum #1485 in the AMD specs.

I'm afraid this description isn't fully accurate, and I can't elaborate
any further at this juncture.

Just say "address AMD erratum #1485".  When the revision guides do get
published, things will become clearer.

> diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
> index 22aa8c0085..2426e4cf15 100644
> --- a/xen/arch/x86/cpu/amd.c
> +++ b/xen/arch/x86/cpu/amd.c
> @@ -1166,6 +1166,17 @@ static void cf_check init_amd(struct cpuinfo_x86 *c)
>  	if (c->x86 == 0x10)
>  		__clear_bit(X86_FEATURE_MONITOR, c->x86_capability);
>  
> +	/*
> +	 * Erratum #1485: Outdated microcode on Zen4 may cause corruption
> +	 * in the code stream if SMT is enabled and STIBP is not. The fix
> +	 * is cheap, so it's applied irrespective of the loaded microcode.

Again, unfortunately not accurate.  Just link to the erratum here.

> +	 */
> +	if (!cpu_has_hypervisor && is_zen4_uarch()) {
> +		rdmsrl(MSR_AMD64_BP_CFG, value);
> +		wrmsrl(MSR_AMD64_BP_CFG,
> +		       value | AMD64_BP_CFG_SHARED_BTB_FIX);
> +	}
> +
>  	if (!cpu_has_amd_erratum(c, AMD_ERRATUM_121))
>  		opt_allow_unsafe = 1;
>  	else if (opt_allow_unsafe < 0)
> diff --git a/xen/arch/x86/include/asm/amd.h b/xen/arch/x86/include/asm/amd.h
> index 5a40bcc2ba..7d18f7c66b 100644
> --- a/xen/arch/x86/include/asm/amd.h
> +++ b/xen/arch/x86/include/asm/amd.h
> @@ -145,12 +145,20 @@
>   * Hygon (Fam18h) but without simple model number rules.  Instead, use STIBP
>   * as a heuristic that distinguishes the two.
>   *
> + * Zen3 and Zen4 are easier to spot by model number, but it's still not a
> + * single range. We use AutoIBRS to to discriminate instead, as that's a
> + * Zen4-specific feature.

I'd strongly advise not passing commentary on whether Zen3/4 are easier
or harder to spot.  Just discuss the technical aspect.

> + *
>   * The caller is required to perform the appropriate vendor check first.
>   */
>  #define is_zen1_uarch() ((boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) && \
>                           !boot_cpu_has(X86_FEATURE_AMD_STIBP))
>  #define is_zen2_uarch() (boot_cpu_data.x86 == 0x17 && \
>                           boot_cpu_has(X86_FEATURE_AMD_STIBP))
> +#define is_zen3_uarch() (boot_cpu_data.x86 == 0x19 && \
> +                         !boot_cpu_has(X86_FEATURE_AUTO_IBRS))
> +#define is_zen4_uarch() (boot_cpu_data.x86 == 0x19 && \
> +                         boot_cpu_has(X86_FEATURE_AUTO_IBRS))
>  
>  struct cpuinfo_x86;
>  int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...);
> diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
> index 11ffed543a..4437e8a63e 100644
> --- a/xen/arch/x86/include/asm/msr-index.h
> +++ b/xen/arch/x86/include/asm/msr-index.h
> @@ -403,6 +403,8 @@
>  #define MSR_AMD64_DE_CFG		0xc0011029
>  #define AMD64_DE_CFG_LFENCE_SERIALISE	(_AC(1, ULL) << 1)
>  #define MSR_AMD64_EX_CFG		0xc001102c
> +#define MSR_AMD64_BP_CFG		0xc001102e
> +#define AMD64_BP_CFG_SHARED_BTB_FIX	(_AC(1, ULL) << 5)

MSR_AMD64_BP_CFG is fine to have in msr-index.h (it's consistent across
generations), but SHARED_BTB_FIX is not.  It's model specific, so keep
it local to the errata workaround.

LFENCE_SERIALISE was special.  It happened to have always been
consistent, and was retroactively declared to be architectural.

~Andrew
diff mbox series

Patch

diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
index 22aa8c0085..2426e4cf15 100644
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -1166,6 +1166,17 @@  static void cf_check init_amd(struct cpuinfo_x86 *c)
 	if (c->x86 == 0x10)
 		__clear_bit(X86_FEATURE_MONITOR, c->x86_capability);
 
+	/*
+	 * Erratum #1485: Outdated microcode on Zen4 may cause corruption
+	 * in the code stream if SMT is enabled and STIBP is not. The fix
+	 * is cheap, so it's applied irrespective of the loaded microcode.
+	 */
+	if (!cpu_has_hypervisor && is_zen4_uarch()) {
+		rdmsrl(MSR_AMD64_BP_CFG, value);
+		wrmsrl(MSR_AMD64_BP_CFG,
+		       value | AMD64_BP_CFG_SHARED_BTB_FIX);
+	}
+
 	if (!cpu_has_amd_erratum(c, AMD_ERRATUM_121))
 		opt_allow_unsafe = 1;
 	else if (opt_allow_unsafe < 0)
diff --git a/xen/arch/x86/include/asm/amd.h b/xen/arch/x86/include/asm/amd.h
index 5a40bcc2ba..7d18f7c66b 100644
--- a/xen/arch/x86/include/asm/amd.h
+++ b/xen/arch/x86/include/asm/amd.h
@@ -145,12 +145,20 @@ 
  * Hygon (Fam18h) but without simple model number rules.  Instead, use STIBP
  * as a heuristic that distinguishes the two.
  *
+ * Zen3 and Zen4 are easier to spot by model number, but it's still not a
+ * single range. We use AutoIBRS to to discriminate instead, as that's a
+ * Zen4-specific feature.
+ *
  * The caller is required to perform the appropriate vendor check first.
  */
 #define is_zen1_uarch() ((boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) && \
                          !boot_cpu_has(X86_FEATURE_AMD_STIBP))
 #define is_zen2_uarch() (boot_cpu_data.x86 == 0x17 && \
                          boot_cpu_has(X86_FEATURE_AMD_STIBP))
+#define is_zen3_uarch() (boot_cpu_data.x86 == 0x19 && \
+                         !boot_cpu_has(X86_FEATURE_AUTO_IBRS))
+#define is_zen4_uarch() (boot_cpu_data.x86 == 0x19 && \
+                         boot_cpu_has(X86_FEATURE_AUTO_IBRS))
 
 struct cpuinfo_x86;
 int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...);
diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
index 11ffed543a..4437e8a63e 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -403,6 +403,8 @@ 
 #define MSR_AMD64_DE_CFG		0xc0011029
 #define AMD64_DE_CFG_LFENCE_SERIALISE	(_AC(1, ULL) << 1)
 #define MSR_AMD64_EX_CFG		0xc001102c
+#define MSR_AMD64_BP_CFG		0xc001102e
+#define AMD64_BP_CFG_SHARED_BTB_FIX	(_AC(1, ULL) << 5)
 #define MSR_AMD64_DE_CFG2		0xc00110e3
 
 #define MSR_AMD64_DR0_ADDRESS_MASK	0xc0011027