diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2523,6 +2523,7 @@ config MITIGATION_ADDRESS_SPACE_ISOLATION
bool "Allow code to run with a reduced kernel address space"
default n
depends on X86_64 && !PARAVIRT && !UML
+ select X86_L1TF_FLUSH_LIB
help
This feature provides the ability to run some kernel code
with a reduced kernel address space. This can be used to
@@ -3201,6 +3202,9 @@ config HAVE_ATOMIC_IOMAP
def_bool y
	depends on X86_32

+config X86_L1TF_FLUSH_LIB
+ def_bool n
+
source "arch/x86/kvm/Kconfig"

source "arch/x86/Kconfig.assembler"
diff --git a/arch/x86/include/asm/l1tf.h b/arch/x86/include/asm/l1tf.h
new file mode 100644
--- /dev/null
+++ b/arch/x86/include/asm/l1tf.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_L1TF_FLUSH_H
+#define _ASM_L1TF_FLUSH_H
+
+#ifdef CONFIG_X86_L1TF_FLUSH_LIB
+int l1tf_flush_setup(void);
+void l1tf_flush(void);
+#endif /* CONFIG_X86_L1TF_FLUSH_LIB */
+
+#endif
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -92,6 +92,7 @@ config KVM_SW_PROTECTED_VM
config KVM_INTEL
tristate "KVM for Intel (and compatible) processors support"
depends on KVM && IA32_FEAT_CTL
+ select X86_L1TF_FLUSH_LIB
help
Provides support for KVM on processors equipped with Intel's VT
extensions, a.k.a. Virtual Machine Extensions (VMX).
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -42,6 +42,7 @@
#include <asm/idtentry.h>
#include <asm/io.h>
#include <asm/irq_remapping.h>
+#include <asm/l1tf.h>
#include <asm/reboot.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
@@ -250,9 +251,6 @@ static void *vmx_l1d_flush_pages;

static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
{
- struct page *page;
- unsigned int i;
-
if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
return 0;
@@ -288,26 +286,11 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
l1tf = VMENTER_L1D_FLUSH_ALWAYS;
	}

- if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
- !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
- /*
- * This allocation for vmx_l1d_flush_pages is not tied to a VM
- * lifetime and so should not be charged to a memcg.
- */
- page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
- if (!page)
- return -ENOMEM;
- vmx_l1d_flush_pages = page_address(page);
+ if (l1tf != VMENTER_L1D_FLUSH_NEVER) {
+		int err = l1tf_flush_setup();

- /*
- * Initialize each page with a different pattern in
- * order to protect against KSM in the nested
- * virtualization case.
- */
- for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
- memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
- PAGE_SIZE);
- }
+ if (err)
+ return err;
	}

l1tf_vmx_mitigation = l1tf;
@@ -6652,20 +6635,8 @@ int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return ret;
}

-/*
- * Software based L1D cache flush which is used when microcode providing
- * the cache control MSR is not loaded.
- *
- * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
- * flush it is required to read in 64 KiB because the replacement algorithm
- * is not exactly LRU. This could be sized at runtime via topology
- * information but as all relevant affected CPUs have 32KiB L1D cache size
- * there is no point in doing so.
- */
static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
{
- int size = PAGE_SIZE << L1D_CACHE_ORDER;
-
/*
* This code is only executed when the flush mode is 'cond' or
* 'always'
@@ -6695,32 +6666,7 @@ static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)

	vcpu->stat.l1d_flush++;

- if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
- native_wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
- return;
- }
-
- asm volatile(
- /* First ensure the pages are in the TLB */
- "xorl %%eax, %%eax\n"
- ".Lpopulate_tlb:\n\t"
- "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
- "addl $4096, %%eax\n\t"
- "cmpl %%eax, %[size]\n\t"
- "jne .Lpopulate_tlb\n\t"
- "xorl %%eax, %%eax\n\t"
- "cpuid\n\t"
- /* Now fill the cache */
- "xorl %%eax, %%eax\n"
- ".Lfill_cache:\n"
- "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
- "addl $64, %%eax\n\t"
- "cmpl %%eax, %[size]\n\t"
- "jne .Lfill_cache\n\t"
- "lfence\n"
- :: [flush_pages] "r" (vmx_l1d_flush_pages),
- [size] "r" (size)
- : "eax", "ebx", "ecx", "edx");
+ l1tf_flush();
}

void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -37,6 +37,7 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_MITIGATION_RETPOLINE) += retpoline.o
+lib-$(CONFIG_X86_L1TF_FLUSH_LIB) += l1tf.o

obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
obj-y += iomem.o
diff --git a/arch/x86/lib/l1tf.c b/arch/x86/lib/l1tf.c
new file mode 100644
--- /dev/null
+++ b/arch/x86/lib/l1tf.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+
+#include <asm/cpufeature.h>
+#include <asm/l1tf.h>
+#include <asm/msr.h>
+
+#define L1D_CACHE_ORDER 4
+static void *l1tf_flush_pages;
+
+int l1tf_flush_setup(void)
+{
+ struct page *page;
+ unsigned int i;
+
+ if (l1tf_flush_pages || boot_cpu_has(X86_FEATURE_FLUSH_L1D))
+ return 0;
+
+ page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
+ if (!page)
+ return -ENOMEM;
+ l1tf_flush_pages = page_address(page);
+
+ /*
+ * Initialize each page with a different pattern in
+ * order to protect against KSM in the nested
+ * virtualization case.
+ */
+ for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
+ memset(l1tf_flush_pages + i * PAGE_SIZE, i + 1,
+ PAGE_SIZE);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(l1tf_flush_setup);
+
+/*
+ * Flush L1D in a way that:
+ *
+ * - definitely works on CPUs with X86_FEATURE_FLUSH_L1D (because the SDM says so).
+ * - almost definitely works on other CPUs with L1TF (because someone on LKML
+ * said someone from Intel said so).
+ * - may or may not work on other CPUs.
+ *
+ * Don't call unless l1tf_flush_setup() has returned successfully.
+ */
+noinstr void l1tf_flush(void)
+{
+ int size = PAGE_SIZE << L1D_CACHE_ORDER;
+
+ if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+ native_wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+ return;
+ }
+
+ if (WARN_ON(!l1tf_flush_pages))
+ return;
+
+ /*
+ * This sequence was provided by Intel for the purpose of mitigating
+ * L1TF on VMX.
+ *
+	 * The L1D cache is 32 KiB on Nehalem and some later microarchitectures,
+	 * but flushing it requires reading in 64 KiB because the replacement
+	 * algorithm is not exactly LRU. The flush size could be determined at
+	 * runtime from topology information, but since all relevant affected
+	 * CPUs have a 32 KiB L1D cache there is no point in doing so.
+ */
+ asm volatile(
+ /* First ensure the pages are in the TLB */
+ "xorl %%eax, %%eax\n"
+ ".Lpopulate_tlb:\n\t"
+ "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
+ "addl $4096, %%eax\n\t"
+ "cmpl %%eax, %[size]\n\t"
+ "jne .Lpopulate_tlb\n\t"
+ "xorl %%eax, %%eax\n\t"
+ "cpuid\n\t"
+ /* Now fill the cache */
+ "xorl %%eax, %%eax\n"
+ ".Lfill_cache:\n"
+ "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
+ "addl $64, %%eax\n\t"
+ "cmpl %%eax, %[size]\n\t"
+ "jne .Lfill_cache\n\t"
+ "lfence\n"
+ :: [flush_pages] "r" (l1tf_flush_pages),
+ [size] "r" (size)
+ : "eax", "ebx", "ecx", "edx");
+}
+EXPORT_SYMBOL(l1tf_flush);
ASI will need to use this L1D flushing logic, so put it in a library
where it can be used independently of KVM.

Since we're creating this library, it starts to look messy if we don't
also use it in the double-opt-in (both kernel cmdline and prctl)
mm-switching flush logic, which exists to mitigate Snoop-Assisted L1
Data Sampling ("SAL1DS"). However, that logic doesn't use any
software-based fallback for flushing on CPUs without the L1D_FLUSH
command, and on those CPUs the prctl opt-in will fail. One option would
be to just start using the software fallback sequence currently used by
the VMX code, but Linus didn't seem happy with a similar sequence being
used here [1]. CPUs affected by SAL1DS are a subset of those affected
by L1TF, so it wouldn't be completely insane to assume that the same
sequence works for both cases, but I'll err on the side of caution and
avoid the risk of giving users a false impression that the kernel has
really flushed L1D for them.

Instead, create this awkward library that is scoped specifically to
L1TF, will be used only by VMX and ASI, and has an annoying "only
sometimes works" doc-comment. Users of the library can then infer from
that comment whether they have actually flushed L1D.

No functional change intended.

[1] https://lore.kernel.org/linux-kernel/CAHk-=whC4PUhErcoDhCbTOdmPPy-Pj8j9ytsdcyz9TorOb4KUw@mail.gmail.com/

Checkpatch-args: --ignore=COMMIT_LOG_LONG_LINE

Signed-off-by: Brendan Jackman <jackmanb@google.com>
---
 arch/x86/Kconfig            |  4 ++
 arch/x86/include/asm/l1tf.h | 10 ++++++
 arch/x86/kvm/Kconfig        |  1 +
 arch/x86/kvm/vmx/vmx.c      | 66 +++----------
 arch/x86/lib/Makefile       |  1 +
 arch/x86/lib/l1tf.c         | 94 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 116 insertions(+), 60 deletions(-)
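A usage sketch for reviewers, not part of the patch: this is roughly how
a later user such as ASI might consume the library, assuming
CONFIG_X86_L1TF_FLUSH_LIB is selected. All asi_* names below are
hypothetical, invented for illustration; only l1tf_flush_setup() and
l1tf_flush() come from this patch.

#include <linux/init.h>
#include <linux/types.h>

#include <asm/l1tf.h>

static bool asi_can_flush_l1d;	/* hypothetical */

static int __init asi_l1tf_init(void)
{
	/*
	 * A no-op on CPUs with X86_FEATURE_FLUSH_L1D; otherwise this
	 * allocates the software-fallback flush pages and can fail.
	 */
	if (!l1tf_flush_setup())
		asi_can_flush_l1d = true;

	return 0;
}
subsys_initcall(asi_l1tf_init);

/* Hypothetical hook run when entering the restricted address space. */
static void asi_maybe_flush_l1d(void)
{
	/*
	 * Per the l1tf_flush() doc-comment, this is only known to work
	 * on CPUs affected by L1TF; don't report anything stronger to
	 * users.
	 */
	if (asi_can_flush_l1d)
		l1tf_flush();
}

The setup/flush split here is the same one vmx_setup_l1d_flush() relies
on above: any allocation failure surfaces once at opt-in time, so the
flush call itself stays allocation-free and safe to run on every
transition.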