[v3,1/6] x86/bugs: Add asm helpers for executing VERW

Message ID 20231025-delay-verw-v3-1-52663677ee35@linux.intel.com
State New, archived
Series Delay VERW

Commit Message

Pawan Gupta Oct. 25, 2023, 8:52 p.m. UTC
MDS mitigation requires clearing the CPU buffers before returning to
userspace. This needs to be done late in the exit-to-user path. The
current location of VERW leaves a possibility of kernel data ending up
in CPU buffers for memory accesses done after VERW, such as:

  1. Kernel data accessed by an NMI between VERW and return-to-user can
     remain in CPU buffers (since an NMI returning to kernel does not
     execute VERW to clear CPU buffers).
  2. Alyssa reported that after VERW is executed,
     CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system
     call. Memory accesses during stack scrubbing can move kernel stack
     contents into CPU buffers.
  3. When caller-saved registers are restored after a return from the
     function executing VERW, the kernel stack accesses can remain in
     CPU buffers (since they occur after VERW).

To fix this, VERW needs to be moved very late in the exit-to-user path.

In preparation for moving VERW to entry/exit asm code, create macros
that can be used in asm. Also make them depend on a new feature flag
X86_FEATURE_CLEAR_CPU_BUF.

Reported-by: Alyssa Milburn <alyssa.milburn@intel.com>
Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
---
 arch/x86/entry/entry.S               | 16 ++++++++++++++++
 arch/x86/include/asm/cpufeatures.h   |  2 +-
 arch/x86/include/asm/nospec-branch.h | 15 +++++++++++++++
 3 files changed, 32 insertions(+), 1 deletion(-)
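
For context, later patches in the series are expected to invoke the new
macro at the exit-to-userspace points. A rough sketch of a call site
(hypothetical; the exact locations are not part of this patch):

	/* late in the exit-to-user path, after GPRs are restored */
	swapgs
	CLEAR_CPU_BUFFERS	/* VERW, once X86_FEATURE_CLEAR_CPU_BUF is set */
	sysretq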

Comments

Andrew Cooper Oct. 25, 2023, 9:10 p.m. UTC | #1
On 25/10/2023 9:52 pm, Pawan Gupta wrote:
> diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
> index bfb7bcb362bc..f8ba0c0b6e60 100644
> --- a/arch/x86/entry/entry.S
> +++ b/arch/x86/entry/entry.S
> @@ -20,3 +23,16 @@ SYM_FUNC_END(entry_ibpb)
>  EXPORT_SYMBOL_GPL(entry_ibpb);
>  
>  .popsection
> +
> +.pushsection .entry.text, "ax"
> +
> +.align L1_CACHE_BYTES, 0xcc
> +SYM_CODE_START_NOALIGN(mds_verw_sel)
> +	UNWIND_HINT_UNDEFINED
> +	ANNOTATE_NOENDBR
> +	.word __KERNEL_DS

You need another .align here.  Otherwise subsequent code will still
start in this cacheline and defeat the purpose of trying to keep it
separate.
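
I.e. something like this (untested sketch, padding with 0xcc as above):

.align L1_CACHE_BYTES, 0xcc
SYM_CODE_START_NOALIGN(mds_verw_sel)
	UNWIND_HINT_UNDEFINED
	ANNOTATE_NOENDBR
	.word __KERNEL_DS
	.align L1_CACHE_BYTES, 0xcc	/* keep following code out of this cacheline */
SYM_CODE_END(mds_verw_sel)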

> +SYM_CODE_END(mds_verw_sel);

Thinking about it, should this really be CODE and not a data entry?

It lives in .entry.text but it really is data and objtool shouldn't be
writing ORC data for it at all.

(Not to mention that if it's marked as STT_OBJECT, objdump -d will do
the sensible thing and not even try to disassemble it).

~Andrew

P.S. Please CC me on the full series.  Far less effort than fishing the
rest off lore.
Josh Poimboeuf Oct. 25, 2023, 9:28 p.m. UTC | #2
On Wed, Oct 25, 2023 at 10:10:41PM +0100, Andrew Cooper wrote:
> On 25/10/2023 9:52 pm, Pawan Gupta wrote:
> > diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
> > index bfb7bcb362bc..f8ba0c0b6e60 100644
> > --- a/arch/x86/entry/entry.S
> > +++ b/arch/x86/entry/entry.S
> > @@ -20,3 +23,16 @@ SYM_FUNC_END(entry_ibpb)
> >  EXPORT_SYMBOL_GPL(entry_ibpb);
> >  
> >  .popsection
> > +
> > +.pushsection .entry.text, "ax"
> > +
> > +.align L1_CACHE_BYTES, 0xcc
> > +SYM_CODE_START_NOALIGN(mds_verw_sel)
> > +	UNWIND_HINT_UNDEFINED
> > +	ANNOTATE_NOENDBR
> > +	.word __KERNEL_DS
> 
> You need another .align here.  Otherwise subsequent code will still
> start in this cacheline and defeat the purpose of trying to keep it
> separate.
> 
> > +SYM_CODE_END(mds_verw_sel);
> 
> Thinking about it, should this really be CODE and not a data entry?
> 
> It lives in .entry.text but it really is data and objtool shouldn't be
> writing ORC data for it at all.
> 
> (Not to mention that if it's marked as STT_OBJECT, objdump -d will do
> the sensible thing and not even try to disassemble it).
> 
> ~Andrew
> 
> P.S. Please CC me on the full series.  Far less effort than fishing the
> rest off lore.

+1 to putting it in .rodata or so.
Andrew Cooper Oct. 25, 2023, 9:30 p.m. UTC | #3
On 25/10/2023 10:28 pm, Josh Poimboeuf wrote:
> On Wed, Oct 25, 2023 at 10:10:41PM +0100, Andrew Cooper wrote:
>> On 25/10/2023 9:52 pm, Pawan Gupta wrote:
>>> diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
>>> index bfb7bcb362bc..f8ba0c0b6e60 100644
>>> --- a/arch/x86/entry/entry.S
>>> +++ b/arch/x86/entry/entry.S
>>> @@ -20,3 +23,16 @@ SYM_FUNC_END(entry_ibpb)
>>>  EXPORT_SYMBOL_GPL(entry_ibpb);
>>>  
>>>  .popsection
>>> +
>>> +.pushsection .entry.text, "ax"
>>> +
>>> +.align L1_CACHE_BYTES, 0xcc
>>> +SYM_CODE_START_NOALIGN(mds_verw_sel)
>>> +	UNWIND_HINT_UNDEFINED
>>> +	ANNOTATE_NOENDBR
>>> +	.word __KERNEL_DS
>> You need another .align here.  Otherwise subsequent code will still
>> start in this cacheline and defeat the purpose of trying to keep it
>> separate.
>>
>>> +SYM_CODE_END(mds_verw_sel);
>> Thinking about it, should this really be CODE and not a data entry?
>>
>> It lives in .entry.text but it really is data and objtool shouldn't be
>> writing ORC data for it at all.
>>
>> (Not to mention that if it's marked as STT_OBJECT, objdump -d will do
>> the sensible thing and not even try to disassemble it).
>>
>> ~Andrew
>>
>> P.S. Please CC me on the full series.  Far less effort than fishing the
>> rest off lore.
> +1 to putting it in .rodata or so.

It's necessarily in .entry.text so it doesn't explode with KPTI active.

~Andrew
Josh Poimboeuf Oct. 25, 2023, 9:49 p.m. UTC | #4
On Wed, Oct 25, 2023 at 10:30:52PM +0100, Andrew Cooper wrote:
> On 25/10/2023 10:28 pm, Josh Poimboeuf wrote:
> > On Wed, Oct 25, 2023 at 10:10:41PM +0100, Andrew Cooper wrote:
> >> On 25/10/2023 9:52 pm, Pawan Gupta wrote:
> >>> diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
> >>> index bfb7bcb362bc..f8ba0c0b6e60 100644
> >>> --- a/arch/x86/entry/entry.S
> >>> +++ b/arch/x86/entry/entry.S
> >>> @@ -20,3 +23,16 @@ SYM_FUNC_END(entry_ibpb)
> >>>  EXPORT_SYMBOL_GPL(entry_ibpb);
> >>>  
> >>>  .popsection
> >>> +
> >>> +.pushsection .entry.text, "ax"
> >>> +
> >>> +.align L1_CACHE_BYTES, 0xcc
> >>> +SYM_CODE_START_NOALIGN(mds_verw_sel)
> >>> +	UNWIND_HINT_UNDEFINED
> >>> +	ANNOTATE_NOENDBR
> >>> +	.word __KERNEL_DS
> >> You need another .align here.  Otherwise subsequent code will still
> >> start in this cacheline and defeat the purpose of trying to keep it
> >> separate.
> >>
> >>> +SYM_CODE_END(mds_verw_sel);
> >> Thinking about it, should this really be CODE and not a data entry?
> >>
> >> It lives in .entry.text but it really is data and objtool shouldn't be
> >> writing ORC data for it at all.
> >>
> >> (Not to mention that if it's marked as STT_OBJECT, objdump -d will do
> >> the sensible thing and not even try to disassemble it).
> >>
> >> ~Andrew
> >>
> >> P.S. Please CC me on the full series.  Far less effort than fishing the
> >> rest off lore.
> > +1 to putting it in .rodata or so.
> 
> It's necessarily in .entry.text so it doesn't explode with KPTI active.

Ah, right.  In general tooling doesn't take too kindly to putting data
in a text section.  But it might be ok.
Pawan Gupta Oct. 25, 2023, 10:07 p.m. UTC | #5
On Wed, Oct 25, 2023 at 10:10:41PM +0100, Andrew Cooper wrote:
> > +.align L1_CACHE_BYTES, 0xcc
> > +SYM_CODE_START_NOALIGN(mds_verw_sel)
> > +	UNWIND_HINT_UNDEFINED
> > +	ANNOTATE_NOENDBR
> > +	.word __KERNEL_DS
> 
> You need another .align here.  Otherwise subsequent code will still
> start in this cacheline and defeat the purpose of trying to keep it
> separate.

Right.

> > +SYM_CODE_END(mds_verw_sel);
> 
> Thinking about it, should this really be CODE and not a data entry?

Would that require adding a data equivalent of .entry.text and updating
KPTI to keep it mapped? Or is there an easier option?

> P.S. Please CC me on the full series.  Far less effort than fishing the
> rest off lore.

I didn't realize get_maintainer.pl wasn't doing that already. Proposing
the below update to MAINTAINERS:

---
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Wed, 25 Oct 2023 14:50:41 -0700
Subject: [PATCH] MAINTAINERS: Update entry for X86 HARDWARE VULNERABILITIES

Add Andrew Cooper to the maintainers of hardware vulnerability
mitigations.

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2894f0777537..bf8c8707b8f8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -23382,6 +23382,7 @@ M:	Thomas Gleixner <tglx@linutronix.de>
 M:	Borislav Petkov <bp@alien8.de>
 M:	Peter Zijlstra <peterz@infradead.org>
 M:	Josh Poimboeuf <jpoimboe@kernel.org>
+M:	Andrew Cooper <andrew.cooper3@citrix.com>
 R:	Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
 S:	Maintained
 F:	Documentation/admin-guide/hw-vuln/
Andrew Cooper Oct. 25, 2023, 10:13 p.m. UTC | #6
On 25/10/2023 11:07 pm, Pawan Gupta wrote:
> On Wed, Oct 25, 2023 at 10:10:41PM +0100, Andrew Cooper wrote:
>>> +.align L1_CACHE_BYTES, 0xcc
>>> +SYM_CODE_START_NOALIGN(mds_verw_sel)
>>> +	UNWIND_HINT_UNDEFINED
>>> +	ANNOTATE_NOENDBR
>>> +	.word __KERNEL_DS
>> You need another .align here.  Otherwise subsequent code will still
>> start in this cacheline and defeat the purpose of trying to keep it
>> separate.
> Right.
>
>>> +SYM_CODE_END(mds_verw_sel);
>> Thinking about it, should this really be CODE and not a data entry?
> Would that require adding a data equivalent of .entry.text and updating
> KPTI to keep it mapped? Or is there an easier option?

Leave it right here in .entry.text, but try using SYM_DATA() and
friends.  See whether objtool vomits over the result or not.

And if objtool does vomit over the result, then leaving it as it is in
this patch with SYM_CODE() is good enough.
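
Roughly like this, keeping the alignment padding (untested sketch;
whether objtool and the ORC generation cope with it is exactly the
question):

.pushsection .entry.text, "ax"

.align L1_CACHE_BYTES, 0xcc
SYM_DATA_START(mds_verw_sel)
	.word __KERNEL_DS
	.align L1_CACHE_BYTES, 0xcc	/* keep following code out of this cacheline */
SYM_DATA_END(mds_verw_sel)
/* For KVM */
EXPORT_SYMBOL_GPL(mds_verw_sel);

.popsection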

>
>> P.S. Please CC me on the full series.  Far less effort than fishing the
>> rest off lore.
> I didn't realize get_maintainer.pl wasn't doing that already. Proposing
> the below update to MAINTAINERS:
>
> ---
> From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> Date: Wed, 25 Oct 2023 14:50:41 -0700
> Subject: [PATCH] MAINTAINERS: Update entry for X86 HARDWARE VULNERABILITIES
>
> Add Andrew Cooper to the maintainers of hardware vulnerability
> mitigations.
>
> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> ---
>  MAINTAINERS | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 2894f0777537..bf8c8707b8f8 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -23382,6 +23382,7 @@ M:	Thomas Gleixner <tglx@linutronix.de>
>  M:	Borislav Petkov <bp@alien8.de>
>  M:	Peter Zijlstra <peterz@infradead.org>
>  M:	Josh Poimboeuf <jpoimboe@kernel.org>
> +M:	Andrew Cooper <andrew.cooper3@citrix.com>

Oh, right.  Perhaps R rather than M seeing as I can't make any time
commitments, but sure.

~Andrew
Nikolay Borisov Oct. 26, 2023, 1:44 p.m. UTC | #7
<snip>
> +
> +.pushsection .entry.text, "ax"
> +
> +.align L1_CACHE_BYTES, 0xcc
> +SYM_CODE_START_NOALIGN(mds_verw_sel)
> +	UNWIND_HINT_UNDEFINED
> +	ANNOTATE_NOENDBR
> +	.word __KERNEL_DS
> +SYM_CODE_END(mds_verw_sel);
> +/* For KVM */
> +EXPORT_SYMBOL_GPL(mds_verw_sel);
> +
> +.popsection

<snip>

> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index c55cc243592e..005e69f93115 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -329,6 +329,21 @@
>   #endif
>   .endm
>   
> +/*
> + * Macros to execute VERW instruction that mitigate transient data sampling
> + * attacks such as MDS. On affected systems a microcode update overloaded VERW
> + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
> + *
> + * Note: Only the memory operand variant of VERW clears the CPU buffers.
> + */
> +.macro EXEC_VERW
> +	verw _ASM_RIP(mds_verw_sel)
> +.endm
> +
> +.macro CLEAR_CPU_BUFFERS
> +	ALTERNATIVE "", __stringify(EXEC_VERW), X86_FEATURE_CLEAR_CPU_BUF
> +.endm


What happened with the first 5 bytes of a 7-byte nop being complemented
by __KERNEL_DS, in order to handle VERW being executed after user
registers are restored while still having its memory operand?

> +
>   #else /* __ASSEMBLY__ */
>   
>   #define ANNOTATE_RETPOLINE_SAFE					\
>
Andrew Cooper Oct. 26, 2023, 1:58 p.m. UTC | #8
On 26/10/2023 2:44 pm, Nikolay Borisov wrote:
>
>
> <snip>
>> +
>> +.pushsection .entry.text, "ax"
>> +
>> +.align L1_CACHE_BYTES, 0xcc
>> +SYM_CODE_START_NOALIGN(mds_verw_sel)
>> +    UNWIND_HINT_UNDEFINED
>> +    ANNOTATE_NOENDBR
>> +    .word __KERNEL_DS
>> +SYM_CODE_END(mds_verw_sel);
>> +/* For KVM */
>> +EXPORT_SYMBOL_GPL(mds_verw_sel);
>> +
>> +.popsection
>
> <snip>
>
>> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
>> index c55cc243592e..005e69f93115 100644
>> --- a/arch/x86/include/asm/nospec-branch.h
>> +++ b/arch/x86/include/asm/nospec-branch.h
>> @@ -329,6 +329,21 @@
>>   #endif
>>   .endm
>>
>> +/*
>> + * Macros to execute VERW instruction that mitigate transient data sampling
>> + * attacks such as MDS. On affected systems a microcode update overloaded VERW
>> + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
>> + *
>> + * Note: Only the memory operand variant of VERW clears the CPU buffers.
>> + */
>> +.macro EXEC_VERW
>> +    verw _ASM_RIP(mds_verw_sel)
>> +.endm
>> +
>> +.macro CLEAR_CPU_BUFFERS
>> +    ALTERNATIVE "", __stringify(EXEC_VERW), X86_FEATURE_CLEAR_CPU_BUF
>> +.endm
>
>
> What happened with the first 5 bytes of a 7-byte nop being
> complemented by __KERNEL_DS, in order to handle VERW being executed
> after user registers are restored while still having its memory operand?

It was moved out of line (so no need to hide a constant in a nop),
deduped, and renamed to mds_verw_sel.

verw _ASM_RIP(mds_verw_sel) *is* the memory form.
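
For reference, the operand kind is the only difference between the two
encodings, and only the memory form gets the buffer-clearing side effect
on updated microcode:

	verw %cx			/* register operand: no buffer clearing */
	verw mds_verw_sel(%rip)		/* memory operand: clears CPU buffers */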

~Andrew

Patch

diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index bfb7bcb362bc..f8ba0c0b6e60 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -6,6 +6,9 @@ 
 #include <linux/linkage.h>
 #include <asm/export.h>
 #include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
 
 .pushsection .noinstr.text, "ax"
 
@@ -20,3 +23,16 @@  SYM_FUNC_END(entry_ibpb)
 EXPORT_SYMBOL_GPL(entry_ibpb);
 
 .popsection
+
+.pushsection .entry.text, "ax"
+
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_START_NOALIGN(mds_verw_sel)
+	UNWIND_HINT_UNDEFINED
+	ANNOTATE_NOENDBR
+	.word __KERNEL_DS
+SYM_CODE_END(mds_verw_sel);
+/* For KVM */
+EXPORT_SYMBOL_GPL(mds_verw_sel);
+
+.popsection
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 58cb9495e40f..f21fc0f12737 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -308,10 +308,10 @@ 
 #define X86_FEATURE_SMBA		(11*32+21) /* "" Slow Memory Bandwidth Allocation */
 #define X86_FEATURE_BMEC		(11*32+22) /* "" Bandwidth Monitoring Event Configuration */
 #define X86_FEATURE_USER_SHSTK		(11*32+23) /* Shadow stack support for user mode applications */
-
 #define X86_FEATURE_SRSO		(11*32+24) /* "" AMD BTB untrain RETs */
 #define X86_FEATURE_SRSO_ALIAS		(11*32+25) /* "" AMD BTB untrain RETs through aliasing */
 #define X86_FEATURE_IBPB_ON_VMEXIT	(11*32+26) /* "" Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_CLEAR_CPU_BUF	(11*32+27) /* "" Clear CPU buffers */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c55cc243592e..005e69f93115 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -329,6 +329,21 @@ 
 #endif
 .endm
 
+/*
+ * Macros to execute VERW instruction that mitigate transient data sampling
+ * attacks such as MDS. On affected systems a microcode update overloaded VERW
+ * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
+ *
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+.macro EXEC_VERW
+	verw _ASM_RIP(mds_verw_sel)
+.endm
+
+.macro CLEAR_CPU_BUFFERS
+	ALTERNATIVE "", __stringify(EXEC_VERW), X86_FEATURE_CLEAR_CPU_BUF
+.endm
+
 #else /* __ASSEMBLY__ */
 
 #define ANNOTATE_RETPOLINE_SAFE					\