diff mbox series

[v3] x86/traps: Enable UBSAN traps on x86

Message ID 20240625032509.4155839-1-gatlin.newhouse@gmail.com (mailing list archive)
State Superseded
Headers show
Series [v3] x86/traps: Enable UBSAN traps on x86 | expand

Commit Message

Gatlin Newhouse June 25, 2024, 3:24 a.m. UTC
Currently ARM architectures output which specific sanitizer caused
the trap, via the encoded data in the trap instruction. Clang on
x86 currently encodes the same data in ud1 instructions but the x86
handle_bug() and is_valid_bugaddr() functions currently only look
at ud2s.

Bring x86 to parity with arm64, similar to commit 25b84002afb9
("arm64: Support Clang UBSAN trap codes for better reporting").
Enable the output of UBSAN type information on x86 architectures
compiled with clang when CONFIG_UBSAN_TRAP=y.

Signed-off-by: Gatlin Newhouse <gatlin.newhouse@gmail.com>
---
Changes in v3:
  - Address Thomas's remarks about: change log structure,
    get_ud_type() instead of is_valid_bugaddr(), handle_bug()
    changes, and handle_ubsan_failure().

Changes in v2:
  - Name the new constants 'LEN_ASOP' and 'INSN_ASOP' instead of
    'LEN_REX' and 'INSN_REX'
  - Change handle_ubsan_failure() from enum bug_trap_type to void
    function

v1: https://lore.kernel.org/linux-hardening/20240529022043.3661757-1-gatlin.newhouse@gmail.com/
v2: https://lore.kernel.org/linux-hardening/20240601031019.3708758-1-gatlin.newhouse@gmail.com/
---
 MAINTAINERS                  |  2 ++
 arch/x86/include/asm/bug.h   | 11 ++++++++++
 arch/x86/include/asm/ubsan.h | 23 +++++++++++++++++++++
 arch/x86/kernel/Makefile     |  1 +
 arch/x86/kernel/traps.c      | 40 +++++++++++++++++++++++++++++++-----
 arch/x86/kernel/ubsan.c      | 21 +++++++++++++++++++
 6 files changed, 93 insertions(+), 5 deletions(-)
 create mode 100644 arch/x86/include/asm/ubsan.h
 create mode 100644 arch/x86/kernel/ubsan.c

Comments

Xin Li June 25, 2024, 6:45 a.m. UTC | #1
On 6/24/2024 8:24 PM, Gatlin Newhouse wrote:
> Currently ARM architectures output which specific sanitizer caused
> the trap, via the encoded data in the trap instruction. Clang on
> x86 currently encodes the same data in ud1 instructions but the x86
> handle_bug() and is_valid_bugaddr() functions currently only look
> at ud2s.
> 
> Bring x86 to parity with arm64, similar to commit 25b84002afb9
> ("arm64: Support Clang UBSAN trap codes for better reporting").
> Enable the output of UBSAN type information on x86 architectures
> compiled with clang when CONFIG_UBSAN_TRAP=y.
> 
> Signed-off-by: Gatlin Newhouse <gatlin.newhouse@gmail.com>
> ---
> Changes in v3:
>    - Address Thomas's remarks about: change log structure,
>      get_ud_type() instead of is_valid_bugaddr(), handle_bug()
>      changes, and handle_ubsan_failure().
> 
> Changes in v2:
>    - Name the new constants 'LEN_ASOP' and 'INSN_ASOP' instead of
>      'LEN_REX' and 'INSN_REX'
>    - Change handle_ubsan_failure() from enum bug_trap_type to void
>      function
> 
> v1: https://lore.kernel.org/linux-hardening/20240529022043.3661757-1-gatlin.newhouse@gmail.com/
> v2: https://lore.kernel.org/linux-hardening/20240601031019.3708758-1-gatlin.newhouse@gmail.com/
> ---
>   MAINTAINERS                  |  2 ++
>   arch/x86/include/asm/bug.h   | 11 ++++++++++
>   arch/x86/include/asm/ubsan.h | 23 +++++++++++++++++++++
>   arch/x86/kernel/Makefile     |  1 +
>   arch/x86/kernel/traps.c      | 40 +++++++++++++++++++++++++++++++-----
>   arch/x86/kernel/ubsan.c      | 21 +++++++++++++++++++
>   6 files changed, 93 insertions(+), 5 deletions(-)
>   create mode 100644 arch/x86/include/asm/ubsan.h
>   create mode 100644 arch/x86/kernel/ubsan.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 28e20975c26f..b8512887ffb1 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -22635,6 +22635,8 @@ L:	kasan-dev@googlegroups.com
>   L:	linux-hardening@vger.kernel.org
>   S:	Supported
>   T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
> +F:	arch/x86/include/asm/ubsan.h
> +F:	arch/x86/kernel/ubsan.c
>   F:	Documentation/dev-tools/ubsan.rst
>   F:	include/linux/ubsan.h
>   F:	lib/Kconfig.ubsan
> diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
> index a3ec87d198ac..a363d13c263b 100644
> --- a/arch/x86/include/asm/bug.h
> +++ b/arch/x86/include/asm/bug.h
> @@ -13,6 +13,17 @@
>   #define INSN_UD2	0x0b0f
>   #define LEN_UD2		2
>   
> +/*
> + * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
> + */
> +#define INSN_UD1	0xb90f
> +#define INSN_UD_MASK	0xFFFF
> +#define LEN_UD1		2
> +#define INSN_ASOP	0x67
> +#define INSN_ASOP_MASK	0x00FF
> +#define BUG_UD_NONE	0xFFFF
> +#define BUG_UD2		0xFFFE
> +
>   #ifdef CONFIG_GENERIC_BUG
>   
>   #ifdef CONFIG_X86_32
> diff --git a/arch/x86/include/asm/ubsan.h b/arch/x86/include/asm/ubsan.h
> new file mode 100644
> index 000000000000..ac2080984e83
> --- /dev/null
> +++ b/arch/x86/include/asm/ubsan.h
> @@ -0,0 +1,23 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_X86_UBSAN_H
> +#define _ASM_X86_UBSAN_H
> +
> +/*
> + * Clang Undefined Behavior Sanitizer trap mode support.
> + */
> +#include <linux/bug.h>
> +#include <linux/ubsan.h>
> +#include <asm/ptrace.h>
> +
> +/*
> + * UBSAN uses the EAX register to encode its type in the ModRM byte.
> + */
> +#define UBSAN_REG	0x40
> +
> +#ifdef CONFIG_UBSAN_TRAP
> +void handle_ubsan_failure(struct pt_regs *regs, u16 insn);
> +#else
> +static inline void handle_ubsan_failure(struct pt_regs *regs, u16 insn) { return; }
> +#endif /* CONFIG_UBSAN_TRAP */
> +
> +#endif /* _ASM_X86_UBSAN_H */
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 74077694da7d..fe1d9db27500 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -145,6 +145,7 @@ obj-$(CONFIG_UNWINDER_GUESS)		+= unwind_guess.o
>   obj-$(CONFIG_AMD_MEM_ENCRYPT)		+= sev.o
>   
>   obj-$(CONFIG_CFI_CLANG)			+= cfi.o
> +obj-$(CONFIG_UBSAN_TRAP)		+= ubsan.o
>   
>   obj-$(CONFIG_CALL_THUNKS)		+= callthunks.o
>   
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 4fa0b17e5043..aef21287e7ed 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -67,6 +67,7 @@
>   #include <asm/vdso.h>
>   #include <asm/tdx.h>
>   #include <asm/cfi.h>
> +#include <asm/ubsan.h>
>   
>   #ifdef CONFIG_X86_64
>   #include <asm/x86_init.h>
> @@ -91,6 +92,29 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
>   	return *(unsigned short *)addr == INSN_UD2;
>   }
>   
> +/*
> + * Check for UD1, UD2, with or without Address Size Override Prefixes instructions.
> + */
> +__always_inline u16 get_ud_type(unsigned long addr)
> +{
> +	u16 insn;
> +
> +	if (addr < TASK_SIZE_MAX)
> +		return BUG_UD_NONE;

Add an empty line for better readability.

> +	insn = *(u16 *)addr;
> +	if ((insn & INSN_UD_MASK) == INSN_UD2)
> +		return BUG_UD2;

Ditto.

There are extra empty lines in tglx's suggestion.

> +	if ((insn & INSN_ASOP_MASK) == INSN_ASOP)
> +		insn = *(u16 *)(++addr);
> +
> +	// UBSAN encode the failure type in the two bytes after UD1
> +	if ((insn & INSN_UD_MASK) == INSN_UD1)
> +		return *(u16 *)(addr + LEN_UD1);
> +
> +	return BUG_UD_NONE;
> +}
> +
> +

Better to add only one empty line.

>   static nokprobe_inline int
>   do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
>   		  struct pt_regs *regs,	long error_code)
> @@ -216,6 +240,7 @@ static inline void handle_invalid_op(struct pt_regs *regs)
>   static noinstr bool handle_bug(struct pt_regs *regs)
>   {
>   	bool handled = false;
> +	int ud_type;
>   
>   	/*
>   	 * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
> @@ -223,7 +248,8 @@ static noinstr bool handle_bug(struct pt_regs *regs)
>   	 * irqentry_enter().
>   	 */
>   	kmsan_unpoison_entry_regs(regs);
> -	if (!is_valid_bugaddr(regs->ip))
> +	ud_type = get_ud_type(regs->ip);
> +	if (ud_type == BUG_UD_NONE)
>   		return handled;
>   
>   	/*
> @@ -236,10 +262,14 @@ static noinstr bool handle_bug(struct pt_regs *regs)
>   	 */
>   	if (regs->flags & X86_EFLAGS_IF)
>   		raw_local_irq_enable();
> -	if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
> -	    handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
> -		regs->ip += LEN_UD2;
> -		handled = true;
> +	if (ud_type == INSN_UD2) {
> +		if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
> +		    handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
> +			regs->ip += LEN_UD2;
> +			handled = true;
> +		}
> +	} else {
> +		handle_ubsan_failure(regs, ud_type);
>   	}

Add one empty line.

>   	if (regs->flags & X86_EFLAGS_IF)
>   		raw_local_irq_disable();
> diff --git a/arch/x86/kernel/ubsan.c b/arch/x86/kernel/ubsan.c
> new file mode 100644
> index 000000000000..c90e337a1b6a
> --- /dev/null
> +++ b/arch/x86/kernel/ubsan.c
> @@ -0,0 +1,21 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Clang Undefined Behavior Sanitizer trap mode support.
> + */
> +#include <linux/bug.h>
> +#include <linux/string.h>
> +#include <linux/printk.h>
> +#include <linux/ubsan.h>
> +#include <asm/ptrace.h>
> +#include <asm/ubsan.h>
> +
> +/*
> + * Checks for the information embedded in the UD1 trap instruction
> + * for the UB Sanitizer in order to pass along debugging output.
> + */
> +void handle_ubsan_failure(struct pt_regs *regs, u16 type)
> +{
> +	if ((type & 0xFF) == UBSAN_REG)
> +		type >>= 8;
> +	pr_crit("%s at %pS\n", report_ubsan_failure(regs, type), (void *)regs->ip);
> +}
Peter Zijlstra June 25, 2024, 9:37 a.m. UTC | #2
On Tue, Jun 25, 2024 at 03:24:55AM +0000, Gatlin Newhouse wrote:
> Currently ARM architectures output which specific sanitizer caused
> the trap, via the encoded data in the trap instruction. Clang on
> x86 currently encodes the same data in ud1 instructions but the x86
> handle_bug() and is_valid_bugaddr() functions currently only look
> at ud2s.
> 
> Bring x86 to parity with arm64, similar to commit 25b84002afb9
> ("arm64: Support Clang UBSAN trap codes for better reporting").
> Enable the output of UBSAN type information on x86 architectures
> compiled with clang when CONFIG_UBSAN_TRAP=y.
> 
> Signed-off-by: Gatlin Newhouse <gatlin.newhouse@gmail.com>
> ---
> Changes in v3:
>   - Address Thomas's remarks about: change log structure,
>     get_ud_type() instead of is_valid_bugaddr(), handle_bug()
>     changes, and handle_ubsan_failure().
> 
> Changes in v2:
>   - Name the new constants 'LEN_ASOP' and 'INSN_ASOP' instead of
>     'LEN_REX' and 'INSN_REX'
>   - Change handle_ubsan_failure() from enum bug_trap_type to void
>     function
> 
> v1: https://lore.kernel.org/linux-hardening/20240529022043.3661757-1-gatlin.newhouse@gmail.com/
> v2: https://lore.kernel.org/linux-hardening/20240601031019.3708758-1-gatlin.newhouse@gmail.com/
> ---
>  MAINTAINERS                  |  2 ++
>  arch/x86/include/asm/bug.h   | 11 ++++++++++
>  arch/x86/include/asm/ubsan.h | 23 +++++++++++++++++++++
>  arch/x86/kernel/Makefile     |  1 +
>  arch/x86/kernel/traps.c      | 40 +++++++++++++++++++++++++++++++-----
>  arch/x86/kernel/ubsan.c      | 21 +++++++++++++++++++
>  6 files changed, 93 insertions(+), 5 deletions(-)
>  create mode 100644 arch/x86/include/asm/ubsan.h
>  create mode 100644 arch/x86/kernel/ubsan.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 28e20975c26f..b8512887ffb1 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -22635,6 +22635,8 @@ L:	kasan-dev@googlegroups.com
>  L:	linux-hardening@vger.kernel.org
>  S:	Supported
>  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
> +F:	arch/x86/include/asm/ubsan.h
> +F:	arch/x86/kernel/ubsan.c
>  F:	Documentation/dev-tools/ubsan.rst
>  F:	include/linux/ubsan.h
>  F:	lib/Kconfig.ubsan
> diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
> index a3ec87d198ac..a363d13c263b 100644
> --- a/arch/x86/include/asm/bug.h
> +++ b/arch/x86/include/asm/bug.h
> @@ -13,6 +13,17 @@
>  #define INSN_UD2	0x0b0f
>  #define LEN_UD2		2
>  
> +/*
> + * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
> + */
> +#define INSN_UD1	0xb90f
> +#define INSN_UD_MASK	0xFFFF
> +#define LEN_UD1		2
> +#define INSN_ASOP	0x67
> +#define INSN_ASOP_MASK	0x00FF
> +#define BUG_UD_NONE	0xFFFF
> +#define BUG_UD2		0xFFFE
> +

Please look at 790d1ce71de. Also your style above is inconsistent,
please use lower case consistently for the hex values.


>  #ifdef CONFIG_GENERIC_BUG
>  
>  #ifdef CONFIG_X86_32
> diff --git a/arch/x86/include/asm/ubsan.h b/arch/x86/include/asm/ubsan.h
> new file mode 100644
> index 000000000000..ac2080984e83
> --- /dev/null
> +++ b/arch/x86/include/asm/ubsan.h
> @@ -0,0 +1,23 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_X86_UBSAN_H
> +#define _ASM_X86_UBSAN_H
> +
> +/*
> + * Clang Undefined Behavior Sanitizer trap mode support.
> + */
> +#include <linux/bug.h>
> +#include <linux/ubsan.h>
> +#include <asm/ptrace.h>
> +
> +/*
> + * UBSAN uses the EAX register to encode its type in the ModRM byte.
> + */
> +#define UBSAN_REG	0x40
> +
> +#ifdef CONFIG_UBSAN_TRAP
> +void handle_ubsan_failure(struct pt_regs *regs, u16 insn);
> +#else
> +static inline void handle_ubsan_failure(struct pt_regs *regs, u16 insn) { return; }
> +#endif /* CONFIG_UBSAN_TRAP */
> +
> +#endif /* _ASM_X86_UBSAN_H */
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 74077694da7d..fe1d9db27500 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -145,6 +145,7 @@ obj-$(CONFIG_UNWINDER_GUESS)		+= unwind_guess.o
>  obj-$(CONFIG_AMD_MEM_ENCRYPT)		+= sev.o
>  
>  obj-$(CONFIG_CFI_CLANG)			+= cfi.o
> +obj-$(CONFIG_UBSAN_TRAP)		+= ubsan.o
>  
>  obj-$(CONFIG_CALL_THUNKS)		+= callthunks.o
>  
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 4fa0b17e5043..aef21287e7ed 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -67,6 +67,7 @@
>  #include <asm/vdso.h>
>  #include <asm/tdx.h>
>  #include <asm/cfi.h>
> +#include <asm/ubsan.h>
>  
>  #ifdef CONFIG_X86_64
>  #include <asm/x86_init.h>
> @@ -91,6 +92,29 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
>  	return *(unsigned short *)addr == INSN_UD2;
>  }
>  
> +/*
> + * Check for UD1, UD2, with or without Address Size Override Prefixes instructions.
> + */
> +__always_inline u16 get_ud_type(unsigned long addr)
> +{
> +	u16 insn;
> +
> +	if (addr < TASK_SIZE_MAX)
> +		return BUG_UD_NONE;
> +	insn = *(u16 *)addr;
> +	if ((insn & INSN_UD_MASK) == INSN_UD2)
> +		return BUG_UD2;
> +	if ((insn & INSN_ASOP_MASK) == INSN_ASOP)
> +		insn = *(u16 *)(++addr);
> +
> +	// UBSAN encode the failure type in the two bytes after UD1
> +	if ((insn & INSN_UD_MASK) == INSN_UD1)
> +		return *(u16 *)(addr + LEN_UD1);
> +
> +	return BUG_UD_NONE;
> +}

Given that insn is u16, this INSN_UD_MASK seems eminently pointless.

Are the bytes after UD1 a proper ModRM such that the whole forms a
decodable instruction? You seem to not mention this anywhere. It is
paramount that the instruction stream is still correctly decodable.

Also, wouldn't it be saner to write this something like:

__always_inline int decode_bug(unsigned long addr, u32 *imm)
{
	u8 v;

	if (addr < TASK_SIZE)
		return BUG_NONE;

	v = *(u8 *)(addr++);
	if (v == 0x67)
		v = *(u8 *)(addr++);
	if (v != 0x0f)
		return BUG_NONE;
	v = *(u8 *)(addr++);
	if (v == 0x0b)
		return BUG_UD2;
	if (v != 0xb9)
		return BUG_NONE;

	if (X86_MODRM_RM(v) == 4)
		addr++; /* consume SiB */

	*imm = 0;
	if (X86_MODRM_MOD(v) == 1)
		*imm = *(u8 *)addr;
	if (X86_MORRM_MOD(v) == 2)
		*imm = *(u32 *)addr;

	// WARN on MOD(v)==3 ??

	return BUG_UD1;
}

Why does the thing emit the asop prefix at all through? afaict it
doesn't affect the immediate you want to get at. And if it does this
prefix, should we worry about other prefixes? Ideally we'd not accept
any prefixes.
Kees Cook June 26, 2024, 7:07 p.m. UTC | #3
On Tue, Jun 25, 2024 at 11:37:19AM +0200, Peter Zijlstra wrote:
> Also, wouldn't it be saner to write this something like:
> 
> __always_inline int decode_bug(unsigned long addr, u32 *imm)
> {
> 	u8 v;
> 
> 	if (addr < TASK_SIZE)
> 		return BUG_NONE;
> 
> 	v = *(u8 *)(addr++);
> 	if (v == 0x67)
> 		v = *(u8 *)(addr++);
> 	if (v != 0x0f)
> 		return BUG_NONE;
> 	v = *(u8 *)(addr++);
> 	if (v == 0x0b)
> 		return BUG_UD2;
> 	if (v != 0xb9)
> 		return BUG_NONE;
> 
> 	if (X86_MODRM_RM(v) == 4)
> 		addr++; /* consume SiB */
> 
> 	*imm = 0;
> 	if (X86_MODRM_MOD(v) == 1)
> 		*imm = *(u8 *)addr;
> 	if (X86_MORRM_MOD(v) == 2)
> 		*imm = *(u32 *)addr;
> 
> 	// WARN on MOD(v)==3 ??
> 
> 	return BUG_UD1;
> }

Thanks for the example! (I think it should use macros instead of
open-coded "0x67", "0x0f", etc, but yeah.)

> Why does the thing emit the asop prefix at all through? afaict it
> doesn't affect the immediate you want to get at. And if it does this
> prefix, should we worry about other prefixes? Ideally we'd not accept
> any prefixes.

AFAICT it's because it's a small immediate? For an x86_64 build, this is
how Clang is generating the UD1.

-Kees
Peter Zijlstra June 28, 2024, 6:04 p.m. UTC | #4
On Wed, Jun 26, 2024 at 12:07:52PM -0700, Kees Cook wrote:
> On Tue, Jun 25, 2024 at 11:37:19AM +0200, Peter Zijlstra wrote:
> > Also, wouldn't it be saner to write this something like:
> > 
> > __always_inline int decode_bug(unsigned long addr, u32 *imm)
> > {
> > 	u8 v;
> > 
> > 	if (addr < TASK_SIZE)
> > 		return BUG_NONE;
> > 
> > 	v = *(u8 *)(addr++);
> > 	if (v == 0x67)
> > 		v = *(u8 *)(addr++);
> > 	if (v != 0x0f)
> > 		return BUG_NONE;
> > 	v = *(u8 *)(addr++);
> > 	if (v == 0x0b)
> > 		return BUG_UD2;
> > 	if (v != 0xb9)
> > 		return BUG_NONE;
> > 

Looks like I lost:

	v = *(u8 *)(addr++);
> > 	if (X86_MODRM_RM(v) == 4)
> > 		addr++; /* consume SiB */
> > 
> > 	*imm = 0;
> > 	if (X86_MODRM_MOD(v) == 1)
> > 		*imm = *(u8 *)addr;
> > 	if (X86_MORRM_MOD(v) == 2)
> > 		*imm = *(u32 *)addr;
> > 
> > 	// WARN on MOD(v)==3 ??
> > 
> > 	return BUG_UD1;
> > }
> 
> Thanks for the example! (I think it should use macros instead of
> open-coded "0x67", "0x0f", etc, but yeah.)

Yeah, I didn't feel like hunting down pre-existing defines for all of
them, but yeah.

> > Why does the thing emit the asop prefix at all through? afaict it
> > doesn't affect the immediate you want to get at. And if it does this
> > prefix, should we worry about other prefixes? Ideally we'd not accept
> > any prefixes.
> 
> AFAICT it's because it's a small immediate? For an x86_64 build, this is
> how Clang is generating the UD1.

So the disp8 immediate comes from MOD==1, MOD==2 has a disp32. What the
prefix does is change the size of the memory being referenced from 32bit
to 16bit iirc, but since UD does not actually perform the load, this is
entirely superfluous afaict.

It might be good to figure out *why* clang thinks it needs this.

A REX prefix is far more likely to be useful (upper 8 destination
register for instance).

Anyway, it seems to basically boil down to needing a fairly complete
instruction decoder without being able the use the normal one :/
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 28e20975c26f..b8512887ffb1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22635,6 +22635,8 @@  L:	kasan-dev@googlegroups.com
 L:	linux-hardening@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
+F:	arch/x86/include/asm/ubsan.h
+F:	arch/x86/kernel/ubsan.c
 F:	Documentation/dev-tools/ubsan.rst
 F:	include/linux/ubsan.h
 F:	lib/Kconfig.ubsan
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index a3ec87d198ac..a363d13c263b 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -13,6 +13,17 @@ 
 #define INSN_UD2	0x0b0f
 #define LEN_UD2		2
 
+/*
+ * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
+ */
+#define INSN_UD1	0xb90f
+#define INSN_UD_MASK	0xFFFF
+#define LEN_UD1		2
+#define INSN_ASOP	0x67
+#define INSN_ASOP_MASK	0x00FF
+#define BUG_UD_NONE	0xFFFF
+#define BUG_UD2		0xFFFE
+
 #ifdef CONFIG_GENERIC_BUG
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/ubsan.h b/arch/x86/include/asm/ubsan.h
new file mode 100644
index 000000000000..ac2080984e83
--- /dev/null
+++ b/arch/x86/include/asm/ubsan.h
@@ -0,0 +1,23 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_UBSAN_H
+#define _ASM_X86_UBSAN_H
+
+/*
+ * Clang Undefined Behavior Sanitizer trap mode support.
+ */
+#include <linux/bug.h>
+#include <linux/ubsan.h>
+#include <asm/ptrace.h>
+
+/*
+ * UBSAN uses the EAX register to encode its type in the ModRM byte.
+ */
+#define UBSAN_REG	0x40
+
+#ifdef CONFIG_UBSAN_TRAP
+void handle_ubsan_failure(struct pt_regs *regs, u16 insn);
+#else
+static inline void handle_ubsan_failure(struct pt_regs *regs, u16 insn) { return; }
+#endif /* CONFIG_UBSAN_TRAP */
+
+#endif /* _ASM_X86_UBSAN_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 74077694da7d..fe1d9db27500 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -145,6 +145,7 @@  obj-$(CONFIG_UNWINDER_GUESS)		+= unwind_guess.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)		+= sev.o
 
 obj-$(CONFIG_CFI_CLANG)			+= cfi.o
+obj-$(CONFIG_UBSAN_TRAP)		+= ubsan.o
 
 obj-$(CONFIG_CALL_THUNKS)		+= callthunks.o
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4fa0b17e5043..aef21287e7ed 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -67,6 +67,7 @@ 
 #include <asm/vdso.h>
 #include <asm/tdx.h>
 #include <asm/cfi.h>
+#include <asm/ubsan.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/x86_init.h>
@@ -91,6 +92,29 @@  __always_inline int is_valid_bugaddr(unsigned long addr)
 	return *(unsigned short *)addr == INSN_UD2;
 }
 
+/*
+ * Check for UD1, UD2, with or without Address Size Override Prefixes instructions.
+ */
+__always_inline u16 get_ud_type(unsigned long addr)
+{
+	u16 insn;
+
+	if (addr < TASK_SIZE_MAX)
+		return BUG_UD_NONE;
+	insn = *(u16 *)addr;
+	if ((insn & INSN_UD_MASK) == INSN_UD2)
+		return BUG_UD2;
+	if ((insn & INSN_ASOP_MASK) == INSN_ASOP)
+		insn = *(u16 *)(++addr);
+
+	// UBSAN encode the failure type in the two bytes after UD1
+	if ((insn & INSN_UD_MASK) == INSN_UD1)
+		return *(u16 *)(addr + LEN_UD1);
+
+	return BUG_UD_NONE;
+}
+
+
 static nokprobe_inline int
 do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
 		  struct pt_regs *regs,	long error_code)
@@ -216,6 +240,7 @@  static inline void handle_invalid_op(struct pt_regs *regs)
 static noinstr bool handle_bug(struct pt_regs *regs)
 {
 	bool handled = false;
+	int ud_type;
 
 	/*
 	 * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
@@ -223,7 +248,8 @@  static noinstr bool handle_bug(struct pt_regs *regs)
 	 * irqentry_enter().
 	 */
 	kmsan_unpoison_entry_regs(regs);
-	if (!is_valid_bugaddr(regs->ip))
+	ud_type = get_ud_type(regs->ip);
+	if (ud_type == BUG_UD_NONE)
 		return handled;
 
 	/*
@@ -236,10 +262,14 @@  static noinstr bool handle_bug(struct pt_regs *regs)
 	 */
 	if (regs->flags & X86_EFLAGS_IF)
 		raw_local_irq_enable();
-	if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
-	    handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
-		regs->ip += LEN_UD2;
-		handled = true;
+	if (ud_type == INSN_UD2) {
+		if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
+		    handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
+			regs->ip += LEN_UD2;
+			handled = true;
+		}
+	} else {
+		handle_ubsan_failure(regs, ud_type);
 	}
 	if (regs->flags & X86_EFLAGS_IF)
 		raw_local_irq_disable();
diff --git a/arch/x86/kernel/ubsan.c b/arch/x86/kernel/ubsan.c
new file mode 100644
index 000000000000..c90e337a1b6a
--- /dev/null
+++ b/arch/x86/kernel/ubsan.c
@@ -0,0 +1,21 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Clang Undefined Behavior Sanitizer trap mode support.
+ */
+#include <linux/bug.h>
+#include <linux/string.h>
+#include <linux/printk.h>
+#include <linux/ubsan.h>
+#include <asm/ptrace.h>
+#include <asm/ubsan.h>
+
+/*
+ * Checks for the information embedded in the UD1 trap instruction
+ * for the UB Sanitizer in order to pass along debugging output.
+ */
+void handle_ubsan_failure(struct pt_regs *regs, u16 type)
+{
+	if ((type & 0xFF) == UBSAN_REG)
+		type >>= 8;
+	pr_crit("%s at %pS\n", report_ubsan_failure(regs, type), (void *)regs->ip);
+}