diff mbox series

[v3,11/14] x86: Handle int3 for inline KASAN reports

Message ID 012c84049b853d6853a7d6c887ce0c2323bcd80a.1743772053.git.maciej.wieczor-retman@intel.com (mailing list archive)
State New
Headers show
Series [v3,01/14] kasan: sw_tags: Use arithmetic shift for shadow computation | expand

Commit Message

Maciej Wieczor-Retman April 4, 2025, 1:14 p.m. UTC
When a tag mismatch happens in inline software tag-based KASAN on x86 an
int3 instruction is executed and needs proper handling.

Call kasan_report() from the int3 handler and pass down the proper
information from registers - RDI should contain the problematic address
and RAX other metadata.

Also early return from the int3 selftest if inline KASAN is enabled
since it will cause a kernel panic otherwise.

Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
---
Changelog v3:
- Add this patch to the series.

 arch/x86/kernel/alternative.c |  3 ++
 arch/x86/kernel/traps.c       | 52 +++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

Comments

Dave Hansen April 4, 2025, 5:55 p.m. UTC | #1
On 4/4/25 06:14, Maciej Wieczor-Retman wrote:
> When a tag mismatch happens in inline software tag-based KASAN on x86 an
> int3 instruction is executed and needs proper handling.

Does this mean "inline software"? Or "inline" functions? I'm not quite
parsing that. I think it needs some more background.

> Call kasan_report() from the int3 handler and pass down the proper
> information from registers - RDI should contain the problematic address
> and RAX other metadata.
> 
> Also early return from the int3 selftest if inline KASAN is enabled
> since it will cause a kernel panic otherwise.
...
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index bf82c6f7d690..ba277a25b57f 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -1979,6 +1979,9 @@ static noinline void __init int3_selftest(void)
>  	};
>  	unsigned int val = 0;
>  
> +	if (IS_ENABLED(CONFIG_KASAN_INLINE))
> +		return;

Comments, please. This is a total non sequitur otherwise.

>  	BUG_ON(register_die_notifier(&int3_exception_nb));
>  
>  	/*
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 9f88b8a78e50..32c81fc2d439 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
...
> @@ -849,6 +850,51 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
>  	cond_local_irq_disable(regs);
>  }
>  
> +#ifdef CONFIG_KASAN_SW_TAGS
> +
> +#define KASAN_RAX_RECOVER	0x20
> +#define KASAN_RAX_WRITE	0x10
> +#define KASAN_RAX_SIZE_MASK	0x0f
> +#define KASAN_RAX_SIZE(rax)	(1 << ((rax) & KASAN_RAX_SIZE_MASK))

This ABI _looks_ like it was conjured out out of thin air. I assume it's
coming from the compiler. Any pointers to that ABI definition in or out
of the kernel would be appreciated.

> +static bool kasan_handler(struct pt_regs *regs)
> +{
> +	int metadata = regs->ax;
> +	u64 addr = regs->di;
> +	u64 pc = regs->ip;
> +	bool recover = metadata & KASAN_RAX_RECOVER;
> +	bool write = metadata & KASAN_RAX_WRITE;
> +	size_t size = KASAN_RAX_SIZE(metadata);

"metadata" is exactly the same length as "regs->ax", so it seems a
little silly. Also, please use vertical alignment as a tool to make code
more readable. Isn't this much more readable?

	bool recover = regs->ax & KASAN_RAX_RECOVER;
	bool write   = regs->ax & KASAN_RAX_WRITE;
	size_t size  = KASAN_RAX_SIZE(regs->ax);
	u64 addr     = regs->di;
	u64 pc       = regs->ip;

> +	if (!IS_ENABLED(CONFIG_KASAN_INLINE))
> +		return false;
> +
> +	if (user_mode(regs))
> +		return false;
> +
> +	kasan_report((void *)addr, size, write, pc);
> +
> +	/*
> +	 * The instrumentation allows to control whether we can proceed after
> +	 * a crash was detected. This is done by passing the -recover flag to
> +	 * the compiler. Disabling recovery allows to generate more compact
> +	 * code.
> +	 *
> +	 * Unfortunately disabling recovery doesn't work for the kernel right
> +	 * now. KASAN reporting is disabled in some contexts (for example when
> +	 * the allocator accesses slab object metadata; this is controlled by
> +	 * current->kasan_depth). All these accesses are detected by the tool,
> +	 * even though the reports for them are not printed.
> +	 *
> +	 * This is something that might be fixed at some point in the future.
> +	 */

Can we please find a way to do this that doesn't copy and paste a rather
verbose comment?

What if we passed 'recover' into kasan_report() and had it do the die()?

> +	if (!recover)
> +		die("Oops - KASAN", regs, 0);
> +	return true;
> +}
> +
> +#endif
> +
>  static bool do_int3(struct pt_regs *regs)
>  {
>  	int res;
> @@ -863,6 +909,12 @@ static bool do_int3(struct pt_regs *regs)
>  	if (kprobe_int3_handler(regs))
>  		return true;
>  #endif
> +
> +#ifdef CONFIG_KASAN_SW_TAGS
> +	if (kasan_handler(regs))
> +		return true;
> +#endif
I won't get _too_ grumbly about ti since there's another culprit right
above, but the "no #fidefs in .c files" rule still applies. The right
way to do this is with a stub kasan_handler() in a header with the
#ifdef in the header.

Actually, ditto on the kasan_handler() #ifdef. I suspect it can go away
too and be replaced with a IS_ENABLED(CONFIG_KASAN_SW_TAGS) check.
diff mbox series

Patch

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index bf82c6f7d690..ba277a25b57f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1979,6 +1979,9 @@  static noinline void __init int3_selftest(void)
 	};
 	unsigned int val = 0;
 
+	if (IS_ENABLED(CONFIG_KASAN_INLINE))
+		return;
+
 	BUG_ON(register_die_notifier(&int3_exception_nb));
 
 	/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9f88b8a78e50..32c81fc2d439 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -16,6 +16,7 @@ 
 #include <linux/interrupt.h>
 #include <linux/kallsyms.h>
 #include <linux/kmsan.h>
+#include <linux/kasan.h>
 #include <linux/spinlock.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
@@ -849,6 +850,51 @@  DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
 	cond_local_irq_disable(regs);
 }
 
+#ifdef CONFIG_KASAN_SW_TAGS
+
+#define KASAN_RAX_RECOVER	0x20
+#define KASAN_RAX_WRITE	0x10
+#define KASAN_RAX_SIZE_MASK	0x0f
+#define KASAN_RAX_SIZE(rax)	(1 << ((rax) & KASAN_RAX_SIZE_MASK))
+
+static bool kasan_handler(struct pt_regs *regs)
+{
+	int metadata = regs->ax;
+	u64 addr = regs->di;
+	u64 pc = regs->ip;
+	bool recover = metadata & KASAN_RAX_RECOVER;
+	bool write = metadata & KASAN_RAX_WRITE;
+	size_t size = KASAN_RAX_SIZE(metadata);
+
+	if (!IS_ENABLED(CONFIG_KASAN_INLINE))
+		return false;
+
+	if (user_mode(regs))
+		return false;
+
+	kasan_report((void *)addr, size, write, pc);
+
+	/*
+	 * The instrumentation allows to control whether we can proceed after
+	 * a crash was detected. This is done by passing the -recover flag to
+	 * the compiler. Disabling recovery allows to generate more compact
+	 * code.
+	 *
+	 * Unfortunately disabling recovery doesn't work for the kernel right
+	 * now. KASAN reporting is disabled in some contexts (for example when
+	 * the allocator accesses slab object metadata; this is controlled by
+	 * current->kasan_depth). All these accesses are detected by the tool,
+	 * even though the reports for them are not printed.
+	 *
+	 * This is something that might be fixed at some point in the future.
+	 */
+	if (!recover)
+		die("Oops - KASAN", regs, 0);
+	return true;
+}
+
+#endif
+
 static bool do_int3(struct pt_regs *regs)
 {
 	int res;
@@ -863,6 +909,12 @@  static bool do_int3(struct pt_regs *regs)
 	if (kprobe_int3_handler(regs))
 		return true;
 #endif
+
+#ifdef CONFIG_KASAN_SW_TAGS
+	if (kasan_handler(regs))
+		return true;
+#endif
+
 	res = notify_die(DIE_INT3, "int3", regs, 0, X86_TRAP_BP, SIGTRAP);
 
 	return res == NOTIFY_STOP;