diff mbox series

[v2,2/2] sysctl: introduce kernel.pkill_on_warn

Message ID 20211027233215.306111-3-alex.popov@linux.com (mailing list archive)
State Changes Requested
Delegated to: Kees Cook
Headers show
Series [v2,1/2] bug: do refactoring allowing to add a warning handling action | expand

Commit Message

Alexander Popov Oct. 27, 2021, 11:32 p.m. UTC
Currently, the Linux kernel provides two types of reaction to kernel
warnings:
 1. Do nothing (by default),
 2. Call panic() if panic_on_warn is set. That's a very strong reaction,
    so panic_on_warn is usually disabled on production systems.

From a safety point of view, the Linux kernel misses a middle way of
handling kernel warnings:
 - The kernel should stop the activity that provokes a warning,
 - But the kernel should avoid complete denial of service.

From a security point of view, kernel warning messages provide a lot of
useful information for attackers. Many GNU/Linux distributions allow
unprivileged users to read the kernel log, so attackers use kernel
warning infoleak in vulnerability exploits. See the examples:
https://a13xp0p0v.github.io/2021/02/09/CVE-2021-26708.html
https://a13xp0p0v.github.io/2020/02/15/CVE-2019-18683.html
https://googleprojectzero.blogspot.com/2018/09/a-cache-invalidation-bug-in-linux.html

Let's introduce the pkill_on_warn sysctl.
If this parameter is set, the kernel kills all threads in a process that
provoked a kernel warning. This behavior is reasonable from a safety point of
view described above. It is also useful for kernel security hardening because
the system kills an exploit process that hits a kernel warning.

Moreover, bugs usually don't come alone, and a kernel warning may be
followed by memory corruption or other bad effects. So pkill_on_warn allows
the kernel to stop the process when the first signs of wrong behavior
are detected.

Signed-off-by: Alexander Popov <alex.popov@linux.com>
---
 Documentation/admin-guide/sysctl/kernel.rst | 14 +++++++++++++
 include/asm-generic/bug.h                   | 12 ++++++++---
 include/linux/panic.h                       |  3 +++
 kernel/panic.c                              | 22 ++++++++++++++++++++-
 kernel/sysctl.c                             |  9 +++++++++
 lib/bug.c                                   |  3 +++
 6 files changed, 59 insertions(+), 4 deletions(-)

Comments

Steven Rostedt Nov. 12, 2021, 9:24 p.m. UTC | #1
On Thu, 28 Oct 2021 02:32:15 +0300
Alexander Popov <alex.popov@linux.com> wrote:

> Signed-off-by: Alexander Popov <alex.popov@linux.com>
> ---
>  Documentation/admin-guide/sysctl/kernel.rst | 14 +++++++++++++
>  include/asm-generic/bug.h                   | 12 ++++++++---
>  include/linux/panic.h                       |  3 +++
>  kernel/panic.c                              | 22 ++++++++++++++++++++-
>  kernel/sysctl.c                             |  9 +++++++++
>  lib/bug.c                                   |  3 +++
>  6 files changed, 59 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
> index 426162009ce9..5faf395fdf8f 100644
> --- a/Documentation/admin-guide/sysctl/kernel.rst
> +++ b/Documentation/admin-guide/sysctl/kernel.rst
> @@ -921,6 +921,20 @@ lives in) pid namespace. When selecting a pid for a next task on fork
>  kernel tries to allocate a number starting from this one.
>  
>  
> +pkill_on_warn
> +=============
> +
> +Kills all threads in a process that provoked a kernel warning.
> +That allows the kernel to stop the process when the first signs
> +of wrong behavior are detected.
> +
> += =====================================================================
> +0 Allows a process to proceed execution after hitting a kernel warning,
> +  this is the default behavior.
> +1 Kills all threads in a process that provoked a kernel warning.
> += =====================================================================
> +
> +
>  powersave-nap (PPC only)
>  ========================
>  
> diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
> index 881aeaf5a2d5..959000b5856a 100644
> --- a/include/asm-generic/bug.h
> +++ b/include/asm-generic/bug.h
> @@ -94,8 +94,10 @@ void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
>  #ifndef WARN_ON_ONCE
>  #define WARN_ON_ONCE(condition) ({					\
>  	int __ret_warn_on = !!(condition);				\
> -	if (unlikely(__ret_warn_on))					\
> +	if (unlikely(__ret_warn_on)) {					\
>  		DO_ONCE_LITE(__WARN_printf, TAINT_WARN, NULL);		\
> +		do_pkill_on_warn();					\

Should this be a config option so that those that do not want this do not
need to have the added overhead of a function call embedded at every
WARN*() in their code?

That is, do_pkill_on_warn() should be defined as do { } while (0), and not
add any I$ overhead when compiled out?

> +	}								\
>  	unlikely(__ret_warn_on);					\
>  })
>  #endif
> @@ -151,15 +153,19 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
>  
>  #define WARN_ONCE(condition, format...) ({				\
>  	int __ret_warn_on = !!(condition);				\
> -	if (unlikely(__ret_warn_on))					\
> +	if (unlikely(__ret_warn_on)) {					\
>  		DO_ONCE_LITE(__WARN_printf, TAINT_WARN, format);	\
> +		do_pkill_on_warn();					\
> +	}								\
>  	unlikely(__ret_warn_on);					\
>  })
>  
>  #define WARN_TAINT_ONCE(condition, taint, format...) ({			\
>  	int __ret_warn_on = !!(condition);				\
> -	if (unlikely(__ret_warn_on))					\
> +	if (unlikely(__ret_warn_on)) {					\
>  		DO_ONCE_LITE(__WARN_printf, taint, format);		\
> +		do_pkill_on_warn();					\
> +	}								\
>  	unlikely(__ret_warn_on);					\
>  })
>  
> diff --git a/include/linux/panic.h b/include/linux/panic.h
> index f5844908a089..f79c69279859 100644
> --- a/include/linux/panic.h
> +++ b/include/linux/panic.h
> @@ -27,6 +27,9 @@ extern int panic_on_oops;
>  extern int panic_on_unrecovered_nmi;
>  extern int panic_on_io_nmi;
>  extern int panic_on_warn;
> +extern int pkill_on_warn;
> +
> +extern void do_pkill_on_warn(void);
>  
>  extern unsigned long panic_on_taint;
>  extern bool panic_on_taint_nousertaint;
> diff --git a/kernel/panic.c b/kernel/panic.c
> index cefd7d82366f..1323c9e2630f 100644
> --- a/kernel/panic.c
> +++ b/kernel/panic.c
> @@ -53,6 +53,7 @@ static int pause_on_oops_flag;
>  static DEFINE_SPINLOCK(pause_on_oops_lock);
>  bool crash_kexec_post_notifiers;
>  int panic_on_warn __read_mostly;
> +int pkill_on_warn __read_mostly;
>  unsigned long panic_on_taint;
>  bool panic_on_taint_nousertaint = false;
>  
> @@ -625,13 +626,16 @@ void warn_slowpath_fmt(const char *file, int line, unsigned taint,
>  	if (!fmt) {
>  		__warn(file, line, __builtin_return_address(0), taint,
>  		       NULL, NULL);
> -		return;
> +		goto out;
>  	}
>  
>  	args.fmt = fmt;
>  	va_start(args.args, fmt);
>  	__warn(file, line, __builtin_return_address(0), taint, NULL, &args);
>  	va_end(args.args);
> +
> +out:
> +	do_pkill_on_warn();
>  }
>  EXPORT_SYMBOL(warn_slowpath_fmt);
>  #else
> @@ -732,3 +736,19 @@ static int __init panic_on_taint_setup(char *s)
>  	return 0;
>  }
>  early_param("panic_on_taint", panic_on_taint_setup);
> +
> +void do_pkill_on_warn(void)
> +{
> +	if (!pkill_on_warn)
> +		return;
> +
> +	if (is_global_init(current))
> +		return;
> +
> +	if (current->flags & PF_KTHREAD)
> +		return;
> +
> +	if (system_state >= SYSTEM_RUNNING)
> +		do_send_sig_info(SIGKILL, SEND_SIG_PRIV, current, PIDTYPE_TGID);

I believe this was mentioned before. I'm not sure how safe it is to call
do_send_sig_info() in random areas of the kernel, as that could possibly
cause a deadlock with the locking that is taken.

Best to use irq_work() and have a irq_work interrupt handle the
do_sig_info, because then you know you are safe to call this. Of course,
then you need some way to pass the task to the handler.

Could do some kind of clever trick, where we add a PF_KILL_ME flag to the
task, and the irq worker just loops over all the tasks kill all those with
it set?


> +}
> +EXPORT_SYMBOL(do_pkill_on_warn);
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 083be6af29d7..7fe6f0aaad2b 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -2656,6 +2656,15 @@ static struct ctl_table kern_table[] = {
>  		.extra1		= SYSCTL_ZERO,
>  		.extra2		= SYSCTL_ONE,
>  	},
> +		{

extra tab?

-- Steve

> +		.procname	= "pkill_on_warn",
> +		.data		= &pkill_on_warn,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
>  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
>  	{
>  		.procname	= "timer_migration",
> diff --git a/lib/bug.c b/lib/bug.c
> index 1a91f01412b8..28cc8a5b2ee0 100644
> --- a/lib/bug.c
> +++ b/lib/bug.c
> @@ -214,6 +214,9 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
>  	bug_type = BUG_TRAP_TYPE_BUG;
>  
>  out:
> +	if (bug_type == BUG_TRAP_TYPE_WARN)
> +		do_pkill_on_warn();
> +
>  	return bug_type;
>  }
>
diff mbox series

Patch

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 426162009ce9..5faf395fdf8f 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -921,6 +921,20 @@  lives in) pid namespace. When selecting a pid for a next task on fork
 kernel tries to allocate a number starting from this one.
 
 
+pkill_on_warn
+=============
+
+Kills all threads in a process that provoked a kernel warning.
+That allows the kernel to stop the process when the first signs
+of wrong behavior are detected.
+
+= =====================================================================
+0 Allows a process to proceed execution after hitting a kernel warning,
+  this is the default behavior.
+1 Kills all threads in a process that provoked a kernel warning.
+= =====================================================================
+
+
 powersave-nap (PPC only)
 ========================
 
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 881aeaf5a2d5..959000b5856a 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -94,8 +94,10 @@  void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
 #ifndef WARN_ON_ONCE
 #define WARN_ON_ONCE(condition) ({					\
 	int __ret_warn_on = !!(condition);				\
-	if (unlikely(__ret_warn_on))					\
+	if (unlikely(__ret_warn_on)) {					\
 		DO_ONCE_LITE(__WARN_printf, TAINT_WARN, NULL);		\
+		do_pkill_on_warn();					\
+	}								\
 	unlikely(__ret_warn_on);					\
 })
 #endif
@@ -151,15 +153,19 @@  void __warn(const char *file, int line, void *caller, unsigned taint,
 
 #define WARN_ONCE(condition, format...) ({				\
 	int __ret_warn_on = !!(condition);				\
-	if (unlikely(__ret_warn_on))					\
+	if (unlikely(__ret_warn_on)) {					\
 		DO_ONCE_LITE(__WARN_printf, TAINT_WARN, format);	\
+		do_pkill_on_warn();					\
+	}								\
 	unlikely(__ret_warn_on);					\
 })
 
 #define WARN_TAINT_ONCE(condition, taint, format...) ({			\
 	int __ret_warn_on = !!(condition);				\
-	if (unlikely(__ret_warn_on))					\
+	if (unlikely(__ret_warn_on)) {					\
 		DO_ONCE_LITE(__WARN_printf, taint, format);		\
+		do_pkill_on_warn();					\
+	}								\
 	unlikely(__ret_warn_on);					\
 })
 
diff --git a/include/linux/panic.h b/include/linux/panic.h
index f5844908a089..f79c69279859 100644
--- a/include/linux/panic.h
+++ b/include/linux/panic.h
@@ -27,6 +27,9 @@  extern int panic_on_oops;
 extern int panic_on_unrecovered_nmi;
 extern int panic_on_io_nmi;
 extern int panic_on_warn;
+extern int pkill_on_warn;
+
+extern void do_pkill_on_warn(void);
 
 extern unsigned long panic_on_taint;
 extern bool panic_on_taint_nousertaint;
diff --git a/kernel/panic.c b/kernel/panic.c
index cefd7d82366f..1323c9e2630f 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -53,6 +53,7 @@  static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
 bool crash_kexec_post_notifiers;
 int panic_on_warn __read_mostly;
+int pkill_on_warn __read_mostly;
 unsigned long panic_on_taint;
 bool panic_on_taint_nousertaint = false;
 
@@ -625,13 +626,16 @@  void warn_slowpath_fmt(const char *file, int line, unsigned taint,
 	if (!fmt) {
 		__warn(file, line, __builtin_return_address(0), taint,
 		       NULL, NULL);
-		return;
+		goto out;
 	}
 
 	args.fmt = fmt;
 	va_start(args.args, fmt);
 	__warn(file, line, __builtin_return_address(0), taint, NULL, &args);
 	va_end(args.args);
+
+out:
+	do_pkill_on_warn();
 }
 EXPORT_SYMBOL(warn_slowpath_fmt);
 #else
@@ -732,3 +736,19 @@  static int __init panic_on_taint_setup(char *s)
 	return 0;
 }
 early_param("panic_on_taint", panic_on_taint_setup);
+
+void do_pkill_on_warn(void)
+{
+	if (!pkill_on_warn)
+		return;
+
+	if (is_global_init(current))
+		return;
+
+	if (current->flags & PF_KTHREAD)
+		return;
+
+	if (system_state >= SYSTEM_RUNNING)
+		do_send_sig_info(SIGKILL, SEND_SIG_PRIV, current, PIDTYPE_TGID);
+}
+EXPORT_SYMBOL(do_pkill_on_warn);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 083be6af29d7..7fe6f0aaad2b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2656,6 +2656,15 @@  static struct ctl_table kern_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+		{
+		.procname	= "pkill_on_warn",
+		.data		= &pkill_on_warn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 	{
 		.procname	= "timer_migration",
diff --git a/lib/bug.c b/lib/bug.c
index 1a91f01412b8..28cc8a5b2ee0 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -214,6 +214,9 @@  enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 	bug_type = BUG_TRAP_TYPE_BUG;
 
 out:
+	if (bug_type == BUG_TRAP_TYPE_WARN)
+		do_pkill_on_warn();
+
 	return bug_type;
 }