[v2] kernel: add panic_on_taint
diff mbox series

Message ID 20200507180631.308441-1-aquini@redhat.com
State New
Headers show
Series
  • [v2] kernel: add panic_on_taint
Related show

Commit Message

Rafael Aquini May 7, 2020, 6:06 p.m. UTC
Analogously to the introduction of panic_on_warn, this patch
introduces a kernel option named panic_on_taint in order to
provide a simple and generic way to stop execution and catch
a coredump when the kernel gets tainted by any given taint flag.

This is useful for debugging sessions as it avoids rebuilding
the kernel to explicitly add calls to panic() or BUG() into
code sites that introduce the taint flags of interest.
Another, perhaps less frequent, use for this option would be
as a mean for assuring a security policy (in paranoid mode)
case where no single taint is allowed for the running system.

Suggested-by: Qian Cai <cai@lca.pw>
Signed-off-by: Rafael Aquini <aquini@redhat.com>
---
Changelog, from v1:
* get rid of unnecessary/misguided compiler hints		(Luis)
* enhance documentation text for the new kernel parameter	(Randy)

 Documentation/admin-guide/kdump/kdump.rst      | 10 ++++++++++
 .../admin-guide/kernel-parameters.txt          |  7 +++++++
 Documentation/admin-guide/sysctl/kernel.rst    | 18 ++++++++++++++++++
 include/linux/kernel.h                         |  1 +
 kernel/panic.c                                 |  7 +++++++
 kernel/sysctl.c                                |  7 +++++++
 6 files changed, 50 insertions(+)

Comments

Luis Chamberlain May 7, 2020, 6:22 p.m. UTC | #1
On Thu, May 07, 2020 at 02:06:31PM -0400, Rafael Aquini wrote:
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 8a176d8727a3..b80ab660d727 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1217,6 +1217,13 @@ static struct ctl_table kern_table[] = {
>  		.extra1		= SYSCTL_ZERO,
>  		.extra2		= SYSCTL_ONE,
>  	},
> +	{
> +		.procname	= "panic_on_taint",
> +		.data		= &panic_on_taint,
> +		.maxlen		= sizeof(unsigned long),
> +		.mode		= 0644,
> +		.proc_handler	= proc_doulongvec_minmax,
> +	},

You sent this out before I could reply to the other thread on v1.
My thoughts on the min / max values, or lack here:
                                                                                
Valid range doesn't mean "currently allowed defined" masks.                     

For example, if you expect to panic due to a taint, but a new taint type
you want was not added on an older kernel you would be under a very
*false* sense of security that your kernel may not have hit such a
taint, but the reality of the situation was that the kernel didn't
support that taint flag only added in future kernels.                           

You may need to define a new flag (MAX_TAINT) which should be the last
value + 1, the allowed max values would be                                      

(2^MAX_TAINT)-1                                                                 

or                                                                              

(1<<MAX_TAINT)-1  

Since this is to *PANIC* I think we do want to test ranges and ensure
only valid ones are allowed.

  Luis
Rafael Aquini May 7, 2020, 6:43 p.m. UTC | #2
On Thu, May 07, 2020 at 06:22:57PM +0000, Luis Chamberlain wrote:
> On Thu, May 07, 2020 at 02:06:31PM -0400, Rafael Aquini wrote:
> > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > index 8a176d8727a3..b80ab660d727 100644
> > --- a/kernel/sysctl.c
> > +++ b/kernel/sysctl.c
> > @@ -1217,6 +1217,13 @@ static struct ctl_table kern_table[] = {
> >  		.extra1		= SYSCTL_ZERO,
> >  		.extra2		= SYSCTL_ONE,
> >  	},
> > +	{
> > +		.procname	= "panic_on_taint",
> > +		.data		= &panic_on_taint,
> > +		.maxlen		= sizeof(unsigned long),
> > +		.mode		= 0644,
> > +		.proc_handler	= proc_doulongvec_minmax,
> > +	},
> 
> You sent this out before I could reply to the other thread on v1.
> My thoughts on the min / max values, or lack here:
>                                                                                 
> Valid range doesn't mean "currently allowed defined" masks.                     
> 
> For example, if you expect to panic due to a taint, but a new taint type
> you want was not added on an older kernel you would be under a very
> *false* sense of security that your kernel may not have hit such a
> taint, but the reality of the situation was that the kernel didn't
> support that taint flag only added in future kernels.                           
> 
> You may need to define a new flag (MAX_TAINT) which should be the last
> value + 1, the allowed max values would be                                      
> 
> (2^MAX_TAINT)-1                                                                 
> 
> or                                                                              
> 
> (1<<MAX_TAINT)-1  
> 
> Since this is to *PANIC* I think we do want to test ranges and ensure
> only valid ones are allowed.
>

Ok. I'm thinking in:

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a176d8727a3..ee492431e7b0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1217,6 +1217,15 @@ static struct ctl_table kern_table[] = {
                .extra1         = SYSCTL_ZERO,
                .extra2         = SYSCTL_ONE,
        },
+       {
+               .procname       = "panic_on_taint",
+               .data           = &panic_on_taint,
+               .maxlen         = sizeof(unsigned long),
+               .mode           = 0644,
+               .proc_handler   = proc_doulongvec_minmax,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = (1 << TAINT_FLAGS_COUNT << 1) - 1,
+       },


Would that address your concerns wrt this one?

Cheers!
-- Rafael
Rafael Aquini May 7, 2020, 6:47 p.m. UTC | #3
On Thu, May 07, 2020 at 02:43:16PM -0400, Rafael Aquini wrote:
> On Thu, May 07, 2020 at 06:22:57PM +0000, Luis Chamberlain wrote:
> > On Thu, May 07, 2020 at 02:06:31PM -0400, Rafael Aquini wrote:
> > > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > > index 8a176d8727a3..b80ab660d727 100644
> > > --- a/kernel/sysctl.c
> > > +++ b/kernel/sysctl.c
> > > @@ -1217,6 +1217,13 @@ static struct ctl_table kern_table[] = {
> > >  		.extra1		= SYSCTL_ZERO,
> > >  		.extra2		= SYSCTL_ONE,
> > >  	},
> > > +	{
> > > +		.procname	= "panic_on_taint",
> > > +		.data		= &panic_on_taint,
> > > +		.maxlen		= sizeof(unsigned long),
> > > +		.mode		= 0644,
> > > +		.proc_handler	= proc_doulongvec_minmax,
> > > +	},
> > 
> > You sent this out before I could reply to the other thread on v1.
> > My thoughts on the min / max values, or lack here:
> >                                                                                 
> > Valid range doesn't mean "currently allowed defined" masks.                     
> > 
> > For example, if you expect to panic due to a taint, but a new taint type
> > you want was not added on an older kernel you would be under a very
> > *false* sense of security that your kernel may not have hit such a
> > taint, but the reality of the situation was that the kernel didn't
> > support that taint flag only added in future kernels.                           
> > 
> > You may need to define a new flag (MAX_TAINT) which should be the last
> > value + 1, the allowed max values would be                                      
> > 
> > (2^MAX_TAINT)-1                                                                 
> > 
> > or                                                                              
> > 
> > (1<<MAX_TAINT)-1  
> > 
> > Since this is to *PANIC* I think we do want to test ranges and ensure
> > only valid ones are allowed.
> >
> 
> Ok. I'm thinking in:
> 
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 8a176d8727a3..ee492431e7b0 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1217,6 +1217,15 @@ static struct ctl_table kern_table[] = {
>                 .extra1         = SYSCTL_ZERO,
>                 .extra2         = SYSCTL_ONE,
>         },
> +       {
> +               .procname       = "panic_on_taint",
> +               .data           = &panic_on_taint,
> +               .maxlen         = sizeof(unsigned long),
> +               .mode           = 0644,
> +               .proc_handler   = proc_doulongvec_minmax,
> +               .extra1         = SYSCTL_ZERO,
> +               .extra2         = (1 << TAINT_FLAGS_COUNT << 1) - 1,
							^^^^^^^^
Without that crap, obviously. Sorry. That was a screw up on my side,
when copyin' and pasting.

-- Rafael
	
> +       },
> 
> 
> Would that address your concerns wrt this one?
> 
> Cheers!
> -- Rafael
Luis Chamberlain May 7, 2020, 6:50 p.m. UTC | #4
On Thu, May 07, 2020 at 02:06:31PM -0400, Rafael Aquini wrote:
> Another, perhaps less frequent, use for this option would be
> as a mean for assuring a security policy (in paranoid mode)
> case where no single taint is allowed for the running system.

If used for this purpose then we must add a new TAINT flag for
proc_taint() was used, otherwise we can cheat to show a taint
*did* happen, where in fact it never happened, some punk just
echo'd a value into the kernel's /proc/sys/kernel/tainted.

Forunately proc_taint() only allows to *increment* the taint, not
reduce.

  Luis
Rafael Aquini May 7, 2020, 6:53 p.m. UTC | #5
On Thu, May 07, 2020 at 06:50:46PM +0000, Luis Chamberlain wrote:
> On Thu, May 07, 2020 at 02:06:31PM -0400, Rafael Aquini wrote:
> > Another, perhaps less frequent, use for this option would be
> > as a mean for assuring a security policy (in paranoid mode)
> > case where no single taint is allowed for the running system.
> 
> If used for this purpose then we must add a new TAINT flag for
> proc_taint() was used, otherwise we can cheat to show a taint
> *did* happen, where in fact it never happened, some punk just
> echo'd a value into the kernel's /proc/sys/kernel/tainted.
>

To accomplish that, the punk would need to be root, though, in which 
case everything else is doomed, already.
 
> Forunately proc_taint() only allows to *increment* the taint, not
> reduce.
> 
>   Luis
>
Luis Chamberlain May 7, 2020, 8:33 p.m. UTC | #6
On Thu, May 07, 2020 at 02:47:05PM -0400, Rafael Aquini wrote:
> On Thu, May 07, 2020 at 02:43:16PM -0400, Rafael Aquini wrote:
> > On Thu, May 07, 2020 at 06:22:57PM +0000, Luis Chamberlain wrote:
> > > On Thu, May 07, 2020 at 02:06:31PM -0400, Rafael Aquini wrote:
> > > > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > > > index 8a176d8727a3..b80ab660d727 100644
> > > > --- a/kernel/sysctl.c
> > > > +++ b/kernel/sysctl.c
> > > > @@ -1217,6 +1217,13 @@ static struct ctl_table kern_table[] = {
> > > >  		.extra1		= SYSCTL_ZERO,
> > > >  		.extra2		= SYSCTL_ONE,
> > > >  	},
> > > > +	{
> > > > +		.procname	= "panic_on_taint",
> > > > +		.data		= &panic_on_taint,
> > > > +		.maxlen		= sizeof(unsigned long),
> > > > +		.mode		= 0644,
> > > > +		.proc_handler	= proc_doulongvec_minmax,
> > > > +	},
> > > 
> > > You sent this out before I could reply to the other thread on v1.
> > > My thoughts on the min / max values, or lack here:
> > >                                                                                 
> > > Valid range doesn't mean "currently allowed defined" masks.                     
> > > 
> > > For example, if you expect to panic due to a taint, but a new taint type
> > > you want was not added on an older kernel you would be under a very
> > > *false* sense of security that your kernel may not have hit such a
> > > taint, but the reality of the situation was that the kernel didn't
> > > support that taint flag only added in future kernels.                           
> > > 
> > > You may need to define a new flag (MAX_TAINT) which should be the last
> > > value + 1, the allowed max values would be                                      
> > > 
> > > (2^MAX_TAINT)-1                                                                 
> > > 
> > > or                                                                              
> > > 
> > > (1<<MAX_TAINT)-1  
> > > 
> > > Since this is to *PANIC* I think we do want to test ranges and ensure
> > > only valid ones are allowed.
> > >
> > 
> > Ok. I'm thinking in:
> > 
> > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > index 8a176d8727a3..ee492431e7b0 100644
> > --- a/kernel/sysctl.c
> > +++ b/kernel/sysctl.c
> > @@ -1217,6 +1217,15 @@ static struct ctl_table kern_table[] = {
> >                 .extra1         = SYSCTL_ZERO,
> >                 .extra2         = SYSCTL_ONE,
> >         },
> > +       {
> > +               .procname       = "panic_on_taint",
> > +               .data           = &panic_on_taint,
> > +               .maxlen         = sizeof(unsigned long),
> > +               .mode           = 0644,
> > +               .proc_handler   = proc_doulongvec_minmax,
> > +               .extra1         = SYSCTL_ZERO,
> > +               .extra2         = (1 << TAINT_FLAGS_COUNT << 1) - 1,
> 							^^^^^^^^
> Without that crap, obviously. Sorry. That was a screw up on my side,
> when copyin' and pasting.

I really think that the implications of this needs a bit further review,
hence the wider CCs.

Since this can trivially crash a system, I think we need to be careful
about this value. First, proc_doulongvec_minmax() will not suffice alone,
we'll *at least* want to check for capable(CAP_SYS_ADMIN)) as in
proc_taint().  Second first note that we *always* build proc_taint(), if
just CONFIG_PROC_SYSCTL is enabled. That has been the way since it got
merged via commit 34f5a39899f3f ("Add TAINT_USER and ability to set
taint flags from userspace") since v2.6.21. We need to evaluate if this
little *new* knob you are introducing merits its own kconfig tucked away
under debugging first. The ship has already sailed for proc_taint().
Anyone with CAP_SYS_ADMIN can taint.

The good thing is that proc_taint() added its own TAINT_USER, *but*, hey
it didn't use it. A panic-on-taint system would be able to tell if a
panic was caused by proc_taint() throught the stack trace only. 
If panic-on-taint proc was used *later* after a custom taint was set
or happened naturally, no panic would trigger since your panic-on-taint
check on your patch only happens on add_taint(). This means that for
those thinking about using this for QA or security purposes, the only
sensible *reliable* way to use panic-on-taint would be through cmdline,
from boot. Post-boot means to enable this would either need to check
existing taint flags, or we'd want to a way to check if this was not
added post boot. Also, a post-booteed system with panic-on-taint could
easily allow for reductions of the intended goal, thereby allowing one
to cheat.

I think a new TAINT_MODIFIED for use when proc_taint() is used is worth
considering. Ted? Even though 'M' is taken -- I think its silly to rely
on the character to be anything of meaning, once we run out of the
alphabet letters that will be the way anyway, unless we-redo this a bit.
Note we use value for when this is on and off, typically an empty space
when a taint is not seen.

The good thing is that proc_taint() only *increments* taint, it doesn't
remove taints.

Are we OK with panic-on-taint only with CAP_SYS_ADMIN?

I can see this building up to a "testing" solution to ensure / gaurantee
no bugs have happened during QA, but since QA would want the same binary
for production it is hard to see this enabled for QA but not production.
To resolve that last concern, if we do go with moving this under a
kconfig value, a simple cmdline append would address the concerns. Ie,
even if you enabled this mechanism through its kconfig you would not be
able to modify the panic-on-tain unless you passed a cmdline option.

Note that Vlastimil has some patches which are visible on linux-next,
but not yet merged on Linus' tree, which enable these params to be set
on the cmdline too now, so perhaps yet-another cmdline param is not
needed anymore.

I *think* that a cmdline route to enable this would likely remove the
need for the kernel config for this. But even with Vlastimil's work
merged, I think we'd want yet-another value to enable / disable this
feature. Do we need yet-another-taint flag to tell us that this feature
was enabled?

  Luis
Rafael Aquini May 7, 2020, 10:06 p.m. UTC | #7
On Thu, May 07, 2020 at 08:33:40PM +0000, Luis Chamberlain wrote:
> On Thu, May 07, 2020 at 02:47:05PM -0400, Rafael Aquini wrote:
> > On Thu, May 07, 2020 at 02:43:16PM -0400, Rafael Aquini wrote:
> > > On Thu, May 07, 2020 at 06:22:57PM +0000, Luis Chamberlain wrote:
> > > > On Thu, May 07, 2020 at 02:06:31PM -0400, Rafael Aquini wrote:
> > > > > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > > > > index 8a176d8727a3..b80ab660d727 100644
> > > > > --- a/kernel/sysctl.c
> > > > > +++ b/kernel/sysctl.c
> > > > > @@ -1217,6 +1217,13 @@ static struct ctl_table kern_table[] = {
> > > > >  		.extra1		= SYSCTL_ZERO,
> > > > >  		.extra2		= SYSCTL_ONE,
> > > > >  	},
> > > > > +	{
> > > > > +		.procname	= "panic_on_taint",
> > > > > +		.data		= &panic_on_taint,
> > > > > +		.maxlen		= sizeof(unsigned long),
> > > > > +		.mode		= 0644,
> > > > > +		.proc_handler	= proc_doulongvec_minmax,
> > > > > +	},
> > > > 
> > > > You sent this out before I could reply to the other thread on v1.
> > > > My thoughts on the min / max values, or lack here:
> > > >                                                                                 
> > > > Valid range doesn't mean "currently allowed defined" masks.                     
> > > > 
> > > > For example, if you expect to panic due to a taint, but a new taint type
> > > > you want was not added on an older kernel you would be under a very
> > > > *false* sense of security that your kernel may not have hit such a
> > > > taint, but the reality of the situation was that the kernel didn't
> > > > support that taint flag only added in future kernels.                           
> > > > 
> > > > You may need to define a new flag (MAX_TAINT) which should be the last
> > > > value + 1, the allowed max values would be                                      
> > > > 
> > > > (2^MAX_TAINT)-1                                                                 
> > > > 
> > > > or                                                                              
> > > > 
> > > > (1<<MAX_TAINT)-1  
> > > > 
> > > > Since this is to *PANIC* I think we do want to test ranges and ensure
> > > > only valid ones are allowed.
> > > >
> > > 
> > > Ok. I'm thinking in:
> > > 
> > > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > > index 8a176d8727a3..ee492431e7b0 100644
> > > --- a/kernel/sysctl.c
> > > +++ b/kernel/sysctl.c
> > > @@ -1217,6 +1217,15 @@ static struct ctl_table kern_table[] = {
> > >                 .extra1         = SYSCTL_ZERO,
> > >                 .extra2         = SYSCTL_ONE,
> > >         },
> > > +       {
> > > +               .procname       = "panic_on_taint",
> > > +               .data           = &panic_on_taint,
> > > +               .maxlen         = sizeof(unsigned long),
> > > +               .mode           = 0644,
> > > +               .proc_handler   = proc_doulongvec_minmax,
> > > +               .extra1         = SYSCTL_ZERO,
> > > +               .extra2         = (1 << TAINT_FLAGS_COUNT << 1) - 1,
> > 							^^^^^^^^
> > Without that crap, obviously. Sorry. That was a screw up on my side,
> > when copyin' and pasting.
> 
> I really think that the implications of this needs a bit further review,
> hence the wider CCs.
> 
> Since this can trivially crash a system, I think we need to be careful
> about this value. First, proc_doulongvec_minmax() will not suffice alone,
> we'll *at least* want to check for capable(CAP_SYS_ADMIN)) as in
> proc_taint().  Second first note that we *always* build proc_taint(), if
> just CONFIG_PROC_SYSCTL is enabled. That has been the way since it got
> merged via commit 34f5a39899f3f ("Add TAINT_USER and ability to set
> taint flags from userspace") since v2.6.21. We need to evaluate if this
> little *new* knob you are introducing merits its own kconfig tucked away
> under debugging first. The ship has already sailed for proc_taint().
> Anyone with CAP_SYS_ADMIN can taint.
> 
> The good thing is that proc_taint() added its own TAINT_USER, *but*, hey
> it didn't use it. A panic-on-taint system would be able to tell if a
> panic was caused by proc_taint() throught the stack trace only. 
> If panic-on-taint proc was used *later* after a custom taint was set
> or happened naturally, no panic would trigger since your panic-on-taint
> check on your patch only happens on add_taint(). This means that for
> those thinking about using this for QA or security purposes, the only
> sensible *reliable* way to use panic-on-taint would be through cmdline,
> from boot. Post-boot means to enable this would either need to check
> existing taint flags, or we'd want to a way to check if this was not
> added post boot. Also, a post-booteed system with panic-on-taint could
> easily allow for reductions of the intended goal, thereby allowing one
> to cheat.
> 
> I think a new TAINT_MODIFIED for use when proc_taint() is used is worth
> considering. Ted? Even though 'M' is taken -- I think its silly to rely
> on the character to be anything of meaning, once we run out of the
> alphabet letters that will be the way anyway, unless we-redo this a bit.
> Note we use value for when this is on and off, typically an empty space
> when a taint is not seen.
> 
> The good thing is that proc_taint() only *increments* taint, it doesn't
> remove taints.
> 
> Are we OK with panic-on-taint only with CAP_SYS_ADMIN?
> 
> I can see this building up to a "testing" solution to ensure / gaurantee
> no bugs have happened during QA, but since QA would want the same binary
> for production it is hard to see this enabled for QA but not production.
> To resolve that last concern, if we do go with moving this under a
> kconfig value, a simple cmdline append would address the concerns. Ie,
> even if you enabled this mechanism through its kconfig you would not be
> able to modify the panic-on-tain unless you passed a cmdline option.
> 
> Note that Vlastimil has some patches which are visible on linux-next,
> but not yet merged on Linus' tree, which enable these params to be set
> on the cmdline too now, so perhaps yet-another cmdline param is not
> needed anymore.
> 
> I *think* that a cmdline route to enable this would likely remove the
> need for the kernel config for this. But even with Vlastimil's work
> merged, I think we'd want yet-another value to enable / disable this
> feature. Do we need yet-another-taint flag to tell us that this feature
> was enabled?
>

I guess it makes sense to get rid of the sysctl interface for
proc_on_taint, and only keep it as a cmdline option. 

But the real issue seems to be, regardless we go with a cmdline-only option
or not, the ability of proc_taint() to set any arbitrary taint flag 
other than just marking the kernel with TAINT_USER. 

-- Rafael
Luis Chamberlain May 7, 2020, 10:25 p.m. UTC | #8
On Thu, May 07, 2020 at 06:06:06PM -0400, Rafael Aquini wrote:
> On Thu, May 07, 2020 at 08:33:40PM +0000, Luis Chamberlain wrote:
> > I *think* that a cmdline route to enable this would likely remove the
> > need for the kernel config for this. But even with Vlastimil's work
> > merged, I think we'd want yet-another value to enable / disable this
> > feature. Do we need yet-another-taint flag to tell us that this feature
> > was enabled?
> >
> 
> I guess it makes sense to get rid of the sysctl interface for
> proc_on_taint, and only keep it as a cmdline option. 

That would be easier to support and k3eps this simple.

> But the real issue seems to be, regardless we go with a cmdline-only option
> or not, the ability of proc_taint() to set any arbitrary taint flag 
> other than just marking the kernel with TAINT_USER. 

I think we would have no other option but to add a new TAINT flag so
that we know that the taint flag was modified by a user. Perhaps just
re-using TAINT_USER when proc_taint() would suffice.

  Luis
Rafael Aquini May 8, 2020, 12:47 p.m. UTC | #9
On Thu, May 07, 2020 at 10:25:58PM +0000, Luis Chamberlain wrote:
> On Thu, May 07, 2020 at 06:06:06PM -0400, Rafael Aquini wrote:
> > On Thu, May 07, 2020 at 08:33:40PM +0000, Luis Chamberlain wrote:
> > > I *think* that a cmdline route to enable this would likely remove the
> > > need for the kernel config for this. But even with Vlastimil's work
> > > merged, I think we'd want yet-another value to enable / disable this
> > > feature. Do we need yet-another-taint flag to tell us that this feature
> > > was enabled?
> > >
> > 
> > I guess it makes sense to get rid of the sysctl interface for
> > proc_on_taint, and only keep it as a cmdline option. 
> 
> That would be easier to support and k3eps this simple.
> 
> > But the real issue seems to be, regardless we go with a cmdline-only option
> > or not, the ability of proc_taint() to set any arbitrary taint flag 
> > other than just marking the kernel with TAINT_USER. 
> 
> I think we would have no other option but to add a new TAINT flag so
> that we know that the taint flag was modified by a user. Perhaps just
> re-using TAINT_USER when proc_taint() would suffice.
>

We might not need an extra taint flag if, perhaps, we could make these
two features mutually exclusive. The idea here is that bitmasks added 
via panic_on_taint get filtered out in proc_taint(), so a malicious 
user couldn't exploit the latter interface to easily panic the system,
when the first one is also in use. 
 
-- Rafael
Luis Chamberlain May 9, 2020, 3:48 a.m. UTC | #10
On Fri, May 08, 2020 at 08:47:19AM -0400, Rafael Aquini wrote:
> On Thu, May 07, 2020 at 10:25:58PM +0000, Luis Chamberlain wrote:
> > On Thu, May 07, 2020 at 06:06:06PM -0400, Rafael Aquini wrote:
> > > On Thu, May 07, 2020 at 08:33:40PM +0000, Luis Chamberlain wrote:
> > > > I *think* that a cmdline route to enable this would likely remove the
> > > > need for the kernel config for this. But even with Vlastimil's work
> > > > merged, I think we'd want yet-another value to enable / disable this
> > > > feature. Do we need yet-another-taint flag to tell us that this feature
> > > > was enabled?
> > > >
> > > 
> > > I guess it makes sense to get rid of the sysctl interface for
> > > proc_on_taint, and only keep it as a cmdline option. 
> > 
> > That would be easier to support and k3eps this simple.
> > 
> > > But the real issue seems to be, regardless we go with a cmdline-only option
> > > or not, the ability of proc_taint() to set any arbitrary taint flag 
> > > other than just marking the kernel with TAINT_USER. 
> > 
> > I think we would have no other option but to add a new TAINT flag so
> > that we know that the taint flag was modified by a user. Perhaps just
> > re-using TAINT_USER when proc_taint() would suffice.
> >
> 
> We might not need an extra taint flag if, perhaps, we could make these
> two features mutually exclusive. The idea here is that bitmasks added 
> via panic_on_taint get filtered out in proc_taint(), so a malicious 
> user couldn't exploit the latter interface to easily panic the system,
> when the first one is also in use. 

I get it, however I I can still see the person who enables enabling
panic-on-tain wanting to know if proc_taint() was used. So even if
it was not on their mask, if it was modified that seems like important
information for a bug report analysis.

  Luis
Rafael Aquini May 9, 2020, 2:56 p.m. UTC | #11
On Sat, May 09, 2020 at 03:48:54AM +0000, Luis Chamberlain wrote:
> On Fri, May 08, 2020 at 08:47:19AM -0400, Rafael Aquini wrote:
> > On Thu, May 07, 2020 at 10:25:58PM +0000, Luis Chamberlain wrote:
> > > On Thu, May 07, 2020 at 06:06:06PM -0400, Rafael Aquini wrote:
> > > > On Thu, May 07, 2020 at 08:33:40PM +0000, Luis Chamberlain wrote:
> > > > > I *think* that a cmdline route to enable this would likely remove the
> > > > > need for the kernel config for this. But even with Vlastimil's work
> > > > > merged, I think we'd want yet-another value to enable / disable this
> > > > > feature. Do we need yet-another-taint flag to tell us that this feature
> > > > > was enabled?
> > > > >
> > > > 
> > > > I guess it makes sense to get rid of the sysctl interface for
> > > > proc_on_taint, and only keep it as a cmdline option. 
> > > 
> > > That would be easier to support and k3eps this simple.
> > > 
> > > > But the real issue seems to be, regardless we go with a cmdline-only option
> > > > or not, the ability of proc_taint() to set any arbitrary taint flag 
> > > > other than just marking the kernel with TAINT_USER. 
> > > 
> > > I think we would have no other option but to add a new TAINT flag so
> > > that we know that the taint flag was modified by a user. Perhaps just
> > > re-using TAINT_USER when proc_taint() would suffice.
> > >
> > 
> > We might not need an extra taint flag if, perhaps, we could make these
> > two features mutually exclusive. The idea here is that bitmasks added 
> > via panic_on_taint get filtered out in proc_taint(), so a malicious 
> > user couldn't exploit the latter interface to easily panic the system,
> > when the first one is also in use. 
> 
> I get it, however I I can still see the person who enables enabling
> panic-on-tain wanting to know if proc_taint() was used. So even if
> it was not on their mask, if it was modified that seems like important
> information for a bug report analysis.
>

For that purpose (tracking user taints) I think sth between these lines
would work:

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a176d8727a3..651a82c13621 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2602,6 +2602,9 @@ int proc_douintvec(struct ctl_table *table, int write,
                                 do_proc_douintvec_conv, NULL);
 }

+/* track which taint bits were set by the user */
+static unsigned long user_tainted;
+
 /*
  * Taint values can only be increased
  * This means we can safely use a temporary.
@@ -2629,11 +2632,20 @@ static int proc_taint(struct ctl_table *table, int write,
                 */
                int i;
                for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
-                       if ((tmptaint >> i) & 1)
+                       if ((tmptaint >> i) & 1) {
+                               set_bit(i, &user_tainted);
                                add_taint(i, LOCKDEP_STILL_OK);
+                       }
                }
        }

+       /*
+        * Users with SYS_ADMIN capability can fiddle with any arbitrary
+        * taint flag through this interface.
+        * If that's the case, we also need to mark the kernel "tainted by user"
+        */
+       add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+
        return err;
 }


I don't think, though, it's panic_on_taint work to track that. I posted a v3 for
this feature with a way to select if one wants to avoid user forced taints
triggering panic() for flags also set for panic_on_taint.

Cheers,

-- Rafael

Patch
diff mbox series

diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst
index ac7e131d2935..de3cf6d377cc 100644
--- a/Documentation/admin-guide/kdump/kdump.rst
+++ b/Documentation/admin-guide/kdump/kdump.rst
@@ -521,6 +521,16 @@  will cause a kdump to occur at the panic() call.  In cases where a user wants
 to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1
 to achieve the same behaviour.
 
+Trigger Kdump on add_taint()
+============================
+
+The kernel parameter, panic_on_taint, calls panic() from within add_taint(),
+whenever the value set in this bitmask matches with the bit flag being set
+by add_taint(). This will cause a kdump to occur at the panic() call.
+In cases where a user wants to specify this during runtime,
+/proc/sys/kernel/panic_on_taint can be set to a respective bitmask value
+to achieve the same behaviour.
+
 Contact
 =======
 
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 7bc83f3d9bdf..6aa524928399 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3404,6 +3404,13 @@ 
 	panic_on_warn	panic() instead of WARN().  Useful to cause kdump
 			on a WARN().
 
+	panic_on_taint=	Bitmask for calling panic() when the kernel gets tainted
+			and the taint bit set matches with the assigned bitmask.
+			See Documentation/admin-guide/tainted-kernels.rst for
+			details on the taint flag bits that user can pick to
+			compose the bitmask to assign to panic_on_taint.
+			When set to 0 (default), this option is disabled.
+
 	crash_kexec_post_notifiers
 			Run kdump after running panic-notifiers and dumping
 			kmsg. This only for the users who doubt kdump always
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 0d427fd10941..60500d5c1ee0 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -658,6 +658,24 @@  a kernel rebuild when attempting to kdump at the location of a WARN().
 = ================================================
 
 
+panic_on_taint
+==============
+
+Bitmask for calling panic() in the add_taint() path.
+This is useful to avoid a kernel rebuild when attempting to
+kdump at the insertion of any specific TAINT flags.
+When set to 0 (default), the non-crashing default behavior
+of add_panic() is maintained.
+
+See :doc:`/admin-guide/tainted-kernels` for a complete list of
+taint flag bits and their decimal decoded values.
+
+So, for example, to panic if the kernel gets tainted due to
+occurrences of bad pages and/or machine check errors, a user can::
+
+  echo 48 > /proc/sys/kernel/panic_on_taint
+
+
 panic_print
 ===========
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 9b7a8d74a9d6..518b9fd381c2 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -528,6 +528,7 @@  extern int panic_on_oops;
 extern int panic_on_unrecovered_nmi;
 extern int panic_on_io_nmi;
 extern int panic_on_warn;
+extern unsigned long panic_on_taint;
 extern int sysctl_panic_on_rcu_stall;
 extern int sysctl_panic_on_stackoverflow;
 
diff --git a/kernel/panic.c b/kernel/panic.c
index b69ee9e76cb2..d284241e702b 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -44,6 +44,7 @@  static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
 bool crash_kexec_post_notifiers;
 int panic_on_warn __read_mostly;
+unsigned long panic_on_taint;
 
 int panic_timeout = CONFIG_PANIC_TIMEOUT;
 EXPORT_SYMBOL_GPL(panic_timeout);
@@ -434,6 +435,11 @@  void add_taint(unsigned flag, enum lockdep_ok lockdep_ok)
 		pr_warn("Disabling lock debugging due to kernel taint\n");
 
 	set_bit(flag, &tainted_mask);
+
+	if (tainted_mask & panic_on_taint) {
+		panic_on_taint = 0;
+		panic("panic_on_taint set ...");
+	}
 }
 EXPORT_SYMBOL(add_taint);
 
@@ -675,6 +681,7 @@  core_param(panic, panic_timeout, int, 0644);
 core_param(panic_print, panic_print, ulong, 0644);
 core_param(pause_on_oops, pause_on_oops, int, 0644);
 core_param(panic_on_warn, panic_on_warn, int, 0644);
+core_param(panic_on_taint, panic_on_taint, ulong, 0644);
 core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644);
 
 static int __init oops_setup(char *s)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a176d8727a3..b80ab660d727 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1217,6 +1217,13 @@  static struct ctl_table kern_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+	{
+		.procname	= "panic_on_taint",
+		.data		= &panic_on_taint,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 	{
 		.procname	= "timer_migration",