diff mbox series

selinux: add tracepoint on denials

Message ID 20200724091520.880211-1-tweek@google.com (mailing list archive)
State Changes Requested
Headers show
Series selinux: add tracepoint on denials | expand

Commit Message

Thiébaud Weksteen July 24, 2020, 9:15 a.m. UTC
The audit data currently captures which process and which target
is responsible for a denial. There is no data on where exactly in the
process that call occurred. Debugging can be made easier by being able to
reconstruct the unified kernel and userland stack traces [1]. Add a
tracepoint on the SELinux denials which can then be used by userland
(e.g. perf).

Although this patch could be added manually by each OS developer to
troubleshoot a denial, adding it to the kernel streamlines the
developer's workflow.

[1] https://source.android.com/devices/tech/debug/native_stack_dump

Signed-off-by: Thiébaud Weksteen <tweek@google.com>
Signed-off-by: Joel Fernandes <joelaf@google.com>
---
 MAINTAINERS                    |  1 +
 include/trace/events/selinux.h | 35 ++++++++++++++++++++++++++++++++++
 security/selinux/avc.c         |  6 ++++++
 3 files changed, 42 insertions(+)
 create mode 100644 include/trace/events/selinux.h

Comments

Stephen Smalley July 24, 2020, 1:32 p.m. UTC | #1
On Fri, Jul 24, 2020 at 5:15 AM Thiébaud Weksteen <tweek@google.com> wrote:
>
> The audit data currently captures which process and which target
> is responsible for a denial. There is no data on where exactly in the
> process that call occurred. Debugging can be made easier by being able to
> reconstruct the unified kernel and userland stack traces [1]. Add a
> tracepoint on the SELinux denials which can then be used by userland
> (i.e. perf).
>
> Although this patch could manually be added by each OS developer to
> trouble shoot a denial, adding it to the kernel streamlines the
> developers workflow.
>
> [1] https://source.android.com/devices/tech/debug/native_stack_dump
>
> Signed-off-by: Thiébaud Weksteen <tweek@google.com>
> Signed-off-by: Joel Fernandes <joelaf@google.com>
> ---
>  MAINTAINERS                    |  1 +
>  include/trace/events/selinux.h | 35 ++++++++++++++++++++++++++++++++++
>  security/selinux/avc.c         |  6 ++++++
>  3 files changed, 42 insertions(+)
>  create mode 100644 include/trace/events/selinux.h
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index e64cdde81851..6b6cd5e13537 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -15358,6 +15358,7 @@ T:      git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
>  F:     Documentation/ABI/obsolete/sysfs-selinux-checkreqprot
>  F:     Documentation/ABI/obsolete/sysfs-selinux-disable
>  F:     Documentation/admin-guide/LSM/SELinux.rst
> +F:     include/trace/events/selinux.h
>  F:     include/uapi/linux/selinux_netlink.h
>  F:     scripts/selinux/
>  F:     security/selinux/
> diff --git a/include/trace/events/selinux.h b/include/trace/events/selinux.h
> new file mode 100644
> index 000000000000..e247187a8135
> --- /dev/null
> +++ b/include/trace/events/selinux.h
> @@ -0,0 +1,35 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM selinux
> +
> +#if !defined(_TRACE_SELINUX_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_SELINUX_H
> +
> +#include <linux/ktime.h>
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(selinux_denied,
> +
> +       TP_PROTO(int cls, int av),
> +
> +       TP_ARGS(cls, av),
> +
> +       TP_STRUCT__entry(
> +               __field(int, cls)
> +               __field(int, av)
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->cls = cls;
> +               __entry->av = av;
> +       ),
> +
> +       TP_printk("denied %d %d",
> +               __entry->cls,
> +               __entry->av)
> +);

I would think you would want to log av as %x for easier interpretation
especially when there are multiple permissions being checked at once
(which can happen). Also both cls and av would properly be unsigned
values.  Only other question I have is whether it would be beneficial
to include other information here to help uniquely identify/correlate
the denial with the avc: message and whether any decoding of the
class, av, or other information could/should be done here versus in
some userland helper.
Steven Rostedt July 24, 2020, 1:52 p.m. UTC | #2
On Fri, 24 Jul 2020 11:15:03 +0200
"Thiébaud Weksteen" <tweek@google.com> wrote:
> diff --git a/security/selinux/avc.c b/security/selinux/avc.c
> index d18cb32a242a..85d2e22ab656 100644
> --- a/security/selinux/avc.c
> +++ b/security/selinux/avc.c
> @@ -31,6 +31,9 @@
>  #include "avc_ss.h"
>  #include "classmap.h"
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/selinux.h>
> +
>  #define AVC_CACHE_SLOTS			512
>  #define AVC_DEF_CACHE_THRESHOLD		512
>  #define AVC_CACHE_RECLAIM		16
> @@ -672,6 +675,9 @@ static void avc_audit_pre_callback(struct audit_buffer *ab, void *a)
>  		return;
>  	}
>  
> +	if (sad->denied)

First, I would like to deny sadness as well ;-)

Now, there is a way to add that branch within the "nop" area of the
trace event, and remove the conditional branch from the main code.

> +		trace_selinux_denied(sad->tclass, av);
> +

Instead have this:

	trace_selinux_denied(sad, av);

>  	perms = secclass_map[sad->tclass-1].perms;
>  
>  	audit_log_format(ab, " {");

> --- /dev/null
> +++ b/include/trace/events/selinux.h
> @@ -0,0 +1,35 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM selinux
> +
> +#if !defined(_TRACE_SELINUX_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_SELINUX_H
> +
> +#include <linux/ktime.h>
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(selinux_denied,

TRACE_EVENT_CONDITION(selinux_denied,

> +
> +	TP_PROTO(int cls, int av),

	TP_PROTO(struct selinux_audit_data *sad, int av),

> +
> +	TP_ARGS(cls, av),
> +

	TP_ARGS(sad, av),

	TP_CONDITION(sad->denied),

The above condition will be tested before calling the tracepoint. But
only if the trace event is enabled.

> +	TP_STRUCT__entry(
> +		__field(int, cls)
> +		__field(int, av)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->cls = cls;

		__entry->cls = sad->tclass;

> +		__entry->av = av;
> +	),
> +
> +	TP_printk("denied %d %d",
> +		__entry->cls,
> +		__entry->av)
> +);
> +
> +#endif
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>

-- Steve
Paul Moore July 24, 2020, 1:54 p.m. UTC | #3
On Fri, Jul 24, 2020 at 9:32 AM Stephen Smalley
<stephen.smalley.work@gmail.com> wrote:
> On Fri, Jul 24, 2020 at 5:15 AM Thiébaud Weksteen <tweek@google.com> wrote:
> > The audit data currently captures which process and which target
> > is responsible for a denial. There is no data on where exactly in the
> > process that call occurred. Debugging can be made easier by being able to
> > reconstruct the unified kernel and userland stack traces [1]. Add a
> > tracepoint on the SELinux denials which can then be used by userland
> > (i.e. perf).
> >
> > Although this patch could manually be added by each OS developer to
> > trouble shoot a denial, adding it to the kernel streamlines the
> > developers workflow.
> >
> > [1] https://source.android.com/devices/tech/debug/native_stack_dump
> >
> > Signed-off-by: Thiébaud Weksteen <tweek@google.com>
> > Signed-off-by: Joel Fernandes <joelaf@google.com>
> > ---
> >  MAINTAINERS                    |  1 +
> >  include/trace/events/selinux.h | 35 ++++++++++++++++++++++++++++++++++
> >  security/selinux/avc.c         |  6 ++++++
> >  3 files changed, 42 insertions(+)
> >  create mode 100644 include/trace/events/selinux.h
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index e64cdde81851..6b6cd5e13537 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -15358,6 +15358,7 @@ T:      git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
> >  F:     Documentation/ABI/obsolete/sysfs-selinux-checkreqprot
> >  F:     Documentation/ABI/obsolete/sysfs-selinux-disable
> >  F:     Documentation/admin-guide/LSM/SELinux.rst
> > +F:     include/trace/events/selinux.h
> >  F:     include/uapi/linux/selinux_netlink.h
> >  F:     scripts/selinux/
> >  F:     security/selinux/
> > diff --git a/include/trace/events/selinux.h b/include/trace/events/selinux.h
> > new file mode 100644
> > index 000000000000..e247187a8135
> > --- /dev/null
> > +++ b/include/trace/events/selinux.h
> > @@ -0,0 +1,35 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM selinux
> > +
> > +#if !defined(_TRACE_SELINUX_H) || defined(TRACE_HEADER_MULTI_READ)
> > +#define _TRACE_SELINUX_H
> > +
> > +#include <linux/ktime.h>
> > +#include <linux/tracepoint.h>
> > +
> > +TRACE_EVENT(selinux_denied,
> > +
> > +       TP_PROTO(int cls, int av),
> > +
> > +       TP_ARGS(cls, av),
> > +
> > +       TP_STRUCT__entry(
> > +               __field(int, cls)
> > +               __field(int, av)
> > +       ),
> > +
> > +       TP_fast_assign(
> > +               __entry->cls = cls;
> > +               __entry->av = av;
> > +       ),
> > +
> > +       TP_printk("denied %d %d",
> > +               __entry->cls,
> > +               __entry->av)
> > +);
>
> I would think you would want to log av as %x for easier interpretation
> especially when there are multiple permissions being checked at once
> (which can happen). Also both cls and av would properly be unsigned
> values.  Only other question I have is whether it would be beneficial
> to include other information here to help uniquely identify/correlate
> the denial with the avc: message and whether any decoding of the
> class, av, or other information could/should be done here versus in
> some userland helper.

It does seem like at the very least it would be nice to see the av as
hex values instead of integers, e.g. "%x" in the TP_printk() call.
Considering this patch is about making dev's lives easier, I tend to
agree with Stephen questioning if you should go a step further and
convert both the class and av values into string representations.
Thiébaud Weksteen July 28, 2020, 12:49 p.m. UTC | #4
Thanks for the review! I'll send a new revision of the patch with the
%x formatter and using the TP_CONDITION macro.

On adding further information to the trace event, I would prefer
adding the strict minimum to be able to correlate the event with the
avc message. The reason is that trace events have a fixed size (see
https://www.kernel.org/doc/Documentation/trace/events.txt). For
instance, we would need to decide on a maximum size for the string
representation of the list of permissions. This would also duplicate
the reporting done in the avc audit event. I'll simply add the pid as
part of the printk, which should be sufficient for the correlation.


On Fri, Jul 24, 2020 at 3:55 PM Paul Moore <paul@paul-moore.com> wrote:
>
> On Fri, Jul 24, 2020 at 9:32 AM Stephen Smalley
> <stephen.smalley.work@gmail.com> wrote:
> > On Fri, Jul 24, 2020 at 5:15 AM Thiébaud Weksteen <tweek@google.com> wrote:
> > > The audit data currently captures which process and which target
> > > is responsible for a denial. There is no data on where exactly in the
> > > process that call occurred. Debugging can be made easier by being able to
> > > reconstruct the unified kernel and userland stack traces [1]. Add a
> > > tracepoint on the SELinux denials which can then be used by userland
> > > (i.e. perf).
> > >
> > > Although this patch could manually be added by each OS developer to
> > > trouble shoot a denial, adding it to the kernel streamlines the
> > > developers workflow.
> > >
> > > [1] https://source.android.com/devices/tech/debug/native_stack_dump
> > >
> > > Signed-off-by: Thiébaud Weksteen <tweek@google.com>
> > > Signed-off-by: Joel Fernandes <joelaf@google.com>
> > > ---
> > >  MAINTAINERS                    |  1 +
> > >  include/trace/events/selinux.h | 35 ++++++++++++++++++++++++++++++++++
> > >  security/selinux/avc.c         |  6 ++++++
> > >  3 files changed, 42 insertions(+)
> > >  create mode 100644 include/trace/events/selinux.h
> > >
> > > diff --git a/MAINTAINERS b/MAINTAINERS
> > > index e64cdde81851..6b6cd5e13537 100644
> > > --- a/MAINTAINERS
> > > +++ b/MAINTAINERS
> > > @@ -15358,6 +15358,7 @@ T:      git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
> > >  F:     Documentation/ABI/obsolete/sysfs-selinux-checkreqprot
> > >  F:     Documentation/ABI/obsolete/sysfs-selinux-disable
> > >  F:     Documentation/admin-guide/LSM/SELinux.rst
> > > +F:     include/trace/events/selinux.h
> > >  F:     include/uapi/linux/selinux_netlink.h
> > >  F:     scripts/selinux/
> > >  F:     security/selinux/
> > > diff --git a/include/trace/events/selinux.h b/include/trace/events/selinux.h
> > > new file mode 100644
> > > index 000000000000..e247187a8135
> > > --- /dev/null
> > > +++ b/include/trace/events/selinux.h
> > > @@ -0,0 +1,35 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#undef TRACE_SYSTEM
> > > +#define TRACE_SYSTEM selinux
> > > +
> > > +#if !defined(_TRACE_SELINUX_H) || defined(TRACE_HEADER_MULTI_READ)
> > > +#define _TRACE_SELINUX_H
> > > +
> > > +#include <linux/ktime.h>
> > > +#include <linux/tracepoint.h>
> > > +
> > > +TRACE_EVENT(selinux_denied,
> > > +
> > > +       TP_PROTO(int cls, int av),
> > > +
> > > +       TP_ARGS(cls, av),
> > > +
> > > +       TP_STRUCT__entry(
> > > +               __field(int, cls)
> > > +               __field(int, av)
> > > +       ),
> > > +
> > > +       TP_fast_assign(
> > > +               __entry->cls = cls;
> > > +               __entry->av = av;
> > > +       ),
> > > +
> > > +       TP_printk("denied %d %d",
> > > +               __entry->cls,
> > > +               __entry->av)
> > > +);
> >
> > I would think you would want to log av as %x for easier interpretation
> > especially when there are multiple permissions being checked at once
> > (which can happen). Also both cls and av would properly be unsigned
> > values.  Only other question I have is whether it would be beneficial
> > to include other information here to help uniquely identify/correlate
> > the denial with the avc: message and whether any decoding of the
> > class, av, or other information could/should be done here versus in
> > some userland helper.
>
> It does seem like at the very least it would be nice to see the av as
> hex values instead of integers, e.g. "%x" in the TP_printk() call.
> Considering this patch is about making dev's lives easier, I tend to
> agree with Stephen questioning if you should go a step further and
> convert both the class and av values into string representations.
>
> --
> paul moore
> www.paul-moore.com
Stephen Smalley July 28, 2020, 1:04 p.m. UTC | #5
On 7/28/20 8:49 AM, Thiébaud Weksteen wrote:

> Thanks for the review! I'll send a new revision of the patch with the
> %x formatter and using the TP_CONDITION macro.
>
> On adding further information to the trace event, I would prefer
> adding the strict minimum to be able to correlate the event with the
> avc message. The reason is that tracevents have a fixed size (see
> https://www.kernel.org/doc/Documentation/trace/events.txt). For
> instance, we would need to decide on a maximum size for the string
> representation of the list of permissions. This would also duplicate
> the reporting done in the avc audit event. I'll simply add the pid as
> part of the printk, which should be sufficient for the correlation.

Ok, also please use unsigned int for the fields and %u for the cls value.

(btw top-posting is discouraged for mailing list discussions, see 
http://vger.kernel.org/lkml/#s3-9)
Steven Rostedt July 28, 2020, 1:12 p.m. UTC | #6
On Tue, 28 Jul 2020 14:49:24 +0200
Thiébaud Weksteen <tweek@google.com> wrote:

> Thanks for the review! I'll send a new revision of the patch with the
> %x formatter and using the TP_CONDITION macro.
> 
> On adding further information to the trace event, I would prefer
> adding the strict minimum to be able to correlate the event with the
> avc message. The reason is that tracevents have a fixed size (see
> https://www.kernel.org/doc/Documentation/trace/events.txt). For

Wait! What?

Where in that document does it say that trace events have a fixed size?
We have a lot of dynamically sized trace events.

> instance, we would need to decide on a maximum size for the string
> representation of the list of permissions. This would also duplicate
> the reporting done in the avc audit event. I'll simply add the pid as
> part of the printk, which should be sufficient for the correlation.
> 

Please take a look at samples/trace_events/trace_events_sample.h

and read the example on __print_symbolic().

I think that's what you are looking for.

-- Steve
Thiébaud Weksteen July 28, 2020, 1:19 p.m. UTC | #7
On Tue, Jul 28, 2020 at 3:04 PM Stephen Smalley
<stephen.smalley.work@gmail.com> wrote:
> Ok, also please use unsigned int for the fields and %u for the cls value.

Will do in v3. Thanks.
Thiébaud Weksteen July 28, 2020, 1:23 p.m. UTC | #8
On Tue, Jul 28, 2020 at 3:12 PM Steven Rostedt <rostedt@goodmis.org> wrote:
> Where in that document does it say that trace events have a fixed size.
> We have a lot of dynamically sized trace events.

My mistake. From the "format" pseudo-file, I assumed the offset and
size were fixed.

> Please take a look at samples/trace_events/trace_events_sample.h
> and read the example on __print_symbolic().
> I think that's what you are looking for.

Ack, thanks for pointing these out. I still think that my other
argument (i.e. duplication of avc message) holds.
Paul Moore July 28, 2020, 3:12 p.m. UTC | #9
On Tue, Jul 28, 2020 at 8:49 AM Thiébaud Weksteen <tweek@google.com> wrote:
>
> Thanks for the review! I'll send a new revision of the patch with the
> %x formatter and using the TP_CONDITION macro.
>
> On adding further information to the trace event, I would prefer
> adding the strict minimum to be able to correlate the event with the
> avc message. The reason is that tracevents have a fixed size (see
> https://www.kernel.org/doc/Documentation/trace/events.txt). For
> instance, we would need to decide on a maximum size for the string
> representation of the list of permissions.

It sounds like this is no longer an issue, hopefully this changes your
thinking as I'm not sure how usable it would be in practice for users
not overly familiar with SELinux.

Perhaps it would be helpful if you provided an example of how one
would be expected to use this new tracepoint?  That would help put
things in the proper perspective.

> This would also duplicate
> the reporting done in the avc audit event. I'll simply add the pid as
> part of the printk, which should be sufficient for the correlation.

Well, to be honest, the very nature of this tracepoint is duplicating
the AVC audit record with a focus on using perf to establish a full
backtrace at the expense of reduced information.  At least that is how
it appears to me.
Joel Fernandes July 28, 2020, 3:22 p.m. UTC | #10
On Fri, Jul 24, 2020 at 5:15 AM Thiébaud Weksteen <tweek@google.com> wrote:
>
> The audit data currently captures which process and which target
> is responsible for a denial. There is no data on where exactly in the
> process that call occurred. Debugging can be made easier by being able to
> reconstruct the unified kernel and userland stack traces [1]. Add a
> tracepoint on the SELinux denials which can then be used by userland
> (i.e. perf).
>
> Although this patch could manually be added by each OS developer to
> trouble shoot a denial, adding it to the kernel streamlines the
> developers workflow.
>
> [1] https://source.android.com/devices/tech/debug/native_stack_dump
>
> Signed-off-by: Thiébaud Weksteen <tweek@google.com>
> Signed-off-by: Joel Fernandes <joelaf@google.com>

While I am in support of the general idea, could you change my SOB to
something like Inspired-by?

This is really your patch, but I did demonstrate the idea in an
article where the intention was to apply a patch out of tree to do
stack dumps / tracing.  SOB on the other hand is supposed to track the
flow of a patch (the people who the patch goes through) when it is
sent upstream.

Thanks,

 - Joel
Thiébaud Weksteen July 28, 2020, 4:02 p.m. UTC | #11
On Tue, Jul 28, 2020 at 5:12 PM Paul Moore <paul@paul-moore.com> wrote:
> Perhaps it would be helpful if you provided an example of how one
> would be expected to use this new tracepoint?  That would help put
> things in the proper perspective.

The best example is the one I provided in the commit message, that is
using perf (or a perf equivalent), to hook onto that tracepoint.

> Well, to be honest, the very nature of this tracepoint is duplicating
> the AVC audit record with a focus on using perf to establish a full
> backtrace at the expense of reduced information.  At least that is how
> it appears to me.

I see both methods as complementary. By default, the kernel itself can
do some reporting (i.e avc message) on which process triggered the
denial, what was the context, etc. This is useful even in production
and doesn't require any extra tooling.
The case for adding this tracepoint can be seen as advanced debugging.
That is, once an avc denial has been confirmed, a developer can use
this tracepoint to surface the userland stacktrace. It requires more
userland tools and symbols on the userland binaries.
Stephen Smalley July 28, 2020, 4:19 p.m. UTC | #12
On 7/28/20 12:02 PM, Thiébaud Weksteen wrote:

> On Tue, Jul 28, 2020 at 5:12 PM Paul Moore <paul@paul-moore.com> wrote:
>> Perhaps it would be helpful if you provided an example of how one
>> would be expected to use this new tracepoint?  That would help put
>> things in the proper perspective.
> The best example is the one I provided in the commit message, that is
> using perf (or a perf equivalent), to hook onto that tracepoint.
>
>> Well, to be honest, the very nature of this tracepoint is duplicating
>> the AVC audit record with a focus on using perf to establish a full
>> backtrace at the expense of reduced information.  At least that is how
>> it appears to me.
> I see both methods as complementary. By default, the kernel itself can
> do some reporting (i.e avc message) on which process triggered the
> denial, what was the context, etc. This is useful even in production
> and doesn't require any extra tooling.
> The case for adding this tracepoint can be seen as advanced debugging.
> That is, once an avc denial has been confirmed, a developer can use
> this tracepoint to surface the userland stacktrace. It requires more
> userland tools and symbols on the userland binaries.

Providing an example of the tracepoint output in the patch description 
would be helpful IMHO.
Paul Moore July 28, 2020, 4:20 p.m. UTC | #13
On Tue, Jul 28, 2020 at 12:02 PM Thiébaud Weksteen <tweek@google.com> wrote:
> On Tue, Jul 28, 2020 at 5:12 PM Paul Moore <paul@paul-moore.com> wrote:
> > Perhaps it would be helpful if you provided an example of how one
> > would be expected to use this new tracepoint?  That would help put
> > things in the proper perspective.
>
> The best example is the one I provided in the commit message, that is
> using perf (or a perf equivalent), to hook onto that tracepoint.

I probably wasn't as clear as I should have been.  I think it would be
helpful if you demonstrated how one would take the SELinux data in the
perf event and translated that into something meaningful.
Peter Enderborg July 30, 2020, 8:03 a.m. UTC | #14
On 7/28/20 6:02 PM, Thiébaud Weksteen wrote:
> On Tue, Jul 28, 2020 at 5:12 PM Paul Moore <paul@paul-moore.com> wrote:
>> Perhaps it would be helpful if you provided an example of how one
>> would be expected to use this new tracepoint?  That would help put
>> things in the proper perspective.
> The best example is the one I provided in the commit message, that is
> using perf (or a perf equivalent), to hook onto that tracepoint.
>
>> Well, to be honest, the very nature of this tracepoint is duplicating
>> the AVC audit record with a focus on using perf to establish a full
>> backtrace at the expense of reduced information.  At least that is how
>> it appears to me.
> I see both methods as complementary. By default, the kernel itself can
> do some reporting (i.e avc message) on which process triggered the
> denial, what was the context, etc. This is useful even in production
> and doesn't require any extra tooling.
> The case for adding this tracepoint can be seen as advanced debugging.
> That is, once an avc denial has been confirmed, a developer can use
> this tracepoint to surface the userland stacktrace. It requires more
> userland tools and symbols on the userland binaries.

I think that, from a development point of view, you would like to have a
better way to trap these events in userspace. One idea that I have is
to have more outcomes from a rule. Today we have allow, dontaudit and
auditallow; I think it would be good to be able to send a signal too,
e.g. "signal-xxx-allow" for some set of signals (SIGBUS, SIGSEGV, SIGABRT maybe).

That would be a good way to pick up the problem with a debugger or to
generate a core file.

I have also done some selinux trace functions. I think they collide with this set,
but I think I can rebase them upon yours and see if they give some more functionality.

I see this functionality very much needed in some form.
Thiébaud Weksteen July 30, 2020, 3:50 p.m. UTC | #15
On Tue, Jul 28, 2020 at 6:20 PM Paul Moore <paul@paul-moore.com> wrote:
> I probably wasn't as clear as I should have been.  I think it would be
> helpful if you demonstrated how one would take the SELinux data in the
> perf event and translated that into something meaningful.

So the data itself is not that relevant. What is important is the
ability to hook the kernel at the right location, at the right time.
Here is an example of how this patch can be used on Android
(simpleperf is the Android equivalent of perf), running dmesg as the
shell user which is not permitted:
# simpleperf record -e selinux:selinux_denied -a -g --duration 10
# simpleperf report -g --full-callgraph
Cmdline: /system/bin/simpleperf record -e selinux:selinux_denied -a -g
--duration 10
Arch: arm64
Event: selinux:selinux_denied (type 2, config 493)
Samples: 1
Event count: 1

Children  Self     Command  Pid   Tid   Shared Object
                 Symbol
100.00%   0.00%    dmesg    3511  3511
/apex/com.android.runtime/lib64/bionic/libc.so  __libc_init
       |
       -- __libc_init
          |
           -- main
              toybox_main
              toy_exec_which
              dmesg_main
              klogctl
              el0_svc_naked
              sys_syslog
              do_syslog
              security_syslog
              selinux_syslog
              avc_has_perm
              slow_avc_audit
              common_lsm_audit
              avc_audit_pre_callback

You can see the combined user and kernel stacks which is useful to
understand where and why the denial happened.
The key point is that simpleperf is doing the heavy lifting (i.e. name
resolution), while the kernel only shares the strict minimum for that
to happen.
This can be correlated with the pid of the avc denial message (I'm
assuming we are troubleshooting one specific denial).

It is also possible to manually use ftrace. For instance, after
enabling and triggering the denial:
bonito:/sys/kernel/debug/tracing # cat trace
# tracer: nop
#
# entries-in-buffer/entries-written: 1/1   #P:8
#
#                              _-----=> irqs-off
#                             / _----=> need-resched
#                            | / _---=> hardirq/softirq
#                            || / _--=> preempt-depth
#                            ||| /     delay
#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
#              | |       |   ||||       |         |
           dmesg-3624  [001] .... 13072.325358: selinux_denied: denied
pid=3624 tclass=4 audited=2

This can be correlated with the following avc denial:
[ 2180.183062] type=1400 audit(1596111144.026:27): avc: denied {
syslog_read } for comm="dmesg" scontext=u:r:shell:s0
tcontext=u:r:kernel:s0 tclass=system permissive=0
Here, there is limited value in having that tracepoint as we are only
duplicating the avc message content.

Nevertheless, the filtering part of Peter's patch would be useful to
be more precise on which denial we are targeting (I'll reply to the
other thread as well).
I hope this clarifies the usage. Thanks.
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index e64cdde81851..6b6cd5e13537 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15358,6 +15358,7 @@  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
 F:	Documentation/ABI/obsolete/sysfs-selinux-checkreqprot
 F:	Documentation/ABI/obsolete/sysfs-selinux-disable
 F:	Documentation/admin-guide/LSM/SELinux.rst
+F:	include/trace/events/selinux.h
 F:	include/uapi/linux/selinux_netlink.h
 F:	scripts/selinux/
 F:	security/selinux/
diff --git a/include/trace/events/selinux.h b/include/trace/events/selinux.h
new file mode 100644
index 000000000000..e247187a8135
--- /dev/null
+++ b/include/trace/events/selinux.h
@@ -0,0 +1,35 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM selinux
+
+#if !defined(_TRACE_SELINUX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SELINUX_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(selinux_denied,
+
+	TP_PROTO(int cls, int av),
+
+	TP_ARGS(cls, av),
+
+	TP_STRUCT__entry(
+		__field(int, cls)
+		__field(int, av)
+	),
+
+	TP_fast_assign(
+		__entry->cls = cls;
+		__entry->av = av;
+	),
+
+	TP_printk("denied %d %d",
+		__entry->cls,
+		__entry->av)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index d18cb32a242a..85d2e22ab656 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -31,6 +31,9 @@ 
 #include "avc_ss.h"
 #include "classmap.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/selinux.h>
+
 #define AVC_CACHE_SLOTS			512
 #define AVC_DEF_CACHE_THRESHOLD		512
 #define AVC_CACHE_RECLAIM		16
@@ -672,6 +675,9 @@  static void avc_audit_pre_callback(struct audit_buffer *ab, void *a)
 		return;
 	}
 
+	if (sad->denied)
+		trace_selinux_denied(sad->tclass, av);
+
 	perms = secclass_map[sad->tclass-1].perms;
 
 	audit_log_format(ab, " {");