Message ID | c93309dc-b920-f5fa-f997-e8b2faf47b88@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Introduce CAP_SYS_PERFMON to secure system performance monitoring and observability | expand |
On Wed, Dec 18, 2019 at 12:25:35PM +0300, Alexey Budankov wrote: > > Open access to perf_events monitoring for CAP_SYS_PERFMON privileged > processes. For backward compatibility reasons access to perf_events > subsystem remains open for CAP_SYS_ADMIN privileged processes but > CAP_SYS_ADMIN usage for secure perf_events monitoring is discouraged > with respect to CAP_SYS_PERFMON capability. > > Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> > --- > include/linux/perf_event.h | 6 +++--- > kernel/events/core.c | 6 +++--- > 2 files changed, 6 insertions(+), 6 deletions(-) > > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h > index 34c7c6910026..f46acd69425f 100644 > --- a/include/linux/perf_event.h > +++ b/include/linux/perf_event.h > @@ -1285,7 +1285,7 @@ static inline int perf_is_paranoid(void) > > static inline int perf_allow_kernel(struct perf_event_attr *attr) > { > - if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) > + if (sysctl_perf_event_paranoid > 1 && !perfmon_capable()) > return -EACCES; > > return security_perf_event_open(attr, PERF_SECURITY_KERNEL); > @@ -1293,7 +1293,7 @@ static inline int perf_allow_kernel(struct perf_event_attr *attr) > > static inline int perf_allow_cpu(struct perf_event_attr *attr) > { > - if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN)) > + if (sysctl_perf_event_paranoid > 0 && !perfmon_capable()) > return -EACCES; > > return security_perf_event_open(attr, PERF_SECURITY_CPU); > @@ -1301,7 +1301,7 @@ static inline int perf_allow_cpu(struct perf_event_attr *attr) > > static inline int perf_allow_tracepoint(struct perf_event_attr *attr) > { > - if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN)) > + if (sysctl_perf_event_paranoid > -1 && !perfmon_capable()) > return -EPERM; > > return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); These are OK I suppose. > diff --git a/kernel/events/core.c b/kernel/events/core.c > index 059ee7116008..d9db414f2197 100644 > --- a/kernel/events/core.c > +++ b/kernel/events/core.c > @@ -9056,7 +9056,7 @@ static int perf_kprobe_event_init(struct perf_event *event) > if (event->attr.type != perf_kprobe.type) > return -ENOENT; > > - if (!capable(CAP_SYS_ADMIN)) > + if (!perfmon_capable()) > return -EACCES; > > /* This one only allows attaching to already extant kprobes, right? It does not allow creation of kprobes. > @@ -9116,7 +9116,7 @@ static int perf_uprobe_event_init(struct perf_event *event) > if (event->attr.type != perf_uprobe.type) > return -ENOENT; > > - if (!capable(CAP_SYS_ADMIN)) > + if (!perfmon_capable()) > return -EACCES; > > /* Idem, I presume. > @@ -11157,7 +11157,7 @@ SYSCALL_DEFINE5(perf_event_open, > } > > if (attr.namespaces) { > - if (!capable(CAP_SYS_ADMIN)) > + if (!perfmon_capable()) > return -EACCES; > } And given we basically make the entire kernel observable with this CAP, busting namespaces shoulnd't be a problem either. So yeah, I suppose that works.
On 08.01.2020 19:07, Peter Zijlstra wrote: > On Wed, Dec 18, 2019 at 12:25:35PM +0300, Alexey Budankov wrote: >> >> Open access to perf_events monitoring for CAP_SYS_PERFMON privileged >> processes. For backward compatibility reasons access to perf_events >> subsystem remains open for CAP_SYS_ADMIN privileged processes but >> CAP_SYS_ADMIN usage for secure perf_events monitoring is discouraged >> with respect to CAP_SYS_PERFMON capability. >> >> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> >> --- >> include/linux/perf_event.h | 6 +++--- >> kernel/events/core.c | 6 +++--- >> 2 files changed, 6 insertions(+), 6 deletions(-) >> >> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h >> index 34c7c6910026..f46acd69425f 100644 >> --- a/include/linux/perf_event.h >> +++ b/include/linux/perf_event.h >> @@ -1285,7 +1285,7 @@ static inline int perf_is_paranoid(void) >> >> static inline int perf_allow_kernel(struct perf_event_attr *attr) >> { >> - if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) >> + if (sysctl_perf_event_paranoid > 1 && !perfmon_capable()) >> return -EACCES; >> >> return security_perf_event_open(attr, PERF_SECURITY_KERNEL); >> @@ -1293,7 +1293,7 @@ static inline int perf_allow_kernel(struct perf_event_attr *attr) >> >> static inline int perf_allow_cpu(struct perf_event_attr *attr) >> { >> - if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN)) >> + if (sysctl_perf_event_paranoid > 0 && !perfmon_capable()) >> return -EACCES; >> >> return security_perf_event_open(attr, PERF_SECURITY_CPU); >> @@ -1301,7 +1301,7 @@ static inline int perf_allow_cpu(struct perf_event_attr *attr) >> >> static inline int perf_allow_tracepoint(struct perf_event_attr *attr) >> { >> - if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN)) >> + if (sysctl_perf_event_paranoid > -1 && !perfmon_capable()) >> return -EPERM; >> >> return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); > > These are OK I suppose. > >> diff --git a/kernel/events/core.c b/kernel/events/core.c >> index 059ee7116008..d9db414f2197 100644 >> --- a/kernel/events/core.c >> +++ b/kernel/events/core.c >> @@ -9056,7 +9056,7 @@ static int perf_kprobe_event_init(struct perf_event *event) >> if (event->attr.type != perf_kprobe.type) >> return -ENOENT; >> >> - if (!capable(CAP_SYS_ADMIN)) >> + if (!perfmon_capable()) >> return -EACCES; >> >> /* > > This one only allows attaching to already extant kprobes, right? It does > not allow creation of kprobes. This unblocks creation of local trace kprobes and uprobes by CAP_SYS_PERFMON privileged process, exactly the same as for CAP_SYS_ADMIN privileged process. > >> @@ -9116,7 +9116,7 @@ static int perf_uprobe_event_init(struct perf_event *event) >> if (event->attr.type != perf_uprobe.type) >> return -ENOENT; >> >> - if (!capable(CAP_SYS_ADMIN)) >> + if (!perfmon_capable()) >> return -EACCES; >> >> /* > > Idem, I presume. > >> @@ -11157,7 +11157,7 @@ SYSCALL_DEFINE5(perf_event_open, >> } >> >> if (attr.namespaces) { >> - if (!capable(CAP_SYS_ADMIN)) >> + if (!perfmon_capable()) >> return -EACCES; >> } > > And given we basically make the entire kernel observable with this CAP, > busting namespaces shoulnd't be a problem either. > > So yeah, I suppose that works. >
On Thu, Jan 09, 2020 at 02:36:50PM +0300, Alexey Budankov wrote: > On 08.01.2020 19:07, Peter Zijlstra wrote: > > On Wed, Dec 18, 2019 at 12:25:35PM +0300, Alexey Budankov wrote: > >> diff --git a/kernel/events/core.c b/kernel/events/core.c > >> index 059ee7116008..d9db414f2197 100644 > >> --- a/kernel/events/core.c > >> +++ b/kernel/events/core.c > >> @@ -9056,7 +9056,7 @@ static int perf_kprobe_event_init(struct perf_event *event) > >> if (event->attr.type != perf_kprobe.type) > >> return -ENOENT; > >> > >> - if (!capable(CAP_SYS_ADMIN)) > >> + if (!perfmon_capable()) > >> return -EACCES; > >> > >> /* > > > > This one only allows attaching to already extant kprobes, right? It does > > not allow creation of kprobes. > > This unblocks creation of local trace kprobes and uprobes by CAP_SYS_PERFMON > privileged process, exactly the same as for CAP_SYS_ADMIN privileged process. I've no idea what you just said; it's just words. Again, this only allows attaching to previously created kprobes, it does not allow creating kprobes, right? That is; I don't think CAP_SYS_PERFMON should be allowed to create kprobes. As might be clear; I don't actually know what the user-ABI is for creating kprobes.
On Fri, 10 Jan 2020 15:02:34 +0100 Peter Zijlstra <peterz@infradead.org> wrote: > On Thu, Jan 09, 2020 at 02:36:50PM +0300, Alexey Budankov wrote: > > On 08.01.2020 19:07, Peter Zijlstra wrote: > > > On Wed, Dec 18, 2019 at 12:25:35PM +0300, Alexey Budankov wrote: > > > >> diff --git a/kernel/events/core.c b/kernel/events/core.c > > >> index 059ee7116008..d9db414f2197 100644 > > >> --- a/kernel/events/core.c > > >> +++ b/kernel/events/core.c > > >> @@ -9056,7 +9056,7 @@ static int perf_kprobe_event_init(struct perf_event *event) > > >> if (event->attr.type != perf_kprobe.type) > > >> return -ENOENT; > > >> > > >> - if (!capable(CAP_SYS_ADMIN)) > > >> + if (!perfmon_capable()) > > >> return -EACCES; > > >> > > >> /* > > > > > > This one only allows attaching to already extant kprobes, right? It does > > > not allow creation of kprobes. > > > > This unblocks creation of local trace kprobes and uprobes by CAP_SYS_PERFMON > > privileged process, exactly the same as for CAP_SYS_ADMIN privileged process. > > I've no idea what you just said; it's just words. > > Again, this only allows attaching to previously created kprobes, it does > not allow creating kprobes, right? > > That is; I don't think CAP_SYS_PERFMON should be allowed to create > kprobes. > > As might be clear; I don't actually know what the user-ABI is for > creating kprobes. There are 2 ABIs nowadays, ftrace and ebpf. perf-probe uses ftrace interface to define new kprobe events, and those events are treated as completely same as tracepoint events. On the other hand, ebpf tries to define new probe event via perf_event interface. Above one is that interface. IOW, it creates new kprobe. Thank you,
On 10.01.2020 17:02, Peter Zijlstra wrote: > On Thu, Jan 09, 2020 at 02:36:50PM +0300, Alexey Budankov wrote: >> On 08.01.2020 19:07, Peter Zijlstra wrote: >>> On Wed, Dec 18, 2019 at 12:25:35PM +0300, Alexey Budankov wrote: > >>>> diff --git a/kernel/events/core.c b/kernel/events/core.c >>>> index 059ee7116008..d9db414f2197 100644 >>>> --- a/kernel/events/core.c >>>> +++ b/kernel/events/core.c >>>> @@ -9056,7 +9056,7 @@ static int perf_kprobe_event_init(struct perf_event *event) >>>> if (event->attr.type != perf_kprobe.type) >>>> return -ENOENT; >>>> >>>> - if (!capable(CAP_SYS_ADMIN)) >>>> + if (!perfmon_capable()) >>>> return -EACCES; >>>> >>>> /* >>> >>> This one only allows attaching to already extant kprobes, right? It does >>> not allow creation of kprobes. >> >> This unblocks creation of local trace kprobes and uprobes by CAP_SYS_PERFMON >> privileged process, exactly the same as for CAP_SYS_ADMIN privileged process. > > I've no idea what you just said; it's just words. > > Again, this only allows attaching to previously created kprobes, it does > not allow creating kprobes, right? Not really, this allows creating a kprobe using perf_event_open syscall that associates file descriptor with the kprobe [1]. Lifetime of that kprobe is equal to the lifetime of the file descriptor and the kprobe is not visible in tracefs: /sys/kernel/debug/tracing/kprobe_events > > That is; I don't think CAP_SYS_PERFMON should be allowed to create > kprobes. > > As might be clear; I don't actually know what the user-ABI is for > creating kprobes. ~Alexey --- [1] https://lore.kernel.org/lkml/20171206224518.3598254-1-songliubraving@fb.com/
Em Sat, Jan 11, 2020 at 12:52:13AM +0900, Masami Hiramatsu escreveu: > On Fri, 10 Jan 2020 15:02:34 +0100 Peter Zijlstra <peterz@infradead.org> wrote: > > Again, this only allows attaching to previously created kprobes, it does > > not allow creating kprobes, right? > > That is; I don't think CAP_SYS_PERFMON should be allowed to create > > kprobes. > > As might be clear; I don't actually know what the user-ABI is for > > creating kprobes. > There are 2 ABIs nowadays, ftrace and ebpf. perf-probe uses ftrace interface to > define new kprobe events, and those events are treated as completely same as > tracepoint events. On the other hand, ebpf tries to define new probe event > via perf_event interface. Above one is that interface. IOW, it creates new kprobe. Masami, any plans to make 'perf probe' use the perf_event_open() interface for creating kprobes/uprobes? - Arnaldo
On 10.01.2020 17:02, Peter Zijlstra wrote: > On Thu, Jan 09, 2020 at 02:36:50PM +0300, Alexey Budankov wrote: >> On 08.01.2020 19:07, Peter Zijlstra wrote: >>> On Wed, Dec 18, 2019 at 12:25:35PM +0300, Alexey Budankov wrote: > >>>> diff --git a/kernel/events/core.c b/kernel/events/core.c >>>> index 059ee7116008..d9db414f2197 100644 >>>> --- a/kernel/events/core.c >>>> +++ b/kernel/events/core.c >>>> @@ -9056,7 +9056,7 @@ static int perf_kprobe_event_init(struct perf_event *event) >>>> if (event->attr.type != perf_kprobe.type) >>>> return -ENOENT; >>>> >>>> - if (!capable(CAP_SYS_ADMIN)) >>>> + if (!perfmon_capable()) >>>> return -EACCES; >>>> >>>> /* >>> >>> This one only allows attaching to already extant kprobes, right? It does >>> not allow creation of kprobes. >> >> This unblocks creation of local trace kprobes and uprobes by CAP_SYS_PERFMON >> privileged process, exactly the same as for CAP_SYS_ADMIN privileged process. > > I've no idea what you just said; it's just words. > > Again, this only allows attaching to previously created kprobes, it does > not allow creating kprobes, right? Not really, this allows creating a kprobe using perf_event_open syscall that associates file descriptor with the kprobe [1]. Lifetime of that kprobe is equal to the lifetime of the file descriptor and the kprobe is not visible in tracefs: /sys/kernel/debug/tracing/kprobe_events > > That is; I don't think CAP_SYS_PERFMON should be allowed to create > kprobes. > > As might be clear; I don't actually know what the user-ABI is for > creating kprobes. > ~Alexey [1] https://lore.kernel.org/lkml/20171206224518.3598254-1-songliubraving@fb.com/
On Fri, 10 Jan 2020 13:45:31 -0300 Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > Em Sat, Jan 11, 2020 at 12:52:13AM +0900, Masami Hiramatsu escreveu: > > On Fri, 10 Jan 2020 15:02:34 +0100 Peter Zijlstra <peterz@infradead.org> wrote: > > > Again, this only allows attaching to previously created kprobes, it does > > > not allow creating kprobes, right? > > > > That is; I don't think CAP_SYS_PERFMON should be allowed to create > > > kprobes. > > > > As might be clear; I don't actually know what the user-ABI is for > > > creating kprobes. > > > There are 2 ABIs nowadays, ftrace and ebpf. perf-probe uses ftrace interface to > > define new kprobe events, and those events are treated as completely same as > > tracepoint events. On the other hand, ebpf tries to define new probe event > > via perf_event interface. Above one is that interface. IOW, it creates new kprobe. > > Masami, any plans to make 'perf probe' use the perf_event_open() > interface for creating kprobes/uprobes? Would you mean perf probe to switch to perf_event_open()? No, perf probe is for setting up the ftrace probe events. I think we can add an option to use perf_event_open(). But current kprobe creation from perf_event_open() is separated from ftrace by design. I think the reason why ebpf uses perf_event_open() interface is to avoid conflict with ftrace users. Those probes are temporally used by ebpf, but if it is appeared on ftrace, it is easy to be used by ftrace. In that case, it can not be removed when the ebpf exits. Thank you,
> On Jan 10, 2020, at 3:47 PM, Masami Hiramatsu <mhiramat@kernel.org> wrote: > > On Fri, 10 Jan 2020 13:45:31 -0300 > Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > >> Em Sat, Jan 11, 2020 at 12:52:13AM +0900, Masami Hiramatsu escreveu: >>> On Fri, 10 Jan 2020 15:02:34 +0100 Peter Zijlstra <peterz@infradead.org> wrote: >>>> Again, this only allows attaching to previously created kprobes, it does >>>> not allow creating kprobes, right? >> >>>> That is; I don't think CAP_SYS_PERFMON should be allowed to create >>>> kprobes. >> >>>> As might be clear; I don't actually know what the user-ABI is for >>>> creating kprobes. >> >>> There are 2 ABIs nowadays, ftrace and ebpf. perf-probe uses ftrace interface to >>> define new kprobe events, and those events are treated as completely same as >>> tracepoint events. On the other hand, ebpf tries to define new probe event >>> via perf_event interface. Above one is that interface. IOW, it creates new kprobe. >> >> Masami, any plans to make 'perf probe' use the perf_event_open() >> interface for creating kprobes/uprobes? > > Would you mean perf probe to switch to perf_event_open()? > No, perf probe is for setting up the ftrace probe events. I think we can add an > option to use perf_event_open(). But current kprobe creation from perf_event_open() > is separated from ftrace by design. I guess we can extend event parser to understand kprobe directly. Instead of perf probe kernel_func perf stat/record -e probe:kernel_func ... We can just do perf stat/record -e kprobe:kernel_func ... Thanks, Song
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 34c7c6910026..f46acd69425f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1285,7 +1285,7 @@ static inline int perf_is_paranoid(void) static inline int perf_allow_kernel(struct perf_event_attr *attr) { - if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) + if (sysctl_perf_event_paranoid > 1 && !perfmon_capable()) return -EACCES; return security_perf_event_open(attr, PERF_SECURITY_KERNEL); @@ -1293,7 +1293,7 @@ static inline int perf_allow_kernel(struct perf_event_attr *attr) static inline int perf_allow_cpu(struct perf_event_attr *attr) { - if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN)) + if (sysctl_perf_event_paranoid > 0 && !perfmon_capable()) return -EACCES; return security_perf_event_open(attr, PERF_SECURITY_CPU); @@ -1301,7 +1301,7 @@ static inline int perf_allow_cpu(struct perf_event_attr *attr) static inline int perf_allow_tracepoint(struct perf_event_attr *attr) { - if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN)) + if (sysctl_perf_event_paranoid > -1 && !perfmon_capable()) return -EPERM; return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); diff --git a/kernel/events/core.c b/kernel/events/core.c index 059ee7116008..d9db414f2197 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9056,7 +9056,7 @@ static int perf_kprobe_event_init(struct perf_event *event) if (event->attr.type != perf_kprobe.type) return -ENOENT; - if (!capable(CAP_SYS_ADMIN)) + if (!perfmon_capable()) return -EACCES; /* @@ -9116,7 +9116,7 @@ static int perf_uprobe_event_init(struct perf_event *event) if (event->attr.type != perf_uprobe.type) return -ENOENT; - if (!capable(CAP_SYS_ADMIN)) + if (!perfmon_capable()) return -EACCES; /* @@ -11157,7 +11157,7 @@ SYSCALL_DEFINE5(perf_event_open, } if (attr.namespaces) { - if (!capable(CAP_SYS_ADMIN)) + if (!perfmon_capable()) return -EACCES; }
Open access to perf_events monitoring for CAP_SYS_PERFMON privileged processes. For backward compatibility reasons access to perf_events subsystem remains open for CAP_SYS_ADMIN privileged processes but CAP_SYS_ADMIN usage for secure perf_events monitoring is discouraged with respect to CAP_SYS_PERFMON capability. Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> --- include/linux/perf_event.h | 6 +++--- kernel/events/core.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-)