Message ID | 1656667620-18718-2-git-send-email-alan.maguire@oracle.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | bpf: add a ksym BPF iterator | expand |
On 7/1/22 2:26 AM, Alan Maguire wrote: > add a "ksym" iterator which provides access to a "struct kallsym_iter" > for each symbol. Intent is to support more flexible symbol parsing > as discussed in [1]. > > [1] https://lore.kernel.org/all/YjRPZj6Z8vuLeEZo@krava/ > > Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com> > Signed-off-by: Alan Maguire <alan.maguire@oracle.com> > --- > kernel/kallsyms.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 89 insertions(+) > > diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c > index fbdf8d3..8b662da 100644 > --- a/kernel/kallsyms.c > +++ b/kernel/kallsyms.c > @@ -30,6 +30,7 @@ > #include <linux/module.h> > #include <linux/kernel.h> > #include <linux/bsearch.h> > +#include <linux/btf_ids.h> > > /* > * These will be re-linked against their real values > @@ -799,6 +800,91 @@ static int s_show(struct seq_file *m, void *p) > .show = s_show > }; > > +#ifdef CONFIG_BPF_SYSCALL > + > +struct bpf_iter__ksym { > + __bpf_md_ptr(struct bpf_iter_meta *, meta); > + __bpf_md_ptr(struct kallsym_iter *, ksym); > +}; > + > +static int ksym_prog_seq_show(struct seq_file *m, bool in_stop) > +{ > + struct bpf_iter__ksym ctx; > + struct bpf_iter_meta meta; > + struct bpf_prog *prog; > + > + meta.seq = m; > + prog = bpf_iter_get_info(&meta, in_stop); > + if (!prog) > + return 0; > + > + ctx.meta = &meta; > + ctx.ksym = m ? m->private : NULL; > + return bpf_iter_run_prog(prog, &ctx); > +} > + > +static int bpf_iter_ksym_seq_show(struct seq_file *m, void *p) > +{ > + return ksym_prog_seq_show(m, false); > +} > + > +static void bpf_iter_ksym_seq_stop(struct seq_file *m, void *p) > +{ > + if (!p) > + (void) ksym_prog_seq_show(m, true); > + else > + s_stop(m, p); > +} > + > +static const struct seq_operations bpf_iter_ksym_ops = { > + .start = s_start, > + .next = s_next, > + .stop = bpf_iter_ksym_seq_stop, > + .show = bpf_iter_ksym_seq_show, > +}; > + > +static int bpf_iter_ksym_init(void *priv_data, struct bpf_iter_aux_info *aux) > +{ > + struct kallsym_iter *iter = priv_data; > + > + reset_iter(iter, 0); > + > + iter->show_value = true; I think instead of always having show_value = true, we should have iter->show_value = kallsyms_show_value(...); this is consistent with what `cat /proc/kallsyms` is doing, and also consistent with bpf_dump_raw_ok() used when dumping various kernel info in syscall.c. We don't have a file here, so credential can be from the current process with current_cred(). > + > + return 0; > +} > + > +DEFINE_BPF_ITER_FUNC(ksym, struct bpf_iter_meta *meta, struct kallsym_iter *ksym) > + > +static const struct bpf_iter_seq_info ksym_iter_seq_info = { > + .seq_ops = &bpf_iter_ksym_ops, > + .init_seq_private = bpf_iter_ksym_init, > + .fini_seq_private = NULL, > + .seq_priv_size = sizeof(struct kallsym_iter), > +}; > + > +static struct bpf_iter_reg ksym_iter_reg_info = { > + .target = "ksym", > + .ctx_arg_info_size = 1, > + .ctx_arg_info = { > + { offsetof(struct bpf_iter__ksym, ksym), > + PTR_TO_BTF_ID_OR_NULL }, > + }, > + .seq_info = &ksym_iter_seq_info, > +}; > + > +BTF_ID_LIST(btf_ksym_iter_id) > +BTF_ID(struct, kallsym_iter) > + > +static void __init bpf_ksym_iter_register(void) > +{ > + ksym_iter_reg_info.ctx_arg_info[0].btf_id = *btf_ksym_iter_id; > + if (bpf_iter_reg_target(&ksym_iter_reg_info)) > + pr_warn("Warning: could not register bpf ksym iterator\n"); > +} > + > +#endif /* CONFIG_BPF_SYSCALL */ > + > static inline int kallsyms_for_perf(void) > { > #ifdef CONFIG_PERF_EVENTS > @@ -885,6 +971,9 @@ const char *kdb_walk_kallsyms(loff_t *pos) > static int __init kallsyms_init(void) > { > proc_create("kallsyms", 0444, NULL, &kallsyms_proc_ops); > +#if defined(CONFIG_BPF_SYSCALL) > + bpf_ksym_iter_register(); You can inline this function here and if bpf_iter_reg_target(...) failed, just return the error code. > +#endif > return 0; > } > device_initcall(kallsyms_init);
On Fri, Jul 1, 2022 at 10:58 PM Yonghong Song <yhs@fb.com> wrote: > > > > On 7/1/22 2:26 AM, Alan Maguire wrote: > > add a "ksym" iterator which provides access to a "struct kallsym_iter" > > for each symbol. Intent is to support more flexible symbol parsing > > as discussed in [1]. > > > > [1] https://lore.kernel.org/all/YjRPZj6Z8vuLeEZo@krava/ > > > > Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com> > > Signed-off-by: Alan Maguire <alan.maguire@oracle.com> > > --- > > kernel/kallsyms.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 89 insertions(+) > > > > diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c > > index fbdf8d3..8b662da 100644 > > --- a/kernel/kallsyms.c > > +++ b/kernel/kallsyms.c > > @@ -30,6 +30,7 @@ > > #include <linux/module.h> > > #include <linux/kernel.h> > > #include <linux/bsearch.h> > > +#include <linux/btf_ids.h> > > > > /* > > * These will be re-linked against their real values > > @@ -799,6 +800,91 @@ static int s_show(struct seq_file *m, void *p) > > .show = s_show > > }; > > > > +#ifdef CONFIG_BPF_SYSCALL > > + > > +struct bpf_iter__ksym { > > + __bpf_md_ptr(struct bpf_iter_meta *, meta); > > + __bpf_md_ptr(struct kallsym_iter *, ksym); > > +}; > > + > > +static int ksym_prog_seq_show(struct seq_file *m, bool in_stop) > > +{ > > + struct bpf_iter__ksym ctx; > > + struct bpf_iter_meta meta; > > + struct bpf_prog *prog; > > + > > + meta.seq = m; > > + prog = bpf_iter_get_info(&meta, in_stop); > > + if (!prog) > > + return 0; > > + > > + ctx.meta = &meta; > > + ctx.ksym = m ? m->private : NULL; > > + return bpf_iter_run_prog(prog, &ctx); > > +} > > + > > +static int bpf_iter_ksym_seq_show(struct seq_file *m, void *p) > > +{ > > + return ksym_prog_seq_show(m, false); > > +} > > + > > +static void bpf_iter_ksym_seq_stop(struct seq_file *m, void *p) > > +{ > > + if (!p) > > + (void) ksym_prog_seq_show(m, true); > > + else > > + s_stop(m, p); > > +} > > + > > +static const struct seq_operations bpf_iter_ksym_ops = { > > + .start = s_start, > > + .next = s_next, > > + .stop = bpf_iter_ksym_seq_stop, > > + .show = bpf_iter_ksym_seq_show, > > +}; > > + > > +static int bpf_iter_ksym_init(void *priv_data, struct bpf_iter_aux_info *aux) > > +{ > > + struct kallsym_iter *iter = priv_data; > > + > > + reset_iter(iter, 0); > > + > > + iter->show_value = true; > > I think instead of always having show_value = true, we should have > iter->show_value = kallsyms_show_value(...); > > this is consistent with what `cat /proc/kallsyms` is doing, and > also consistent with bpf_dump_raw_ok() used when dumping various > kernel info in syscall.c. > > We don't have a file here, so credential can be from the current > process with current_cred(). This seems wrong to use current_cred(). show_value is used to not "leak" pointer values to unprivileged user-space, right? In our case BPF iterator is privileged, so there is no need to hide (or mangle, didn't check) values. If it happens that a privileged process loads iter/ksym program and then passes prog FD to unprivileged one to read iterator output, iter/ksym should still get correct symbol values. I think the initial approach with show_value = true is the right one -- give all the information as it is to BPF iterator. > > > + > > + return 0; > > +} > > + > > +DEFINE_BPF_ITER_FUNC(ksym, struct bpf_iter_meta *meta, struct kallsym_iter *ksym) > > + > > +static const struct bpf_iter_seq_info ksym_iter_seq_info = { > > + .seq_ops = &bpf_iter_ksym_ops, > > + .init_seq_private = bpf_iter_ksym_init, > > + .fini_seq_private = NULL, > > + .seq_priv_size = sizeof(struct kallsym_iter), > > +}; > > + > > +static struct bpf_iter_reg ksym_iter_reg_info = { > > + .target = "ksym", > > + .ctx_arg_info_size = 1, > > + .ctx_arg_info = { > > + { offsetof(struct bpf_iter__ksym, ksym), > > + PTR_TO_BTF_ID_OR_NULL }, > > + }, > > + .seq_info = &ksym_iter_seq_info, > > +}; > > + > > +BTF_ID_LIST(btf_ksym_iter_id) > > +BTF_ID(struct, kallsym_iter) > > + > > +static void __init bpf_ksym_iter_register(void) > > +{ > > + ksym_iter_reg_info.ctx_arg_info[0].btf_id = *btf_ksym_iter_id; > > + if (bpf_iter_reg_target(&ksym_iter_reg_info)) > > + pr_warn("Warning: could not register bpf ksym iterator\n"); > > +} > > + > > +#endif /* CONFIG_BPF_SYSCALL */ > > + > > static inline int kallsyms_for_perf(void) > > { > > #ifdef CONFIG_PERF_EVENTS > > @@ -885,6 +971,9 @@ const char *kdb_walk_kallsyms(loff_t *pos) > > static int __init kallsyms_init(void) > > { > > proc_create("kallsyms", 0444, NULL, &kallsyms_proc_ops); > > +#if defined(CONFIG_BPF_SYSCALL) > > + bpf_ksym_iter_register(); > > You can inline this function here and if bpf_iter_reg_target(...) > failed, just return the error code. > > > +#endif > > return 0; > > } > > device_initcall(kallsyms_init);
On Tue, Jul 5, 2022 at 9:44 PM Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote: > > On Fri, Jul 1, 2022 at 10:58 PM Yonghong Song <yhs@fb.com> wrote: > > > > > > > > On 7/1/22 2:26 AM, Alan Maguire wrote: > > > add a "ksym" iterator which provides access to a "struct kallsym_iter" > > > for each symbol. Intent is to support more flexible symbol parsing > > > as discussed in [1]. > > > > > > [1] https://lore.kernel.org/all/YjRPZj6Z8vuLeEZo@krava/ > > > > > > Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com> > > > Signed-off-by: Alan Maguire <alan.maguire@oracle.com> > > > --- > > > kernel/kallsyms.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > > > 1 file changed, 89 insertions(+) > > > > > > diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c > > > index fbdf8d3..8b662da 100644 > > > --- a/kernel/kallsyms.c > > > +++ b/kernel/kallsyms.c > > > @@ -30,6 +30,7 @@ > > > #include <linux/module.h> > > > #include <linux/kernel.h> > > > #include <linux/bsearch.h> > > > +#include <linux/btf_ids.h> > > > > > > /* > > > * These will be re-linked against their real values > > > @@ -799,6 +800,91 @@ static int s_show(struct seq_file *m, void *p) > > > .show = s_show > > > }; > > > > > > +#ifdef CONFIG_BPF_SYSCALL > > > + > > > +struct bpf_iter__ksym { > > > + __bpf_md_ptr(struct bpf_iter_meta *, meta); > > > + __bpf_md_ptr(struct kallsym_iter *, ksym); > > > +}; > > > + > > > +static int ksym_prog_seq_show(struct seq_file *m, bool in_stop) > > > +{ > > > + struct bpf_iter__ksym ctx; > > > + struct bpf_iter_meta meta; > > > + struct bpf_prog *prog; > > > + > > > + meta.seq = m; > > > + prog = bpf_iter_get_info(&meta, in_stop); > > > + if (!prog) > > > + return 0; > > > + > > > + ctx.meta = &meta; > > > + ctx.ksym = m ? m->private : NULL; > > > + return bpf_iter_run_prog(prog, &ctx); > > > +} > > > + > > > +static int bpf_iter_ksym_seq_show(struct seq_file *m, void *p) > > > +{ > > > + return ksym_prog_seq_show(m, false); > > > +} > > > + > > > +static void bpf_iter_ksym_seq_stop(struct seq_file *m, void *p) > > > +{ > > > + if (!p) > > > + (void) ksym_prog_seq_show(m, true); > > > + else > > > + s_stop(m, p); > > > +} > > > + > > > +static const struct seq_operations bpf_iter_ksym_ops = { > > > + .start = s_start, > > > + .next = s_next, > > > + .stop = bpf_iter_ksym_seq_stop, > > > + .show = bpf_iter_ksym_seq_show, > > > +}; > > > + > > > +static int bpf_iter_ksym_init(void *priv_data, struct bpf_iter_aux_info *aux) > > > +{ > > > + struct kallsym_iter *iter = priv_data; > > > + > > > + reset_iter(iter, 0); > > > + > > > + iter->show_value = true; > > > > I think instead of always having show_value = true, we should have > > iter->show_value = kallsyms_show_value(...); > > > > this is consistent with what `cat /proc/kallsyms` is doing, and > > also consistent with bpf_dump_raw_ok() used when dumping various > > kernel info in syscall.c. > > > > We don't have a file here, so credential can be from the current > > process with current_cred(). > > This seems wrong to use current_cred(). show_value is used to not > "leak" pointer values to unprivileged user-space, right? In our case > BPF iterator is privileged, so there is no need to hide (or mangle, > didn't check) values. > > If it happens that a privileged process loads iter/ksym program and > then passes prog FD to unprivileged one to read iterator output, > iter/ksym should still get correct symbol values. > > I think the initial approach with show_value = true is the right one > -- give all the information as it is to BPF iterator. Ok, I should have looked at the selftest first. Seems like this just passes indicator to iter/ksym program and program can choose to ignore it, if necessary. In which case I retract my comment, sorry :) > > > > > > > + > > > + return 0; > > > +} > > > + > > > +DEFINE_BPF_ITER_FUNC(ksym, struct bpf_iter_meta *meta, struct kallsym_iter *ksym) > > > + > > > +static const struct bpf_iter_seq_info ksym_iter_seq_info = { > > > + .seq_ops = &bpf_iter_ksym_ops, > > > + .init_seq_private = bpf_iter_ksym_init, > > > + .fini_seq_private = NULL, > > > + .seq_priv_size = sizeof(struct kallsym_iter), > > > +}; > > > + > > > +static struct bpf_iter_reg ksym_iter_reg_info = { > > > + .target = "ksym", > > > + .ctx_arg_info_size = 1, > > > + .ctx_arg_info = { > > > + { offsetof(struct bpf_iter__ksym, ksym), > > > + PTR_TO_BTF_ID_OR_NULL }, > > > + }, > > > + .seq_info = &ksym_iter_seq_info, > > > +}; > > > + > > > +BTF_ID_LIST(btf_ksym_iter_id) > > > +BTF_ID(struct, kallsym_iter) > > > + > > > +static void __init bpf_ksym_iter_register(void) > > > +{ > > > + ksym_iter_reg_info.ctx_arg_info[0].btf_id = *btf_ksym_iter_id; > > > + if (bpf_iter_reg_target(&ksym_iter_reg_info)) > > > + pr_warn("Warning: could not register bpf ksym iterator\n"); > > > +} > > > + > > > +#endif /* CONFIG_BPF_SYSCALL */ > > > + > > > static inline int kallsyms_for_perf(void) > > > { > > > #ifdef CONFIG_PERF_EVENTS > > > @@ -885,6 +971,9 @@ const char *kdb_walk_kallsyms(loff_t *pos) > > > static int __init kallsyms_init(void) > > > { > > > proc_create("kallsyms", 0444, NULL, &kallsyms_proc_ops); > > > +#if defined(CONFIG_BPF_SYSCALL) > > > + bpf_ksym_iter_register(); > > > > You can inline this function here and if bpf_iter_reg_target(...) > > failed, just return the error code. > > > > > +#endif > > > return 0; > > > } > > > device_initcall(kallsyms_init);
On Fri, Jul 1, 2022 at 2:28 AM Alan Maguire <alan.maguire@oracle.com> wrote: > > add a "ksym" iterator which provides access to a "struct kallsym_iter" > for each symbol. Intent is to support more flexible symbol parsing > as discussed in [1]. > > [1] https://lore.kernel.org/all/YjRPZj6Z8vuLeEZo@krava/ > > Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com> > Signed-off-by: Alan Maguire <alan.maguire@oracle.com> > --- > kernel/kallsyms.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 89 insertions(+) > > diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c > index fbdf8d3..8b662da 100644 > --- a/kernel/kallsyms.c > +++ b/kernel/kallsyms.c > @@ -30,6 +30,7 @@ [...] > + > +static struct bpf_iter_reg ksym_iter_reg_info = { > + .target = "ksym", > + .ctx_arg_info_size = 1, > + .ctx_arg_info = { > + { offsetof(struct bpf_iter__ksym, ksym), > + PTR_TO_BTF_ID_OR_NULL }, > + }, > + .seq_info = &ksym_iter_seq_info, > +}; It would be great to allow cond_resched() while iterating. Disabling resched is unnecessary for iterating ksyms IMO. .feature = BPF_ITER_RESCHED, Hao > + [...] > -- > 1.8.3.1 >
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fbdf8d3..8b662da 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/bsearch.h> +#include <linux/btf_ids.h> /* * These will be re-linked against their real values @@ -799,6 +800,91 @@ static int s_show(struct seq_file *m, void *p) .show = s_show }; +#ifdef CONFIG_BPF_SYSCALL + +struct bpf_iter__ksym { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct kallsym_iter *, ksym); +}; + +static int ksym_prog_seq_show(struct seq_file *m, bool in_stop) +{ + struct bpf_iter__ksym ctx; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + + meta.seq = m; + prog = bpf_iter_get_info(&meta, in_stop); + if (!prog) + return 0; + + ctx.meta = &meta; + ctx.ksym = m ? m->private : NULL; + return bpf_iter_run_prog(prog, &ctx); +} + +static int bpf_iter_ksym_seq_show(struct seq_file *m, void *p) +{ + return ksym_prog_seq_show(m, false); +} + +static void bpf_iter_ksym_seq_stop(struct seq_file *m, void *p) +{ + if (!p) + (void) ksym_prog_seq_show(m, true); + else + s_stop(m, p); +} + +static const struct seq_operations bpf_iter_ksym_ops = { + .start = s_start, + .next = s_next, + .stop = bpf_iter_ksym_seq_stop, + .show = bpf_iter_ksym_seq_show, +}; + +static int bpf_iter_ksym_init(void *priv_data, struct bpf_iter_aux_info *aux) +{ + struct kallsym_iter *iter = priv_data; + + reset_iter(iter, 0); + + iter->show_value = true; + + return 0; +} + +DEFINE_BPF_ITER_FUNC(ksym, struct bpf_iter_meta *meta, struct kallsym_iter *ksym) + +static const struct bpf_iter_seq_info ksym_iter_seq_info = { + .seq_ops = &bpf_iter_ksym_ops, + .init_seq_private = bpf_iter_ksym_init, + .fini_seq_private = NULL, + .seq_priv_size = sizeof(struct kallsym_iter), +}; + +static struct bpf_iter_reg ksym_iter_reg_info = { + .target = "ksym", + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { offsetof(struct bpf_iter__ksym, ksym), + PTR_TO_BTF_ID_OR_NULL }, + }, + .seq_info = &ksym_iter_seq_info, +}; + +BTF_ID_LIST(btf_ksym_iter_id) +BTF_ID(struct, kallsym_iter) + +static void __init bpf_ksym_iter_register(void) +{ + ksym_iter_reg_info.ctx_arg_info[0].btf_id = *btf_ksym_iter_id; + if (bpf_iter_reg_target(&ksym_iter_reg_info)) + pr_warn("Warning: could not register bpf ksym iterator\n"); +} + +#endif /* CONFIG_BPF_SYSCALL */ + static inline int kallsyms_for_perf(void) { #ifdef CONFIG_PERF_EVENTS @@ -885,6 +971,9 @@ const char *kdb_walk_kallsyms(loff_t *pos) static int __init kallsyms_init(void) { proc_create("kallsyms", 0444, NULL, &kallsyms_proc_ops); +#if defined(CONFIG_BPF_SYSCALL) + bpf_ksym_iter_register(); +#endif return 0; } device_initcall(kallsyms_init);
add a "ksym" iterator which provides access to a "struct kallsym_iter" for each symbol. Intent is to support more flexible symbol parsing as discussed in [1]. [1] https://lore.kernel.org/all/YjRPZj6Z8vuLeEZo@krava/ Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com> Signed-off-by: Alan Maguire <alan.maguire@oracle.com> --- kernel/kallsyms.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+)