| Message ID | 20230622095330.1023453-2-aspsk@isovalent.com (mailing list archive) |
|---|---|
| State | Superseded |
| Delegated to: | BPF |
| Series | bpf: add percpu stats for bpf_map |
On Thu, Jun 22, 2023 at 09:53:27AM +0000, Anton Protopopov wrote:
> Add a generic percpu stats for bpf_map elements insertions/deletions in order
> to keep track of both, the current (approximate) number of elements in a map
> and per-cpu statistics on update/delete operations.
>
> To expose these stats a particular map implementation should initialize the
> counter and adjust it as needed using the 'bpf_map_*_elements_counter' helpers
> provided by this commit. The counter can be read by an iterator program.
>
> A bpf_map_sum_elements_counter kfunc was added to simplify getting the sum of
> the per-cpu values. If a map doesn't implement the counter, then it will always
> return 0.
>
> Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> ---
>  include/linux/bpf.h   | 30 +++++++++++++++++++++++++++
>  kernel/bpf/map_iter.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 77 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f58895830ada..20292a096188 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -275,6 +275,7 @@ struct bpf_map {
>          } owner;
>          bool bypass_spec_v1;
>          bool frozen; /* write-once; write-protected by freeze_mutex */
> +        s64 __percpu *elements_count;
>  };
>
>  static inline const char *btf_field_type_name(enum btf_field_type type)
> @@ -2040,6 +2041,35 @@ bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
>  }
>  #endif
>
> +static inline int
> +bpf_map_init_elements_counter(struct bpf_map *map)
> +{
> +        size_t size = sizeof(*map->elements_count), align = size;
> +        gfp_t flags = GFP_USER | __GFP_NOWARN;
> +
> +        map->elements_count = bpf_map_alloc_percpu(map, size, align, flags);
> +        if (!map->elements_count)
> +                return -ENOMEM;
> +
> +        return 0;
> +}
> +
> +static inline void
> +bpf_map_free_elements_counter(struct bpf_map *map)
> +{
> +        free_percpu(map->elements_count);
> +}
> +
> +static inline void bpf_map_inc_elements_counter(struct bpf_map *map)

bpf_map_inc_elem_count() to match existing inc_elem_count() ?

> +{
> +        this_cpu_inc(*map->elements_count);
> +}
> +
> +static inline void bpf_map_dec_elements_counter(struct bpf_map *map)
> +{
> +        this_cpu_dec(*map->elements_count);
> +}
> +
>  extern int sysctl_unprivileged_bpf_disabled;
>
>  static inline bool bpf_allow_ptr_leaks(void)
> diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
> index b0fa190b0979..26ca00dde962 100644
> --- a/kernel/bpf/map_iter.c
> +++ b/kernel/bpf/map_iter.c
> @@ -93,7 +93,7 @@ static struct bpf_iter_reg bpf_map_reg_info = {
>          .ctx_arg_info_size      = 1,
>          .ctx_arg_info           = {
>                  { offsetof(struct bpf_iter__bpf_map, map),
> -                  PTR_TO_BTF_ID_OR_NULL },
> +                  PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },

this and below should be in separate patch.

>          },
>          .seq_info               = &bpf_map_seq_info,
>  };
> @@ -193,3 +193,49 @@ static int __init bpf_map_iter_init(void)
>  }
>
>  late_initcall(bpf_map_iter_init);
> +
> +__diag_push();
> +__diag_ignore_all("-Wmissing-prototypes",
> +                  "Global functions as their definitions will be in vmlinux BTF");
> +
> +__bpf_kfunc s64 bpf_map_sum_elements_counter(struct bpf_map *map)
> +{
> +        s64 *pcount;
> +        s64 ret = 0;
> +        int cpu;
> +
> +        if (!map || !map->elements_count)
> +                return 0;
> +
> +        for_each_possible_cpu(cpu) {
> +                pcount = per_cpu_ptr(map->elements_count, cpu);
> +                ret += READ_ONCE(*pcount);
> +        }
> +        return ret;
> +}
> +
> +__diag_pop();
> +
> +BTF_SET8_START(bpf_map_iter_kfunc_ids)
> +BTF_ID_FLAGS(func, bpf_map_sum_elements_counter, KF_TRUSTED_ARGS)
> +BTF_SET8_END(bpf_map_iter_kfunc_ids)
> +
> +static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
> +{
> +        if (btf_id_set8_contains(&bpf_map_iter_kfunc_ids, kfunc_id) &&
> +            prog->expected_attach_type != BPF_TRACE_ITER)

why restrict to trace_iter?

> +                return -EACCES;
> +        return 0;
> +}
> +
> +static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = {
> +        .owner = THIS_MODULE,
> +        .set = &bpf_map_iter_kfunc_ids,
> +        .filter = tracing_iter_filter,
> +};
> +
> +static int init_subsystem(void)
> +{
> +        return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_map_iter_kfunc_set);
> +}
> +late_initcall(init_subsystem);
> --
> 2.34.1
>
On 6/22/23 11:53 AM, Anton Protopopov wrote:
> Add a generic percpu stats for bpf_map elements insertions/deletions in order
> to keep track of both, the current (approximate) number of elements in a map
> and per-cpu statistics on update/delete operations.
>
> To expose these stats a particular map implementation should initialize the
> counter and adjust it as needed using the 'bpf_map_*_elements_counter' helpers
> provided by this commit. The counter can be read by an iterator program.
>
> A bpf_map_sum_elements_counter kfunc was added to simplify getting the sum of
> the per-cpu values. If a map doesn't implement the counter, then it will always
> return 0.
>
> Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> ---
>  include/linux/bpf.h   | 30 +++++++++++++++++++++++++++
>  kernel/bpf/map_iter.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 77 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f58895830ada..20292a096188 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -275,6 +275,7 @@ struct bpf_map {
>          } owner;
>          bool bypass_spec_v1;
>          bool frozen; /* write-once; write-protected by freeze_mutex */
> +        s64 __percpu *elements_count;

To avoid corruption on 32 bit archs, should we convert this into local64_t here?
On Fri, Jun 23, 2023 at 12:51:21PM +0200, Daniel Borkmann wrote:
> On 6/22/23 11:53 AM, Anton Protopopov wrote:
> > Add a generic percpu stats for bpf_map elements insertions/deletions in order
> > to keep track of both, the current (approximate) number of elements in a map
> > and per-cpu statistics on update/delete operations.
> >
> > To expose these stats a particular map implementation should initialize the
> > counter and adjust it as needed using the 'bpf_map_*_elements_counter' helpers
> > provided by this commit. The counter can be read by an iterator program.
> >
> > A bpf_map_sum_elements_counter kfunc was added to simplify getting the sum of
> > the per-cpu values. If a map doesn't implement the counter, then it will always
> > return 0.
> >
> > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > ---
> >  include/linux/bpf.h   | 30 +++++++++++++++++++++++++++
> >  kernel/bpf/map_iter.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 77 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index f58895830ada..20292a096188 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -275,6 +275,7 @@ struct bpf_map {
> >          } owner;
> >          bool bypass_spec_v1;
> >          bool frozen; /* write-once; write-protected by freeze_mutex */
> > +        s64 __percpu *elements_count;
>
> To avoid corruption on 32 bit archs, should we convert this into local64_t here?

Looks like using this_cpu_inc we can do it lockless on archs which support it
(AFAICS this is x86_64, arm64, s390, and loongarch). Otherwise we can use
atomic64_t (local64_t will switch to atomic64_t in any case for such systems).
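For reference, a minimal sketch of the local64_t variant being discussed, purely illustrative and not part of the posted patch; the example_* names and the container struct are made up. local64_* compiles to a plain per-cpu update on 64-bit architectures and falls back to atomic64_t on 32-bit ones, so the sum below cannot observe a torn 64-bit value:

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <asm/local64.h>

/* Hypothetical container, standing in for struct bpf_map */
struct example_map {
        local64_t __percpu *elements_count;
};

/* Assumes the caller cannot migrate between CPUs (e.g. runs with
 * preemption or migration disabled), as map update/delete paths do.
 */
static inline void example_inc_elem_count(struct example_map *map)
{
        local64_inc(this_cpu_ptr(map->elements_count));
}

static inline s64 example_sum_elem_count(struct example_map *map)
{
        s64 ret = 0;
        int cpu;

        for_each_possible_cpu(cpu)
                ret += local64_read(per_cpu_ptr(map->elements_count, cpu));
        return ret;
}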
On Thu, Jun 22, 2023 at 01:11:58PM -0700, Alexei Starovoitov wrote:
> On Thu, Jun 22, 2023 at 09:53:27AM +0000, Anton Protopopov wrote:
> > Add a generic percpu stats for bpf_map elements insertions/deletions in order
> > to keep track of both, the current (approximate) number of elements in a map
> > and per-cpu statistics on update/delete operations.
> >
> > To expose these stats a particular map implementation should initialize the
> > counter and adjust it as needed using the 'bpf_map_*_elements_counter' helpers
> > provided by this commit. The counter can be read by an iterator program.
> >
> > A bpf_map_sum_elements_counter kfunc was added to simplify getting the sum of
> > the per-cpu values. If a map doesn't implement the counter, then it will always
> > return 0.
> >
> > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > ---
> >  include/linux/bpf.h   | 30 +++++++++++++++++++++++++++
> >  kernel/bpf/map_iter.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 77 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index f58895830ada..20292a096188 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -275,6 +275,7 @@ struct bpf_map {
> >          } owner;
> >          bool bypass_spec_v1;
> >          bool frozen; /* write-once; write-protected by freeze_mutex */
> > +        s64 __percpu *elements_count;
> >  };
> >
> >  static inline const char *btf_field_type_name(enum btf_field_type type)
> > @@ -2040,6 +2041,35 @@ bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
> >  }
> >  #endif
> >
> > +static inline int
> > +bpf_map_init_elements_counter(struct bpf_map *map)
> > +{
> > +        size_t size = sizeof(*map->elements_count), align = size;
> > +        gfp_t flags = GFP_USER | __GFP_NOWARN;
> > +
> > +        map->elements_count = bpf_map_alloc_percpu(map, size, align, flags);
> > +        if (!map->elements_count)
> > +                return -ENOMEM;
> > +
> > +        return 0;
> > +}
> > +
> > +static inline void
> > +bpf_map_free_elements_counter(struct bpf_map *map)
> > +{
> > +        free_percpu(map->elements_count);
> > +}
> > +
> > +static inline void bpf_map_inc_elements_counter(struct bpf_map *map)
>
> bpf_map_inc_elem_count() to match existing inc_elem_count() ?
>
> > +{
> > +        this_cpu_inc(*map->elements_count);
> > +}
> > +
> > +static inline void bpf_map_dec_elements_counter(struct bpf_map *map)
> > +{
> > +        this_cpu_dec(*map->elements_count);
> > +}
> > +
> >  extern int sysctl_unprivileged_bpf_disabled;
> >
> >  static inline bool bpf_allow_ptr_leaks(void)
> > diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
> > index b0fa190b0979..26ca00dde962 100644
> > --- a/kernel/bpf/map_iter.c
> > +++ b/kernel/bpf/map_iter.c
> > @@ -93,7 +93,7 @@ static struct bpf_iter_reg bpf_map_reg_info = {
> >          .ctx_arg_info_size      = 1,
> >          .ctx_arg_info           = {
> >                  { offsetof(struct bpf_iter__bpf_map, map),
> > -                  PTR_TO_BTF_ID_OR_NULL },
> > +                  PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
>
> this and below should be in separate patch.
>
> >          },
> >          .seq_info               = &bpf_map_seq_info,
> >  };
> > @@ -193,3 +193,49 @@ static int __init bpf_map_iter_init(void)
> >  }
> >
> >  late_initcall(bpf_map_iter_init);
> > +
> > +__diag_push();
> > +__diag_ignore_all("-Wmissing-prototypes",
> > +                  "Global functions as their definitions will be in vmlinux BTF");
> > +
> > +__bpf_kfunc s64 bpf_map_sum_elements_counter(struct bpf_map *map)
> > +{
> > +        s64 *pcount;
> > +        s64 ret = 0;
> > +        int cpu;
> > +
> > +        if (!map || !map->elements_count)
> > +                return 0;
> > +
> > +        for_each_possible_cpu(cpu) {
> > +                pcount = per_cpu_ptr(map->elements_count, cpu);
> > +                ret += READ_ONCE(*pcount);
> > +        }
> > +        return ret;
> > +}
> > +
> > +__diag_pop();
> > +
> > +BTF_SET8_START(bpf_map_iter_kfunc_ids)
> > +BTF_ID_FLAGS(func, bpf_map_sum_elements_counter, KF_TRUSTED_ARGS)
> > +BTF_SET8_END(bpf_map_iter_kfunc_ids)
> > +
> > +static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
> > +{
> > +        if (btf_id_set8_contains(&bpf_map_iter_kfunc_ids, kfunc_id) &&
> > +            prog->expected_attach_type != BPF_TRACE_ITER)
>
> why restrict to trace_iter?

Thanks, I will remove it. All your other comments in this series make sense as
well, will address them.

> > +                return -EACCES;
> > +        return 0;
> > +}
> > +
> > +static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = {
> > +        .owner = THIS_MODULE,
> > +        .set = &bpf_map_iter_kfunc_ids,
> > +        .filter = tracing_iter_filter,
> > +};
> > +
> > +static int init_subsystem(void)
> > +{
> > +        return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_map_iter_kfunc_set);
> > +}
> > +late_initcall(init_subsystem);
> > --
> > 2.34.1
> >
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f58895830ada..20292a096188 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -275,6 +275,7 @@ struct bpf_map {
         } owner;
         bool bypass_spec_v1;
         bool frozen; /* write-once; write-protected by freeze_mutex */
+        s64 __percpu *elements_count;
 };
 
 static inline const char *btf_field_type_name(enum btf_field_type type)
@@ -2040,6 +2041,35 @@ bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
 }
 #endif
 
+static inline int
+bpf_map_init_elements_counter(struct bpf_map *map)
+{
+        size_t size = sizeof(*map->elements_count), align = size;
+        gfp_t flags = GFP_USER | __GFP_NOWARN;
+
+        map->elements_count = bpf_map_alloc_percpu(map, size, align, flags);
+        if (!map->elements_count)
+                return -ENOMEM;
+
+        return 0;
+}
+
+static inline void
+bpf_map_free_elements_counter(struct bpf_map *map)
+{
+        free_percpu(map->elements_count);
+}
+
+static inline void bpf_map_inc_elements_counter(struct bpf_map *map)
+{
+        this_cpu_inc(*map->elements_count);
+}
+
+static inline void bpf_map_dec_elements_counter(struct bpf_map *map)
+{
+        this_cpu_dec(*map->elements_count);
+}
+
 extern int sysctl_unprivileged_bpf_disabled;
 
 static inline bool bpf_allow_ptr_leaks(void)
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index b0fa190b0979..26ca00dde962 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -93,7 +93,7 @@ static struct bpf_iter_reg bpf_map_reg_info = {
         .ctx_arg_info_size      = 1,
         .ctx_arg_info           = {
                 { offsetof(struct bpf_iter__bpf_map, map),
-                  PTR_TO_BTF_ID_OR_NULL },
+                  PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
         },
         .seq_info               = &bpf_map_seq_info,
 };
@@ -193,3 +193,49 @@ static int __init bpf_map_iter_init(void)
 }
 
 late_initcall(bpf_map_iter_init);
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+                  "Global functions as their definitions will be in vmlinux BTF");
+
+__bpf_kfunc s64 bpf_map_sum_elements_counter(struct bpf_map *map)
+{
+        s64 *pcount;
+        s64 ret = 0;
+        int cpu;
+
+        if (!map || !map->elements_count)
+                return 0;
+
+        for_each_possible_cpu(cpu) {
+                pcount = per_cpu_ptr(map->elements_count, cpu);
+                ret += READ_ONCE(*pcount);
+        }
+        return ret;
+}
+
+__diag_pop();
+
+BTF_SET8_START(bpf_map_iter_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_map_sum_elements_counter, KF_TRUSTED_ARGS)
+BTF_SET8_END(bpf_map_iter_kfunc_ids)
+
+static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+        if (btf_id_set8_contains(&bpf_map_iter_kfunc_ids, kfunc_id) &&
+            prog->expected_attach_type != BPF_TRACE_ITER)
+                return -EACCES;
+        return 0;
+}
+
+static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = {
+        .owner = THIS_MODULE,
+        .set = &bpf_map_iter_kfunc_ids,
+        .filter = tracing_iter_filter,
+};
+
+static int init_subsystem(void)
+{
+        return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_map_iter_kfunc_set);
+}
+late_initcall(init_subsystem);
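The counter is meant to be read from an iterator program. As a rough sketch of such a consumer, not taken from this series' selftests and with the program name and output format made up for illustration, an iter/bpf_map program could call the new kfunc on each map and print the approximate element count:

/* Sketch only: assumes a kernel carrying the patch above. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* kfunc added by this patch; returns 0 for maps without a counter */
extern s64 bpf_map_sum_elements_counter(struct bpf_map *map) __ksym;

SEC("iter/bpf_map")
int dump_elements_count(struct bpf_iter__bpf_map *ctx)
{
        struct seq_file *seq = ctx->meta->seq;
        struct bpf_map *map = ctx->map;

        if (!map)
                return 0;

        /* ctx->map is trusted (PTR_TRUSTED), so it may be passed to the
         * KF_TRUSTED_ARGS kfunc directly.
         */
        BPF_SEQ_PRINTF(seq, "map id %u: ~%lld elements\n",
                       map->id, bpf_map_sum_elements_counter(map));
        return 0;
}

char _license[] SEC("license") = "GPL";

Pinning the iterator (for example with bpftool iter pin) and reading the resulting file would then emit one line per map.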
Add a generic percpu stats for bpf_map elements insertions/deletions in order
to keep track of both, the current (approximate) number of elements in a map
and per-cpu statistics on update/delete operations.

To expose these stats a particular map implementation should initialize the
counter and adjust it as needed using the 'bpf_map_*_elements_counter' helpers
provided by this commit. The counter can be read by an iterator program.

A bpf_map_sum_elements_counter kfunc was added to simplify getting the sum of
the per-cpu values. If a map doesn't implement the counter, then it will always
return 0.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
---
 include/linux/bpf.h   | 30 +++++++++++++++++++++++++++
 kernel/bpf/map_iter.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 77 insertions(+), 1 deletion(-)
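As a rough sketch of the intended usage described in the commit message, illustrative only and not code from this series, with the mymap_* callbacks being hypothetical: a map implementation would allocate the counter when the map is created, adjust it when elements are inserted or removed, and free it on teardown.

#include <linux/bpf.h>
#include <linux/err.h>

static struct bpf_map *mymap_alloc(union bpf_attr *attr)
{
        struct bpf_map *map;
        int err;

        map = mymap_alloc_internal(attr);              /* hypothetical */
        if (IS_ERR(map))
                return map;

        err = bpf_map_init_elements_counter(map);      /* percpu s64 allocation */
        if (err) {
                mymap_free_internal(map);              /* hypothetical */
                return ERR_PTR(err);
        }
        return map;
}

static long mymap_update_elem(struct bpf_map *map, void *key, void *value, u64 flags)
{
        bool new_elem;
        long err;

        err = mymap_do_update(map, key, value, flags, &new_elem);  /* hypothetical */
        if (!err && new_elem)
                bpf_map_inc_elements_counter(map);
        return err;
}

static long mymap_delete_elem(struct bpf_map *map, void *key)
{
        long err = mymap_do_delete(map, key);          /* hypothetical */

        if (!err)
                bpf_map_dec_elements_counter(map);
        return err;
}

static void mymap_free(struct bpf_map *map)
{
        bpf_map_free_elements_counter(map);
        mymap_free_internal(map);                      /* hypothetical */
}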