diff mbox series

[RFC,v2,bpf-next,1/4] bpf: add percpu stats for bpf_map elements insertions/deletions

Message ID 20230622095330.1023453-2-aspsk@isovalent.com (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf: add percpu stats for bpf_map

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-8 success Logs for veristat
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1448 this patch: 1449
netdev/cc_maintainers success CCed 12 of 12 maintainers
netdev/build_clang success Errors and warnings before: 176 this patch: 176
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1443 this patch: 1444
netdev/checkpatch warning WARNING: line length of 89 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Anton Protopopov June 22, 2023, 9:53 a.m. UTC
Add generic percpu stats for bpf_map element insertions and deletions in order
to keep track of both the current (approximate) number of elements in a map and
per-cpu statistics on update/delete operations.

To expose these stats, a particular map implementation should initialize the
counter and adjust it as needed using the 'bpf_map_*_elements_counter' helpers
provided by this commit. The counter can then be read by an iterator program.
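
As an illustration only (not part of this patch), the kernel-side wiring for a
map type would look roughly like the sketch below. The example_* functions are
placeholders for the map's own callbacks, and a real map would only bump the
counter when an element is actually added or removed:

static struct bpf_map *example_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	map = example_do_alloc(attr);			/* map-specific allocation */
	if (IS_ERR(map))
		return map;

	err = bpf_map_init_elements_counter(map);	/* allocate the percpu counter */
	if (err) {
		example_do_free(map);
		return ERR_PTR(err);
	}
	return map;
}

static long example_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags)
{
	long err = example_do_update(map, key, value, flags);

	/* A real map increments only when a new element was created,
	 * not on an in-place update of an existing key.
	 */
	if (!err)
		bpf_map_inc_elements_counter(map);
	return err;
}

static long example_map_delete_elem(struct bpf_map *map, void *key)
{
	long err = example_do_delete(map, key);

	if (!err)
		bpf_map_dec_elements_counter(map);
	return err;
}

static void example_map_free(struct bpf_map *map)
{
	bpf_map_free_elements_counter(map);
	example_do_free(map);
}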

A bpf_map_sum_elements_counter kfunc is added to simplify getting the sum of
the per-cpu values. If a map doesn't implement the counter, the kfunc always
returns 0.
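
For reference only (again, not part of this patch), a bpf_map iterator program
calling the kfunc could look roughly like the following sketch; the hand-written
extern declaration and the output format are illustrative:

// SPDX-License-Identifier: GPL-2.0
/* Sketch of an iterator program reading the element counter. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

extern s64 bpf_map_sum_elements_counter(struct bpf_map *map) __ksym;

SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct bpf_map *map = ctx->map;

	if (!map)
		return 0;

	/* Sum of the per-cpu counters; 0 if the map doesn't implement it. */
	BPF_SEQ_PRINTF(seq, "map id %u: ~%lld elements\n",
		       map->id, bpf_map_sum_elements_counter(map));
	return 0;
}

char _license[] SEC("license") = "GPL";

Because the ctx->map pointer is marked PTR_TRUSTED and the kfunc is registered
with KF_TRUSTED_ARGS, it can be passed directly; userspace would attach the
program with bpf_program__attach_iter() and read the output through the
iterator FD as with any other bpf_iter program.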

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
---
 include/linux/bpf.h   | 30 +++++++++++++++++++++++++++
 kernel/bpf/map_iter.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 77 insertions(+), 1 deletion(-)

Comments

Alexei Starovoitov June 22, 2023, 8:11 p.m. UTC | #1
On Thu, Jun 22, 2023 at 09:53:27AM +0000, Anton Protopopov wrote:
> Add a generic percpu stats for bpf_map elements insertions/deletions in order
> to keep track of both, the current (approximate) number of elements in a map
> and per-cpu statistics on update/delete operations.
> 
> To expose these stats a particular map implementation should initialize the
> counter and adjust it as needed using the 'bpf_map_*_elements_counter' helpers
> provided by this commit. The counter can be read by an iterator program.
> 
> A bpf_map_sum_elements_counter kfunc was added to simplify getting the sum of
> the per-cpu values. If a map doesn't implement the counter, then it will always
> return 0.
> 
> Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> ---
>  include/linux/bpf.h   | 30 +++++++++++++++++++++++++++
>  kernel/bpf/map_iter.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 77 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f58895830ada..20292a096188 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -275,6 +275,7 @@ struct bpf_map {
>  	} owner;
>  	bool bypass_spec_v1;
>  	bool frozen; /* write-once; write-protected by freeze_mutex */
> +	s64 __percpu *elements_count;
>  };
>  
>  static inline const char *btf_field_type_name(enum btf_field_type type)
> @@ -2040,6 +2041,35 @@ bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
>  }
>  #endif
>  
> +static inline int
> +bpf_map_init_elements_counter(struct bpf_map *map)
> +{
> +	size_t size = sizeof(*map->elements_count), align = size;
> +	gfp_t flags = GFP_USER | __GFP_NOWARN;
> +
> +	map->elements_count = bpf_map_alloc_percpu(map, size, align, flags);
> +	if (!map->elements_count)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +static inline void
> +bpf_map_free_elements_counter(struct bpf_map *map)
> +{
> +	free_percpu(map->elements_count);
> +}
> +
> +static inline void bpf_map_inc_elements_counter(struct bpf_map *map)

bpf_map_inc_elem_count() to match existing inc_elem_count() ?

> +{
> +	this_cpu_inc(*map->elements_count);
> +}
> +
> +static inline void bpf_map_dec_elements_counter(struct bpf_map *map)
> +{
> +	this_cpu_dec(*map->elements_count);
> +}
> +
>  extern int sysctl_unprivileged_bpf_disabled;
>  
>  static inline bool bpf_allow_ptr_leaks(void)
> diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
> index b0fa190b0979..26ca00dde962 100644
> --- a/kernel/bpf/map_iter.c
> +++ b/kernel/bpf/map_iter.c
> @@ -93,7 +93,7 @@ static struct bpf_iter_reg bpf_map_reg_info = {
>  	.ctx_arg_info_size	= 1,
>  	.ctx_arg_info		= {
>  		{ offsetof(struct bpf_iter__bpf_map, map),
> -		  PTR_TO_BTF_ID_OR_NULL },
> +		  PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },

this and below should be in separate patch.

>  	},
>  	.seq_info		= &bpf_map_seq_info,
>  };
> @@ -193,3 +193,49 @@ static int __init bpf_map_iter_init(void)
>  }
>  
>  late_initcall(bpf_map_iter_init);
> +
> +__diag_push();
> +__diag_ignore_all("-Wmissing-prototypes",
> +		  "Global functions as their definitions will be in vmlinux BTF");
> +
> +__bpf_kfunc s64 bpf_map_sum_elements_counter(struct bpf_map *map)
> +{
> +	s64 *pcount;
> +	s64 ret = 0;
> +	int cpu;
> +
> +	if (!map || !map->elements_count)
> +		return 0;
> +
> +	for_each_possible_cpu(cpu) {
> +		pcount = per_cpu_ptr(map->elements_count, cpu);
> +		ret += READ_ONCE(*pcount);
> +	}
> +	return ret;
> +}
> +
> +__diag_pop();
> +
> +BTF_SET8_START(bpf_map_iter_kfunc_ids)
> +BTF_ID_FLAGS(func, bpf_map_sum_elements_counter, KF_TRUSTED_ARGS)
> +BTF_SET8_END(bpf_map_iter_kfunc_ids)
> +
> +static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
> +{
> +	if (btf_id_set8_contains(&bpf_map_iter_kfunc_ids, kfunc_id) &&
> +	    prog->expected_attach_type != BPF_TRACE_ITER)

why restrict to trace_iter?

> +		return -EACCES;
> +	return 0;
> +}
> +
> +static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = {
> +	.owner = THIS_MODULE,
> +	.set   = &bpf_map_iter_kfunc_ids,
> +	.filter = tracing_iter_filter,
> +};
> +
> +static int init_subsystem(void)
> +{
> +	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_map_iter_kfunc_set);
> +}
> +late_initcall(init_subsystem);
> -- 
> 2.34.1
>
Daniel Borkmann June 23, 2023, 10:51 a.m. UTC | #2
On 6/22/23 11:53 AM, Anton Protopopov wrote:
> Add a generic percpu stats for bpf_map elements insertions/deletions in order
> to keep track of both, the current (approximate) number of elements in a map
> and per-cpu statistics on update/delete operations.
> 
> To expose these stats a particular map implementation should initialize the
> counter and adjust it as needed using the 'bpf_map_*_elements_counter' helpers
> provided by this commit. The counter can be read by an iterator program.
> 
> A bpf_map_sum_elements_counter kfunc was added to simplify getting the sum of
> the per-cpu values. If a map doesn't implement the counter, then it will always
> return 0.
> 
> Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> ---
>   include/linux/bpf.h   | 30 +++++++++++++++++++++++++++
>   kernel/bpf/map_iter.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
>   2 files changed, 77 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f58895830ada..20292a096188 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -275,6 +275,7 @@ struct bpf_map {
>   	} owner;
>   	bool bypass_spec_v1;
>   	bool frozen; /* write-once; write-protected by freeze_mutex */
> +	s64 __percpu *elements_count;

To avoid corruption on 32 bit archs, should we convert this into local64_t here?
Anton Protopopov June 23, 2023, 12:35 p.m. UTC | #3
On Fri, Jun 23, 2023 at 12:51:21PM +0200, Daniel Borkmann wrote:
> On 6/22/23 11:53 AM, Anton Protopopov wrote:
> > Add a generic percpu stats for bpf_map elements insertions/deletions in order
> > to keep track of both, the current (approximate) number of elements in a map
> > and per-cpu statistics on update/delete operations.
> > 
> > To expose these stats a particular map implementation should initialize the
> > counter and adjust it as needed using the 'bpf_map_*_elements_counter' helpers
> > provided by this commit. The counter can be read by an iterator program.
> > 
> > A bpf_map_sum_elements_counter kfunc was added to simplify getting the sum of
> > the per-cpu values. If a map doesn't implement the counter, then it will always
> > return 0.
> > 
> > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > ---
> >   include/linux/bpf.h   | 30 +++++++++++++++++++++++++++
> >   kernel/bpf/map_iter.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
> >   2 files changed, 77 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index f58895830ada..20292a096188 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -275,6 +275,7 @@ struct bpf_map {
> >   	} owner;
> >   	bool bypass_spec_v1;
> >   	bool frozen; /* write-once; write-protected by freeze_mutex */
> > +	s64 __percpu *elements_count;
> 
> To avoid corruption on 32 bit archs, should we convert this into local64_t here?

Looks like with this_cpu_inc() we can do it locklessly on archs which support it
(AFAICS that's x86_64, arm64, s390, and loongarch). Otherwise we can use
atomic64_t (local64_t falls back to atomic64_t on such systems anyway).
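
A minimal sketch of the local64_t variant under discussion (illustrative only,
not what the patch currently does; allocation and freeing stay as in the patch):

/* Sketch, assuming the struct bpf_map field becomes local64_t-based. */
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <asm/local64.h>

/* in struct bpf_map, instead of 's64 __percpu *elements_count': */
	local64_t __percpu *elements_count;

static inline void bpf_map_inc_elements_counter(struct bpf_map *map)
{
	/*
	 * local64_inc() is a single IRQ-safe instruction on 64-bit archs and
	 * falls back to atomic64_inc() on 32-bit ones, so readers never see
	 * a torn 64-bit value. If the task migrates between this_cpu_ptr()
	 * and the increment, the update lands in another CPU's slot, which
	 * is fine because only the sum is reported.
	 */
	local64_inc(this_cpu_ptr(map->elements_count));
}

static inline s64 bpf_map_sum_elements_counter(struct bpf_map *map)
{
	s64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += local64_read(per_cpu_ptr(map->elements_count, cpu));
	return sum;
}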
Anton Protopopov June 23, 2023, 12:47 p.m. UTC | #4
On Thu, Jun 22, 2023 at 01:11:58PM -0700, Alexei Starovoitov wrote:
> On Thu, Jun 22, 2023 at 09:53:27AM +0000, Anton Protopopov wrote:
> > Add a generic percpu stats for bpf_map elements insertions/deletions in order
> > to keep track of both, the current (approximate) number of elements in a map
> > and per-cpu statistics on update/delete operations.
> > 
> > To expose these stats a particular map implementation should initialize the
> > counter and adjust it as needed using the 'bpf_map_*_elements_counter' helpers
> > provided by this commit. The counter can be read by an iterator program.
> > 
> > A bpf_map_sum_elements_counter kfunc was added to simplify getting the sum of
> > the per-cpu values. If a map doesn't implement the counter, then it will always
> > return 0.
> > 
> > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > ---
> >  include/linux/bpf.h   | 30 +++++++++++++++++++++++++++
> >  kernel/bpf/map_iter.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 77 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index f58895830ada..20292a096188 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -275,6 +275,7 @@ struct bpf_map {
> >  	} owner;
> >  	bool bypass_spec_v1;
> >  	bool frozen; /* write-once; write-protected by freeze_mutex */
> > +	s64 __percpu *elements_count;
> >  };
> >  
> >  static inline const char *btf_field_type_name(enum btf_field_type type)
> > @@ -2040,6 +2041,35 @@ bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
> >  }
> >  #endif
> >  
> > +static inline int
> > +bpf_map_init_elements_counter(struct bpf_map *map)
> > +{
> > +	size_t size = sizeof(*map->elements_count), align = size;
> > +	gfp_t flags = GFP_USER | __GFP_NOWARN;
> > +
> > +	map->elements_count = bpf_map_alloc_percpu(map, size, align, flags);
> > +	if (!map->elements_count)
> > +		return -ENOMEM;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline void
> > +bpf_map_free_elements_counter(struct bpf_map *map)
> > +{
> > +	free_percpu(map->elements_count);
> > +}
> > +
> > +static inline void bpf_map_inc_elements_counter(struct bpf_map *map)
> 
> bpf_map_inc_elem_count() to match existing inc_elem_count() ?
> 
> > +{
> > +	this_cpu_inc(*map->elements_count);
> > +}
> > +
> > +static inline void bpf_map_dec_elements_counter(struct bpf_map *map)
> > +{
> > +	this_cpu_dec(*map->elements_count);
> > +}
> > +
> >  extern int sysctl_unprivileged_bpf_disabled;
> >  
> >  static inline bool bpf_allow_ptr_leaks(void)
> > diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
> > index b0fa190b0979..26ca00dde962 100644
> > --- a/kernel/bpf/map_iter.c
> > +++ b/kernel/bpf/map_iter.c
> > @@ -93,7 +93,7 @@ static struct bpf_iter_reg bpf_map_reg_info = {
> >  	.ctx_arg_info_size	= 1,
> >  	.ctx_arg_info		= {
> >  		{ offsetof(struct bpf_iter__bpf_map, map),
> > -		  PTR_TO_BTF_ID_OR_NULL },
> > +		  PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
> 
> this and below should be in separate patch.
> 
> >  	},
> >  	.seq_info		= &bpf_map_seq_info,
> >  };
> > @@ -193,3 +193,49 @@ static int __init bpf_map_iter_init(void)
> >  }
> >  
> >  late_initcall(bpf_map_iter_init);
> > +
> > +__diag_push();
> > +__diag_ignore_all("-Wmissing-prototypes",
> > +		  "Global functions as their definitions will be in vmlinux BTF");
> > +
> > +__bpf_kfunc s64 bpf_map_sum_elements_counter(struct bpf_map *map)
> > +{
> > +	s64 *pcount;
> > +	s64 ret = 0;
> > +	int cpu;
> > +
> > +	if (!map || !map->elements_count)
> > +		return 0;
> > +
> > +	for_each_possible_cpu(cpu) {
> > +		pcount = per_cpu_ptr(map->elements_count, cpu);
> > +		ret += READ_ONCE(*pcount);
> > +	}
> > +	return ret;
> > +}
> > +
> > +__diag_pop();
> > +
> > +BTF_SET8_START(bpf_map_iter_kfunc_ids)
> > +BTF_ID_FLAGS(func, bpf_map_sum_elements_counter, KF_TRUSTED_ARGS)
> > +BTF_SET8_END(bpf_map_iter_kfunc_ids)
> > +
> > +static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
> > +{
> > +	if (btf_id_set8_contains(&bpf_map_iter_kfunc_ids, kfunc_id) &&
> > +	    prog->expected_attach_type != BPF_TRACE_ITER)
> 
> why restrict to trace_iter?

Thanks, I will remove it.

All your other comments in this series make sense as well, will address them.

> > +		return -EACCES;
> > +	return 0;
> > +}
> > +
> > +static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = {
> > +	.owner = THIS_MODULE,
> > +	.set   = &bpf_map_iter_kfunc_ids,
> > +	.filter = tracing_iter_filter,
> > +};
> > +
> > +static int init_subsystem(void)
> > +{
> > +	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_map_iter_kfunc_set);
> > +}
> > +late_initcall(init_subsystem);
> > -- 
> > 2.34.1
> >

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f58895830ada..20292a096188 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -275,6 +275,7 @@  struct bpf_map {
 	} owner;
 	bool bypass_spec_v1;
 	bool frozen; /* write-once; write-protected by freeze_mutex */
+	s64 __percpu *elements_count;
 };
 
 static inline const char *btf_field_type_name(enum btf_field_type type)
@@ -2040,6 +2041,35 @@  bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
 }
 #endif
 
+static inline int
+bpf_map_init_elements_counter(struct bpf_map *map)
+{
+	size_t size = sizeof(*map->elements_count), align = size;
+	gfp_t flags = GFP_USER | __GFP_NOWARN;
+
+	map->elements_count = bpf_map_alloc_percpu(map, size, align, flags);
+	if (!map->elements_count)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static inline void
+bpf_map_free_elements_counter(struct bpf_map *map)
+{
+	free_percpu(map->elements_count);
+}
+
+static inline void bpf_map_inc_elements_counter(struct bpf_map *map)
+{
+	this_cpu_inc(*map->elements_count);
+}
+
+static inline void bpf_map_dec_elements_counter(struct bpf_map *map)
+{
+	this_cpu_dec(*map->elements_count);
+}
+
 extern int sysctl_unprivileged_bpf_disabled;
 
 static inline bool bpf_allow_ptr_leaks(void)
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index b0fa190b0979..26ca00dde962 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -93,7 +93,7 @@  static struct bpf_iter_reg bpf_map_reg_info = {
 	.ctx_arg_info_size	= 1,
 	.ctx_arg_info		= {
 		{ offsetof(struct bpf_iter__bpf_map, map),
-		  PTR_TO_BTF_ID_OR_NULL },
+		  PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
 	},
 	.seq_info		= &bpf_map_seq_info,
 };
@@ -193,3 +193,49 @@  static int __init bpf_map_iter_init(void)
 }
 
 late_initcall(bpf_map_iter_init);
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+		  "Global functions as their definitions will be in vmlinux BTF");
+
+__bpf_kfunc s64 bpf_map_sum_elements_counter(struct bpf_map *map)
+{
+	s64 *pcount;
+	s64 ret = 0;
+	int cpu;
+
+	if (!map || !map->elements_count)
+		return 0;
+
+	for_each_possible_cpu(cpu) {
+		pcount = per_cpu_ptr(map->elements_count, cpu);
+		ret += READ_ONCE(*pcount);
+	}
+	return ret;
+}
+
+__diag_pop();
+
+BTF_SET8_START(bpf_map_iter_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_map_sum_elements_counter, KF_TRUSTED_ARGS)
+BTF_SET8_END(bpf_map_iter_kfunc_ids)
+
+static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+	if (btf_id_set8_contains(&bpf_map_iter_kfunc_ids, kfunc_id) &&
+	    prog->expected_attach_type != BPF_TRACE_ITER)
+		return -EACCES;
+	return 0;
+}
+
+static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = {
+	.owner = THIS_MODULE,
+	.set   = &bpf_map_iter_kfunc_ids,
+	.filter = tracing_iter_filter,
+};
+
+static int init_subsystem(void)
+{
+	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_map_iter_kfunc_set);
+}
+late_initcall(init_subsystem);