
[bpf-next,v4] libbpf: Expose API to consume one ring at a time

Message ID 20230727083436.293201-1-adam@wowsignal.io (mailing list archive)
State Superseded
Delegated to: BPF
Series [bpf-next,v4] libbpf: Expose API to consume one ring at a time

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 9 this patch: 9
netdev/cc_maintainers warning 10 maintainers not CCed: daniel@iogearbox.net kpsingh@kernel.org martin.lau@linux.dev john.fastabend@gmail.com song@kernel.org sdf@google.com andrii@kernel.org yonghong.song@linux.dev jolsa@kernel.org haoluo@google.com
netdev/build_clang success Errors and warnings before: 9 this patch: 9
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 9 this patch: 9
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 116 this patch: 116
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-6 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for veristat

Commit Message

Adam Sindelar July 27, 2023, 8:34 a.m. UTC
We already provide ring_buffer__epoll_fd to enable use of external
polling systems. However, the only API available to consume the ring
buffer is ring_buffer__consume, which always checks all rings. When
polling for many events, this can be wasteful.

Signed-off-by: Adam Sindelar <adam@wowsignal.io>
---
v1->v2: Added entry to libbpf.map
v2->v3: Correctly set errno and handle overflow
v3->v4: Fixed an embarrassing typo from zealous autocomplete

 tools/lib/bpf/libbpf.h   |  1 +
 tools/lib/bpf/libbpf.map |  1 +
 tools/lib/bpf/ringbuf.c  | 22 ++++++++++++++++++++++
 3 files changed, 24 insertions(+)
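
[Editorial note, not part of the submission: a minimal sketch of how the proposed call is meant to pair with ring_buffer__epoll_fd in an external epoll loop. The function name, the event array size, and the error handling are illustrative assumptions; the patch comment only says the ring ID is delivered via epoll_data, so whether that is data.fd or data.u32 should be checked against ring_buffer__add before relying on it.]

	#include <errno.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/epoll.h>
	#include <bpf/libbpf.h>

	/* Drain only the rings that epoll reported as ready. "rb" must
	 * already have its rings registered via ring_buffer__add(); how it
	 * was built is outside this sketch.
	 */
	static int drain_ready_rings(struct ring_buffer *rb)
	{
		struct epoll_event events[16];
		int epoll_fd = ring_buffer__epoll_fd(rb);
		int i, n;

		n = epoll_wait(epoll_fd, events, 16, -1);
		if (n < 0)
			return -errno;

		for (i = 0; i < n; i++) {
			/* assumption: libbpf stores the ring's index in epoll_data */
			uint32_t ring_id = events[i].data.fd;
			int cnt = ring_buffer__consume_ring(rb, ring_id);

			if (cnt < 0) {
				/* on error, errno is set via libbpf_err() */
				fprintf(stderr, "ring %u: consume failed: %d\n",
					ring_id, cnt);
				return cnt;
			}
		}
		return 0;
	}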

Comments

John Fastabend July 27, 2023, 7 p.m. UTC | #1
Adam Sindelar wrote:
> We already provide ring_buffer__epoll_fd to enable use of external
> polling systems. However, the only API available to consume the ring
> buffer is ring_buffer__consume, which always checks all rings. When
> polling for many events, this can be wasteful.
> 
> Signed-off-by: Adam Sindelar <adam@wowsignal.io>
> ---
> v1->v2: Added entry to libbpf.map
> v2->v3: Correctly set errno and handle overflow
> v3->v4: Fixed an embarrassing typo from zealous autocomplete
> 
>  tools/lib/bpf/libbpf.h   |  1 +
>  tools/lib/bpf/libbpf.map |  1 +
>  tools/lib/bpf/ringbuf.c  | 22 ++++++++++++++++++++++
>  3 files changed, 24 insertions(+)
> 
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index 55b97b2087540..20ccc65eb3f9d 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -1195,6 +1195,7 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
>  				ring_buffer_sample_fn sample_cb, void *ctx);
>  LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
>  LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
> +LIBBPF_API int ring_buffer__consume_ring(struct ring_buffer *rb, uint32_t ring_id);
>  LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
>  
>  struct user_ring_buffer_opts {
> diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
> index 9c7538dd5835e..42dc418b4672f 100644
> --- a/tools/lib/bpf/libbpf.map
> +++ b/tools/lib/bpf/libbpf.map
> @@ -398,4 +398,5 @@ LIBBPF_1.3.0 {
>  		bpf_prog_detach_opts;
>  		bpf_program__attach_netfilter;
>  		bpf_program__attach_tcx;
> +		ring_buffer__consume_ring;
>  } LIBBPF_1.2.0;
> diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
> index 02199364db136..457469fc7d71e 100644
> --- a/tools/lib/bpf/ringbuf.c
> +++ b/tools/lib/bpf/ringbuf.c
> @@ -290,6 +290,28 @@ int ring_buffer__consume(struct ring_buffer *rb)
>  	return res;
>  }
>  
> +/* Consume available data from a single RINGBUF map identified by its ID.
> + * The ring ID is returned in epoll_data by epoll_wait when called with
> + * ring_buffer__epoll_fd.
> + */
> +int ring_buffer__consume_ring(struct ring_buffer *rb, uint32_t ring_id)
> +{
> +	struct ring *ring;
> +	int64_t res;
> +
> +	if (ring_id >= rb->ring_cnt)
> +		return libbpf_err(-EINVAL);
> +
> +	ring = &rb->rings[ring_id];
> +	res = ringbuf_process_ring(ring);
> +	if (res < 0)
> +		return libbpf_err(res);
> +
> +	if (res > INT_MAX)
> +		return INT_MAX;
> +	return res;

Why not just return int64_t here and skip the INT_MAX check? I would
rather just get the actual value if I were calling this.

> +}
> +
>  /* Poll for available data and consume records, if any are available.
>   * Returns number of records consumed (or INT_MAX, whichever is less), or
>   * negative number, if any of the registered callbacks returned error.
> -- 
> 2.39.2
> 
>
Adam Sindelar July 27, 2023, 9:02 p.m. UTC | #2
On Thu, Jul 27, 2023 at 12:00:10PM -0700, John Fastabend wrote:
> Adam Sindelar wrote:
> > We already provide ring_buffer__epoll_fd to enable use of external
> > polling systems. However, the only API available to consume the ring
> > buffer is ring_buffer__consume, which always checks all rings. When
> > polling for many events, this can be wasteful.
> > 
> > Signed-off-by: Adam Sindelar <adam@wowsignal.io>
> > ---
> > v1->v2: Added entry to libbpf.map
> > v2->v3: Correctly set errno and handle overflow
> > v3->v4: Fixed an embarrassing typo from zealous autocomplete
> > 
> >  tools/lib/bpf/libbpf.h   |  1 +
> >  tools/lib/bpf/libbpf.map |  1 +
> >  tools/lib/bpf/ringbuf.c  | 22 ++++++++++++++++++++++
> >  3 files changed, 24 insertions(+)
> > 
> > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> > index 55b97b2087540..20ccc65eb3f9d 100644
> > --- a/tools/lib/bpf/libbpf.h
> > +++ b/tools/lib/bpf/libbpf.h
> > @@ -1195,6 +1195,7 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
> >  				ring_buffer_sample_fn sample_cb, void *ctx);
> >  LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
> >  LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
> > +LIBBPF_API int ring_buffer__consume_ring(struct ring_buffer *rb, uint32_t ring_id);
> >  LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
> >  
> >  struct user_ring_buffer_opts {
> > diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
> > index 9c7538dd5835e..42dc418b4672f 100644
> > --- a/tools/lib/bpf/libbpf.map
> > +++ b/tools/lib/bpf/libbpf.map
> > @@ -398,4 +398,5 @@ LIBBPF_1.3.0 {
> >  		bpf_prog_detach_opts;
> >  		bpf_program__attach_netfilter;
> >  		bpf_program__attach_tcx;
> > +		ring_buffer__consume_ring;
> >  } LIBBPF_1.2.0;
> > diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
> > index 02199364db136..457469fc7d71e 100644
> > --- a/tools/lib/bpf/ringbuf.c
> > +++ b/tools/lib/bpf/ringbuf.c
> > @@ -290,6 +290,28 @@ int ring_buffer__consume(struct ring_buffer *rb)
> >  	return res;
> >  }
> >  
> > +/* Consume available data from a single RINGBUF map identified by its ID.
> > + * The ring ID is returned in epoll_data by epoll_wait when called with
> > + * ring_buffer__epoll_fd.
> > + */
> > +int ring_buffer__consume_ring(struct ring_buffer *rb, uint32_t ring_id)
> > +{
> > +	struct ring *ring;
> > +	int64_t res;
> > +
> > +	if (ring_id >= rb->ring_cnt)
> > +		return libbpf_err(-EINVAL);
> > +
> > +	ring = &rb->rings[ring_id];
> > +	res = ringbuf_process_ring(ring);
> > +	if (res < 0)
> > +		return libbpf_err(res);
> > +
> > +	if (res > INT_MAX)
> > +		return INT_MAX;
> > +	return res;
> 
> Why not just return int64_t here and skip the INT_MAX check? I would
> rather just get the actual value if I were calling this.
> 

Mainly for consistency with the existing API. So far, the comparable
LIBBPF_API functions use int. It's hard to imagine that the number of
records would exceed ~2 billion in a single call - I think the
aberration is that ringbuf_process_ring uses a 64-bit counter. If you
do exceed INT_MAX records, something is probably wrong and maybe the
function should return an error instead. (But that would be outside the
scope of this patch.)

> > +}
> > +
> >  /* Poll for available data and consume records, if any are available.
> >   * Returns number of records consumed (or INT_MAX, whichever is less), or
> >   * negative number, if any of the registered callbacks returned error.
> > -- 
> > 2.39.2
> > 
> > 
> 
>
Yonghong Song July 27, 2023, 10:11 p.m. UTC | #3
On 7/27/23 1:34 AM, Adam Sindelar wrote:
> We already provide ring_buffer__epoll_fd to enable use of external
> polling systems. However, the only API available to consume the ring
> buffer is ring_buffer__consume, which always checks all rings. When
> polling for many events, this can be wasteful.
> 
> Signed-off-by: Adam Sindelar <adam@wowsignal.io>
> ---
> v1->v2: Added entry to libbpf.map
> v2->v3: Correctly set errno and handle overflow
> v3->v4: Fixed an embarrassing typo from zealous autocomplete
> 
>   tools/lib/bpf/libbpf.h   |  1 +
>   tools/lib/bpf/libbpf.map |  1 +
>   tools/lib/bpf/ringbuf.c  | 22 ++++++++++++++++++++++
>   3 files changed, 24 insertions(+)

Could you add a selftest to exercise ring_buffer__consume_ring()?
This way, people can better understand how this API could be used.

> 
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index 55b97b2087540..20ccc65eb3f9d 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -1195,6 +1195,7 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
>   				ring_buffer_sample_fn sample_cb, void *ctx);
>   LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
>   LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
> +LIBBPF_API int ring_buffer__consume_ring(struct ring_buffer *rb, uint32_t ring_id);
>   LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
>   
[...]
Adam Sindelar July 28, 2023, 7:32 a.m. UTC | #4
On Thu, Jul 27, 2023 at 03:11:00PM -0700, Yonghong Song wrote:
> 
> 
> On 7/27/23 1:34 AM, Adam Sindelar wrote:
> > We already provide ring_buffer__epoll_fd to enable use of external
> > polling systems. However, the only API available to consume the ring
> > buffer is ring_buffer__consume, which always checks all rings. When
> > polling for many events, this can be wasteful.
> > 
> > Signed-off-by: Adam Sindelar <adam@wowsignal.io>
> > ---
> > v1->v2: Added entry to libbpf.map
> > v2->v3: Correctly set errno and handle overflow
> > v3->v4: Fixed an embarrassing typo from zealous autocomplete
> > 
> >   tools/lib/bpf/libbpf.h   |  1 +
> >   tools/lib/bpf/libbpf.map |  1 +
> >   tools/lib/bpf/ringbuf.c  | 22 ++++++++++++++++++++++
> >   3 files changed, 24 insertions(+)
> 
> Could you add a selftest to exercise ring_buffer__consume_ring()?
> This way, people can better understand how this API could be used.
> 

Yes, will do - I'll try to send out v5 with a test case this afternoon.

As a note, I looked at the perf ring buffer API and it turns out Andrii
Nakryiko saw the same problem and solved it in a similar way in 2020.
(See perf_buffer__consume_buffer.)
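
[Editorial note: for reference, the perf-side analog mentioned above is declared in tools/lib/bpf/libbpf.h roughly as follows - quoted from memory, so double-check the exact prototype against your libbpf version:

	/* Consume records from a single per-CPU buffer, identified by its
	 * index, instead of scanning every buffer like perf_buffer__consume().
	 */
	LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx);
]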

> > 
> > diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> > index 55b97b2087540..20ccc65eb3f9d 100644
> > --- a/tools/lib/bpf/libbpf.h
> > +++ b/tools/lib/bpf/libbpf.h
> > @@ -1195,6 +1195,7 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
> >   				ring_buffer_sample_fn sample_cb, void *ctx);
> >   LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
> >   LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
> > +LIBBPF_API int ring_buffer__consume_ring(struct ring_buffer *rb, uint32_t ring_id);
> >   LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
> [...]

Patch

diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 55b97b2087540..20ccc65eb3f9d 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1195,6 +1195,7 @@  LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 				ring_buffer_sample_fn sample_cb, void *ctx);
 LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
 LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
+LIBBPF_API int ring_buffer__consume_ring(struct ring_buffer *rb, uint32_t ring_id);
 LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
 
 struct user_ring_buffer_opts {
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 9c7538dd5835e..42dc418b4672f 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -398,4 +398,5 @@  LIBBPF_1.3.0 {
 		bpf_prog_detach_opts;
 		bpf_program__attach_netfilter;
 		bpf_program__attach_tcx;
+		ring_buffer__consume_ring;
 } LIBBPF_1.2.0;
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 02199364db136..457469fc7d71e 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -290,6 +290,28 @@  int ring_buffer__consume(struct ring_buffer *rb)
 	return res;
 }
 
+/* Consume available data from a single RINGBUF map identified by its ID.
+ * The ring ID is returned in epoll_data by epoll_wait when called with
+ * ring_buffer__epoll_fd.
+ */
+int ring_buffer__consume_ring(struct ring_buffer *rb, uint32_t ring_id)
+{
+	struct ring *ring;
+	int64_t res;
+
+	if (ring_id >= rb->ring_cnt)
+		return libbpf_err(-EINVAL);
+
+	ring = &rb->rings[ring_id];
+	res = ringbuf_process_ring(ring);
+	if (res < 0)
+		return libbpf_err(res);
+
+	if (res > INT_MAX)
+		return INT_MAX;
+	return res;
+}
+
 /* Poll for available data and consume records, if any are available.
  * Returns number of records consumed (or INT_MAX, whichever is less), or
  * negative number, if any of the registered callbacks returned error.