diff mbox series

[net-next,2/3] bpf: test_run: Use system page pool for XDP live frame mode

Message ID 20240215132634.474055-3-toke@redhat.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Change BPF_TEST_RUN use the system page pool for live XDP frames | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1008 this patch: 1008
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 18 of 18 maintainers
netdev/build_clang success Errors and warnings before: 1007 this patch: 1007
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1025 this patch: 1025
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-02-16--06-00 (tests: 1443)
bpf/vmtest-bpf-PR fail merge-conflict
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-38 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17

Commit Message

Toke Høiland-Jørgensen Feb. 15, 2024, 1:26 p.m. UTC
The BPF_TEST_RUN code in XDP live frame mode creates a new page pool
each time it is called and uses that to allocate the frames used for the
XDP run. This works well if the syscall is used with a high repetitions
number, as it allows for efficient page recycling. However, if used with
a small number of repetitions, the overhead of creating and tearing down
the page pool is significant, and can even lead to system stalls if the
syscall is called in a tight loop.

Now that we have a persistent system page pool instance, it becomes
pretty straightforward to change the test_run code to use it. The only
wrinkle is that we can no longer rely on a custom page init callback
from page_pool itself; instead, we change the test_run code to write a
random cookie value to the beginning of the page as an indicator that
the page has been initialised and can be re-used without copying the
initial data again.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 net/bpf/test_run.c | 134 ++++++++++++++++++++++-----------------------
 1 file changed, 66 insertions(+), 68 deletions(-)

Comments

Daniel Borkmann Feb. 20, 2024, 9:06 a.m. UTC | #1
On 2/15/24 2:26 PM, Toke Høiland-Jørgensen wrote:
> The BPF_TEST_RUN code in XDP live frame mode creates a new page pool
> each time it is called and uses that to allocate the frames used for the
> XDP run. This works well if the syscall is used with a high repetitions
> number, as it allows for efficient page recycling. However, if used with
> a small number of repetitions, the overhead of creating and tearing down
> the page pool is significant, and can even lead to system stalls if the
> syscall is called in a tight loop.
> 
> Now that we have a persistent system page pool instance, it becomes
> pretty straight forward to change the test_run code to use it. The only
> wrinkle is that we can no longer rely on a custom page init callback
> from page_pool itself; instead, we change the test_run code to write a
> random cookie value to the beginning of the page as an indicator that
> the page has been initialised and can be re-used without copying the
> initial data again.
> 
> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>

[...]
> -
>   	/* We create a 'fake' RXQ referencing the original dev, but with an
>   	 * xdp_mem_info pointing to our page_pool
>   	 */
>   	xdp_rxq_info_reg(&xdp->rxq, orig_ctx->rxq->dev, 0, 0);
> -	xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL;
> -	xdp->rxq.mem.id = pp->xdp_mem_id;
> +	xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL; /* mem id is set per-frame below */
>   	xdp->dev = orig_ctx->rxq->dev;
>   	xdp->orig_ctx = orig_ctx;
>   
> +	/* We need a random cookie for each run as pages can stick around
> +	 * between runs in the system page pool
> +	 */
> +	get_random_bytes(&xdp->cookie, sizeof(xdp->cookie));
> +

So the assumption is that there is only a tiny chance of collisions with
users outside of xdp test_run. If they do collide however, you'd leak data.
Presumably the 64 bit cookie might suffice.. nit, perhaps makes sense to
explicitly exclude zero cookie?

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

Thanks,
Daniel
Paolo Abeni Feb. 20, 2024, 9:45 a.m. UTC | #2
On Tue, 2024-02-20 at 10:06 +0100, Daniel Borkmann wrote:
> On 2/15/24 2:26 PM, Toke Høiland-Jørgensen wrote:
> > The BPF_TEST_RUN code in XDP live frame mode creates a new page pool
> > each time it is called and uses that to allocate the frames used for the
> > XDP run. This works well if the syscall is used with a high repetitions
> > number, as it allows for efficient page recycling. However, if used with
> > a small number of repetitions, the overhead of creating and tearing down
> > the page pool is significant, and can even lead to system stalls if the
> > syscall is called in a tight loop.
> > 
> > Now that we have a persistent system page pool instance, it becomes
> > pretty straight forward to change the test_run code to use it. The only
> > wrinkle is that we can no longer rely on a custom page init callback
> > from page_pool itself; instead, we change the test_run code to write a
> > random cookie value to the beginning of the page as an indicator that
> > the page has been initialised and can be re-used without copying the
> > initial data again.
> > 
> > Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
> 
> [...]
> > -
> >   	/* We create a 'fake' RXQ referencing the original dev, but with an
> >   	 * xdp_mem_info pointing to our page_pool
> >   	 */
> >   	xdp_rxq_info_reg(&xdp->rxq, orig_ctx->rxq->dev, 0, 0);
> > -	xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL;
> > -	xdp->rxq.mem.id = pp->xdp_mem_id;
> > +	xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL; /* mem id is set per-frame below */
> >   	xdp->dev = orig_ctx->rxq->dev;
> >   	xdp->orig_ctx = orig_ctx;
> >   
> > +	/* We need a random cookie for each run as pages can stick around
> > +	 * between runs in the system page pool
> > +	 */
> > +	get_random_bytes(&xdp->cookie, sizeof(xdp->cookie));
> > +
> 
> So the assumption is that there is only a tiny chance of collisions with
> users outside of xdp test_run. If they do collide however, you'd leak data.

Good point. @Toke: what is the worst-case thing that could happen in
case a page is recycled from another pool's user?

could we possibly end up matching the cookie for a page containing
'random' orig_ctx/ctx, so that bpf program later tries to access
equally random ptrs?

Thanks!

Paolo
Toke Høiland-Jørgensen Feb. 20, 2024, 1:14 p.m. UTC | #3
Paolo Abeni <pabeni@redhat.com> writes:

> On Tue, 2024-02-20 at 10:06 +0100, Daniel Borkmann wrote:
>> On 2/15/24 2:26 PM, Toke Høiland-Jørgensen wrote:
>> > The BPF_TEST_RUN code in XDP live frame mode creates a new page pool
>> > each time it is called and uses that to allocate the frames used for the
>> > XDP run. This works well if the syscall is used with a high repetitions
>> > number, as it allows for efficient page recycling. However, if used with
>> > a small number of repetitions, the overhead of creating and tearing down
>> > the page pool is significant, and can even lead to system stalls if the
>> > syscall is called in a tight loop.
>> > 
>> > Now that we have a persistent system page pool instance, it becomes
>> > pretty straight forward to change the test_run code to use it. The only
>> > wrinkle is that we can no longer rely on a custom page init callback
>> > from page_pool itself; instead, we change the test_run code to write a
>> > random cookie value to the beginning of the page as an indicator that
>> > the page has been initialised and can be re-used without copying the
>> > initial data again.
>> > 
>> > Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
>> 
>> [...]
>> > -
>> >   	/* We create a 'fake' RXQ referencing the original dev, but with an
>> >   	 * xdp_mem_info pointing to our page_pool
>> >   	 */
>> >   	xdp_rxq_info_reg(&xdp->rxq, orig_ctx->rxq->dev, 0, 0);
>> > -	xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL;
>> > -	xdp->rxq.mem.id = pp->xdp_mem_id;
>> > +	xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL; /* mem id is set per-frame below */
>> >   	xdp->dev = orig_ctx->rxq->dev;
>> >   	xdp->orig_ctx = orig_ctx;
>> >   
>> > +	/* We need a random cookie for each run as pages can stick around
>> > +	 * between runs in the system page pool
>> > +	 */
>> > +	get_random_bytes(&xdp->cookie, sizeof(xdp->cookie));
>> > +
>> 
>> So the assumption is that there is only a tiny chance of collisions with
>> users outside of xdp test_run. If they do collide however, you'd leak data.
>
> Good point. @Toke: what is the worst-case thing that could happen in
> case a page is recycled from another pool's user?
>
> could we possibly end-up matching the cookie for a page containing
> 'random' orig_ctx/ctx, so that bpf program later tries to access
> equally random ptrs?

Well, yes, if there's a collision in the cookie value we'll end up
basically dereferencing garbage pointer values, with all the badness
that ensues (most likely just a crash, but system compromise is probably
also possible in such a case).

A 64-bit value is probably too small to be resistant against random
collisions in a "protect global data across the internet" type scenario
(for instance, a 64-bit cryptographic key is considered weak). However,
in this case the collision domain is only for the lifetime of the
running system, and each cookie value only stays valid for the duration
of a single syscall (seconds, at most), so I figured it was acceptable.

We could exclude all-zeros as a valid cookie value (and also anything
that looks like a valid pointer), but that only removes a few of the
possible random collision values, so if we're really worried about
random collisions of 64-bit numbers, I think a better approach would be
to just make the cookie a 128-bit value instead. I can respin with that
if you prefer? :)

-Toke
Paolo Abeni Feb. 20, 2024, 2:57 p.m. UTC | #4
On Tue, 2024-02-20 at 14:14 +0100, Toke Høiland-Jørgensen wrote:
> Paolo Abeni <pabeni@redhat.com> writes:
> 
> > On Tue, 2024-02-20 at 10:06 +0100, Daniel Borkmann wrote:
> > > On 2/15/24 2:26 PM, Toke Høiland-Jørgensen wrote:
> > > > The BPF_TEST_RUN code in XDP live frame mode creates a new page pool
> > > > each time it is called and uses that to allocate the frames used for the
> > > > XDP run. This works well if the syscall is used with a high repetitions
> > > > number, as it allows for efficient page recycling. However, if used with
> > > > a small number of repetitions, the overhead of creating and tearing down
> > > > the page pool is significant, and can even lead to system stalls if the
> > > > syscall is called in a tight loop.
> > > > 
> > > > Now that we have a persistent system page pool instance, it becomes
> > > > pretty straight forward to change the test_run code to use it. The only
> > > > wrinkle is that we can no longer rely on a custom page init callback
> > > > from page_pool itself; instead, we change the test_run code to write a
> > > > random cookie value to the beginning of the page as an indicator that
> > > > the page has been initialised and can be re-used without copying the
> > > > initial data again.
> > > > 
> > > > Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
> > > 
> > > [...]
> > > > -
> > > >   	/* We create a 'fake' RXQ referencing the original dev, but with an
> > > >   	 * xdp_mem_info pointing to our page_pool
> > > >   	 */
> > > >   	xdp_rxq_info_reg(&xdp->rxq, orig_ctx->rxq->dev, 0, 0);
> > > > -	xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL;
> > > > -	xdp->rxq.mem.id = pp->xdp_mem_id;
> > > > +	xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL; /* mem id is set per-frame below */
> > > >   	xdp->dev = orig_ctx->rxq->dev;
> > > >   	xdp->orig_ctx = orig_ctx;
> > > >   
> > > > +	/* We need a random cookie for each run as pages can stick around
> > > > +	 * between runs in the system page pool
> > > > +	 */
> > > > +	get_random_bytes(&xdp->cookie, sizeof(xdp->cookie));
> > > > +
> > > 
> > > So the assumption is that there is only a tiny chance of collisions with
> > > users outside of xdp test_run. If they do collide however, you'd leak data.
> > 
> > Good point. @Toke: what is the worst-case thing that could happen in
> > case a page is recycled from another pool's user?
> > 
> > could we possibly end-up matching the cookie for a page containing
> > 'random' orig_ctx/ctx, so that bpf program later tries to access
> > equally random ptrs?
> 
> Well, yes, if there's a collision in the cookie value we'll end up
> basically dereferencing garbage pointer values, with all the badness
> that ensues (most likely just a crash, but system compromise is probably
> also possible in such a case).
> 
> A 64-bit value is probably too small to be resistant against random
> collisions in a "protect global data across the internet" type scenario
> (for instance, a 64-bit cryptographic key is considered weak). However,
> in this case the collision domain is only for the lifetime of the
> running system, and each cookie value only stays valid for the duration
> of a single syscall (seconds, at most), so I figured it was acceptable.
> 
> We could exclude all-zeros as a valid cookie value (and also anything
> that looks as a valid pointer), but that only removes a few of the
> possible random collision values, so if we're really worried about
> random collisions of 64-bit numbers, I think a better approach would be
> to just make the cookie a 128-bit value instead. I can respin with that
> if you prefer? :)

I must admit that merging a code that will allow trashing the kernel -
even with a very low probability - is quite scaring to me.

How relevant is the recycle case optimization? Could removing
completely that optimization be considered?

Thanks!

Paolo
Toke Høiland-Jørgensen Feb. 20, 2024, 7:33 p.m. UTC | #5
Paolo Abeni <pabeni@redhat.com> writes:

> On Tue, 2024-02-20 at 14:14 +0100, Toke Høiland-Jørgensen wrote:
>> Paolo Abeni <pabeni@redhat.com> writes:
>> 
>> > On Tue, 2024-02-20 at 10:06 +0100, Daniel Borkmann wrote:
>> > > On 2/15/24 2:26 PM, Toke Høiland-Jørgensen wrote:
>> > > > The BPF_TEST_RUN code in XDP live frame mode creates a new page pool
>> > > > each time it is called and uses that to allocate the frames used for the
>> > > > XDP run. This works well if the syscall is used with a high repetitions
>> > > > number, as it allows for efficient page recycling. However, if used with
>> > > > a small number of repetitions, the overhead of creating and tearing down
>> > > > the page pool is significant, and can even lead to system stalls if the
>> > > > syscall is called in a tight loop.
>> > > > 
>> > > > Now that we have a persistent system page pool instance, it becomes
>> > > > pretty straight forward to change the test_run code to use it. The only
>> > > > wrinkle is that we can no longer rely on a custom page init callback
>> > > > from page_pool itself; instead, we change the test_run code to write a
>> > > > random cookie value to the beginning of the page as an indicator that
>> > > > the page has been initialised and can be re-used without copying the
>> > > > initial data again.
>> > > > 
>> > > > Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
>> > > 
>> > > [...]
>> > > > -
>> > > >   	/* We create a 'fake' RXQ referencing the original dev, but with an
>> > > >   	 * xdp_mem_info pointing to our page_pool
>> > > >   	 */
>> > > >   	xdp_rxq_info_reg(&xdp->rxq, orig_ctx->rxq->dev, 0, 0);
>> > > > -	xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL;
>> > > > -	xdp->rxq.mem.id = pp->xdp_mem_id;
>> > > > +	xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL; /* mem id is set per-frame below */
>> > > >   	xdp->dev = orig_ctx->rxq->dev;
>> > > >   	xdp->orig_ctx = orig_ctx;
>> > > >   
>> > > > +	/* We need a random cookie for each run as pages can stick around
>> > > > +	 * between runs in the system page pool
>> > > > +	 */
>> > > > +	get_random_bytes(&xdp->cookie, sizeof(xdp->cookie));
>> > > > +
>> > > 
>> > > So the assumption is that there is only a tiny chance of collisions with
>> > > users outside of xdp test_run. If they do collide however, you'd leak data.
>> > 
>> > Good point. @Toke: what is the worst-case thing that could happen in
>> > case a page is recycled from another pool's user?
>> > 
>> > could we possibly end-up matching the cookie for a page containing
>> > 'random' orig_ctx/ctx, so that bpf program later tries to access
>> > equally random ptrs?
>> 
>> Well, yes, if there's a collision in the cookie value we'll end up
>> basically dereferencing garbage pointer values, with all the badness
>> that ensues (most likely just a crash, but system compromise is probably
>> also possible in such a case).
>> 
>> A 64-bit value is probably too small to be resistant against random
>> collisions in a "protect global data across the internet" type scenario
>> (for instance, a 64-bit cryptographic key is considered weak). However,
>> in this case the collision domain is only for the lifetime of the
>> running system, and each cookie value only stays valid for the duration
>> of a single syscall (seconds, at most), so I figured it was acceptable.
>> 
>> We could exclude all-zeros as a valid cookie value (and also anything
>> that looks as a valid pointer), but that only removes a few of the
>> possible random collision values, so if we're really worried about
>> random collisions of 64-bit numbers, I think a better approach would be
>> to just make the cookie a 128-bit value instead. I can respin with that
>> if you prefer? :)
>
> I must admit that merging a code that will allow trashing the kernel -
> even with a very low probability - is quite scaring to me.
>
> How much relevant is the recycle case optimization? Could removing
> completely that optimization be considered?

Did a quick test of this, and skipping the recycling eats ~12.5%
performance, so I don't think getting rid of it is a good solution.

However, increasing the cookie size to 128 bits makes no performance
difference (everything stays in the same cache lines). If we do that,
the collision probability enters "won't happen before the heat death of
the universe" territory, so I don't think there's any real concern that
this will happen.

I'll respin with the bigger cookie size (and add that patch Olek
suggested to remove the init callback from the page pool code).

-Toke
diff mbox series

Patch

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index dfd919374017..c742869a0612 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -94,10 +94,14 @@  static bool bpf_test_timer_continue(struct bpf_test_timer *t, int iterations,
 }
 
 /* We put this struct at the head of each page with a context and frame
- * initialised when the page is allocated, so we don't have to do this on each
- * repetition of the test run.
+ * initialised the first time a given page is used, saving the memcpy() of the
+ * data on subsequent repetition of the test run. The cookie value is used to
+ * mark the page data the first time we initialise it so we can skip it the next
+ * time we see that page.
  */
+
 struct xdp_page_head {
+	u64 cookie;
 	struct xdp_buff orig_ctx;
 	struct xdp_buff ctx;
 	union {
@@ -111,10 +115,9 @@  struct xdp_test_data {
 	struct xdp_buff *orig_ctx;
 	struct xdp_rxq_info rxq;
 	struct net_device *dev;
-	struct page_pool *pp;
 	struct xdp_frame **frames;
 	struct sk_buff **skbs;
-	struct xdp_mem_info mem;
+	u64 cookie;
 	u32 batch_size;
 	u32 frame_cnt;
 };
@@ -126,48 +129,9 @@  struct xdp_test_data {
 #define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head))
 #define TEST_XDP_MAX_BATCH 256
 
-static void xdp_test_run_init_page(struct page *page, void *arg)
-{
-	struct xdp_page_head *head = phys_to_virt(page_to_phys(page));
-	struct xdp_buff *new_ctx, *orig_ctx;
-	u32 headroom = XDP_PACKET_HEADROOM;
-	struct xdp_test_data *xdp = arg;
-	size_t frm_len, meta_len;
-	struct xdp_frame *frm;
-	void *data;
-
-	orig_ctx = xdp->orig_ctx;
-	frm_len = orig_ctx->data_end - orig_ctx->data_meta;
-	meta_len = orig_ctx->data - orig_ctx->data_meta;
-	headroom -= meta_len;
-
-	new_ctx = &head->ctx;
-	frm = head->frame;
-	data = head->data;
-	memcpy(data + headroom, orig_ctx->data_meta, frm_len);
-
-	xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq);
-	xdp_prepare_buff(new_ctx, data, headroom, frm_len, true);
-	new_ctx->data = new_ctx->data_meta + meta_len;
-
-	xdp_update_frame_from_buff(new_ctx, frm);
-	frm->mem = new_ctx->rxq->mem;
-
-	memcpy(&head->orig_ctx, new_ctx, sizeof(head->orig_ctx));
-}
-
 static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx)
 {
-	struct page_pool *pp;
 	int err = -ENOMEM;
-	struct page_pool_params pp_params = {
-		.order = 0,
-		.flags = 0,
-		.pool_size = xdp->batch_size,
-		.nid = NUMA_NO_NODE,
-		.init_callback = xdp_test_run_init_page,
-		.init_arg = xdp,
-	};
 
 	xdp->frames = kvmalloc_array(xdp->batch_size, sizeof(void *), GFP_KERNEL);
 	if (!xdp->frames)
@@ -177,34 +141,21 @@  static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_c
 	if (!xdp->skbs)
 		goto err_skbs;
 
-	pp = page_pool_create(&pp_params);
-	if (IS_ERR(pp)) {
-		err = PTR_ERR(pp);
-		goto err_pp;
-	}
-
-	/* will copy 'mem.id' into pp->xdp_mem_id */
-	err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp);
-	if (err)
-		goto err_mmodel;
-
-	xdp->pp = pp;
-
 	/* We create a 'fake' RXQ referencing the original dev, but with an
 	 * xdp_mem_info pointing to our page_pool
 	 */
 	xdp_rxq_info_reg(&xdp->rxq, orig_ctx->rxq->dev, 0, 0);
-	xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL;
-	xdp->rxq.mem.id = pp->xdp_mem_id;
+	xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL; /* mem id is set per-frame below */
 	xdp->dev = orig_ctx->rxq->dev;
 	xdp->orig_ctx = orig_ctx;
 
+	/* We need a random cookie for each run as pages can stick around
+	 * between runs in the system page pool
+	 */
+	get_random_bytes(&xdp->cookie, sizeof(xdp->cookie));
+
 	return 0;
 
-err_mmodel:
-	page_pool_destroy(pp);
-err_pp:
-	kvfree(xdp->skbs);
 err_skbs:
 	kvfree(xdp->frames);
 	return err;
@@ -212,8 +163,6 @@  static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_c
 
 static void xdp_test_run_teardown(struct xdp_test_data *xdp)
 {
-	xdp_unreg_mem_model(&xdp->mem);
-	page_pool_destroy(xdp->pp);
 	kfree(xdp->frames);
 	kfree(xdp->skbs);
 }
@@ -235,8 +184,12 @@  static bool ctx_was_changed(struct xdp_page_head *head)
 		head->orig_ctx.data_end != head->ctx.data_end;
 }
 
-static void reset_ctx(struct xdp_page_head *head)
+static void reset_ctx(struct xdp_page_head *head, struct xdp_test_data *xdp)
 {
+	/* mem id can change if we migrate CPUs between batches */
+	if (head->frame->mem.id != xdp->rxq.mem.id)
+		head->frame->mem.id = xdp->rxq.mem.id;
+
 	if (likely(!frame_was_changed(head) && !ctx_was_changed(head)))
 		return;
 
@@ -246,6 +199,48 @@  static void reset_ctx(struct xdp_page_head *head)
 	xdp_update_frame_from_buff(&head->ctx, head->frame);
 }
 
+static struct xdp_page_head *
+xdp_test_run_init_page(struct page *page, struct xdp_test_data *xdp)
+{
+	struct xdp_page_head *head = phys_to_virt(page_to_phys(page));
+	struct xdp_buff *new_ctx, *orig_ctx;
+	u32 headroom = XDP_PACKET_HEADROOM;
+	size_t frm_len, meta_len;
+	struct xdp_frame *frm;
+	void *data;
+
+	/* Optimise for the recycle case, which is the normal case when doing
+	 * high-repetition REDIRECTS to drivers that return frames.
+	 */
+	if (likely(head->cookie == xdp->cookie)) {
+		reset_ctx(head, xdp);
+		return head;
+	}
+
+	head->cookie = xdp->cookie;
+
+	orig_ctx = xdp->orig_ctx;
+	frm_len = orig_ctx->data_end - orig_ctx->data_meta;
+	meta_len = orig_ctx->data - orig_ctx->data_meta;
+	headroom -= meta_len;
+
+	new_ctx = &head->ctx;
+	frm = head->frame;
+	data = head->data;
+	memcpy(data + headroom, orig_ctx->data_meta, frm_len);
+
+	xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq);
+	xdp_prepare_buff(new_ctx, data, headroom, frm_len, true);
+	new_ctx->data = new_ctx->data_meta + meta_len;
+
+	xdp_update_frame_from_buff(new_ctx, frm);
+	frm->mem = new_ctx->rxq->mem;
+
+	memcpy(&head->orig_ctx, new_ctx, sizeof(head->orig_ctx));
+
+	return head;
+}
+
 static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
 			   struct sk_buff **skbs,
 			   struct net_device *dev)
@@ -287,6 +282,7 @@  static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
 	struct xdp_page_head *head;
 	struct xdp_frame *frm;
 	bool redirect = false;
+	struct page_pool *pp;
 	struct xdp_buff *ctx;
 	struct page *page;
 
@@ -295,15 +291,17 @@  static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
 	local_bh_disable();
 	xdp_set_return_frame_no_direct();
 
+	pp = this_cpu_read(system_page_pool);
+	xdp->rxq.mem.id = pp->xdp_mem_id;
+
 	for (i = 0; i < batch_sz; i++) {
-		page = page_pool_dev_alloc_pages(xdp->pp);
+		page = page_pool_dev_alloc_pages(pp);
 		if (!page) {
 			err = -ENOMEM;
 			goto out;
 		}
 
-		head = phys_to_virt(page_to_phys(page));
-		reset_ctx(head);
+		head = xdp_test_run_init_page(page, xdp);
 		ctx = &head->ctx;
 		frm = head->frame;
 		xdp->frame_cnt++;