
[v2,bpf-next,17/20] selftests/bpf: Add unit tests for bpf_arena_alloc/free_pages

Message ID 20240209040608.98927-18-alexei.starovoitov@gmail.com
State New
Series bpf: Introduce BPF arena.

Commit Message

Alexei Starovoitov Feb. 9, 2024, 4:06 a.m. UTC
From: Alexei Starovoitov <ast@kernel.org>

Add unit tests for bpf_arena_alloc/free_pages() functionality
and bpf_arena_common.h with a set of common helpers and macros that
are used in this test and the following patches.

Also modify test_loader, which didn't support running
bpf_prog_type_syscall programs.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.aarch64  |  1 +
 tools/testing/selftests/bpf/DENYLIST.s390x    |  1 +
 .../testing/selftests/bpf/bpf_arena_common.h  | 70 ++++++++++++++
 .../selftests/bpf/prog_tests/verifier.c       |  2 +
 .../selftests/bpf/progs/verifier_arena.c      | 91 +++++++++++++++++++
 tools/testing/selftests/bpf/test_loader.c     |  9 +-
 6 files changed, 172 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/bpf_arena_common.h
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_arena.c

Comments

David Vernet Feb. 9, 2024, 11:14 p.m. UTC | #1
On Thu, Feb 08, 2024 at 08:06:05PM -0800, Alexei Starovoitov wrote:
> From: Alexei Starovoitov <ast@kernel.org>
> 
> Add unit tests for bpf_arena_alloc/free_pages() functionality
> and bpf_arena_common.h with a set of common helpers and macros that
> are used in this test and the following patches.
> 
> Also modify test_loader, which didn't support running
> bpf_prog_type_syscall programs.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>  tools/testing/selftests/bpf/DENYLIST.aarch64  |  1 +
>  tools/testing/selftests/bpf/DENYLIST.s390x    |  1 +
>  .../testing/selftests/bpf/bpf_arena_common.h  | 70 ++++++++++++++
>  .../selftests/bpf/prog_tests/verifier.c       |  2 +
>  .../selftests/bpf/progs/verifier_arena.c      | 91 +++++++++++++++++++
>  tools/testing/selftests/bpf/test_loader.c     |  9 +-
>  6 files changed, 172 insertions(+), 2 deletions(-)
>  create mode 100644 tools/testing/selftests/bpf/bpf_arena_common.h
>  create mode 100644 tools/testing/selftests/bpf/progs/verifier_arena.c
> 
> diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
> index 5c2cc7e8c5d0..8e70af386e52 100644
> --- a/tools/testing/selftests/bpf/DENYLIST.aarch64
> +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
> @@ -11,3 +11,4 @@ fill_link_info/kprobe_multi_link_info            # bpf_program__attach_kprobe_mu
>  fill_link_info/kretprobe_multi_link_info         # bpf_program__attach_kprobe_multi_opts unexpected error: -95
>  fill_link_info/kprobe_multi_invalid_ubuff        # bpf_program__attach_kprobe_multi_opts unexpected error: -95
>  missed/kprobe_recursion                          # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
> +verifier_arena                                   # JIT does not support arena
> diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
> index 1a63996c0304..ded440277f6e 100644
> --- a/tools/testing/selftests/bpf/DENYLIST.s390x
> +++ b/tools/testing/selftests/bpf/DENYLIST.s390x
> @@ -3,3 +3,4 @@
>  exceptions				 # JIT does not support calling kfunc bpf_throw				       (exceptions)
>  get_stack_raw_tp                         # user_stack corrupted user stack                                             (no backchain userspace)
>  stacktrace_build_id                      # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2                   (?)
> +verifier_arena                           # JIT does not support arena
> diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
> new file mode 100644
> index 000000000000..07849d502f40
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/bpf_arena_common.h
> @@ -0,0 +1,70 @@
> +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
> +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
> +#pragma once
> +
> +#ifndef WRITE_ONCE
> +#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
> +#endif
> +
> +#ifndef NUMA_NO_NODE
> +#define	NUMA_NO_NODE	(-1)
> +#endif
> +
> +#ifndef arena_container_of

Why is this ifndef required if we have a pragma once above?

> +#define arena_container_of(ptr, type, member)			\
> +	({							\
> +		void __arena *__mptr = (void __arena *)(ptr);	\
> +		((type *)(__mptr - offsetof(type, member)));	\
> +	})
> +#endif
> +
> +#ifdef __BPF__ /* when compiled as bpf program */
> +
> +#ifndef PAGE_SIZE
> +#define PAGE_SIZE __PAGE_SIZE
> +/*
> + * for older kernels try sizeof(struct genradix_node)
> + * or flexible:
> + * static inline long __bpf_page_size(void) {
> + *   return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
> + * }
> + * but generated code is not great.
> + */
> +#endif
> +
> +#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM)
> +#define __arena __attribute__((address_space(1)))
> +#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
> +#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
> +#else
> +#define __arena
> +#define cast_kern(ptr) bpf_arena_cast(ptr, BPF_ARENA_CAST_KERN, 1)
> +#define cast_user(ptr) bpf_arena_cast(ptr, BPF_ARENA_CAST_USER, 1)
> +#endif
> +
> +void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
> +				    int node_id, __u64 flags) __ksym __weak;
> +void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
> +
> +#else /* when compiled as user space code */
> +
> +#define __arena
> +#define __arg_arena
> +#define cast_kern(ptr) /* nop for user space */
> +#define cast_user(ptr) /* nop for user space */
> +__weak char arena[1];
> +
> +#ifndef offsetof
> +#define offsetof(type, member)  ((unsigned long)&((type *)0)->member)
> +#endif
> +
> +static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
> +						  int node_id, __u64 flags)
> +{
> +	return NULL;
> +}
> +static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
> +{
> +}
> +
> +#endif
> diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
> index 9c6072a19745..985273832f89 100644
> --- a/tools/testing/selftests/bpf/prog_tests/verifier.c
> +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
> @@ -4,6 +4,7 @@
>  
>  #include "cap_helpers.h"
>  #include "verifier_and.skel.h"
> +#include "verifier_arena.skel.h"
>  #include "verifier_array_access.skel.h"
>  #include "verifier_basic_stack.skel.h"
>  #include "verifier_bitfield_write.skel.h"
> @@ -118,6 +119,7 @@ static void run_tests_aux(const char *skel_name,
>  #define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)
>  
>  void test_verifier_and(void)                  { RUN(verifier_and); }
> +void test_verifier_arena(void)                { RUN(verifier_arena); }
>  void test_verifier_basic_stack(void)          { RUN(verifier_basic_stack); }
>  void test_verifier_bitfield_write(void)       { RUN(verifier_bitfield_write); }
>  void test_verifier_bounds(void)               { RUN(verifier_bounds); }
> diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
> new file mode 100644
> index 000000000000..0e667132ef92
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
> @@ -0,0 +1,91 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
> +
> +#include <vmlinux.h>
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +#include "bpf_misc.h"
> +#include "bpf_experimental.h"
> +#include "bpf_arena_common.h"
> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_ARENA);
> +	__uint(map_flags, BPF_F_MMAPABLE);
> +	__uint(max_entries, 2); /* arena of two pages close to 32-bit boundary */
> +	__ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
> +} arena SEC(".maps");
> +
> +SEC("syscall")
> +__success __retval(0)
> +int basic_alloc1(void *ctx)
> +{
> +	volatile int __arena *page1, *page2, *no_page, *page3;
> +
> +	page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
> +	if (!page1)
> +		return 1;
> +	*page1 = 1;
> +	page2 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
> +	if (!page2)
> +		return 2;
> +	*page2 = 2;
> +	no_page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
> +	if (no_page)
> +		return 3;
> +	if (*page1 != 1)
> +		return 4;
> +	if (*page2 != 2)
> +		return 5;
> +	bpf_arena_free_pages(&arena, (void __arena *)page2, 1);
> +	if (*page1 != 1)
> +		return 6;
> +	if (*page2 != 0) /* use-after-free should return 0 */

So I know that longer term the plan is to leverage exceptions and have
us exit and unwind the program here, but I think it's somewhat important
to underscore how significant a usability improvement that will be.
Reading 0 isn't terribly uncommon for typical scheduling use cases. For
example, if we stored a set of cpumasks in arena pages, we may AND them
together and not be concerned if there are no CPUs set, as that would be
a perfectly normal state. E.g. if we're using arena cpumasks to track
idle cores and a task's allowed CPUs, and we AND them together and see
0, we'd just assume there were no idle cores available on the system.
Another example would be scx_nest, where we would incorrectly think that
a nest didn't have enough cores, or be wrong about whether a task could
run in a domain, etc.
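
To make the failure mode concrete, here is a minimal sketch (not from
the patch; the bitmap layout and helper name are made up for
illustration, and __arena comes from bpf_arena_common.h) of why a 0
read is ambiguous:

/* Both masks are assumed to be u64 bitmap words living in arena pages. */
static inline bool any_idle_allowed_cpu(u64 __arena *idle_mask,
					u64 __arena *allowed_mask)
{
	u64 candidates = *idle_mask & *allowed_mask;

	/* If either page was freed, the faulting load silently yields 0,
	 * so candidates == 0 looks exactly like "no idle CPUs" and the
	 * use-after-free goes unnoticed.
	 */
	return candidates != 0;
}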

Obviously it's way better for us to actually have arenas in the interim
so this is fine for now, but UAF bugs could potentially be pretty
painful until we get proper exception unwinding support.

Otherwise, in terms of usability, this looks really good. The only thing
to bear in mind is that I don't think we can fully get away from kptrs
that will have some duplicated logic compared to what we can enable in
an arena. For example, we will have to retain at least some of the
struct cpumask * kptrs for e.g. copying a struct task_struct's struct
cpumask *cpus_ptr field.

For now, we could iterate over the cpumask and manually set the bits, so
maybe even just supporting bpf_cpumask_test_cpu() would be enough
(though doing a bitmap_copy() would be better of course)? This is
probably fine for most use cases as we'd likely only be doing struct
cpumask * -> arena copies on slowpaths. But is there any kind of more
generalized integration we want to have between arenas and kptrs?  Not
sure, can't think of any off the top of my head.
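
For instance, the interim slow-path copy could look something like this
sketch (bpf_cpumask_test_cpu() is the existing kfunc and bpf_for comes
from bpf_experimental.h; the destination bitmap layout is made up):

/* Copy a kernel cpumask into an arena bitmap one bit at a time.
 * dst is assumed to point at nr_cpus bits worth of arena memory.
 */
static void copy_cpumask_to_arena(const struct cpumask *src,
				  u64 __arena *dst, u32 nr_cpus)
{
	int cpu;

	bpf_for(cpu, 0, nr_cpus) {
		if (bpf_cpumask_test_cpu(cpu, src))
			dst[cpu / 64] |= 1ULL << (cpu & 63);
	}
}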

> +		return 7;
> +	page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
> +	if (!page3)
> +		return 8;
> +	*page3 = 3;
> +	if (page2 != page3)
> +		return 9;
> +	if (*page1 != 1)
> +		return 10;

Should we also test doing a store after an arena has been freed?

> +	return 0;
> +}
Alexei Starovoitov Feb. 10, 2024, 4:35 a.m. UTC | #2
On Fri, Feb 9, 2024 at 3:14 PM David Vernet <void@manifault.com> wrote:
>
> > +
> > +#ifndef arena_container_of
>
> Why is this ifndef required if we have a pragma once above?

Just a habit to check for a macro before defining it.
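
i.e. the guard also lets a program supply its own definition before
including the header, e.g. (hypothetical user code; my_container_of is
made up):

#define arena_container_of(ptr, type, member) my_container_of(ptr, type, member)
#include "bpf_arena_common.h"	/* the #ifndef skips the redefinition */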

> Obviously it's way better for us to actually have arenas in the interim
> so this is fine for now, but UAF bugs could potentially be pretty
> painful until we get proper exception unwinding support.

Detection that arena access faulted doesn't have to come after
exception unwinding. Exceptions vs cancellable progs are also different.
A record of the line in bpf prog that caused the first fault is probably
good enough for prog debugging.

> Otherwise, in terms of usability, this looks really good. The only thing
> to bear in mind is that I don't think we can fully get away from kptrs
> that will have some duplicated logic compared to what we can enable in
> an arena. For example, we will have to retain at least some of the
> struct cpumask * kptrs for e.g. copying a struct task_struct's struct
> cpumask *cpus_ptr field.

I think that's a bit orthogonal.
task->cpus_ptr is a trusted_ptr_to_btf_id access that can be mixed
within a program with arena access.

> For now, we could iterate over the cpumask and manually set the bits, so
> maybe even just supporting bpf_cpumask_test_cpu() would be enough
> (though doing a bitmap_copy() would be better of course)? This is
> probably fine for most use cases as we'd likely only be doing struct
> cpumask * -> arena copies on slowpaths. But is there any kind of more
> generalized integration we want to have between arenas and kptrs?  Not
> sure, can't think of any off the top of my head.

Hopefully we'll be able to invent a way to store kptr-s inside the arena,
but from a cpumask perspective bpf_cpumask_test_cpu() can be made
polymorphic to work with arena ptrs and kptrs.
Same with bpf_cpumask_and(). Mixed arguments can be allowed.
Args can be either kptr or ptr_to_arena.

I still believe that we can deprecate 'struct bpf_cpumask'.
The cpumask_t will stay, of course, but we won't need to
bpf_obj_new(bpf_cpumask) and carefully track refcnt.
The arena can do the same much faster.
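
In that world, usage might look like this sketch (purely hypothetical;
no arena-aware kfunc variants exist yet, and the cast below assumes the
polymorphism described above):

static int use_arena_cpumask(void)
{
	/* one arena page standing in for a bpf_obj_new'd bpf_cpumask */
	struct cpumask __arena *mask;

	mask = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
	if (!mask)
		return 1;
	/* hypothetical polymorphic kfunc: accepts arena ptr or kptr */
	if (bpf_cpumask_test_cpu(0, (const struct cpumask *)mask))
		return 2;
	/* no refcount to track; freeing the page releases the mask */
	bpf_arena_free_pages(&arena, (void __arena *)mask, 1);
	return 0;
}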

>
> > +             return 7;
> > +     page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
> > +     if (!page3)
> > +             return 8;
> > +     *page3 = 3;
> > +     if (page2 != page3)
> > +             return 9;
> > +     if (*page1 != 1)
> > +             return 10;
>
> Should we also test doing a store after an arena has been freed?

You mean the whole bpf arena map was freed?
I don't see how the verifier would allow that.
If you meant a few pages were freed from the arena then such a test is
already in the patches.
David Vernet Feb. 12, 2024, 4:48 p.m. UTC | #3
On Fri, Feb 09, 2024 at 08:35:01PM -0800, Alexei Starovoitov wrote:
> On Fri, Feb 9, 2024 at 3:14 PM David Vernet <void@manifault.com> wrote:
> >
> > > +
> > > +#ifndef arena_container_of
> >
> > Why is this ifndef required if we have a pragma once above?
> 
> Just a habit to check for a macro before defining it.
> 
> > Obviously it's way better for us to actually have arenas in the interim
> > so this is fine for now, but UAF bugs could potentially be pretty
> > painful until we get proper exception unwinding support.
> 
> Detection that arena access faulted doesn't have to come after
> exception unwinding. Exceptions vs cancellable progs are also different.
> A record of the line in bpf prog that caused the first fault is probably
> good enough for prog debugging.
> 
> > Otherwise, in terms of usability, this looks really good. The only thing
> > to bear in mind is that I don't think we can fully get away from kptrs
> > that will have some duplicated logic compared to what we can enable in
> > an arena. For example, we will have to retain at least some of the
> > struct cpumask * kptrs for e.g. copying a struct task_struct's struct
> > cpumask *cpus_ptr field.
> 
> I think that's a bit orthogonal.
> task->cpus_ptr is a trusted_ptr_to_btf_id access that can be mixed
> within a program with arena access.

I see, so the idea is that we'd just use regular accesses to query the
bits in that cpumask rather than kfuncs? Similar to how we e.g. read a
task field such as p->comm with a regular load? Ok, that should work.

> > For now, we could iterate over the cpumask and manually set the bits, so
> > maybe even just supporting bpf_cpumask_test_cpu() would be enough
> > (though doing a bitmap_copy() would be better of course)? This is
> > probably fine for most use cases as we'd likely only be doing struct
> > cpumask * -> arena copies on slowpaths. But is there any kind of more
> > generalized integration we want to have between arenas and kptrs?  Not
> > sure, can't think of any off the top of my head.
> 
> Hopefully we'll be able to invent a way to store kptr-s inside the arena,
> but from a cpumask perspective bpf_cpumask_test_cpu() can be made
> polymorphic to work with arena ptrs and kptrs.
> Same with bpf_cpumask_and(). Mixed arguments can be allowed.
> Args can be either kptr or ptr_to_arena.

This would be ideal. And to make sure I understand, many of these
wouldn't even be kfuncs, right? We'd just be doing loads on two
safe/trusted objects that were both pointers to a bitmap of size
NR_CPUS?
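
E.g. something like this sketch (assuming direct reads through the
trusted cpus_ptr verify the same way reads of p->comm do, and that an
arena-resident global bitmap like idle_mask is supported):

u64 __arena idle_mask[512];	/* assumed arena global bitmap */

SEC("tp_btf/sched_wakeup")
int BPF_PROG(on_wakeup, struct task_struct *p)
{
	/* both operands of the AND are plain loads: cpus_ptr is a trusted
	 * kernel pointer, idle_mask lives in the arena; no kfunc involved */
	u64 allowed = p->cpus_ptr->bits[0];	/* first 64 CPUs only */

	return (allowed & idle_mask[0]) != 0;
}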

> I still believe that we can deprecate 'struct bpf_cpumask'.
> The cpumask_t will stay, of course, but we won't need to
> bpf_obj_new(bpf_cpumask) and carefully track refcnt.
> The arena can do the same much faster.

Yes, I agree. Any use of struct bpf_cpumask * can just be stored in an
arena, and any kfuncs where we were previously passing a struct
bpf_cpumask * could instead just take an arena cpumask, or be done
entirely using BPF instructions per your point above.

> > > +             return 7;
> > > +     page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
> > > +     if (!page3)
> > > +             return 8;
> > > +     *page3 = 3;
> > > +     if (page2 != page3)
> > > +             return 9;
> > > +     if (*page1 != 1)
> > > +             return 10;
> >
> > Should we also test doing a store after an arena has been freed?
> 
> You mean the whole bpf arena map was freed?
> I don't see how the verifier would allow that.
> If you meant a few pages were freed from the arena then such a test is
> already in the patches.

I meant a negative test that verifies we fail to load a prog that does a
write to a freed arena pointer.
Alexei Starovoitov Feb. 13, 2024, 11:19 p.m. UTC | #4
On Fri, Feb 9, 2024 at 11:03 PM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
>
> On Sat, 10 Feb 2024 at 05:35, Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Fri, Feb 9, 2024 at 3:14 PM David Vernet <void@manifault.com> wrote:
> > >
> > > > +
> > > > +#ifndef arena_container_of
> > >
> > > Why is this ifndef required if we have a pragma once above?
> >
> > Just a habit to check for a macro before defining it.
> >
> > > Obviously it's way better for us to actually have arenas in the interim
> > > so this is fine for now, but UAF bugs could potentially be pretty
> > > painful until we get proper exception unwinding support.
> >
> > Detection that arena access faulted doesn't have to come after
> > exception unwinding. Exceptions vs cancellable progs are also different.
>
> What do you mean exactly by 'cancellable progs'? That they can be
> interrupted at any (or well-known) points and stopped? I believe
> whatever plumbing was done to enable exceptions will be useful there
> as well. The verifier would just need to know e.g. that a load into
> PTR_TO_ARENA may fault, and thus generate descriptors for all frames
> for that pc. Then, at runtime, you could technically release all
> resources by looking up the frame descriptor and unwind the stack and
> return back to the caller of the prog.

I don't think it's a scalable approach.
I'm still trying to understand your exceptions part 2 series,
but from what I understand so far the scalability is a real concern.

>
> > A record of the line in bpf prog that caused the first fault is probably
> > good enough for prog debugging.
> >
>
> I think it would make more sense to abort the program by default,
> because use-after-free in the arena most certainly means a bug in the
> program.

yes, but from a debuggability pov, aborting vs safely continuing while
remembering the first wrong access is the same thing.
aborting by itself also doesn't mean that the prog is auto-detached.
It may run again a split second later and not hit the abort condition.

Recording the first wrong access (either abort or pf in arena) is a
must-have regardless.

Patch

diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 5c2cc7e8c5d0..8e70af386e52 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -11,3 +11,4 @@  fill_link_info/kprobe_multi_link_info            # bpf_program__attach_kprobe_mu
 fill_link_info/kretprobe_multi_link_info         # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 fill_link_info/kprobe_multi_invalid_ubuff        # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 missed/kprobe_recursion                          # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
+verifier_arena                                   # JIT does not support arena
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 1a63996c0304..ded440277f6e 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -3,3 +3,4 @@ 
 exceptions				 # JIT does not support calling kfunc bpf_throw				       (exceptions)
 get_stack_raw_tp                         # user_stack corrupted user stack                                             (no backchain userspace)
 stacktrace_build_id                      # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2                   (?)
+verifier_arena                           # JIT does not support arena
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
new file mode 100644
index 000000000000..07849d502f40
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -0,0 +1,70 @@ 
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+
+#ifndef WRITE_ONCE
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
+#endif
+
+#ifndef NUMA_NO_NODE
+#define	NUMA_NO_NODE	(-1)
+#endif
+
+#ifndef arena_container_of
+#define arena_container_of(ptr, type, member)			\
+	({							\
+		void __arena *__mptr = (void __arena *)(ptr);	\
+		((type *)(__mptr - offsetof(type, member)));	\
+	})
+#endif
+
+#ifdef __BPF__ /* when compiled as bpf program */
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+/*
+ * for older kernels try sizeof(struct genradix_node)
+ * or flexible:
+ * static inline long __bpf_page_size(void) {
+ *   return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
+ * }
+ * but generated code is not great.
+ */
+#endif
+
+#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM)
+#define __arena __attribute__((address_space(1)))
+#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
+#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
+#else
+#define __arena
+#define cast_kern(ptr) bpf_arena_cast(ptr, BPF_ARENA_CAST_KERN, 1)
+#define cast_user(ptr) bpf_arena_cast(ptr, BPF_ARENA_CAST_USER, 1)
+#endif
+
+void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
+				    int node_id, __u64 flags) __ksym __weak;
+void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
+
+#else /* when compiled as user space code */
+
+#define __arena
+#define __arg_arena
+#define cast_kern(ptr) /* nop for user space */
+#define cast_user(ptr) /* nop for user space */
+__weak char arena[1];
+
+#ifndef offsetof
+#define offsetof(type, member)  ((unsigned long)&((type *)0)->member)
+#endif
+
+static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
+						  int node_id, __u64 flags)
+{
+	return NULL;
+}
+static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
+{
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 9c6072a19745..985273832f89 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -4,6 +4,7 @@ 
 
 #include "cap_helpers.h"
 #include "verifier_and.skel.h"
+#include "verifier_arena.skel.h"
 #include "verifier_array_access.skel.h"
 #include "verifier_basic_stack.skel.h"
 #include "verifier_bitfield_write.skel.h"
@@ -118,6 +119,7 @@  static void run_tests_aux(const char *skel_name,
 #define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)
 
 void test_verifier_and(void)                  { RUN(verifier_and); }
+void test_verifier_arena(void)                { RUN(verifier_arena); }
 void test_verifier_basic_stack(void)          { RUN(verifier_basic_stack); }
 void test_verifier_bitfield_write(void)       { RUN(verifier_bitfield_write); }
 void test_verifier_bounds(void)               { RUN(verifier_bounds); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
new file mode 100644
index 000000000000..0e667132ef92
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -0,0 +1,91 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARENA);
+	__uint(map_flags, BPF_F_MMAPABLE);
+	__uint(max_entries, 2); /* arena of two pages close to 32-bit boundary */
+	__ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+} arena SEC(".maps");
+
+SEC("syscall")
+__success __retval(0)
+int basic_alloc1(void *ctx)
+{
+	volatile int __arena *page1, *page2, *no_page, *page3;
+
+	page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+	if (!page1)
+		return 1;
+	*page1 = 1;
+	page2 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+	if (!page2)
+		return 2;
+	*page2 = 2;
+	no_page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+	if (no_page)
+		return 3;
+	if (*page1 != 1)
+		return 4;
+	if (*page2 != 2)
+		return 5;
+	bpf_arena_free_pages(&arena, (void __arena *)page2, 1);
+	if (*page1 != 1)
+		return 6;
+	if (*page2 != 0) /* use-after-free should return 0 */
+		return 7;
+	page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+	if (!page3)
+		return 8;
+	*page3 = 3;
+	if (page2 != page3)
+		return 9;
+	if (*page1 != 1)
+		return 10;
+	return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int basic_alloc2(void *ctx)
+{
+	volatile char __arena *page1, *page2, *page3, *page4;
+
+	page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
+	if (!page1)
+		return 1;
+	page2 = page1 + __PAGE_SIZE;
+	page3 = page1 + __PAGE_SIZE * 2;
+	page4 = page1 - __PAGE_SIZE;
+	*page1 = 1;
+	*page2 = 2;
+	*page3 = 3;
+	*page4 = 4;
+	if (*page1 != 1)
+		return 1;
+	if (*page2 != 2)
+		return 2;
+	if (*page3 != 0)
+		return 3;
+	if (*page4 != 0)
+		return 4;
+	bpf_arena_free_pages(&arena, (void __arena *)page1, 2);
+	if (*page1 != 0)
+		return 5;
+	if (*page2 != 0)
+		return 6;
+	if (*page3 != 0)
+		return 7;
+	if (*page4 != 0)
+		return 8;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index ba57601c2a4d..524c38e9cde4 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -501,7 +501,7 @@  static bool is_unpriv_capable_map(struct bpf_map *map)
 	}
 }
 
-static int do_prog_test_run(int fd_prog, int *retval)
+static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
 {
 	__u8 tmp_out[TEST_DATA_LEN << 2] = {};
 	__u8 tmp_in[TEST_DATA_LEN] = {};
@@ -514,6 +514,10 @@  static int do_prog_test_run(int fd_prog, int *retval)
 		.repeat = 1,
 	);
 
+	if (empty_opts) {
+		memset(&topts, 0, sizeof(struct bpf_test_run_opts));
+		topts.sz = sizeof(struct bpf_test_run_opts);
+	}
 	err = bpf_prog_test_run_opts(fd_prog, &topts);
 	saved_errno = errno;
 
@@ -649,7 +653,8 @@  void run_subtest(struct test_loader *tester,
 			}
 		}
 
-		do_prog_test_run(bpf_program__fd(tprog), &retval);
+		do_prog_test_run(bpf_program__fd(tprog), &retval,
+				 bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
 		if (retval != subspec->retval && subspec->retval != POINTER_VALUE) {
 			PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
 			goto tobj_cleanup;