diff mbox series

[v2,bpf-next,9/9] selftests/bpf: Add refcounted_kptr tests

Message ID 20230415201811.343116-10-davemarchevsky@fb.com (mailing list archive)
State Accepted
Commit 6147f15131e2df544a5449815f456da48c0c88e7
Delegated to: BPF
Headers show
Series Shared ownership for local kptrs | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 fail Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on s390x with gcc
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 18 this patch: 18
netdev/cc_maintainers warning 11 maintainers not CCed: mykolal@fb.com song@kernel.org shuah@kernel.org sdf@google.com haoluo@google.com yhs@fb.com john.fastabend@gmail.com kpsingh@kernel.org jolsa@kernel.org martin.lau@linux.dev linux-kselftest@vger.kernel.org
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 18 this patch: 18
netdev/checkpatch fail CHECK: Macro argument 'read_root' may be better as '(read_root)' to avoid precedence issues CHECK: spaces preferred around that '*' (ctx:WxV) ERROR: Macros with complex values should be enclosed in parentheses WARNING: Prefer __aligned(8) over __attribute__((aligned(8))) WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 99 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Dave Marchevsky April 15, 2023, 8:18 p.m. UTC
Test refcounted local kptr functionality added in previous patches in
the series.

Usecases which pass verification:

* Add refcounted local kptr to both tree and list. Then, read and -
  possibly, depending on test variant - delete from tree, then list.
  * Also test doing read-and-maybe-delete in opposite order
* Stash a refcounted local kptr in a map_value, then add it to a
  rbtree. Read from both, possibly deleting after tree read.
* Add refcounted local kptr to both tree and list. Then, try reading and
  deleting twice from one of the collections.
* bpf_refcount_acquire of just-added non-owning ref should work, as
  should bpf_refcount_acquire of owning ref just out of bpf_obj_new

Usecases which fail verification:

* The simple successful bpf_refcount_acquire cases from above should
  both fail to verify if the newly-acquired owning ref is not dropped

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
---
 .../bpf/prog_tests/refcounted_kptr.c          |  18 +
 .../selftests/bpf/progs/refcounted_kptr.c     | 406 ++++++++++++++++++
 .../bpf/progs/refcounted_kptr_fail.c          |  72 ++++
 3 files changed, 496 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
 create mode 100644 tools/testing/selftests/bpf/progs/refcounted_kptr.c
 create mode 100644 tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c

Comments

Kumar Kartikeya Dwivedi April 21, 2023, 10:17 p.m. UTC | #1
On Sat, Apr 15, 2023 at 10:18:11PM CEST, Dave Marchevsky wrote:
> Test refcounted local kptr functionality added in previous patches in
> the series.
>
> Usecases which pass verification:
>
> * Add refcounted local kptr to both tree and list. Then, read and -
>   possibly, depending on test variant - delete from tree, then list.
>   * Also test doing read-and-maybe-delete in opposite order
> * Stash a refcounted local kptr in a map_value, then add it to a
>   rbtree. Read from both, possibly deleting after tree read.
> * Add refcounted local kptr to both tree and list. Then, try reading and
>   deleting twice from one of the collections.
> * bpf_refcount_acquire of just-added non-owning ref should work, as
>   should bpf_refcount_acquire of owning ref just out of bpf_obj_new
>
> Usecases which fail verification:
>
> * The simple successful bpf_refcount_acquire cases from above should
>   both fail to verify if the newly-acquired owning ref is not dropped
>
> Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
> ---
> [...]
> +SEC("?tc")
> +__failure __msg("Unreleased reference id=3 alloc_insn=21")
> +long rbtree_refcounted_node_ref_escapes(void *ctx)
> +{
> +	struct node_acquire *n, *m;
> +
> +	n = bpf_obj_new(typeof(*n));
> +	if (!n)
> +		return 1;
> +
> +	bpf_spin_lock(&glock);
> +	bpf_rbtree_add(&groot, &n->node, less);
> +	/* m becomes an owning ref but is never drop'd or added to a tree */
> +	m = bpf_refcount_acquire(n);

I am analyzing the set (and I'll reply in detail to the cover letter), but this
stood out.

Isn't this going to be problematic if n has refcount == 1 and is dropped
internally by bpf_rbtree_add? Are we sure this can never occur? It took me some
time, but the following schedule seems problematic.

CPU 0					CPU 1
n = bpf_obj_new
lock(lock1)
bpf_rbtree_add(rbtree1, n)
m = bpf_rbtree_acquire(n)
unlock(lock1)

kptr_xchg(map, m) // move to map
// at this point, refcount = 2
					m = kptr_xchg(map, NULL)
					lock(lock2)
lock(lock1)				bpf_rbtree_add(rbtree2, m)
p = bpf_rbtree_first(rbtree1)			if (!RB_EMPTY_NODE) bpf_obj_drop_impl(m) // A
bpf_rbtree_remove(rbtree1, p)
unlock(lock1)
bpf_obj_drop(p) // B
					bpf_refcount_acquire(m) // use-after-free
					...

B will decrement refcount from 1 to 0, after which bpf_refcount_acquire is
basically performing a use-after-free (when fortunate, one will get a
WARN_ON_ONCE splat for 0 to 1, otherwise, a silent refcount raise for some
different object).
Alexei Starovoitov April 21, 2023, 11:49 p.m. UTC | #2
On Sat, Apr 22, 2023 at 12:17:47AM +0200, Kumar Kartikeya Dwivedi wrote:
> On Sat, Apr 15, 2023 at 10:18:11PM CEST, Dave Marchevsky wrote:
> > Test refcounted local kptr functionality added in previous patches in
> > the series.
> >
> > Usecases which pass verification:
> >
> > * Add refcounted local kptr to both tree and list. Then, read and -
> >   possibly, depending on test variant - delete from tree, then list.
> >   * Also test doing read-and-maybe-delete in opposite order
> > * Stash a refcounted local kptr in a map_value, then add it to a
> >   rbtree. Read from both, possibly deleting after tree read.
> > * Add refcounted local kptr to both tree and list. Then, try reading and
> >   deleting twice from one of the collections.
> > * bpf_refcount_acquire of just-added non-owning ref should work, as
> >   should bpf_refcount_acquire of owning ref just out of bpf_obj_new
> >
> > Usecases which fail verification:
> >
> > * The simple successful bpf_refcount_acquire cases from above should
> >   both fail to verify if the newly-acquired owning ref is not dropped
> >
> > Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
> > ---
> > [...]
> > +SEC("?tc")
> > +__failure __msg("Unreleased reference id=3 alloc_insn=21")
> > +long rbtree_refcounted_node_ref_escapes(void *ctx)
> > +{
> > +	struct node_acquire *n, *m;
> > +
> > +	n = bpf_obj_new(typeof(*n));
> > +	if (!n)
> > +		return 1;
> > +
> > +	bpf_spin_lock(&glock);
> > +	bpf_rbtree_add(&groot, &n->node, less);
> > +	/* m becomes an owning ref but is never drop'd or added to a tree */
> > +	m = bpf_refcount_acquire(n);
> 
> I am analyzing the set (and I'll reply in detail to the cover letter), but this
> stood out.
> 
> Isn't this going to be problematic if n has refcount == 1 and is dropped
> internally by bpf_rbtree_add? Are we sure this can never occur? It took me some
> time, but the following schedule seems problematic.
> 
> CPU 0					CPU 1
> n = bpf_obj_new
> lock(lock1)
> bpf_rbtree_add(rbtree1, n)
> m = bpf_rbtree_acquire(n)
> unlock(lock1)
> 
> kptr_xchg(map, m) // move to map
> // at this point, refcount = 2
> 					m = kptr_xchg(map, NULL)
> 					lock(lock2)
> lock(lock1)				bpf_rbtree_add(rbtree2, m)
> p = bpf_rbtree_first(rbtree1)			if (!RB_EMPTY_NODE) bpf_obj_drop_impl(m) // A
> bpf_rbtree_remove(rbtree1, p)
> unlock(lock1)
> bpf_obj_drop(p) // B

You probably meant:
p2 = bpf_rbtree_remove(rbtree1, p)
unlock(lock1)
if (p2)
  bpf_obj_drop(p2)

> 					bpf_refcount_acquire(m) // use-after-free
> 					...
> 
> B will decrement refcount from 1 to 0, after which bpf_refcount_acquire is
> basically performing a use-after-free (when fortunate, one will get a
> WARN_ON_ONCE splat for 0 to 1, otherwise, a silent refcount raise for some
> different object).

As discussed earlier we'll be switching all bpf_obj_new to use BPF_MA_REUSE_AFTER_RCU_GP.

and to adress 0->1 transition.. it does look like we need to two flavors of bpf_refcount_acquire.
One of owned refs and another for non-owned.
The owned bpf_refcount_acquire() can stay KF_ACQUIRE with refcount_inc,
while bpf_refcount_acquire() for non-own will use KF_ACQUIRE | KF_RET_NULL and refcount_inc_not_zero.
The bpf prog can use bpf_refcount_acquire everywhere and the verifier will treat it on the spot
differently depending on the argument.
So the code:
n = bpf_obj_new();
if (!n) ...;
m = bpf_refcount_acquire(n);
doesn't need to check if (!m).
Kumar Kartikeya Dwivedi April 22, 2023, 2:06 a.m. UTC | #3
On Sat, Apr 22, 2023 at 01:49:08AM CEST, Alexei Starovoitov wrote:
> On Sat, Apr 22, 2023 at 12:17:47AM +0200, Kumar Kartikeya Dwivedi wrote:
> > On Sat, Apr 15, 2023 at 10:18:11PM CEST, Dave Marchevsky wrote:
> > > Test refcounted local kptr functionality added in previous patches in
> > > the series.
> > >
> > > Usecases which pass verification:
> > >
> > > * Add refcounted local kptr to both tree and list. Then, read and -
> > >   possibly, depending on test variant - delete from tree, then list.
> > >   * Also test doing read-and-maybe-delete in opposite order
> > > * Stash a refcounted local kptr in a map_value, then add it to a
> > >   rbtree. Read from both, possibly deleting after tree read.
> > > * Add refcounted local kptr to both tree and list. Then, try reading and
> > >   deleting twice from one of the collections.
> > > * bpf_refcount_acquire of just-added non-owning ref should work, as
> > >   should bpf_refcount_acquire of owning ref just out of bpf_obj_new
> > >
> > > Usecases which fail verification:
> > >
> > > * The simple successful bpf_refcount_acquire cases from above should
> > >   both fail to verify if the newly-acquired owning ref is not dropped
> > >
> > > Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
> > > ---
> > > [...]
> > > +SEC("?tc")
> > > +__failure __msg("Unreleased reference id=3 alloc_insn=21")
> > > +long rbtree_refcounted_node_ref_escapes(void *ctx)
> > > +{
> > > +	struct node_acquire *n, *m;
> > > +
> > > +	n = bpf_obj_new(typeof(*n));
> > > +	if (!n)
> > > +		return 1;
> > > +
> > > +	bpf_spin_lock(&glock);
> > > +	bpf_rbtree_add(&groot, &n->node, less);
> > > +	/* m becomes an owning ref but is never drop'd or added to a tree */
> > > +	m = bpf_refcount_acquire(n);
> >
> > I am analyzing the set (and I'll reply in detail to the cover letter), but this
> > stood out.
> >
> > Isn't this going to be problematic if n has refcount == 1 and is dropped
> > internally by bpf_rbtree_add? Are we sure this can never occur? It took me some
> > time, but the following schedule seems problematic.
> >
> > CPU 0					CPU 1
> > n = bpf_obj_new
> > lock(lock1)
> > bpf_rbtree_add(rbtree1, n)
> > m = bpf_rbtree_acquire(n)
> > unlock(lock1)
> >
> > kptr_xchg(map, m) // move to map
> > // at this point, refcount = 2
> > 					m = kptr_xchg(map, NULL)
> > 					lock(lock2)
> > lock(lock1)				bpf_rbtree_add(rbtree2, m)
> > p = bpf_rbtree_first(rbtree1)			if (!RB_EMPTY_NODE) bpf_obj_drop_impl(m) // A
> > bpf_rbtree_remove(rbtree1, p)
> > unlock(lock1)
> > bpf_obj_drop(p) // B
>
> You probably meant:
> p2 = bpf_rbtree_remove(rbtree1, p)
> unlock(lock1)
> if (p2)
>   bpf_obj_drop(p2)
>
> > 					bpf_refcount_acquire(m) // use-after-free
> > 					...
> >
> > B will decrement refcount from 1 to 0, after which bpf_refcount_acquire is
> > basically performing a use-after-free (when fortunate, one will get a
> > WARN_ON_ONCE splat for 0 to 1, otherwise, a silent refcount raise for some
> > different object).
>
> As discussed earlier we'll be switching all bpf_obj_new to use BPF_MA_REUSE_AFTER_RCU_GP

I probably missed that thread. In that case, is use of this stuff going to
require bpf_rcu_read_lock in sleepable programs?

>
> and to adress 0->1 transition.. it does look like we need to two flavors of bpf_refcount_acquire.
> One of owned refs and another for non-owned.
> The owned bpf_refcount_acquire() can stay KF_ACQUIRE with refcount_inc,
> while bpf_refcount_acquire() for non-own will use KF_ACQUIRE | KF_RET_NULL and refcount_inc_not_zero.
> The bpf prog can use bpf_refcount_acquire everywhere and the verifier will treat it on the spot
> differently depending on the argument.
> So the code:
> n = bpf_obj_new();
> if (!n) ...;
> m = bpf_refcount_acquire(n);
> doesn't need to check if (!m).

If memory reuse is prevented, than indeed the above should fix the problem.
Though it might be a bit surprising if a pointer from the same helper has to be
null checked in one context, and not in another. Though that's just a minor
point.
Alexei Starovoitov April 22, 2023, 2:18 a.m. UTC | #4
On Fri, Apr 21, 2023 at 7:06 PM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
>
> On Sat, Apr 22, 2023 at 01:49:08AM CEST, Alexei Starovoitov wrote:
> > On Sat, Apr 22, 2023 at 12:17:47AM +0200, Kumar Kartikeya Dwivedi wrote:
> > > On Sat, Apr 15, 2023 at 10:18:11PM CEST, Dave Marchevsky wrote:
> > > > Test refcounted local kptr functionality added in previous patches in
> > > > the series.
> > > >
> > > > Usecases which pass verification:
> > > >
> > > > * Add refcounted local kptr to both tree and list. Then, read and -
> > > >   possibly, depending on test variant - delete from tree, then list.
> > > >   * Also test doing read-and-maybe-delete in opposite order
> > > > * Stash a refcounted local kptr in a map_value, then add it to a
> > > >   rbtree. Read from both, possibly deleting after tree read.
> > > > * Add refcounted local kptr to both tree and list. Then, try reading and
> > > >   deleting twice from one of the collections.
> > > > * bpf_refcount_acquire of just-added non-owning ref should work, as
> > > >   should bpf_refcount_acquire of owning ref just out of bpf_obj_new
> > > >
> > > > Usecases which fail verification:
> > > >
> > > > * The simple successful bpf_refcount_acquire cases from above should
> > > >   both fail to verify if the newly-acquired owning ref is not dropped
> > > >
> > > > Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
> > > > ---
> > > > [...]
> > > > +SEC("?tc")
> > > > +__failure __msg("Unreleased reference id=3 alloc_insn=21")
> > > > +long rbtree_refcounted_node_ref_escapes(void *ctx)
> > > > +{
> > > > + struct node_acquire *n, *m;
> > > > +
> > > > + n = bpf_obj_new(typeof(*n));
> > > > + if (!n)
> > > > +         return 1;
> > > > +
> > > > + bpf_spin_lock(&glock);
> > > > + bpf_rbtree_add(&groot, &n->node, less);
> > > > + /* m becomes an owning ref but is never drop'd or added to a tree */
> > > > + m = bpf_refcount_acquire(n);
> > >
> > > I am analyzing the set (and I'll reply in detail to the cover letter), but this
> > > stood out.
> > >
> > > Isn't this going to be problematic if n has refcount == 1 and is dropped
> > > internally by bpf_rbtree_add? Are we sure this can never occur? It took me some
> > > time, but the following schedule seems problematic.
> > >
> > > CPU 0                                       CPU 1
> > > n = bpf_obj_new
> > > lock(lock1)
> > > bpf_rbtree_add(rbtree1, n)
> > > m = bpf_rbtree_acquire(n)
> > > unlock(lock1)
> > >
> > > kptr_xchg(map, m) // move to map
> > > // at this point, refcount = 2
> > >                                     m = kptr_xchg(map, NULL)
> > >                                     lock(lock2)
> > > lock(lock1)                         bpf_rbtree_add(rbtree2, m)
> > > p = bpf_rbtree_first(rbtree1)                       if (!RB_EMPTY_NODE) bpf_obj_drop_impl(m) // A
> > > bpf_rbtree_remove(rbtree1, p)
> > > unlock(lock1)
> > > bpf_obj_drop(p) // B
> >
> > You probably meant:
> > p2 = bpf_rbtree_remove(rbtree1, p)
> > unlock(lock1)
> > if (p2)
> >   bpf_obj_drop(p2)
> >
> > >                                     bpf_refcount_acquire(m) // use-after-free
> > >                                     ...
> > >
> > > B will decrement refcount from 1 to 0, after which bpf_refcount_acquire is
> > > basically performing a use-after-free (when fortunate, one will get a
> > > WARN_ON_ONCE splat for 0 to 1, otherwise, a silent refcount raise for some
> > > different object).
> >
> > As discussed earlier we'll be switching all bpf_obj_new to use BPF_MA_REUSE_AFTER_RCU_GP
>
> I probably missed that thread. In that case, is use of this stuff going to
> require bpf_rcu_read_lock in sleepable programs?

Correct.
We've refactored rcu enforcement.
We also decided to stop waiting for gcc btf_tag support to land and
went with allowlisting BTF_TYPE_SAFE_RCU[_OR_NULL] for fields that
we need to use now.
Overall it allowed us to get rid of kptr_get.
Dave Marchevsky April 25, 2023, 6:53 a.m. UTC | #5
On 4/21/23 6:17 PM, Kumar Kartikeya Dwivedi wrote:
> On Sat, Apr 15, 2023 at 10:18:11PM CEST, Dave Marchevsky wrote:
>> Test refcounted local kptr functionality added in previous patches in
>> the series.
>>
>> Usecases which pass verification:
>>
>> * Add refcounted local kptr to both tree and list. Then, read and -
>>   possibly, depending on test variant - delete from tree, then list.
>>   * Also test doing read-and-maybe-delete in opposite order
>> * Stash a refcounted local kptr in a map_value, then add it to a
>>   rbtree. Read from both, possibly deleting after tree read.
>> * Add refcounted local kptr to both tree and list. Then, try reading and
>>   deleting twice from one of the collections.
>> * bpf_refcount_acquire of just-added non-owning ref should work, as
>>   should bpf_refcount_acquire of owning ref just out of bpf_obj_new
>>
>> Usecases which fail verification:
>>
>> * The simple successful bpf_refcount_acquire cases from above should
>>   both fail to verify if the newly-acquired owning ref is not dropped
>>
>> Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
>> ---
>> [...]
>> +SEC("?tc")
>> +__failure __msg("Unreleased reference id=3 alloc_insn=21")
>> +long rbtree_refcounted_node_ref_escapes(void *ctx)
>> +{
>> +	struct node_acquire *n, *m;
>> +
>> +	n = bpf_obj_new(typeof(*n));
>> +	if (!n)
>> +		return 1;
>> +
>> +	bpf_spin_lock(&glock);
>> +	bpf_rbtree_add(&groot, &n->node, less);
>> +	/* m becomes an owning ref but is never drop'd or added to a tree */
>> +	m = bpf_refcount_acquire(n);
> 
> I am analyzing the set (and I'll reply in detail to the cover letter), but this
> stood out.
> 
> Isn't this going to be problematic if n has refcount == 1 and is dropped
> internally by bpf_rbtree_add? Are we sure this can never occur? It took me some
> time, but the following schedule seems problematic.
> 
> CPU 0					CPU 1
> n = bpf_obj_new
> lock(lock1)
> bpf_rbtree_add(rbtree1, n)
> m = bpf_rbtree_acquire(n)
> unlock(lock1)
> 
> kptr_xchg(map, m) // move to map
> // at this point, refcount = 2
> 					m = kptr_xchg(map, NULL)
> 					lock(lock2)
> lock(lock1)				bpf_rbtree_add(rbtree2, m)
> p = bpf_rbtree_first(rbtree1)			if (!RB_EMPTY_NODE) bpf_obj_drop_impl(m) // A
> bpf_rbtree_remove(rbtree1, p)
> unlock(lock1)
> bpf_obj_drop(p) // B
> 					bpf_refcount_acquire(m) // use-after-free
> 					...
> 
> B will decrement refcount from 1 to 0, after which bpf_refcount_acquire is
> basically performing a use-after-free (when fortunate, one will get a
> WARN_ON_ONCE splat for 0 to 1, otherwise, a silent refcount raise for some
> different object).

Thanks for the detailed feedback here and in the other thread in the series.
I will address the issues you raised ASAP, starting with this one, which I've
confirmed via a repro selftest. Will be sending fixes soon.
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
new file mode 100644
index 000000000000..2ab23832062d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
@@ -0,0 +1,18 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "refcounted_kptr.skel.h"
+#include "refcounted_kptr_fail.skel.h"
+
+void test_refcounted_kptr(void)
+{
+	RUN_TESTS(refcounted_kptr);
+}
+
+void test_refcounted_kptr_fail(void)
+{
+	RUN_TESTS(refcounted_kptr_fail);
+}
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
new file mode 100644
index 000000000000..1d348a225140
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -0,0 +1,406 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+	long key;
+	long list_data;
+	struct bpf_rb_node r;
+	struct bpf_list_node l;
+	struct bpf_refcount ref;
+};
+
+struct map_value {
+	struct node_data __kptr *node;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+} stashed_nodes SEC(".maps");
+
+struct node_acquire {
+	long key;
+	long data;
+	struct bpf_rb_node node;
+	struct bpf_refcount refcount;
+};
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock lock;
+private(A) struct bpf_rb_root root __contains(node_data, r);
+private(A) struct bpf_list_head head __contains(node_data, l);
+
+private(B) struct bpf_spin_lock alock;
+private(B) struct bpf_rb_root aroot __contains(node_acquire, node);
+
+static bool less(struct bpf_rb_node *node_a, const struct bpf_rb_node *node_b)
+{
+	struct node_data *a;
+	struct node_data *b;
+
+	a = container_of(node_a, struct node_data, r);
+	b = container_of(node_b, struct node_data, r);
+
+	return a->key < b->key;
+}
+
+static bool less_a(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+	struct node_acquire *node_a;
+	struct node_acquire *node_b;
+
+	node_a = container_of(a, struct node_acquire, node);
+	node_b = container_of(b, struct node_acquire, node);
+
+	return node_a->key < node_b->key;
+}
+
+static long __insert_in_tree_and_list(struct bpf_list_head *head,
+				      struct bpf_rb_root *root,
+				      struct bpf_spin_lock *lock)
+{
+	struct node_data *n, *m;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return -1;
+
+	m = bpf_refcount_acquire(n);
+	m->key = 123;
+	m->list_data = 456;
+
+	bpf_spin_lock(lock);
+	if (bpf_rbtree_add(root, &n->r, less)) {
+		/* Failure to insert - unexpected */
+		bpf_spin_unlock(lock);
+		bpf_obj_drop(m);
+		return -2;
+	}
+	bpf_spin_unlock(lock);
+
+	bpf_spin_lock(lock);
+	if (bpf_list_push_front(head, &m->l)) {
+		/* Failure to insert - unexpected */
+		bpf_spin_unlock(lock);
+		return -3;
+	}
+	bpf_spin_unlock(lock);
+	return 0;
+}
+
+static long __stash_map_insert_tree(int idx, int val, struct bpf_rb_root *root,
+				    struct bpf_spin_lock *lock)
+{
+	struct map_value *mapval;
+	struct node_data *n, *m;
+
+	mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+	if (!mapval)
+		return -1;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return -2;
+
+	n->key = val;
+	m = bpf_refcount_acquire(n);
+
+	n = bpf_kptr_xchg(&mapval->node, n);
+	if (n) {
+		bpf_obj_drop(n);
+		bpf_obj_drop(m);
+		return -3;
+	}
+
+	bpf_spin_lock(lock);
+	if (bpf_rbtree_add(root, &m->r, less)) {
+		/* Failure to insert - unexpected */
+		bpf_spin_unlock(lock);
+		return -4;
+	}
+	bpf_spin_unlock(lock);
+	return 0;
+}
+
+static long __read_from_tree(struct bpf_rb_root *root,
+			     struct bpf_spin_lock *lock,
+			     bool remove_from_tree)
+{
+	struct bpf_rb_node *rb;
+	struct node_data *n;
+	long res = -99;
+
+	bpf_spin_lock(lock);
+
+	rb = bpf_rbtree_first(root);
+	if (!rb) {
+		bpf_spin_unlock(lock);
+		return -1;
+	}
+
+	n = container_of(rb, struct node_data, r);
+	res = n->key;
+
+	if (!remove_from_tree) {
+		bpf_spin_unlock(lock);
+		return res;
+	}
+
+	rb = bpf_rbtree_remove(root, rb);
+	bpf_spin_unlock(lock);
+	if (!rb)
+		return -2;
+	n = container_of(rb, struct node_data, r);
+	bpf_obj_drop(n);
+	return res;
+}
+
+static long __read_from_list(struct bpf_list_head *head,
+			     struct bpf_spin_lock *lock,
+			     bool remove_from_list)
+{
+	struct bpf_list_node *l;
+	struct node_data *n;
+	long res = -99;
+
+	bpf_spin_lock(lock);
+
+	l = bpf_list_pop_front(head);
+	if (!l) {
+		bpf_spin_unlock(lock);
+		return -1;
+	}
+
+	n = container_of(l, struct node_data, l);
+	res = n->list_data;
+
+	if (!remove_from_list) {
+		if (bpf_list_push_back(head, &n->l)) {
+			bpf_spin_unlock(lock);
+			return -2;
+		}
+	}
+
+	bpf_spin_unlock(lock);
+
+	if (remove_from_list)
+		bpf_obj_drop(n);
+	return res;
+}
+
+static long __read_from_unstash(int idx)
+{
+	struct node_data *n = NULL;
+	struct map_value *mapval;
+	long val = -99;
+
+	mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+	if (!mapval)
+		return -1;
+
+	n = bpf_kptr_xchg(&mapval->node, n);
+	if (!n)
+		return -2;
+
+	val = n->key;
+	bpf_obj_drop(n);
+	return val;
+}
+
+#define INSERT_READ_BOTH(rem_tree, rem_list, desc)			\
+SEC("tc")								\
+__description(desc)							\
+__success __retval(579)							\
+long insert_and_remove_tree_##rem_tree##_list_##rem_list(void *ctx)	\
+{									\
+	long err, tree_data, list_data;					\
+									\
+	err = __insert_in_tree_and_list(&head, &root, &lock);		\
+	if (err)							\
+		return err;						\
+									\
+	err = __read_from_tree(&root, &lock, rem_tree);			\
+	if (err < 0)							\
+		return err;						\
+	else								\
+		tree_data = err;					\
+									\
+	err = __read_from_list(&head, &lock, rem_list);			\
+	if (err < 0)							\
+		return err;						\
+	else								\
+		list_data = err;					\
+									\
+	return tree_data + list_data;					\
+}
+
+/* After successful insert of struct node_data into both collections:
+ *   - it should have refcount = 2
+ *   - removing / not removing the node_data from a collection after
+ *     reading should have no effect on ability to read / remove from
+ *     the other collection
+ */
+INSERT_READ_BOTH(true, true, "insert_read_both: remove from tree + list");
+INSERT_READ_BOTH(false, false, "insert_read_both: remove from neither");
+INSERT_READ_BOTH(true, false, "insert_read_both: remove from tree");
+INSERT_READ_BOTH(false, true, "insert_read_both: remove from list");
+
+#undef INSERT_READ_BOTH
+#define INSERT_READ_BOTH(rem_tree, rem_list, desc)			\
+SEC("tc")								\
+__description(desc)							\
+__success __retval(579)							\
+long insert_and_remove_lf_tree_##rem_tree##_list_##rem_list(void *ctx)	\
+{									\
+	long err, tree_data, list_data;					\
+									\
+	err = __insert_in_tree_and_list(&head, &root, &lock);		\
+	if (err)							\
+		return err;						\
+									\
+	err = __read_from_list(&head, &lock, rem_list);			\
+	if (err < 0)							\
+		return err;						\
+	else								\
+		list_data = err;					\
+									\
+	err = __read_from_tree(&root, &lock, rem_tree);			\
+	if (err < 0)							\
+		return err;						\
+	else								\
+		tree_data = err;					\
+									\
+	return tree_data + list_data;					\
+}
+
+/* Similar to insert_read_both, but list data is read and possibly removed
+ * first
+ *
+ * Results should be no different than reading and possibly removing rbtree
+ * node first
+ */
+INSERT_READ_BOTH(true, true, "insert_read_both_list_first: remove from tree + list");
+INSERT_READ_BOTH(false, false, "insert_read_both_list_first: remove from neither");
+INSERT_READ_BOTH(true, false, "insert_read_both_list_first: remove from tree");
+INSERT_READ_BOTH(false, true, "insert_read_both_list_first: remove from list");
+
+#define INSERT_DOUBLE_READ_AND_DEL(read_fn, read_root, desc)		\
+SEC("tc")								\
+__description(desc)							\
+__success __retval(-1)							\
+long insert_double_##read_fn##_and_del_##read_root(void *ctx)		\
+{									\
+	long err, list_data;						\
+									\
+	err = __insert_in_tree_and_list(&head, &root, &lock);		\
+	if (err)							\
+		return err;						\
+									\
+	err = read_fn(&read_root, &lock, true);				\
+	if (err < 0)							\
+		return err;						\
+	else								\
+		list_data = err;					\
+									\
+	err = read_fn(&read_root, &lock, true);				\
+	if (err < 0)							\
+		return err;						\
+									\
+	return err + list_data;						\
+}
+
+/* Insert into both tree and list, then try reading-and-removing from either twice
+ *
+ * The second read-and-remove should fail on read step since the node has
+ * already been removed
+ */
+INSERT_DOUBLE_READ_AND_DEL(__read_from_tree, root, "insert_double_del: 2x read-and-del from tree");
+INSERT_DOUBLE_READ_AND_DEL(__read_from_list, head, "insert_double_del: 2x read-and-del from list");
+
+#define INSERT_STASH_READ(rem_tree, desc)				\
+SEC("tc")								\
+__description(desc)							\
+__success __retval(84)							\
+long insert_rbtree_and_stash__del_tree_##rem_tree(void *ctx)		\
+{									\
+	long err, tree_data, map_data;					\
+									\
+	err = __stash_map_insert_tree(0, 42, &root, &lock);		\
+	if (err)							\
+		return err;						\
+									\
+	err = __read_from_tree(&root, &lock, rem_tree);			\
+	if (err < 0)							\
+		return err;						\
+	else								\
+		tree_data = err;					\
+									\
+	err = __read_from_unstash(0);					\
+	if (err < 0)							\
+		return err;						\
+	else								\
+		map_data = err;						\
+									\
+	return tree_data + map_data;					\
+}
+
+/* Stash a refcounted node in map_val, insert same node into tree, then try
+ * reading data from tree then unstashed map_val, possibly removing from tree
+ *
+ * Removing from tree should have no effect on map_val kptr validity
+ */
+INSERT_STASH_READ(true, "insert_stash_read: remove from tree");
+INSERT_STASH_READ(false, "insert_stash_read: don't remove from tree");
+
+SEC("tc")
+__success
+long rbtree_refcounted_node_ref_escapes(void *ctx)
+{
+	struct node_acquire *n, *m;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	bpf_spin_lock(&alock);
+	bpf_rbtree_add(&aroot, &n->node, less_a);
+	m = bpf_refcount_acquire(n);
+	bpf_spin_unlock(&alock);
+
+	m->key = 2;
+	bpf_obj_drop(m);
+	return 0;
+}
+
+SEC("tc")
+__success
+long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
+{
+	struct node_acquire *n, *m;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	m = bpf_refcount_acquire(n);
+	m->key = 2;
+
+	bpf_spin_lock(&alock);
+	bpf_rbtree_add(&aroot, &n->node, less_a);
+	bpf_spin_unlock(&alock);
+
+	bpf_obj_drop(m);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
new file mode 100644
index 000000000000..efcb308f80ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
@@ -0,0 +1,72 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct node_acquire {
+	long key;
+	long data;
+	struct bpf_rb_node node;
+	struct bpf_refcount refcount;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_acquire, node);
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+	struct node_acquire *node_a;
+	struct node_acquire *node_b;
+
+	node_a = container_of(a, struct node_acquire, node);
+	node_b = container_of(b, struct node_acquire, node);
+
+	return node_a->key < node_b->key;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=3 alloc_insn=21")
+long rbtree_refcounted_node_ref_escapes(void *ctx)
+{
+	struct node_acquire *n, *m;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, less);
+	/* m becomes an owning ref but is never drop'd or added to a tree */
+	m = bpf_refcount_acquire(n);
+	bpf_spin_unlock(&glock);
+
+	m->key = 2;
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=3 alloc_insn=9")
+long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
+{
+	struct node_acquire *n, *m;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	/* m becomes an owning ref but is never drop'd or added to a tree */
+	m = bpf_refcount_acquire(n);
+	m->key = 2;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, less);
+	bpf_spin_unlock(&glock);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";