[bpf-next,v5,3/4] bpf: verify scalar ids mapping in regsafe() using check_ids()

Message ID: 20230612160801.2804666-4-eddyz87@gmail.com (mailing list archive)
State: Superseded
Delegated to: BPF
Series: verify scalar ids mapping in regsafe()

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-6 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 pending Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_progs on s390x with gcc
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 81 this patch: 82
netdev/cc_maintainers fail 1 blamed authors not CCed: john.fastabend@gmail.com; 6 maintainers not CCed: kpsingh@kernel.org john.fastabend@gmail.com sdf@google.com song@kernel.org jolsa@kernel.org haoluo@google.com
netdev/build_clang fail Errors and warnings before: 20 this patch: 22
netdev/verify_signedoff fail author Signed-off-by missing
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 81 this patch: 82
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eduard Zingerman June 12, 2023, 4:08 p.m. UTC
Make sure that the following unsafe example is rejected by the verifier:

1: r9 = ... some pointer with range X ...
2: r6 = ... unbound scalar ID=a ...
3: r7 = ... unbound scalar ID=b ...
4: if (r6 > r7) goto +1
5: r6 = r7
6: if (r6 > X) goto ...
--- checkpoint ---
7: r9 += r7
8: *(u64 *)r9 = Y

This example is unsafe because not all execution paths verify the range of r7.
Because of the jump at (4) the verifier would arrive at (6) in two states:
I.  r6{.id=b}, r7{.id=b} via path 1-6;
II. r6{.id=a}, r7{.id=b} via path 1-4, 6.

Currently regsafe() does not call check_ids() for scalar registers,
thus from the POV of regsafe() states (I) and (II) are identical. If
path 1-6 is taken by the verifier first and a checkpoint is created at
(6), path [1-4, 6] would be considered safe.
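
For illustration, the same pattern could be written in BPF C roughly as
follows (a sketch only, not a selftest from this series; the array
'arr' and the use of bpf_get_prandom_u32() are assumptions made for
this example):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  char arr[16];

  SEC("socket")
  int scalar_ids_unsafe(void *ctx)
  {
          __u64 a = bpf_get_prandom_u32(); /* r6: unbound scalar, ID=a */
          __u64 b = bpf_get_prandom_u32(); /* r7: unbound scalar, ID=b */

          if (a <= b)           /* (4): taken path skips the copy       */
                  a = b;        /* (5): a and b now share one ID        */
          if (a < sizeof(arr))  /* (6): range established for a only    */
                  arr[b] = 1;   /* (7)-(8): b is unbounded on the path  */
                                /* that skipped (5), so this store must */
                                /* be rejected                          */
          return 0;
  }

  char _license[] SEC("license") = "GPL";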

Changes in this commit:
- Function check_scalar_ids() is added; it differs from check_ids() in
  the following aspects (see the worked illustration after this list):
  - treats ID zero as a unique scalar ID;
  - disallows mapping the same 'cur_id' to multiple 'old_id' values.
- check_scalar_ids() needs to generate temporary unique IDs; the field
  'tmp_id_gen' is added to bpf_verifier_env::idmap_scratch to
  facilitate this.
- Function scalar_regs_exact() is added; it differs from regs_exact()
  in the following aspects:
  - uses check_scalar_ids() for ID comparison;
  - does not check ref_obj_id, as this field is not used for scalar
    values.
- regsafe() is updated to:
  - use check_scalar_ids() for precise scalar registers.
  - use scalar_regs_exact() for scalar registers, but only for the
    explore_alu_limits branch. This simplifies control flow for the
    scalar case and has no measurable performance impact.
- check_alu_op() is updated to avoid generating bpf_reg_state::id for
  constant scalar values when processing BPF_MOV. An ID is needed to
  propagate range information for identical values, but there is
  nothing to propagate for constants.
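
As a worked illustration of the check_scalar_ids() rules above (the ID
values are hypothetical, chosen only for this example), starting from
an empty idmap:

  check_scalar_ids(1, 3, idmap) -> true   (records the mapping 1 -> 3)
  check_scalar_ids(1, 4, idmap) -> false  (1 is already mapped to 3)
  check_scalar_ids(2, 3, idmap) -> false  (3 is already used as 'cur')
  check_scalar_ids(0, 5, idmap) -> true   (the zero old_id is replaced
                                           by a fresh temporary ID, so
                                           this is 'unique ID vs 5')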

Fixes: 75748837b7e5 ("bpf: Propagate scalar ranges through register assignments.")
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 include/linux/bpf_verifier.h |  17 ++++--
 kernel/bpf/verifier.c        | 108 ++++++++++++++++++++++++++++-------
 2 files changed, 97 insertions(+), 28 deletions(-)

Comments

Andrii Nakryiko June 12, 2023, 7:56 p.m. UTC | #1
On Mon, Jun 12, 2023 at 9:08 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> [...]
>
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index 73a98f6240fd..042b76fe8e29 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -313,11 +313,6 @@ struct bpf_idx_pair {
>         u32 idx;
>  };
>
> -struct bpf_id_pair {
> -       u32 old;
> -       u32 cur;
> -};
> -
>  #define MAX_CALL_FRAMES 8
>  /* Maximum number of register states that can exist at once */
>  #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
> @@ -559,6 +554,16 @@ struct backtrack_state {
>         u64 stack_masks[MAX_CALL_FRAMES];
>  };
>
> +struct bpf_id_pair {
> +       u32 old;
> +       u32 cur;
> +};
> +
> +struct bpf_idmap {
> +       u32 tmp_id_gen;
> +       struct bpf_id_pair map[BPF_ID_MAP_SIZE];
> +};
> +
>  struct bpf_idset {
>         u32 count;
>         u32 ids[BPF_ID_MAP_SIZE];
> @@ -596,7 +601,7 @@ struct bpf_verifier_env {
>         struct bpf_verifier_log log;
>         struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
>         union {
> -               struct bpf_id_pair idmap_scratch[BPF_ID_MAP_SIZE];
> +               struct bpf_idmap idmap_scratch;
>                 struct bpf_idset idset_scratch;
>         };
>         struct {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 9b5f2433194f..b15ebfed207a 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -12942,12 +12942,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
>                 if (BPF_SRC(insn->code) == BPF_X) {
>                         struct bpf_reg_state *src_reg = regs + insn->src_reg;
>                         struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
> +                       bool need_id = src_reg->type == SCALAR_VALUE && !src_reg->id &&
> +                                      !tnum_is_const(src_reg->var_off);
>
>                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
>                                 /* case: R1 = R2
>                                  * copy register state to dest reg
>                                  */
> -                               if (src_reg->type == SCALAR_VALUE && !src_reg->id)
> +                               if (need_id)
>                                         /* Assign src and dst registers the same ID
>                                          * that will be used by find_equal_scalars()
>                                          * to propagate min/max range.
> @@ -12966,7 +12968,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
>                                 } else if (src_reg->type == SCALAR_VALUE) {
>                                         bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
>
> -                                       if (is_src_reg_u32 && !src_reg->id)
> +                                       if (is_src_reg_u32 && need_id)
>                                                 src_reg->id = ++env->id_gen;
>                                         copy_register_state(dst_reg, src_reg);
>                                         /* Make sure ID is cleared if src_reg is not in u32 range otherwise
> @@ -15122,8 +15124,9 @@ static bool range_within(struct bpf_reg_state *old,
>   * So we look through our idmap to see if this old id has been seen before.  If
>   * so, we require the new id to match; otherwise, we add the id pair to the map.
>   */
> -static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
> +static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
>  {
> +       struct bpf_id_pair *map = idmap->map;
>         unsigned int i;
>
>         /* either both IDs should be set or both should be zero */
> @@ -15134,14 +15137,44 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
>                 return true;
>
>         for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
> -               if (!idmap[i].old) {
> +               if (!map[i].old) {
>                         /* Reached an empty slot; haven't seen this id before */
> -                       idmap[i].old = old_id;
> -                       idmap[i].cur = cur_id;
> +                       map[i].old = old_id;
> +                       map[i].cur = cur_id;
>                         return true;
>                 }
> -               if (idmap[i].old == old_id)
> -                       return idmap[i].cur == cur_id;
> +               if (map[i].old == old_id)
> +                       return map[i].cur == cur_id;
> +       }
> +       /* We ran out of idmap slots, which should be impossible */
> +       WARN_ON_ONCE(1);
> +       return false;
> +}
> +
> +/* Similar to check_ids(), but:
> + * - disallow mapping of different 'old_id' values to same 'cur_id' value;
> + * - for zero 'old_id' or 'cur_id' allocate a unique temporary ID
> + *   to allow pairs like '0 vs unique ID', 'unique ID vs 0'.
> + */
> +static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
> +{
> +       struct bpf_id_pair *map = idmap->map;
> +       unsigned int i;
> +
> +       old_id = old_id ? old_id : ++idmap->tmp_id_gen;
> +       cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
> +
> +       for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
> +               if (!map[i].old) {
> +                       /* Reached an empty slot; haven't seen this id before */
> +                       map[i].old = old_id;
> +                       map[i].cur = cur_id;
> +                       return true;
> +               }
> +               if (map[i].old == old_id)
> +                       return map[i].cur == cur_id;
> +               if (map[i].cur == cur_id)
> +                       return false;

We were discussing w/ Alexei (offline) making these changes as
backportable and minimal as possible, so I looked again at how to
minimize all the extra code added.

I still insist that the current check_ids() logic is invalid in
allowing the same cur_id to map to two different old_ids, especially
for non-SCALAR registers. E.g., consider a register that is
auto-invalidated: a bpf_iter element (each gets an ID), or an id'ed
dynptr_ringbuf.

Think about the following situation:

In the old state, we could have r1.id = 1; r2.id = 2; Two registers
keep two separate pointers to ringbuf.

In the new state, we have r1.id = r2.id = 3; That is, two registers
keep the *same* pointer to ringbuf elem.

Now imagine that a BPF program has bpf_ringbuf_submit(r1) and
invalidates this register. With the old state it will invalidate r1
and will keep r2 valid. So it's safe for the BPF program to keep
working with r2 as valid ringbuf (and eventually submit it as well).

Now this is entirely unsafe for the new state. Once
bpf_ringbuf_submit(r1) happens, r2 shouldn't be accessed anymore. But
yet it will be declared safe with current check_ids() logic.

Ergo, check_ids() should make sure we do not have multi-mapping for
any of the IDs. Even if in some corner case that might be ok.
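
To make this concrete, here is a BPF C sketch of the hypothetical
hazard (the map name, sizes, and section below are assumptions, and
whether the verifier can actually be driven into matching such states
is a separate question, see the follow-up below):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct {
          __uint(type, BPF_MAP_TYPE_RINGBUF);
          __uint(max_entries, 4096);
  } rb SEC(".maps");

  SEC("socket")
  int ringbuf_ids(void *ctx)
  {
          void *p1, *p2;

          p1 = bpf_ringbuf_reserve(&rb, 8, 0);
          if (!p1)
                  return 0;
          if (bpf_get_prandom_u32() & 1) {
                  /* "old" state: two reservations, e.g. p1.id=1, p2.id=2 */
                  p2 = bpf_ringbuf_reserve(&rb, 8, 0);
                  if (!p2) {
                          bpf_ringbuf_discard(p1, 0);
                          return 0;
                  }
          } else {
                  /* "new" state: one shared reservation, p1.id = p2.id */
                  p2 = p1;
          }
          /* Safe in the "old" state, but a double-submit of the same
           * record in the "new" state. If check_ids() allowed both old
           * ids to map to the one new id, the "new" state could be
           * pruned against the "old" checkpoint and never verified.
           */
          bpf_ringbuf_submit(p1, 0);
          bpf_ringbuf_submit(p2, 0);
          return 0;
  }

  char _license[] SEC("license") = "GPL";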

I actually tested this change with veristat, there are no regressions
at all. I think it's both safe from a perf standpoint, and necessary
and safe from a correctness standpoint.

So all in all (I did inline scalar_regs_exact in a separate patch, up
to you), I have these changes on top and they are all good from a
veristat perspective:

Author: Andrii Nakryiko <andrii@kernel.org>
Date:   Mon Jun 12 12:53:25 2023 -0700

    bpf: inline scalar_regs_exact

    Signed-off-by: Andrii Nakryiko <andrii@kernel.org>

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9d5fefd970a3..c5606613136d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -15262,14 +15262,6 @@ static bool regs_exact(const struct bpf_reg_state *rold,
               check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
 }

-static bool scalar_regs_exact(const struct bpf_reg_state *rold,
-                             const struct bpf_reg_state *rcur,
-                             struct bpf_idmap *idmap)
-{
-       return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
-              check_scalar_ids(rold->id, rcur->id, idmap);
-}
-
 /* Returns true if (rold safe implies rcur safe) */
 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
                    struct bpf_reg_state *rcur, struct bpf_idmap *idmap)
@@ -15309,8 +15301,13 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,

        switch (base_type(rold->type)) {
        case SCALAR_VALUE:
-               if (env->explore_alu_limits)
-                       return scalar_regs_exact(rold, rcur, idmap);
+               if (env->explore_alu_limits) {
+                       /* explore_alu_limits disables tnum_in() and range_within()
+                        * logic and requires everything to be strict
+                        */
+                       return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+                              check_scalar_ids(rold->id, rcur->id, idmap);
+               }
                if (!rold->precise)
                        return true;
                /* Why check_ids() for scalar registers?

commit 57297c01fe747e423cd3c924ef492c0688d8057a
Author: Andrii Nakryiko <andrii@kernel.org>
Date:   Mon Jun 12 11:46:46 2023 -0700

    bpf: make check_ids disallow multi-mapping of IDs universally

    Signed-off-by: Andrii Nakryiko <andrii@kernel.org>

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3da77713d1ca..9d5fefd970a3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -15137,6 +15137,8 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
                }
                if (map[i].old == old_id)
                        return map[i].cur == cur_id;
+               if (map[i].cur == cur_id)
+                       return false;
        }
        /* We ran out of idmap slots, which should be impossible */
        WARN_ON_ONCE(1);
@@ -15144,33 +15146,15 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
 }

 /* Similar to check_ids(), but:
- * - disallow mapping of different 'old_id' values to same 'cur_id' value;
  * - for zero 'old_id' or 'cur_id' allocate a unique temporary ID
  *   to allow pairs like '0 vs unique ID', 'unique ID vs 0'.
  */
 static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
 {
-       struct bpf_id_pair *map = idmap->map;
-       unsigned int i;
-
        old_id = old_id ? old_id : ++idmap->tmp_id_gen;
        cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;

-       for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
-               if (!map[i].old) {
-                       /* Reached an empty slot; haven't seen this id before */
-                       map[i].old = old_id;
-                       map[i].cur = cur_id;
-                       return true;
-               }
-               if (map[i].old == old_id)
-                       return map[i].cur == cur_id;
-               if (map[i].cur == cur_id)
-                       return false;
-       }
-       /* We ran out of idmap slots, which should be impossible */
-       WARN_ON_ONCE(1);
-       return false;
+       return check_ids(old_id, cur_id, idmap);
 }

 static void clean_func_state(struct bpf_verifier_env *env,



>         }
>         /* We ran out of idmap slots, which should be impossible */
>         WARN_ON_ONCE(1);
> @@ -15246,16 +15279,24 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
>
>  static bool regs_exact(const struct bpf_reg_state *rold,
>                        const struct bpf_reg_state *rcur,
> -                      struct bpf_id_pair *idmap)
> +                      struct bpf_idmap *idmap)
>  {
>         return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
>                check_ids(rold->id, rcur->id, idmap) &&
>                check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
>  }
>

[...]
Eduard Zingerman June 12, 2023, 9:01 p.m. UTC | #2
On Mon, 2023-06-12 at 12:56 -0700, Andrii Nakryiko wrote:
> On Mon, Jun 12, 2023 at 9:08 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > [...]
>
> [...]
>
> Think about the following situation:
> 
> In the old state, we could have r1.id = 1; r2.id = 2; Two registers
> keep two separate pointers to ringbuf.
> 
> In the new state, we have r1.id = r2.id = 3; That is, two registers
> keep the *same* pointer to ringbuf elem.
> 
> Now imagine that a BPF program has bpf_ringbuf_submit(r1) and
> invalidates this register. With the old state it will invalidate r1
> and will keep r2 valid. So it's safe for the BPF program to keep
> working with r2 as valid ringbuf (and eventually submit it as well).
> 
> Now this is entirely unsafe for the new state. Once
> bpf_ringbuf_submit(r1) happens, r2 shouldn't be accessed anymore. But
> yet it will be declared safe with current check_ids() logic.
> 
> Ergo, check_ids() should make sure we do not have multi-mapping for
> any of the IDs. Even if in some corner case that might be ok.
> 
> I actually tested this change with veristat, there are no regressions
> at all. I think it's both safe from a perf standpoint, and necessary
> and safe from a correctness standpoint.
> 
> So all in all (I did inline scalar_regs_exact in a separate patch, up
> to you), I have these changes on top and they are all good from a
> veristat perspective:

Ok, ringbuf is a good example.
To minimize the patch-set I'll do the following:
- move your check_ids patch to the beginning of the series
- add selftest for ringbuf
- squash the scalar_regs_exact patch with current patch #3

And resubmit with EFAULT fix in patch #1.
Thank you for looking into this.

> [...]
Eduard Zingerman June 13, 2023, 12:10 a.m. UTC | #3
On Tue, 2023-06-13 at 00:01 +0300, Eduard Zingerman wrote:
> On Mon, 2023-06-12 at 12:56 -0700, Andrii Nakryiko wrote:
> > [...]
> 
> Ok, ringbuf is a good example.
> To minimize the patch-set I'll do the following:
> - move your check_ids patch to the beginning of the series
> - add selftest for ringbuf
> - squash the scalar_regs_exact patch with current patch #3
> 
> And resubmit with EFAULT fix in patch #1.
> Thank you for looking into this.

Welp, it turns out it is not possible to create a failing example with
ringbuf. This is so because ringbuf objects are tracked in
bpf_func_state::refs (of type struct bpf_reference_state).

The ID of each acquired object is stored in this array and compared in
func_states_equal() using the refsafe() function:

  static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
  		    struct bpf_idmap *idmap)
  {
  	int i;
  
  	if (old->acquired_refs != cur->acquired_refs)
  		return false;
  
  	for (i = 0; i < old->acquired_refs; i++) {
  		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
  			return false;
  	}
  
  	return true;
  }
  
Whatever combination of old and current IDs we get in a register is
later checked against the full list of acquired IDs.

So far I haven't been able to create a counter-example showing that
the following snippet is necessary in check_ids():

+               if (map[i].cur == cur_id)
+                       return false;

But this saga has to end somehow :)
I'll just squash your patches on top of patch #3, since there are no
verification performance regressions.

> > [...]
Andrii Nakryiko June 13, 2023, 12:28 a.m. UTC | #4
On Mon, Jun 12, 2023 at 5:10 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Tue, 2023-06-13 at 00:01 +0300, Eduard Zingerman wrote:
> > [...]
> > > >   * so, we require the new id to match; otherwise, we add the id pair to the map.
> > > >   */
> > > > -static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
> > > > +static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
> > > >  {
> > > > +       struct bpf_id_pair *map = idmap->map;
> > > >         unsigned int i;
> > > >
> > > >         /* either both IDs should be set or both should be zero */
> > > > @@ -15134,14 +15137,44 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
> > > >                 return true;
> > > >
> > > >         for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
> > > > -               if (!idmap[i].old) {
> > > > +               if (!map[i].old) {
> > > >                         /* Reached an empty slot; haven't seen this id before */
> > > > -                       idmap[i].old = old_id;
> > > > -                       idmap[i].cur = cur_id;
> > > > +                       map[i].old = old_id;
> > > > +                       map[i].cur = cur_id;
> > > >                         return true;
> > > >                 }
> > > > -               if (idmap[i].old == old_id)
> > > > -                       return idmap[i].cur == cur_id;
> > > > +               if (map[i].old == old_id)
> > > > +                       return map[i].cur == cur_id;
> > > > +       }
> > > > +       /* We ran out of idmap slots, which should be impossible */
> > > > +       WARN_ON_ONCE(1);
> > > > +       return false;
> > > > +}
> > > > +
> > > > +/* Similar to check_ids(), but:
> > > > + * - disallow mapping of different 'old_id' values to same 'cur_id' value;
> > > > + * - for zero 'old_id' or 'cur_id' allocate a unique temporary ID
> > > > + *   to allow pairs like '0 vs unique ID', 'unique ID vs 0'.
> > > > + */
> > > > +static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
> > > > +{
> > > > +       struct bpf_id_pair *map = idmap->map;
> > > > +       unsigned int i;
> > > > +
> > > > +       old_id = old_id ? old_id : ++idmap->tmp_id_gen;
> > > > +       cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
> > > > +
> > > > +       for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
> > > > +               if (!map[i].old) {
> > > > +                       /* Reached an empty slot; haven't seen this id before */
> > > > +                       map[i].old = old_id;
> > > > +                       map[i].cur = cur_id;
> > > > +                       return true;
> > > > +               }
> > > > +               if (map[i].old == old_id)
> > > > +                       return map[i].cur == cur_id;
> > > > +               if (map[i].cur == cur_id)
> > > > +                       return false;
> > >
> > > We were discussing with Alexei (offline) how to make these changes as
> > > backportable and minimal as possible, so I looked again at how to
> > > minimize the extra code added.
> > >
> > > I still maintain that it is invalid for the current check_ids() logic
> > > to allow the same cur_id to map to two different old_ids, especially
> > > for non-SCALAR registers. Consider a register that is auto-invalidated:
> > > e.g., a bpf_iter element (each gets an ID), or an id'ed dynptr_ringbuf.
> > >
> > > Think about the following situation:
> > >
> > > In the old state, we could have r1.id = 1 and r2.id = 2: the two
> > > registers hold two separate pointers to ringbuf elements.
> > >
> > > In the new state, we have r1.id = r2.id = 3: the two registers hold
> > > the *same* pointer to a ringbuf elem.
> > >
> > > Now imagine that the BPF program calls bpf_ringbuf_submit(r1),
> > > invalidating this register. In the old state this invalidates r1 and
> > > keeps r2 valid, so it is safe for the program to keep working with r2
> > > as a valid ringbuf pointer (and eventually submit it as well).
> > >
> > > For the new state this is entirely unsafe: once bpf_ringbuf_submit(r1)
> > > happens, r2 must not be accessed anymore. Yet it would be declared
> > > safe by the current check_ids() logic.
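> > >
> > > Roughly, in BPF C (a hedged sketch: the map definition and sizes are
> > > illustrative, NULL checks and cleanup are elided for brevity;
> > > bpf_ringbuf_reserve()/bpf_ringbuf_submit() are the real helpers):
> > >
> > >   struct {
> > >           __uint(type, BPF_MAP_TYPE_RINGBUF);
> > >           __uint(max_entries, 4096);
> > >   } rb SEC(".maps");
> > >
> > >   void *p1 = bpf_ringbuf_reserve(&rb, 8, 0); /* old: id 1 / new: id 3 */
> > >   void *p2 = bpf_ringbuf_reserve(&rb, 8, 0); /* old: id 2; in the new
> > >                                               * state imagine p2 == p1,
> > >                                               * also with id 3 */
> > >   bpf_ringbuf_submit(p1, 0); /* invalidates every reg sharing p1's id */
> > >   *(__u64 *)p2 = 0;          /* fine in the old state; a use-after-
> > >                               * submit if p2 aliases p1 (new state) */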
> > >
> > > Ergo, check_ids() should make sure we do not have multi-mapping for
> > > any of the IDs, even if in some corner cases that might be ok.
> > >
> > > I actually tested this change with veristat; there are no regressions
> > > at all. I think it is safe from a perf standpoint, and both necessary
> > > and safe from a correctness standpoint.
> > >
> > > So, all in all (I inlined scalar_regs_exact in a separate patch, up
> > > to you), I have these changes on top, and they all look good from a
> > > veristat perspective:
> >
> > Ok, ringbuf is a good example.
> > To minimize the patch-set I'll do the following:
> > - move your check_ids patch to the beginning of the series
> > - add selftest for ringbuf
> > - squash the scalar_regs_exact patch with current patch #3
> >
> > And resubmit with EFAULT fix in patch #1.
> > Thank you for looking into this.
>
> Welp, it turns out it is not possible to create a failing example with
> ringbuf. This is because ringbuf objects are tracked in
> bpf_func_state::refs (of type struct bpf_reference_state).
>
> The ID of each acquired object is stored in this array and compared in
> func_states_equal() using the refsafe() function:
>
>   static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
>                     struct bpf_idmap *idmap)
>   {
>         int i;
>
>         if (old->acquired_refs != cur->acquired_refs)
>                 return false;
>
>         for (i = 0; i < old->acquired_refs; i++) {
>                 if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
>                         return false;
>         }
>
>         return true;
>   }
>
> Whatever combination of old and current IDs we get in a register is
> later checked against the full list of acquired IDs.
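>
> For instance (a hypothetical trace): old regs r1.id = 1, r2.id = 2 and
> cur regs r1.id = r2.id = 3, with old refs {1, 2} and cur refs {3, 4}.
> regsafe() records the pairs (1, 3) and (2, 3) in the idmap; refsafe()
> then accepts check_ids(1, 3) but fails check_ids(2, 4), because old id
> 2 is already mapped to 3. So the mismatch is caught even without the
> extra cur_id check.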
>
> So far I haven't been able to create a counter-example that shows the
> following snippet is necessary in check_ids():
>
> +               if (map[i].cur == cur_id)
> +                       return false;
>
> But this saga has to end somehow :)

ha-ha, yep. I think we should add this check and not rely on some
side effects of other checks for correctness.
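
For instance (a hypothetical idmap trace of what the extra check buys us):

  check_ids(1, 3, idmap) -> records (1, 3), returns true
  check_ids(2, 3, idmap) -> without the check: records (2, 3) and returns
                            true, silently mapping two old objects onto
                            one current object; with the check: cur 3 is
                            already claimed by old 1, returns false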

> I'll just squash your patches on top of patch #3, since there are no
> verification performance regressions.

Thanks!

>
> >
> > >
> > > Author: Andrii Nakryiko <andrii@kernel.org>
> > > Date:   Mon Jun 12 12:53:25 2023 -0700
> > >
> > >     bpf: inline scalar_regs_exact
> > >
> > >     Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > >
> > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > index 9d5fefd970a3..c5606613136d 100644
> > > --- a/kernel/bpf/verifier.c
> > > +++ b/kernel/bpf/verifier.c
> > > @@ -15262,14 +15262,6 @@ static bool regs_exact(const struct bpf_reg_state *rold,
> > >                check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
> > >  }
> > >
> > > -static bool scalar_regs_exact(const struct bpf_reg_state *rold,
> > > -                             const struct bpf_reg_state *rcur,
> > > -                             struct bpf_idmap *idmap)
> > > -{
> > > -       return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
> > > -              check_scalar_ids(rold->id, rcur->id, idmap);
> > > -}
> > > -
> > >  /* Returns true if (rold safe implies rcur safe) */
> > >  static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
> > >                     struct bpf_reg_state *rcur, struct bpf_idmap *idmap)
> > > @@ -15309,8 +15301,13 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
> > >
> > >         switch (base_type(rold->type)) {
> > >         case SCALAR_VALUE:
> > > -               if (env->explore_alu_limits)
> > > -                       return scalar_regs_exact(rold, rcur, idmap);
> > > +               if (env->explore_alu_limits) {
> > > +                       /* explore_alu_limits disables tnum_in() and range_within()
> > > +                        * logic and requires everything to be strict
> > > +                        */
> > > +                       return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
> > > +                              check_scalar_ids(rold->id, rcur->id, idmap);
> > > +               }
> > >                 if (!rold->precise)
> > >                         return true;
> > >                 /* Why check_ids() for scalar registers?
> > >
> > > commit 57297c01fe747e423cd3c924ef492c0688d8057a
> > > Author: Andrii Nakryiko <andrii@kernel.org>
> > > Date:   Mon Jun 12 11:46:46 2023 -0700
> > >
> > >     bpf: make check_ids disallow multi-mapping of IDs universally
> > >
> > >     Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > >
> > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > index 3da77713d1ca..9d5fefd970a3 100644
> > > --- a/kernel/bpf/verifier.c
> > > +++ b/kernel/bpf/verifier.c
> > > @@ -15137,6 +15137,8 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
> > >                 }
> > >                 if (map[i].old == old_id)
> > >                         return map[i].cur == cur_id;
> > > +               if (map[i].cur == cur_id)
> > > +                       return false;
> > >         }
> > >         /* We ran out of idmap slots, which should be impossible */
> > >         WARN_ON_ONCE(1);
> > > @@ -15144,33 +15146,15 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
> > >  }
> > >
> > >  /* Similar to check_ids(), but:
> > > - * - disallow mapping of different 'old_id' values to same 'cur_id' value;
> > >   * - for zero 'old_id' or 'cur_id' allocate a unique temporary ID
> > >   *   to allow pairs like '0 vs unique ID', 'unique ID vs 0'.
> > >   */
> > >  static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
> > >  {
> > > -       struct bpf_id_pair *map = idmap->map;
> > > -       unsigned int i;
> > > -
> > >         old_id = old_id ? old_id : ++idmap->tmp_id_gen;
> > >         cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
> > >
> > > -       for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
> > > -               if (!map[i].old) {
> > > -                       /* Reached an empty slot; haven't seen this id before */
> > > -                       map[i].old = old_id;
> > > -                       map[i].cur = cur_id;
> > > -                       return true;
> > > -               }
> > > -               if (map[i].old == old_id)
> > > -                       return map[i].cur == cur_id;
> > > -               if (map[i].cur == cur_id)
> > > -                       return false;
> > > -       }
> > > -       /* We ran out of idmap slots, which should be impossible */
> > > -       WARN_ON_ONCE(1);
> > > -       return false;
> > > +       return check_ids(old_id, cur_id, idmap);
> > >  }
> > >
> > >  static void clean_func_state(struct bpf_verifier_env *env,
> > >
> > >
> > >
> > > >         }
> > > >         /* We ran out of idmap slots, which should be impossible */
> > > >         WARN_ON_ONCE(1);
> > > > @@ -15246,16 +15279,24 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
> > > >
> > > >  static bool regs_exact(const struct bpf_reg_state *rold,
> > > >                        const struct bpf_reg_state *rcur,
> > > > -                      struct bpf_id_pair *idmap)
> > > > +                      struct bpf_idmap *idmap)
> > > >  {
> > > >         return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
> > > >                check_ids(rold->id, rcur->id, idmap) &&
> > > >                check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
> > > >  }
> > > >
> > >
> > > [...]
> >
>
kernel test robot June 13, 2023, 8:02 a.m. UTC | #5
Hi Eduard,

kernel test robot noticed the following build warnings:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Eduard-Zingerman/bpf-use-scalar-ids-in-mark_chain_precision/20230613-001651
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20230612160801.2804666-4-eddyz87%40gmail.com
patch subject: [PATCH bpf-next v5 3/4] bpf: verify scalar ids mapping in regsafe() using check_ids()
config: x86_64-rhel-8.3-rust (https://download.01.org/0day-ci/archive/20230613/202306131550.U3M9AJGm-lkp@intel.com/config)
compiler: clang version 15.0.7 (https://github.com/llvm/llvm-project.git 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a)
reproduce (this is a W=1 build):
        mkdir -p ~/bin
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        git remote add bpf-next https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
        git fetch bpf-next master
        git checkout bpf-next/master
        b4 shazam https://lore.kernel.org/r/20230612160801.2804666-4-eddyz87@gmail.com
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=x86_64 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash kernel/bpf/

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202306131550.U3M9AJGm-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from kernel/bpf/verifier.c:7:
   In file included from include/linux/bpf-cgroup.h:5:
   In file included from include/linux/bpf.h:10:
   In file included from include/linux/workqueue.h:9:
   In file included from include/linux/timer.h:6:
   In file included from include/linux/ktime.h:24:
   In file included from include/linux/time.h:60:
   In file included from include/linux/time32.h:13:
   In file included from include/linux/timex.h:67:
   In file included from arch/x86/include/asm/timex.h:5:
   In file included from arch/x86/include/asm/processor.h:23:
   In file included from arch/x86/include/asm/msr.h:11:
   In file included from arch/x86/include/asm/cpumask.h:5:
   In file included from include/linux/cpumask.h:12:
   In file included from include/linux/bitmap.h:11:
   In file included from include/linux/string.h:254:
>> include/linux/fortify-string.h:430:4: warning: call to __write_overflow_field declared with 'warning' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning]
                           __write_overflow_field(p_size_field, size);
                           ^
   1 warning generated.


vim +/warning +430 include/linux/fortify-string.h

a28a6e860c6cf2 Francis Laniel 2021-02-25  411  
28e77cc1c06866 Kees Cook      2021-06-16  412  __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size,
28e77cc1c06866 Kees Cook      2021-06-16  413  					 const size_t p_size,
28e77cc1c06866 Kees Cook      2021-06-16  414  					 const size_t p_size_field)
a28a6e860c6cf2 Francis Laniel 2021-02-25  415  {
28e77cc1c06866 Kees Cook      2021-06-16  416  	if (__builtin_constant_p(size)) {
28e77cc1c06866 Kees Cook      2021-06-16  417  		/*
28e77cc1c06866 Kees Cook      2021-06-16  418  		 * Length argument is a constant expression, so we
28e77cc1c06866 Kees Cook      2021-06-16  419  		 * can perform compile-time bounds checking where
fa35198f39571b Kees Cook      2022-09-19  420  		 * buffer sizes are also known at compile time.
28e77cc1c06866 Kees Cook      2021-06-16  421  		 */
a28a6e860c6cf2 Francis Laniel 2021-02-25  422  
28e77cc1c06866 Kees Cook      2021-06-16  423  		/* Error when size is larger than enclosing struct. */
fa35198f39571b Kees Cook      2022-09-19  424  		if (__compiletime_lessthan(p_size_field, p_size) &&
fa35198f39571b Kees Cook      2022-09-19  425  		    __compiletime_lessthan(p_size, size))
a28a6e860c6cf2 Francis Laniel 2021-02-25  426  			__write_overflow();
28e77cc1c06866 Kees Cook      2021-06-16  427  
28e77cc1c06866 Kees Cook      2021-06-16  428  		/* Warn when write size is larger than dest field. */
fa35198f39571b Kees Cook      2022-09-19  429  		if (__compiletime_lessthan(p_size_field, size))
28e77cc1c06866 Kees Cook      2021-06-16 @430  			__write_overflow_field(p_size_field, size);
a28a6e860c6cf2 Francis Laniel 2021-02-25  431  	}
28e77cc1c06866 Kees Cook      2021-06-16  432  	/*
28e77cc1c06866 Kees Cook      2021-06-16  433  	 * At this point, length argument may not be a constant expression,
28e77cc1c06866 Kees Cook      2021-06-16  434  	 * so run-time bounds checking can be done where buffer sizes are
28e77cc1c06866 Kees Cook      2021-06-16  435  	 * known. (This is not an "else" because the above checks may only
28e77cc1c06866 Kees Cook      2021-06-16  436  	 * be compile-time warnings, and we want to still warn for run-time
28e77cc1c06866 Kees Cook      2021-06-16  437  	 * overflows.)
28e77cc1c06866 Kees Cook      2021-06-16  438  	 */
28e77cc1c06866 Kees Cook      2021-06-16  439  
28e77cc1c06866 Kees Cook      2021-06-16  440  	/*
28e77cc1c06866 Kees Cook      2021-06-16  441  	 * Always stop accesses beyond the struct that contains the
28e77cc1c06866 Kees Cook      2021-06-16  442  	 * field, when the buffer's remaining size is known.
311fb40aa0569a Kees Cook      2022-09-02  443  	 * (The SIZE_MAX test is to optimize away checks where the buffer
28e77cc1c06866 Kees Cook      2021-06-16  444  	 * lengths are unknown.)
28e77cc1c06866 Kees Cook      2021-06-16  445  	 */
311fb40aa0569a Kees Cook      2022-09-02  446  	if (p_size != SIZE_MAX && p_size < size)
28e77cc1c06866 Kees Cook      2021-06-16  447  		fortify_panic("memset");
28e77cc1c06866 Kees Cook      2021-06-16  448  }
28e77cc1c06866 Kees Cook      2021-06-16  449
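
The warning points at the memset() in states_equal(): it zeroes
env->idmap_scratch.map using the size of the whole bpf_idmap struct, so
the write extends past the end of the .map field. A possible fix sketch
(an assumption, not taken from this thread) is to size the memset by the
field itself:

	memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));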
diff mbox series

Patch

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 73a98f6240fd..042b76fe8e29 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -313,11 +313,6 @@  struct bpf_idx_pair {
 	u32 idx;
 };
 
-struct bpf_id_pair {
-	u32 old;
-	u32 cur;
-};
-
 #define MAX_CALL_FRAMES 8
 /* Maximum number of register states that can exist at once */
 #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
@@ -559,6 +554,16 @@  struct backtrack_state {
 	u64 stack_masks[MAX_CALL_FRAMES];
 };
 
+struct bpf_id_pair {
+	u32 old;
+	u32 cur;
+};
+
+struct bpf_idmap {
+	u32 tmp_id_gen;
+	struct bpf_id_pair map[BPF_ID_MAP_SIZE];
+};
+
 struct bpf_idset {
 	u32 count;
 	u32 ids[BPF_ID_MAP_SIZE];
@@ -596,7 +601,7 @@  struct bpf_verifier_env {
 	struct bpf_verifier_log log;
 	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
 	union {
-		struct bpf_id_pair idmap_scratch[BPF_ID_MAP_SIZE];
+		struct bpf_idmap idmap_scratch;
 		struct bpf_idset idset_scratch;
 	};
 	struct {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9b5f2433194f..b15ebfed207a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12942,12 +12942,14 @@  static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		if (BPF_SRC(insn->code) == BPF_X) {
 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
+			bool need_id = src_reg->type == SCALAR_VALUE && !src_reg->id &&
+				       !tnum_is_const(src_reg->var_off);
 
 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
 				/* case: R1 = R2
 				 * copy register state to dest reg
 				 */
-				if (src_reg->type == SCALAR_VALUE && !src_reg->id)
+				if (need_id)
 					/* Assign src and dst registers the same ID
 					 * that will be used by find_equal_scalars()
 					 * to propagate min/max range.
@@ -12966,7 +12968,7 @@  static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 				} else if (src_reg->type == SCALAR_VALUE) {
 					bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
 
-					if (is_src_reg_u32 && !src_reg->id)
+					if (is_src_reg_u32 && need_id)
 						src_reg->id = ++env->id_gen;
 					copy_register_state(dst_reg, src_reg);
 					/* Make sure ID is cleared if src_reg is not in u32 range otherwise
@@ -15122,8 +15124,9 @@  static bool range_within(struct bpf_reg_state *old,
  * So we look through our idmap to see if this old id has been seen before.  If
  * so, we require the new id to match; otherwise, we add the id pair to the map.
  */
-static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
+static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
 {
+	struct bpf_id_pair *map = idmap->map;
 	unsigned int i;
 
 	/* either both IDs should be set or both should be zero */
@@ -15134,14 +15137,44 @@  static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
 		return true;
 
 	for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
-		if (!idmap[i].old) {
+		if (!map[i].old) {
 			/* Reached an empty slot; haven't seen this id before */
-			idmap[i].old = old_id;
-			idmap[i].cur = cur_id;
+			map[i].old = old_id;
+			map[i].cur = cur_id;
 			return true;
 		}
-		if (idmap[i].old == old_id)
-			return idmap[i].cur == cur_id;
+		if (map[i].old == old_id)
+			return map[i].cur == cur_id;
+	}
+	/* We ran out of idmap slots, which should be impossible */
+	WARN_ON_ONCE(1);
+	return false;
+}
+
+/* Similar to check_ids(), but:
+ * - disallow mapping of different 'old_id' values to same 'cur_id' value;
+ * - for zero 'old_id' or 'cur_id' allocate a unique temporary ID
+ *   to allow pairs like '0 vs unique ID', 'unique ID vs 0'.
+ */
+static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
+{
+	struct bpf_id_pair *map = idmap->map;
+	unsigned int i;
+
+	old_id = old_id ? old_id : ++idmap->tmp_id_gen;
+	cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
+
+	for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
+		if (!map[i].old) {
+			/* Reached an empty slot; haven't seen this id before */
+			map[i].old = old_id;
+			map[i].cur = cur_id;
+			return true;
+		}
+		if (map[i].old == old_id)
+			return map[i].cur == cur_id;
+		if (map[i].cur == cur_id)
+			return false;
 	}
 	/* We ran out of idmap slots, which should be impossible */
 	WARN_ON_ONCE(1);
@@ -15246,16 +15279,24 @@  static void clean_live_states(struct bpf_verifier_env *env, int insn,
 
 static bool regs_exact(const struct bpf_reg_state *rold,
 		       const struct bpf_reg_state *rcur,
-		       struct bpf_id_pair *idmap)
+		       struct bpf_idmap *idmap)
 {
 	return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
 	       check_ids(rold->id, rcur->id, idmap) &&
 	       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
 }
 
+static bool scalar_regs_exact(const struct bpf_reg_state *rold,
+			      const struct bpf_reg_state *rcur,
+			      struct bpf_idmap *idmap)
+{
+	return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+	       check_scalar_ids(rold->id, rcur->id, idmap);
+}
+
 /* Returns true if (rold safe implies rcur safe) */
 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
-		    struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
+		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap)
 {
 	if (!(rold->live & REG_LIVE_READ))
 		/* explored state didn't use this */
@@ -15292,15 +15333,37 @@  static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
 
 	switch (base_type(rold->type)) {
 	case SCALAR_VALUE:
-		if (regs_exact(rold, rcur, idmap))
-			return true;
 		if (env->explore_alu_limits)
-			return false;
+			return scalar_regs_exact(rold, rcur, idmap);
 		if (!rold->precise)
 			return true;
-		/* new val must satisfy old val knowledge */
+		/* Why check_ids() for scalar registers?
+		 *
+		 * Consider the following BPF code:
+		 *   1: r6 = ... unbound scalar, ID=a ...
+		 *   2: r7 = ... unbound scalar, ID=b ...
+		 *   3: if (r6 > r7) goto +1
+		 *   4: r6 = r7
+		 *   5: if (r6 > X) goto ...
+		 *   6: ... memory operation using r7 ...
+		 *
+		 * First verification path is [1-6]:
+		 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
+		 * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark
+		 *   r7 <= X, because r6 and r7 share same id.
+		 * Next verification path is [1-4, 6].
+		 *
+		 * Instruction (6) would be reached in two states:
+		 *   I.  r6{.id=b}, r7{.id=b} via path 1-6;
+		 *   II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
+		 *
+		 * Use check_ids() to distinguish these states.
+		 * ---
+		 * Also verify that new value satisfies old value range knowledge.
+		 */
 		return range_within(rold, rcur) &&
-		       tnum_in(rold->var_off, rcur->var_off);
+		       tnum_in(rold->var_off, rcur->var_off) &&
+		       check_scalar_ids(rold->id, rcur->id, idmap);
 	case PTR_TO_MAP_KEY:
 	case PTR_TO_MAP_VALUE:
 	case PTR_TO_MEM:
@@ -15346,7 +15409,7 @@  static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
 }
 
 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
-		      struct bpf_func_state *cur, struct bpf_id_pair *idmap)
+		      struct bpf_func_state *cur, struct bpf_idmap *idmap)
 {
 	int i, spi;
 
@@ -15449,7 +15512,7 @@  static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
 }
 
 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
-		    struct bpf_id_pair *idmap)
+		    struct bpf_idmap *idmap)
 {
 	int i;
 
@@ -15497,13 +15560,13 @@  static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
 
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (!regsafe(env, &old->regs[i], &cur->regs[i],
-			     env->idmap_scratch))
+			     &env->idmap_scratch))
 			return false;
 
-	if (!stacksafe(env, old, cur, env->idmap_scratch))
+	if (!stacksafe(env, old, cur, &env->idmap_scratch))
 		return false;
 
-	if (!refsafe(old, cur, env->idmap_scratch))
+	if (!refsafe(old, cur, &env->idmap_scratch))
 		return false;
 
 	return true;
@@ -15518,7 +15581,8 @@  static bool states_equal(struct bpf_verifier_env *env,
 	if (old->curframe != cur->curframe)
 		return false;
 
-	memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
+	env->idmap_scratch.tmp_id_gen = env->id_gen;
+	memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch));
 
 	/* Verification state from speculative execution simulation
 	 * must never prune a non-speculative execution one.
@@ -15536,7 +15600,7 @@  static bool states_equal(struct bpf_verifier_env *env,
 		return false;
 
 	if (old->active_lock.id &&
-	    !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch))
+	    !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch))
 		return false;
 
 	if (old->active_rcu_lock != cur->active_rcu_lock)