
[bpf-next,v2,14/15] bpf: Optimize state pruning for spilled scalars

Message ID 20240108205209.838365-15-maxtram95@gmail.com (mailing list archive)
State: Changes Requested
Delegated to: BPF
Series: Improvements for tracking scalars in the BPF verifier

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1094 this patch: 1093
netdev/cc_maintainers success CCed 0 of 0 maintainers
netdev/build_clang success Errors and warnings before: 1111 this patch: 1109
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1121 this patch: 1120
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-14 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-7 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-13 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-15 success Logs for x86_64-gcc / build-release

Commit Message

Maxim Mikityanskiy Jan. 8, 2024, 8:52 p.m. UTC
From: Eduard Zingerman <eddyz87@gmail.com>

The changes for scalar ID tracking of spilled unbound scalars lead to a
certain verification performance regression. This commit mitigates the
regression by exploiting the following properties maintained by
check_stack_read_fixed_off():
- a mix of STACK_MISC, STACK_ZERO and STACK_INVALID marks is read as an
  unbounded scalar register;
- a spi with all slots marked STACK_ZERO is read as a scalar register
  with value zero.

This commit modifies stacksafe() to consider the situations above
equivalent.
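
For illustration, a minimal sketch of the first equivalence (helper names are
taken from the diff below; this is a condensed preview, not extra code on top
of the patch):

	if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
	    is_stack_unbound_slot64(env, &cur->stack[spi])) {
		/* old: 64-bit spill of an unbounded SCALAR_VALUE;
		 * cur: bytes marked STACK_MISC/STACK_ZERO (or STACK_INVALID
		 * when uninitialized stack reads are allowed).
		 * check_stack_read_fixed_off() reads either back as an
		 * unbounded scalar, so compare the spilled register against
		 * a template unknown register to keep scalar ID checks.
		 */
		if (!regsafe(env, &old->stack[spi].spilled_ptr, &unbound_reg,
			     idmap, exact))
			return false;
	}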

Veristat results after this patch show significant gains:

$ ./veristat -e file,prog,states -f '!states_pct<10' -f '!states_b<10' -C not-opt after
File              Program   States (A)  States (B)  States    (DIFF)
----------------  --------  ----------  ----------  ----------------
pyperf180.bpf.o   on_event       10456        8422   -2034 (-19.45%)
pyperf600.bpf.o   on_event       37319       22519  -14800 (-39.66%)
strobemeta.bpf.o  on_event       13435        4703   -8732 (-64.99%)

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 kernel/bpf/verifier.c | 83 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

Comments

Andrii Nakryiko Jan. 10, 2024, 12:22 a.m. UTC | #1
On Mon, Jan 8, 2024 at 12:53 PM Maxim Mikityanskiy <maxtram95@gmail.com> wrote:
>
> From: Eduard Zingerman <eddyz87@gmail.com>
>
> Changes for scalar ID tracking of spilled unbound scalars lead to
> certain verification performance regression. This commit mitigates the
> regression by exploiting the following properties maintained by
> check_stack_read_fixed_off():
> - a mix of STACK_MISC, STACK_ZERO and STACK_INVALID marks is read as
>   unbounded scalar register;
> - spi with all slots marked STACK_ZERO is read as scalar register with
>   value zero.
>
> This commit modifies stacksafe() to consider situations above
> equivalent.
>
> Veristat results after this patch show significant gains:
>
> $ ./veristat -e file,prog,states -f '!states_pct<10' -f '!states_b<10' -C not-opt after
> File              Program   States (A)  States (B)  States    (DIFF)
> ----------------  --------  ----------  ----------  ----------------
> pyperf180.bpf.o   on_event       10456        8422   -2034 (-19.45%)
> pyperf600.bpf.o   on_event       37319       22519  -14800 (-39.66%)
> strobemeta.bpf.o  on_event       13435        4703   -8732 (-64.99%)
>
> Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> ---
>  kernel/bpf/verifier.c | 83 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 83 insertions(+)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index aeb3e198a5ea..cb82f8d4226f 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1170,6 +1170,12 @@ static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
>         *stype = STACK_MISC;
>  }
>
> +static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
> +{
> +       return stack->slot_type[0] == STACK_SPILL &&
> +              stack->spilled_ptr.type == SCALAR_VALUE;
> +}
> +
>  static void scrub_spilled_slot(u8 *stype)
>  {
>         if (*stype != STACK_INVALID)
> @@ -16459,11 +16465,45 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
>         }
>  }
>
> +static bool is_stack_zero64(struct bpf_stack_state *stack)
> +{
> +       u32 i;
> +
> +       for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
> +               if (stack->slot_type[i] != STACK_ZERO)
> +                       return false;
> +       return true;
> +}
> +
> +static bool is_stack_unbound_slot64(struct bpf_verifier_env *env,
> +                                   struct bpf_stack_state *stack)
> +{
> +       u32 i;
> +
> +       for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
> +               if (stack->slot_type[i] != STACK_ZERO &&
> +                   stack->slot_type[i] != STACK_MISC &&
> +                   (!env->allow_uninit_stack || stack->slot_type[i] != STACK_INVALID))
> +                       return false;
> +       return true;
> +}
> +
> +static bool is_spilled_unbound_scalar_reg64(struct bpf_stack_state *stack)
> +{
> +       return is_spilled_scalar_reg64(stack) && __is_scalar_unbounded(&stack->spilled_ptr);
> +}
> +
>  static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>                       struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
>  {
> +       struct bpf_reg_state unbound_reg = {};
> +       struct bpf_reg_state zero_reg = {};
>         int i, spi;
>
> +       __mark_reg_unknown(env, &unbound_reg);
> +       __mark_reg_const_zero(env, &zero_reg);
> +       zero_reg.precise = true;

these are immutable, right? Would it make sense to set them up just
once as static variables instead of initializing on each check?

> +
>         /* walk slots of the explored stack and ignore any additional
>          * slots in the current stack, since explored(safe) state
>          * didn't use them
> @@ -16484,6 +16524,49 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>                         continue;
>                 }
>

we didn't check that cur->stack[spi] is ok to access yet, it's done a
bit later with `if (i >= cur->allocated_stack)`, if I'm not mistaken.
So these checks would need to be moved a bit lower, probably.

> +               /* load of stack value with all MISC and ZERO slots produces unbounded
> +                * scalar value, call regsafe to ensure scalar ids are compared.
> +                */
> +               if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
> +                   is_stack_unbound_slot64(env, &cur->stack[spi])) {
> +                       i += BPF_REG_SIZE - 1;
> +                       if (!regsafe(env, &old->stack[spi].spilled_ptr, &unbound_reg,
> +                                    idmap, exact))
> +                               return false;
> +                       continue;
> +               }
> +
> +               if (is_stack_unbound_slot64(env, &old->stack[spi]) &&
> +                   is_spilled_unbound_scalar_reg64(&cur->stack[spi])) {
> +                       i += BPF_REG_SIZE - 1;
> +                       if (!regsafe(env,  &unbound_reg, &cur->stack[spi].spilled_ptr,
> +                                    idmap, exact))
> +                               return false;
> +                       continue;
> +               }

scalar_old = scalar_cur = NULL;
if (is_spilled_unbound64(&old->..))
    scalar_old = old->stack[spi].slot_type[0] == STACK_SPILL ?
                 &old->stack[spi].spilled_ptr : &unbound_reg;
if (is_spilled_unbound64(&cur->..))
    scalar_cur = cur->stack[spi].slot_type[0] == STACK_SPILL ?
                 &cur->stack[spi].spilled_ptr : &unbound_reg;
if (scalar_old && scalar_cur) {
    if (!regsafe(env, scalar_old, scalar_cur, idmap, exact))
        return false;
    i += BPF_REG_SIZE - 1;
    continue;
}

where is_spilled_unbound64() would be basically `return
is_spilled_unbound_scalar_reg64(&old->..) ||
is_stack_unbound_slot64(&old->...)`;
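
A compilable sketch of that helper (the name and logic come from the comment
above; the exact signature, in particular the env parameter needed by
is_stack_unbound_slot64(), is an assumption):

static bool is_spilled_unbound64(struct bpf_verifier_env *env,
				 struct bpf_stack_state *stack)
{
	/* either a 64-bit spill of an unbounded scalar, or a slot whose bytes
	 * are all STACK_MISC/STACK_ZERO (or STACK_INVALID when uninitialized
	 * stack reads are allowed) -- both read back as an unbounded scalar
	 */
	return is_spilled_unbound_scalar_reg64(stack) ||
	       is_stack_unbound_slot64(env, stack);
}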

Similarly for zero case? Though I'm wondering if zero case should be
checked first, as it's actually a subset of is_spilled_unbound64 when
it comes to STACK_ZERO/STACK_MISC mixes, no?


> +
> +               /* load of stack value with all ZERO slots produces scalar value 0,
> +                * call regsafe to ensure scalar ids are compared and precision
> +                * flags are taken into account.
> +                */
> +               if (is_spilled_scalar_reg64(&old->stack[spi]) &&
> +                   is_stack_zero64(&cur->stack[spi])) {
> +                       if (!regsafe(env, &old->stack[spi].spilled_ptr, &zero_reg,
> +                                    idmap, exact))
> +                               return false;
> +                       i += BPF_REG_SIZE - 1;
> +                       continue;
> +               }
> +
> +               if (is_stack_zero64(&old->stack[spi]) &&
> +                   is_spilled_scalar_reg64(&cur->stack[spi])) {
> +                       if (!regsafe(env, &zero_reg, &cur->stack[spi].spilled_ptr,
> +                                    idmap, exact))
> +                               return false;
> +                       i += BPF_REG_SIZE - 1;
> +                       continue;
> +               }
> +
>                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
>                         continue;
>
> --
> 2.43.0
>
Eduard Zingerman Jan. 10, 2024, 9:04 p.m. UTC | #2
On Tue, 2024-01-09 at 16:22 -0800, Andrii Nakryiko wrote:
[...]
> >  static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
> >                       struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
> >  {
> > +       struct bpf_reg_state unbound_reg = {};
> > +       struct bpf_reg_state zero_reg = {};
> >         int i, spi;
> > 
> > +       __mark_reg_unknown(env, &unbound_reg);
> > +       __mark_reg_const_zero(env, &zero_reg);
> > +       zero_reg.precise = true;
> 
> these are immutable, right? Would it make sense to set them up just
> once as static variables instead of initializing on each check?

Should be possible.

> > +
> >         /* walk slots of the explored stack and ignore any additional
> >          * slots in the current stack, since explored(safe) state
> >          * didn't use them
> > @@ -16484,6 +16524,49 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
> >                         continue;
> >                 }
> > 
> 
> we didn't check that cur->stack[spi] is ok to access yet, it's done a
> bit later with `if (i >= cur->allocated_stack)`, if I'm not mistaken.
> So these checks would need to be moved a bit lower, probably.

Right. And it seems the issue is already present:

		if (exact &&
		    old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
			return false;

This is currently executed before `if (i >= cur->allocated_stack)` check as well.
Introduced by another commit of mine :(
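
One sketch of a fix for that pre-existing exact comparison (an illustration,
not the actual follow-up patch): treat slots beyond cur's allocated stack as a
mismatch, so cur->stack[spi] is never dereferenced out of bounds:

	if (exact &&
	    (i >= cur->allocated_stack ||
	     old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
	     cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
		return false;

The new checks added by this patch would similarly need to run only after the
`i >= cur->allocated_stack` bound has been established.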

> > +               /* load of stack value with all MISC and ZERO slots produces unbounded
> > +                * scalar value, call regsafe to ensure scalar ids are compared.
> > +                */
> > +               if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
> > +                   is_stack_unbound_slot64(env, &cur->stack[spi])) {
> > +                       i += BPF_REG_SIZE - 1;
> > +                       if (!regsafe(env, &old->stack[spi].spilled_ptr, &unbound_reg,
> > +                                    idmap, exact))
> > +                               return false;
> > +                       continue;
> > +               }
> > +
> > +               if (is_stack_unbound_slot64(env, &old->stack[spi]) &&
> > +                   is_spilled_unbound_scalar_reg64(&cur->stack[spi])) {
> > +                       i += BPF_REG_SIZE - 1;
> > +                       if (!regsafe(env,  &unbound_reg, &cur->stack[spi].spilled_ptr,
> > +                                    idmap, exact))
> > +                               return false;
> > +                       continue;
> > +               }
> 
> scalar_old = scalar_cur = NULL;
> if (is_spilled_unbound64(&old->..))
>     scalar_old = old->stack[spi].slot_type[0] == STACK_SPILL ?
> &old->stack[spi].spilled_ptr : &unbound_reg;
> if (is_spilled_unbound64(&cur->..))
>     scalar_cur = cur->stack[spi].slot_type[0] == STACK_SPILL ?
> &cur->stack[spi].spilled_ptr : &unbound_reg;
> if (scalar_old && scalar_cur) {
>     if (!regsafe(env, scalar_old, scalar_new, idmap, exact)
>         return false;
>     i += BPF_REG_SIZE - 1;
>     continue;
> }

Ok, I'll switch to this.
(Although, I think old variant is a bit simpler to follow).

> where is_spilled_unbound64() would be basically `return
> is_spilled_unbound_scalar_reg64(&old->..) ||
> is_stack_unbound_slot64(&old->...)`;
> 
> Similarly for zero case? Though I'm wondering if zero case should be
> checked first, as it's actually a subset of is_spilled_unbound64 when
> it comes to STACK_ZERO/STACK_MISC mixes, no?

Yes, makes sense.

[...]
Andrii Nakryiko Jan. 10, 2024, 9:52 p.m. UTC | #3
On Wed, Jan 10, 2024 at 1:04 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Tue, 2024-01-09 at 16:22 -0800, Andrii Nakryiko wrote:
> [...]
> > >  static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
> > >                       struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
> > >  {
> > > +       struct bpf_reg_state unbound_reg = {};
> > > +       struct bpf_reg_state zero_reg = {};
> > >         int i, spi;
> > >
> > > +       __mark_reg_unknown(env, &unbound_reg);
> > > +       __mark_reg_const_zero(env, &zero_reg);
> > > +       zero_reg.precise = true;
> >
> > these are immutable, right? Would it make sense to set them up just
> > once as static variables instead of initializing on each check?
>
> Should be possible.
>
> > > +
> > >         /* walk slots of the explored stack and ignore any additional
> > >          * slots in the current stack, since explored(safe) state
> > >          * didn't use them
> > > @@ -16484,6 +16524,49 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
> > >                         continue;
> > >                 }
> > >
> >
> > we didn't check that cur->stack[spi] is ok to access yet, it's done a
> > bit later with `if (i >= cur->allocated_stack)`, if I'm not mistaken.
> > So these checks would need to be moved a bit lower, probably.
>
> Right. And it seems the issue is already present:
>
>                 if (exact &&
>                     old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
>                     cur->stack[spi].slot_type[i % BPF_REG_SIZE])
>                         return false;
>
> This is currently executed before `if (i >= cur->allocated_stack)` check as well.
> Introduced by another commit of mine :(

I guess we'll need to move that too, then

>
> > > +               /* load of stack value with all MISC and ZERO slots produces unbounded
> > > +                * scalar value, call regsafe to ensure scalar ids are compared.
> > > +                */
> > > +               if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
> > > +                   is_stack_unbound_slot64(env, &cur->stack[spi])) {
> > > +                       i += BPF_REG_SIZE - 1;
> > > +                       if (!regsafe(env, &old->stack[spi].spilled_ptr, &unbound_reg,
> > > +                                    idmap, exact))
> > > +                               return false;
> > > +                       continue;
> > > +               }
> > > +
> > > +               if (is_stack_unbound_slot64(env, &old->stack[spi]) &&
> > > +                   is_spilled_unbound_scalar_reg64(&cur->stack[spi])) {
> > > +                       i += BPF_REG_SIZE - 1;
> > > +                       if (!regsafe(env,  &unbound_reg, &cur->stack[spi].spilled_ptr,
> > > +                                    idmap, exact))
> > > +                               return false;
> > > +                       continue;
> > > +               }
> >
> > scalar_old = scalar_cur = NULL;
> > if (is_spilled_unbound64(&old->..))
> >     scalar_old = old->stack[spi].slot_type[0] == STACK_SPILL ?
> > &old->stack[spi].spilled_ptr : &unbound_reg;
> > if (is_spilled_unbound64(&cur->..))
> >     scalar_cur = cur->stack[spi].slot_type[0] == STACK_SPILL ?
> > &cur->stack[spi].spilled_ptr : &unbound_reg;
> > if (scalar_old && scalar_cur) {
> >     if (!regsafe(env, scalar_old, scalar_new, idmap, exact)
> >         return false;
> >     i += BPF_REG_SIZE - 1;
> >     continue;
> > }
>
> Ok, I'll switch to this.
> (Although, I think old variant is a bit simpler to follow).

my goal was to eliminate the duplicated logic inside each if and to show
at a high level that we are comparing two "logically unbound scalars",
regardless of whether that's a STACK_xxx mix or a spilled scalar.

I haven't thought this through, but maybe we can simplify further to
something like this:

if (is_spilled_unbound64(old) && is_spilled_unbound64(cur)) {
  scalar_cur = ...
  scalar_old = ...
  if (!regsafe(...))
    return false;
  i += BPF_REG_SIZE - 1;
}

In general, this symmetry in two consecutive if conditions seems like
an opportunity to simplify. But if you think it's more complicated,
I'm fine with leaving it as is.
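
Filling in the ellipses of that sketch (an assumption about how the pieces fit
together, reusing the is_spilled_unbound64() helper discussed earlier; not the
final code of the series):

	if (is_spilled_unbound64(env, &old->stack[spi]) &&
	    is_spilled_unbound64(env, &cur->stack[spi])) {
		struct bpf_reg_state *old_reg, *cur_reg;

		/* a slot of MISC/ZERO bytes stands in for an unbound scalar */
		old_reg = old->stack[spi].slot_type[0] == STACK_SPILL ?
			  &old->stack[spi].spilled_ptr : &unbound_reg;
		cur_reg = cur->stack[spi].slot_type[0] == STACK_SPILL ?
			  &cur->stack[spi].spilled_ptr : &unbound_reg;
		if (!regsafe(env, old_reg, cur_reg, idmap, exact))
			return false;
		i += BPF_REG_SIZE - 1;
		continue;
	}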

>
> > where is_spilled_unbound64() would be basically `return
> > is_spilled_unbound_scalar_reg64(&old->..) ||
> > is_stack_unbound_slot64(&old->...)`;
> >
> > Similarly for zero case? Though I'm wondering if zero case should be
> > checked first, as it's actually a subset of is_spilled_unbound64 when
> > it comes to STACK_ZERO/STACK_MISC mixes, no?
>
> Yes, makes sense.
>
> [...]

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index aeb3e198a5ea..cb82f8d4226f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1170,6 +1170,12 @@  static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
 	*stype = STACK_MISC;
 }
 
+static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
+{
+	return stack->slot_type[0] == STACK_SPILL &&
+	       stack->spilled_ptr.type == SCALAR_VALUE;
+}
+
 static void scrub_spilled_slot(u8 *stype)
 {
 	if (*stype != STACK_INVALID)
@@ -16459,11 +16465,45 @@  static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
 	}
 }
 
+static bool is_stack_zero64(struct bpf_stack_state *stack)
+{
+	u32 i;
+
+	for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
+		if (stack->slot_type[i] != STACK_ZERO)
+			return false;
+	return true;
+}
+
+static bool is_stack_unbound_slot64(struct bpf_verifier_env *env,
+				    struct bpf_stack_state *stack)
+{
+	u32 i;
+
+	for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i)
+		if (stack->slot_type[i] != STACK_ZERO &&
+		    stack->slot_type[i] != STACK_MISC &&
+		    (!env->allow_uninit_stack || stack->slot_type[i] != STACK_INVALID))
+			return false;
+	return true;
+}
+
+static bool is_spilled_unbound_scalar_reg64(struct bpf_stack_state *stack)
+{
+	return is_spilled_scalar_reg64(stack) && __is_scalar_unbounded(&stack->spilled_ptr);
+}
+
 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
 		      struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
 {
+	struct bpf_reg_state unbound_reg = {};
+	struct bpf_reg_state zero_reg = {};
 	int i, spi;
 
+	__mark_reg_unknown(env, &unbound_reg);
+	__mark_reg_const_zero(env, &zero_reg);
+	zero_reg.precise = true;
+
 	/* walk slots of the explored stack and ignore any additional
 	 * slots in the current stack, since explored(safe) state
 	 * didn't use them
@@ -16484,6 +16524,49 @@  static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
 			continue;
 		}
 
+		/* load of stack value with all MISC and ZERO slots produces unbounded
+		 * scalar value, call regsafe to ensure scalar ids are compared.
+		 */
+		if (is_spilled_unbound_scalar_reg64(&old->stack[spi]) &&
+		    is_stack_unbound_slot64(env, &cur->stack[spi])) {
+			i += BPF_REG_SIZE - 1;
+			if (!regsafe(env, &old->stack[spi].spilled_ptr, &unbound_reg,
+				     idmap, exact))
+				return false;
+			continue;
+		}
+
+		if (is_stack_unbound_slot64(env, &old->stack[spi]) &&
+		    is_spilled_unbound_scalar_reg64(&cur->stack[spi])) {
+			i += BPF_REG_SIZE - 1;
+			if (!regsafe(env,  &unbound_reg, &cur->stack[spi].spilled_ptr,
+				     idmap, exact))
+				return false;
+			continue;
+		}
+
+		/* load of stack value with all ZERO slots produces scalar value 0,
+		 * call regsafe to ensure scalar ids are compared and precision
+		 * flags are taken into account.
+		 */
+		if (is_spilled_scalar_reg64(&old->stack[spi]) &&
+		    is_stack_zero64(&cur->stack[spi])) {
+			if (!regsafe(env, &old->stack[spi].spilled_ptr, &zero_reg,
+				     idmap, exact))
+				return false;
+			i += BPF_REG_SIZE - 1;
+			continue;
+		}
+
+		if (is_stack_zero64(&old->stack[spi]) &&
+		    is_spilled_scalar_reg64(&cur->stack[spi])) {
+			if (!regsafe(env, &zero_reg, &cur->stack[spi].spilled_ptr,
+				     idmap, exact))
+				return false;
+			i += BPF_REG_SIZE - 1;
+			continue;
+		}
+
 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
 			continue;