diff mbox series

[bpf-next,06/10] bpf: fix propagate_precision() logic for inner frames

Message ID 20230425234911.2113352-7-andrii@kernel.org (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Add precision propagation for subprogs and callbacks | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 20 this patch: 20
netdev/cc_maintainers warning 8 maintainers not CCed: yhs@fb.com kpsingh@kernel.org martin.lau@linux.dev john.fastabend@gmail.com song@kernel.org sdf@google.com jolsa@kernel.org haoluo@google.com
netdev/build_clang success Errors and warnings before: 12 this patch: 12
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 20 this patch: 20
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_verifier on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-36 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on s390x with gcc

Commit Message

Andrii Nakryiko April 25, 2023, 11:49 p.m. UTC
Fix propagate_precision() logic to perform propagation of all necessary
registers and stack slots across all active frames *in one batch step*.

Doing this for each register/slot in each individual frame is wasteful,
but the main problem is that backtracking of instruction in any frame
except the deepest one just doesn't work. This is due to backtracking
logic relying on jump history, and available jump history always starts
(or ends, depending how you view it) in current frame. So, if
prog A (frame #0) called subprog B (frame #1) and we need to propagate
precision of, say, register R6 (callee-saved) within frame #0, we
actually don't even know where jump history that corresponds to prog
A even starts. We'd need to skip subprog part of jump history first to
be able to do this.

Luckily, with struct backtrack_state and __mark_chain_precision()
handling bitmasks tracking/propagation across all active frames at the
same time (added in previous patch), propagate_precision() can be both
fixed and sped up by setting all the necessary bits across all frames
and then performing one __mark_chain_precision() pass. This makes it
unnecessary to skip subprog parts of jump history.

We also improve logging along the way, to clearly specify which
registers' and slots' precision markings are propagated within which
frame.

Fixes: 529409ea92d5 ("bpf: propagate precision across all frames, not just the last one")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 kernel/bpf/verifier.c | 49 +++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 18 deletions(-)

Comments

Alexei Starovoitov May 4, 2023, 3:14 a.m. UTC | #1
On Tue, Apr 25, 2023 at 04:49:07PM -0700, Andrii Nakryiko wrote:
> Fix propagate_precision() logic to perform propagation of all necessary
> registers and stack slots across all active frames *in one batch step*.
> 
> Doing this for each register/slot in each individual frame is wasteful,
> but the main problem is that backtracking of instruction in any frame
> except the deepest one just doesn't work. This is due to backtracking
> logic relying on jump history, and available jump history always starts
> (or ends, depending how you view it) in current frame. So, if
> prog A (frame #0) called subprog B (frame #1) and we need to propagate
> precision of, say, register R6 (callee-saved) within frame #0, we
> actually don't even know where jump history that corresponds to prog
> A even starts. We'd need to skip subprog part of jump history first to
> be able to do this.
> 
> Luckily, with struct backtrack_state and __mark_chain_precision()
> handling bitmasks tracking/propagation across all active frames at the
> same time (added in previous patch), propagate_precision() can be both
> fixed and sped up by setting all the necessary bits across all frames
> and then performing one __mark_chain_precision() pass. This makes it
> unnecessary to skip subprog parts of jump history.
> 
> We also improve logging along the way, to clearly specify which
> registers' and slots' precision markings are propagated within which
> frame.
> 
> Fixes: 529409ea92d5 ("bpf: propagate precision across all frames, not just the last one")
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  kernel/bpf/verifier.c | 49 +++++++++++++++++++++++++++----------------
>  1 file changed, 31 insertions(+), 18 deletions(-)
> 
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 0b19b3d9af65..66d64ac10fb1 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -3885,14 +3885,12 @@ int mark_chain_precision(struct bpf_verifier_env *env, int regno)
>  	return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
>  }
>  
> -static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
> +static int mark_chain_precision_batch(struct bpf_verifier_env *env, int frame)
>  {
> -	return __mark_chain_precision(env, frame, regno, -1);
> -}
> -
> -static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
> -{
> -	return __mark_chain_precision(env, frame, -1, spi);
> +	/* we assume env->bt was set outside with desired reg and stack masks
> +	 * for all frames
> +	 */
> +	return __mark_chain_precision(env, frame, -1, -1);
>  }
>  
>  static bool is_spillable_regtype(enum bpf_reg_type type)
> @@ -15308,20 +15306,25 @@ static int propagate_precision(struct bpf_verifier_env *env,
>  	struct bpf_reg_state *state_reg;
>  	struct bpf_func_state *state;
>  	int i, err = 0, fr;
> +	bool first;
>  
>  	for (fr = old->curframe; fr >= 0; fr--) {
>  		state = old->frame[fr];
>  		state_reg = state->regs;
> +		first = true;
>  		for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
>  			if (state_reg->type != SCALAR_VALUE ||
>  			    !state_reg->precise ||
>  			    !(state_reg->live & REG_LIVE_READ))
>  				continue;
> -			if (env->log.level & BPF_LOG_LEVEL2)
> -				verbose(env, "frame %d: propagating r%d\n", fr, i);
> -			err = mark_chain_precision_frame(env, fr, i);
> -			if (err < 0)
> -				return err;
> +			if (env->log.level & BPF_LOG_LEVEL2) {
> +				if (first)
> +					verbose(env, "frame %d: propagating r%d", fr, i);
> +				else
> +					verbose(env, ",r%d", i);
> +			}
> +			bt_set_frame_reg(&env->bt, fr, i);
> +			first = false;
>  		}
>  
>  		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
> @@ -15332,14 +15335,24 @@ static int propagate_precision(struct bpf_verifier_env *env,
>  			    !state_reg->precise ||
>  			    !(state_reg->live & REG_LIVE_READ))
>  				continue;
> -			if (env->log.level & BPF_LOG_LEVEL2)
> -				verbose(env, "frame %d: propagating fp%d\n",
> -					fr, (-i - 1) * BPF_REG_SIZE);
> -			err = mark_chain_precision_stack_frame(env, fr, i);
> -			if (err < 0)
> -				return err;
> +			if (env->log.level & BPF_LOG_LEVEL2) {
> +				if (first)
> +					verbose(env, "frame %d: propagating fp%d",
> +						fr, (-i - 1) * BPF_REG_SIZE);
> +				else
> +					verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
> +			}
> +			bt_set_frame_slot(&env->bt, fr, i);
> +			first = false;
>  		}
> +		if (!first)
> +			verbose(env, "\n");
>  	}
> +
> +	err = mark_chain_precision_batch(env, old->curframe);

The optimization makes sense, sort-of, but 'first' flag is to separate frames on
different lines ?
The code in propagate_precision() before mark_chain_precision_batch()
will only print '... propagating...' few times.
regs and stack will be on one line anyway.
Is it that ugly to print all frames on one line in practice?
The 2nd and 3rd frames will be on one line too. Only 1st frame is on its own line?
I'm not getting the idea of 'first'.
Alexei Starovoitov May 4, 2023, 4:04 p.m. UTC | #2
On Tue, Apr 25, 2023 at 04:49:07PM -0700, Andrii Nakryiko wrote:
> +
> +	err = mark_chain_precision_batch(env, old->curframe);

I think I'm sort-of starting to get it, but
above should be env->cur_state->curframe instead of old->curframe, no?
mark_chain_precision_batch will be using branch history of current frame.
__mark_chain_precision() always operates on
  struct bpf_verifier_state *st = env->cur_state;
Alexei Starovoitov May 4, 2023, 4:27 p.m. UTC | #3
On Thu, May 4, 2023 at 9:04 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Tue, Apr 25, 2023 at 04:49:07PM -0700, Andrii Nakryiko wrote:
> > +
> > +     err = mark_chain_precision_batch(env, old->curframe);
>
> I think I'm sort-of starting to get it, but
> above should be env->cur_state->curframe instead of old->curframe, no?
> mark_chain_precision_batch will be using branch history of current frame.
> __mark_chain_precision() always operates on
>   struct bpf_verifier_state *st = env->cur_state;

wait. patch 5 made __mark_chain_precision() to ignore 'frame' argument.
Why did you keep it in mark_chain_precision_batch() and pass it here?
Andrii Nakryiko May 4, 2023, 8:57 p.m. UTC | #4
On Wed, May 3, 2023 at 8:14 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Tue, Apr 25, 2023 at 04:49:07PM -0700, Andrii Nakryiko wrote:
> > Fix propagate_precision() logic to perform propagation of all necessary
> > registers and stack slots across all active frames *in one batch step*.
> >
> > Doing this for each register/slot in each individual frame is wasteful,
> > but the main problem is that backtracking of instruction in any frame
> > except the deepest one just doesn't work. This is due to backtracking
> > logic relying on jump history, and available jump history always starts
> > (or ends, depending how you view it) in current frame. So, if
> > prog A (frame #0) called subprog B (frame #1) and we need to propagate
> > precision of, say, register R6 (callee-saved) within frame #0, we
> > actually don't even know where jump history that corresponds to prog
> > A even starts. We'd need to skip subprog part of jump history first to
> > be able to do this.
> >
> > Luckily, with struct backtrack_state and __mark_chain_precision()
> > handling bitmasks tracking/propagation across all active frames at the
> > same time (added in previous patch), propagate_precision() can be both
> > fixed and sped up by setting all the necessary bits across all frames
> > and then performing one __mark_chain_precision() pass. This makes it
> > unnecessary to skip subprog parts of jump history.
> >
> > We also improve logging along the way, to clearly specify which
> > registers' and slots' precision markings are propagated within which
> > frame.
> >
> > Fixes: 529409ea92d5 ("bpf: propagate precision across all frames, not just the last one")
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >  kernel/bpf/verifier.c | 49 +++++++++++++++++++++++++++----------------
> >  1 file changed, 31 insertions(+), 18 deletions(-)
> >
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index 0b19b3d9af65..66d64ac10fb1 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -3885,14 +3885,12 @@ int mark_chain_precision(struct bpf_verifier_env *env, int regno)
> >       return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
> >  }
> >
> > -static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
> > +static int mark_chain_precision_batch(struct bpf_verifier_env *env, int frame)
> >  {
> > -     return __mark_chain_precision(env, frame, regno, -1);
> > -}
> > -
> > -static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
> > -{
> > -     return __mark_chain_precision(env, frame, -1, spi);
> > +     /* we assume env->bt was set outside with desired reg and stack masks
> > +      * for all frames
> > +      */
> > +     return __mark_chain_precision(env, frame, -1, -1);
> >  }
> >
> >  static bool is_spillable_regtype(enum bpf_reg_type type)
> > @@ -15308,20 +15306,25 @@ static int propagate_precision(struct bpf_verifier_env *env,
> >       struct bpf_reg_state *state_reg;
> >       struct bpf_func_state *state;
> >       int i, err = 0, fr;
> > +     bool first;
> >
> >       for (fr = old->curframe; fr >= 0; fr--) {
> >               state = old->frame[fr];
> >               state_reg = state->regs;
> > +             first = true;
> >               for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
> >                       if (state_reg->type != SCALAR_VALUE ||
> >                           !state_reg->precise ||
> >                           !(state_reg->live & REG_LIVE_READ))
> >                               continue;
> > -                     if (env->log.level & BPF_LOG_LEVEL2)
> > -                             verbose(env, "frame %d: propagating r%d\n", fr, i);
> > -                     err = mark_chain_precision_frame(env, fr, i);
> > -                     if (err < 0)
> > -                             return err;
> > +                     if (env->log.level & BPF_LOG_LEVEL2) {
> > +                             if (first)
> > +                                     verbose(env, "frame %d: propagating r%d", fr, i);
> > +                             else
> > +                                     verbose(env, ",r%d", i);
> > +                     }
> > +                     bt_set_frame_reg(&env->bt, fr, i);
> > +                     first = false;
> >               }
> >
> >               for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
> > @@ -15332,14 +15335,24 @@ static int propagate_precision(struct bpf_verifier_env *env,
> >                           !state_reg->precise ||
> >                           !(state_reg->live & REG_LIVE_READ))
> >                               continue;
> > -                     if (env->log.level & BPF_LOG_LEVEL2)
> > -                             verbose(env, "frame %d: propagating fp%d\n",
> > -                                     fr, (-i - 1) * BPF_REG_SIZE);
> > -                     err = mark_chain_precision_stack_frame(env, fr, i);
> > -                     if (err < 0)
> > -                             return err;
> > +                     if (env->log.level & BPF_LOG_LEVEL2) {
> > +                             if (first)
> > +                                     verbose(env, "frame %d: propagating fp%d",
> > +                                             fr, (-i - 1) * BPF_REG_SIZE);
> > +                             else
> > +                                     verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
> > +                     }
> > +                     bt_set_frame_slot(&env->bt, fr, i);
> > +                     first = false;
> >               }
> > +             if (!first)
> > +                     verbose(env, "\n");
> >       }
> > +
> > +     err = mark_chain_precision_batch(env, old->curframe);
>
> The optimization makes sense, sort-of, but 'first' flag is to separate frames on
> different lines ?

Yes, first is purely for formatting. Each frame will have a separate
line, but all registers and stack frames for that frame will be on
that single line, like so:

frame 1: propagating r1,r2,r3,fp-8,fp-16
frame 0: propagating r3,r9,fp-120

> The code in propagate_precision() before mark_chain_precision_batch()
> will only print '... propagating...' few times.
> regs and stack will be on one line anyway.
> Is it that ugly to print all frames on one line in practice?
> The 2nd and 3rd frames will be on one line too. Only 1st frame is on its own line?
> I'm not getting the idea of 'first'.

So the above example shows how it looks now. Same situation without
this formatting would look like:

frame 1: propagating r1
frame 1: propagating r2
frame 1: propagating r3
frame 1: propagating fp-8
frame 1: propagating fp-16
frame 0: propagating r3
frame 0: propagating r9
frame 0: propagating fp-120

I wanted to have the same formatted set of registers and stack slots,
so make it more succinct and printing entire sets of registers/stack
slots, consistent with the backtrack log format.
Alexei Starovoitov May 4, 2023, 9:08 p.m. UTC | #5
On Thu, May 04, 2023 at 01:57:15PM -0700, Andrii Nakryiko wrote:
> On Wed, May 3, 2023 at 8:14 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Tue, Apr 25, 2023 at 04:49:07PM -0700, Andrii Nakryiko wrote:
> > > Fix propagate_precision() logic to perform propagation of all necessary
> > > registers and stack slots across all active frames *in one batch step*.
> > >
> > > Doing this for each register/slot in each individual frame is wasteful,
> > > but the main problem is that backtracking of instruction in any frame
> > > except the deepest one just doesn't work. This is due to backtracking
> > > logic relying on jump history, and available jump history always starts
> > > (or ends, depending how you view it) in current frame. So, if
> > > prog A (frame #0) called subprog B (frame #1) and we need to propagate
> > > precision of, say, register R6 (callee-saved) within frame #0, we
> > > actually don't even know where jump history that corresponds to prog
> > > A even starts. We'd need to skip subprog part of jump history first to
> > > be able to do this.
> > >
> > > Luckily, with struct backtrack_state and __mark_chain_precision()
> > > handling bitmasks tracking/propagation across all active frames at the
> > > same time (added in previous patch), propagate_precision() can be both
> > > fixed and sped up by setting all the necessary bits across all frames
> > > and then performing one __mark_chain_precision() pass. This makes it
> > > unnecessary to skip subprog parts of jump history.
> > >
> > > We also improve logging along the way, to clearly specify which
> > > registers' and slots' precision markings are propagated within which
> > > frame.
> > >
> > > Fixes: 529409ea92d5 ("bpf: propagate precision across all frames, not just the last one")
> > > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > > ---
> > >  kernel/bpf/verifier.c | 49 +++++++++++++++++++++++++++----------------
> > >  1 file changed, 31 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > index 0b19b3d9af65..66d64ac10fb1 100644
> > > --- a/kernel/bpf/verifier.c
> > > +++ b/kernel/bpf/verifier.c
> > > @@ -3885,14 +3885,12 @@ int mark_chain_precision(struct bpf_verifier_env *env, int regno)
> > >       return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
> > >  }
> > >
> > > -static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
> > > +static int mark_chain_precision_batch(struct bpf_verifier_env *env, int frame)
> > >  {
> > > -     return __mark_chain_precision(env, frame, regno, -1);
> > > -}
> > > -
> > > -static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
> > > -{
> > > -     return __mark_chain_precision(env, frame, -1, spi);
> > > +     /* we assume env->bt was set outside with desired reg and stack masks
> > > +      * for all frames
> > > +      */
> > > +     return __mark_chain_precision(env, frame, -1, -1);
> > >  }
> > >
> > >  static bool is_spillable_regtype(enum bpf_reg_type type)
> > > @@ -15308,20 +15306,25 @@ static int propagate_precision(struct bpf_verifier_env *env,
> > >       struct bpf_reg_state *state_reg;
> > >       struct bpf_func_state *state;
> > >       int i, err = 0, fr;
> > > +     bool first;
> > >
> > >       for (fr = old->curframe; fr >= 0; fr--) {
> > >               state = old->frame[fr];
> > >               state_reg = state->regs;
> > > +             first = true;
> > >               for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
> > >                       if (state_reg->type != SCALAR_VALUE ||
> > >                           !state_reg->precise ||
> > >                           !(state_reg->live & REG_LIVE_READ))
> > >                               continue;
> > > -                     if (env->log.level & BPF_LOG_LEVEL2)
> > > -                             verbose(env, "frame %d: propagating r%d\n", fr, i);
> > > -                     err = mark_chain_precision_frame(env, fr, i);
> > > -                     if (err < 0)
> > > -                             return err;
> > > +                     if (env->log.level & BPF_LOG_LEVEL2) {
> > > +                             if (first)
> > > +                                     verbose(env, "frame %d: propagating r%d", fr, i);
> > > +                             else
> > > +                                     verbose(env, ",r%d", i);
> > > +                     }
> > > +                     bt_set_frame_reg(&env->bt, fr, i);
> > > +                     first = false;
> > >               }
> > >
> > >               for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
> > > @@ -15332,14 +15335,24 @@ static int propagate_precision(struct bpf_verifier_env *env,
> > >                           !state_reg->precise ||
> > >                           !(state_reg->live & REG_LIVE_READ))
> > >                               continue;
> > > -                     if (env->log.level & BPF_LOG_LEVEL2)
> > > -                             verbose(env, "frame %d: propagating fp%d\n",
> > > -                                     fr, (-i - 1) * BPF_REG_SIZE);
> > > -                     err = mark_chain_precision_stack_frame(env, fr, i);
> > > -                     if (err < 0)
> > > -                             return err;
> > > +                     if (env->log.level & BPF_LOG_LEVEL2) {
> > > +                             if (first)
> > > +                                     verbose(env, "frame %d: propagating fp%d",
> > > +                                             fr, (-i - 1) * BPF_REG_SIZE);
> > > +                             else
> > > +                                     verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
> > > +                     }
> > > +                     bt_set_frame_slot(&env->bt, fr, i);
> > > +                     first = false;
> > >               }
> > > +             if (!first)
> > > +                     verbose(env, "\n");
> > >       }
> > > +
> > > +     err = mark_chain_precision_batch(env, old->curframe);
> >
> > The optimization makes sense, sort-of, but 'first' flag is to separate frames on
> > different lines ?
> 
> Yes, first is purely for formatting. Each frame will have a separate
> line, but all registers and stack frames for that frame will be on
> that single line, like so:
> 
> frame 1: propagating r1,r2,r3,fp-8,fp-16
> frame 0: propagating r3,r9,fp-120

I see. The example of the output helped a lot.
Now the code makes sense. Pls include it in a commit or a comment.
Andrii Nakryiko May 4, 2023, 9:39 p.m. UTC | #6
On Thu, May 4, 2023 at 9:28 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Thu, May 4, 2023 at 9:04 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Tue, Apr 25, 2023 at 04:49:07PM -0700, Andrii Nakryiko wrote:
> > > +
> > > +     err = mark_chain_precision_batch(env, old->curframe);
> >
> > I think I'm sort-of starting to get it, but
> > above should be env->cur_state->curframe instead of old->curframe, no?
> > mark_chain_precision_batch will be using branch history of current frame.
> > __mark_chain_precision() always operates on
> >   struct bpf_verifier_state *st = env->cur_state;
>
> wait. patch 5 made __mark_chain_precision() to ignore 'frame' argument.
> Why did you keep it in mark_chain_precision_batch() and pass it here?

Ok, few things here.

1. We don't ignore frame, bt_init(bt, frame) is what sets that for
backtrack_state. Currently we get it as input argument, but see below,
we can get it from the current state now.

2. old->curframe and cur_state->curframe should be the same because
two states are equivalent. I chose to pass old->curframe because
that's what existing for loop was using for iteration.

But you are right, it's a missed opportunity to simplify.
__mark_chain_precision() always works with env->cur_state and should
always start from its deepest frame, so I'll drop the frame argument
from __mark_chain_precision() completely and will be getting it from
cur_state. Good observation!
diff mbox series

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0b19b3d9af65..66d64ac10fb1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3885,14 +3885,12 @@  int mark_chain_precision(struct bpf_verifier_env *env, int regno)
 	return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
 }
 
-static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
+static int mark_chain_precision_batch(struct bpf_verifier_env *env, int frame)
 {
-	return __mark_chain_precision(env, frame, regno, -1);
-}
-
-static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
-{
-	return __mark_chain_precision(env, frame, -1, spi);
+	/* we assume env->bt was set outside with desired reg and stack masks
+	 * for all frames
+	 */
+	return __mark_chain_precision(env, frame, -1, -1);
 }
 
 static bool is_spillable_regtype(enum bpf_reg_type type)
@@ -15308,20 +15306,25 @@  static int propagate_precision(struct bpf_verifier_env *env,
 	struct bpf_reg_state *state_reg;
 	struct bpf_func_state *state;
 	int i, err = 0, fr;
+	bool first;
 
 	for (fr = old->curframe; fr >= 0; fr--) {
 		state = old->frame[fr];
 		state_reg = state->regs;
+		first = true;
 		for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
 			if (state_reg->type != SCALAR_VALUE ||
 			    !state_reg->precise ||
 			    !(state_reg->live & REG_LIVE_READ))
 				continue;
-			if (env->log.level & BPF_LOG_LEVEL2)
-				verbose(env, "frame %d: propagating r%d\n", fr, i);
-			err = mark_chain_precision_frame(env, fr, i);
-			if (err < 0)
-				return err;
+			if (env->log.level & BPF_LOG_LEVEL2) {
+				if (first)
+					verbose(env, "frame %d: propagating r%d", fr, i);
+				else
+					verbose(env, ",r%d", i);
+			}
+			bt_set_frame_reg(&env->bt, fr, i);
+			first = false;
 		}
 
 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
@@ -15332,14 +15335,24 @@  static int propagate_precision(struct bpf_verifier_env *env,
 			    !state_reg->precise ||
 			    !(state_reg->live & REG_LIVE_READ))
 				continue;
-			if (env->log.level & BPF_LOG_LEVEL2)
-				verbose(env, "frame %d: propagating fp%d\n",
-					fr, (-i - 1) * BPF_REG_SIZE);
-			err = mark_chain_precision_stack_frame(env, fr, i);
-			if (err < 0)
-				return err;
+			if (env->log.level & BPF_LOG_LEVEL2) {
+				if (first)
+					verbose(env, "frame %d: propagating fp%d",
+						fr, (-i - 1) * BPF_REG_SIZE);
+				else
+					verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
+			}
+			bt_set_frame_slot(&env->bt, fr, i);
+			first = false;
 		}
+		if (!first)
+			verbose(env, "\n");
 	}
+
+	err = mark_chain_precision_batch(env, old->curframe);
+	if (err < 0)
+		return err;
+
 	return 0;
 }